mirror of https://github.com/yuzu-emu/yuzu-android.git
Commit: Merge remote-tracking branch 'upstream/master' into int-flags
@@ -5,8 +5,27 @@ add_library(video_core STATIC
    buffer_cache/buffer_cache.h
    buffer_cache/map_interval.cpp
    buffer_cache/map_interval.h
    cdma_pusher.cpp
    cdma_pusher.h
    command_classes/codecs/codec.cpp
    command_classes/codecs/codec.h
    command_classes/codecs/h264.cpp
    command_classes/codecs/h264.h
    command_classes/codecs/vp9.cpp
    command_classes/codecs/vp9.h
    command_classes/codecs/vp9_types.h
    command_classes/host1x.cpp
    command_classes/host1x.h
    command_classes/nvdec.cpp
    command_classes/nvdec.h
    command_classes/nvdec_common.h
    command_classes/sync_manager.cpp
    command_classes/sync_manager.h
    command_classes/vic.cpp
    command_classes/vic.h
    compatible_formats.cpp
    compatible_formats.h
    delayed_destruction_ring.h
    dirty_flags.cpp
    dirty_flags.h
    dma_pusher.cpp

@@ -29,6 +48,7 @@ add_library(video_core STATIC
    engines/shader_bytecode.h
    engines/shader_header.h
    engines/shader_type.h
    framebuffer_config.h
    macro/macro.cpp
    macro/macro.h
    macro/macro_hle.cpp

@@ -40,10 +60,6 @@ add_library(video_core STATIC
    fence_manager.h
    gpu.cpp
    gpu.h
    gpu_asynch.cpp
    gpu_asynch.h
    gpu_synch.cpp
    gpu_synch.h
    gpu_thread.cpp
    gpu_thread.h
    guest_driver.cpp

@@ -66,14 +82,10 @@ add_library(video_core STATIC
    renderer_opengl/gl_device.h
    renderer_opengl/gl_fence_manager.cpp
    renderer_opengl/gl_fence_manager.h
    renderer_opengl/gl_framebuffer_cache.cpp
    renderer_opengl/gl_framebuffer_cache.h
    renderer_opengl/gl_rasterizer.cpp
    renderer_opengl/gl_rasterizer.h
    renderer_opengl/gl_resource_manager.cpp
    renderer_opengl/gl_resource_manager.h
    renderer_opengl/gl_sampler_cache.cpp
    renderer_opengl/gl_sampler_cache.h
    renderer_opengl/gl_shader_cache.cpp
    renderer_opengl/gl_shader_cache.h
    renderer_opengl/gl_shader_decompiler.cpp

@@ -95,10 +107,62 @@ add_library(video_core STATIC
    renderer_opengl/maxwell_to_gl.h
    renderer_opengl/renderer_opengl.cpp
    renderer_opengl/renderer_opengl.h
    renderer_opengl/utils.cpp
    renderer_opengl/utils.h
    sampler_cache.cpp
    sampler_cache.h
    renderer_opengl/util_shaders.cpp
    renderer_opengl/util_shaders.h
    renderer_vulkan/blit_image.cpp
    renderer_vulkan/blit_image.h
    renderer_vulkan/fixed_pipeline_state.cpp
    renderer_vulkan/fixed_pipeline_state.h
    renderer_vulkan/maxwell_to_vk.cpp
    renderer_vulkan/maxwell_to_vk.h
    renderer_vulkan/renderer_vulkan.h
    renderer_vulkan/renderer_vulkan.cpp
    renderer_vulkan/vk_blit_screen.cpp
    renderer_vulkan/vk_blit_screen.h
    renderer_vulkan/vk_buffer_cache.cpp
    renderer_vulkan/vk_buffer_cache.h
    renderer_vulkan/vk_command_pool.cpp
    renderer_vulkan/vk_command_pool.h
    renderer_vulkan/vk_compute_pass.cpp
    renderer_vulkan/vk_compute_pass.h
    renderer_vulkan/vk_compute_pipeline.cpp
    renderer_vulkan/vk_compute_pipeline.h
    renderer_vulkan/vk_descriptor_pool.cpp
    renderer_vulkan/vk_descriptor_pool.h
    renderer_vulkan/vk_fence_manager.cpp
    renderer_vulkan/vk_fence_manager.h
    renderer_vulkan/vk_graphics_pipeline.cpp
    renderer_vulkan/vk_graphics_pipeline.h
    renderer_vulkan/vk_master_semaphore.cpp
    renderer_vulkan/vk_master_semaphore.h
    renderer_vulkan/vk_memory_manager.cpp
    renderer_vulkan/vk_memory_manager.h
    renderer_vulkan/vk_pipeline_cache.cpp
    renderer_vulkan/vk_pipeline_cache.h
    renderer_vulkan/vk_query_cache.cpp
    renderer_vulkan/vk_query_cache.h
    renderer_vulkan/vk_rasterizer.cpp
    renderer_vulkan/vk_rasterizer.h
    renderer_vulkan/vk_resource_pool.cpp
    renderer_vulkan/vk_resource_pool.h
    renderer_vulkan/vk_scheduler.cpp
    renderer_vulkan/vk_scheduler.h
    renderer_vulkan/vk_shader_decompiler.cpp
    renderer_vulkan/vk_shader_decompiler.h
    renderer_vulkan/vk_shader_util.cpp
    renderer_vulkan/vk_shader_util.h
    renderer_vulkan/vk_staging_buffer_pool.cpp
    renderer_vulkan/vk_staging_buffer_pool.h
    renderer_vulkan/vk_state_tracker.cpp
    renderer_vulkan/vk_state_tracker.h
    renderer_vulkan/vk_stream_buffer.cpp
    renderer_vulkan/vk_stream_buffer.h
    renderer_vulkan/vk_swapchain.cpp
    renderer_vulkan/vk_swapchain.h
    renderer_vulkan/vk_texture_cache.cpp
    renderer_vulkan/vk_texture_cache.h
    renderer_vulkan/vk_update_descriptor.cpp
    renderer_vulkan/vk_update_descriptor.h
    shader_cache.h
    shader_notify.cpp
    shader_notify.h

@@ -155,109 +219,71 @@ add_library(video_core STATIC
    shader/transform_feedback.h
    surface.cpp
    surface.h
    texture_cache/accelerated_swizzle.cpp
    texture_cache/accelerated_swizzle.h
    texture_cache/decode_bc4.cpp
    texture_cache/decode_bc4.h
    texture_cache/descriptor_table.h
    texture_cache/formatter.cpp
    texture_cache/formatter.h
    texture_cache/format_lookup_table.cpp
    texture_cache/format_lookup_table.h
    texture_cache/surface_base.cpp
    texture_cache/surface_base.h
    texture_cache/surface_params.cpp
    texture_cache/surface_params.h
    texture_cache/surface_view.cpp
    texture_cache/surface_view.h
    texture_cache/image_base.cpp
    texture_cache/image_base.h
    texture_cache/image_info.cpp
    texture_cache/image_info.h
    texture_cache/image_view_base.cpp
    texture_cache/image_view_base.h
    texture_cache/image_view_info.cpp
    texture_cache/image_view_info.h
    texture_cache/render_targets.h
    texture_cache/samples_helper.h
    texture_cache/slot_vector.h
    texture_cache/texture_cache.h
    texture_cache/types.h
    texture_cache/util.cpp
    texture_cache/util.h
    textures/astc.cpp
    textures/astc.h
    textures/convert.cpp
    textures/convert.h
    textures/decoders.cpp
    textures/decoders.h
    textures/texture.cpp
    textures/texture.h
    video_core.cpp
    video_core.h
    vulkan_common/vulkan_debug_callback.cpp
    vulkan_common/vulkan_debug_callback.h
    vulkan_common/vulkan_device.cpp
    vulkan_common/vulkan_device.h
    vulkan_common/vulkan_instance.cpp
    vulkan_common/vulkan_instance.h
    vulkan_common/vulkan_library.cpp
    vulkan_common/vulkan_library.h
    vulkan_common/vulkan_surface.cpp
    vulkan_common/vulkan_surface.h
    vulkan_common/vulkan_wrapper.cpp
    vulkan_common/vulkan_wrapper.h
    vulkan_common/nsight_aftermath_tracker.cpp
    vulkan_common/nsight_aftermath_tracker.h
)

if (ENABLE_VULKAN)
    target_sources(video_core PRIVATE
        renderer_vulkan/fixed_pipeline_state.cpp
        renderer_vulkan/fixed_pipeline_state.h
        renderer_vulkan/maxwell_to_vk.cpp
        renderer_vulkan/maxwell_to_vk.h
        renderer_vulkan/nsight_aftermath_tracker.cpp
        renderer_vulkan/nsight_aftermath_tracker.h
        renderer_vulkan/renderer_vulkan.h
        renderer_vulkan/renderer_vulkan.cpp
        renderer_vulkan/vk_blit_screen.cpp
        renderer_vulkan/vk_blit_screen.h
        renderer_vulkan/vk_buffer_cache.cpp
        renderer_vulkan/vk_buffer_cache.h
        renderer_vulkan/vk_command_pool.cpp
        renderer_vulkan/vk_command_pool.h
        renderer_vulkan/vk_compute_pass.cpp
        renderer_vulkan/vk_compute_pass.h
        renderer_vulkan/vk_compute_pipeline.cpp
        renderer_vulkan/vk_compute_pipeline.h
        renderer_vulkan/vk_descriptor_pool.cpp
        renderer_vulkan/vk_descriptor_pool.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
        renderer_vulkan/vk_fence_manager.cpp
        renderer_vulkan/vk_fence_manager.h
        renderer_vulkan/vk_graphics_pipeline.cpp
        renderer_vulkan/vk_graphics_pipeline.h
        renderer_vulkan/vk_image.cpp
        renderer_vulkan/vk_image.h
        renderer_vulkan/vk_master_semaphore.cpp
        renderer_vulkan/vk_master_semaphore.h
        renderer_vulkan/vk_memory_manager.cpp
        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_pipeline_cache.cpp
        renderer_vulkan/vk_pipeline_cache.h
        renderer_vulkan/vk_query_cache.cpp
        renderer_vulkan/vk_query_cache.h
        renderer_vulkan/vk_rasterizer.cpp
        renderer_vulkan/vk_rasterizer.h
        renderer_vulkan/vk_renderpass_cache.cpp
        renderer_vulkan/vk_renderpass_cache.h
        renderer_vulkan/vk_resource_pool.cpp
        renderer_vulkan/vk_resource_pool.h
        renderer_vulkan/vk_sampler_cache.cpp
        renderer_vulkan/vk_sampler_cache.h
        renderer_vulkan/vk_scheduler.cpp
        renderer_vulkan/vk_scheduler.h
        renderer_vulkan/vk_shader_decompiler.cpp
        renderer_vulkan/vk_shader_decompiler.h
        renderer_vulkan/vk_shader_util.cpp
        renderer_vulkan/vk_shader_util.h
        renderer_vulkan/vk_staging_buffer_pool.cpp
        renderer_vulkan/vk_staging_buffer_pool.h
        renderer_vulkan/vk_state_tracker.cpp
        renderer_vulkan/vk_state_tracker.h
        renderer_vulkan/vk_stream_buffer.cpp
        renderer_vulkan/vk_stream_buffer.h
        renderer_vulkan/vk_swapchain.cpp
        renderer_vulkan/vk_swapchain.h
        renderer_vulkan/vk_texture_cache.cpp
        renderer_vulkan/vk_texture_cache.h
        renderer_vulkan/vk_update_descriptor.cpp
        renderer_vulkan/vk_update_descriptor.h
        renderer_vulkan/wrapper.cpp
        renderer_vulkan/wrapper.h
    )
endif()

create_target_directory_groups(video_core)

target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)

if (MSVC)
    target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
    target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib)
else()
    target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
    target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES})
endif()

add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})

if (ENABLE_VULKAN)
    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
    target_link_libraries(video_core PRIVATE sirit)
endif()
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_link_libraries(video_core PRIVATE sirit)

if (ENABLE_NSIGHT_AFTERMATH)
    if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})

@@ -271,7 +297,27 @@ if (ENABLE_NSIGHT_AFTERMATH)
endif()

if (MSVC)
    target_compile_options(video_core PRIVATE /we4267)
    target_compile_options(video_core PRIVATE
        /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
        /we4456 # Declaration of 'identifier' hides previous local declaration
        /we4457 # Declaration of 'identifier' hides function parameter
        /we4458 # Declaration of 'identifier' hides class member
        /we4459 # Declaration of 'identifier' hides global declaration
        /we4715 # 'function' : not all control paths return a value
    )
else()
    target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion -Werror=switch)
    target_compile_options(video_core PRIVATE
        -Werror=conversion
        -Wno-error=sign-conversion
        -Werror=pessimizing-move
        -Werror=redundant-move
        -Werror=shadow
        -Werror=switch
        -Werror=type-limits
        -Werror=unused-variable

        $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
        $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
        $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
    )
endif()
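For context, a short hedged C++ sketch (hypothetical code, not from this commit) of the kind of constructs the newly promoted warnings above now reject:

// Illustrative only: both functions trip the stricter flags added above.
#include <cstddef>
#include <vector>

unsigned int CountEntries(const std::vector<int>& entries) {
    std::size_t count = entries.size();
    return count; // C4267 / -Werror=conversion: size_t narrowed to unsigned int
}

int Sum(const std::vector<int>& values) {
    int total = 0;
    for (int value : values) {
        int total = value; // C4456 / -Werror=shadow: hides the outer 'total'
        static_cast<void>(total);
    }
    return total;
}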
@@ -4,34 +4,29 @@

#pragma once

#include <unordered_set>
#include <utility>

#include "common/alignment.h"
#include "common/common_types.h"
#include "video_core/gpu.h"

namespace VideoCommon {

class BufferBlock {
public:
    bool Overlaps(VAddr start, VAddr end) const {
    [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
        return (cpu_addr < end) && (cpu_addr_end > start);
    }

    bool IsInside(VAddr other_start, VAddr other_end) const {
    [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
        return cpu_addr <= other_start && other_end <= cpu_addr_end;
    }

    std::size_t Offset(VAddr in_addr) const {
    [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
        return static_cast<std::size_t>(in_addr - cpu_addr);
    }

    VAddr CpuAddr() const {
    [[nodiscard]] VAddr CpuAddr() const {
        return cpu_addr;
    }

    VAddr CpuAddrEnd() const {
    [[nodiscard]] VAddr CpuAddrEnd() const {
        return cpu_addr_end;
    }

@@ -40,11 +35,11 @@ public:
        cpu_addr_end = new_addr + size;
    }

    std::size_t Size() const {
    [[nodiscard]] std::size_t Size() const {
        return size;
    }

    u64 Epoch() const {
    [[nodiscard]] u64 Epoch() const {
        return epoch;
    }

@@ -118,20 +118,17 @@ public:
    /// Prepares the buffer cache for data uploading
    /// @param max_size Maximum number of bytes that will be uploaded
    /// @return True when a stream buffer invalidation was required, false otherwise
    bool Map(std::size_t max_size) {
    void Map(std::size_t max_size) {
        std::lock_guard lock{mutex};

        bool invalidated;
        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
        std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
        buffer_offset = buffer_offset_base;

        return invalidated;
    }

    /// Finishes the upload stream
    void Unmap() {
        std::lock_guard lock{mutex};
        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
        stream_buffer.Unmap(buffer_offset - buffer_offset_base);
    }

    /// Function called at the end of each frame, intended for deferred operations

@@ -261,9 +258,9 @@
protected:
    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
                         std::unique_ptr<StreamBuffer> stream_buffer_)
                         StreamBuffer& stream_buffer_)
        : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
          stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
          stream_buffer{stream_buffer_} {}

    ~BufferCache() = default;

@@ -441,7 +438,7 @@ private:

        buffer_ptr += size;
        buffer_offset += size;
        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
        return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
    }

    void AlignBuffer(std::size_t alignment) {

@@ -545,7 +542,7 @@ private:
    bool IsRegionWritten(VAddr start, VAddr end) const {
        const u64 page_end = end >> WRITE_PAGE_BIT;
        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
            if (written_pages.count(page_start) > 0) {
            if (written_pages.contains(page_start)) {
                return true;
            }
        }

@@ -567,9 +564,7 @@ private:
    VideoCore::RasterizerInterface& rasterizer;
    Tegra::MemoryManager& gpu_memory;
    Core::Memory::Memory& cpu_memory;

    std::unique_ptr<StreamBuffer> stream_buffer;
    BufferType stream_buffer_handle;
    StreamBuffer& stream_buffer;

    u8* buffer_ptr = nullptr;
    u64 buffer_offset = 0;

@@ -84,9 +84,10 @@ private:
    void FillFreeList(Chunk& chunk);

    std::vector<MapInterval*> free_list;
    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;

    Chunk first_chunk;

    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
};

} // namespace VideoCommon
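The Map/Unmap change above drops the invalidation result and takes the stream buffer by reference. A minimal hedged sketch of the map/write/unmap upload pattern the cache implements (the StreamBuffer interface here is assumed from the calls in the diff, not the full API):

#include <cstdint>
#include <cstring>
#include <utility>

struct StreamBuffer {
    // Assumed shape: reserve 'size' bytes at 'alignment', return host pointer + base offset.
    std::pair<std::uint8_t*, std::uint64_t> Map(std::size_t size, std::size_t alignment);
    void Unmap(std::size_t written); // commit how many bytes were actually used
};

void UploadExample(StreamBuffer& stream_buffer, const std::uint8_t* src, std::size_t size) {
    auto [ptr, base_offset] = stream_buffer.Map(size, 4); // 4-byte alignment, as in Map() above
    std::memcpy(ptr, src, size);  // write through the mapped pointer
    stream_buffer.Unmap(size);    // written bytes = buffer_offset - buffer_offset_base
}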
src/video_core/cdma_pusher.cpp (new file, 170 lines)
@@ -0,0 +1,170 @@
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

#include "command_classes/host1x.h"
#include "command_classes/nvdec.h"
#include "command_classes/vic.h"
#include "common/bit_util.h"
#include "video_core/cdma_pusher.h"
#include "video_core/command_classes/nvdec_common.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

namespace Tegra {
CDmaPusher::CDmaPusher(GPU& gpu_)
    : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
      vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
      host1x_processor(std::make_unique<Host1x>(gpu)),
      sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}

CDmaPusher::~CDmaPusher() = default;

void CDmaPusher::Push(ChCommandHeaderList&& entries) {
    cdma_queue.push(std::move(entries));
}

void CDmaPusher::DispatchCalls() {
    while (!cdma_queue.empty()) {
        Step();
    }
}

void CDmaPusher::Step() {
    const auto entries{cdma_queue.front()};
    cdma_queue.pop();

    std::vector<u32> values(entries.size());
    std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));

    for (const u32 value : values) {
        if (mask != 0) {
            const u32 lbs = Common::CountTrailingZeroes32(mask);
            mask &= ~(1U << lbs);
            ExecuteCommand(static_cast<u32>(offset + lbs), value);
            continue;
        } else if (count != 0) {
            --count;
            ExecuteCommand(static_cast<u32>(offset), value);
            if (incrementing) {
                ++offset;
            }
            continue;
        }
        const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
        switch (mode) {
        case ChSubmissionMode::SetClass: {
            mask = value & 0x3f;
            offset = (value >> 16) & 0xfff;
            current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
            break;
        }
        case ChSubmissionMode::Incrementing:
        case ChSubmissionMode::NonIncrementing:
            count = value & 0xffff;
            offset = (value >> 16) & 0xfff;
            incrementing = mode == ChSubmissionMode::Incrementing;
            break;
        case ChSubmissionMode::Mask:
            mask = value & 0xffff;
            offset = (value >> 16) & 0xfff;
            break;
        case ChSubmissionMode::Immediate: {
            const u32 data = value & 0xfff;
            offset = (value >> 16) & 0xfff;
            ExecuteCommand(static_cast<u32>(offset), data);
            break;
        }
        default:
            UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
            break;
        }
    }
}

void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
    switch (current_class) {
    case ChClassId::NvDec:
        ThiStateWrite(nvdec_thi_state, state_offset, {data});
        switch (static_cast<ThiMethod>(state_offset)) {
        case ThiMethod::IncSyncpt: {
            LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
            const auto syncpoint_id = static_cast<u32>(data & 0xFF);
            const auto cond = static_cast<u32>((data >> 8) & 0xFF);
            if (cond == 0) {
                sync_manager->Increment(syncpoint_id);
            } else {
                sync_manager->SignalDone(
                    sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
            }
            break;
        }
        case ThiMethod::SetMethod1:
            LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
                      static_cast<u32>(nvdec_thi_state.method_0));
            nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
                                           {data});
            break;
        default:
            break;
        }
        break;
    case ChClassId::GraphicsVic:
        ThiStateWrite(vic_thi_state, static_cast<u32>(state_offset), {data});
        switch (static_cast<ThiMethod>(state_offset)) {
        case ThiMethod::IncSyncpt: {
            LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
            const auto syncpoint_id = static_cast<u32>(data & 0xFF);
            const auto cond = static_cast<u32>((data >> 8) & 0xFF);
            if (cond == 0) {
                sync_manager->Increment(syncpoint_id);
            } else {
                sync_manager->SignalDone(
                    sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
            }
            break;
        }
        case ThiMethod::SetMethod1:
            LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
                      static_cast<u32>(vic_thi_state.method_0), data);
            vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), {data});
            break;
        default:
            break;
        }
        break;
    case ChClassId::Host1x:
        // This device is mainly for syncpoint synchronization
        LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
        host1x_processor->ProcessMethod(static_cast<Host1x::Method>(state_offset), {data});
        break;
    default:
        UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
        break;
    }
}

void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset,
                               const std::vector<u32>& arguments) {
    u8* const state_offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
    std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size());
}

} // namespace Tegra
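To make the word decoding in Step() above concrete, here is a small self-contained sketch (the word value is chosen for illustration) showing how one submission word splits into its mode, offset, and class fields:

#include <cstdint>
#include <cstdio>

int main() {
    // A hypothetical SetClass word selecting class 0xf0 (ChClassId::NvDec):
    // mode in bits 28..31, offset in bits 16..27, class id in bits 6..15, mask in bits 0..5.
    const std::uint32_t value = (0u << 28) | (0x000u << 16) | (0xf0u << 6) | 0x00u;
    std::printf("mode=%u offset=%u class=0x%x\n",
                (value >> 28) & 0xf, (value >> 16) & 0xfff, (value >> 6) & 0x3ff);
    // prints: mode=0 offset=0 class=0xf0
}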
src/video_core/cdma_pusher.h (new file, 136 lines)
@@ -0,0 +1,136 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <unordered_map>
#include <vector>
#include <queue>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/command_classes/sync_manager.h"

namespace Tegra {

class GPU;
class Nvdec;
class Vic;
class Host1x;

enum class ChSubmissionMode : u32 {
    SetClass = 0,
    Incrementing = 1,
    NonIncrementing = 2,
    Mask = 3,
    Immediate = 4,
    Restart = 5,
    Gather = 6,
};

enum class ChClassId : u32 {
    NoClass = 0x0,
    Host1x = 0x1,
    VideoEncodeMpeg = 0x20,
    VideoEncodeNvEnc = 0x21,
    VideoStreamingVi = 0x30,
    VideoStreamingIsp = 0x32,
    VideoStreamingIspB = 0x34,
    VideoStreamingViI2c = 0x36,
    GraphicsVic = 0x5d,
    Graphics3D = 0x60,
    GraphicsGpu = 0x61,
    Tsec = 0xe0,
    TsecB = 0xe1,
    NvJpg = 0xc0,
    NvDec = 0xf0
};

enum class ChMethod : u32 {
    Empty = 0,
    SetMethod = 0x10,
    SetData = 0x11,
};

union ChCommandHeader {
    u32 raw;
    BitField<0, 16, u32> value;
    BitField<16, 12, ChMethod> method_offset;
    BitField<28, 4, ChSubmissionMode> submission_mode;
};
static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");

struct ChCommand {
    ChClassId class_id{};
    int method_offset{};
    std::vector<u32> arguments;
};

using ChCommandHeaderList = std::vector<ChCommandHeader>;
using ChCommandList = std::vector<ChCommand>;

struct ThiRegisters {
    u32_le increment_syncpt{};
    INSERT_PADDING_WORDS(1);
    u32_le increment_syncpt_error{};
    u32_le ctx_switch_incremement_syncpt{};
    INSERT_PADDING_WORDS(4);
    u32_le ctx_switch{};
    INSERT_PADDING_WORDS(1);
    u32_le ctx_syncpt_eof{};
    INSERT_PADDING_WORDS(5);
    u32_le method_0{};
    u32_le method_1{};
    INSERT_PADDING_WORDS(12);
    u32_le int_status{};
    u32_le int_mask{};
};

enum class ThiMethod : u32 {
    IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32),
    SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32),
    SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32),
};

class CDmaPusher {
public:
    explicit CDmaPusher(GPU& gpu_);
    ~CDmaPusher();

    /// Push NVDEC command buffer entries into queue
    void Push(ChCommandHeaderList&& entries);

    /// Process queued command buffer entries
    void DispatchCalls();

    /// Process one queue element
    void Step();

    /// Invoke command class devices to execute the command based on the current state
    void ExecuteCommand(u32 state_offset, u32 data);

private:
    /// Writes the argument value to the ThiRegisters member at the specified offset
    void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);

    GPU& gpu;
    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
    std::unique_ptr<Tegra::Vic> vic_processor;
    std::unique_ptr<Tegra::Host1x> host1x_processor;
    std::unique_ptr<SyncptIncrManager> sync_manager;
    ChClassId current_class{};
    ThiRegisters vic_thi_state{};
    ThiRegisters nvdec_thi_state{};

    s32 count{};
    s32 offset{};
    s32 mask{};
    bool incrementing{};

    // Queue of command lists to be processed
    std::queue<ChCommandHeaderList> cdma_queue;
};

} // namespace Tegra
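The ThiMethod enum above derives method IDs from the byte layout of ThiRegisters, so a raw word write at offset N lands in the matching register field. A reduced self-contained sketch of the same offsetof pattern (types simplified to standard ones):

#include <cstddef>
#include <cstdint>
#include <cstring>

struct Registers {
    std::uint32_t increment_syncpt;
    std::uint32_t pad;
    std::uint32_t method_0;
};

enum class Method : std::uint32_t {
    IncSyncpt = offsetof(Registers, increment_syncpt) / sizeof(std::uint32_t), // 0
    SetMethod0 = offsetof(Registers, method_0) / sizeof(std::uint32_t),        // 2
};

// Mirrors ThiStateWrite: a word offset indexes directly into the register block.
void StateWrite(Registers& regs, std::uint32_t offset, std::uint32_t value) {
    std::memcpy(reinterpret_cast<std::uint8_t*>(&regs) + sizeof(std::uint32_t) * offset,
                &value, sizeof(value));
}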
src/video_core/command_classes/codecs/codec.cpp (new file, 129 lines)
@@ -0,0 +1,129 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <cstring>
#include <fstream>
#include <vector>
#include "common/assert.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp9.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

extern "C" {
#include <libavutil/opt.h>
}

namespace Tegra {

void AVFrameDeleter(AVFrame* ptr) {
    av_frame_unref(ptr);
    av_free(ptr);
}

Codec::Codec(GPU& gpu_)
    : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
      vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}

Codec::~Codec() {
    if (!initialized) {
        return;
    }
    // Free libav memory
    AVFrame* av_frame{nullptr};
    avcodec_send_packet(av_codec_ctx, nullptr);
    av_frame = av_frame_alloc();
    avcodec_receive_frame(av_codec_ctx, av_frame);
    avcodec_flush_buffers(av_codec_ctx);

    av_frame_unref(av_frame);
    av_free(av_frame);
    avcodec_close(av_codec_ctx);
}

void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
    LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec);
    current_codec = codec;
}

void Codec::StateWrite(u32 offset, u64 arguments) {
    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
    std::memcpy(state_offset, &arguments, sizeof(u64));
}

void Codec::Decode() {
    bool is_first_frame = false;

    if (!initialized) {
        if (current_codec == NvdecCommon::VideoCodec::H264) {
            av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
        } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
            av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
        } else {
            LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec);
            return;
        }

        av_codec_ctx = avcodec_alloc_context3(av_codec);
        av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);

        // TODO(ameerj): libavcodec gpu hw acceleration

        const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
        if (av_error < 0) {
            LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
            avcodec_close(av_codec_ctx);
            return;
        }
        initialized = true;
        is_first_frame = true;
    }
    bool vp9_hidden_frame = false;

    AVPacket packet{};
    av_init_packet(&packet);
    std::vector<u8> frame_data;

    if (current_codec == NvdecCommon::VideoCodec::H264) {
        frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
    } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
        frame_data = vp9_decoder->ComposeFrameHeader(state);
        vp9_hidden_frame = vp9_decoder->WasFrameHidden();
    }

    packet.data = frame_data.data();
    packet.size = static_cast<int>(frame_data.size());

    avcodec_send_packet(av_codec_ctx, &packet);

    if (!vp9_hidden_frame) {
        // Only receive/store visible frames
        AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
        avcodec_receive_frame(av_codec_ctx, frame.get());
        av_frames.push(std::move(frame));
        // Limit queue to 10 frames. Workaround for ZLA decode and queue spam
        if (av_frames.size() > 10) {
            av_frames.pop();
        }
    }
}

AVFramePtr Codec::GetCurrentFrame() {
    // Sometimes VIC will request more frames than have been decoded.
    // In this case, return a nullptr and don't overwrite previous frame data
    if (av_frames.empty()) {
        return AVFramePtr{nullptr, AVFrameDeleter};
    }

    AVFramePtr frame = std::move(av_frames.front());
    av_frames.pop();
    return frame;
}

NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
    return current_codec;
}

} // namespace Tegra
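Codec::Decode() above drives FFmpeg's send/receive decoding model: one compressed packet in, zero or more frames out. A minimal hedged sketch of that contract (error handling trimmed to the essentials; real code should check every return value as the TODO in the commit implies further work):

extern "C" {
#include <libavcodec/avcodec.h>
}

// Feed one packet, then try to pull one decoded frame.
bool DecodeOnePacket(AVCodecContext* ctx, AVPacket* packet, AVFrame* frame) {
    if (avcodec_send_packet(ctx, packet) < 0) {
        return false; // decoder rejected the packet
    }
    // 0 on success; AVERROR(EAGAIN) means the decoder needs more input first.
    return avcodec_receive_frame(ctx, frame) == 0;
}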
src/video_core/command_classes/codecs/codec.h (new file, 70 lines)
@@ -0,0 +1,70 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <queue>
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"

extern "C" {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libavcodec/avcodec.h>
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
}

namespace Tegra {
class GPU;
struct VicRegisters;

void AVFrameDeleter(AVFrame* ptr);
using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;

namespace Decoder {
class H264;
class VP9;
} // namespace Decoder

class Codec {
public:
    explicit Codec(GPU& gpu);
    ~Codec();

    /// Sets NVDEC video stream codec
    void SetTargetCodec(NvdecCommon::VideoCodec codec);

    /// Populate NvdecRegisters state with argument value at the provided offset
    void StateWrite(u32 offset, u64 arguments);

    /// Call decoders to construct headers, decode AVFrame with ffmpeg
    void Decode();

    /// Returns next decoded frame
    [[nodiscard]] AVFramePtr GetCurrentFrame();

    /// Returns the value of current_codec
    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;

private:
    bool initialized{};
    NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};

    AVCodec* av_codec{nullptr};
    AVCodecContext* av_codec_ctx{nullptr};

    GPU& gpu;
    std::unique_ptr<Decoder::H264> h264_decoder;
    std::unique_ptr<Decoder::VP9> vp9_decoder;

    NvdecCommon::NvdecRegisters state{};
    std::queue<AVFramePtr> av_frames{};
};

} // namespace Tegra
src/video_core/command_classes/codecs/h264.cpp (new file, 293 lines)
@@ -0,0 +1,293 @@
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

#include <array>
#include "common/bit_util.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

namespace Tegra::Decoder {
namespace {
// ZigZag LUTs from libavcodec.
constexpr std::array<u8, 64> zig_zag_direct{
    0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
    41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
    30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
};

constexpr std::array<u8, 16> zig_zag_scan{
    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
};
} // Anonymous namespace

H264::H264(GPU& gpu_) : gpu(gpu_) {}

H264::~H264() = default;

const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state,
                                                bool is_first_frame) {
    H264DecoderContext context{};
    gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));

    const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
    if (!is_first_frame && frame_number != 0) {
        frame.resize(context.frame_data_size);

        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
    } else {
        /// Encode header
        H264BitWriter writer{};
        writer.WriteU(1, 24);
        writer.WriteU(0, 1);
        writer.WriteU(3, 2);
        writer.WriteU(7, 5);
        writer.WriteU(100, 8);
        writer.WriteU(0, 8);
        writer.WriteU(31, 8);
        writer.WriteUe(0);
        const auto chroma_format_idc =
            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
        writer.WriteUe(chroma_format_idc);
        if (chroma_format_idc == 3) {
            writer.WriteBit(false);
        }

        writer.WriteUe(0);
        writer.WriteUe(0);
        writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
        writer.WriteBit(false); // Scaling matrix present flag

        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
        writer.WriteUe(order_cnt_type);
        if (order_cnt_type == 0) {
            writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
        } else if (order_cnt_type == 1) {
            writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);

            writer.WriteSe(0);
            writer.WriteSe(0);
            writer.WriteUe(0);
        }

        const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
                               (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);

        writer.WriteUe(16);
        writer.WriteBit(false);
        writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
        writer.WriteUe(pic_height - 1);
        writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);

        if (!context.h264_parameter_set.frame_mbs_only_flag) {
            writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
        }

        writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
        writer.WriteBit(false); // Frame cropping flag
        writer.WriteBit(false); // VUI parameter present flag

        writer.End();

        // H264 PPS
        writer.WriteU(1, 24);
        writer.WriteU(0, 1);
        writer.WriteU(3, 2);
        writer.WriteU(8, 5);

        writer.WriteUe(0);
        writer.WriteUe(0);

        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
        writer.WriteBit(false);
        writer.WriteUe(0);
        writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
        writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
        writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
        writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
        s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
        pic_init_qp = (pic_init_qp << 26) >> 26;
        writer.WriteSe(pic_init_qp);
        writer.WriteSe(0);
        s32 chroma_qp_index_offset =
            static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
        chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;

        writer.WriteSe(chroma_qp_index_offset);
        writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
        writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
        writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
        writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);

        writer.WriteBit(true);

        for (s32 index = 0; index < 6; index++) {
            writer.WriteBit(true);
            const auto matrix_x4 =
                std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end());
            writer.WriteScalingList(matrix_x4, index * 16, 16);
        }

        if (context.h264_parameter_set.transform_8x8_mode_flag) {
            for (s32 index = 0; index < 2; index++) {
                writer.WriteBit(true);
                const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(),
                                                       context.scaling_matrix_8.end());

                writer.WriteScalingList(matrix_x8, index * 64, 64);
            }
        }

        s32 chroma_qp_index_offset2 =
            static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
        chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;

        writer.WriteSe(chroma_qp_index_offset2);

        writer.End();

        const auto& encoded_header = writer.GetByteArray();
        frame.resize(encoded_header.size() + context.frame_data_size);
        std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());

        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
                                      frame.data() + encoded_header.size(),
                                      context.frame_data_size);
    }

    return frame;
}

H264BitWriter::H264BitWriter() = default;

H264BitWriter::~H264BitWriter() = default;

void H264BitWriter::WriteU(s32 value, s32 value_sz) {
    WriteBits(value, value_sz);
}

void H264BitWriter::WriteSe(s32 value) {
    WriteExpGolombCodedInt(value);
}

void H264BitWriter::WriteUe(u32 value) {
    WriteExpGolombCodedUInt(value);
}

void H264BitWriter::End() {
    WriteBit(true);
    Flush();
}

void H264BitWriter::WriteBit(bool state) {
    WriteBits(state ? 1 : 0, 1);
}

void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
    std::vector<u8> scan(count);
    if (count == 16) {
        std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
    } else {
        std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
    }
    u8 last_scale = 8;

    for (s32 index = 0; index < count; index++) {
        const u8 value = list[start + scan[index]];
        const s32 delta_scale = static_cast<s32>(value - last_scale);

        WriteSe(delta_scale);

        last_scale = value;
    }
}

std::vector<u8>& H264BitWriter::GetByteArray() {
    return byte_array;
}

const std::vector<u8>& H264BitWriter::GetByteArray() const {
    return byte_array;
}

void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
    s32 value_pos = 0;

    s32 remaining = bit_count;

    while (remaining > 0) {
        s32 copy_size = remaining;

        const s32 free_bits = GetFreeBufferBits();

        if (copy_size > free_bits) {
            copy_size = free_bits;
        }

        const s32 mask = (1 << copy_size) - 1;

        const s32 src_shift = (bit_count - value_pos) - copy_size;
        const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;

        buffer |= ((value >> src_shift) & mask) << dst_shift;

        value_pos += copy_size;
        buffer_pos += copy_size;
        remaining -= copy_size;
    }
}

void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
    const s32 sign = value <= 0 ? 0 : 1;
    if (value < 0) {
        value = -value;
    }
    value = (value << 1) - sign;
    WriteExpGolombCodedUInt(value);
}

void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
    const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
    WriteBits(1, size);

    value -= (1U << (size - 1)) - 1;
    WriteBits(static_cast<s32>(value), size - 1);
}

s32 H264BitWriter::GetFreeBufferBits() {
    if (buffer_pos == buffer_size) {
        Flush();
    }

    return buffer_size - buffer_pos;
}

void H264BitWriter::Flush() {
    if (buffer_pos == 0) {
        return;
    }
    byte_array.push_back(static_cast<u8>(buffer));

    buffer = 0;
    buffer_pos = 0;
}
} // namespace Tegra::Decoder
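WriteExpGolombCodedUInt above emits the standard ue(v) Exp-Golomb code: write the bit-length of v + 1 as a zero-padded prefix holding a single 1, then the remaining low bits of v + 1. A worked example for v = 3, following the code step by step:

    v = 3
    v + 1 = 4 = 0b100                 -> size = 32 - clz32(4) = 3
    WriteBits(1, 3)                   -> emits 0 0 1
    v -= (1 << (size - 1)) - 1        -> 3 - 3 = 0
    WriteBits(0, size - 1 = 2)        -> emits 0 0
    codeword: 0 0 1 0 0               -> ue(3) = "00100", matching clause 9.1 of the H.264 spec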
src/video_core/command_classes/codecs/h264.h (new file, 118 lines)
@@ -0,0 +1,118 @@
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

#pragma once

#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"

namespace Tegra {
class GPU;
namespace Decoder {

class H264BitWriter {
public:
    H264BitWriter();
    ~H264BitWriter();

    /// The following Write methods are based on clause 9.1 in the H.264 specification.
    /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
    void WriteU(s32 value, s32 value_sz);
    void WriteSe(s32 value);
    void WriteUe(u32 value);

    /// Finalize the bitstream
    void End();

    /// Appends a bit to the stream, with a value equal to the state parameter
    void WriteBit(bool state);

    /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
    /// Writes the scaling matrices of the stream
    void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);

    /// Return the bitstream as a vector.
    [[nodiscard]] std::vector<u8>& GetByteArray();
    [[nodiscard]] const std::vector<u8>& GetByteArray() const;

private:
    void WriteBits(s32 value, s32 bit_count);
    void WriteExpGolombCodedInt(s32 value);
    void WriteExpGolombCodedUInt(u32 value);
    [[nodiscard]] s32 GetFreeBufferBits();
    void Flush();

    s32 buffer_size{8};

    s32 buffer{};
    s32 buffer_pos{};
    std::vector<u8> byte_array;
};

class H264 {
public:
    explicit H264(GPU& gpu);
    ~H264();

    /// Compose the H264 header of the frame for FFmpeg decoding
    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
        const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);

private:
    struct H264ParameterSet {
        u32 log2_max_pic_order_cnt{};
        u32 delta_pic_order_always_zero_flag{};
        u32 frame_mbs_only_flag{};
        u32 pic_width_in_mbs{};
        u32 pic_height_in_map_units{};
        INSERT_PADDING_WORDS(1);
        u32 entropy_coding_mode_flag{};
        u32 bottom_field_pic_order_flag{};
        u32 num_refidx_l0_default_active{};
        u32 num_refidx_l1_default_active{};
        u32 deblocking_filter_control_flag{};
        u32 redundant_pic_count_flag{};
        u32 transform_8x8_mode_flag{};
        INSERT_PADDING_WORDS(9);
        u64 flags{};
        u32 frame_number{};
        u32 frame_number2{};
    };
    static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");

    struct H264DecoderContext {
        INSERT_PADDING_BYTES(0x48);
        u32 frame_data_size{};
        INSERT_PADDING_BYTES(0xc);
        H264ParameterSet h264_parameter_set{};
        INSERT_PADDING_BYTES(0x100);
        std::array<u8, 0x60> scaling_matrix_4;
        std::array<u8, 0x80> scaling_matrix_8;
    };
    static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");

    std::vector<u8> frame;
    GPU& gpu;
};

} // namespace Decoder
} // namespace Tegra
src/video_core/command_classes/codecs/vp9.cpp (new file, 989 lines)
@@ -0,0 +1,989 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <cstring> // for std::memcpy
#include <numeric>
#include "video_core/command_classes/codecs/vp9.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

namespace Tegra::Decoder {
namespace {
// Default compressed header probabilities once frame context resets
constexpr Vp9EntropyProbs default_probs{
    .y_mode_prob{
        65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
        173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
    },
    .partition_prob{
        199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
        174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
        177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
        222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
    },
    .coef_probs{
        195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32,
        40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47,
        69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31,
        102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29,
        156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31,
        191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33,
        41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26,
        59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34,
        114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34,
        149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42,
        214, 49, 220, 132, 63, 188, 42, 65, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28,
        89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33,
        108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38,
        124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37,
        141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47,
        229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57,
        83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39,
        99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47,
        127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52,
        157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61,
        125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29,
        31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45,
        23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24,
        29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23,
        57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29,
        202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57,
        33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21,
        30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20,
        32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22,
        57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32,
        212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25,
        71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 2, 51, 89, 1, 24, 42,
        64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30,
        65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30,
        65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34,
        225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84,
        75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28,
        73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29,
        62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32,
        61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41,
        7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18,
        43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27,
        23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23,
        18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23,
        15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20,
        19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52,
        35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28,
        28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20,
        20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20,
        13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24,
        211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28,
        71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29,
        57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27,
        47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26,
        26, 219, 229, 5, 155, 207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28,
        233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97,
        85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30,
        65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29,
        40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26,
        16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31,
        17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16,
        50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20,
        40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17,
        30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18,
        10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17,
        36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61,
        85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30,
        72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27,
        55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22,
        15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23,
        181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23,
        62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19,
        55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16,
        49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17,
        13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23,
        197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 90, 137,
        111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91,
        104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102,
        84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79,
        43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6,
    },
    .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
    .inter_mode_prob{
        2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
        66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
    },
    .intra_inter_prob{9, 102, 187, 225},
    .comp_inter_prob{9, 102, 187, 225, 0},
    .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
    .comp_ref_prob{50, 126, 123, 221, 226},
    .tx_32x32_prob{3, 136, 37, 5, 52, 13},
    .tx_16x16_prob{20, 152, 15, 101},
    .tx_8x8_prob{100, 66},
    .skip_probs{192, 128, 64},
    .joints{32, 64, 96},
    .sign{128, 128},
    .classes{
        224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
        216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
    },
    .class_0{216, 208},
    .prob_bits{
        136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
        136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
    },
    .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
    .fr{64, 96, 64, 64, 96, 64},
    .class_0_hp{160, 160},
    .high_precision{128, 128},
};

constexpr std::array<s32, 256> norm_lut{
    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
constexpr std::array<s32, 254> map_lut{
|
||||
20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
|
||||
1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
|
||||
55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
|
||||
73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
|
||||
90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
|
||||
108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
|
||||
125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
|
||||
143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
|
||||
178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
|
||||
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
|
||||
213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
|
||||
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
|
||||
248, 249, 250, 251, 252, 253, 19,
|
||||
};
|
||||
|
||||
// 6.2.14 Tile size calculation
|
||||
|
||||
[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
|
||||
const s32 sb64_cols = (frame_width + 63) / 64;
|
||||
s32 min_log2 = 0;
|
||||
|
||||
while ((64 << min_log2) < sb64_cols) {
|
||||
min_log2++;
|
||||
}
|
||||
|
||||
return min_log2;
|
||||
}
|
||||
|
||||
[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
|
||||
const s32 sb64_cols = (frame_width + 63) / 64;
|
||||
s32 max_log2 = 1;
|
||||
|
||||
while ((sb64_cols >> max_log2) >= 4) {
|
||||
max_log2++;
|
||||
}
|
||||
|
||||
return max_log2 - 1;
|
||||
}
|
||||
|
||||
// Recenters probability. Based on section 6.3.6 of VP9 Specification
|
||||
[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
|
||||
if (new_prob > old_prob * 2) {
|
||||
return new_prob;
|
||||
}
|
||||
|
||||
if (new_prob >= old_prob) {
|
||||
return (new_prob - old_prob) * 2;
|
||||
}
|
||||
|
||||
return (old_prob - new_prob) * 2 - 1;
|
||||
}
|
||||
|
||||
// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
|
||||
[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
|
||||
new_prob--;
|
||||
old_prob--;
|
||||
|
||||
std::size_t index{};
|
||||
|
||||
if (old_prob * 2 <= 0xff) {
|
||||
index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
|
||||
} else {
|
||||
index = static_cast<std::size_t>(
|
||||
std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
|
||||
}
|
||||
|
||||
return map_lut[index];
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
VP9::VP9(GPU& gpu_) : gpu{gpu_} {}
|
||||
|
||||
VP9::~VP9() = default;
|
||||
|
||||
void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
|
||||
const bool update = new_prob != old_prob;
|
||||
|
||||
writer.Write(update, diff_update_probability);
|
||||
|
||||
if (update) {
|
||||
WriteProbabilityDelta(writer, new_prob, old_prob);
|
||||
}
|
||||
}
|
||||
template <typename T, std::size_t N>
|
||||
void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
const std::array<T, N>& old_prob) {
|
||||
for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
|
||||
WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, std::size_t N>
|
||||
void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
const std::array<T, N>& old_prob) {
|
||||
for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
|
||||
WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
|
||||
WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
|
||||
WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
|
||||
}
|
||||
}
|
||||
|
||||
void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
|
||||
const int delta = RemapProbability(new_prob, old_prob);
|
||||
|
||||
EncodeTermSubExp(writer, delta);
|
||||
}
|
||||
|
||||
void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
|
||||
if (WriteLessThan(writer, value, 16)) {
|
||||
writer.Write(value, 4);
|
||||
} else if (WriteLessThan(writer, value, 32)) {
|
||||
writer.Write(value - 16, 4);
|
||||
} else if (WriteLessThan(writer, value, 64)) {
|
||||
writer.Write(value - 32, 5);
|
||||
} else {
|
||||
value -= 64;
|
||||
|
||||
constexpr s32 size = 8;
|
||||
|
||||
const s32 mask = (1 << size) - 191;
|
||||
|
||||
const s32 delta = value - mask;
|
||||
|
||||
if (delta < 0) {
|
||||
writer.Write(value, size - 1);
|
||||
} else {
|
||||
writer.Write(delta / 2 + mask, size - 1);
|
||||
writer.Write(delta & 1, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
|
||||
const bool is_lt = value < test;
|
||||
writer.Write(!is_lt);
|
||||
return is_lt;
|
||||
}
|
||||
|
||||
void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
|
||||
const std::array<u8, 1728>& new_prob,
|
||||
const std::array<u8, 1728>& old_prob) {
|
||||
constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3;
|
||||
|
||||
const auto needs_update = [&](u32 base_index) {
|
||||
return !std::equal(new_prob.begin() + base_index,
|
||||
new_prob.begin() + base_index + block_bytes,
|
||||
old_prob.begin() + base_index);
|
||||
};
|
||||
|
||||
for (u32 block_index = 0; block_index < 4; block_index++) {
|
||||
const u32 base_index = block_index * block_bytes;
|
||||
const bool update = needs_update(base_index);
|
||||
writer.Write(update);
|
||||
|
||||
if (update) {
|
||||
u32 index = base_index;
|
||||
for (s32 i = 0; i < 2; i++) {
|
||||
for (s32 j = 0; j < 2; j++) {
|
||||
for (s32 k = 0; k < 6; k++) {
|
||||
for (s32 l = 0; l < 6; l++) {
|
||||
if (k != 0 || l < 3) {
|
||||
WriteProbabilityUpdate(writer, new_prob[index + 0],
|
||||
old_prob[index + 0]);
|
||||
WriteProbabilityUpdate(writer, new_prob[index + 1],
|
||||
old_prob[index + 1]);
|
||||
WriteProbabilityUpdate(writer, new_prob[index + 2],
|
||||
old_prob[index + 2]);
|
||||
}
|
||||
index += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (block_index == static_cast<u32>(tx_mode)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
|
||||
const bool update = new_prob != old_prob;
|
||||
writer.Write(update, diff_update_probability);
|
||||
|
||||
if (update) {
|
||||
writer.Write(new_prob >> 1, 7);
|
||||
}
|
||||
}
|
||||
|
||||
Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) {
|
||||
PictureInfo picture_info{};
|
||||
gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
|
||||
Vp9PictureInfo vp9_info = picture_info.Convert();
|
||||
|
||||
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
|
||||
|
||||
// surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
|
||||
// order: last, golden, altref, current. It may be worthwhile to track the updates done here
|
||||
// to avoid buffering frame data needed for reference frame updating in the header composition.
|
||||
std::memcpy(vp9_info.frame_offsets.data(), state.surface_luma_offset.data(), 4 * sizeof(u64));
|
||||
|
||||
return vp9_info;
|
||||
}
|
||||
|
||||
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
|
||||
EntropyProbs entropy{};
|
||||
gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
|
||||
entropy.Convert(dst);
|
||||
}
|
||||
|
||||
Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) {
|
||||
Vp9FrameContainer current_frame{};
|
||||
{
|
||||
gpu.SyncGuestHost();
|
||||
current_frame.info = GetVp9PictureInfo(state);
|
||||
current_frame.bit_stream.resize(current_frame.info.bitstream_size);
|
||||
gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
|
||||
current_frame.info.bitstream_size);
|
||||
}
|
||||
// Buffer two frames, saving the last show frame info
|
||||
if (!next_next_frame.bit_stream.empty()) {
|
||||
Vp9FrameContainer temp{
|
||||
.info = current_frame.info,
|
||||
.bit_stream = std::move(current_frame.bit_stream),
|
||||
};
|
||||
next_next_frame.info.show_frame = current_frame.info.last_frame_shown;
|
||||
current_frame.info = next_next_frame.info;
|
||||
current_frame.bit_stream = std::move(next_next_frame.bit_stream);
|
||||
next_next_frame = std::move(temp);
|
||||
|
||||
if (!next_frame.bit_stream.empty()) {
|
||||
Vp9FrameContainer temp2{
|
||||
.info = current_frame.info,
|
||||
.bit_stream = std::move(current_frame.bit_stream),
|
||||
};
|
||||
next_frame.info.show_frame = current_frame.info.last_frame_shown;
|
||||
current_frame.info = next_frame.info;
|
||||
current_frame.bit_stream = std::move(next_frame.bit_stream);
|
||||
next_frame = std::move(temp2);
|
||||
} else {
|
||||
next_frame.info = current_frame.info;
|
||||
next_frame.bit_stream = std::move(current_frame.bit_stream);
|
||||
}
|
||||
} else {
|
||||
next_next_frame.info = current_frame.info;
|
||||
next_next_frame.bit_stream = std::move(current_frame.bit_stream);
|
||||
}
|
||||
return current_frame;
|
||||
}
|
||||
|
||||
std::vector<u8> VP9::ComposeCompressedHeader() {
|
||||
VpxRangeEncoder writer{};
|
||||
const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame;
|
||||
if (!current_frame_info.lossless) {
|
||||
if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
|
||||
writer.Write(3, 2);
|
||||
writer.Write(current_frame_info.transform_mode == 4);
|
||||
} else {
|
||||
writer.Write(current_frame_info.transform_mode, 2);
|
||||
}
|
||||
}
|
||||
|
||||
if (current_frame_info.transform_mode == 4) {
|
||||
// tx_mode_probs() in the spec
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
|
||||
prev_frame_probs.tx_8x8_prob);
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
|
||||
prev_frame_probs.tx_16x16_prob);
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
|
||||
prev_frame_probs.tx_32x32_prob);
|
||||
if (update_probs) {
|
||||
prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
|
||||
prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
|
||||
prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
|
||||
}
|
||||
}
|
||||
// read_coef_probs() in the spec
|
||||
WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
|
||||
current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
|
||||
// read_skip_probs() in the spec
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
|
||||
prev_frame_probs.skip_probs);
|
||||
|
||||
if (update_probs) {
|
||||
prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
|
||||
prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
|
||||
}
|
||||
|
||||
if (!current_frame_info.intra_only) {
|
||||
// read_inter_probs() in the spec
|
||||
WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
|
||||
prev_frame_probs.inter_mode_prob);
|
||||
|
||||
if (current_frame_info.interp_filter == 4) {
|
||||
// read_interp_filter_probs() in the spec
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
|
||||
prev_frame_probs.switchable_interp_prob);
|
||||
if (update_probs) {
|
||||
prev_frame_probs.switchable_interp_prob =
|
||||
current_frame_info.entropy.switchable_interp_prob;
|
||||
}
|
||||
}
|
||||
|
||||
// read_is_inter_probs() in the spec
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
|
||||
prev_frame_probs.intra_inter_prob);
|
||||
|
||||
// frame_reference_mode() in the spec
|
||||
if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
|
||||
(current_frame_info.ref_frame_sign_bias[2] & 1) ||
|
||||
(current_frame_info.ref_frame_sign_bias[1] & 1) !=
|
||||
(current_frame_info.ref_frame_sign_bias[3] & 1)) {
|
||||
if (current_frame_info.reference_mode >= 1) {
|
||||
writer.Write(1, 1);
|
||||
writer.Write(current_frame_info.reference_mode == 2);
|
||||
} else {
|
||||
writer.Write(0, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// frame_reference_mode_probs() in the spec
|
||||
if (current_frame_info.reference_mode == 2) {
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
|
||||
prev_frame_probs.comp_inter_prob);
|
||||
if (update_probs) {
|
||||
prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
|
||||
}
|
||||
}
|
||||
|
||||
if (current_frame_info.reference_mode != 1) {
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
|
||||
prev_frame_probs.single_ref_prob);
|
||||
if (update_probs) {
|
||||
prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
|
||||
}
|
||||
}
|
||||
|
||||
if (current_frame_info.reference_mode != 0) {
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
|
||||
prev_frame_probs.comp_ref_prob);
|
||||
if (update_probs) {
|
||||
prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
|
||||
}
|
||||
}
|
||||
|
||||
// read_y_mode_probs
|
||||
for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
|
||||
++index) {
|
||||
WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
|
||||
prev_frame_probs.y_mode_prob[index]);
|
||||
}
|
||||
|
||||
// read_partition_probs
|
||||
WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
|
||||
prev_frame_probs.partition_prob);
|
||||
|
||||
// mv_probs
|
||||
for (s32 i = 0; i < 3; i++) {
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
|
||||
prev_frame_probs.joints[i]);
|
||||
}
|
||||
if (update_probs) {
|
||||
prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
|
||||
prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
|
||||
prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
|
||||
prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
|
||||
prev_frame_probs.joints = current_frame_info.entropy.joints;
|
||||
}
|
||||
|
||||
for (s32 i = 0; i < 2; i++) {
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
|
||||
prev_frame_probs.sign[i]);
|
||||
for (s32 j = 0; j < 10; j++) {
|
||||
const int index = i * 10 + j;
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
|
||||
prev_frame_probs.classes[index]);
|
||||
}
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
|
||||
prev_frame_probs.class_0[i]);
|
||||
|
||||
for (s32 j = 0; j < 10; j++) {
|
||||
const int index = i * 10 + j;
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
|
||||
prev_frame_probs.prob_bits[index]);
|
||||
}
|
||||
}
|
||||
|
||||
for (s32 i = 0; i < 2; i++) {
|
||||
for (s32 j = 0; j < 2; j++) {
|
||||
for (s32 k = 0; k < 3; k++) {
|
||||
const int index = i * 2 * 3 + j * 3 + k;
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
|
||||
prev_frame_probs.class_0_fr[index]);
|
||||
}
|
||||
}
|
||||
|
||||
for (s32 j = 0; j < 3; j++) {
|
||||
const int index = i * 3 + j;
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
|
||||
prev_frame_probs.fr[index]);
|
||||
}
|
||||
}
|
||||
|
||||
if (current_frame_info.allow_high_precision_mv) {
|
||||
for (s32 index = 0; index < 2; index++) {
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
|
||||
prev_frame_probs.class_0_hp[index]);
|
||||
WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
|
||||
prev_frame_probs.high_precision[index]);
|
||||
}
|
||||
}
|
||||
|
||||
// save previous probs
|
||||
if (update_probs) {
|
||||
prev_frame_probs.sign = current_frame_info.entropy.sign;
|
||||
prev_frame_probs.classes = current_frame_info.entropy.classes;
|
||||
prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
|
||||
prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
|
||||
prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
|
||||
prev_frame_probs.fr = current_frame_info.entropy.fr;
|
||||
prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
|
||||
prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
|
||||
}
|
||||
}
|
||||
writer.End();
|
||||
return writer.GetBuffer();
|
||||
}
|
||||
|
||||
VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
|
||||
VpxBitStreamWriter uncomp_writer{};
|
||||
|
||||
uncomp_writer.WriteU(2, 2); // Frame marker.
|
||||
uncomp_writer.WriteU(0, 2); // Profile.
|
||||
uncomp_writer.WriteBit(false); // Show existing frame.
|
||||
uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame?
|
||||
uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame?
|
||||
uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience
|
||||
|
||||
if (current_frame_info.is_key_frame) {
|
||||
uncomp_writer.WriteU(frame_sync_code, 24);
|
||||
uncomp_writer.WriteU(0, 3); // Color space.
|
||||
uncomp_writer.WriteU(0, 1); // Color range.
|
||||
uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
|
||||
uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
|
||||
uncomp_writer.WriteBit(false); // Render and frame size different.
|
||||
|
||||
// Reset context
|
||||
prev_frame_probs = default_probs;
|
||||
swap_next_golden = false;
|
||||
loop_filter_ref_deltas.fill(0);
|
||||
loop_filter_mode_deltas.fill(0);
|
||||
|
||||
// allow frames offsets to stabilize before checking for golden frames
|
||||
grace_period = 4;
|
||||
|
||||
// On key frames, all frame slots are set to the current frame,
|
||||
// so the value of the selected slot doesn't really matter.
|
||||
frame_ctxs.fill({current_frame_number, false, default_probs});
|
||||
|
||||
// intra only, meaning the frame can be recreated with no other references
|
||||
current_frame_info.intra_only = true;
|
||||
|
||||
} else {
|
||||
|
||||
if (!current_frame_info.show_frame) {
|
||||
uncomp_writer.WriteBit(current_frame_info.intra_only);
|
||||
if (!current_frame_info.last_frame_was_key) {
|
||||
swap_next_golden = !swap_next_golden;
|
||||
}
|
||||
} else {
|
||||
current_frame_info.intra_only = false;
|
||||
}
|
||||
if (!current_frame_info.error_resilient_mode) {
|
||||
uncomp_writer.WriteU(0, 2); // Reset frame context.
|
||||
}
|
||||
|
||||
// Last, Golden, Altref frames
|
||||
std::array<s32, 3> ref_frame_index{0, 1, 2};
|
||||
|
||||
// Set when next frame is hidden
|
||||
// altref and golden references are swapped
|
||||
if (swap_next_golden) {
|
||||
ref_frame_index = std::array<s32, 3>{0, 2, 1};
|
||||
}
|
||||
|
||||
// update Last Frame
|
||||
u64 refresh_frame_flags = 1;
|
||||
|
||||
// golden frame may refresh, determined if the next golden frame offset is changed
|
||||
bool golden_refresh = false;
|
||||
if (grace_period <= 0) {
|
||||
for (s32 index = 1; index < 3; ++index) {
|
||||
if (current_frame_info.frame_offsets[index] !=
|
||||
next_frame.info.frame_offsets[index]) {
|
||||
current_frame_info.refresh_frame[index] = true;
|
||||
golden_refresh = true;
|
||||
grace_period = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (current_frame_info.show_frame &&
|
||||
(!next_frame.info.show_frame || next_frame.info.is_key_frame)) {
|
||||
// Update golden frame
|
||||
refresh_frame_flags = swap_next_golden ? 2 : 4;
|
||||
}
|
||||
|
||||
if (!current_frame_info.show_frame) {
|
||||
// Update altref
|
||||
refresh_frame_flags = swap_next_golden ? 2 : 4;
|
||||
} else if (golden_refresh) {
|
||||
refresh_frame_flags = 3;
|
||||
}
|
||||
|
||||
if (current_frame_info.intra_only) {
|
||||
uncomp_writer.WriteU(frame_sync_code, 24);
|
||||
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
|
||||
uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
|
||||
uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
|
||||
uncomp_writer.WriteBit(false); // Render and frame size different.
|
||||
} else {
|
||||
uncomp_writer.WriteU(static_cast<s32>(refresh_frame_flags), 8);
|
||||
|
||||
for (s32 index = 1; index < 4; index++) {
|
||||
uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
|
||||
uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
|
||||
}
|
||||
|
||||
uncomp_writer.WriteBit(true); // Frame size with refs.
|
||||
uncomp_writer.WriteBit(false); // Render and frame size different.
|
||||
uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
|
||||
uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
|
||||
|
||||
if (current_frame_info.interp_filter != 4) {
|
||||
uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!current_frame_info.error_resilient_mode) {
|
||||
uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from?
|
||||
uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
|
||||
}
|
||||
|
||||
int frame_ctx_idx = 0;
|
||||
if (!current_frame_info.show_frame) {
|
||||
frame_ctx_idx = 1;
|
||||
}
|
||||
|
||||
uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
|
||||
prev_frame_probs =
|
||||
frame_ctxs[frame_ctx_idx].probs; // reference probabilities for compressed header
|
||||
frame_ctxs[frame_ctx_idx] = {current_frame_number, false, current_frame_info.entropy};
|
||||
|
||||
uncomp_writer.WriteU(current_frame_info.first_level, 6);
|
||||
uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
|
||||
uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
|
||||
|
||||
if (current_frame_info.mode_ref_delta_enabled) {
|
||||
// check if ref deltas are different, update accordingly
|
||||
std::array<bool, 4> update_loop_filter_ref_deltas;
|
||||
std::array<bool, 2> update_loop_filter_mode_deltas;
|
||||
|
||||
bool loop_filter_delta_update = false;
|
||||
|
||||
for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
|
||||
const s8 old_deltas = loop_filter_ref_deltas[index];
|
||||
const s8 new_deltas = current_frame_info.ref_deltas[index];
|
||||
const bool differing_delta = old_deltas != new_deltas;
|
||||
|
||||
update_loop_filter_ref_deltas[index] = differing_delta;
|
||||
loop_filter_delta_update |= differing_delta;
|
||||
}
|
||||
|
||||
for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
|
||||
const s8 old_deltas = loop_filter_mode_deltas[index];
|
||||
const s8 new_deltas = current_frame_info.mode_deltas[index];
|
||||
const bool differing_delta = old_deltas != new_deltas;
|
||||
|
||||
update_loop_filter_mode_deltas[index] = differing_delta;
|
||||
loop_filter_delta_update |= differing_delta;
|
||||
}
|
||||
|
||||
uncomp_writer.WriteBit(loop_filter_delta_update);
|
||||
|
||||
if (loop_filter_delta_update) {
|
||||
for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
|
||||
uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
|
||||
|
||||
if (update_loop_filter_ref_deltas[index]) {
|
||||
uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
|
||||
}
|
||||
}
|
||||
|
||||
for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
|
||||
uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
|
||||
|
||||
if (update_loop_filter_mode_deltas[index]) {
|
||||
uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
|
||||
}
|
||||
}
|
||||
// save new deltas
|
||||
loop_filter_ref_deltas = current_frame_info.ref_deltas;
|
||||
loop_filter_mode_deltas = current_frame_info.mode_deltas;
|
||||
}
|
||||
}
|
||||
|
||||
uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
|
||||
|
||||
uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
|
||||
uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
|
||||
uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
|
||||
|
||||
uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
|
||||
|
||||
const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
|
||||
const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
|
||||
|
||||
const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
|
||||
const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
|
||||
|
||||
// If it's less than the maximum, we need to add an extra 0 on the bitstream
|
||||
// to indicate that it should stop reading.
|
||||
if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
|
||||
uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
|
||||
} else {
|
||||
uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
|
||||
}
|
||||
|
||||
const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
|
||||
|
||||
uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
|
||||
|
||||
if (tile_rows_log2_is_nonzero) {
|
||||
uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
|
||||
}
|
||||
|
||||
return uncomp_writer;
|
||||
}
|
||||
|
||||
const std::vector<u8>& VP9::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state) {
|
||||
std::vector<u8> bitstream;
|
||||
{
|
||||
Vp9FrameContainer curr_frame = GetCurrentFrame(state);
|
||||
current_frame_info = curr_frame.info;
|
||||
bitstream = std::move(curr_frame.bit_stream);
|
||||
}
|
||||
|
||||
// The uncompressed header routine sets PrevProb parameters needed for the compressed header
|
||||
auto uncomp_writer = ComposeUncompressedHeader();
|
||||
std::vector<u8> compressed_header = ComposeCompressedHeader();
|
||||
|
||||
uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
|
||||
uncomp_writer.Flush();
|
||||
std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
|
||||
|
||||
// Write headers and frame to buffer
|
||||
frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
|
||||
std::memcpy(frame.data(), uncompressed_header.data(), uncompressed_header.size());
|
||||
std::memcpy(frame.data() + uncompressed_header.size(), compressed_header.data(),
|
||||
compressed_header.size());
|
||||
std::memcpy(frame.data() + uncompressed_header.size() + compressed_header.size(),
|
||||
bitstream.data(), bitstream.size());
|
||||
|
||||
// keep track of frame number
|
||||
current_frame_number++;
|
||||
grace_period--;
|
||||
|
||||
// don't display hidden frames
|
||||
hidden = !current_frame_info.show_frame;
|
||||
return frame;
|
||||
}
|
||||
|
||||
VpxRangeEncoder::VpxRangeEncoder() {
|
||||
Write(false);
|
||||
}
|
||||
|
||||
VpxRangeEncoder::~VpxRangeEncoder() = default;
|
||||
|
||||
void VpxRangeEncoder::Write(s32 value, s32 value_size) {
|
||||
for (s32 bit = value_size - 1; bit >= 0; bit--) {
|
||||
Write(((value >> bit) & 1) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
void VpxRangeEncoder::Write(bool bit) {
|
||||
Write(bit, half_probability);
|
||||
}
|
||||
|
||||
void VpxRangeEncoder::Write(bool bit, s32 probability) {
|
||||
u32 local_range = range;
|
||||
const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
|
||||
local_range = split;
|
||||
|
||||
if (bit) {
|
||||
low_value += split;
|
||||
local_range = range - split;
|
||||
}
|
||||
|
||||
s32 shift = norm_lut[local_range];
|
||||
local_range <<= shift;
|
||||
count += shift;
|
||||
|
||||
if (count >= 0) {
|
||||
const s32 offset = shift - count;
|
||||
|
||||
if (((low_value << (offset - 1)) >> 31) != 0) {
|
||||
const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
|
||||
base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
|
||||
while (PeekByte() == 0xff) {
|
||||
base_stream.WriteByte(0);
|
||||
|
||||
base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
|
||||
}
|
||||
base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
|
||||
base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
|
||||
}
|
||||
base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
|
||||
|
||||
low_value <<= offset;
|
||||
shift = count;
|
||||
low_value &= 0xffffff;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
low_value <<= shift;
|
||||
range = local_range;
|
||||
}
|
||||
|
||||
void VpxRangeEncoder::End() {
|
||||
for (std::size_t index = 0; index < 32; ++index) {
|
||||
Write(false);
|
||||
}
|
||||
}
|
||||
|
||||
u8 VpxRangeEncoder::PeekByte() {
|
||||
const u8 value = base_stream.ReadByte();
|
||||
base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
VpxBitStreamWriter::VpxBitStreamWriter() = default;
|
||||
|
||||
VpxBitStreamWriter::~VpxBitStreamWriter() = default;
|
||||
|
||||
void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
|
||||
WriteBits(value, value_size);
|
||||
}
|
||||
|
||||
void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
|
||||
const bool sign = value < 0;
|
||||
if (sign) {
|
||||
value = -value;
|
||||
}
|
||||
|
||||
WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
|
||||
}
|
||||
|
||||
void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
|
||||
const bool delta_coded = value != 0;
|
||||
WriteBit(delta_coded);
|
||||
|
||||
if (delta_coded) {
|
||||
WriteBits(value, 4);
|
||||
}
|
||||
}
|
||||
|
||||
void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
|
||||
s32 value_pos = 0;
|
||||
s32 remaining = bit_count;
|
||||
|
||||
while (remaining > 0) {
|
||||
s32 copy_size = remaining;
|
||||
|
||||
const s32 free = GetFreeBufferBits();
|
||||
|
||||
if (copy_size > free) {
|
||||
copy_size = free;
|
||||
}
|
||||
|
||||
const s32 mask = (1 << copy_size) - 1;
|
||||
|
||||
const s32 src_shift = (bit_count - value_pos) - copy_size;
|
||||
const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
|
||||
|
||||
buffer |= ((value >> src_shift) & mask) << dst_shift;
|
||||
|
||||
value_pos += copy_size;
|
||||
buffer_pos += copy_size;
|
||||
remaining -= copy_size;
|
||||
}
|
||||
}
|
||||
|
||||
void VpxBitStreamWriter::WriteBit(bool state) {
|
||||
WriteBits(state ? 1 : 0, 1);
|
||||
}
|
||||
|
||||
s32 VpxBitStreamWriter::GetFreeBufferBits() {
|
||||
if (buffer_pos == buffer_size) {
|
||||
Flush();
|
||||
}
|
||||
|
||||
return buffer_size - buffer_pos;
|
||||
}
|
||||
|
||||
void VpxBitStreamWriter::Flush() {
|
||||
if (buffer_pos == 0) {
|
||||
return;
|
||||
}
|
||||
byte_array.push_back(static_cast<u8>(buffer));
|
||||
buffer = 0;
|
||||
buffer_pos = 0;
|
||||
}
|
||||
|
||||
std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
|
||||
return byte_array;
|
||||
}
|
||||
|
||||
const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
|
||||
return byte_array;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Decoder
|
197
src/video_core/command_classes/codecs/vp9.h
Normal file
197
src/video_core/command_classes/codecs/vp9.h
Normal file
@ -0,0 +1,197 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/stream.h"
|
||||
#include "video_core/command_classes/codecs/vp9_types.h"
|
||||
#include "video_core/command_classes/nvdec_common.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
enum class FrameType { KeyFrame = 0, InterFrame = 1 };
|
||||
namespace Decoder {
|
||||
|
||||
/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
|
||||
/// VP9 header bitstreams.
|
||||
|
||||
class VpxRangeEncoder {
|
||||
public:
|
||||
VpxRangeEncoder();
|
||||
~VpxRangeEncoder();
|
||||
|
||||
VpxRangeEncoder(const VpxRangeEncoder&) = delete;
|
||||
VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
|
||||
|
||||
VpxRangeEncoder(VpxRangeEncoder&&) = default;
|
||||
VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
|
||||
|
||||
/// Writes the rightmost value_size bits from value into the stream
|
||||
void Write(s32 value, s32 value_size);
|
||||
|
||||
/// Writes a single bit with half probability
|
||||
void Write(bool bit);
|
||||
|
||||
/// Writes a bit to the base_stream encoded with probability
|
||||
void Write(bool bit, s32 probability);
|
||||
|
||||
/// Signal the end of the bitstream
|
||||
void End();
|
||||
|
||||
[[nodiscard]] std::vector<u8>& GetBuffer() {
|
||||
return base_stream.GetBuffer();
|
||||
}
|
||||
|
||||
[[nodiscard]] const std::vector<u8>& GetBuffer() const {
|
||||
return base_stream.GetBuffer();
|
||||
}
|
||||
|
||||
private:
|
||||
u8 PeekByte();
|
||||
Common::Stream base_stream{};
|
||||
u32 low_value{};
|
||||
u32 range{0xff};
|
||||
s32 count{-24};
|
||||
s32 half_probability{128};
|
||||
};
|
||||
|
||||
class VpxBitStreamWriter {
|
||||
public:
|
||||
VpxBitStreamWriter();
|
||||
~VpxBitStreamWriter();
|
||||
|
||||
VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
|
||||
VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
|
||||
|
||||
VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
|
||||
VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
|
||||
|
||||
/// Write an unsigned integer value
|
||||
void WriteU(u32 value, u32 value_size);
|
||||
|
||||
/// Write a signed integer value
|
||||
void WriteS(s32 value, u32 value_size);
|
||||
|
||||
/// Based on 6.2.10 of VP9 Spec, writes a delta coded value
|
||||
void WriteDeltaQ(u32 value);
|
||||
|
||||
/// Write a single bit.
|
||||
void WriteBit(bool state);
|
||||
|
||||
/// Pushes current buffer into buffer_array, resets buffer
|
||||
void Flush();
|
||||
|
||||
/// Returns byte_array
|
||||
[[nodiscard]] std::vector<u8>& GetByteArray();
|
||||
|
||||
/// Returns const byte_array
|
||||
[[nodiscard]] const std::vector<u8>& GetByteArray() const;
|
||||
|
||||
private:
|
||||
/// Write bit_count bits from value into buffer
|
||||
void WriteBits(u32 value, u32 bit_count);
|
||||
|
||||
/// Gets next available position in buffer, invokes Flush() if buffer is full
|
||||
s32 GetFreeBufferBits();
|
||||
|
||||
s32 buffer_size{8};
|
||||
|
||||
s32 buffer{};
|
||||
s32 buffer_pos{};
|
||||
std::vector<u8> byte_array;
|
||||
};
|
||||
|
||||
class VP9 {
|
||||
public:
|
||||
explicit VP9(GPU& gpu_);
|
||||
~VP9();
|
||||
|
||||
VP9(const VP9&) = delete;
|
||||
VP9& operator=(const VP9&) = delete;
|
||||
|
||||
VP9(VP9&&) = default;
|
||||
VP9& operator=(VP9&&) = delete;
|
||||
|
||||
/// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
|
||||
/// documentation
|
||||
[[nodiscard]] const std::vector<u8>& ComposeFrameHeader(
|
||||
const NvdecCommon::NvdecRegisters& state);
|
||||
|
||||
/// Returns true if the most recent frame was a hidden frame.
|
||||
[[nodiscard]] bool WasFrameHidden() const {
|
||||
return hidden;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Generates compressed header probability updates in the bitstream writer
|
||||
template <typename T, std::size_t N>
|
||||
void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
const std::array<T, N>& old_prob);
|
||||
|
||||
/// Generates compressed header probability updates in the bitstream writer
|
||||
/// If probs are not equal, WriteProbabilityDelta is invoked
|
||||
void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
/// Generates compressed header probability deltas in the bitstream writer
|
||||
void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
/// Inverse of 6.3.4 Decode term subexp
|
||||
void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
|
||||
|
||||
/// Writes if the value is less than the test value
|
||||
bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
|
||||
|
||||
/// Writes probability updates for the Coef probabilities
|
||||
void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
|
||||
const std::array<u8, 1728>& new_prob,
|
||||
const std::array<u8, 1728>& old_prob);
|
||||
|
||||
/// Write probabilities for 4-byte aligned structures
|
||||
template <typename T, std::size_t N>
|
||||
void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
|
||||
const std::array<T, N>& old_prob);
|
||||
|
||||
/// Write motion vector probability updates. 6.3.17 in the spec
|
||||
void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
|
||||
|
||||
/// Returns VP9 information from NVDEC provided offset and size
|
||||
[[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
|
||||
|
||||
/// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
|
||||
void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
|
||||
|
||||
/// Returns frame to be decoded after buffering
|
||||
[[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
|
||||
|
||||
/// Use NVDEC providied information to compose the headers for the current frame
|
||||
[[nodiscard]] std::vector<u8> ComposeCompressedHeader();
|
||||
[[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
|
||||
|
||||
GPU& gpu;
|
||||
std::vector<u8> frame;
|
||||
|
||||
std::array<s8, 4> loop_filter_ref_deltas{};
|
||||
std::array<s8, 2> loop_filter_mode_deltas{};
|
||||
|
||||
bool hidden = false;
|
||||
s64 current_frame_number = -2; // since we buffer 2 frames
|
||||
s32 grace_period = 6; // frame offsets need to stabilize
|
||||
std::array<FrameContexts, 4> frame_ctxs{};
|
||||
Vp9FrameContainer next_frame{};
|
||||
Vp9FrameContainer next_next_frame{};
|
||||
bool swap_next_golden{};
|
||||
|
||||
Vp9PictureInfo current_frame_info{};
|
||||
Vp9EntropyProbs prev_frame_probs{};
|
||||
|
||||
s32 diff_update_probability = 252;
|
||||
s32 frame_sync_code = 0x498342;
|
||||
};
|
||||
|
||||
} // namespace Decoder
|
||||
} // namespace Tegra
|
302
src/video_core/command_classes/codecs/vp9_types.h
Normal file
302
src/video_core/command_classes/codecs/vp9_types.h
Normal file
@ -0,0 +1,302 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
|
||||
namespace Decoder {
|
||||
struct Vp9FrameDimensions {
|
||||
s16 width{};
|
||||
s16 height{};
|
||||
s16 luma_pitch{};
|
||||
s16 chroma_pitch{};
|
||||
};
|
||||
static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
|
||||
|
||||
enum FrameFlags : u32 {
|
||||
IsKeyFrame = 1 << 0,
|
||||
LastFrameIsKeyFrame = 1 << 1,
|
||||
FrameSizeChanged = 1 << 2,
|
||||
ErrorResilientMode = 1 << 3,
|
||||
LastShowFrame = 1 << 4,
|
||||
IntraOnly = 1 << 5,
|
||||
};
|
||||
|
||||
enum class TxSize {
|
||||
Tx4x4 = 0, // 4x4 transform
|
||||
Tx8x8 = 1, // 8x8 transform
|
||||
Tx16x16 = 2, // 16x16 transform
|
||||
Tx32x32 = 3, // 32x32 transform
|
||||
TxSizes = 4
|
||||
};
|
||||
|
||||
enum class TxMode {
|
||||
Only4X4 = 0, // Only 4x4 transform used
|
||||
Allow8X8 = 1, // Allow block transform size up to 8x8
|
||||
Allow16X16 = 2, // Allow block transform size up to 16x16
|
||||
Allow32X32 = 3, // Allow block transform size up to 32x32
|
||||
TxModeSelect = 4, // Transform specified for each block
|
||||
TxModes = 5
|
||||
};
|
||||
|
||||
struct Segmentation {
|
||||
u8 enabled{};
|
||||
u8 update_map{};
|
||||
u8 temporal_update{};
|
||||
u8 abs_delta{};
|
||||
std::array<u32, 8> feature_mask{};
|
||||
std::array<std::array<s16, 4>, 8> feature_data{};
|
||||
};
|
||||
static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
|
||||
|
||||
struct LoopFilter {
|
||||
u8 mode_ref_delta_enabled{};
|
||||
std::array<s8, 4> ref_deltas{};
|
||||
std::array<s8, 2> mode_deltas{};
|
||||
};
|
||||
static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
|
||||
|
||||
struct Vp9EntropyProbs {
|
||||
std::array<u8, 36> y_mode_prob{};
|
||||
std::array<u8, 64> partition_prob{};
|
||||
std::array<u8, 1728> coef_probs{};
|
||||
std::array<u8, 8> switchable_interp_prob{};
|
||||
std::array<u8, 28> inter_mode_prob{};
|
||||
std::array<u8, 4> intra_inter_prob{};
|
||||
std::array<u8, 5> comp_inter_prob{};
|
||||
std::array<u8, 10> single_ref_prob{};
|
||||
std::array<u8, 5> comp_ref_prob{};
|
||||
std::array<u8, 6> tx_32x32_prob{};
|
||||
std::array<u8, 4> tx_16x16_prob{};
|
||||
std::array<u8, 2> tx_8x8_prob{};
|
||||
std::array<u8, 3> skip_probs{};
|
||||
std::array<u8, 3> joints{};
|
||||
std::array<u8, 2> sign{};
|
||||
std::array<u8, 20> classes{};
|
||||
std::array<u8, 2> class_0{};
|
||||
std::array<u8, 20> prob_bits{};
|
||||
std::array<u8, 12> class_0_fr{};
|
||||
std::array<u8, 6> fr{};
|
||||
std::array<u8, 2> class_0_hp{};
|
||||
std::array<u8, 2> high_precision{};
|
||||
};
|
||||
static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
|
||||
|
||||
struct Vp9PictureInfo {
|
||||
bool is_key_frame{};
|
||||
bool intra_only{};
|
||||
bool last_frame_was_key{};
|
||||
bool frame_size_changed{};
|
||||
bool error_resilient_mode{};
|
||||
bool last_frame_shown{};
|
||||
bool show_frame{};
|
||||
std::array<s8, 4> ref_frame_sign_bias{};
|
||||
s32 base_q_index{};
|
||||
s32 y_dc_delta_q{};
|
||||
s32 uv_dc_delta_q{};
|
||||
s32 uv_ac_delta_q{};
|
||||
bool lossless{};
|
||||
s32 transform_mode{};
|
||||
bool allow_high_precision_mv{};
|
||||
s32 interp_filter{};
|
||||
s32 reference_mode{};
|
||||
s8 comp_fixed_ref{};
|
||||
std::array<s8, 2> comp_var_ref{};
|
||||
s32 log2_tile_cols{};
|
||||
s32 log2_tile_rows{};
|
||||
bool segment_enabled{};
|
||||
bool segment_map_update{};
|
||||
bool segment_map_temporal_update{};
|
||||
s32 segment_abs_delta{};
|
||||
std::array<u32, 8> segment_feature_enable{};
|
||||
std::array<std::array<s16, 4>, 8> segment_feature_data{};
|
||||
bool mode_ref_delta_enabled{};
|
||||
bool use_prev_in_find_mv_refs{};
|
||||
std::array<s8, 4> ref_deltas{};
|
||||
std::array<s8, 2> mode_deltas{};
|
||||
Vp9EntropyProbs entropy{};
|
||||
Vp9FrameDimensions frame_size{};
|
||||
u8 first_level{};
|
||||
u8 sharpness_level{};
|
||||
u32 bitstream_size{};
|
||||
std::array<u64, 4> frame_offsets{};
|
||||
std::array<bool, 4> refresh_frame{};
|
||||
};
|
||||
|
||||
struct Vp9FrameContainer {
|
||||
Vp9PictureInfo info{};
|
||||
std::vector<u8> bit_stream;
|
||||
};
|
||||
|
||||
struct PictureInfo {
|
||||
INSERT_PADDING_WORDS(12);
|
||||
u32 bitstream_size{};
|
||||
INSERT_PADDING_WORDS(5);
|
||||
Vp9FrameDimensions last_frame_size{};
|
||||
Vp9FrameDimensions golden_frame_size{};
|
||||
Vp9FrameDimensions alt_frame_size{};
|
||||
Vp9FrameDimensions current_frame_size{};
|
||||
u32 vp9_flags{};
|
||||
std::array<s8, 4> ref_frame_sign_bias{};
|
||||
u8 first_level{};
|
||||
u8 sharpness_level{};
|
||||
u8 base_q_index{};
|
||||
u8 y_dc_delta_q{};
|
||||
u8 uv_ac_delta_q{};
|
||||
u8 uv_dc_delta_q{};
|
||||
u8 lossless{};
|
||||
u8 tx_mode{};
|
||||
u8 allow_high_precision_mv{};
|
||||
u8 interp_filter{};
|
||||
u8 reference_mode{};
|
||||
s8 comp_fixed_ref{};
|
||||
std::array<s8, 2> comp_var_ref{};
|
||||
u8 log2_tile_cols{};
|
||||
u8 log2_tile_rows{};
|
||||
Segmentation segmentation{};
|
||||
LoopFilter loop_filter{};
|
||||
INSERT_PADDING_BYTES(5);
|
||||
u32 surface_params{};
|
||||
INSERT_PADDING_WORDS(3);
|
||||
|
||||
[[nodiscard]] Vp9PictureInfo Convert() const {
|
||||
return {
|
||||
.is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
|
||||
.intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
|
||||
.last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
|
||||
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
|
||||
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
|
||||
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
|
||||
.ref_frame_sign_bias = ref_frame_sign_bias,
|
||||
.base_q_index = base_q_index,
|
||||
.y_dc_delta_q = y_dc_delta_q,
|
||||
.uv_dc_delta_q = uv_dc_delta_q,
|
||||
.uv_ac_delta_q = uv_ac_delta_q,
|
||||
.lossless = lossless != 0,
|
||||
.transform_mode = tx_mode,
|
||||
.allow_high_precision_mv = allow_high_precision_mv != 0,
|
||||
.interp_filter = interp_filter,
|
||||
.reference_mode = reference_mode,
|
||||
.comp_fixed_ref = comp_fixed_ref,
|
||||
.comp_var_ref = comp_var_ref,
|
||||
.log2_tile_cols = log2_tile_cols,
|
||||
.log2_tile_rows = log2_tile_rows,
|
||||
.segment_enabled = segmentation.enabled != 0,
|
||||
.segment_map_update = segmentation.update_map != 0,
|
||||
.segment_map_temporal_update = segmentation.temporal_update != 0,
|
||||
.segment_abs_delta = segmentation.abs_delta,
|
||||
.segment_feature_enable = segmentation.feature_mask,
|
||||
.segment_feature_data = segmentation.feature_data,
|
||||
.mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
|
||||
.use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
|
||||
!(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
|
||||
!(vp9_flags == (FrameFlags::IntraOnly)) &&
|
||||
(vp9_flags == (FrameFlags::LastShowFrame)) &&
|
||||
!(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
|
||||
.ref_deltas = loop_filter.ref_deltas,
|
||||
.mode_deltas = loop_filter.mode_deltas,
|
||||
.frame_size = current_frame_size,
|
||||
.first_level = first_level,
|
||||
.sharpness_level = sharpness_level,
|
||||
.bitstream_size = bitstream_size,
|
||||
};
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
|
||||
|
||||
struct EntropyProbs {
|
||||
INSERT_PADDING_BYTES(1024);
|
||||
std::array<u8, 28> inter_mode_prob{};
|
||||
std::array<u8, 4> intra_inter_prob{};
|
||||
INSERT_PADDING_BYTES(80);
|
||||
std::array<u8, 2> tx_8x8_prob{};
|
||||
std::array<u8, 4> tx_16x16_prob{};
|
||||
std::array<u8, 6> tx_32x32_prob{};
|
||||
std::array<u8, 4> y_mode_prob_e8{};
|
||||
std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
|
||||
INSERT_PADDING_BYTES(64);
|
||||
std::array<u8, 64> partition_prob{};
|
||||
INSERT_PADDING_BYTES(10);
|
||||
std::array<u8, 8> switchable_interp_prob{};
|
||||
std::array<u8, 5> comp_inter_prob{};
|
||||
std::array<u8, 3> skip_probs{};
|
||||
INSERT_PADDING_BYTES(1);
|
||||
std::array<u8, 3> joints{};
|
||||
std::array<u8, 2> sign{};
|
||||
std::array<u8, 2> class_0{};
|
||||
std::array<u8, 6> fr{};
|
||||
std::array<u8, 2> class_0_hp{};
|
||||
std::array<u8, 2> high_precision{};
|
||||
std::array<u8, 20> classes{};
|
||||
std::array<u8, 12> class_0_fr{};
|
||||
std::array<u8, 20> pred_bits{};
|
||||
std::array<u8, 10> single_ref_prob{};
|
||||
std::array<u8, 5> comp_ref_prob{};
|
||||
INSERT_PADDING_BYTES(17);
|
||||
std::array<u8, 2304> coef_probs{};
|
||||
|
||||
void Convert(Vp9EntropyProbs& fc) {
|
||||
fc.inter_mode_prob = inter_mode_prob;
|
||||
fc.intra_inter_prob = intra_inter_prob;
|
||||
fc.tx_8x8_prob = tx_8x8_prob;
|
||||
fc.tx_16x16_prob = tx_16x16_prob;
|
||||
fc.tx_32x32_prob = tx_32x32_prob;
|
||||
|
||||
for (std::size_t i = 0; i < 4; i++) {
|
||||
for (std::size_t j = 0; j < 9; j++) {
|
||||
fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
|
||||
}
|
||||
}
|
||||
|
||||
fc.partition_prob = partition_prob;
|
||||
fc.switchable_interp_prob = switchable_interp_prob;
|
||||
fc.comp_inter_prob = comp_inter_prob;
|
||||
fc.skip_probs = skip_probs;
|
||||
fc.joints = joints;
|
||||
fc.sign = sign;
|
||||
fc.class_0 = class_0;
|
||||
fc.fr = fr;
|
||||
fc.class_0_hp = class_0_hp;
|
||||
fc.high_precision = high_precision;
|
||||
fc.classes = classes;
|
||||
fc.class_0_fr = class_0_fr;
|
||||
fc.prob_bits = pred_bits;
|
||||
fc.single_ref_prob = single_ref_prob;
|
||||
fc.comp_ref_prob = comp_ref_prob;
|
||||
|
||||
// Skip the 4th element as it goes unused
|
||||
for (std::size_t i = 0; i < coef_probs.size(); i += 4) {
|
||||
const std::size_t j = i - i / 4;
|
||||
fc.coef_probs[j] = coef_probs[i];
|
||||
fc.coef_probs[j + 1] = coef_probs[i + 1];
|
||||
fc.coef_probs[j + 2] = coef_probs[i + 2];
|
||||
}
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
|
||||
|
||||
enum class Ref { Last, Golden, AltRef };
|
||||
|
||||
struct RefPoolElement {
|
||||
s64 frame{};
|
||||
Ref ref{};
|
||||
bool refresh{};
|
||||
};
|
||||
|
||||
struct FrameContexts {
|
||||
s64 from{};
|
||||
bool adapted{};
|
||||
Vp9EntropyProbs probs{};
|
||||
};
|
||||
|
||||
}; // namespace Decoder
|
||||
}; // namespace Tegra
|
30
src/video_core/command_classes/host1x.cpp
Normal file
30
src/video_core/command_classes/host1x.cpp
Normal file
@ -0,0 +1,30 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/command_classes/host1x.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
|
||||
|
||||
Tegra::Host1x::~Host1x() = default;
|
||||
|
||||
void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
|
||||
switch (method) {
|
||||
case Method::LoadSyncptPayload32:
|
||||
syncpoint_value = argument;
|
||||
break;
|
||||
case Method::WaitSyncpt:
|
||||
case Method::WaitSyncpt32:
|
||||
Execute(argument);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Tegra::Host1x::Execute(u32 data) {
|
||||
gpu.WaitFence(data, syncpoint_value);
|
||||
}
|
37
src/video_core/command_classes/host1x.h
Normal file
37
src/video_core/command_classes/host1x.h
Normal file
@ -0,0 +1,37 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
class GPU;
|
||||
class Nvdec;
|
||||
|
||||
class Host1x {
|
||||
public:
|
||||
enum class Method : u32 {
|
||||
WaitSyncpt = 0x8,
|
||||
LoadSyncptPayload32 = 0x4e,
|
||||
WaitSyncpt32 = 0x50,
|
||||
};
|
||||
|
||||
explicit Host1x(GPU& gpu);
|
||||
~Host1x();
|
||||
|
||||
/// Writes the method into the state, Invoke Execute() if encountered
|
||||
void ProcessMethod(Method method, u32 argument);
|
||||
|
||||
private:
|
||||
/// For Host1x, execute is waiting on a syncpoint previously written into the state
|
||||
void Execute(u32 data);
|
||||
|
||||
u32 syncpoint_value{};
|
||||
GPU& gpu;
|
||||
};
|
||||
|
||||
} // namespace Tegra
48 src/video_core/command_classes/nvdec.cpp Normal file
@ -0,0 +1,48 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/assert.h"
#include "video_core/command_classes/nvdec.h"
#include "video_core/gpu.h"

namespace Tegra {

Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}

Nvdec::~Nvdec() = default;

void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
    if (method == Method::SetVideoCodec) {
        codec->StateWrite(static_cast<u32>(method), arguments[0]);
    } else {
        codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
    }

    switch (method) {
    case Method::SetVideoCodec:
        codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
        break;
    case Method::Execute:
        Execute();
        break;
    }
}

AVFramePtr Nvdec::GetFrame() {
    return codec->GetCurrentFrame();
}

void Nvdec::Execute() {
    switch (codec->GetCurrentCodec()) {
    case NvdecCommon::VideoCodec::H264:
    case NvdecCommon::VideoCodec::Vp9:
        codec->Decode();
        break;
    default:
        UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
        break;
    }
}

} // namespace Tegra
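Note the asymmetry in ProcessMethod above: SetVideoCodec stores its argument raw, while every other method stores it shifted left by 8. The offsets arriving from the pushbuffer appear to be expressed in 256-byte units, so the shift reconstructs a byte address; for example (the value is made up):

// Offset methods carry addresses in 256-byte units (an inference from the << 8 above).
const u32 argument = 0x00123456;                              // as received in arguments[0]
const u64 gpu_byte_address = static_cast<u64>(argument) << 8; // 0x12345600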
38 src/video_core/command_classes/nvdec.h Normal file
@ -0,0 +1,38 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/command_classes/codecs/codec.h"

namespace Tegra {
class GPU;

class Nvdec {
public:
    enum class Method : u32 {
        SetVideoCodec = 0x80,
        Execute = 0xc0,
    };

    explicit Nvdec(GPU& gpu);
    ~Nvdec();

    /// Writes the method into the state, invoking Execute() when the execute method is encountered
    void ProcessMethod(Method method, const std::vector<u32>& arguments);

    /// Returns the most recently decoded frame
    [[nodiscard]] AVFramePtr GetFrame();

private:
    /// Invokes the codec to decode a frame
    void Execute();

    GPU& gpu;
    std::unique_ptr<Codec> codec;
};
} // namespace Tegra
48 src/video_core/command_classes/nvdec_common.h Normal file
@ -0,0 +1,48 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include "common/common_funcs.h"
#include "common/common_types.h"

namespace Tegra::NvdecCommon {

struct NvdecRegisters {
    INSERT_PADDING_WORDS(256);
    u64 set_codec_id{};
    INSERT_PADDING_WORDS(254);
    u64 set_platform_id{};
    u64 picture_info_offset{};
    u64 frame_bitstream_offset{};
    u64 frame_number{};
    u64 h264_slice_data_offsets{};
    u64 h264_mv_dump_offset{};
    INSERT_PADDING_WORDS(6);
    u64 frame_stats_offset{};
    u64 h264_last_surface_luma_offset{};
    u64 h264_last_surface_chroma_offset{};
    std::array<u64, 17> surface_luma_offset{};
    std::array<u64, 17> surface_chroma_offset{};
    INSERT_PADDING_WORDS(132);
    u64 vp9_entropy_probs_offset{};
    u64 vp9_backward_updates_offset{};
    u64 vp9_last_frame_segmap_offset{};
    u64 vp9_curr_frame_segmap_offset{};
    INSERT_PADDING_WORDS(2);
    u64 vp9_last_frame_mvs_offset{};
    u64 vp9_curr_frame_mvs_offset{};
    INSERT_PADDING_WORDS(2);
};
static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");

enum class VideoCodec : u32 {
    None = 0x0,
    H264 = 0x3,
    Vp8 = 0x5,
    H265 = 0x7,
    Vp9 = 0x9,
};

} // namespace Tegra::NvdecCommon
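The register writes from nvdec.cpp above presumably land in this struct through Codec::StateWrite, whose body is not part of this diff. Under the assumption that it treats NvdecRegisters as a flat u64-addressed register file, a sketch would be:

// Assumed shape of Codec::StateWrite: the method number is a u64-stride offset.
// Consistency check from the diff itself: SetVideoCodec = 0x80, and
// 0x80 * sizeof(u64) = 0x400 bytes = INSERT_PADDING_WORDS(256), i.e. set_codec_id.
#include <cstring>

void StateWrite(Tegra::NvdecCommon::NvdecRegisters& state, u32 offset, u64 arguments) {
    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64);
    std::memcpy(state_offset, &arguments, sizeof(u64));
}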
60 src/video_core/command_classes/sync_manager.cpp Normal file
@ -0,0 +1,60 @@
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

#include <algorithm>
#include "sync_manager.h"
#include "video_core/gpu.h"

namespace Tegra {
SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
SyncptIncrManager::~SyncptIncrManager() = default;

void SyncptIncrManager::Increment(u32 id) {
    increments.emplace_back(0, 0, id, true);
    IncrementAllDone();
}

u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
    const u32 handle = current_id++;
    increments.emplace_back(handle, class_id, id);
    return handle;
}

void SyncptIncrManager::SignalDone(u32 handle) {
    const auto done_incr =
        std::find_if(increments.begin(), increments.end(),
                     [handle](const SyncptIncr& incr) { return incr.id == handle; });
    if (done_incr != increments.cend()) {
        done_incr->complete = true;
    }
    IncrementAllDone();
}

void SyncptIncrManager::IncrementAllDone() {
    std::size_t done_count = 0;
    for (; done_count < increments.size(); ++done_count) {
        if (!increments[done_count].complete) {
            break;
        }
        gpu.IncrementSyncPoint(increments[done_count].syncpt_id);
    }
    increments.erase(increments.begin(), increments.begin() + done_count);
}
} // namespace Tegra
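IncrementAllDone() preserves submission order: it flushes increments only up to the first incomplete entry, so a later SignalDone cannot reorder syncpoint increments. A small usage sketch (the ids are illustrative):

// Illustrative ordering: the second increment completes first, yet syncpoints
// still fire in submission order.
Tegra::SyncptIncrManager incr_manager{gpu};
const u32 first = incr_manager.IncrementWhenDone(class_id, /*syncpt id=*/5);
const u32 second = incr_manager.IncrementWhenDone(class_id, /*syncpt id=*/5);
incr_manager.SignalDone(second); // nothing fires yet; `first` is still pending
incr_manager.SignalDone(first);  // now both increments are applied, in order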
64 src/video_core/command_classes/sync_manager.h Normal file
@ -0,0 +1,64 @@
// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

#pragma once

#include <mutex>
#include <vector>
#include "common/common_types.h"

namespace Tegra {
class GPU;
struct SyncptIncr {
    u32 id;
    u32 class_id;
    u32 syncpt_id;
    bool complete;

    SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
        : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
};

class SyncptIncrManager {
public:
    explicit SyncptIncrManager(GPU& gpu);
    ~SyncptIncrManager();

    /// Queues an already-completed increment for the given syncpoint id and flushes
    void Increment(u32 id);

    /// Returns a handle to increment later
    u32 IncrementWhenDone(u32 class_id, u32 id);

    /// Marks the increment with the given handle complete, then runs IncrementAllDone()
    void SignalDone(u32 handle);

    /// Increment all sequential pending increments that are already done.
    void IncrementAllDone();

private:
    std::vector<SyncptIncr> increments;
    std::mutex increment_lock;
    u32 current_id{};

    GPU& gpu;
};

} // namespace Tegra
175 src/video_core/command_classes/vic.cpp Normal file
@ -0,0 +1,175 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <array>
#include <cstring>
#include "common/assert.h"
#include "video_core/command_classes/nvdec.h"
#include "video_core/command_classes/vic.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"

extern "C" {
#include <libswscale/swscale.h>
}

namespace Tegra {

Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
    : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
Vic::~Vic() = default;

void Vic::VicStateWrite(u32 offset, u32 arguments) {
    u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
    std::memcpy(state_offset, &arguments, sizeof(u32));
}

void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
    LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
    VicStateWrite(static_cast<u32>(method), arguments[0]);
    const u64 arg = static_cast<u64>(arguments[0]) << 8;
    switch (method) {
    case Method::Execute:
        Execute();
        break;
    case Method::SetConfigStructOffset:
        config_struct_address = arg;
        break;
    case Method::SetOutputSurfaceLumaOffset:
        output_surface_luma_address = arg;
        break;
    case Method::SetOutputSurfaceChromaUOffset:
        output_surface_chroma_u_address = arg;
        break;
    case Method::SetOutputSurfaceChromaVOffset:
        output_surface_chroma_v_address = arg;
        break;
    default:
        break;
    }
}

void Vic::Execute() {
    if (output_surface_luma_address == 0) {
        LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
                  vic_state.output_surface.luma_offset);
        return;
    }
    const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
    const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
    const auto* frame = frame_ptr.get();
    if (!frame || frame->width == 0 || frame->height == 0) {
        return;
    }
    const VideoPixelFormat pixel_format =
        static_cast<VideoPixelFormat>(config.pixel_format.Value());
    switch (pixel_format) {
    case VideoPixelFormat::BGRA8:
    case VideoPixelFormat::RGBA8: {
        LOG_TRACE(Service_NVDRV, "Writing RGB Frame");

        if (scaler_ctx == nullptr || frame->width != scaler_width ||
            frame->height != scaler_height) {
            const AVPixelFormat target_format =
                (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;

            sws_freeContext(scaler_ctx);
            scaler_ctx = nullptr;

            // FFmpeg returns all frames in YUV420; convert to the expected format
            scaler_ctx =
                sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
                               frame->height, target_format, 0, nullptr, nullptr, nullptr);

            scaler_width = frame->width;
            scaler_height = frame->height;
        }
        // Get the converted frame
        const std::size_t linear_size = frame->width * frame->height * 4;

        using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
        AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};

        const int converted_stride{frame->width * 4};
        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};

        sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
                  &converted_frame_buf_addr, &converted_stride);

        const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
        if (blk_kind != 0) {
            // Swizzle the pitch-linear frame to block-linear
            const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
            const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
                                                            block_height, 0);
            std::vector<u8> swizzled_data(size);
            Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
                                           frame->width, 4, swizzled_data.data(),
                                           converted_frame_buffer.get(), block_height, 0, 0);

            gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
            gpu.Maxwell3D().OnMemoryWrite();
        } else {
            // Send the pitch-linear frame
            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
                                           linear_size);
            gpu.Maxwell3D().OnMemoryWrite();
        }
        break;
    }
    case VideoPixelFormat::Yuv420: {
        LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");

        const std::size_t surface_width = config.surface_width_minus1 + 1;
        const std::size_t surface_height = config.surface_height_minus1 + 1;
        const std::size_t half_width = surface_width / 2;
        const std::size_t half_height = config.surface_height_minus1 / 2;
        const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;

        const auto* luma_ptr = frame->data[0];
        const auto* chroma_b_ptr = frame->data[1];
        const auto* chroma_r_ptr = frame->data[2];
        const auto stride = frame->linesize[0];
        const auto half_stride = frame->linesize[1];

        std::vector<u8> luma_buffer(aligned_width * surface_height);
        std::vector<u8> chroma_buffer(aligned_width * half_height);

        // Populate luma buffer
        for (std::size_t y = 0; y < surface_height - 1; ++y) {
            std::size_t src = y * stride;
            std::size_t dst = y * aligned_width;

            std::size_t size = surface_width;

            for (std::size_t offset = 0; offset < size; ++offset) {
                luma_buffer[dst + offset] = luma_ptr[src + offset];
            }
        }
        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
                                       luma_buffer.size());

        // Populate chroma buffer from both channels with interleaving.
        for (std::size_t y = 0; y < half_height; ++y) {
            std::size_t src = y * half_stride;
            std::size_t dst = y * aligned_width;

            for (std::size_t x = 0; x < half_width; ++x) {
                chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
                chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
            }
        }
        gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
                                       chroma_buffer.size());
        gpu.Maxwell3D().OnMemoryWrite();
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
        break;
    }
}

} // namespace Tegra
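In the YUV420 path above the destination pitch is aligned_width = (surface_width + 0xff) & ~0xff, i.e. each luma and chroma row is padded to a 256-byte pitch before being written to guest memory. A quick illustration of the rounding:

// 256-byte pitch alignment used by the YUV420 path; sample values for clarity.
constexpr std::size_t AlignPitch(std::size_t width) {
    return (width + 0xff) & ~std::size_t{0xff};
}
static_assert(AlignPitch(1280) == 1280); // already a multiple of 256
static_assert(AlignPitch(1281) == 1536); // rounded up to the next 256 bytes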
110 src/video_core/command_classes/vic.h Normal file
@ -0,0 +1,110 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <memory>
#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"

struct SwsContext;

namespace Tegra {
class GPU;
class Nvdec;

struct PlaneOffsets {
    u32 luma_offset{};
    u32 chroma_u_offset{};
    u32 chroma_v_offset{};
};

struct VicRegisters {
    INSERT_PADDING_WORDS(64);
    u32 nop{};
    INSERT_PADDING_WORDS(15);
    u32 pm_trigger{};
    INSERT_PADDING_WORDS(47);
    u32 set_application_id{};
    u32 set_watchdog_timer{};
    INSERT_PADDING_WORDS(17);
    u32 context_save_area{};
    u32 context_switch{};
    INSERT_PADDING_WORDS(43);
    u32 execute{};
    INSERT_PADDING_WORDS(63);
    std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
    u32 picture_index{};
    u32 control_params{};
    u32 config_struct_offset{};
    u32 filter_struct_offset{};
    u32 palette_offset{};
    u32 hist_offset{};
    u32 context_id{};
    u32 fce_ucode_size{};
    PlaneOffsets output_surface{};
    u32 fce_ucode_offset{};
    INSERT_PADDING_WORDS(4);
    std::array<u32, 8> slot_context_id{};
    INSERT_PADDING_WORDS(16);
};
static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");

class Vic {
public:
    enum class Method : u32 {
        Execute = 0xc0,
        SetControlParams = 0x1c1,
        SetConfigStructOffset = 0x1c2,
        SetOutputSurfaceLumaOffset = 0x1c8,
        SetOutputSurfaceChromaUOffset = 0x1c9,
        SetOutputSurfaceChromaVOffset = 0x1ca
    };

    explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
    ~Vic();

    /// Write to the device state.
    void ProcessMethod(Method method, const std::vector<u32>& arguments);

private:
    void Execute();

    void VicStateWrite(u32 offset, u32 arguments);
    VicRegisters vic_state{};

    enum class VideoPixelFormat : u64_le {
        RGBA8 = 0x1f,
        BGRA8 = 0x20,
        Yuv420 = 0x44,
    };

    union VicConfig {
        u64_le raw{};
        BitField<0, 7, u64_le> pixel_format;
        BitField<7, 2, u64_le> chroma_loc_horiz;
        BitField<9, 2, u64_le> chroma_loc_vert;
        BitField<11, 4, u64_le> block_linear_kind;
        BitField<15, 4, u64_le> block_linear_height_log2;
        BitField<19, 3, u64_le> reserved0;
        BitField<22, 10, u64_le> reserved1;
        BitField<32, 14, u64_le> surface_width_minus1;
        BitField<46, 14, u64_le> surface_height_minus1;
    };

    GPU& gpu;
    std::shared_ptr<Tegra::Nvdec> nvdec_processor;

    GPUVAddr config_struct_address{};
    GPUVAddr output_surface_luma_address{};
    GPUVAddr output_surface_chroma_u_address{};
    GPUVAddr output_surface_chroma_v_address{};

    SwsContext* scaler_ctx{};
    s32 scaler_width{};
    s32 scaler_height{};
};

} // namespace Tegra
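VicConfig overlays the single u64 that Execute() reads from config_struct_address + 0x20. For reference, a manual decode of the same bit layout with shifts and masks (the raw value is fabricated):

// Manual equivalent of the BitField layout above; `raw` is a fabricated word.
const u64 raw = 0x0;
const u64 pixel_format = raw & 0x7f;                    // bits 0-6
const u64 block_linear_kind = (raw >> 11) & 0xf;        // bits 11-14
const u64 block_linear_height_log2 = (raw >> 15) & 0xf; // bits 15-18
const u64 surface_width_minus1 = (raw >> 32) & 0x3fff;  // bits 32-45
const u64 surface_height_minus1 = (raw >> 46) & 0x3fff; // bits 46-59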
@ -3,33 +3,33 @@
// Refer to the license.txt file included.

#include <array>
#include <bitset>
#include <cstddef>

#include "common/common_types.h"
#include "video_core/compatible_formats.h"
#include "video_core/surface.h"

namespace VideoCore::Surface {

namespace {
using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;

// Compatibility table taken from Table 3.X.2 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt

constexpr std::array VIEW_CLASS_128_BITS = {
constexpr std::array VIEW_CLASS_128_BITS{
    PixelFormat::R32G32B32A32_FLOAT,
    PixelFormat::R32G32B32A32_UINT,
    PixelFormat::R32G32B32A32_SINT,
};

constexpr std::array VIEW_CLASS_96_BITS = {
constexpr std::array VIEW_CLASS_96_BITS{
    PixelFormat::R32G32B32_FLOAT,
};
// Missing formats:
// PixelFormat::RGB32UI,
// PixelFormat::RGB32I,

constexpr std::array VIEW_CLASS_64_BITS = {
constexpr std::array VIEW_CLASS_64_BITS{
    PixelFormat::R32G32_FLOAT,       PixelFormat::R32G32_UINT,
    PixelFormat::R32G32_SINT,        PixelFormat::R16G16B16A16_FLOAT,
    PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@ -38,7 +38,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {

// TODO: How should we handle 48 bits?

constexpr std::array VIEW_CLASS_32_BITS = {
constexpr std::array VIEW_CLASS_32_BITS{
    PixelFormat::R16G16_FLOAT,      PixelFormat::B10G11R11_FLOAT,  PixelFormat::R32_FLOAT,
    PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT,      PixelFormat::R32_UINT,
    PixelFormat::R16G16_SINT,       PixelFormat::R32_SINT,         PixelFormat::A8B8G8R8_UNORM,
@ -50,43 +50,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {

// TODO: How should we handle 24 bits?

constexpr std::array VIEW_CLASS_16_BITS = {
constexpr std::array VIEW_CLASS_16_BITS{
    PixelFormat::R16_FLOAT,  PixelFormat::R8G8_UINT,  PixelFormat::R16_UINT,
    PixelFormat::R16_SINT,   PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
    PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM,  PixelFormat::R8G8_SINT,
};

constexpr std::array VIEW_CLASS_8_BITS = {
constexpr std::array VIEW_CLASS_8_BITS{
    PixelFormat::R8_UINT,
    PixelFormat::R8_UNORM,
    PixelFormat::R8_SINT,
    PixelFormat::R8_SNORM,
};

constexpr std::array VIEW_CLASS_RGTC1_RED = {
constexpr std::array VIEW_CLASS_RGTC1_RED{
    PixelFormat::BC4_UNORM,
    PixelFormat::BC4_SNORM,
};

constexpr std::array VIEW_CLASS_RGTC2_RG = {
constexpr std::array VIEW_CLASS_RGTC2_RG{
    PixelFormat::BC5_UNORM,
    PixelFormat::BC5_SNORM,
};

constexpr std::array VIEW_CLASS_BPTC_UNORM = {
constexpr std::array VIEW_CLASS_BPTC_UNORM{
    PixelFormat::BC7_UNORM,
    PixelFormat::BC7_SRGB,
};

constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
constexpr std::array VIEW_CLASS_BPTC_FLOAT{
    PixelFormat::BC6H_SFLOAT,
    PixelFormat::BC6H_UFLOAT,
};

constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
    PixelFormat::ASTC_2D_4X4_UNORM,
    PixelFormat::ASTC_2D_4X4_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
    PixelFormat::ASTC_2D_5X4_UNORM,
    PixelFormat::ASTC_2D_5X4_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
    PixelFormat::ASTC_2D_5X5_UNORM,
    PixelFormat::ASTC_2D_5X5_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
    PixelFormat::ASTC_2D_6X5_UNORM,
    PixelFormat::ASTC_2D_6X5_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
    PixelFormat::ASTC_2D_6X6_UNORM,
    PixelFormat::ASTC_2D_6X6_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
    PixelFormat::ASTC_2D_8X5_UNORM,
    PixelFormat::ASTC_2D_8X5_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
    PixelFormat::ASTC_2D_8X8_UNORM,
    PixelFormat::ASTC_2D_8X8_SRGB,
};

// Missing formats:
// PixelFormat::ASTC_2D_10X5_UNORM
// PixelFormat::ASTC_2D_10X5_SRGB

// Missing formats:
// PixelFormat::ASTC_2D_10X6_UNORM
// PixelFormat::ASTC_2D_10X6_SRGB

constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
    PixelFormat::ASTC_2D_10X8_UNORM,
    PixelFormat::ASTC_2D_10X8_SRGB,
};

constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
    PixelFormat::ASTC_2D_10X10_UNORM,
    PixelFormat::ASTC_2D_10X10_SRGB,
};

// Missing formats
// ASTC_2D_12X10_UNORM,
// ASTC_2D_12X10_SRGB,

constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
    PixelFormat::ASTC_2D_12X12_UNORM,
    PixelFormat::ASTC_2D_12X12_SRGB,
};

// Compatibility table taken from Table 4.X.1 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt

constexpr std::array COPY_CLASS_128_BITS = {
constexpr std::array COPY_CLASS_128_BITS{
    PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
    PixelFormat::BC2_UNORM,         PixelFormat::BC2_SRGB,           PixelFormat::BC3_UNORM,
    PixelFormat::BC3_SRGB,          PixelFormat::BC5_UNORM,          PixelFormat::BC5_SNORM,
@ -97,7 +159,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
// PixelFormat::RGBA32I
// COMPRESSED_RG_RGTC2

constexpr std::array COPY_CLASS_64_BITS = {
constexpr std::array COPY_CLASS_64_BITS{
    PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
    PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
    PixelFormat::R16G16B16A16_SINT,  PixelFormat::R32G32_UINT,
@ -110,32 +172,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
// COMPRESSED_RGBA_S3TC_DXT1_EXT
// COMPRESSED_SIGNED_RED_RGTC1

void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
    compatiblity[format_a][format_b] = true;
    compatiblity[format_b][format_a] = true;
constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
    table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
    table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
}

void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
    Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
}

template <typename Range>
void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
constexpr void EnableRange(Table& table, const Range& range) {
    for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
        for (auto it_b = it_a; it_b != range.end(); ++it_b) {
            Enable(compatibility, *it_a, *it_b);
            Enable(table, *it_a, *it_b);
        }
    }
}

} // Anonymous namespace
constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
    const size_t a = static_cast<size_t>(format_a);
    const size_t b = static_cast<size_t>(format_b);
    return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
}

FormatCompatibility::FormatCompatibility() {
constexpr Table MakeViewTable() {
    Table view{};
    for (size_t i = 0; i < MaxPixelFormat; ++i) {
        // Identity is allowed
        Enable(view, i, i);
    }

    EnableRange(view, VIEW_CLASS_128_BITS);
    EnableRange(view, VIEW_CLASS_96_BITS);
    EnableRange(view, VIEW_CLASS_64_BITS);
@ -146,10 +212,39 @@ FormatCompatibility::FormatCompatibility() {
    EnableRange(view, VIEW_CLASS_RGTC2_RG);
    EnableRange(view, VIEW_CLASS_BPTC_UNORM);
    EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
    EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
    EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
    return view;
}

    copy = view;
constexpr Table MakeCopyTable() {
    Table copy = MakeViewTable();
    EnableRange(copy, COPY_CLASS_128_BITS);
    EnableRange(copy, COPY_CLASS_64_BITS);
    return copy;
}
} // Anonymous namespace

bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) {
    if (broken_views) {
        // If format views are broken, only accept formats that are identical.
        return format_a == format_b;
    }
    static constexpr Table TABLE = MakeViewTable();
    return IsSupported(TABLE, format_a, format_b);
}

bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
    static constexpr Table TABLE = MakeCopyTable();
    return IsSupported(TABLE, format_a, format_b);
}

} // namespace VideoCore::Surface
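This rewrite replaces the runtime-built, bitset-backed FormatCompatibility class with constexpr bitmask tables: each row is two u64 words (so the encoding assumes MaxPixelFormat <= 128), built once at compile time by MakeViewTable()/MakeCopyTable(). Callers now use the free functions directly; for example:

// BC4_UNORM and BC4_SNORM share VIEW_CLASS_RGTC1_RED, so this is true:
const bool ok = VideoCore::Surface::IsViewCompatible(
    VideoCore::Surface::PixelFormat::BC4_UNORM,
    VideoCore::Surface::PixelFormat::BC4_SNORM, /*broken_views=*/false);
// With broken_views set, only identical formats are accepted, so this is false:
const bool identical_only = VideoCore::Surface::IsViewCompatible(
    VideoCore::Surface::PixelFormat::BC4_UNORM,
    VideoCore::Surface::PixelFormat::BC4_SNORM, /*broken_views=*/true);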
@ -4,31 +4,12 @@

#pragma once

#include <array>
#include <bitset>
#include <cstddef>

#include "video_core/surface.h"

namespace VideoCore::Surface {

class FormatCompatibility {
public:
    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views);

    explicit FormatCompatibility();

    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
        return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
    }

    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
    }

private:
    Table view;
    Table copy;
};
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);

} // namespace VideoCore::Surface
32 src/video_core/delayed_destruction_ring.h Normal file
@ -0,0 +1,32 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

namespace VideoCommon {

/// Container to push objects to be destroyed a few ticks in the future
template <typename T, size_t TICKS_TO_DESTROY>
class DelayedDestructionRing {
public:
    void Tick() {
        index = (index + 1) % TICKS_TO_DESTROY;
        elements[index].clear();
    }

    void Push(T&& object) {
        elements[index].push_back(std::move(object));
    }

private:
    size_t index = 0;
    std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
};

} // namespace VideoCommon
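An object pushed into the ring survives exactly TICKS_TO_DESTROY calls to Tick(): each Tick() advances the index and clears the bucket it lands on, so the bucket an object was pushed into is cleared once the index wraps back around. A hypothetical usage, where BufferHandle stands in for any movable resource type:

// Sketch: free GPU-visible objects three ticks after retirement.
VideoCommon::DelayedDestructionRing<BufferHandle, 3> delayed_ring;
delayed_ring.Push(std::move(old_buffer)); // keep alive; in-flight GPU work may use it
delayed_ring.Tick();                      // tick 1
delayed_ring.Tick();                      // tick 2
delayed_ring.Tick();                      // tick 3: old_buffer's bucket is cleared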
@ -9,13 +9,16 @@
#include "video_core/dirty_flags.h"

#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32))
#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))

namespace VideoCommon::Dirty {

using Tegra::Engines::Maxwell3D;

void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
    FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
    FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);

    static constexpr std::size_t num_per_rt = NUM(rt[0]);
    static constexpr std::size_t begin = OFF(rt);
    static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
        FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
    }
    FillBlock(tables[1], begin, num, RenderTargets);
    FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);

    tables[0][OFF(rt_control)] = RenderTargets;
    tables[1][OFF(rt_control)] = RenderTargetControl;

    static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
    for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {

@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
enum : u8 {
    NullEntry = 0,

    Descriptors,

    RenderTargets,
    RenderTargetControl,
    ColorBuffer0,
    ColorBuffer1,
    ColorBuffer2,
@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/cityhash.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@ -12,7 +13,7 @@

namespace Tegra {

DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}

DmaPusher::~DmaPusher() = default;

@ -45,32 +46,41 @@ bool DmaPusher::Step() {
        return false;
    }

    const CommandList& command_list{dma_pushbuffer.front()};
    ASSERT_OR_EXECUTE(!command_list.empty(), {
        // Somehow the command_list is empty; to avoid a crash,
        // we ignore it and assume its size is 0.
    CommandList& command_list{dma_pushbuffer.front()};

    ASSERT_OR_EXECUTE(
        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
            // Somehow the command_list is empty; to avoid a crash,
            // we ignore it and assume its size is 0.
            dma_pushbuffer.pop();
            dma_pushbuffer_subindex = 0;
            return true;
        });

    if (command_list.prefetch_command_list.size()) {
        // Prefetched command list from nvdrv, used for things like synchronization
        command_headers = std::move(command_list.prefetch_command_list);
        dma_pushbuffer.pop();
        dma_pushbuffer_subindex = 0;
        return true;
    });
    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
    const GPUVAddr dma_get = command_list_header.addr;
    } else {
        const CommandListHeader command_list_header{
            command_list.command_lists[dma_pushbuffer_subindex++]};
        const GPUVAddr dma_get = command_list_header.addr;

    if (dma_pushbuffer_subindex >= command_list.size()) {
        // We've gone through the current list, remove it from the queue
        dma_pushbuffer.pop();
        dma_pushbuffer_subindex = 0;
        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
            // We've gone through the current list, remove it from the queue
            dma_pushbuffer.pop();
            dma_pushbuffer_subindex = 0;
        }

        if (command_list_header.size == 0) {
            return true;
        }

        // Push buffer non-empty, read a word
        command_headers.resize(command_list_header.size);
        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                            command_list_header.size * sizeof(u32));
    }

    if (command_list_header.size == 0) {
        return true;
    }

    // Push buffer non-empty, read a word
    command_headers.resize(command_list_header.size);
    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                        command_list_header.size * sizeof(u32));

    for (std::size_t index = 0; index < command_headers.size();) {
        const CommandHeader& command_header = command_headers[index];

@ -142,7 +152,12 @@ void DmaPusher::SetState(const CommandHeader& command_header) {

void DmaPusher::CallMethod(u32 argument) const {
    if (dma_state.method < non_puller_methods) {
        gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
        gpu.CallMethod(GPU::MethodCall{
            dma_state.method,
            argument,
            dma_state.subchannel,
            dma_state.method_count,
        });
    } else {
        subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
                                                      dma_state.is_last_call);
@ -18,6 +18,8 @@ class System;

namespace Tegra {

class GPU;

enum class SubmissionMode : u32 {
    IncreasingOld = 0,
    Increasing = 1,
@ -27,6 +29,31 @@ enum class SubmissionMode : u32 {
    IncreaseOnce = 5
};

// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in the docs. Here we do not multiply by 4,
// so the values seen in the docs may be 4 times the values used here.
enum class BufferMethods : u32 {
    BindObject = 0x0,
    Nop = 0x2,
    SemaphoreAddressHigh = 0x4,
    SemaphoreAddressLow = 0x5,
    SemaphoreSequence = 0x6,
    SemaphoreTrigger = 0x7,
    NotifyIntr = 0x8,
    WrcacheFlush = 0x9,
    Unk28 = 0xA,
    UnkCacheFlush = 0xB,
    RefCnt = 0x14,
    SemaphoreAcquire = 0x1A,
    SemaphoreRelease = 0x1B,
    FenceValue = 0x1C,
    FenceAction = 0x1D,
    WaitForInterrupt = 0x1E,
    Unk7c = 0x1F,
    Yield = 0x20,
    NonPullerMethods = 0x40,
};

struct CommandListHeader {
    union {
        u64 raw;
@ -49,9 +76,23 @@ union CommandHeader {
static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");

class GPU;
inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, SubmissionMode mode) {
    CommandHeader result{};
    result.method.Assign(static_cast<u32>(method));
    result.arg_count.Assign(arg_count);
    result.mode.Assign(mode);
    return result;
}

using CommandList = std::vector<Tegra::CommandListHeader>;
struct CommandList final {
    CommandList() = default;
    explicit CommandList(std::size_t size) : command_lists(size) {}
    explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
        : prefetch_command_list{std::move(prefetch_command_list_)} {}

    std::vector<CommandListHeader> command_lists;
    std::vector<CommandHeader> prefetch_command_list;
};
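CommandList now distinguishes a regular header list, whose headers Step() fetches from GPU memory, from a prefetched stream that nvdrv already read. Constructing each form, with illustrative values (a real prefetched stream would follow each header with its argument words):

// Regular list: Step() reads the headers from GPU memory.
Tegra::CommandList regular(/*size=*/16);
// Prefetched list, e.g. synchronization commands pre-built by nvdrv.
std::vector<Tegra::CommandHeader> prefetched{Tegra::BuildCommandHeader(
    Tegra::BufferMethods::SemaphoreSequence, 1, Tegra::SubmissionMode::Increasing)};
Tegra::CommandList prefetch_list(std::move(prefetched));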

/**
 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@ -60,9 +101,9 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
 * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
 * details on this implementation.
 */
class DmaPusher {
class DmaPusher final {
public:
    explicit DmaPusher(Core::System& system, GPU& gpu);
    explicit DmaPusher(Core::System& system_, GPU& gpu_);
    ~DmaPusher();

    void Push(CommandList&& entries) {
@ -71,7 +112,7 @@ public:

    void DispatchCalls();

    void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) {
    void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) {
        subchannels[subchannel_id] = engine;
    }

@ -104,7 +145,7 @@ private:

    bool ib_enable{true}; ///< IB mode enabled

    std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
    std::array<Engines::EngineInterface*, max_subchannels> subchannels{};

    GPU& gpu;
    Core::System& system;
@ -11,16 +11,16 @@

namespace Tegra::Engines::Upload {

State::State(MemoryManager& memory_manager, Registers& regs)
    : regs{regs}, memory_manager{memory_manager} {}
State::State(MemoryManager& memory_manager_, Registers& regs_)
    : regs{regs_}, memory_manager{memory_manager_} {}

State::~State() = default;

void State::ProcessExec(const bool is_linear) {
void State::ProcessExec(const bool is_linear_) {
    write_offset = 0;
    copy_size = regs.line_length_in * regs.line_count;
    inner_buffer.resize(copy_size);
    this->is_linear = is_linear;
    is_linear = is_linear_;
}

void State::ProcessData(const u32 data, const bool is_last_call) {

@ -54,10 +54,10 @@ struct Registers {

class State {
public:
    State(MemoryManager& memory_manager, Registers& regs);
    explicit State(MemoryManager& memory_manager_, Registers& regs_);
    ~State();

    void ProcessExec(bool is_linear);
    void ProcessExec(bool is_linear_);
    void ProcessData(u32 data, bool is_last_call);

private:
@ -10,7 +10,11 @@

namespace Tegra::Engines {

Fermi2D::Fermi2D() = default;
Fermi2D::Fermi2D() {
    // Nvidia's OpenGL driver seems to assume these values
    regs.src.depth = 1;
    regs.dst.depth = 1;
}

Fermi2D::~Fermi2D() = default;

@ -21,79 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid Fermi2D register, increase the size of the Regs structure");

    regs.reg_array[method] = method_argument;

    switch (method) {
    // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
    // so trigger on the second 32-bit write.
    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
        HandleSurfaceCopy();
        break;
    }
    if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
        Blit();
    }
}

void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
    for (std::size_t i = 0; i < amount; i++) {
        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
    for (u32 i = 0; i < amount; ++i) {
        CallMethod(method, base_start[i], methods_pending - i <= 1);
    }
}

static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
    const u32 line_a = src_2 - src_1;
    const u32 line_b = dst_2 - dst_1;
    const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
    return {line_b - (excess * line_b) / line_a, excess};
}
void Fermi2D::Blit() {
    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
              regs.src.Address(), regs.dst.Address());

void Fermi2D::HandleSurfaceCopy() {
    LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
              static_cast<u32>(regs.operation));
    UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
    UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
    UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
    UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");

    // TODO(Subv): Only raw copies are implemented.
    ASSERT(regs.operation == Operation::SrcCopy);

    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
    u32 src_blit_x2, src_blit_y2;
    if (regs.blit_control.origin == Origin::Corner) {
        src_blit_x2 =
            static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
        src_blit_y2 =
            static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
    } else {
        src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
        src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
    }
    u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
    u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
    const auto [new_dst_w, src_excess_x] =
        DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
    const auto [new_dst_h, src_excess_y] =
        DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
    dst_blit_x2 = new_dst_w + regs.blit_dst_x;
    src_blit_x2 = src_blit_x2 - src_excess_x;
    dst_blit_y2 = new_dst_h + regs.blit_dst_y;
    src_blit_y2 = src_blit_y2 - src_excess_y;
    const auto [new_src_w, dst_excess_x] =
        DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
    const auto [new_src_h, dst_excess_y] =
        DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
    src_blit_x2 = new_src_w + src_blit_x1;
    dst_blit_x2 = dst_blit_x2 - dst_excess_x;
    src_blit_y2 = new_src_h + src_blit_y1;
    dst_blit_y2 = dst_blit_y2 - dst_excess_y;
    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
                                          dst_blit_y2};
    const Config copy_config{
    const auto& args = regs.pixels_from_memory;
    const Config config{
        .operation = regs.operation,
        .filter = regs.blit_control.filter,
        .src_rect = src_rect,
        .dst_rect = dst_rect,
        .filter = args.sample_mode.filter,
        .dst_x0 = args.dst_x0,
        .dst_y0 = args.dst_y0,
        .dst_x1 = args.dst_x0 + args.dst_width,
        .dst_y1 = args.dst_y0 + args.dst_height,
        .src_x0 = static_cast<s32>(args.src_x0 >> 32),
        .src_y0 = static_cast<s32>(args.src_y0 >> 32),
        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
    };
    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
        UNIMPLEMENTED();
    }
}
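The new Blit() reads source coordinates as 32.32 fixed point: src_x0/src_y0 hold the start position scaled by 2^32, du_dx/dv_dy hold the per-destination-pixel step in the same scale, and >> 32 truncates back to integer pixels. A worked example with fabricated register values:

// 32.32 fixed-point walk-through (all values fabricated).
const s64 src_x0 = s64{10} << 32; // source blit starts at x = 10
const s64 du_dx = s64{1} << 31;   // 0.5 source pixels per destination pixel
const s32 dst_width = 64;
const s32 src_x1 = static_cast<s32>((du_dx * dst_width + src_x0) >> 32); // 10 + 32 = 42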
@ -53,8 +53,8 @@
    };

    enum class Filter : u32 {
        PointSample = 0, // Nearest
        Linear = 1,
        Point = 0,
        Bilinear = 1,
    };

    enum class Operation : u32 {
@ -67,88 +67,235 @@ public:
        BlendPremult = 6,
    };

    struct Regs {
        static constexpr std::size_t NUM_REGS = 0x258;
    enum class MemoryLayout : u32 {
        BlockLinear = 0,
        Pitch = 1,
    };

        struct Surface {
            RenderTargetFormat format;
            BitField<0, 1, u32> linear;
            union {
                BitField<0, 4, u32> block_width;
                BitField<4, 4, u32> block_height;
                BitField<8, 4, u32> block_depth;
            };
            u32 depth;
            u32 layer;
            u32 pitch;
            u32 width;
            u32 height;
            u32 address_high;
            u32 address_low;

            GPUVAddr Address() const {
                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                             address_low);
            }

            u32 BlockWidth() const {
                return block_width.Value();
            }

            u32 BlockHeight() const {
                return block_height.Value();
            }

            u32 BlockDepth() const {
                return block_depth.Value();
            }
        };
        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
    enum class CpuIndexWrap : u32 {
        Wrap = 0,
        NoWrap = 1,
    };

    struct Surface {
        RenderTargetFormat format;
        MemoryLayout linear;
        union {
            BitField<0, 4, u32> block_width;
            BitField<4, 4, u32> block_height;
            BitField<8, 4, u32> block_depth;
        };
        u32 depth;
        u32 layer;
        u32 pitch;
        u32 width;
        u32 height;
        u32 addr_upper;
        u32 addr_lower;

        [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
            return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
        }
    };
    static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");

    enum class SectorPromotion : u32 {
        NoPromotion = 0,
        PromoteTo2V = 1,
        PromoteTo2H = 2,
        PromoteTo4 = 3,
    };

    enum class NumTpcs : u32 {
        All = 0,
        One = 1,
    };

    enum class RenderEnableMode : u32 {
        False = 0,
        True = 1,
        Conditional = 2,
        RenderIfEqual = 3,
        RenderIfNotEqual = 4,
    };

    enum class ColorKeyFormat : u32 {
        A16R56G6B5 = 0,
        A1R5G55B5 = 1,
        A8R8G8B8 = 2,
        A2R10G10B10 = 3,
        Y8 = 4,
        Y16 = 5,
        Y32 = 6,
    };

    union Beta4 {
        BitField<0, 8, u32> b;
        BitField<8, 8, u32> g;
        BitField<16, 8, u32> r;
        BitField<24, 8, u32> a;
    };

    struct Point {
        u32 x;
        u32 y;
    };

    enum class PatternSelect : u32 {
        MonoChrome8x8 = 0,
        MonoChrome64x1 = 1,
        MonoChrome1x64 = 2,
        Color = 3,
    };

    enum class NotifyType : u32 {
        WriteOnly = 0,
        WriteThenAwaken = 1,
    };

    enum class MonochromePatternColorFormat : u32 {
        A8X8R8G6B5 = 0,
        A1R5G5B5 = 1,
        A8R8G8B8 = 2,
        A8Y8 = 3,
        A8X8Y16 = 4,
        Y32 = 5,
    };

    enum class MonochromePatternFormat : u32 {
        CGA6_M1 = 0,
        LE_M1 = 1,
    };

    union Regs {
        static constexpr std::size_t NUM_REGS = 0x258;
        struct {
            u32 object;
            INSERT_UNION_PADDING_WORDS(0x3F);
            u32 no_operation;
            NotifyType notify;
            INSERT_UNION_PADDING_WORDS(0x2);
            u32 wait_for_idle;
            INSERT_UNION_PADDING_WORDS(0xB);
            u32 pm_trigger;
            INSERT_UNION_PADDING_WORDS(0xF);
            u32 context_dma_notify;
            u32 dst_context_dma;
            u32 src_context_dma;
            u32 semaphore_context_dma;
            INSERT_UNION_PADDING_WORDS(0x1C);
            Surface dst;
            CpuIndexWrap pixels_from_cpu_index_wrap;
            u32 kind2d_check_enable;
            Surface src;
            SectorPromotion pixels_from_memory_sector_promotion;
            INSERT_UNION_PADDING_WORDS(0x1);
            NumTpcs num_tpcs;
            u32 render_enable_addr_upper;
            u32 render_enable_addr_lower;
            RenderEnableMode render_enable_mode;
            INSERT_UNION_PADDING_WORDS(0x4);
            u32 clip_x0;
            u32 clip_y0;
            u32 clip_width;
            u32 clip_height;
            BitField<0, 1, u32> clip_enable;
            BitField<0, 3, ColorKeyFormat> color_key_format;
            u32 color_key;
            BitField<0, 1, u32> color_key_enable;
            BitField<0, 8, u32> rop;
            u32 beta1;
            Beta4 beta4;
            Operation operation;
            union {
                BitField<0, 6, u32> x;
                BitField<8, 6, u32> y;
            } pattern_offset;
            BitField<0, 2, PatternSelect> pattern_select;
            INSERT_UNION_PADDING_WORDS(0xC);
            struct {
        INSERT_UNION_PADDING_WORDS(0x80);

        Surface dst;

        INSERT_UNION_PADDING_WORDS(2);

        Surface src;

        INSERT_UNION_PADDING_WORDS(0x15);

        Operation operation;

        INSERT_UNION_PADDING_WORDS(0x177);

                BitField<0, 3, MonochromePatternColorFormat> color_format;
                BitField<0, 1, MonochromePatternFormat> format;
                u32 color0;
                u32 color1;
                u32 pattern0;
                u32 pattern1;
            } monochrome_pattern;
            struct {
                std::array<u32, 0x40> X8R8G8B8;
                std::array<u32, 0x20> R5G6B5;
                std::array<u32, 0x20> X1R5G5B5;
                std::array<u32, 0x10> Y8;
            } color_pattern;
            INSERT_UNION_PADDING_WORDS(0x10);
            struct {
                u32 prim_mode;
                u32 prim_color_format;
                u32 prim_color;
                u32 line_tie_break_bits;
                INSERT_UNION_PADDING_WORDS(0x14);
                u32 prim_point_xy;
                INSERT_UNION_PADDING_WORDS(0x7);
                std::array<Point, 0x40> prim_point;
            } render_solid;
            struct {
                u32 data_type;
                u32 color_format;
                u32 index_format;
                u32 mono_format;
                u32 wrap;
                u32 color0;
                u32 color1;
                u32 mono_opacity;
                INSERT_UNION_PADDING_WORDS(0x6);
                u32 src_width;
                u32 src_height;
                u32 dx_du_frac;
                u32 dx_du_int;
                u32 dx_dv_frac;
                u32 dy_dv_int;
                u32 dst_x0_frac;
                u32 dst_x0_int;
                u32 dst_y0_frac;
                u32 dst_y0_int;
                u32 data;
            } pixels_from_cpu;
            INSERT_UNION_PADDING_WORDS(0x3);
            u32 big_endian_control;
            INSERT_UNION_PADDING_WORDS(0x3);
            struct {
                BitField<0, 3, u32> block_shape;
                BitField<0, 5, u32> corral_size;
                BitField<0, 1, u32> safe_overlap;
                union {
                    u32 raw;
                    BitField<0, 1, Origin> origin;
                    BitField<4, 1, Filter> filter;
        } blit_control;

                } sample_mode;
                INSERT_UNION_PADDING_WORDS(0x8);

        u32 blit_dst_x;
        u32 blit_dst_y;
        u32 blit_dst_width;
        u32 blit_dst_height;
        u64 blit_du_dx;
        u64 blit_dv_dy;
        u64 blit_src_x;
        u64 blit_src_y;

        INSERT_UNION_PADDING_WORDS(0x21);
    };
    std::array<u32, NUM_REGS> reg_array;
                s32 dst_x0;
                s32 dst_y0;
                s32 dst_width;
                s32 dst_height;
                s64 du_dx;
                s64 dv_dy;
                s64 src_x0;
                s64 src_y0;
            } pixels_from_memory;
        };
        std::array<u32, NUM_REGS> reg_array;
    } regs{};

    struct Config {
        Operation operation{};
        Filter filter{};
        Common::Rectangle<u32> src_rect;
        Common::Rectangle<u32> dst_rect;
        Operation operation;
        Filter filter;
        s32 dst_x0;
        s32 dst_y0;
        s32 dst_x1;
        s32 dst_y1;
        s32 src_x0;
        s32 src_y0;
        s32 src_x1;
        s32 src_y1;
    };

private:
@ -156,25 +303,49 @@ private:

    /// Performs the copy from the source surface to the destination surface as configured in the
    /// registers.
    void HandleSurfaceCopy();
    void Blit();
};

#define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4,                             \
    static_assert(offsetof(Fermi2D::Regs, field_name) == position,                                 \
                  "Field " #field_name " has invalid position")

ASSERT_REG_POSITION(dst, 0x80);
ASSERT_REG_POSITION(src, 0x8C);
ASSERT_REG_POSITION(operation, 0xAB);
ASSERT_REG_POSITION(blit_control, 0x223);
ASSERT_REG_POSITION(blit_dst_x, 0x22c);
ASSERT_REG_POSITION(blit_dst_y, 0x22d);
ASSERT_REG_POSITION(blit_dst_width, 0x22e);
ASSERT_REG_POSITION(blit_dst_height, 0x22f);
ASSERT_REG_POSITION(blit_du_dx, 0x230);
ASSERT_REG_POSITION(blit_dv_dy, 0x232);
ASSERT_REG_POSITION(blit_src_x, 0x234);
ASSERT_REG_POSITION(blit_src_y, 0x236);
ASSERT_REG_POSITION(object, 0x0);
ASSERT_REG_POSITION(no_operation, 0x100);
ASSERT_REG_POSITION(notify, 0x104);
ASSERT_REG_POSITION(wait_for_idle, 0x110);
ASSERT_REG_POSITION(pm_trigger, 0x140);
ASSERT_REG_POSITION(context_dma_notify, 0x180);
ASSERT_REG_POSITION(dst_context_dma, 0x184);
ASSERT_REG_POSITION(src_context_dma, 0x188);
ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
ASSERT_REG_POSITION(dst, 0x200);
ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
ASSERT_REG_POSITION(src, 0x230);
ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
ASSERT_REG_POSITION(num_tpcs, 0x260);
ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
ASSERT_REG_POSITION(clip_x0, 0x280);
ASSERT_REG_POSITION(clip_y0, 0x284);
ASSERT_REG_POSITION(clip_width, 0x288);
ASSERT_REG_POSITION(clip_height, 0x28c);
ASSERT_REG_POSITION(clip_enable, 0x290);
ASSERT_REG_POSITION(color_key_format, 0x294);
ASSERT_REG_POSITION(color_key, 0x298);
ASSERT_REG_POSITION(rop, 0x2A0);
ASSERT_REG_POSITION(beta1, 0x2A4);
ASSERT_REG_POSITION(beta4, 0x2A8);
ASSERT_REG_POSITION(operation, 0x2AC);
ASSERT_REG_POSITION(pattern_offset, 0x2B0);
ASSERT_REG_POSITION(pattern_select, 0x2B4);
ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
ASSERT_REG_POSITION(color_pattern, 0x300);
ASSERT_REG_POSITION(render_solid, 0x580);
ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
ASSERT_REG_POSITION(big_endian_control, 0x870);
ASSERT_REG_POSITION(pixels_from_memory, 0x880);

#undef ASSERT_REG_POSITION

@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
|
||||
}
|
||||
}
|
||||
|
||||
Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
|
||||
const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
|
||||
ASSERT(cbuf_mask[regs.tex_cb_index]);
|
||||
|
||||
const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
|
||||
ASSERT(texinfo.Address() != 0);
|
||||
|
||||
const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
|
||||
ASSERT(address < texinfo.Address() + texinfo.size);
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
|
||||
return GetTextureInfo(tex_handle);
|
||||
}
|
||||
|
||||
Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
|
||||
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
|
||||
}
|
||||
|
||||
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
const auto& buffer = launch_description.const_buffer_config[const_buffer];
|
||||
@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
||||
|
||||
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
|
||||
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
|
||||
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
|
||||
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -209,11 +209,6 @@ public:
|
||||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
Texture::FullTextureInfo GetTexture(std::size_t offset) const;
|
||||
|
||||
/// Given a texture handle, returns the TSC and TIC entries.
|
||||
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
|
||||
|
||||
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
|
||||
|
@ -14,8 +14,8 @@
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
|
||||
: system{system}, upload_state{memory_manager, regs.upload} {}
|
||||
KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)
|
||||
: system{system_}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerMemory::~KeplerMemory() = default;
|
||||
|
||||
|
@ -35,7 +35,7 @@ namespace Tegra::Engines {
|
||||
|
||||
class KeplerMemory final : public EngineInterface {
|
||||
public:
|
||||
KeplerMemory(Core::System& system, MemoryManager& memory_manager);
|
||||
explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
|
||||
~KeplerMemory();
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
|
@ -2,7 +2,6 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
@ -124,6 +123,116 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
|
||||
if (executing_macro == 0) {
|
||||
// A macro call must begin by writing the macro method's register, not its argument.
|
||||
ASSERT_MSG((method % 2) == 0,
|
||||
"Can't start macro execution by writing to the ARGS register");
|
||||
executing_macro = method;
|
||||
}
|
||||
|
||||
macro_params.insert(macro_params.end(), base_start, base_start + amount);
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (is_last_call) {
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
}
|
||||
}
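
// Editor's note: ProcessMacro folds both call paths into a single accumulator;
// arguments pile up in macro_params until the command list signals the last
// call, and only then does the macro fire. A minimal sketch of that lifetime
// (the method indices 0xE20/0xE21 and the argument values are invented for
// illustration; only the accumulate-then-fire flow comes from the code above):
//
//     maxwell3d.CallMethod(0xE20, 10, false); // even register starts the macro, buffers 10
//     maxwell3d.CallMethod(0xE21, 20, false); // ARGS write, buffers 20
//     maxwell3d.CallMethod(0xE21, 30, true);  // last call: macro 0xE20 runs with {10, 20, 30}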

u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;
if (control == Regs::ShadowRamControl::Track ||
control == Regs::ShadowRamControl::TrackWithFilter) {
shadow_state.reg_array[method] = argument;
return argument;
}
if (control == Regs::ShadowRamControl::Replay) {
return shadow_state.reg_array[method];
}
return argument;
}
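
// Editor's note: the two shadow RAM modes are easiest to see side by side.
// Track (and TrackWithFilter) records the incoming write and lets it through;
// Replay discards the write and substitutes the recorded value. A compressed
// sketch, with an invented register index "reg":
//
//     shadow_state.shadow_ram_control = Regs::ShadowRamControl::Track;
//     ProcessShadowRam(reg, 5);  // stores 5 in shadow_state.reg_array[reg], returns 5
//
//     shadow_state.shadow_ram_control = Regs::ShadowRamControl::Replay;
//     const u32 value = ProcessShadowRam(reg, 7);  // returns the tracked 5; the 7 is dropped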

void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
if (regs.reg_array[method] == argument) {
return;
}
regs.reg_array[method] = argument;

for (const auto& table : dirty.tables) {
dirty.flags[table[method]] = true;
}
}
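
// Editor's note (sketch, table contents invented; the real tables are filled
// in by the renderers): each consumer registers a table mapping a register
// index to one of its dirty-flag ids. If dirty.tables[0][method] were, say, a
// viewport flag, a write that actually changes regs.reg_array[method] ends in
//     dirty.flags[viewport_flag] = true;
// and the renderer re-uploads that state on the next draw. Writes that leave
// the value unchanged return early and raise nothing.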

void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
bool is_last_call) {
switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle):
return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(shadow_ram_control):
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
return;
case MAXWELL3D_REG_INDEX(macros.data):
return macro_engine->AddCode(regs.macros.upload_address, argument);
case MAXWELL3D_REG_INDEX(macros.bind):
return ProcessMacroBind(argument);
case MAXWELL3D_REG_INDEX(firmware[4]):
return ProcessFirmwareCall4();
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
return StartCBData(method);
case MAXWELL3D_REG_INDEX(cb_bind[0]):
return ProcessCBBind(0);
case MAXWELL3D_REG_INDEX(cb_bind[1]):
return ProcessCBBind(1);
case MAXWELL3D_REG_INDEX(cb_bind[2]):
return ProcessCBBind(2);
case MAXWELL3D_REG_INDEX(cb_bind[3]):
return ProcessCBBind(3);
case MAXWELL3D_REG_INDEX(cb_bind[4]):
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
return ProcessQueryGet();
case MAXWELL3D_REG_INDEX(condition.mode):
return ProcessQueryCondition();
case MAXWELL3D_REG_INDEX(counter_reset):
return ProcessCounterReset();
case MAXWELL3D_REG_INDEX(sync_info):
return ProcessSyncPoint();
case MAXWELL3D_REG_INDEX(exec_upload):
return upload_state.ProcessExec(regs.exec_upload.linear != 0);
case MAXWELL3D_REG_INDEX(data_upload):
upload_state.ProcessData(argument, is_last_call);
if (is_last_call) {
OnMemoryWrite();
}
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
}
}

void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
// Reset the current macro.
executing_macro = 0;
@ -157,142 +266,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
}

macro_params.push_back(method_argument);

// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
ProcessMacro(method, &method_argument, 1, is_last_call);
return;
}

ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");

u32 arg = method_argument;
// Keep track of the register value in shadow_state when requested.
if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
shadow_state.reg_array[method] = arg;
} else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) {
arg = shadow_state.reg_array[method];
}

if (regs.reg_array[method] != arg) {
regs.reg_array[method] = arg;

for (const auto& table : dirty.tables) {
dirty.flags[table[method]] = true;
}
}

switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle): {
rasterizer->WaitForIdle();
break;
}
case MAXWELL3D_REG_INDEX(shadow_ram_control): {
shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument);
break;
}
case MAXWELL3D_REG_INDEX(macros.data): {
macro_engine->AddCode(regs.macros.upload_address, arg);
break;
}
case MAXWELL3D_REG_INDEX(macros.bind): {
ProcessMacroBind(arg);
break;
}
case MAXWELL3D_REG_INDEX(firmware[4]): {
ProcessFirmwareCall4();
break;
}
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
StartCBData(method);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[0]): {
ProcessCBBind(0);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[1]): {
ProcessCBBind(1);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[2]): {
ProcessCBBind(2);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[3]): {
ProcessCBBind(3);
break;
}
case MAXWELL3D_REG_INDEX(cb_bind[4]): {
ProcessCBBind(4);
break;
}
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
DrawArrays();
break;
}
case MAXWELL3D_REG_INDEX(clear_buffers): {
ProcessClearBuffers();
break;
}
case MAXWELL3D_REG_INDEX(query.query_get): {
ProcessQueryGet();
break;
}
case MAXWELL3D_REG_INDEX(condition.mode): {
ProcessQueryCondition();
break;
}
case MAXWELL3D_REG_INDEX(counter_reset): {
ProcessCounterReset();
break;
}
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
}
case MAXWELL3D_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case MAXWELL3D_REG_INDEX(data_upload): {
upload_state.ProcessData(arg, is_last_call);
if (is_last_call) {
OnMemoryWrite();
}
break;
}
default:
break;
}
const u32 argument = ProcessShadowRam(method, method_argument);
ProcessDirtyRegisters(method, argument);
ProcessMethodCall(method, argument, method_argument, is_last_call);
}

void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@ -300,23 +283,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method;
}

for (std::size_t i = 0; i < amount; i++) {
macro_params.push_back(base_start[i]);
}

// Call the macro when there are no more parameters in the command buffer
if (amount == methods_pending) {
CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
ProcessMacro(method, base_start, amount, amount == methods_pending);
return;
}
switch (method) {
@ -335,15 +302,14 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
ProcessCBMultiData(method, base_start, amount);
break;
}
default: {
default:
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
}
break;
}
}

@ -396,7 +362,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}

void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@ -541,8 +507,7 @@ void Maxwell3D::ProcessCounterReset() {
rasterizer->ResetCounter(QueryType::SamplesPassed);
break;
default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
static_cast<int>(regs.counter_reset));
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
break;
}
}
@ -557,7 +522,7 @@ void Maxwell3D::ProcessSyncPoint() {
}

void Maxwell3D::DrawArrays() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");

@ -595,12 +560,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
return 0;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
system.GPU().GetTicks());
return std::nullopt;
default:
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
static_cast<u32>(regs.query.query_get.select.Value()));
regs.query.query_get.select.Value());
return 1;
}
}
@ -677,7 +642,7 @@ void Maxwell3D::FinishCBData() {
}

Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};

Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@ -686,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}

Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};

Texture::TSCEntry tsc_entry;
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}

Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}

Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
const auto stage_index = static_cast<std::size_t>(stage);
const auto& shader = state.shader_stages[stage_index];
const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);

const GPUVAddr tex_info_address =
tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);

ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};

return GetTextureInfo(tex_handle);
}

u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
}

void Maxwell3D::ProcessClearBuffers() {
ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
regs.clear_buffers.R == regs.clear_buffers.B &&
regs.clear_buffers.R == regs.clear_buffers.A);

rasterizer->Clear();
}

@ -730,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
u32 result;
std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
return result;
return memory_manager.Read<u32>(buffer.address + offset);
}

SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@ -750,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b

SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);

SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}

@ -438,16 +438,6 @@ public:
DecrWrapOGL = 0x8508,
};

enum class MemoryLayout : u32 {
Linear = 0,
BlockLinear = 1,
};

enum class InvMemoryLayout : u32 {
BlockLinear = 0,
Linear = 1,
};

enum class CounterReset : u32 {
SampleCnt = 0x01,
Unk02 = 0x02,
@ -589,21 +579,31 @@ public:
NegativeW = 7,
};

enum class SamplerIndex : u32 {
Independently = 0,
ViaHeaderIndex = 1,
};

struct TileMode {
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
BitField<12, 1, u32> is_pitch_linear;
BitField<16, 1, u32> is_3d;
};
};
static_assert(sizeof(TileMode) == 4);

struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
TileMode tile_mode;
union {
BitField<0, 3, u32> block_width;
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
BitField<16, 1, u32> is_3d;
} memory_layout;
union {
BitField<0, 16, u32> layers;
BitField<0, 16, u32> depth;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@ -755,7 +755,11 @@ public:

u32 data_upload;

INSERT_UNION_PADDING_WORDS(0x44);
INSERT_UNION_PADDING_WORDS(0x16);

u32 force_early_fragment_tests;

INSERT_UNION_PADDING_WORDS(0x2D);

struct {
union {
@ -828,7 +832,11 @@ public:

u32 patch_vertices;

INSERT_UNION_PADDING_WORDS(0xC);
INSERT_UNION_PADDING_WORDS(0x4);

u32 fragment_barrier;

INSERT_UNION_PADDING_WORDS(0x7);

std::array<ScissorTest, NumViewports> scissor_test;

@ -838,7 +846,15 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;

INSERT_UNION_PADDING_WORDS(0xC);
INSERT_UNION_PADDING_WORDS(0x5);

u32 invalidate_texture_data_cache;

INSERT_UNION_PADDING_WORDS(0x1);

u32 tiled_cache_barrier;

INSERT_UNION_PADDING_WORDS(0x4);

u32 color_mask_common;

@ -862,12 +878,7 @@ public:
u32 address_high;
u32 address_low;
Tegra::DepthFormat format;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
BitField<20, 1, InvMemoryLayout> type;
} memory_layout;
TileMode tile_mode;
u32 layer_stride;

GPUVAddr Address() const {
@ -876,7 +887,18 @@ public:
}
} zeta;

INSERT_UNION_PADDING_WORDS(0x41);
struct {
union {
BitField<0, 16, u32> x;
BitField<16, 16, u32> width;
};
union {
BitField<0, 16, u32> y;
BitField<16, 16, u32> height;
};
} render_area;

INSERT_UNION_PADDING_WORDS(0x3F);

union {
BitField<0, 4, u32> stencil;
@ -917,7 +939,7 @@ public:
BitField<25, 3, u32> map_7;
};

u32 GetMap(std::size_t index) const {
u32 Map(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
map_4, map_5, map_6, map_7};
ASSERT(index < maps.size());
@ -930,11 +952,13 @@ public:
u32 zeta_width;
u32 zeta_height;
union {
BitField<0, 16, u32> zeta_layers;
BitField<0, 16, u32> zeta_depth;
BitField<16, 1, u32> zeta_volume;
};

INSERT_UNION_PADDING_WORDS(0x26);
SamplerIndex sampler_index;

INSERT_UNION_PADDING_WORDS(0x25);

u32 depth_test_enable;

@ -960,6 +984,7 @@ public:
float b;
float a;
} blend_color;

INSERT_UNION_PADDING_WORDS(0x4);

struct {
@ -997,7 +1022,12 @@ public:
float line_width_smooth;
float line_width_aliased;

INSERT_UNION_PADDING_WORDS(0x1F);
INSERT_UNION_PADDING_WORDS(0x1B);

u32 invalidate_sampler_cache_no_wfi;
u32 invalidate_texture_header_cache_no_wfi;

INSERT_UNION_PADDING_WORDS(0x2);

u32 vb_element_base;
u32 vb_base_instance;
@ -1041,13 +1071,13 @@ public:
} condition;

struct {
u32 tsc_address_high;
u32 tsc_address_low;
u32 tsc_limit;
u32 address_high;
u32 address_low;
u32 limit;

GPUVAddr TSCAddress() const {
return static_cast<GPUVAddr>(
(static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tsc;
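
// Editor's note: the renamed Address() helper is the usual Tegra pattern of
// splicing a GPU virtual address out of a high/low register pair. One worked
// value, chosen arbitrarily: address_high = 0x1, address_low = 0x2000 gives
//     (GPUVAddr{0x1} << 32) | 0x2000 == GPUVAddr{0x1'0000'2000}
// Casting the high word to GPUVAddr before shifting is what keeps the shift
// from being performed on a 32-bit value.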

@ -1058,13 +1088,13 @@ public:
u32 line_smooth_enable;

struct {
u32 tic_address_high;
u32 tic_address_low;
u32 tic_limit;
u32 address_high;
u32 address_low;
u32 limit;

GPUVAddr TICAddress() const {
return static_cast<GPUVAddr>(
(static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tic;

@ -1393,12 +1423,6 @@ public:

void FlushMMEInlineDraw();

/// Given a texture handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;

/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;

u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;

SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@ -1461,38 +1485,13 @@ public:
private:
void InitializeRegisterDefaults();

Core::System& system;
MemoryManager& memory_manager;
void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call);

VideoCore::RasterizerInterface* rasterizer = nullptr;
u32 ProcessShadowRam(u32 method, u32 argument);

/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions = {};
void ProcessDirtyRegisters(u32 method, u32 argument);

std::array<bool, Regs::NUM_REGS> mme_inline{};

/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
/// Parameters that have been submitted to the macro call so far.
std::vector<u32> macro_params;

/// Interpreter for the macro codes uploaded to the GPU.
std::unique_ptr<MacroEngine> macro_engine;

static constexpr u32 null_cb_data = 0xFFFFFFFF;
struct {
std::array<std::array<u32, 0x4000>, 16> buffer;
u32 current{null_cb_data};
u32 id{null_cb_data};
u32 start_pos{};
u32 counter{};
} cb_data_state;

Upload::State upload_state;

bool execute_on{true};

std::array<u8, Regs::NUM_REGS> dirty_pointers{};
void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);

/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@ -1502,8 +1501,8 @@ private:

/**
* Call a macro on this engine.
*
* @param method Method to call
* @param num_parameters Number of arguments
* @param parameters Arguments to the method call
*/
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
@ -1552,6 +1551,38 @@ private:

/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();

Core::System& system;
MemoryManager& memory_manager;

VideoCore::RasterizerInterface* rasterizer = nullptr;

/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions{};

std::array<bool, Regs::NUM_REGS> mme_inline{};

/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
/// Parameters that have been submitted to the macro call so far.
std::vector<u32> macro_params;

/// Interpreter for the macro codes uploaded to the GPU.
std::unique_ptr<MacroEngine> macro_engine;

static constexpr u32 null_cb_data = 0xFFFFFFFF;
struct CBDataState {
std::array<std::array<u32, 0x4000>, 16> buffer;
u32 current{null_cb_data};
u32 id{null_cb_data};
u32 start_pos{};
u32 counter{};
};
CBDataState cb_data_state;

Upload::State upload_state;

bool execute_on{true};
};

#define ASSERT_REG_POSITION(field_name, position) \
@ -1564,6 +1595,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(force_early_fragment_tests, 0x84);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
@ -1586,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(fragment_barrier, 0x378);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@ -1597,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@ -1605,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(zeta_depth, 0x48c);
ASSERT_REG_POSITION(sampler_index, 0x48D);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@ -1629,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);

@ -16,8 +16,10 @@ namespace Tegra::Engines {

using namespace Texture;

MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager} {}
MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
: system{system_}, memory_manager{memory_manager_} {}

MaxwellDMA::~MaxwellDMA() = default;

void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@ -94,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
}

void MaxwellDMA::CopyBlockLinearToPitch() {
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);

@ -114,8 +117,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const u32 block_depth = src_params.block_size.depth;
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t src_layer_size =
CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);

if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
}

void MaxwellDMA::CopyPitchToBlockLinear() {
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");

const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 width = dst_params.width;

@ -72,11 +72,13 @@ public:

struct RenderEnable {
enum class Mode : u32 {
FALSE = 0,
TRUE = 1,
CONDITIONAL = 2,
RENDER_IF_EQUAL = 3,
RENDER_IF_NOT_EQUAL = 4,
// Note: This uses Pascal case in order to avoid the identifiers
// FALSE and TRUE, which are reserved on Darwin.
False = 0,
True = 1,
Conditional = 2,
RenderIfEqual = 3,
RenderIfNotEqual = 4,
};

PackedGPUVAddr address;
@ -185,8 +187,8 @@ public:
};
static_assert(sizeof(RemapConst) == 12);

explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
~MaxwellDMA() = default;
explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
~MaxwellDMA();

/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;

@ -32,31 +32,31 @@ struct Register {

constexpr Register() = default;

constexpr Register(u64 value) : value(value) {}
constexpr Register(u64 value_) : value(value_) {}

constexpr operator u64() const {
[[nodiscard]] constexpr operator u64() const {
return value;
}

template <typename T>
constexpr u64 operator-(const T& oth) const {
[[nodiscard]] constexpr u64 operator-(const T& oth) const {
return value - oth;
}

template <typename T>
constexpr u64 operator&(const T& oth) const {
[[nodiscard]] constexpr u64 operator&(const T& oth) const {
return value & oth;
}

constexpr u64 operator&(const Register& oth) const {
[[nodiscard]] constexpr u64 operator&(const Register& oth) const {
return value & oth.value;
}

constexpr u64 operator~() const {
[[nodiscard]] constexpr u64 operator~() const {
return ~value;
}

u64 GetSwizzledIndex(u64 elem) const {
[[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
elem = (value + elem) & 3;
return (value & ~3) + elem;
}
@ -75,7 +75,7 @@ enum class AttributeSize : u64 {
union Attribute {
Attribute() = default;

constexpr explicit Attribute(u64 value) : value(value) {}
constexpr explicit Attribute(u64 value_) : value(value_) {}

enum class Index : u64 {
LayerViewportPointSize = 6,
@ -107,7 +107,7 @@ union Attribute {
BitField<31, 1, u64> patch;
BitField<47, 3, AttributeSize> size;

bool IsPhysical() const {
[[nodiscard]] bool IsPhysical() const {
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;
@ -124,7 +124,7 @@ union Attribute {
union Sampler {
Sampler() = default;

constexpr explicit Sampler(u64 value) : value(value) {}
constexpr explicit Sampler(u64 value_) : value(value_) {}

enum class Index : u64 {
Sampler_0 = 8,
@ -137,7 +137,7 @@ union Sampler {
union Image {
Image() = default;

constexpr explicit Image(u64 value) : value{value} {}
constexpr explicit Image(u64 value_) : value{value_} {}

BitField<36, 13, u64> index;
u64 value;
@ -505,14 +505,14 @@ struct IpaMode {
IpaInterpMode interpolation_mode;
IpaSampleMode sampling_mode;

bool operator==(const IpaMode& a) const {
[[nodiscard]] bool operator==(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) ==
std::tie(a.interpolation_mode, a.sampling_mode);
}
bool operator!=(const IpaMode& a) const {
[[nodiscard]] bool operator!=(const IpaMode& a) const {
return !operator==(a);
}
bool operator<(const IpaMode& a) const {
[[nodiscard]] bool operator<(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) <
std::tie(a.interpolation_mode, a.sampling_mode);
}
@ -658,10 +658,10 @@ union Instruction {
return *this;
}

constexpr Instruction(u64 value) : value{value} {}
constexpr Instruction(u64 value_) : value{value_} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}

constexpr bool Bit(u64 offset) const {
[[nodiscard]] constexpr bool Bit(u64 offset) const {
return ((value >> offset) & 1) != 0;
}

@ -746,34 +746,34 @@ union Instruction {
BitField<28, 8, u64> imm_lut28;
BitField<48, 8, u64> imm_lut48;

u32 GetImmLut28() const {
[[nodiscard]] u32 GetImmLut28() const {
return static_cast<u32>(imm_lut28);
}

u32 GetImmLut48() const {
[[nodiscard]] u32 GetImmLut48() const {
return static_cast<u32>(imm_lut48);
}
} lop3;

u16 GetImm20_16() const {
[[nodiscard]] u16 GetImm20_16() const {
return static_cast<u16>(imm20_16);
}

u32 GetImm20_19() const {
[[nodiscard]] u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
return imm;
}

u32 GetImm20_32() const {
[[nodiscard]] u32 GetImm20_32() const {
return static_cast<u32>(imm20_32);
}

s32 GetSignedImm20_20() const {
u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
[[nodiscard]] s32 GetSignedImm20_20() const {
const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
// Sign extend the 20-bit value.
u32 mask = 1U << (20 - 1);
const auto mask = 1U << (20 - 1);
return static_cast<s32>((immediate ^ mask) - mask);
}
} alu;
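
// Editor's note: GetSignedImm20_20 relies on the classic (x ^ mask) - mask
// sign-extension idiom, where mask holds the sign bit of the narrow field.
// Restated as a standalone helper with two spot checks (helper name invented;
// u32/s32 are yuzu's common_types aliases):
//
//     constexpr s32 SignExtend20(u32 x) {
//         constexpr u32 mask = 1U << 19;          // sign bit of a 20-bit field
//         return static_cast<s32>((x ^ mask) - mask);
//     }
//     static_assert(SignExtend20(0x00005) == 5);  // positive values pass through
//     static_assert(SignExtend20(0xFFFFF) == -1); // all-ones 20-bit value is -1
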
@ -857,7 +857,7 @@ union Instruction {
BitField<56, 1, u64> second_negate;
BitField<30, 9, u64> second;

u32 PackImmediates() const {
[[nodiscard]] u32 PackImmediates() const {
// Immediates are half floats shifted.
constexpr u32 imm_shift = 6;
return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
@ -1033,7 +1033,7 @@ union Instruction {
BitField<28, 2, AtomicType> type;
BitField<30, 22, s64> offset;

s32 GetImmediateOffset() const {
[[nodiscard]] s32 GetImmediateOffset() const {
return static_cast<s32>(offset << 2);
}
} atoms;
@ -1215,7 +1215,7 @@ union Instruction {
BitField<39, 4, u64> rounding;
// H0, H1 extract for F16 missing
BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
F2fRoundingOp GetRoundingMode() const {
[[nodiscard]] F2fRoundingOp GetRoundingMode() const {
constexpr u64 rounding_mask = 0x0B;
return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
}
@ -1239,15 +1239,15 @@ union Instruction {
BitField<54, 1, u64> aoffi_flag;
BitField<55, 3, TextureProcessMode> process_mode;

bool IsComponentEnabled(std::size_t component) const {
return ((1ull << component) & component_mask) != 0;
[[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}

TextureProcessMode GetTextureProcessMode() const {
[[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@ -1271,15 +1271,15 @@ union Instruction {
BitField<36, 1, u64> aoffi_flag;
BitField<37, 3, TextureProcessMode> process_mode;

bool IsComponentEnabled(std::size_t component) const {
[[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}

TextureProcessMode GetTextureProcessMode() const {
[[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@ -1299,7 +1299,7 @@ union Instruction {
BitField<31, 4, u64> component_mask;
BitField<49, 1, u64> nodep_flag;

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NODEP:
return nodep_flag != 0;
@ -1309,7 +1309,7 @@ union Instruction {
return false;
}

bool IsComponentEnabled(std::size_t component) const {
[[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
} txq;
@ -1321,11 +1321,11 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;

bool IsComponentEnabled(std::size_t component) const {
return ((1ull << component) & component_mask) != 0;
[[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return (ndv_flag != 0);
@ -1347,7 +1347,7 @@ union Instruction {
BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@ -1373,7 +1373,7 @@ union Instruction {
BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@ -1399,7 +1399,7 @@ union Instruction {
BitField<52, 2, u64> component;
BitField<55, 1, u64> fp16_flag;

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@ -1422,24 +1422,27 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;

TextureType GetTextureType() const {
[[nodiscard]] TextureType GetTextureType() const {
// The TEXS instruction has a weird encoding for the texture type.
if (texture_info == 0)
if (texture_info == 0) {
return TextureType::Texture1D;
if (texture_info >= 1 && texture_info <= 9)
}
if (texture_info >= 1 && texture_info <= 9) {
return TextureType::Texture2D;
if (texture_info >= 10 && texture_info <= 11)
}
if (texture_info >= 10 && texture_info <= 11) {
return TextureType::Texture3D;
if (texture_info >= 12 && texture_info <= 13)
}
if (texture_info >= 12 && texture_info <= 13) {
return TextureType::TextureCube;
}

LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
static_cast<u32>(texture_info.Value()));
LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
UNREACHABLE();
return TextureType::Texture1D;
}

TextureProcessMode GetTextureProcessMode() const {
[[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
switch (texture_info) {
case 0:
case 2:
@ -1458,7 +1461,7 @@ union Instruction {
return TextureProcessMode::None;
}

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
@ -1470,16 +1473,16 @@ union Instruction {
return false;
}

bool IsArrayTexture() const {
[[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info >= 7 && texture_info <= 9;
}

bool HasTwoDestinations() const {
[[nodiscard]] bool HasTwoDestinations() const {
return gpr28.Value() != Register::ZeroIndex;
}

bool IsComponentEnabled(std::size_t component) const {
[[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
{},
{0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@ -1506,7 +1509,7 @@ union Instruction {
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;

TextureProcessMode GetTextureProcessMode() const {
[[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
@ -1516,7 +1519,7 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;

TextureType GetTextureType() const {
[[nodiscard]] TextureType GetTextureType() const {
// The TLDS instruction has a weird encoding for the texture type.
if (texture_info <= 1) {
return TextureType::Texture1D;
@ -1529,19 +1532,19 @@ union Instruction {
return TextureType::Texture3D;
}

LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
static_cast<u32>(texture_info.Value()));
LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
UNREACHABLE();
return TextureType::Texture1D;
}

TextureProcessMode GetTextureProcessMode() const {
if (texture_info == 1 || texture_info == 5 || texture_info == 12)
[[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
return TextureProcessMode::LL;
}
return TextureProcessMode::LZ;
}

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return texture_info == 12 || texture_info == 4;
@ -1555,7 +1558,7 @@ union Instruction {
return false;
}

bool IsArrayTexture() const {
[[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info == 8;
}
@ -1567,7 +1570,7 @@ union Instruction {
BitField<35, 1, u64> aoffi_flag;
BitField<49, 1, u64> nodep_flag;

bool UsesMiscMode(TextureMiscMode mode) const {
[[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return aoffi_flag != 0;
@ -1591,7 +1594,7 @@ union Instruction {
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;

bool IsComponentEnabled(std::size_t component) const {
[[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
@ -1604,7 +1607,7 @@ union Instruction {
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}

StoreType GetStoreDataLayout() const {
[[nodiscard]] StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
@ -1622,14 +1625,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;

s32 GetBranchTarget() const {
[[nodiscard]] s32 GetBranchTarget() const {
// Sign extend the branch target offset
u32 mask = 1U << (24 - 1);
u32 value = static_cast<u32>(target);
const auto mask = 1U << (24 - 1);
const auto target_value = static_cast<u32>(target);
constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));

// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
1;
return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} bra;
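
// Editor's note: GetBranchTarget's arithmetic is worth pinning down with
// numbers. The 24-bit field is a byte offset relative to the next instruction,
// and sizeof(Instruction) is 8. Two invented raw targets:
//
//     raw = 0x000010: sign-extends to +16 bytes ->  16 / 8 + 1 = +3
//     raw = 0xFFFFF0: sign-extends to -16 bytes -> -16 / 8 + 1 = -1
//
// The "+ 1" converts "relative to the next instruction" into "relative to the
// branch instruction itself".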
|
||||
|
||||
@ -1637,14 +1641,15 @@ union Instruction {
|
||||
BitField<20, 24, u64> target;
|
||||
BitField<5, 1, u64> constant_buffer;
|
||||
|
||||
s32 GetBranchExtend() const {
|
||||
[[nodiscard]] s32 GetBranchExtend() const {
|
||||
// Sign extend the branch target offset
|
||||
u32 mask = 1U << (24 - 1);
|
||||
u32 value = static_cast<u32>(target);
|
||||
const auto mask = 1U << (24 - 1);
|
||||
const auto target_value = static_cast<u32>(target);
|
||||
constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
|
||||
|
||||
// The branch offset is relative to the next instruction and is stored in bytes, so
|
||||
// divide it by the size of an instruction and add 1 to it.
|
||||
return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
|
||||
1;
|
||||
return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
|
||||
}
|
||||
} brx;

@ -1697,7 +1702,7 @@ union Instruction {
BitField<50, 1, u64> is_op_b_register;
BitField<51, 3, VmnmxOperation> operation;

VmnmxType SourceFormatA() const {
[[nodiscard]] VmnmxType SourceFormatA() const {
switch (src_format_a) {
case 0b11:
return VmnmxType::Bits32;
@ -1708,7 +1713,7 @@ union Instruction {
}
}

VmnmxType SourceFormatB() const {
[[nodiscard]] VmnmxType SourceFormatB() const {
switch (src_format_b) {
case 0b11:
return VmnmxType::Bits32;
@ -1739,7 +1744,7 @@ union Instruction {
BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;

u64 GetOffset() const {
[[nodiscard]] u64 GetOffset() const {
return shifted_offset * 4;
}
} cbuf34;
@ -1748,7 +1753,7 @@ union Instruction {
BitField<20, 16, s64> offset;
BitField<36, 5, u64> index;

s64 GetOffset() const {
[[nodiscard]] s64 GetOffset() const {
return offset;
}
} cbuf36;
@ -1893,6 +1898,7 @@ public:
ICMP_IMM,
FCMP_RR,
FCMP_RC,
FCMP_IMMR,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@ -1996,29 +2002,29 @@ public:

/// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
/// conditionally executed).
static bool IsPredicatedInstruction(Id opcode) {
[[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
// TODO(Subv): Add the rest of unpredicated instructions.
return opcode != Id::SSY && opcode != Id::PBK;
}

class Matcher {
public:
constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
: name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
: name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}

constexpr const char* GetName() const {
[[nodiscard]] constexpr const char* GetName() const {
return name;
}

constexpr u16 GetMask() const {
[[nodiscard]] constexpr u16 GetMask() const {
return mask;
}

constexpr Id GetId() const {
[[nodiscard]] constexpr Id GetId() const {
return id;
}

constexpr Type GetType() const {
[[nodiscard]] constexpr Type GetType() const {
return type;
}

@ -2027,7 +2033,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
constexpr bool Matches(u16 instruction) const {
[[nodiscard]] constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}

@ -2039,7 +2045,8 @@ public:
Type type;
};

static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
[[nodiscard]] static DecodeResult Decode(Instruction instr) {
static const auto table{GetDecodeTable()};

const auto matches_instruction = [instr](const auto& matcher) {
@ -2061,7 +2068,7 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
static constexpr auto GetMaskAndExpect(const char* const bitstring) {
[[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
@ -2083,14 +2090,14 @@ private:

public:
/// Creates a matcher that can match and parse instructions based on bitstring.
static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
const char* const name) {
[[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
Type type, const char* const name) {
const auto [mask, expected] = GetMaskAndExpect(bitstring);
return Matcher(name, mask, expected, op, type);
}
};
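
The bitstring convention documented above ('0' and '1' are required bits, '-' is a don't-care) reduces opcode decoding to one AND and one compare. A minimal sketch of the mask/expect derivation for a 16-bit opcode (illustrative only; the real code is the constexpr GetMaskAndExpect above):

#include <cstddef>
#include <cstdint>
#include <utility>

// Derive (mask, expect) from a 16-character bitstring such as
// "0011011-1010----"; '-' positions stay out of the mask.
constexpr std::pair<uint16_t, uint16_t> MaskAndExpect(const char* bitstring) {
    uint16_t mask = 0;
    uint16_t expect = 0;
    for (std::size_t i = 0; i < 16; ++i) {
        const auto bit = static_cast<uint16_t>(1U << (15 - i));
        if (bitstring[i] == '1') {
            mask |= bit;
            expect |= bit;
        } else if (bitstring[i] == '0') {
            mask |= bit;
        }
    }
    return {mask, expect};
}

// An instruction matches when (instruction & mask) == expect.
static_assert(MaskAndExpect("0011011-1010----").second == 0x36A0);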

static std::vector<Matcher> GetDecodeTable() {
[[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
std::vector<Matcher> table = {
#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
@ -2205,6 +2212,7 @@ private:
INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),

@ -41,30 +41,30 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
} common0{};
} common0;

union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
} common1{};
} common1;

union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
} common2{};
} common2;

union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
} common3{};
} common3;

union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<20, 4, u32> reserved;
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
} common4{};
} common4;

union {
struct {
@ -145,7 +145,7 @@ struct Header {
}
} ps;

std::array<u32, 0xF> raw{};
std::array<u32, 0xF> raw;
};

u64 GetLocalMemorySize() const {
@ -153,7 +153,6 @@ struct Header {
(common2.shader_local_memory_high_size << 24));
}
};

static_assert(sizeof(Header) == 0x50, "Incorrect structure size");

} // namespace Tegra::Shader

@ -9,6 +9,7 @@

#include "common/common_types.h"
#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@ -17,11 +18,11 @@ namespace VideoCommon {

class FenceBase {
public:
FenceBase(u32 payload, bool is_stubbed)
: address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
explicit FenceBase(u32 payload_, bool is_stubbed_)
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}

FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
: address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}

GPUVAddr GetAddress() const {
return address;
@ -47,6 +48,11 @@ protected:
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
class FenceManager {
public:
/// Notify the fence manager about a new frame
void TickFrame() {
delayed_destruction_ring.Tick();
}

void SignalSemaphore(GPUVAddr addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
@ -86,7 +92,7 @@ public:
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
fences.pop();
PopFence();
}
}

@ -132,7 +138,7 @@ private:
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
fences.pop();
PopFence();
}
}

@ -158,7 +164,14 @@ private:
query_cache.CommitAsyncFlushes();
}

void PopFence() {
delayed_destruction_ring.Push(std::move(fences.front()));
fences.pop();
}

std::queue<TFence> fences;

DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};

} // namespace VideoCommon
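
PopFence now parks released fences in a delayed-destruction ring rather than destroying them immediately, so objects the GPU may still reference survive a few more frames. A minimal sketch of that idea (an assumption about the shape of delayed_destruction_ring.h, not a verbatim copy):

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

// Objects pushed during a frame are destroyed only after the ring wraps
// around, i.e. N ticks later, once in-flight GPU work has finished.
template <typename T, std::size_t N>
class DelayedDestructionRing {
public:
    void Tick() {
        index = (index + 1) % N;
        ring[index].clear(); // frees everything queued N ticks ago
    }

    void Push(T&& object) {
        ring[index].push_back(std::move(object));
    }

private:
    std::size_t index = 0;
    std::array<std::vector<T>, N> ring;
};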

31 src/video_core/framebuffer_config.h Normal file
@ -0,0 +1,31 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

namespace Tegra {

/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
A8B8G8R8_UNORM = 1,
RGB565_UNORM = 4,
B8G8R8A8_UNORM = 5,
};

VAddr address{};
u32 offset{};
u32 width{};
u32 height{};
u32 stride{};
PixelFormat pixel_format{};

using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags{};
Common::Rectangle<int> crop_rect;
};

} // namespace Tegra

@ -10,6 +10,7 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
#include "core/hardware_interrupt_manager.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
@ -27,15 +28,17 @@ namespace Tegra {

MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));

GPU::GPU(Core::System& system_, bool is_async_)
GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
: system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
fermi_2d{std::make_unique<Engines::Fermi2D>()},
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
gpu_thread{system_, is_async_} {}

GPU::~GPU() = default;

@ -77,31 +80,46 @@ DmaPusher& GPU::DmaPusher() {
return *dma_pusher;
}

Tegra::CDmaPusher& GPU::CDmaPusher() {
return *cdma_pusher;
}

const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}

const Tegra::CDmaPusher& GPU::CDmaPusher() const {
return *cdma_pusher;
}

void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
if (syncpoint_id == UINT32_MAX) {
// TODO: Research what this does.
LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
return;
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; });
}

void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
auto& syncpoint = syncpoints.at(syncpoint_id);
syncpoint++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();
while (it != syncpt_interrupts[syncpoint_id].end()) {
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
if (!interrupt.empty()) {
u32 value = syncpoint.load();
auto it = interrupt.begin();
while (it != interrupt.end()) {
if (value >= *it) {
TriggerCpuInterrupt(syncpoint_id, *it);
it = syncpt_interrupts[syncpoint_id].erase(it);
it = interrupt.erase(it);
continue;
}
it++;
@ -110,22 +128,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
}

u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
return syncpoints[syncpoint_id].load();
return syncpoints.at(syncpoint_id).load();
}

void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
auto& interrupt = syncpt_interrupts[syncpoint_id];
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
if (contains) {
return;
}
syncpt_interrupts[syncpoint_id].emplace_back(value);
interrupt.emplace_back(value);
}
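
The syncpoint code above is, at its core, an atomic counter paired with a condition variable: WaitFence sleeps until IncrementSyncPoint pushes the counter past the fence value. A self-contained sketch of that pattern (illustrative names, not the Tegra register interface):

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

// WaitFence/IncrementSyncPoint reduced to their essentials.
struct Syncpoint {
    std::atomic<uint32_t> counter{0};
    std::mutex mutex;
    std::condition_variable cv;

    void Wait(uint32_t value) {
        std::unique_lock lock{mutex};
        cv.wait(lock, [&] { return counter.load() >= value; });
    }

    void Increment() {
        counter++; // increment first, mirroring the code above
        std::lock_guard lock{mutex};
        cv.notify_all();
    }
};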

bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
std::lock_guard lock{sync_mutex};
auto& interrupt = syncpt_interrupts[syncpoint_id];
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
[value](u32 interrupt_value) { return value == interrupt_value; });
@ -182,34 +200,6 @@ void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
}

void GPU::OnCommandListEnd() {
renderer->Rasterizer().ReleaseFences();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in docs. Here we do not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
UnkCacheFlush = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
FenceValue = 0x1C,
FenceAction = 0x1D,
Unk78 = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};

enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
@ -240,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
} else {
for (std::size_t i = 0; i < amount; i++) {
CallPullerMethod(
{method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
CallPullerMethod(MethodCall{
method,
base_start[i],
subchannel,
methods_pending - static_cast<u32>(i),
});
}
}
}
@ -268,7 +262,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
break;
case BufferMethods::FenceAction:
ProcessFenceActionMethod();
break;
case BufferMethods::WaitForInterrupt:
ProcessWaitForInterruptMethod();
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
@ -298,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
break;
}
default:
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
static_cast<u32>(method));
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
break;
}
}
@ -378,10 +376,28 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id));
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
}
}

void GPU::ProcessFenceActionMethod() {
switch (regs.fence_action.op) {
case FenceOperation::Acquire:
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
break;
case FenceOperation::Increment:
IncrementSyncPoint(regs.fence_action.syncpoint_id);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
}
}

void GPU::ProcessWaitForInterruptMethod() {
// TODO(bunnei) ImplementMe
LOG_WARNING(HW_GPU, "(STUBBED) called");
}

void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
@ -443,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
}
}

void GPU::Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}

void GPU::ObtainContext() {
cpu_context->MakeCurrent();
}

void GPU::ReleaseContext() {
cpu_context->DoneCurrent();
}

void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}

void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
// This condition fires when a video stream ends, clear all intermediary data
if (entries[0].raw == 0xDEADB33F) {
cdma_pusher.reset();
return;
}
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
}

// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));

cdma_pusher->Push(std::move(entries));
cdma_pusher->DispatchCalls();
}

void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}

void GPU::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}

void GPU::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}

void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}

void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
auto& interrupt_manager = system.InterruptManager();
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}

void GPU::WaitIdle() const {
gpu_thread.WaitIdle();
}

void GPU::OnCommandListEnd() {
if (is_async) {
// This command only applies to asynchronous GPU mode
gpu_thread.OnCommandListEnd();
}
}

} // namespace Tegra

@ -13,14 +13,17 @@
#include "common/common_types.h"
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/cdma_pusher.h"
#include "video_core/dma_pusher.h"
#include "video_core/framebuffer_config.h"
#include "video_core/gpu_thread.h"

using CacheAddr = std::uintptr_t;
inline CacheAddr ToCacheAddr(const void* host_ptr) {
[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
return reinterpret_cast<CacheAddr>(host_ptr);
}

inline u8* FromCacheAddr(CacheAddr cache_addr) {
[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) {
return reinterpret_cast<u8*>(cache_addr);
}

@ -100,28 +103,6 @@ enum class DepthFormat : u32 {
struct CommandListHeader;
class DebugContext;

/**
* Struct describing framebuffer configuration
*/
struct FramebufferConfig {
enum class PixelFormat : u32 {
A8B8G8R8_UNORM = 1,
RGB565_UNORM = 4,
B8G8R8A8_UNORM = 5,
};

VAddr address;
u32 offset;
u32 width;
u32 height;
u32 stride;
PixelFormat pixel_format;

using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags;
Common::Rectangle<int> crop_rect;
};

namespace Engines {
class Fermi2D;
class Maxwell3D;
@ -140,7 +121,7 @@ enum class EngineID {

class MemoryManager;

class GPU {
class GPU final {
public:
struct MethodCall {
u32 method{};
@ -148,17 +129,17 @@ public:
u32 subchannel{};
u32 method_count{};

bool IsLastCall() const {
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
: method(method_), argument(argument_), subchannel(subchannel_),
method_count(method_count_) {}

[[nodiscard]] bool IsLastCall() const {
return method_count <= 1;
}

MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0)
: method(method), argument(argument), subchannel(subchannel),
method_count(method_count) {}
};

explicit GPU(Core::System& system, bool is_async);
virtual ~GPU();
explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
~GPU();

/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@ -175,13 +156,13 @@ public:
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost();
/// Signal the ending of command list.
virtual void OnCommandListEnd();
void OnCommandListEnd();

/// Request a host GPU memory flush from the CPU.
u64 RequestFlush(VAddr addr, std::size_t size);
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);

/// Obtains current flush request fence id.
u64 CurrentFlushRequestFence() const {
[[nodiscard]] u64 CurrentFlushRequestFence() const {
return current_flush_fence.load(std::memory_order_relaxed);
}

@ -189,68 +170,100 @@ public:
void TickWork();

/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
[[nodiscard]] Engines::Maxwell3D& Maxwell3D();

/// Returns a const reference to the Maxwell3D GPU engine.
const Engines::Maxwell3D& Maxwell3D() const;
[[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const;

/// Returns a reference to the KeplerCompute GPU engine.
Engines::KeplerCompute& KeplerCompute();
[[nodiscard]] Engines::KeplerCompute& KeplerCompute();

/// Returns a reference to the KeplerCompute GPU engine.
const Engines::KeplerCompute& KeplerCompute() const;
[[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const;

/// Returns a reference to the GPU memory manager.
Tegra::MemoryManager& MemoryManager();
[[nodiscard]] Tegra::MemoryManager& MemoryManager();

/// Returns a const reference to the GPU memory manager.
const Tegra::MemoryManager& MemoryManager() const;
[[nodiscard]] const Tegra::MemoryManager& MemoryManager() const;

/// Returns a reference to the GPU DMA pusher.
Tegra::DmaPusher& DmaPusher();
[[nodiscard]] Tegra::DmaPusher& DmaPusher();

VideoCore::RendererBase& Renderer() {
/// Returns a const reference to the GPU DMA pusher.
[[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;

/// Returns a reference to the GPU CDMA pusher.
[[nodiscard]] Tegra::CDmaPusher& CDmaPusher();

/// Returns a const reference to the GPU CDMA pusher.
[[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;

/// Returns a reference to the underlying renderer.
[[nodiscard]] VideoCore::RendererBase& Renderer() {
return *renderer;
}

const VideoCore::RendererBase& Renderer() const {
/// Returns a const reference to the underlying renderer.
[[nodiscard]] const VideoCore::RendererBase& Renderer() const {
return *renderer;
}

VideoCore::ShaderNotify& ShaderNotify() {
/// Returns a reference to the shader notifier.
[[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
return *shader_notify;
}

const VideoCore::ShaderNotify& ShaderNotify() const {
/// Returns a const reference to the shader notifier.
[[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
return *shader_notify;
}

// Waits for the GPU to finish working
virtual void WaitIdle() const = 0;
void WaitIdle() const;

/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value);

void IncrementSyncPoint(u32 syncpoint_id);

u32 GetSyncpointValue(u32 syncpoint_id) const;
[[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;

void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);

bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
[[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);

u64 GetTicks() const;
[[nodiscard]] u64 GetTicks() const;

std::unique_lock<std::mutex> LockSync() {
[[nodiscard]] std::unique_lock<std::mutex> LockSync() {
return std::unique_lock{sync_mutex};
}

bool IsAsync() const {
[[nodiscard]] bool IsAsync() const {
return is_async;
}

/// Returns a const reference to the GPU DMA pusher.
const Tegra::DmaPusher& DmaPusher() const;
[[nodiscard]] bool UseNvdec() const {
return use_nvdec;
}

enum class FenceOperation : u32 {
Acquire = 0,
Increment = 1,
};

union FenceAction {
u32 raw;
BitField<0, 1, FenceOperation> op;
BitField<8, 24, u32> syncpoint_id;

[[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
FenceAction result{};
result.op.Assign(op);
result.syncpoint_id.Assign(syncpoint_id);
return {result.raw};
}
};
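
FenceAction::Build packs an operation and a syncpoint id into one 32-bit word through yuzu's BitField helper. The equivalent packing written with plain shifts, as a sketch of the layout declared above (not the BitField API itself):

#include <cstdint>

// Bit 0 holds the operation (Acquire = 0, Increment = 1) and bits 8..31
// hold the 24-bit syncpoint id, matching the BitField declarations above.
constexpr uint32_t BuildFenceAction(uint32_t op, uint32_t syncpoint_id) {
    return (op & 0x1u) | ((syncpoint_id & 0xFFFFFFu) << 8);
}

static_assert(BuildFenceAction(1, 5) == 0x501);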

struct Regs {
static constexpr size_t NUM_REGS = 0x40;
@ -262,7 +275,7 @@ public:
u32 address_high;
u32 address_low;

GPUVAddr SemaphoreAddress() const {
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
@ -280,10 +293,7 @@ public:
u32 semaphore_acquire;
u32 semaphore_release;
u32 fence_value;
union {
BitField<4, 4, u32> operation;
BitField<8, 8, u32> id;
} fence_action;
FenceAction fence_action;
INSERT_UNION_PADDING_WORDS(0xE2);

// Puller state
@ -300,34 +310,39 @@ public:
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
virtual void Start() = 0;
void Start();

/// Obtain the CPU Context
virtual void ObtainContext() = 0;
void ObtainContext();

/// Release the CPU Context
virtual void ReleaseContext() = 0;
void ReleaseContext();

/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
void PushGPUEntries(Tegra::CommandList&& entries);

/// Push GPU command buffer entries to be processed
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);

/// Swap buffers (render frame)
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
void FlushRegion(VAddr addr, u64 size);

/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
void InvalidateRegion(VAddr addr, u64 size);

/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
void FlushAndInvalidateRegion(VAddr addr, u64 size);

protected:
virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;

private:
void ProcessBindMethod(const MethodCall& method_call);
void ProcessFenceActionMethod();
void ProcessWaitForInterruptMethod();
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
@ -343,13 +358,15 @@ private:
u32 methods_pending);

/// Determines where the method should be executed.
bool ExecuteMethodOnEngine(u32 method);
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);

protected:
Core::System& system;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
const bool use_nvdec;

private:
/// Mapping of command subchannels to their bound engine ids
@ -372,12 +389,13 @@ private:
std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;

std::mutex sync_mutex;
std::mutex device_mutex;

std::condition_variable sync_cv;

struct FlushRequest {
FlushRequest(u64 fence, VAddr addr, std::size_t size)
: fence{fence}, addr{addr}, size{size} {}
explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
: fence{fence_}, addr{addr_}, size{size_} {}
u64 fence;
VAddr addr;
std::size_t size;
@ -389,6 +407,9 @@ private:
std::mutex flush_request_mutex;

const bool is_async;

VideoCommon::GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
};

#define ASSERT_REG_POSITION(field_name, position) \

@ -1,64 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "core/core.h"
#include "core/hardware_interrupt_manager.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"

namespace VideoCommon {

GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}

GPUAsynch::~GPUAsynch() = default;

void GPUAsynch::Start() {
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}

void GPUAsynch::ObtainContext() {
cpu_context->MakeCurrent();
}

void GPUAsynch::ReleaseContext() {
cpu_context->DoneCurrent();
}

void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}

void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}

void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}

void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}

void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}

void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
auto& interrupt_manager = system.InterruptManager();
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}

void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle();
}

void GPUAsynch::OnCommandListEnd() {
gpu_thread.OnCommandListEnd();
}

} // namespace VideoCommon

@ -1,46 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"

namespace Core::Frontend {
class GraphicsContext;
}

namespace VideoCore {
class RendererBase;
} // namespace VideoCore

namespace VideoCommon {

/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system);
~GPUAsynch() override;

void Start() override;
void ObtainContext() override;
void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override;

void OnCommandListEnd() override;

protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;

private:
GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
};

} // namespace VideoCommon

@ -1,45 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"

namespace VideoCommon {

GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}

GPUSynch::~GPUSynch() = default;

void GPUSynch::Start() {}

void GPUSynch::ObtainContext() {
renderer->Context().MakeCurrent();
}

void GPUSynch::ReleaseContext() {
renderer->Context().DoneCurrent();
}

void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
}

void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
renderer->SwapBuffers(framebuffer);
}

void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushRegion(addr, size);
}

void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().InvalidateRegion(addr, size);
}

void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
}

} // namespace VideoCommon

@ -1,40 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "video_core/gpu.h"

namespace Core::Frontend {
class GraphicsContext;
}

namespace VideoCore {
class RendererBase;
} // namespace VideoCore

namespace VideoCommon {

/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system);
~GPUSynch() override;

void Start() override;
void ObtainContext() override;
void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override {}

protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
};

} // namespace VideoCommon

@ -4,6 +4,7 @@

#include "common/assert.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
@ -18,9 +19,11 @@ namespace VideoCommon::GPUThread {
/// Runs the GPU thread
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
std::string name = "yuzu:GPU";
MicroProfileOnThreadCreate(name.c_str());
SCOPE_EXIT({ MicroProfileOnThreadExit(); });

Common::SetCurrentThreadName(name.c_str());
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
system.RegisterHostThread();
@ -39,19 +42,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
CommandDataContainer next;
while (state.is_running) {
next = state.queue.PopWait();
if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
} else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
// NVDEC
cdma_pusher.Push(std::move(command_list->entries));
cdma_pusher.DispatchCalls();
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
renderer.Rasterizer().ReleaseFences();
} else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
@ -61,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
}
}
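
RunThread dispatches on a std::variant of command structs via std::get_if and std::holds_alternative. A trimmed-down sketch of that dispatch pattern, with hypothetical command types (not yuzu's actual command set):

#include <variant>

// A two-command queue illustrating the get_if dispatch used above.
struct SubmitList { int id; };
struct SwapBuffers {};
using Command = std::variant<SubmitList, SwapBuffers>;

void Dispatch(const Command& command) {
    if (const auto* submit = std::get_if<SubmitList>(&command)) {
        // run the command list identified by submit->id
        (void)submit;
    } else if (std::holds_alternative<SwapBuffers>(command)) {
        // present the frame
    }
}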

ThreadManager::ThreadManager(Core::System& system) : system{system} {}
ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
: system{system_}, is_async{is_async_} {}

ThreadManager::~ThreadManager() {
if (!thread.joinable()) {
@ -75,33 +83,48 @@ ThreadManager::~ThreadManager() {

void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
std::ref(context), std::ref(dma_pusher), std::ref(state)};
Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
}

void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(std::move(entries)));
}

void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) {
PushCommand(SubmitChCommandEntries(std::move(entries)));
}

void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
}

void ThreadManager::FlushRegion(VAddr addr, u64 size) {
if (!Settings::IsGPULevelHigh()) {
if (!is_async) {
// Always flush with synchronous GPU mode
PushCommand(FlushRegionCommand(addr, size));
return;
}
if (!Settings::IsGPULevelExtreme()) {
return;
}
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {

// Asynchronous GPU mode
switch (Settings::values.gpu_accuracy.GetValue()) {
case Settings::GPUAccuracy::Normal:
PushCommand(FlushRegionCommand(addr, size));
break;
case Settings::GPUAccuracy::High:
// TODO(bunnei): Is this right? Preserving existing behavior for now
break;
case Settings::GPUAccuracy::Extreme: {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand());
while (fence > gpu.CurrentFlushRequestFence()) {
}
break;
}
default:
UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
}
}
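
The rewritten FlushRegion makes the policy explicit: synchronous mode always flushes, Normal pushes an asynchronous flush, High deliberately skips it, and Extreme requests a flush fence and spins until the GPU thread has serviced it. A condensed sketch of the Extreme-path handshake (illustrative types, not yuzu's API):

#include <atomic>
#include <cstdint>

// The caller obtains a fence id, kicks the GPU thread, then busy-waits
// until the GPU thread has flushed past that fence.
struct FlushFence {
    std::atomic<uint64_t> current{0}; // last flush request serviced
    uint64_t issued = 0;              // last flush request handed out

    uint64_t Request() { return ++issued; }
};

inline void WaitForFlush(const FlushFence& fence, uint64_t target) {
    while (target > fence.current.load(std::memory_order_relaxed)) {
        // spin, mirroring the loop in ThreadManager::FlushRegion
    }
}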
|
||||
|
||||
@ -115,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
}
|
||||
|
||||
void ThreadManager::WaitIdle() const {
|
||||
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
|
||||
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
|
||||
system.IsPoweredOn()) {
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,6 +150,12 @@ void ThreadManager::OnCommandListEnd() {
|
||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
||||
const u64 fence{++state.last_fence};
|
||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
||||
|
||||
if (!is_async) {
|
||||
// In synchronous GPU mode, block the caller until the command has executed
|
||||
WaitIdle();
|
||||
}
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
|
@ -10,8 +10,9 @@
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
#include <variant>
|
||||
|
||||
#include "common/threadsafe_queue.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/framebuffer_config.h"
|
||||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
@ -25,6 +26,10 @@ class GraphicsContext;
|
||||
class System;
|
||||
} // namespace Core
|
||||
|
||||
namespace VideoCore {
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace VideoCommon::GPUThread {
|
||||
|
||||
/// Command to signal to the GPU thread that processing has ended
|
||||
@ -32,22 +37,30 @@ struct EndProcessingCommand final {};
|
||||
|
||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||
struct SubmitListCommand final {
|
||||
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
|
||||
explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
|
||||
|
||||
Tegra::CommandList entries;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread that a cdma command list is ready for processing
|
||||
struct SubmitChCommandEntries final {
|
||||
explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries_)
|
||||
: entries{std::move(entries_)} {}
|
||||
|
||||
Tegra::ChCommandHeaderList entries;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread that a swap buffers is pending
|
||||
struct SwapBuffersCommand final {
|
||||
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
|
||||
: framebuffer{std::move(framebuffer)} {}
|
||||
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_)
|
||||
: framebuffer{std::move(framebuffer_)} {}
|
||||
|
||||
std::optional<Tegra::FramebufferConfig> framebuffer;
|
||||
};
|
||||
|
||||
/// Command to signal to the GPU thread to flush a region
|
||||
struct FlushRegionCommand final {
|
||||
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
|
||||
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
@ -55,7 +68,7 @@ struct FlushRegionCommand final {
|
||||
|
||||
/// Command to signal to the GPU thread to invalidate a region
|
||||
struct InvalidateRegionCommand final {
|
||||
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
|
||||
explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
|
||||
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
@ -63,8 +76,8 @@ struct InvalidateRegionCommand final {
|
||||
|
||||
/// Command to signal to the GPU thread to flush and invalidate a region
|
||||
struct FlushAndInvalidateRegionCommand final {
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
|
||||
: addr{addr}, size{size} {}
|
||||
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_)
|
||||
: addr{addr_}, size{size_} {}
|
||||
|
||||
VAddr addr;
|
||||
u64 size;
|
||||
@ -77,15 +90,15 @@ struct OnCommandListEndCommand final {};
|
||||
struct GPUTickCommand final {};
|
||||
|
||||
using CommandData =
|
||||
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
|
||||
GPUTickCommand>;
|
||||
std::variant<EndProcessingCommand, SubmitListCommand, SubmitChCommandEntries,
|
||||
SwapBuffersCommand, FlushRegionCommand, InvalidateRegionCommand,
|
||||
FlushAndInvalidateRegionCommand, OnCommandListEndCommand, GPUTickCommand>;
|
||||
|
||||
struct CommandDataContainer {
|
||||
CommandDataContainer() = default;
|
||||
|
||||
CommandDataContainer(CommandData&& data, u64 next_fence)
|
||||
: data{std::move(data)}, fence{next_fence} {}
|
||||
explicit CommandDataContainer(CommandData&& data_, u64 next_fence_)
|
||||
: data{std::move(data_)}, fence{next_fence_} {}
|
||||
|
||||
CommandData data;
|
||||
u64 fence{};
|
||||
@ -104,16 +117,19 @@ struct SynchState final {
|
||||
/// Class used to manage the GPU thread
|
||||
class ThreadManager final {
|
||||
public:
|
||||
explicit ThreadManager(Core::System& system);
|
||||
explicit ThreadManager(Core::System& system_, bool is_async_);
|
||||
~ThreadManager();
|
||||
|
||||
/// Creates and starts the GPU thread.
|
||||
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
|
||||
Tegra::DmaPusher& dma_pusher);
|
||||
Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher);
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
void SubmitList(Tegra::CommandList&& entries);
|
||||
|
||||
/// Push GPU CDMA command buffer entries to be processed
|
||||
void SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries);
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
|
||||
|
||||
@ -135,11 +151,11 @@ private:
|
||||
/// Pushes a command to be executed by the GPU thread
|
||||
u64 PushCommand(CommandData&& command_data);
|
||||
|
||||
private:
|
||||
SynchState state;
|
||||
Core::System& system;
|
||||
std::thread thread;
|
||||
std::thread::id thread_id;
|
||||
const bool is_async;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::GPUThread
|
||||
|
@ -19,8 +19,8 @@ namespace VideoCore {
|
||||
class GuestDriverProfile {
|
||||
public:
|
||||
explicit GuestDriverProfile() = default;
|
||||
explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
|
||||
: texture_handler_size{texture_handler_size} {}
|
||||
explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
|
||||
: texture_handler_size{texture_handler_size_} {}
|
||||
|
||||
void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
|
||||
|
||||
|
@ -1,18 +1,29 @@
|
||||
set(SHADER_FILES
|
||||
block_linear_unswizzle_2d.comp
|
||||
block_linear_unswizzle_3d.comp
|
||||
convert_depth_to_float.frag
|
||||
convert_float_to_depth.frag
|
||||
full_screen_triangle.vert
|
||||
opengl_copy_bc4.comp
|
||||
opengl_present.frag
|
||||
opengl_present.vert
|
||||
pitch_unswizzle.comp
|
||||
vulkan_blit_color_float.frag
|
||||
vulkan_blit_depth_stencil.frag
|
||||
vulkan_present.frag
|
||||
vulkan_present.vert
|
||||
vulkan_quad_array.comp
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_uint8.comp
|
||||
)
|
||||
|
||||
find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
|
||||
|
||||
set(GLSL_FLAGS "")
|
||||
|
||||
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||
set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
|
||||
|
||||
set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${SHADER_DIR}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
|
||||
)
|
||||
set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
|
||||
|
||||
set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
|
||||
set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
|
||||
@ -20,19 +31,36 @@ set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
|
||||
foreach(FILENAME IN ITEMS ${SHADER_FILES})
|
||||
string(REPLACE "." "_" SHADER_NAME ${FILENAME})
|
||||
set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
|
||||
set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${HEADER_FILE}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${SOURCE_FILE}
|
||||
DEPENDS
|
||||
${HEADER_GENERATOR}
|
||||
${INPUT_FILE}
|
||||
)
|
||||
set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
|
||||
# Skip generating source headers on Vulkan exclusive files
|
||||
if (NOT ${FILENAME} MATCHES "vulkan.*")
|
||||
set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${SOURCE_HEADER_FILE}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${SOURCE_FILE}
|
||||
DEPENDS
|
||||
${INPUT_FILE}
|
||||
# HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
|
||||
)
|
||||
set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
|
||||
endif()
|
||||
# Skip compiling to SPIR-V OpenGL exclusive files
|
||||
if (NOT ${FILENAME} MATCHES "opengl.*")
|
||||
string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME)
|
||||
set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${SPIRV_HEADER_FILE}
|
||||
COMMAND
|
||||
${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${SOURCE_FILE}
|
||||
)
|
||||
set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
add_custom_target(host_shaders
|
||||
|
@ -8,4 +8,6 @@ string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
|
||||
|
||||
file(READ ${SOURCE_FILE} CONTENTS)
|
||||
|
||||
get_filename_component(OUTPUT_DIR ${HEADER_FILE} DIRECTORY)
|
||||
make_directory(${OUTPUT_DIR})
|
||||
configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
|
||||
|
122
src/video_core/host_shaders/block_linear_unswizzle_2d.comp
Normal file
122
src/video_core/host_shaders/block_linear_unswizzle_2d.comp
Normal file
@ -0,0 +1,122 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#define HAS_EXTENDED_TYPES 1
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_IMAGE 2

#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv

#extension GL_NV_gpu_shader5 : enable
#ifdef GL_NV_gpu_shader5
#define HAS_EXTENDED_TYPES 1
#else
#define HAS_EXTENDED_TYPES 0
#endif
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout (location = n) uniform
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_IMAGE 0

#endif

BEGIN_PUSH_CONSTANTS
UNIFORM(0) uvec3 origin;
UNIFORM(1) ivec3 destination;
UNIFORM(2) uint bytes_per_block_log2;
UNIFORM(3) uint layer_stride;
UNIFORM(4) uint block_size;
UNIFORM(5) uint x_shift;
UNIFORM(6) uint block_height;
UNIFORM(7) uint block_height_mask;
END_PUSH_CONSTANTS

layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
    uint swizzle_table[];
};

#if HAS_EXTENDED_TYPES
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
#endif
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };

layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image;

layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;

const uint GOB_SIZE_X = 64;
const uint GOB_SIZE_Y = 8;
const uint GOB_SIZE_Z = 1;
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;

const uint GOB_SIZE_X_SHIFT = 6;
const uint GOB_SIZE_Y_SHIFT = 3;
const uint GOB_SIZE_Z_SHIFT = 0;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;

const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);

uint SwizzleOffset(uvec2 pos) {
    pos = pos & SWIZZLE_MASK;
    return swizzle_table[pos.y * 64 + pos.x];
}

uvec4 ReadTexel(uint offset) {
    switch (bytes_per_block_log2) {
#if HAS_EXTENDED_TYPES
    case 0:
        return uvec4(u8data[offset], 0, 0, 0);
    case 1:
        return uvec4(u16data[offset / 2], 0, 0, 0);
#else
    case 0:
        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
    case 1:
        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
#endif
    case 2:
        return uvec4(u32data[offset / 4], 0, 0, 0);
    case 3:
        return uvec4(u64data[offset / 8], 0, 0);
    case 4:
        return u128data[offset / 16];
    }
    return uvec4(0);
}

void main() {
    uvec3 pos = gl_GlobalInvocationID + origin;
    pos.x <<= bytes_per_block_log2;

    // Read as soon as possible due to its latency
    const uint swizzle = SwizzleOffset(pos.xy);

    const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;

    uint offset = 0;
    offset += pos.z * layer_stride;
    offset += (block_y >> block_height) * block_size;
    offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
    offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
    offset += swizzle;

    const uvec4 texel = ReadTexel(offset);
    const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
    imageStore(output_image, coord, texel);
}
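
Note: on hosts without 8/16-bit storage types, ReadTexel above falls back to bitfieldExtract
over 32-bit words. A standalone C++ mirror of the byte case, for reference (function name
hypothetical):

#include <cstdint>

// Extract the byte at `offset` from a buffer of 32-bit words, mirroring
// bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8).
uint32_t ExtractByte(const uint32_t* u32data, uint32_t offset) {
    const uint32_t word = u32data[offset / 4];
    const uint32_t bit = (offset * 8) & 24; // byte position within the word, in bits
    return (word >> bit) & 0xff;
}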

125
src/video_core/host_shaders/block_linear_unswizzle_3d.comp
Normal file
@ -0,0 +1,125 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 430

#ifdef VULKAN

#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#define HAS_EXTENDED_TYPES 1
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_IMAGE 2

#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv

#extension GL_NV_gpu_shader5 : enable
#ifdef GL_NV_gpu_shader5
#define HAS_EXTENDED_TYPES 1
#else
#define HAS_EXTENDED_TYPES 0
#endif
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout (location = n) uniform
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_OUTPUT_IMAGE 0

#endif

BEGIN_PUSH_CONSTANTS
UNIFORM(0) uvec3 origin;
UNIFORM(1) ivec3 destination;
UNIFORM(2) uint bytes_per_block_log2;
UNIFORM(3) uint slice_size;
UNIFORM(4) uint block_size;
UNIFORM(5) uint x_shift;
UNIFORM(6) uint block_height;
UNIFORM(7) uint block_height_mask;
UNIFORM(8) uint block_depth;
UNIFORM(9) uint block_depth_mask;
END_PUSH_CONSTANTS

layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
    uint swizzle_table[];
};

#if HAS_EXTENDED_TYPES
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
#endif
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };

layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image;

layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in;

const uint GOB_SIZE_X = 64;
const uint GOB_SIZE_Y = 8;
const uint GOB_SIZE_Z = 1;
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;

const uint GOB_SIZE_X_SHIFT = 6;
const uint GOB_SIZE_Y_SHIFT = 3;
const uint GOB_SIZE_Z_SHIFT = 0;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;

const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);

uint SwizzleOffset(uvec2 pos) {
    pos = pos & SWIZZLE_MASK;
    return swizzle_table[pos.y * 64 + pos.x];
}

uvec4 ReadTexel(uint offset) {
    switch (bytes_per_block_log2) {
#if HAS_EXTENDED_TYPES
    case 0:
        return uvec4(u8data[offset], 0, 0, 0);
    case 1:
        return uvec4(u16data[offset / 2], 0, 0, 0);
#else
    case 0:
        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
    case 1:
        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
#endif
    case 2:
        return uvec4(u32data[offset / 4], 0, 0, 0);
    case 3:
        return uvec4(u64data[offset / 8], 0, 0);
    case 4:
        return u128data[offset / 16];
    }
    return uvec4(0);
}

void main() {
    uvec3 pos = gl_GlobalInvocationID + origin;
    pos.x <<= bytes_per_block_log2;

    // Read as soon as possible due to its latency
    const uint swizzle = SwizzleOffset(pos.xy);

    const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;

    uint offset = 0;
    offset += (pos.z >> block_depth) * slice_size;
    offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);
    offset += (block_y >> block_height) * block_size;
    offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
    offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
    offset += swizzle;

    const uvec4 texel = ReadTexel(offset);
    const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
    imageStore(output_image, coord, texel);
}
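
Relative to the 2D shader, the 3D variant adds two Z terms: whole slices of blocks
(slice_size) and the GOB layer inside a block. A hedged C++ mirror of the offset arithmetic
in main() above, using the push-constant names from the shader (function name hypothetical):

#include <cstdint>

uint32_t BlockLinearOffset3D(uint32_t x, uint32_t y, uint32_t z, uint32_t swizzle,
                             uint32_t slice_size, uint32_t block_size, uint32_t x_shift,
                             uint32_t block_height, uint32_t block_height_mask,
                             uint32_t block_depth, uint32_t block_depth_mask) {
    constexpr uint32_t GOB_SIZE_X_SHIFT = 6;
    constexpr uint32_t GOB_SIZE_Y_SHIFT = 3;
    constexpr uint32_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;
    const uint32_t block_y = y >> GOB_SIZE_Y_SHIFT;
    uint32_t offset = 0;
    offset += (z >> block_depth) * slice_size;                            // whole slices of blocks
    offset += (z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);  // GOB layer in a block
    offset += (block_y >> block_height) * block_size;                     // whole rows of blocks
    offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;            // GOB row in a block
    offset += (x >> GOB_SIZE_X_SHIFT) << x_shift;                         // GOB column
    return offset + swizzle;                                              // byte within the GOB
}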

13
src/video_core/host_shaders/convert_depth_to_float.frag
Normal file
@ -0,0 +1,13 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 450

layout(binding = 0) uniform sampler2D depth_texture;
layout(location = 0) out float output_color;

void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);
    output_color = texelFetch(depth_texture, coord, 0).r;
}

13
src/video_core/host_shaders/convert_float_to_depth.frag
Normal file
@ -0,0 +1,13 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 450

layout(binding = 0) uniform sampler2D color_texture;

void main() {
    ivec2 coord = ivec2(gl_FragCoord.xy);
    float color = texelFetch(color_texture, coord, 0).r;
    gl_FragDepth = color;
}

29
src/video_core/host_shaders/full_screen_triangle.vert
Normal file
@ -0,0 +1,29 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 450

#ifdef VULKAN
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout (location = n) uniform
#endif

BEGIN_PUSH_CONSTANTS
UNIFORM(0) vec2 tex_scale;
UNIFORM(1) vec2 tex_offset;
END_PUSH_CONSTANTS

layout(location = 0) out vec2 texcoord;

void main() {
    float x = float((gl_VertexIndex & 1) << 2);
    float y = float((gl_VertexIndex & 2) << 1);
    gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
    texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
}
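
Worked out for the three indices a non-indexed three-vertex draw submits: gl_VertexIndex =
0, 1, 2 gives (x, y) = (0, 0), (4, 0), (0, 4), hence clip positions (-1, -1), (3, -1),
(-1, 3) and base texture coordinates (0, 0), (2, 0), (0, 2). That single oversized triangle,
once clipped, covers the whole viewport without the diagonal seam a two-triangle quad has.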

70
src/video_core/host_shaders/opengl_copy_bc4.comp
Normal file
@ -0,0 +1,70 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 430 core
#extension GL_ARB_gpu_shader_int64 : require

layout (local_size_x = 4, local_size_y = 4) in;

layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input;
layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output;

layout(location = 0) uniform uvec3 src_offset;
layout(location = 1) uniform uvec3 dst_offset;

// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
uint DecompressBlock(uint64_t bits, uvec2 coord) {
    const uint code_offset = 16 + 3 * (4 * coord.y + coord.x);
    const uint code = uint(bits >> code_offset) & 7;
    const uint red0 = uint(bits >> 0) & 0xff;
    const uint red1 = uint(bits >> 8) & 0xff;
    if (red0 > red1) {
        switch (code) {
        case 0:
            return red0;
        case 1:
            return red1;
        case 2:
            return (6 * red0 + 1 * red1) / 7;
        case 3:
            return (5 * red0 + 2 * red1) / 7;
        case 4:
            return (4 * red0 + 3 * red1) / 7;
        case 5:
            return (3 * red0 + 4 * red1) / 7;
        case 6:
            return (2 * red0 + 5 * red1) / 7;
        case 7:
            return (1 * red0 + 6 * red1) / 7;
        }
    } else {
        switch (code) {
        case 0:
            return red0;
        case 1:
            return red1;
        case 2:
            return (4 * red0 + 1 * red1) / 5;
        case 3:
            return (3 * red0 + 2 * red1) / 5;
        case 4:
            return (2 * red0 + 3 * red1) / 5;
        case 5:
            return (1 * red0 + 4 * red1) / 5;
        case 6:
            return 0;
        case 7:
            return 0xff;
        }
    }
    return 0;
}

void main() {
    uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg;
    uint64_t bits = packUint2x32(packed_bits);
    uint red = DecompressBlock(bits, gl_LocalInvocationID.xy);
    uvec4 color = uvec4(red & 0xff, 0, 0, 0xff);
    imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color);
}
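
For reference, a compact host-side C++ mirror of DecompressBlock above, folding the two
interpolation tables into arithmetic (function name hypothetical; a sketch, not the shader's
literal structure):

#include <cstdint>

// Decode one red value from a 64-bit BC4 block for the texel at (x, y) in the 4x4 tile.
uint32_t DecompressBc4Texel(uint64_t bits, uint32_t x, uint32_t y) {
    const uint32_t code = static_cast<uint32_t>(bits >> (16 + 3 * (4 * y + x))) & 7;
    const uint32_t red0 = static_cast<uint32_t>(bits >> 0) & 0xff;
    const uint32_t red1 = static_cast<uint32_t>(bits >> 8) & 0xff;
    if (red0 > red1) { // 8-value mode: endpoints plus six interpolated values
        if (code == 0) return red0;
        if (code == 1) return red1;
        return ((8 - code) * red0 + (code - 1) * red1) / 7;
    }
    // 6-value mode: endpoints, four interpolated values, explicit 0 and 255
    switch (code) {
    case 0: return red0;
    case 1: return red1;
    case 6: return 0;
    case 7: return 0xff;
    default: return ((6 - code) * red0 + (code - 1) * red1) / 5;
    }
}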

@ -1,3 +1,7 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 430 core

layout (location = 0) in vec2 frag_tex_coord;
@ -1,3 +1,7 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 430 core

out gl_PerVertex {

86
src/video_core/host_shaders/pitch_unswizzle.comp
Normal file
@ -0,0 +1,86 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 430

#ifdef VULKAN

#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require
#define HAS_EXTENDED_TYPES 1
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0
#define BINDING_OUTPUT_IMAGE 1

#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv

#extension GL_NV_gpu_shader5 : enable
#ifdef GL_NV_gpu_shader5
#define HAS_EXTENDED_TYPES 1
#else
#define HAS_EXTENDED_TYPES 0
#endif
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout (location = n) uniform
#define BINDING_INPUT_BUFFER 0
#define BINDING_OUTPUT_IMAGE 0

#endif

BEGIN_PUSH_CONSTANTS
UNIFORM(0) uvec2 origin;
UNIFORM(1) ivec2 destination;
UNIFORM(2) uint bytes_per_block;
UNIFORM(3) uint pitch;
END_PUSH_CONSTANTS

#if HAS_EXTENDED_TYPES
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; };
#endif
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; };
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; };

layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image;

layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;

uvec4 ReadTexel(uint offset) {
    switch (bytes_per_block) {
#if HAS_EXTENDED_TYPES
    case 1:
        return uvec4(u8data[offset], 0, 0, 0);
    case 2:
        return uvec4(u16data[offset / 2], 0, 0, 0);
#else
    case 1:
        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
    case 2:
        return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
#endif
    case 4:
        return uvec4(u32data[offset / 4], 0, 0, 0);
    case 8:
        return uvec4(u64data[offset / 8], 0, 0);
    case 16:
        return u128data[offset / 16];
    }
    return uvec4(0);
}

void main() {
    uvec2 pos = gl_GlobalInvocationID.xy + origin;

    uint offset = 0;
    offset += pos.x * bytes_per_block;
    offset += pos.y * pitch;

    const uvec4 texel = ReadTexel(offset);
    const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination;
    imageStore(output_image, coord, texel);
}
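
Pitch-linear layout needs no swizzle table: the byte offset is plain row-major arithmetic,
offset = y * pitch + x * bytes_per_block. As a worked example with a hypothetical pitch of
256 bytes and 4-byte texels, texel (5, 3) reads from 3 * 256 + 5 * 4 = 788.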

14
src/video_core/host_shaders/vulkan_blit_color_float.frag
Normal file
@ -0,0 +1,14 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 450

layout(binding = 0) uniform sampler2D tex;

layout(location = 0) in vec2 texcoord;
layout(location = 0) out vec4 color;

void main() {
    color = textureLod(tex, texcoord, 0);
}

16
src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
Normal file
@ -0,0 +1,16 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#version 450
#extension GL_ARB_shader_stencil_export : require

layout(binding = 0) uniform sampler2D depth_tex;
layout(binding = 1) uniform isampler2D stencil_tex;

layout(location = 0) in vec2 texcoord;

void main() {
    gl_FragDepth = textureLod(depth_tex, texcoord, 0).r;
    gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r;
}

@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

/*
 * Build instructions:
 * $ glslangValidator -V $THIS_FILE -o output.spv
 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
 * $ xxd -i optimized.spv
 *
 * Then copy that bytecode to the C++ file
 */

#version 460 core

layout (location = 0) in vec2 frag_tex_coord;
@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

/*
 * Build instructions:
 * $ glslangValidator -V $THIS_FILE -o output.spv
 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
 * $ xxd -i optimized.spv
 *
 * Then copy that bytecode to the C++ file
 */

#version 460 core

layout (location = 0) in vec2 vert_position;
@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

/*
 * Build instructions:
 * $ glslangValidator -V $THIS_FILE -o output.spv
 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
 * $ xxd -i optimized.spv
 *
 * Then copy that bytecode to the C++ file
 */

#version 460 core

layout (local_size_x = 1024) in;
@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

/*
 * Build instructions:
 * $ glslangValidator -V quad_indexed.comp -o output.spv
 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
 * $ xxd -i optimized.spv
 *
 * Then copy that bytecode to the C++ file
 */

#version 460 core

layout (local_size_x = 1024) in;
@ -2,15 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

/*
 * Build instructions:
 * $ glslangValidator -V $THIS_FILE -o output.spv
 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
 * $ xxd -i optimized.spv
 *
 * Then copy that bytecode to the C++ file
 */

#version 460 core
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_8bit_storage : require

@ -85,7 +85,7 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
    {0x0217920100488FF7, &HLE_0217920100488FF7},
}};

HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
HLEMacro::~HLEMacro() = default;

std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
@ -99,8 +99,8 @@ std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) co

HLEMacroImpl::~HLEMacroImpl() = default;

HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
    : maxwell3d(maxwell3d), func(func) {}
HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
    : maxwell3d{maxwell3d_}, func{func_} {}

void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
    func(maxwell3d, parameters);

@ -20,7 +20,7 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u3

class HLEMacro {
public:
    explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
    explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
    ~HLEMacro();

    std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;

@ -11,29 +11,29 @@
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));

namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}

std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
}

MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d,
                                           const std::vector<u32>& code)
    : maxwell3d(maxwell3d), code(code) {}
MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_,
                                           const std::vector<u32>& code_)
    : maxwell3d{maxwell3d_}, code{code_} {}

void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) {
void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
    MICROPROFILE_SCOPE(MacroInterp);
    Reset();

    registers[1] = parameters[0];
    num_parameters = parameters.size();
    registers[1] = params[0];
    num_parameters = params.size();

    if (num_parameters > parameters_capacity) {
        parameters_capacity = num_parameters;
        this->parameters = std::make_unique<u32[]>(num_parameters);
        parameters = std::make_unique<u32[]>(num_parameters);
    }
    std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
    std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32));

    // Execute the code until we hit an exit condition.
    bool keep_executing = true;
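
Renaming the argument from `parameters` to `params` removes the need for `this->` to
disambiguate it from the member scratch buffer. The buffer itself is grow-only, so repeated
macro invocations amortize the allocation. A minimal standalone sketch of that pattern
(class name hypothetical):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

class ScratchParameters {
public:
    // Copy the macro parameters, reusing the old allocation when it is large enough.
    void Assign(const std::vector<uint32_t>& params) {
        if (params.size() > capacity) {
            capacity = params.size();
            storage = std::make_unique<uint32_t[]>(capacity); // grow, never shrink
        }
        std::memcpy(storage.get(), params.data(), params.size() * sizeof(uint32_t));
    }

private:
    std::unique_ptr<uint32_t[]> storage;
    std::size_t capacity = 0;
};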

@ -133,8 +133,7 @@ bool MacroInterpreterImpl::Step(bool is_delay_slot) {
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}",
                          static_cast<u32>(opcode.operation.Value()));
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value());
    }

    // An instruction with the Exit flag will not actually
@ -182,7 +181,7 @@ u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a,
        return ~(src_a & src_b);

    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast<u32>(operation));
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation);
        return 0;
    }
}
@ -230,7 +229,7 @@ void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 r
        Send((result >> 12) & 0b111111);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast<u32>(operation));
        UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation);
    }
}

@ -17,7 +17,7 @@ class Maxwell3D;

class MacroInterpreter final : public MacroEngine {
public:
    explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
    explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_);

protected:
    std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
@ -28,8 +28,8 @@ private:

class MacroInterpreterImpl : public CachedMacro {
public:
    MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
    void Execute(const std::vector<u32>& parameters, u32 method) override;
    explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
    void Execute(const std::vector<u32>& params, u32 method) override;

private:
    /// Resets the execution engine state, zeroing registers, etc.
@ -38,9 +38,9 @@ private:
    /**
     * Executes a single macro instruction located at the current program counter. Returns whether
     * the interpreter should keep running.
     * @param offset Offset to start execution at.
     *
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
     * previous instruction.
     *                      previous instruction.
     */
    bool Step(bool is_delay_slot);

@ -28,15 +28,15 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
    BRANCH_HOLDER,
});

MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}

std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
}

MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code)
    : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) {
MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
    : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
    Compile();
}

@ -165,8 +165,7 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
        }
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}",
                          static_cast<std::size_t>(opcode.alu_operation.Value()));
        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value());
        break;
    }
    Compile_ProcessResult(opcode.result_operation, opcode.dst);

@ -553,15 +552,15 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
}

void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
    const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
    const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) {
        // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
        // register.
        if (reg == 0) {
        if (reg_index == 0) {
            return;
        }
        mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
        mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result);
    };
    const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
    const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch:
@ -604,7 +603,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
        Compile_Send(RESULT);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation));
        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation);
    }
}

@ -23,7 +23,7 @@ constexpr size_t MAX_CODE_SIZE = 0x10000;

class MacroJITx64 final : public MacroEngine {
public:
    explicit MacroJITx64(Engines::Maxwell3D& maxwell3d);
    explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);

protected:
    std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
@ -34,7 +34,7 @@ private:

class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
public:
    MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
    ~MacroJITx64Impl();

    void Execute(const std::vector<u32>& parameters, u32 method) override;

@ -11,6 +11,7 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"

namespace Tegra {

@ -44,13 +45,22 @@ GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_
    return Map(cpu_addr, *FindFreeRange(size, align), size);
}

GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
    const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
    ASSERT(gpu_addr);
    return Map(cpu_addr, *gpu_addr, size);
}

void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
    if (!size) {
        return;
    }

    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
    system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
    ASSERT(cpu_addr);

    rasterizer->UnmapMemory(*cpu_addr, size);

    UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
}
@ -108,7 +118,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
    page_table[PageEntryIndex(gpu_addr)] = page_entry;
}

std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const {
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
                                                     bool start_32bit_address) const {
    if (!align) {
        align = page_size;
    } else {
@ -116,7 +127,7 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
    }

    u64 available_size{};
    GPUVAddr gpu_addr{address_space_start};
    GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start};
    while (gpu_addr + available_size < address_space_size) {
        if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
            available_size += page_size;

@ -28,7 +28,7 @@ public:
    };

    constexpr PageEntry() = default;
    constexpr PageEntry(State state) : state{state} {}
    constexpr PageEntry(State state_) : state{state_} {}
    constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}

    [[nodiscard]] constexpr bool IsUnmapped() const {
@ -68,7 +68,7 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");

class MemoryManager final {
public:
    explicit MemoryManager(Core::System& system);
    explicit MemoryManager(Core::System& system_);
    ~MemoryManager();

    /// Binds a renderer to the memory manager.

@ -116,6 +116,7 @@ public:

    [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
    [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
    void Unmap(GPUVAddr gpu_addr, std::size_t size);
@ -124,7 +125,8 @@ private:
    [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
    void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
    GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
    [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
    [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
                                                        bool start_32bit_address = false) const;

    void TryLockPage(PageEntry page_entry, std::size_t size);
    void TryUnlockPage(PageEntry page_entry, std::size_t size);
@ -135,6 +137,7 @@ private:

    static constexpr u64 address_space_size = 1ULL << 40;
    static constexpr u64 address_space_start = 1ULL << 32;
    static constexpr u64 address_space_start_low = 1ULL << 16;
    static constexpr u64 page_bits{16};
    static constexpr u64 page_size{1 << page_bits};
    static constexpr u64 page_mask{page_size - 1};
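
Worked out, these constants describe a 1 TiB (1ULL << 40) GPU address space with 64 KiB
(1 << 16) pages. Ordinary allocations scan upward from address_space_start = 4 GiB, while
FindFreeRange with start_32bit_address set scans from address_space_start_low = 64 KiB, so
in practice the mapping MapAllocate32 returns fits in 32 bits, which is what engine
registers that only hold a 32-bit GPU address rely on. A hedged usage sketch (caller and
variable names hypothetical):

// Map a guest buffer somewhere a 32-bit engine register can point at.
const GPUVAddr gpu_addr = memory_manager.MapAllocate32(cpu_addr, size);
ASSERT(gpu_addr + size - 1 <= std::numeric_limits<u32>::max());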

@ -1,250 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <array>
#include <cstring>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/morton.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"

namespace VideoCore {

using Surface::GetBytesPerPixel;
using Surface::PixelFormat;

using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;

template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
                       u32 tile_width_spacing, u8* buffer, u8* addr) {
    constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);

    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
    // pixel values.
    constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
    constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};

    if constexpr (morton_to_linear) {
        Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
                                         stride, height, depth, block_height, block_depth,
                                         tile_width_spacing);
    } else {
        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
                                         (height + tile_size_y - 1) / tile_size_y, depth,
                                         bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
                                         block_height, block_depth, tile_width_spacing);
    }
}

static constexpr ConversionArray morton_to_linear_fns = {
    MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
    MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
    MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
    MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
    MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
    MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
    MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
    MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
    MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
    MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
    MortonCopy<true, PixelFormat::R8_UNORM>,
    MortonCopy<true, PixelFormat::R8_SNORM>,
    MortonCopy<true, PixelFormat::R8_SINT>,
    MortonCopy<true, PixelFormat::R8_UINT>,
    MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
    MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
    MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
    MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
    MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
    MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
    MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
    MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
    MortonCopy<true, PixelFormat::BC2_UNORM>,
    MortonCopy<true, PixelFormat::BC3_UNORM>,
    MortonCopy<true, PixelFormat::BC4_UNORM>,
    MortonCopy<true, PixelFormat::BC4_SNORM>,
    MortonCopy<true, PixelFormat::BC5_UNORM>,
    MortonCopy<true, PixelFormat::BC5_SNORM>,
    MortonCopy<true, PixelFormat::BC7_UNORM>,
    MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
    MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
    MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
    MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
    MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
    MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
    MortonCopy<true, PixelFormat::R32G32_FLOAT>,
    MortonCopy<true, PixelFormat::R32G32_SINT>,
    MortonCopy<true, PixelFormat::R32_FLOAT>,
    MortonCopy<true, PixelFormat::R16_FLOAT>,
    MortonCopy<true, PixelFormat::R16_UNORM>,
    MortonCopy<true, PixelFormat::R16_SNORM>,
    MortonCopy<true, PixelFormat::R16_UINT>,
    MortonCopy<true, PixelFormat::R16_SINT>,
    MortonCopy<true, PixelFormat::R16G16_UNORM>,
    MortonCopy<true, PixelFormat::R16G16_FLOAT>,
    MortonCopy<true, PixelFormat::R16G16_UINT>,
    MortonCopy<true, PixelFormat::R16G16_SINT>,
    MortonCopy<true, PixelFormat::R16G16_SNORM>,
    MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
    MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
    MortonCopy<true, PixelFormat::R8G8_UNORM>,
    MortonCopy<true, PixelFormat::R8G8_SNORM>,
    MortonCopy<true, PixelFormat::R8G8_SINT>,
    MortonCopy<true, PixelFormat::R8G8_UINT>,
    MortonCopy<true, PixelFormat::R32G32_UINT>,
    MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
    MortonCopy<true, PixelFormat::R32_UINT>,
    MortonCopy<true, PixelFormat::R32_SINT>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
    MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
    MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
    MortonCopy<true, PixelFormat::BC2_SRGB>,
    MortonCopy<true, PixelFormat::BC3_SRGB>,
    MortonCopy<true, PixelFormat::BC7_SRGB>,
    MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
    MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
    MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
    MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
    MortonCopy<true, PixelFormat::D32_FLOAT>,
    MortonCopy<true, PixelFormat::D16_UNORM>,
    MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
    MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
    MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
};

static constexpr ConversionArray linear_to_morton_fns = {
    MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
    MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
    MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
    MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
    MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
    MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
    MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
    MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
    MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
    MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
    MortonCopy<false, PixelFormat::R8_UNORM>,
    MortonCopy<false, PixelFormat::R8_SNORM>,
    MortonCopy<false, PixelFormat::R8_SINT>,
    MortonCopy<false, PixelFormat::R8_UINT>,
    MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
    MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
    MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
    MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
    MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
    MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
    MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
    MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
    MortonCopy<false, PixelFormat::BC2_UNORM>,
    MortonCopy<false, PixelFormat::BC3_UNORM>,
    MortonCopy<false, PixelFormat::BC4_UNORM>,
    MortonCopy<false, PixelFormat::BC4_SNORM>,
    MortonCopy<false, PixelFormat::BC5_UNORM>,
    MortonCopy<false, PixelFormat::BC5_SNORM>,
    MortonCopy<false, PixelFormat::BC7_UNORM>,
    MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
    MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
    // TODO(Subv): Swizzling ASTC formats are not supported
    nullptr,
    MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
    MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
    MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
    MortonCopy<false, PixelFormat::R32G32_FLOAT>,
    MortonCopy<false, PixelFormat::R32G32_SINT>,
    MortonCopy<false, PixelFormat::R32_FLOAT>,
    MortonCopy<false, PixelFormat::R16_FLOAT>,
    MortonCopy<false, PixelFormat::R16_UNORM>,
    MortonCopy<false, PixelFormat::R16_SNORM>,
    MortonCopy<false, PixelFormat::R16_UINT>,
    MortonCopy<false, PixelFormat::R16_SINT>,
    MortonCopy<false, PixelFormat::R16G16_UNORM>,
    MortonCopy<false, PixelFormat::R16G16_FLOAT>,
    MortonCopy<false, PixelFormat::R16G16_UINT>,
    MortonCopy<false, PixelFormat::R16G16_SINT>,
    MortonCopy<false, PixelFormat::R16G16_SNORM>,
    MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
    MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
    MortonCopy<false, PixelFormat::R8G8_UNORM>,
    MortonCopy<false, PixelFormat::R8G8_SNORM>,
    MortonCopy<false, PixelFormat::R8G8_SINT>,
    MortonCopy<false, PixelFormat::R8G8_UINT>,
    MortonCopy<false, PixelFormat::R32G32_UINT>,
    MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
    MortonCopy<false, PixelFormat::R32_UINT>,
    MortonCopy<false, PixelFormat::R32_SINT>,
    nullptr,
    nullptr,
    nullptr,
    MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
    MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
    MortonCopy<false, PixelFormat::BC2_SRGB>,
    MortonCopy<false, PixelFormat::BC3_SRGB>,
    MortonCopy<false, PixelFormat::BC7_SRGB>,
    MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
    MortonCopy<false, PixelFormat::D32_FLOAT>,
    MortonCopy<false, PixelFormat::D16_UNORM>,
    MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
    MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
    MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
};

static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
    switch (mode) {
    case MortonSwizzleMode::MortonToLinear:
        return morton_to_linear_fns[static_cast<std::size_t>(format)];
    case MortonSwizzleMode::LinearToMorton:
        return linear_to_morton_fns[static_cast<std::size_t>(format)];
    }
    UNREACHABLE();
    return morton_to_linear_fns[static_cast<std::size_t>(format)];
}

void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
                   u8* buffer, u8* addr) {
    GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
                                     tile_width_spacing, buffer, addr);
}

} // namespace VideoCore

@ -1,18 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "common/common_types.h"
#include "video_core/surface.h"

namespace VideoCore {

enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };

void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
                   u8* buffer, u8* addr);

} // namespace VideoCore

@ -28,8 +28,8 @@ namespace VideoCommon {
template <class QueryCache, class HostCounter>
class CounterStreamBase {
public:
    explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
        : cache{cache}, type{type} {}
    explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
        : cache{cache_}, type{type_} {}

    /// Updates the state of the stream, enabling or disabling as needed.
    void Update(bool enabled) {
@ -334,8 +334,8 @@ private:
template <class HostCounter>
class CachedQueryBase {
public:
    explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
        : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
    explicit CachedQueryBase(VAddr cpu_addr_, u8* host_ptr_)
        : cpu_addr{cpu_addr_}, host_ptr{host_ptr_} {}
    virtual ~CachedQueryBase() = default;

    CachedQueryBase(CachedQueryBase&&) noexcept = default;

@ -32,7 +32,7 @@ using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size

class RasterizerInterface {
public:
    virtual ~RasterizerInterface() {}
    virtual ~RasterizerInterface() = default;

    /// Dispatches a draw invocation
    virtual void Draw(bool is_indexed, bool is_instanced) = 0;
@ -76,6 +76,9 @@ public:
    /// Sync memory between guest and host.
    virtual void SyncGuestHost() = 0;

    /// Unmap memory range
    virtual void UnmapMemory(VAddr addr, u64 size) = 0;

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
    /// and invalidated
    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@ -83,6 +86,12 @@ public:
    /// Notify the host renderer to wait for previous primitive and compute operations.
    virtual void WaitForIdle() = 0;

    /// Notify the host renderer to wait for reads and writes to render targets and flush caches.
    virtual void FragmentBarrier() = 0;

    /// Notify the host renderer to make available previous render target writes.
    virtual void TiledCacheBarrier() = 0;

    /// Notify the rasterizer to send all written commands to the host GPU.
    virtual void FlushCommands() = 0;

@ -90,15 +99,15 @@ public:
    virtual void TickFrame() = 0;

    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                       const Tegra::Engines::Fermi2D::Config& copy_config) {
    [[nodiscard]] virtual bool AccelerateSurfaceCopy(
        const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
        const Tegra::Engines::Fermi2D::Config& copy_config) {
        return false;
    }

    /// Attempt to use a faster method to display the framebuffer to screen
    virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                                   u32 pixel_stride) {
    [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                                 VAddr framebuffer_addr, u32 pixel_stride) {
        return false;
    }

@ -110,12 +119,12 @@ public:
                                   const DiskResourceLoadCallback& callback) {}

    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
    GuestDriverProfile& AccessGuestDriverProfile() {
    [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
        return guest_driver_profile;
    }

    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
    const GuestDriverProfile& AccessGuestDriverProfile() const {
    [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
        return guest_driver_profile;
    }
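
One plausible sketch of how an OpenGL backend could satisfy the two new barrier hooks,
assuming the core-profile glMemoryBarrier and glTextureBarrier entry points; this is an
illustration only, and the real renderer implementations may differ:

void RasterizerOpenGL::FragmentBarrier() {
    // Wait on prior framebuffer writes before subsequent reads consume them.
    glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
}

void RasterizerOpenGL::TiledCacheBarrier() {
    // Make previous render target writes available to later texture fetches.
    glTextureBarrier();
}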

@ -38,7 +38,7 @@ public:
    virtual ~RendererBase();

    /// Initialize the renderer
    virtual bool Init() = 0;
    [[nodiscard]] virtual bool Init() = 0;

    /// Shutdown the renderer
    virtual void ShutDown() = 0;
@ -49,43 +49,43 @@ public:
    // Getter/setter functions:
    // ------------------------

    f32 GetCurrentFPS() const {
    [[nodiscard]] f32 GetCurrentFPS() const {
        return m_current_fps;
    }

    int GetCurrentFrame() const {
    [[nodiscard]] int GetCurrentFrame() const {
        return m_current_frame;
    }

    RasterizerInterface& Rasterizer() {
    [[nodiscard]] RasterizerInterface& Rasterizer() {
        return *rasterizer;
    }

    const RasterizerInterface& Rasterizer() const {
    [[nodiscard]] const RasterizerInterface& Rasterizer() const {
        return *rasterizer;
    }

    Core::Frontend::GraphicsContext& Context() {
    [[nodiscard]] Core::Frontend::GraphicsContext& Context() {
        return *context;
    }

    const Core::Frontend::GraphicsContext& Context() const {
    [[nodiscard]] const Core::Frontend::GraphicsContext& Context() const {
        return *context;
    }

    Core::Frontend::EmuWindow& GetRenderWindow() {
    [[nodiscard]] Core::Frontend::EmuWindow& GetRenderWindow() {
        return render_window;
    }

    const Core::Frontend::EmuWindow& GetRenderWindow() const {
    [[nodiscard]] const Core::Frontend::EmuWindow& GetRenderWindow() const {
        return render_window;
    }

    RendererSettings& Settings() {
    [[nodiscard]] RendererSettings& Settings() {
        return renderer_settings;
    }

    const RendererSettings& Settings() const {
    [[nodiscard]] const RendererSettings& Settings() const {
        return renderer_settings;
    }

@ -39,8 +39,8 @@ using Operation = const OperationNode&;
constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};

char Swizzle(std::size_t component) {
    ASSERT(component < 4);
    return component["xyzw"];
    static constexpr std::string_view SWIZZLE{"xyzw"};
    return SWIZZLE.at(component);
}

constexpr bool IsGenericAttribute(Attribute::Index index) {
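
The replaced line leaned on the C definition of subscripting, a[b] == *(a + b), so
component["xyzw"] is simply "xyzw"[component]; it compiles, but with ASSERT stripped in
release builds an out-of-range component silently reads past the four swizzle characters.
std::string_view::at keeps the table lookup and turns that into a hard bounds check. The
equivalence itself, as a standalone check:

static_assert(2["xyzw"] == "xyzw"[2] && "xyzw"[2] == 'z');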

@ -71,7 +71,7 @@ std::string_view GetInputFlags(PixelImap attribute) {
    case PixelImap::Unused:
        break;
    }
    UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
    UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
    return {};
}

@ -123,7 +123,7 @@ std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::Primitive
    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
        return "TRIANGLES_ADJACENCY";
    default:
        UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
        UNIMPLEMENTED_MSG("topology={}", topology);
        return "POINTS";
    }
}
@ -137,7 +137,7 @@ std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
    case Tegra::Shader::OutputTopology::TriangleStrip:
        return "TRIANGLE_STRIP";
    default:
        UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
        UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
        return "points";
    }
}

@ -187,8 +187,8 @@ std::string TextureType(const MetaTexture& meta) {

class ARBDecompiler final {
public:
    explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                           ShaderType stage, std::string_view identifier);
    explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                           ShaderType stage_, std::string_view identifier);

    std::string Code() const {
        return shader_source;
@ -224,7 +224,7 @@ private:

    std::string Visit(const Node& node);

    std::pair<std::string, std::size_t> BuildCoords(Operation);
    std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
    std::string BuildAoffi(Operation);
    std::string GlobalMemoryPointer(const GmemNode& gmem);
    void Exit();

@ -376,9 +376,11 @@ private:
        std::string temporary = AllocTemporary();
        std::string address;
        std::string_view opname;
        bool robust = false;
        if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
            address = GlobalMemoryPointer(*gmem);
            opname = "ATOM";
            robust = true;
        } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
            address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
            opname = "ATOMS";
@ -386,7 +388,15 @@ private:
            UNREACHABLE();
            return "{0, 0, 0, 0}";
        }
        if (robust) {
            AddLine("IF NE.x;");
        }
        AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
        if (robust) {
            AddLine("ELSE;");
            AddLine("MOV.S {}, 0;", temporary);
            AddLine("ENDIF;");
        }
        return temporary;
    }

@ -792,9 +802,9 @@ private:
    };
};

ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                             ShaderType stage, std::string_view identifier)
    : device{device}, ir{ir}, registry{registry}, stage{stage} {
ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                             ShaderType stage_, std::string_view identifier)
    : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
    DefineGlobalMemory();

    AddLine("TEMP RC;");

@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
}

void ARBDecompiler::DeclareGlobalMemory() {
    const std::size_t num_entries = ir.GetGlobalMemory().size();
    const size_t num_entries = ir.GetGlobalMemory().size();
    if (num_entries > 0) {
        const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
        AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
        AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
    }
}

@ -1125,44 +1134,44 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(ast->condition);
    } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(if_then->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
        for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
    } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        AddLine("ELSE;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
        for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(ast->nodes);
    } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
    } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(decoded->nodes);
    } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
        ResetTemporaries();
    } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(ast->condition);
    } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(do_while->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
        for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        const bool is_true = ExprIsTrue(ast->condition);
    } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        const bool is_true = ExprIsTrue(ast_return->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast->kills) {
        if (ast_return->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
@ -1170,11 +1179,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        if (ExprIsTrue(ast->condition)) {
    } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        if (ExprIsTrue(ast_break->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
@ -1342,7 +1351,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
||||
GetGenericAttributeIndex(index), swizzle);
|
||||
}
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
|
||||
UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
|
||||
break;
|
||||
}
|
||||
return "{0, 0, 0, 0}.x";
|
||||
@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
||||
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||
std::string temporary = AllocTemporary();
|
||||
AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
|
||||
AddLine("MOV {}, 0;", temporary);
|
||||
AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
|
||||
return temporary;
|
||||
}
|
||||
|
||||
@ -1406,12 +1416,12 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
|
||||
std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
|
||||
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
|
||||
UNIMPLEMENTED_IF(meta.sampler.is_indexed);
|
||||
UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
|
||||
meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
|
||||
|
||||
const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
|
||||
meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
|
||||
const std::size_t count = operation.GetOperandsCount();
|
||||
std::string temporary = AllocVectorTemporary();
|
||||
std::size_t i = 0;
|
||||
@ -1419,12 +1429,21 @@ std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operati
|
||||
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
|
||||
}
|
||||
if (meta.sampler.is_array) {
|
||||
AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
|
||||
AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
|
||||
++i;
|
||||
}
|
||||
if (meta.sampler.is_shadow) {
|
||||
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
|
||||
std::string compare = Visit(meta.depth_compare);
|
||||
if (is_extended) {
|
||||
ASSERT(i == 4);
|
||||
std::string extra_coord = AllocVectorTemporary();
|
||||
AddLine("MOV.F {}.x, {};", extra_coord, compare);
|
||||
return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
|
||||
}
|
||||
AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
|
||||
++i;
|
||||
}
|
||||
return {std::move(temporary), i};
|
||||
return {temporary, temporary, i};
|
||||
}
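// Note (editor's sketch): BuildCoords now returns a (coords, temporary, index)
// tuple rather than a pair. In the extended shadow + array + cube case the
// fifth coordinate cannot fit into one four-component vector, so the depth
// compare value spills into the .x of a second vector temporary and both
// vectors are passed together as the coordinate operand ("{}, {}" above);
// callers therefore receive separate coords and temporary strings.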

std::string ARBDecompiler::BuildAoffi(Operation operation) {
@ -1441,18 +1460,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
}

std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
// Read a bindless SSBO, return its address and set CC accordingly
// address = c[binding].xy
// length = c[binding].z
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';

const std::string pointer = AllocLongVectorTemporary();
std::string temporary = AllocTemporary();

const u32 local_index = binding / 2;
AddLine("PK64.U {}, c[{}];", pointer, local_index);
AddLine("PK64.U {}, c[{}];", pointer, binding);
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
Visit(gmem.GetBaseAddress()));
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
// Compare offset to length and set CC
AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
return fmt::format("{}.x", pointer);
}
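// Note (editor's sketch): for binding 0 the function emits roughly the
// following assembly, assuming a long vector temporary LMT0 and a scalar T0
// were allocated (names are illustrative):
//   PK64.U LMT0, c[0];               # pack the 64-bit base address from .xy
//   SUB.U T0, <real_addr>, <base_addr>;
//   CVT.U64.U32 LMT0.z, T0;          # zero-extend the 32-bit offset
//   ADD.U64 LMT0.x, LMT0.x, LMT0.z;  # final pointer
//   SLT.U.CC RC.x, T0, c[0].z;       # bounds check against the length in .z
// Loads and stores are then predicated on NE.x, as Visit() and Assign() show.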

@ -1463,9 +1485,7 @@ void ARBDecompiler::Exit() {
}

const auto safe_get_register = [this](u32 reg) -> std::string {
// TODO(Rodrigo): Replace with contains once C++20 releases
const auto& used_registers = ir.GetRegisters();
if (used_registers.find(reg) != used_registers.end()) {
if (ir.GetRegisters().contains(reg)) {
return fmt::format("R{}.x", reg);
}
return "{0, 0, 0, 0}.x";
@ -1552,7 +1572,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
ResetTemporaries();
return {};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
AddLine("IF NE.x;");
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
AddLine("ENDIF;");
ResetTemporaries();
return {};
} else {
@ -1844,7 +1866,7 @@ std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
std::string ARBDecompiler::Texture(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const auto [temporary, swizzle] = BuildCoords(operation);
const auto [coords, temporary, swizzle] = BuildCoords(operation);

std::string_view opcode = "TEX";
std::string extra;
@ -1873,7 +1895,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
}
}

AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
TextureType(meta), BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@ -1882,7 +1904,7 @@ std::string ARBDecompiler::Texture(Operation operation) {
std::string ARBDecompiler::TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const auto [temporary, swizzle] = BuildCoords(operation);
const auto [coords, temporary, swizzle] = BuildCoords(operation);

std::string comp;
if (!meta.sampler.is_shadow) {
@ -1892,7 +1914,7 @@ std::string ARBDecompiler::TextureGather(Operation operation) {

AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
TextureType(meta), BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
}

@ -1930,13 +1952,13 @@ std::string ARBDecompiler::TextureQueryLod(Operation operation) {
std::string ARBDecompiler::TexelFetch(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const auto [temporary, swizzle] = BuildCoords(operation);
const auto [coords, temporary, swizzle] = BuildCoords(operation);

if (!meta.sampler.is_buffer) {
ASSERT(swizzle < 4);
AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
}
AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
BuildAoffi(operation));
AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
return fmt::format("{}.x", temporary);
@ -1947,7 +1969,7 @@ std::string ARBDecompiler::TextureGradient(Operation operation) {
const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
const std::string ddx = AllocVectorTemporary();
const std::string ddy = AllocVectorTemporary();
const std::string coord = BuildCoords(operation).first;
const std::string coord = std::get<1>(BuildCoords(operation));

const std::size_t num_components = meta.derivates.size() / 2;
for (std::size_t index = 0; index < num_components; ++index) {

@ -22,11 +22,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;

MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));

Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
: BufferBlock{cpu_addr_, size_} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
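// Note (editor's sketch): with GL_NV_vertex_buffer_unified_memory the buffer
// is made resident once and its 64-bit GPU address is cached in gpu_address.
// A later vertex-buffer bind can then use the address directly, for example
// (illustrative call, not taken from this commit):
//   glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, binding,
//                          gpu_address, static_cast<GLsizeiptr>(size_));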

@ -34,14 +34,14 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)

Buffer::~Buffer() = default;

void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
data);
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(data_size), data);
}

void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
const GLintptr gl_offset = static_cast<GLintptr>(offset);
if (read_buffer.handle == 0) {
read_buffer.Create();
@ -54,17 +54,16 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
}

void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t size) {
std::size_t copy_size) {
glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
}

OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device_, std::size_t stream_size)
: GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
device{device_} {
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
const Device& device_, OGLStreamBuffer& stream_buffer_,
StateTracker& state_tracker)
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}

@ -22,18 +22,19 @@ namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class StateTracker;

class Buffer : public VideoCommon::BufferBlock {
public:
explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
~Buffer();

void Upload(std::size_t offset, std::size_t size, const u8* data);
void Upload(std::size_t offset, std::size_t data_size, const u8* data);

void Download(std::size_t offset, std::size_t size, u8* data);
void Download(std::size_t offset, std::size_t data_size, u8* data);

void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
std::size_t size);
std::size_t copy_size);

GLuint Handle() const noexcept {
return gl_buffer.handle;
@ -54,7 +55,8 @@ class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, std::size_t stream_size);
const Device& device, OGLStreamBuffer& stream_buffer,
StateTracker& state_tracker);
~OGLBufferCache();

BufferInfo GetEmptyBuffer(std::size_t) override;

@ -5,9 +5,11 @@
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <optional>
#include <span>
#include <vector>

#include <glad/glad.h>
@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;

constexpr u32 NumStages = 5;

constexpr std::array LimitUBOs = {
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};

constexpr std::array LimitSSBOs = {
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};

constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};

constexpr std::array LimitImages = {
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
};
constexpr std::array LIMIT_SAMPLERS = {
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
};
constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
};

template <typename T>
T GetInteger(GLenum pname) {
@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
return extensions;
}

bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
return std::find(images.begin(), images.end(), extension) != images.end();
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
return std::ranges::find(extensions, extension) != extensions.end();
}

u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {

std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
[](GLenum pname) { return GetInteger<u32>(pname); });
std::ranges::transform(LIMIT_UBOS, max.begin(),
[](GLenum pname) { return GetInteger<u32>(pname); });
return max;
}

@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages,
LIMIT_SAMPLERS[stage])};
}

u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin

// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);

// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
continue;
}
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}

// Compute doesn't care about any of this.
@ -188,17 +193,22 @@ bool IsASTCSupported() {
return true;
}

[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
}
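// Note (editor's sketch): detection is observable from the environment; for
// instance, a hypothetical launch such as `NSIGHT_LAUNCHED=1 ./yuzu` would
// make HasDebuggingToolAttached() report true even when the driver does not
// expose GL_EXT_debug_tool.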

} // Anonymous namespace

Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();

const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
const bool is_intel = vendor == "Intel";

bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@ -207,9 +217,8 @@ Device::Device()
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}

uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@ -223,8 +232,10 @@ Device::Device()
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || is_intel;
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);

// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@ -239,6 +250,8 @@ Device::Device()
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);

if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");

@ -36,11 +36,11 @@ public:
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}

std::size_t GetUniformBufferAlignment() const {
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}

std::size_t GetShaderStorageBufferAlignment() const {
size_t GetShaderStorageBufferAlignment() const {
return shader_storage_alignment;
}

@ -96,6 +96,10 @@ public:
return has_precise_bug;
}

bool HasBrokenTextureViewFormats() const {
return has_broken_texture_view_formats;
}

bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
@ -104,6 +108,10 @@ public:
return has_nv_viewport_array2;
}

bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}

bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@ -118,8 +126,8 @@ private:

std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
@ -133,8 +141,10 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
};

@ -11,10 +11,10 @@

namespace OpenGL {

GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}

GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
: FenceBase(address, payload, is_stubbed) {}
GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_} {}

GLInnerFence::~GLInnerFence() = default;

@ -45,10 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}

FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCacheOpenGL& texture_cache,
OGLBufferCache& buffer_cache, QueryCache& query_cache)
: GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
Tegra::GPU& gpu_, TextureCache& texture_cache_,
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}

Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);

@ -17,8 +17,8 @@ namespace OpenGL {

class GLInnerFence : public VideoCommon::FenceBase {
public:
GLInnerFence(u32 payload, bool is_stubbed);
GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
~GLInnerFence();

void Queue();
@ -33,13 +33,13 @@ private:

using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager =
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;

class FenceManagerOpenGL final : public GenericFenceManager {
public:
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
QueryCache& query_cache);
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
QueryCache& query_cache_);

protected:
Fence CreateFence(u32 value, bool is_stubbed) override;

@ -1,85 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <tuple>
#include <unordered_map>
#include <utility>

#include <glad/glad.h>

#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"

namespace OpenGL {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using VideoCore::Surface::SurfaceType;

FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;

FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;

GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
const auto [entry, is_cache_miss] = cache.try_emplace(key);
auto& framebuffer{entry->second};
if (is_cache_miss) {
framebuffer = CreateFramebuffer(key);
}
return framebuffer.handle;
}

OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
OGLFramebuffer framebuffer;
framebuffer.Create();

// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);

if (key.zeta) {
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
}

std::size_t num_buffers = 0;
std::array<GLenum, Maxwell::NumRenderTargets> targets;

for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
if (!key.colors[index]) {
targets[index] = GL_NONE;
continue;
}
const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);

const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
num_buffers = index + 1;
}

if (num_buffers > 0) {
glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
} else {
glDrawBuffer(GL_NONE);
}

return framebuffer;
}

std::size_t FramebufferCacheKey::Hash() const noexcept {
std::size_t hash = std::hash<View>{}(zeta);
for (const auto& color : colors) {
hash ^= std::hash<View>{}(color);
}
hash ^= static_cast<std::size_t>(color_attachments) << 16;
return hash;
}

bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
return std::tie(colors, zeta, color_attachments) ==
std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
}

} // namespace OpenGL

@ -1,68 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cstddef>
#include <unordered_map>

#include <glad/glad.h>

#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"

namespace OpenGL {

constexpr std::size_t BitsPerAttachment = 4;

struct FramebufferCacheKey {
View zeta;
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
u32 color_attachments = 0;

std::size_t Hash() const noexcept;

bool operator==(const FramebufferCacheKey& rhs) const noexcept;

bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
return !operator==(rhs);
}

void SetAttachment(std::size_t index, u32 attachment) {
color_attachments |= attachment << (BitsPerAttachment * index);
}
};

} // namespace OpenGL

namespace std {

template <>
struct hash<OpenGL::FramebufferCacheKey> {
std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
return k.Hash();
}
};

} // namespace std

namespace OpenGL {

class FramebufferCacheOpenGL {
public:
FramebufferCacheOpenGL();
~FramebufferCacheOpenGL();

GLuint GetFramebuffer(const FramebufferCacheKey& key);

private:
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);

std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
};

} // namespace OpenGL

@ -30,11 +30,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {

} // Anonymous namespace

QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory)
: VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>(
rasterizer, maxwell3d, gpu_memory),
gl_rasterizer{rasterizer} {}
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_)
: QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}

QueryCache::~QueryCache() = default;

@ -59,10 +57,11 @@ bool QueryCache::AnyCommandQueued() const noexcept {
return gl_rasterizer.AnyCommandQueued();
}

HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type)
: VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
type{type}, query{cache.AllocateQuery(type)} {
HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_)
: HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{
cache.AllocateQuery(
type)} {
glBeginQuery(GetTarget(type), query.handle);
}
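// Note (editor's sketch): a HostCounter brackets the lifetime of one GL query
// object: glBeginQuery() fires here in the constructor, the matching
// glEndQuery() is issued later from EndQuery(), and BlockingQuery() then reads
// back the accumulated result (presumably via glGetQueryObject*; the exact
// call is outside this hunk).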

@ -86,13 +85,14 @@ u64 HostCounter::BlockingQuery() const {
return static_cast<u64>(value);
}

CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_)
: CachedQueryBase{cpu_addr_, host_ptr_}, cache{&cache_}, type{type_} {}

CachedQuery::~CachedQuery() = default;

CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
: CachedQueryBase(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}

CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
cache = rhs.cache;

@ -29,8 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::MemoryManager& gpu_memory);
explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_);
~QueryCache();

OGLQuery AllocateQuery(VideoCore::QueryType type);
@ -46,8 +46,8 @@ private:

class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type);
explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
VideoCore::QueryType type_);
~HostCounter();

void EndQuery();
@ -62,8 +62,8 @@ private:

class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
u8* host_ptr);
explicit CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr_,
u8* host_ptr_);
~CachedQuery() override;

CachedQuery(CachedQuery&& rhs) noexcept;

@ -25,12 +25,15 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h"

namespace OpenGL {

@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255

namespace {

constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
constexpr size_t TOTAL_CONST_BUFFER_BYTES =
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;

constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;

constexpr size_t MAX_TEXTURES = 192;
constexpr size_t MAX_IMAGES = 48;

struct TextureHandle {
constexpr TextureHandle(u32 data, bool via_header_index) {
const Tegra::Texture::TextureHandle handle{data};
image = handle.tic_id;
sampler = via_header_index ? image : handle.tsc_id.Value();
}

u32 image;
u32 sampler;
};
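// Note (editor's sketch): TextureHandle splits a packed Maxwell handle into
// its TIC (texture) and TSC (sampler) table indices. When handles are taken
// from the texture header, the sampler id mirrors the image id:
//   const TextureHandle via_cbuf{raw, false};  // image = tic_id, sampler = tsc_id
//   const TextureHandle via_header{raw, true}; // image = sampler = tic_id
// (raw stands in for a 32-bit value read out of a const buffer.)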

template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
ShaderType shader_type, std::size_t index = 0) {
TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
ShaderType shader_type, size_t index = 0) {
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
return engine.GetTextureInfo(handle_1 | handle_2);
return TextureHandle(handle_1 | handle_2, via_header_index);
}
}
if (entry.is_bindless) {
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(handle);
}

const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
return engine.GetStageTexture(shader_type, offset);
} else {
return engine.GetTexture(offset);
const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
return TextureHandle(raw, via_header_index);
}
const u32 buffer = engine.GetBoundBuffer();
const u64 offset = (entry.offset + index) * sizeof(u32);
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
}
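// Note (editor's sketch): the rewritten helper always reduces an entry to a
// raw 32-bit handle fetched from a const buffer. On the common path, element
// `index` of an arrayed sampler occupies consecutive u32 slots of the bound
// buffer, hence the (entry.offset + index) * sizeof(u32) byte offset above.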

std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
if (!entry.IsIndirect()) {
return entry.GetSize();
}

if (buffer.size > Maxwell::MaxConstBufferSize) {
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
Maxwell::MaxConstBufferSize);
@ -131,7 +142,7 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
case 43:
return {GL_BACK_SECONDARY_COLOR_NV, 0};
}
UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
UNIMPLEMENTED_MSG("index={}", index);
return {GL_POSITION, 0};
}

@ -139,35 +150,68 @@ void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}

void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
if (num_entries == 0) {
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
if (num_ssbos == 0) {
return;
}
if (num_entries % 2 == 1) {
pointers[num_entries] = 0;
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
reinterpret_cast<const GLuint*>(ssbos));
}

ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
if (entry.is_buffer) {
return ImageViewType::Buffer;
}
const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
glProgramLocalParametersI4uivNV(target, 0, num_vectors,
reinterpret_cast<const GLuint*>(pointers));
switch (entry.type) {
case Tegra::Shader::TextureType::Texture1D:
return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
case Tegra::Shader::TextureType::Texture2D:
return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
case Tegra::Shader::TextureType::Texture3D:
return ImageViewType::e3D;
case Tegra::Shader::TextureType::TextureCube:
return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
}
UNREACHABLE();
return ImageViewType::e2D;
}

ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
switch (entry.type) {
case Tegra::Shader::ImageType::Texture1D:
return ImageViewType::e1D;
case Tegra::Shader::ImageType::Texture1DArray:
return ImageViewType::e1DArray;
case Tegra::Shader::ImageType::Texture2D:
return ImageViewType::e2D;
case Tegra::Shader::ImageType::Texture2DArray:
return ImageViewType::e2DArray;
case Tegra::Shader::ImageType::Texture3D:
return ImageViewType::e3D;
case Tegra::Shader::ImageType::TextureBuffer:
return ImageViewType::Buffer;
}
UNREACHABLE();
return ImageViewType::e2D;
}

} // Anonymous namespace

RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory, const Device& device_,
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
ScreenInfo& screen_info_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
: RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
stream_buffer(device, state_tracker),
texture_cache_runtime(device, program_manager, state_tracker),
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
query_cache(*this, maxwell3d, gpu_memory),
buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
async_shaders(emu_window) {
CheckExtensions();

async_shaders(emu_window_) {
unified_uniform_buffer.Create();
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);

@ -178,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra:
nullptr, 0);
}
}

if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
@ -190,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
}
}

void RasterizerOpenGL::CheckExtensions() {
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
LOG_WARNING(
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
}

void RasterizerOpenGL::SetupVertexFormat() {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
@ -320,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
return info.offset;
}

void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader);
u32 clip_distances = 0;

std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
image_view_indices.clear();
sampler_handles.clear();

texture_cache.SynchronizeGraphicsDescriptors();

for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@ -342,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
continue;
}

// Currently these stages are not supported in the OpenGL backend.
// TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
if (program == Maxwell::ShaderProgram::TesselationControl ||
@ -351,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
}
|
||||
|
||||
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||
|
||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
@ -367,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
break;
|
||||
}
|
||||
|
||||
// Stage indices are 0 - 5
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||
const size_t stage = index == 0 ? 0 : index - 1;
|
||||
shaders[stage] = shader;
|
||||
|
||||
SetupDrawConstBuffers(stage, shader);
|
||||
SetupDrawGlobalMemory(stage, shader);
|
||||
SetupDrawTextures(stage, shader);
|
||||
SetupDrawImages(stage, shader);
|
||||
SetupDrawTextures(shader, stage);
|
||||
SetupDrawImages(shader, stage);
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
@ -388,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
SyncClipEnabled(clip_distances);
|
||||
maxwell3d.dirty.flags[Dirty::Shaders] = false;
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader* const shader = shaders[stage];
|
||||
if (shader) {
|
||||
const auto base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
@ -421,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
|
||||
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureFramebuffers() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
|
||||
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
|
||||
return;
|
||||
}
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
|
||||
View depth_surface = texture_cache.GetDepthBufferSurface(true);
|
||||
|
||||
const auto& regs = maxwell3d.regs;
|
||||
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
|
||||
|
||||
// Bind the framebuffer surfaces
|
||||
FramebufferCacheKey key;
|
||||
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
|
||||
for (std::size_t index = 0; index < colors_count; ++index) {
|
||||
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
|
||||
if (!color_surface) {
|
||||
continue;
|
||||
}
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even
|
||||
// if the shader doesn't actually write to it.
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
|
||||
key.SetAttachment(index, regs.rt_control.GetMap(index));
|
||||
key.colors[index] = std::move(color_surface);
|
||||
}
|
||||
|
||||
if (depth_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even if
|
||||
// the shader doesn't actually write to it.
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
key.zeta = std::move(depth_surface);
|
||||
}
|
||||
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
View color_surface;
|
||||
|
||||
if (using_color) {
|
||||
// Determine if we have to preserve the contents.
|
||||
// First we have to make sure all clear masks are enabled.
|
||||
bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
|
||||
!regs.clear_buffers.B || !regs.clear_buffers.A;
|
||||
const std::size_t index = regs.clear_buffers.RT;
|
||||
if (regs.clear_flags.scissor) {
|
||||
// Then we have to confirm scissor testing clears the whole image.
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
preserve_contents |= scissor.min_x > 0;
|
||||
preserve_contents |= scissor.min_y > 0;
|
||||
preserve_contents |= scissor.max_x < regs.rt[index].width;
|
||||
preserve_contents |= scissor.max_y < regs.rt[index].height;
|
||||
}
|
||||
|
||||
color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
}
|
||||
|
||||
View depth_surface;
|
||||
if (using_depth_stencil) {
|
||||
bool preserve_contents = false;
|
||||
if (regs.clear_flags.scissor) {
|
||||
// For depth stencil clears we only have to confirm scissor test covers the whole image.
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
preserve_contents |= scissor.min_x > 0;
|
||||
preserve_contents |= scissor.min_y > 0;
|
||||
preserve_contents |= scissor.max_x < regs.zeta_width;
|
||||
preserve_contents |= scissor.max_y < regs.zeta_height;
|
||||
}
|
||||
|
||||
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
}
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
FramebufferCacheKey key;
|
||||
key.colors[0] = std::move(color_surface);
|
||||
key.zeta = std::move(depth_surface);
|
||||
|
||||
state_tracker.NotifyFramebuffer();
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
if (!maxwell3d.ShouldExecute()) {
|
||||
return;
|
||||
@ -527,8 +491,9 @@ void RasterizerOpenGL::Clear() {
|
||||
regs.clear_buffers.A) {
|
||||
use_color = true;
|
||||
|
||||
state_tracker.NotifyColorMask0();
|
||||
glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
|
||||
const GLuint index = regs.clear_buffers.RT;
|
||||
state_tracker.NotifyColorMask(index);
|
||||
glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
|
||||
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
|
||||
|
||||
// TODO(Rodrigo): Determine if clamping is used on clears
|
||||
@ -561,15 +526,17 @@ void RasterizerOpenGL::Clear() {
|
||||
state_tracker.NotifyScissor0();
|
||||
glDisablei(GL_SCISSOR_TEST, 0);
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
||||
|
||||
ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
}
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
}
|
||||
if (use_depth && use_stencil) {
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
|
||||
} else if (use_depth) {
|
||||
@ -626,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
// Prepare the vertex array.
|
||||
const bool invalidated = buffer_cache.Map(buffer_size);
|
||||
|
||||
if (invalidated) {
|
||||
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
dirty[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
dirty[index] = true;
|
||||
}
|
||||
}
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
// Prepare vertex array format.
|
||||
SetupVertexFormat();
|
||||
@@ -659,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     }
 
     // Setup shaders and their used resources.
-    texture_cache.GuardSamplers(true);
-    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
-    SetupShaders(primitive_mode);
-    texture_cache.GuardSamplers(false);
-
-    ConfigureFramebuffers();
+    auto lock = texture_cache.AcquireLock();
+    SetupShaders();
 
     // Signal the buffer cache that we are not going to upload more things.
     buffer_cache.Unmap();
 
+    texture_cache.UpdateRenderTargets(false);
+    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
     program_manager.BindGraphicsPipeline();
 
-    if (texture_cache.TextureBarrier()) {
-        glTextureBarrier();
-    }
-
+    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
     BeginTransformFeedback(primitive_mode);
 
     const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
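For context, MaxwellToGL::PrimitiveTopology (moved next to its only use above) maps guest topology registers to GL primitive modes. A reduced sketch of such a mapping; the enum here is simplified and hypothetical, not the real Maxwell register layout:

#include <glad/glad.h>

enum class Topology { Points, Lines, Triangles, TriangleStrip };

// Translate a guest topology to the GLenum passed to draw calls and
// transform feedback setup.
GLenum ToGLTopology(Topology topology) {
    switch (topology) {
    case Topology::Points:
        return GL_POINTS;
    case Topology::Lines:
        return GL_LINES;
    case Topology::Triangles:
        return GL_TRIANGLES;
    case Topology::TriangleStrip:
        return GL_TRIANGLE_STRIP;
    }
    return GL_TRIANGLES; // unreachable with the enum above
}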
@@ -726,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     buffer_cache.Acquire();
     current_cbuf = 0;
 
-    auto kernel = shader_cache.GetComputeKernel(code_addr);
-    program_manager.BindCompute(kernel->GetHandle());
+    Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
 
-    SetupComputeTextures(kernel);
-    SetupComputeImages(kernel);
+    auto lock = texture_cache.AcquireLock();
+    BindComputeTextures(kernel);
 
-    const std::size_t buffer_size =
-        Tegra::Engines::KeplerCompute::NumConstBuffers *
-        (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+    const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
+                               (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
     buffer_cache.Map(buffer_size);
 
     SetupComputeConstBuffers(kernel);
@@ -743,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     buffer_cache.Unmap();
 
     const auto& launch_desc = kepler_compute.launch_description;
-    program_manager.BindCompute(kernel->GetHandle());
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
     ++num_queued_commands;
 }
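The reworked dispatch binds the compute program while descriptors are set up (inside BindComputeTextures) rather than just before the launch. A minimal sketch of the tail end of that sequence, assuming `program` is a linked compute program; plain glUseProgram stands in for program_manager.BindCompute here:

#include <glad/glad.h>

// Issue one compute launch; the grid dimensions come straight from the
// guest's launch descriptor in the code above.
void DispatchSketch(GLuint program, GLuint grid_x, GLuint grid_y, GLuint grid_z) {
    glUseProgram(program);
    glDispatchCompute(grid_x, grid_y, grid_z);
}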
@@ -764,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
-    texture_cache.FlushRegion(addr, size);
+    {
+        auto lock = texture_cache.AcquireLock();
+        texture_cache.DownloadMemory(addr, size);
+    }
     buffer_cache.FlushRegion(addr, size);
     query_cache.FlushRegion(addr, size);
 }
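The extra braces around AcquireLock in these hooks are deliberate: the guard covers only the texture cache call and is released before the buffer and query caches are touched, keeping the critical section small. A minimal stand-in for that RAII pattern, assuming (hypothetically) that the lock is a plain mutex guard:

#include <mutex>

class TextureCacheSketch {
public:
    // Returns an RAII guard; cache state may only be touched while it lives.
    [[nodiscard]] std::unique_lock<std::mutex> AcquireLock() {
        return std::unique_lock<std::mutex>{mutex};
    }

private:
    std::mutex mutex;
};

void FlushRegionSketch(TextureCacheSketch& texture_cache) {
    {
        const auto lock = texture_cache.AcquireLock();
        // texture_cache.DownloadMemory(addr, size) would run here.
    } // guard destroyed: lock released before the other caches are flushed
}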
@@ -773,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
     if (!Settings::IsGPULevelHigh()) {
         return buffer_cache.MustFlushRegion(addr, size);
     }
-    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+    return texture_cache.IsRegionGpuModified(addr, size) ||
+           buffer_cache.MustFlushRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -781,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
-    texture_cache.InvalidateRegion(addr, size);
+    {
+        auto lock = texture_cache.AcquireLock();
+        texture_cache.WriteMemory(addr, size);
+    }
     shader_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
     query_cache.InvalidateRegion(addr, size);
@@ -792,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
-    texture_cache.OnCPUWrite(addr, size);
+    {
+        auto lock = texture_cache.AcquireLock();
+        texture_cache.WriteMemory(addr, size);
+    }
     shader_cache.OnCPUWrite(addr, size);
     buffer_cache.OnCPUWrite(addr, size);
 }
 
 void RasterizerOpenGL::SyncGuestHost() {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    texture_cache.SyncGuestHost();
     buffer_cache.SyncGuestHost();
     shader_cache.SyncGuestHost();
 }
 
+void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
+    {
+        auto lock = texture_cache.AcquireLock();
+        texture_cache.UnmapMemory(addr, size);
+    }
+    buffer_cache.OnCPUWrite(addr, size);
+    shader_cache.OnCPUWrite(addr, size);
+}
+
 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
@@ -845,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
                     GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
 }
 
+void RasterizerOpenGL::FragmentBarrier() {
+    glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+}
+
+void RasterizerOpenGL::TiledCacheBarrier() {
+    glTextureBarrier();
+}
+
 void RasterizerOpenGL::FlushCommands() {
     // Only flush when we have commands queued to OpenGL.
     if (num_queued_commands == 0) {
@@ -858,53 +833,103 @@ void RasterizerOpenGL::TickFrame() {
     // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
     num_queued_commands = 0;
 
     fence_manager.TickFrame();
     buffer_cache.TickFrame();
+    {
+        auto lock = texture_cache.AcquireLock();
+        texture_cache.TickFrame();
+    }
 }
 
-bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+                                             const Tegra::Engines::Fermi2D::Surface& dst,
                                              const Tegra::Engines::Fermi2D::Config& copy_config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
-    texture_cache.DoFermiCopy(src, dst, copy_config);
+    auto lock = texture_cache.AcquireLock();
+    texture_cache.BlitImage(dst, src, copy_config);
     return true;
 }
 
 bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                          VAddr framebuffer_addr, u32 pixel_stride) {
-    if (!framebuffer_addr) {
-        return {};
+    if (framebuffer_addr == 0) {
+        return false;
     }
 
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
-    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
-    if (!surface) {
-        return {};
+    auto lock = texture_cache.AcquireLock();
+    ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
+    if (!image_view) {
+        return false;
     }
 
     // Verify that the cached surface is the same size and format as the requested framebuffer
-    const auto& params{surface->GetSurfaceParams()};
-    const auto& pixel_format{
-        VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
-    ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
-    ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
-
-    if (params.pixel_format != pixel_format) {
-        LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
-    }
-
-    screen_info.display_texture = surface->GetTexture();
-    screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
+    // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
+    // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
+
+    screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+    screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
     return true;
 }
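AccelerateDisplay now resolves the guest framebuffer to an ImageView and publishes its 2D handle plus an sRGB flag; everything the presentation blit needs is reduced to two fields. A hypothetical mirror of the ScreenInfo fields written above (the real struct lives elsewhere in renderer_opengl):

#include <glad/glad.h>

// The final blit samples display_texture and, when display_srgb is set,
// selects an sRGB-aware output path.
struct ScreenInfoSketch {
    GLuint display_texture = 0;
    bool display_srgb = false;
};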
 
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
-    static constexpr std::array PARAMETER_LUT = {
-        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
+void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
+    image_view_indices.clear();
+    sampler_handles.clear();
+
+    texture_cache.SynchronizeComputeDescriptors();
+
+    SetupComputeTextures(kernel);
+    SetupComputeImages(kernel);
+
+    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+    texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+    program_manager.BindCompute(kernel->GetHandle());
+    size_t image_view_index = 0;
+    size_t texture_index = 0;
+    size_t image_index = 0;
+    BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
+}
+
+void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
+                                    GLuint base_image, size_t& image_view_index,
+                                    size_t& texture_index, size_t& image_index) {
+    const GLuint* const samplers = sampler_handles.data() + texture_index;
+    const GLuint* const textures = texture_handles.data() + texture_index;
+    const GLuint* const images = image_handles.data() + image_index;
+
+    const size_t num_samplers = entries.samplers.size();
+    for (const auto& sampler : entries.samplers) {
+        for (size_t i = 0; i < sampler.size; ++i) {
+            const ImageViewId image_view_id = image_view_ids[image_view_index++];
+            const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+            const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
+            texture_handles[texture_index++] = handle;
+        }
+    }
+    const size_t num_images = entries.images.size();
+    for (size_t unit = 0; unit < num_images; ++unit) {
+        // TODO: Mark as modified
+        const ImageViewId image_view_id = image_view_ids[image_view_index++];
+        const ImageView& image_view = texture_cache.GetImageView(image_view_id);
+        const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
+        image_handles[image_index] = handle;
+        ++image_index;
+    }
+    if (num_samplers > 0) {
+        glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
+        glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
+    }
+    if (num_images > 0) {
+        glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
+    }
+}
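BindTextures gathers every handle for a draw or dispatch into flat arrays and submits them with ARB_multi_bind, one GL call per object type instead of one call per unit. A minimal sketch of that submission, assuming the tables were filled beforehand:

#include <array>
#include <glad/glad.h>

void MultiBindSketch(GLuint first_unit, const std::array<GLuint, 4>& textures,
                     const std::array<GLuint, 4>& samplers) {
    const GLsizei count = static_cast<GLsizei>(textures.size());
    // Binds units [first_unit, first_unit + count) in a single call each;
    // a zero entry unbinds that unit.
    glBindTextures(first_unit, count, textures.data());
    glBindSamplers(first_unit, count, samplers.data());
}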
 
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
+    static constexpr std::array PARAMETER_LUT{
+        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+    };
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& stages = maxwell3d.state.shader_stages;
     const auto& shader_stage = stages[stage_index];
@@ -1003,12 +1028,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
         GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
         GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
     };
 
     const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
     const auto& entries{shader->GetEntries().global_memory_entries};
 
-    std::array<GLuint64EXT, 32> pointers;
-    ASSERT(entries.size() < pointers.size());
+    std::array<BindlessSSBO, 32> ssbos;
+    ASSERT(entries.size() < ssbos.size());
 
     const bool assembly_shaders = device.UseAssemblyShaders();
     u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
@@ -1016,11 +1040,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
         const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
         const u32 size{gpu_memory.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
         ++binding;
     }
     if (assembly_shaders) {
-        UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
+        UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
     }
 }
 
@@ -1028,106 +1052,85 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
     const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
     const auto& entries{kernel->GetEntries().global_memory_entries};
 
-    std::array<GLuint64EXT, 32> pointers;
-    ASSERT(entries.size() < pointers.size());
+    std::array<BindlessSSBO, 32> ssbos;
+    ASSERT(entries.size() < ssbos.size());
 
     u32 binding = 0;
     for (const auto& entry : entries) {
         const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
         const u32 size{gpu_memory.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
+        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
         ++binding;
     }
     if (device.UseAssemblyShaders()) {
-        UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
+        UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), entries.size());
     }
 }
 
 void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
-                                         GPUVAddr gpu_addr, std::size_t size,
-                                         GLuint64EXT* pointer) {
-    const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
+                                         GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
+    const size_t alignment{device.GetShaderStorageBufferAlignment()};
     const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
     if (device.UseAssemblyShaders()) {
-        *pointer = info.address + info.offset;
+        *ssbo = BindlessSSBO{
+            .address = static_cast<GLuint64EXT>(info.address + info.offset),
+            .length = static_cast<GLsizei>(size),
+            .padding = 0,
+        };
     } else {
         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
                           static_cast<GLsizeiptr>(size));
    }
 }
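SetupGlobalMemory now emits one of two representations of the same buffer range: a packed 128-bit BindlessSSBO record for the NV assembly-shader path, or a classic indexed bind for the GLSL path. A sketch of the core-profile fallback; the bindless record is only filled in memory here and handed to the driver later by UpdateBindlessSSBOs:

#include <glad/glad.h>

// Classic path: expose [offset, offset + size) of `buffer` as the SSBO at
// index `binding` for the currently bound program.
void BindStorageRange(GLuint binding, GLuint buffer, GLintptr offset, GLsizeiptr size) {
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer, offset, size);
}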
 
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
-    MICROPROFILE_SCOPE(OpenGL_Texture);
-    u32 binding = device.GetBaseBindings(stage_index).sampler;
+void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
+    const bool via_header_index =
+        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
     for (const auto& entry : shader->GetEntries().samplers) {
         const auto shader_type = static_cast<ShaderType>(stage_index);
-        for (std::size_t i = 0; i < entry.size; ++i) {
-            const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
-            SetupTexture(binding++, texture, entry);
+        for (size_t index = 0; index < entry.size; ++index) {
+            const auto handle =
+                GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
+            const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
+            sampler_handles.push_back(sampler->Handle());
+            image_view_indices.push_back(handle.image);
         }
     }
 }
 
-void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
-    MICROPROFILE_SCOPE(OpenGL_Texture);
-    u32 binding = 0;
+void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
+    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
     for (const auto& entry : kernel->GetEntries().samplers) {
-        for (std::size_t i = 0; i < entry.size; ++i) {
-            const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
-            SetupTexture(binding++, texture, entry);
+        for (size_t i = 0; i < entry.size; ++i) {
+            const auto handle =
+                GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
+            const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
+            sampler_handles.push_back(sampler->Handle());
+            image_view_indices.push_back(handle.image);
         }
     }
 }
 
-void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
-                                    const SamplerEntry& entry) {
-    const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
-    if (!view) {
-        // Can occur when texture addr is null or its memory is unmapped/invalid
-        glBindSampler(binding, 0);
-        glBindTextureUnit(binding, 0);
-        return;
-    }
-    const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
-                                           texture.tic.z_source, texture.tic.w_source);
-    glBindTextureUnit(binding, handle);
-    if (!view->GetSurfaceParams().IsBuffer()) {
-        glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
-    }
-}
-
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
-    u32 binding = device.GetBaseBindings(stage_index).image;
+void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
+    const bool via_header_index =
+        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
     for (const auto& entry : shader->GetEntries().images) {
         const auto shader_type = static_cast<ShaderType>(stage_index);
-        const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
-        SetupImage(binding++, tic, entry);
+        const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
+        image_view_indices.push_back(handle.image);
     }
 }
 
-void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
-    u32 binding = 0;
+void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
+    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
     for (const auto& entry : shader->GetEntries().images) {
-        const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
-        SetupImage(binding++, tic, entry);
+        const auto handle =
+            GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
+        image_view_indices.push_back(handle.image);
     }
 }
 
-void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
-                                  const ImageEntry& entry) {
-    const auto view = texture_cache.GetImageSurface(tic, entry);
-    if (!view) {
-        glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
-        return;
-    }
-    if (entry.is_written) {
-        view->MarkAsModified(texture_cache.Tick());
-    }
-    const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
-    glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
-}
-
 void RasterizerOpenGL::SyncViewport() {
     auto& flags = maxwell3d.dirty.flags;
     const auto& regs = maxwell3d.regs;
@@ -1157,7 +1160,7 @@ void RasterizerOpenGL::SyncViewport() {
         flags[Dirty::ClipControl] = false;
 
         bool flip_y = false;
-        if (regs.viewport_transform[0].scale_y < 0.0) {
+        if (regs.viewport_transform[0].scale_y < 0.0f) {
            flip_y = !flip_y;
        }
        if (regs.screen_y_control.y_negate != 0) {
@@ -1527,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
     flags[Dirty::PointSize] = false;
 
     oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
+    oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
 
-    if (maxwell3d.regs.vp_point_size.enable) {
-        // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
-        glEnable(GL_PROGRAM_POINT_SIZE);
-        return;
-    }
-
-    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
-    // in OpenGL).
     glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
-    glDisable(GL_PROGRAM_POINT_SIZE);
 }
 
 void RasterizerOpenGL::SyncLineState() {
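The simplification works because the fixed glPointSize value only takes effect while GL_PROGRAM_POINT_SIZE is disabled, so both state bits can be written unconditionally. A minimal sketch of the same semantics, with the clamp to 1.0 guarding against guests (nouveau, per the removed comment) submitting a point size of 0, which is invalid in OpenGL:

#include <algorithm>
#include <glad/glad.h>

void SyncPointSizeSketch(bool shader_writes_point_size, float guest_point_size) {
    if (shader_writes_point_size) {
        glEnable(GL_PROGRAM_POINT_SIZE); // gl_PointSize from the shader wins
    } else {
        glDisable(GL_PROGRAM_POINT_SIZE); // fixed point size applies
    }
    glPointSize(std::max(1.0f, guest_point_size));
}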
@@ -1580,10 +1575,6 @@ void RasterizerOpenGL::SyncAlphaTest() {
     flags[Dirty::AlphaTest] = false;
 
     const auto& regs = maxwell3d.regs;
-    if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
-        LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
-    }
-
     if (regs.alpha_test_enabled) {
         glEnable(GL_ALPHA_TEST);
         glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
 
@@ -7,12 +7,13 @@
 #include <array>
 #include <atomic>
 #include <cstddef>
 #include <map>
 #include <memory>
 #include <optional>
 #include <tuple>
 #include <utility>
 
+#include <boost/container/static_vector.hpp>
 
 #include <glad/glad.h>
 
 #include "common/common_types.h"
@@ -23,16 +24,14 @@
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_fence_manager.h"
-#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/textures/texture.h"
 
@@ -51,14 +50,21 @@ class MemoryManager;
 namespace OpenGL {
 
 struct ScreenInfo;
-struct DrawParameters;
+struct ShaderEntries;
+
+struct BindlessSSBO {
+    GLuint64EXT address;
+    GLsizei length;
+    GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
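Since the struct is handed to the driver as raw data, its layout is load-bearing; the static_assert above pins the total size at 128 bits. A stricter sketch that also pins each field offset, assuming the BindlessSSBO definition above is in scope (GLuint64EXT is 8 bytes, GLsizei 4):

#include <climits>
#include <cstddef>

// Each record must match what the assembly-shader path consumes verbatim.
static_assert(offsetof(BindlessSSBO, address) == 0);
static_assert(offsetof(BindlessSSBO, length) == 8);
static_assert(offsetof(BindlessSSBO, padding) == 12);
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);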
 
 class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
 public:
-    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
-                              Core::Memory::Memory& cpu_memory, const Device& device,
-                              ScreenInfo& screen_info, ProgramManager& program_manager,
-                              StateTracker& state_tracker);
+    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+                              Core::Memory::Memory& cpu_memory_, const Device& device_,
+                              ScreenInfo& screen_info_, ProgramManager& program_manager_,
+                              StateTracker& state_tracker_);
     ~RasterizerOpenGL() override;
 
     void Draw(bool is_indexed, bool is_instanced) override;
@@ -72,15 +78,18 @@ public:
     void InvalidateRegion(VAddr addr, u64 size) override;
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
+    void UnmapMemory(VAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
     void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitForIdle() override;
+    void FragmentBarrier() override;
+    void TiledCacheBarrier() override;
     void FlushCommands() override;
     void TickFrame() override;
-    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
+                               const Tegra::Engines::Fermi2D::Surface& dst,
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
@@ -101,11 +110,14 @@ public:
     }
 
 private:
-    /// Configures the color and depth framebuffer states.
-    void ConfigureFramebuffers();
+    static constexpr size_t MAX_TEXTURES = 192;
+    static constexpr size_t MAX_IMAGES = 48;
+    static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
 
-    /// Configures the color and depth framebuffer for clearing.
-    void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
+    void BindComputeTextures(Shader* kernel);
+
+    void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
+                      size_t& image_view_index, size_t& texture_index, size_t& image_index);
 
     /// Configures the current constbuffers to use for the draw command.
     void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@@ -126,26 +138,19 @@ private:
 
     /// Configures a global memory buffer.
     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
-                           std::size_t size, GLuint64EXT* pointer);
+                           size_t size, BindlessSSBO* ssbo);
 
     /// Configures the current textures to use for the draw command.
-    void SetupDrawTextures(std::size_t stage_index, Shader* shader);
+    void SetupDrawTextures(const Shader* shader, size_t stage_index);
 
     /// Configures the textures used in a compute shader.
-    void SetupComputeTextures(Shader* kernel);
-
-    /// Configures a texture.
-    void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
-                      const SamplerEntry& entry);
+    void SetupComputeTextures(const Shader* kernel);
 
     /// Configures images in a graphics shader.
-    void SetupDrawImages(std::size_t stage_index, Shader* shader);
+    void SetupDrawImages(const Shader* shader, size_t stage_index);
 
     /// Configures images in a compute shader.
-    void SetupComputeImages(Shader* shader);
-
-    /// Configures an image.
-    void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
+    void SetupComputeImages(const Shader* shader);
 
     /// Syncs the viewport and depth range to match the guest state
     void SyncViewport();
@@ -220,9 +225,6 @@ private:
     /// End a transform feedback
     void EndTransformFeedback();
 
-    /// Check for extension that are not strictly required but are needed for correct emulation
-    void CheckExtensions();
-
     std::size_t CalculateVertexArraysSize() const;
 
     std::size_t CalculateIndexBufferSize() const;
@@ -235,7 +237,7 @@ private:
 
     GLintptr SetupIndexBuffer();
 
-    void SetupShaders(GLenum primitive_mode);
+    void SetupShaders();
 
     Tegra::GPU& gpu;
     Tegra::Engines::Maxwell3D& maxwell3d;
@@ -247,19 +249,21 @@ private:
     ProgramManager& program_manager;
     StateTracker& state_tracker;
 
-    TextureCacheOpenGL texture_cache;
+    OGLStreamBuffer stream_buffer;
+    TextureCacheRuntime texture_cache_runtime;
+    TextureCache texture_cache;
     ShaderCacheOpenGL shader_cache;
-    SamplerCacheOpenGL sampler_cache;
-    FramebufferCacheOpenGL framebuffer_cache;
     QueryCache query_cache;
     OGLBufferCache buffer_cache;
     FenceManagerOpenGL fence_manager;
 
     VideoCommon::Shader::AsyncShaders async_shaders;
 
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
 
     GLint vertex_binding = 0;
+    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
+    std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
+    boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
+    std::array<GLuint, MAX_TEXTURES> texture_handles;
+    std::array<GLuint, MAX_IMAGES> image_handles;
 
     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
         transform_feedback_buffers;
@@ -273,7 +277,7 @@ private:
     std::size_t current_cbuf = 0;
     OGLBuffer unified_uniform_buffer;
 
-    /// Number of commands queued to the OpenGL driver. Reseted on flush.
+    /// Number of commands queued to the OpenGL driver. Reset on flush.
     std::size_t num_queued_commands = 0;
 
     u32 last_clip_distance_mask = 0;
 
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
         return;
 
     MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenSamplers(1, &handle);
+    glCreateSamplers(1, &handle);
 }
 
 void OGLSampler::Release() {
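glCreateSamplers is the GL 4.5 direct-state-access counterpart of glGenSamplers: it returns a fully initialized sampler object immediately, whereas glGenSamplers only reserves a name that becomes an object on first bind. That means the object can be configured without ever binding it. A minimal sketch (the function name is illustrative):

#include <glad/glad.h>

GLuint MakeNearestSampler() {
    GLuint sampler = 0;
    glCreateSamplers(1, &sampler); // usable immediately, no bind required
    glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    return sampler;
}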
 
@@ -1,52 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/logging/log.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_sampler_cache.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
-
-namespace OpenGL {
-
-SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
-
-SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
-
-OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
-    OGLSampler sampler;
-    sampler.Create();
-
-    const GLuint sampler_id{sampler.handle};
-    glSamplerParameteri(
-        sampler_id, GL_TEXTURE_MAG_FILTER,
-        MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
-    glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
-                        MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
-    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
-    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
-    glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
-    glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
-                        tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
-    glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
-                        MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
-    glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
-    glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
-    glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
-    glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
-    if (GLAD_GL_ARB_texture_filter_anisotropic) {
-        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
-    } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
-        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
-    } else {
-        LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
-    }
-
-    return sampler;
-}
-
-GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
-    return sampler.handle;
-}
-
-} // namespace OpenGL