refactor: implement base x64 fastfile loading for iw4

This commit is contained in:
Jan
2025-05-03 17:31:17 +01:00
parent 78d8fba6f8
commit 03ccede91c
43 changed files with 1025 additions and 331 deletions

View File

@ -1,6 +1,7 @@
#include "ZoneInputStream.h"
#include "Loading/Exception/BlockOverflowException.h"
#include "Loading/Exception/InvalidAliasLookupException.h"
#include "Loading/Exception/InvalidOffsetBlockException.h"
#include "Loading/Exception/InvalidOffsetBlockOffsetException.h"
#include "Loading/Exception/OutOfBlockBoundsException.h"
@ -10,17 +11,33 @@
#include <cstring>
#include <stack>
ZoneStreamFillReadAccessor::ZoneStreamFillReadAccessor(const void* buffer, const size_t bufferSize, const unsigned pointerByteCount)
: m_buffer(buffer),
ZoneStreamFillReadAccessor::ZoneStreamFillReadAccessor(const void* dataBuffer, void* blockBuffer, const size_t bufferSize, const unsigned pointerByteCount)
: m_data_buffer(dataBuffer),
m_block_buffer(blockBuffer),
m_buffer_size(bufferSize),
m_pointer_byte_count(pointerByteCount)
{
// Otherwise we cannot insert alias
assert(m_pointer_byte_count <= sizeof(uintptr_t));
}
ZoneStreamFillReadAccessor ZoneStreamFillReadAccessor::AtOffset(const size_t offset) const
{
assert(offset < m_buffer_size);
return ZoneStreamFillReadAccessor(static_cast<const char*>(m_buffer) + offset, m_buffer_size - offset, m_pointer_byte_count);
return ZoneStreamFillReadAccessor(
static_cast<const char*>(m_data_buffer) + offset, static_cast<char*>(m_block_buffer) + offset, m_buffer_size - offset, m_pointer_byte_count);
}
void ZoneStreamFillReadAccessor::InsertPointerRedirect(const uintptr_t aliasValue, const size_t offset) const
{
// Memory should be zero by default
if (aliasValue == 0)
return;
assert(offset < m_buffer_size);
assert(m_block_buffer);
std::memcpy(static_cast<char*>(m_block_buffer) + offset, &aliasValue, m_pointer_byte_count);
}
namespace
@ -28,12 +45,19 @@ namespace
class XBlockInputStream final : public ZoneInputStream
{
public:
XBlockInputStream(
const unsigned pointerBitCount, const unsigned blockBitCount, std::vector<XBlock*>& blocks, const block_t insertBlock, ILoadingStream& stream)
XBlockInputStream(const unsigned pointerBitCount,
const unsigned blockBitCount,
std::vector<XBlock*>& blocks,
const block_t insertBlock,
ILoadingStream& stream,
MemoryManager& memory)
: m_blocks(blocks),
m_stream(stream),
m_memory(memory),
m_pointer_byte_count(pointerBitCount / 8u),
m_block_bit_count(blockBitCount)
m_block_mask((std::numeric_limits<uintptr_t>::max() >> (sizeof(uintptr_t) * 8 - blockBitCount)) << (pointerBitCount - blockBitCount)),
m_block_shift(pointerBitCount - blockBitCount),
m_offset_mask(std::numeric_limits<uintptr_t>::max() >> (sizeof(uintptr_t) * 8 - (pointerBitCount - blockBitCount)))
{
assert(pointerBitCount % 8u == 0u);
assert(insertBlock < static_cast<block_t>(blocks.size()));
@ -45,6 +69,11 @@ namespace
m_insert_block = blocks[insertBlock];
}
[[nodiscard]] unsigned GetPointerBitCount() const override
{
return m_pointer_byte_count * 8u;
}
void PushBlock(const block_t block) override
{
assert(block < static_cast<block_t>(m_blocks.size()));
@ -54,7 +83,7 @@ namespace
m_block_stack.push(newBlock);
if (newBlock->m_type == XBlock::Type::BLOCK_TYPE_TEMP)
if (newBlock->m_type == XBlockType::BLOCK_TYPE_TEMP)
m_temp_offsets.push(m_block_offsets[newBlock->m_index]);
}
@ -70,7 +99,7 @@ namespace
m_block_stack.pop();
// If the temp block is not used anymore right now, reset it to the buffer start since as the name suggests, the data inside is temporary.
if (poppedBlock->m_type == XBlock::Type::BLOCK_TYPE_TEMP)
if (poppedBlock->m_type == XBlockType::BLOCK_TYPE_TEMP)
{
m_block_offsets[poppedBlock->m_index] = m_temp_offsets.top();
m_temp_offsets.pop();
@ -96,6 +125,23 @@ namespace
return &block->m_buffer[m_block_offsets[block->m_index]];
}
void* AllocOutOfBlock(const unsigned align, const size_t size) override
{
assert(!m_block_stack.empty());
if (m_block_stack.empty())
return nullptr;
auto* block = m_block_stack.top();
Align(align);
if (m_block_offsets[block->m_index] > block->m_buffer_size)
throw BlockOverflowException(block);
return m_memory.AllocRaw(size);
}
void LoadDataRaw(void* dst, const size_t size) override
{
m_stream.Load(dst, size);
@ -108,10 +154,10 @@ namespace
{
auto* block = m_block_stack.top();
if (block->m_buffer > dst || block->m_buffer + block->m_buffer_size < dst)
if (block->m_buffer.get() > dst || block->m_buffer.get() + block->m_buffer_size < dst)
throw OutOfBlockBoundsException(block);
if (static_cast<uint8_t*>(dst) + size > block->m_buffer + block->m_buffer_size)
if (static_cast<uint8_t*>(dst) + size > block->m_buffer.get() + block->m_buffer_size)
throw BlockOverflowException(block);
// Theoretically ptr should always be at the current block offset.
@ -119,16 +165,16 @@ namespace
switch (block->m_type)
{
case XBlock::Type::BLOCK_TYPE_TEMP:
case XBlock::Type::BLOCK_TYPE_NORMAL:
case XBlockType::BLOCK_TYPE_TEMP:
case XBlockType::BLOCK_TYPE_NORMAL:
m_stream.Load(dst, size);
break;
case XBlock::Type::BLOCK_TYPE_RUNTIME:
case XBlockType::BLOCK_TYPE_RUNTIME:
std::memset(dst, 0, size);
break;
case XBlock::Type::BLOCK_TYPE_DELAY:
case XBlockType::BLOCK_TYPE_DELAY:
assert(false);
break;
}
@ -150,14 +196,14 @@ namespace
auto* block = m_block_stack.top();
if (block->m_buffer > dst || block->m_buffer + block->m_buffer_size < dst)
if (block->m_buffer.get() > dst || block->m_buffer.get() + block->m_buffer_size < dst)
throw OutOfBlockBoundsException(block);
// Theoretically ptr should always be at the current block offset.
assert(dst == &block->m_buffer[m_block_offsets[block->m_index]]);
uint8_t byte;
auto offset = static_cast<size_t>(static_cast<uint8_t*>(dst) - block->m_buffer);
auto offset = static_cast<size_t>(static_cast<uint8_t*>(dst) - block->m_buffer.get());
do
{
if (offset >= block->m_buffer_size)
@ -179,33 +225,34 @@ namespace
if (!m_block_stack.empty())
{
const auto* block = m_block_stack.top();
auto* blockBufferForFill = &block->m_buffer[m_block_offsets[block->m_index]];
switch (block->m_type)
{
case XBlock::Type::BLOCK_TYPE_TEMP:
case XBlock::Type::BLOCK_TYPE_NORMAL:
case XBlockType::BLOCK_TYPE_TEMP:
case XBlockType::BLOCK_TYPE_NORMAL:
m_stream.Load(dst, size);
break;
case XBlock::Type::BLOCK_TYPE_RUNTIME:
case XBlockType::BLOCK_TYPE_RUNTIME:
std::memset(dst, 0, size);
break;
case XBlock::Type::BLOCK_TYPE_DELAY:
case XBlockType::BLOCK_TYPE_DELAY:
assert(false);
break;
}
IncBlockPos(size);
}
else
{
m_stream.Load(dst, size);
return ZoneStreamFillReadAccessor(dst, blockBufferForFill, size, m_pointer_byte_count);
}
return ZoneStreamFillReadAccessor(dst, size, m_pointer_byte_count);
m_stream.Load(dst, size);
return ZoneStreamFillReadAccessor(dst, nullptr, size, m_pointer_byte_count);
}
void* InsertPointer() override
void* InsertPointerNative() override
{
m_block_stack.push(m_insert_block);
@ -224,14 +271,46 @@ namespace
return ptr;
}
uintptr_t InsertPointerAliasLookup() override
{
m_block_stack.push(m_insert_block);
// Alignment of pointer should always be its size
Align(m_pointer_byte_count);
if (m_block_offsets[m_insert_block->m_index] + m_pointer_byte_count > m_insert_block->m_buffer_size)
throw BlockOverflowException(m_insert_block);
auto* ptr = static_cast<void*>(&m_insert_block->m_buffer[m_block_offsets[m_insert_block->m_index]]);
IncBlockPos(m_pointer_byte_count);
m_block_stack.pop();
const auto newLookupIndex = static_cast<uintptr_t>(m_alias_lookup.size()) + 1;
m_alias_lookup.emplace_back(nullptr);
std::memcpy(ptr, &newLookupIndex, m_pointer_byte_count);
return newLookupIndex;
}
void SetInsertedPointerAliasLookup(const uintptr_t lookupEntry, void* value) override
{
assert(lookupEntry > 0);
assert(lookupEntry <= m_alias_lookup.size());
m_alias_lookup[lookupEntry - 1] = value;
}
void* ConvertOffsetToPointerNative(const void* offset) override
{
// -1 because otherwise Block 0 Offset 0 would be just 0 which is already used to signalize a nullptr.
// So all offsets are moved by 1.
const auto offsetInt = reinterpret_cast<uintptr_t>(offset) - 1u;
const auto blockNum = static_cast<block_t>(offsetInt >> (sizeof(offsetInt) * 8u - m_block_bit_count));
const auto blockOffset = static_cast<size_t>(offsetInt & (UINTPTR_MAX >> m_block_bit_count));
const auto blockNum = static_cast<block_t>((offsetInt & m_block_mask) >> m_block_shift);
const auto blockOffset = static_cast<size_t>(offsetInt & m_offset_mask);
if (blockNum < 0 || blockNum >= static_cast<block_t>(m_blocks.size()))
throw InvalidOffsetBlockException(blockNum);
@ -249,8 +328,8 @@ namespace
// For details see ConvertOffsetToPointer
const auto offsetInt = reinterpret_cast<uintptr_t>(offset) - 1u;
const auto blockNum = static_cast<block_t>(offsetInt >> (sizeof(offsetInt) * 8u - m_block_bit_count));
const auto blockOffset = static_cast<size_t>(offsetInt & (UINTPTR_MAX >> m_block_bit_count));
const auto blockNum = static_cast<block_t>((offsetInt & m_block_mask) >> m_block_shift);
const auto blockOffset = static_cast<size_t>(offsetInt & m_offset_mask);
if (blockNum < 0 || blockNum >= static_cast<block_t>(m_blocks.size()))
throw InvalidOffsetBlockException(blockNum);
@ -263,6 +342,73 @@ namespace
return *reinterpret_cast<void**>(&block->m_buffer[blockOffset]);
}
uintptr_t AllocRedirectEntry(void** alias) override
{
// nullptr is always lookup alias 0
if (*alias == nullptr)
return 0;
const auto newIndex = m_pointer_redirect_lookup.size();
m_pointer_redirect_lookup.emplace_back(alias);
return static_cast<uintptr_t>(newIndex + 1);
}
void* ConvertOffsetToPointerRedirect(const void* offset) override
{
// For details see ConvertOffsetToPointer
const auto offsetInt = reinterpret_cast<uintptr_t>(offset) - 1u;
const auto blockNum = static_cast<block_t>((offsetInt & m_block_mask) >> m_block_shift);
const auto blockOffset = static_cast<size_t>(offsetInt & m_offset_mask);
if (blockNum < 0 || blockNum >= static_cast<block_t>(m_blocks.size()))
throw InvalidOffsetBlockException(blockNum);
auto* block = m_blocks[blockNum];
if (block->m_buffer_size <= blockOffset + sizeof(void*))
throw InvalidOffsetBlockOffsetException(block, blockOffset);
uintptr_t lookupEntry = 0u;
std::memcpy(&lookupEntry, &block->m_buffer[blockOffset], m_pointer_byte_count);
if (lookupEntry == 0)
return nullptr;
if (lookupEntry > m_pointer_redirect_lookup.size())
throw InvalidAliasLookupException(lookupEntry - 1, m_pointer_redirect_lookup.size());
return *m_pointer_redirect_lookup[lookupEntry - 1];
}
void* ConvertOffsetToAliasLookup(const void* offset) override
{
// For details see ConvertOffsetToPointer
const auto offsetInt = reinterpret_cast<uintptr_t>(offset) - 1u;
const auto blockNum = static_cast<block_t>((offsetInt & m_block_mask) >> m_block_shift);
const auto blockOffset = static_cast<size_t>(offsetInt & m_offset_mask);
if (blockNum < 0 || blockNum >= static_cast<block_t>(m_blocks.size()))
throw InvalidOffsetBlockException(blockNum);
auto* block = m_blocks[blockNum];
if (block->m_buffer_size <= blockOffset + sizeof(void*))
throw InvalidOffsetBlockOffsetException(block, blockOffset);
uintptr_t lookupEntry = 0u;
std::memcpy(&lookupEntry, &block->m_buffer[blockOffset], m_pointer_byte_count);
if (lookupEntry == 0)
return nullptr;
if (lookupEntry > m_alias_lookup.size())
throw InvalidAliasLookupException(lookupEntry - 1, m_alias_lookup.size());
return m_alias_lookup[lookupEntry - 1];
}
private:
void IncBlockPos(const size_t size)
{
@ -293,16 +439,26 @@ namespace
std::stack<size_t> m_temp_offsets;
ILoadingStream& m_stream;
MemoryManager& m_memory;
unsigned m_pointer_byte_count;
unsigned m_block_bit_count;
uintptr_t m_block_mask;
unsigned m_block_shift;
uintptr_t m_offset_mask;
XBlock* m_insert_block;
std::vector<uint8_t> m_fill_buffer;
std::vector<void**> m_pointer_redirect_lookup;
std::vector<void*> m_alias_lookup;
};
} // namespace
std::unique_ptr<ZoneInputStream> ZoneInputStream::Create(
const unsigned pointerBitCount, const unsigned blockBitCount, std::vector<XBlock*>& blocks, const block_t insertBlock, ILoadingStream& stream)
std::unique_ptr<ZoneInputStream> ZoneInputStream::Create(const unsigned pointerBitCount,
const unsigned blockBitCount,
std::vector<XBlock*>& blocks,
const block_t insertBlock,
ILoadingStream& stream,
MemoryManager& memory)
{
return std::make_unique<XBlockInputStream>(pointerBitCount, blockBitCount, blocks, insertBlock, stream);
return std::make_unique<XBlockInputStream>(pointerBitCount, blockBitCount, blocks, insertBlock, stream, memory);
}