mirror of
https://github.com/hedge-dev/UnleashedRecomp.git
synced 2025-10-30 07:11:05 +00:00
Use a separate upload buffer allocator for the main thread. (#281)
This commit is contained in:
parent
d3589979e4
commit
96108e1759
2 changed files with 103 additions and 35 deletions
|
|
@ -176,8 +176,8 @@ static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f);
|
||||||
static PipelineState g_pipelineState;
|
static PipelineState g_pipelineState;
|
||||||
static int32_t g_depthBias;
|
static int32_t g_depthBias;
|
||||||
static float g_slopeScaledDepthBias;
|
static float g_slopeScaledDepthBias;
|
||||||
static UploadAllocation g_vertexShaderConstants;
|
static uint32_t g_vertexShaderConstants[0x400];
|
||||||
static UploadAllocation g_pixelShaderConstants;
|
static uint32_t g_pixelShaderConstants[0x380];
|
||||||
static SharedConstants g_sharedConstants;
|
static SharedConstants g_sharedConstants;
|
||||||
static GuestTexture* g_textures[16];
|
static GuestTexture* g_textures[16];
|
||||||
static RenderSamplerDesc g_samplerDescs[16];
|
static RenderSamplerDesc g_samplerDescs[16];
|
||||||
|
|
@ -408,12 +408,9 @@ struct UploadAllocator
|
||||||
std::vector<UploadBuffer> buffers;
|
std::vector<UploadBuffer> buffers;
|
||||||
uint32_t index = 0;
|
uint32_t index = 0;
|
||||||
uint32_t offset = 0;
|
uint32_t offset = 0;
|
||||||
Mutex mutex;
|
|
||||||
|
|
||||||
UploadAllocation allocate(uint32_t size, uint32_t alignment)
|
UploadAllocation allocate(uint32_t size, uint32_t alignment)
|
||||||
{
|
{
|
||||||
std::lock_guard lock(mutex);
|
|
||||||
|
|
||||||
assert(size <= UploadBuffer::SIZE);
|
assert(size <= UploadBuffer::SIZE);
|
||||||
|
|
||||||
offset = (offset + alignment - 1) & ~(alignment - 1);
|
offset = (offset + alignment - 1) & ~(alignment - 1);
|
||||||
|
|
@ -474,6 +471,53 @@ struct UploadAllocator
|
||||||
|
|
||||||
static UploadAllocator g_uploadAllocators[NUM_FRAMES];
|
static UploadAllocator g_uploadAllocators[NUM_FRAMES];
|
||||||
|
|
||||||
|
// Bump allocator that carves temporary byte ranges out of a growing list of
// fixed-size heap buffers.
//
// Every allocation receives the next 16-byte aligned slice of the current
// buffer. When a request does not fit into what is left of that buffer, the
// allocator moves on to the next one, creating it on first use. reset()
// rewinds to the start of the first buffer without freeing anything, so the
// buffers are reused afterwards and every pointer handed out before the reset
// must be treated as stale.
//
// The struct does no locking of its own.
struct IntermediaryUploadAllocator
{
    // Capacity of each backing buffer in bytes. This is also the largest
    // size a single allocation may request.
    static constexpr size_t SIZE = 16 * 1024 * 1024;

    // Backing buffers. Entries are created lazily and survive reset().
    std::vector<std::unique_ptr<uint8_t[]>> buffers;

    // Position in `buffers` of the buffer that is currently being filled.
    uint32_t index = 0;

    // Byte offset of the next free slice inside the current buffer.
    uint32_t offset = 0;

    // Reserves `size` bytes and returns a pointer to them. The contents are
    // uninitialized. `size` must not exceed SIZE (checked by assert only).
    uint8_t* allocate(uint32_t size)
    {
        assert(size <= SIZE);

        // Sum in 64 bits: with the assert compiled out, a bogus `size` must
        // not be able to wrap the 32-bit addition and slip past this check.
        if (uint64_t{ offset } + size > SIZE)
        {
            ++index;
            offset = 0;
        }

        if (buffers.size() <= index)
            buffers.resize(index + 1);

        auto& buffer = buffers[index];
        if (buffer == nullptr)
        {
            // Deliberately left uninitialized: zero-filling 16 MiB that is
            // about to be overwritten would be wasted work.
#if defined(__cpp_lib_smart_ptr_for_overwrite)
            buffer = std::make_unique_for_overwrite<uint8_t[]>(SIZE);
#else
            buffer.reset(new uint8_t[SIZE]);
#endif
        }

        uint8_t* result = buffer.get() + offset;

        // Round the consumed size up to a multiple of 16 so that the next
        // slice starts 16-byte aligned relative to the buffer start.
        offset += (size + 0xFu) & ~0xFu;

        return result;
    }

    // Reserves `size` bytes, copies `size` bytes from `memory` into them and
    // returns a pointer to the copy.
    uint8_t* allocate(const void* memory, uint32_t size)
    {
        uint8_t* result = allocate(size);

        // memcpy has undefined behaviour for a null source even when the
        // length is zero, so skip the call entirely for empty requests.
        if (size != 0)
            memcpy(result, memory, size);

        return result;
    }

    // Rewinds the allocator to the beginning of the first buffer. No memory
    // is released; previously returned pointers will be handed out again.
    void reset()
    {
        index = 0;
        offset = 0;
    }
};
|
||||||
|
|
||||||
|
static IntermediaryUploadAllocator g_intermediaryUploadAllocator;
|
||||||
|
|
||||||
static std::vector<GuestResource*> g_tempResources[NUM_FRAMES];
|
static std::vector<GuestResource*> g_tempResources[NUM_FRAMES];
|
||||||
static std::vector<std::unique_ptr<RenderBuffer>> g_tempBuffers[NUM_FRAMES];
|
static std::vector<std::unique_ptr<RenderBuffer>> g_tempBuffers[NUM_FRAMES];
|
||||||
|
|
||||||
|
|
@ -821,12 +865,16 @@ struct RenderCommand
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
UploadAllocation allocation;
|
uint8_t* memory;
|
||||||
|
uint32_t index;
|
||||||
|
uint32_t size;
|
||||||
} setVertexShaderConstants;
|
} setVertexShaderConstants;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
UploadAllocation allocation;
|
uint8_t* memory;
|
||||||
|
uint32_t index;
|
||||||
|
uint32_t size;
|
||||||
} setPixelShaderConstants;
|
} setPixelShaderConstants;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
|
|
@ -854,7 +902,8 @@ struct RenderCommand
|
||||||
{
|
{
|
||||||
uint32_t primitiveType;
|
uint32_t primitiveType;
|
||||||
uint32_t primitiveCount;
|
uint32_t primitiveCount;
|
||||||
UploadAllocation vertexStreamZeroData;
|
uint8_t* vertexStreamZeroData;
|
||||||
|
uint32_t vertexStreamZeroSize;
|
||||||
uint32_t vertexStreamZeroStride;
|
uint32_t vertexStreamZeroStride;
|
||||||
CsdFilterState csdFilterState;
|
CsdFilterState csdFilterState;
|
||||||
} drawPrimitiveUP;
|
} drawPrimitiveUP;
|
||||||
|
|
@ -1485,9 +1534,6 @@ static void BeginCommandList()
|
||||||
|
|
||||||
g_backBuffer->layout = RenderTextureLayout::UNKNOWN;
|
g_backBuffer->layout = RenderTextureLayout::UNKNOWN;
|
||||||
|
|
||||||
g_vertexShaderConstants = {};
|
|
||||||
g_pixelShaderConstants = {};
|
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++)
|
for (size_t i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D;
|
g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D;
|
||||||
|
|
@ -2498,6 +2544,7 @@ void Video::Present()
|
||||||
|
|
||||||
g_dirtyStates = DirtyStates(true);
|
g_dirtyStates = DirtyStates(true);
|
||||||
g_uploadAllocators[g_frame].reset();
|
g_uploadAllocators[g_frame].reset();
|
||||||
|
g_intermediaryUploadAllocator.reset();
|
||||||
g_triangleFanIndexData.reset();
|
g_triangleFanIndexData.reset();
|
||||||
g_quadIndexData.reset();
|
g_quadIndexData.reset();
|
||||||
|
|
||||||
|
|
@ -2532,15 +2579,6 @@ void Video::Present()
|
||||||
g_presentProfiler.Reset();
|
g_presentProfiler.Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Present(GuestDevice* device)
|
|
||||||
{
|
|
||||||
Video::Present();
|
|
||||||
|
|
||||||
// Invalidate vertex/pixel shader constants.
|
|
||||||
device->dirtyFlags[0] = ~0;
|
|
||||||
device->dirtyFlags[1] = ~0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Video::StartPipelinePrecompilation()
|
void Video::StartPipelinePrecompilation()
|
||||||
{
|
{
|
||||||
g_shouldPrecompilePipelines = true;
|
g_shouldPrecompilePipelines = true;
|
||||||
|
|
@ -3966,20 +4004,38 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device->dirtyFlags[0] != 0)
|
uint64_t dirtyFlags = device->dirtyFlags[0].get();
|
||||||
|
if (dirtyFlags != 0)
|
||||||
{
|
{
|
||||||
|
int startRegister = std::countl_zero(dirtyFlags);
|
||||||
|
int endRegister = 64 - std::countr_zero(dirtyFlags);
|
||||||
|
|
||||||
|
uint32_t index = startRegister * 16;
|
||||||
|
uint32_t size = (endRegister - startRegister) * 64;
|
||||||
|
|
||||||
auto& cmd = queue.enqueue();
|
auto& cmd = queue.enqueue();
|
||||||
cmd.type = RenderCommandType::SetVertexShaderConstants;
|
cmd.type = RenderCommandType::SetVertexShaderConstants;
|
||||||
cmd.setVertexShaderConstants.allocation = g_uploadAllocators[g_frame].allocate<true>(device->vertexShaderFloatConstants, 0x1000, 0x100);
|
cmd.setVertexShaderConstants.memory = g_intermediaryUploadAllocator.allocate(&device->vertexShaderFloatConstants[index], size);
|
||||||
|
cmd.setVertexShaderConstants.index = index;
|
||||||
|
cmd.setVertexShaderConstants.size = size;
|
||||||
|
|
||||||
device->dirtyFlags[0] = 0;
|
device->dirtyFlags[0] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device->dirtyFlags[1] != 0)
|
dirtyFlags = device->dirtyFlags[1].get();
|
||||||
|
if (dirtyFlags != 0)
|
||||||
{
|
{
|
||||||
|
int startRegister = std::countl_zero(dirtyFlags);
|
||||||
|
int endRegister = std::min(56, 64 - std::countr_zero(dirtyFlags));
|
||||||
|
|
||||||
|
uint32_t index = startRegister * 16;
|
||||||
|
uint32_t size = (endRegister - startRegister) * 64;
|
||||||
|
|
||||||
auto& cmd = queue.enqueue();
|
auto& cmd = queue.enqueue();
|
||||||
cmd.type = RenderCommandType::SetPixelShaderConstants;
|
cmd.type = RenderCommandType::SetPixelShaderConstants;
|
||||||
cmd.setPixelShaderConstants.allocation = g_uploadAllocators[g_frame].allocate<true>(device->pixelShaderFloatConstants, 0xE00, 0x100);
|
cmd.setPixelShaderConstants.memory = g_intermediaryUploadAllocator.allocate(&device->pixelShaderFloatConstants[index], size);
|
||||||
|
cmd.setPixelShaderConstants.index = index;
|
||||||
|
cmd.setPixelShaderConstants.size = size;
|
||||||
|
|
||||||
device->dirtyFlags[1] = 0;
|
device->dirtyFlags[1] = 0;
|
||||||
}
|
}
|
||||||
|
|
@ -4040,13 +4096,19 @@ static void ProcSetSamplerState(const RenderCommand& cmd)
|
||||||
|
|
||||||
static void ProcSetVertexShaderConstants(const RenderCommand& cmd)
|
static void ProcSetVertexShaderConstants(const RenderCommand& cmd)
|
||||||
{
|
{
|
||||||
g_vertexShaderConstants = cmd.setVertexShaderConstants.allocation;
|
auto& args = cmd.setVertexShaderConstants;
|
||||||
|
assert((args.index * sizeof(uint32_t) + args.size) <= sizeof(g_vertexShaderConstants));
|
||||||
|
|
||||||
|
memcpy(&g_vertexShaderConstants[args.index], args.memory, args.size);
|
||||||
g_dirtyStates.vertexShaderConstants = true;
|
g_dirtyStates.vertexShaderConstants = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ProcSetPixelShaderConstants(const RenderCommand& cmd)
|
static void ProcSetPixelShaderConstants(const RenderCommand& cmd)
|
||||||
{
|
{
|
||||||
g_pixelShaderConstants = cmd.setPixelShaderConstants.allocation;
|
auto& args = cmd.setPixelShaderConstants;
|
||||||
|
assert((args.index * sizeof(uint32_t) + args.size) <= sizeof(g_pixelShaderConstants));
|
||||||
|
|
||||||
|
memcpy(&g_pixelShaderConstants[args.index], args.memory, args.size);
|
||||||
g_dirtyStates.pixelShaderConstants = true;
|
g_dirtyStates.pixelShaderConstants = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4138,10 +4200,16 @@ static void FlushRenderStateForRenderThread()
|
||||||
commandList->setDepthBias(g_depthBias, 0.0f, g_slopeScaledDepthBias);
|
commandList->setDepthBias(g_depthBias, 0.0f, g_slopeScaledDepthBias);
|
||||||
|
|
||||||
if (g_dirtyStates.vertexShaderConstants)
|
if (g_dirtyStates.vertexShaderConstants)
|
||||||
SetRootDescriptor(g_vertexShaderConstants, 0);
|
{
|
||||||
|
auto vertexShaderConstants = g_uploadAllocators[g_frame].allocate<true>(g_vertexShaderConstants, sizeof(g_vertexShaderConstants), 0x100);
|
||||||
|
SetRootDescriptor(vertexShaderConstants, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (g_dirtyStates.pixelShaderConstants)
|
if (g_dirtyStates.pixelShaderConstants)
|
||||||
SetRootDescriptor(g_pixelShaderConstants, 1);
|
{
|
||||||
|
auto pixelShaderConstants = g_uploadAllocators[g_frame].allocate<true>(g_pixelShaderConstants, sizeof(g_pixelShaderConstants), 0x100);
|
||||||
|
SetRootDescriptor(pixelShaderConstants, 1);
|
||||||
|
}
|
||||||
|
|
||||||
if (g_dirtyStates.sharedConstants)
|
if (g_dirtyStates.sharedConstants)
|
||||||
{
|
{
|
||||||
|
|
@ -4302,7 +4370,8 @@ static void DrawPrimitiveUP(GuestDevice* device, uint32_t primitiveType, uint32_
|
||||||
cmd.type = RenderCommandType::DrawPrimitiveUP;
|
cmd.type = RenderCommandType::DrawPrimitiveUP;
|
||||||
cmd.drawPrimitiveUP.primitiveType = primitiveType;
|
cmd.drawPrimitiveUP.primitiveType = primitiveType;
|
||||||
cmd.drawPrimitiveUP.primitiveCount = primitiveCount;
|
cmd.drawPrimitiveUP.primitiveCount = primitiveCount;
|
||||||
cmd.drawPrimitiveUP.vertexStreamZeroData = g_uploadAllocators[g_frame].allocate<true>(reinterpret_cast<uint32_t*>(vertexStreamZeroData), primitiveCount * vertexStreamZeroStride, 0x4);
|
cmd.drawPrimitiveUP.vertexStreamZeroData = g_intermediaryUploadAllocator.allocate(vertexStreamZeroData, primitiveCount * vertexStreamZeroStride);
|
||||||
|
cmd.drawPrimitiveUP.vertexStreamZeroSize = primitiveCount * vertexStreamZeroStride;
|
||||||
cmd.drawPrimitiveUP.vertexStreamZeroStride = vertexStreamZeroStride;
|
cmd.drawPrimitiveUP.vertexStreamZeroStride = vertexStreamZeroStride;
|
||||||
cmd.drawPrimitiveUP.csdFilterState = g_csdFilterState;
|
cmd.drawPrimitiveUP.csdFilterState = g_csdFilterState;
|
||||||
|
|
||||||
|
|
@ -4320,9 +4389,11 @@ static void ProcDrawPrimitiveUP(const RenderCommand& cmd)
|
||||||
SetPrimitiveType(args.primitiveType);
|
SetPrimitiveType(args.primitiveType);
|
||||||
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[0], uint8_t(args.vertexStreamZeroStride));
|
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.vertexStrides[0], uint8_t(args.vertexStreamZeroStride));
|
||||||
|
|
||||||
|
auto allocation = g_uploadAllocators[g_frame].allocate<true>(reinterpret_cast<const uint32_t*>(args.vertexStreamZeroData), args.vertexStreamZeroSize, 0x4);
|
||||||
|
|
||||||
auto& vertexBufferView = g_vertexBufferViews[0];
|
auto& vertexBufferView = g_vertexBufferViews[0];
|
||||||
vertexBufferView.size = args.primitiveCount * args.vertexStreamZeroStride;
|
vertexBufferView.size = args.primitiveCount * args.vertexStreamZeroStride;
|
||||||
vertexBufferView.buffer = args.vertexStreamZeroData.buffer->at(args.vertexStreamZeroData.offset);
|
vertexBufferView.buffer = allocation.buffer->at(allocation.offset);
|
||||||
g_inputSlots[0].stride = args.vertexStreamZeroStride;
|
g_inputSlots[0].stride = args.vertexStreamZeroStride;
|
||||||
g_dirtyStates.vertexStreamFirst = 0;
|
g_dirtyStates.vertexStreamFirst = 0;
|
||||||
|
|
||||||
|
|
@ -7132,7 +7203,7 @@ GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc);
|
||||||
GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration);
|
GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration);
|
||||||
GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration);
|
GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration);
|
||||||
|
|
||||||
GUEST_FUNCTION_HOOK(sub_82BDA8C0, Present);
|
GUEST_FUNCTION_HOOK(sub_82BDA8C0, Video::Present);
|
||||||
GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer);
|
GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer);
|
||||||
|
|
||||||
GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture);
|
GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture);
|
||||||
|
|
|
||||||
|
|
@ -115,10 +115,7 @@ PPC_FUNC(sub_8312DBF8)
|
||||||
constexpr auto INTERVAL = 1000000000ns / 60;
|
constexpr auto INTERVAL = 1000000000ns / 60;
|
||||||
auto next = now + (INTERVAL - now.time_since_epoch() % INTERVAL);
|
auto next = now + (INTERVAL - now.time_since_epoch() % INTERVAL);
|
||||||
|
|
||||||
std::this_thread::sleep_for(std::chrono::floor<std::chrono::milliseconds>(next - now - 1ms));
|
std::this_thread::sleep_until(next);
|
||||||
|
|
||||||
while (std::chrono::steady_clock::now() < next)
|
|
||||||
std::this_thread::yield();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void WaitVsyncMidAsmHook()
|
void WaitVsyncMidAsmHook()
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue