mirror of
https://github.com/hedge-dev/UnleashedRecomp.git
synced 2025-12-19 14:32:19 +00:00
Implement copy bypass optimization. (#262)
* Initial work on copy bypass optimization.
* Force depth stencil textures to be transient.
* Get rid of texture copying for shadow maps.
* Move barrier populate function.
* Set viewport/scissor rect explicitly for MSAA depth resolve.
This commit is contained in:
parent
342d696f99
commit
aaad10d797
3 changed files with 381 additions and 152 deletions
|
|
@ -155,11 +155,11 @@ static GuestSurface* g_renderTarget;
|
||||||
static GuestSurface* g_depthStencil;
|
static GuestSurface* g_depthStencil;
|
||||||
static RenderFramebuffer* g_framebuffer;
|
static RenderFramebuffer* g_framebuffer;
|
||||||
static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f);
|
static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f);
|
||||||
static bool g_halfPixel = true;
|
|
||||||
static PipelineState g_pipelineState;
|
static PipelineState g_pipelineState;
|
||||||
static int32_t g_depthBias;
|
static int32_t g_depthBias;
|
||||||
static float g_slopeScaledDepthBias;
|
static float g_slopeScaledDepthBias;
|
||||||
static SharedConstants g_sharedConstants;
|
static SharedConstants g_sharedConstants;
|
||||||
|
static GuestTexture* g_textures[16];
|
||||||
static RenderSamplerDesc g_samplerDescs[16];
|
static RenderSamplerDesc g_samplerDescs[16];
|
||||||
static bool g_scissorTestEnable = false;
|
static bool g_scissorTestEnable = false;
|
||||||
static RenderRect g_scissorRect;
|
static RenderRect g_scissorRect;
|
||||||
|
|
@ -681,6 +681,9 @@ enum class CsdFilterState
|
||||||
|
|
||||||
static CsdFilterState g_csdFilterState;
|
static CsdFilterState g_csdFilterState;
|
||||||
|
|
||||||
|
static ankerl::unordered_dense::set<GuestSurface*> g_pendingSurfaceCopies;
|
||||||
|
static ankerl::unordered_dense::set<GuestSurface*> g_pendingMsaaResolves;
|
||||||
|
|
||||||
enum class RenderCommandType
|
enum class RenderCommandType
|
||||||
{
|
{
|
||||||
SetRenderState,
|
SetRenderState,
|
||||||
|
|
@ -694,6 +697,7 @@ enum class RenderCommandType
|
||||||
StretchRect,
|
StretchRect,
|
||||||
SetRenderTarget,
|
SetRenderTarget,
|
||||||
SetDepthStencilSurface,
|
SetDepthStencilSurface,
|
||||||
|
ExecutePendingStretchRectCommands,
|
||||||
Clear,
|
Clear,
|
||||||
SetViewport,
|
SetViewport,
|
||||||
SetTexture,
|
SetTexture,
|
||||||
|
|
@ -710,7 +714,7 @@ enum class RenderCommandType
|
||||||
SetVertexShader,
|
SetVertexShader,
|
||||||
SetStreamSource,
|
SetStreamSource,
|
||||||
SetIndices,
|
SetIndices,
|
||||||
SetPixelShader
|
SetPixelShader,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct RenderCommand
|
struct RenderCommand
|
||||||
|
|
@ -1465,6 +1469,8 @@ static void BeginCommandList()
|
||||||
g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE;
|
g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memset(g_textures, 0, sizeof(g_textures));
|
||||||
|
|
||||||
if (Config::GITextureFiltering == EGITextureFiltering::Bicubic)
|
if (Config::GITextureFiltering == EGITextureFiltering::Bicubic)
|
||||||
g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER;
|
g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER;
|
||||||
else
|
else
|
||||||
|
|
@ -2409,9 +2415,12 @@ static std::atomic<bool> g_executedCommandList;
|
||||||
|
|
||||||
void Video::Present()
|
void Video::Present()
|
||||||
{
|
{
|
||||||
|
RenderCommand cmd;
|
||||||
|
cmd.type = RenderCommandType::ExecutePendingStretchRectCommands;
|
||||||
|
g_renderQueue.enqueue(cmd);
|
||||||
|
|
||||||
DrawImGui();
|
DrawImGui();
|
||||||
|
|
||||||
RenderCommand cmd;
|
|
||||||
cmd.type = RenderCommandType::ExecuteCommandList;
|
cmd.type = RenderCommandType::ExecuteCommandList;
|
||||||
g_renderQueue.enqueue(cmd);
|
g_renderQueue.enqueue(cmd);
|
||||||
|
|
||||||
|
|
@ -2497,7 +2506,7 @@ static void SetRootDescriptor(const UploadAllocation& allocation, size_t index)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ProcExecuteCommandList(const RenderCommand& cmd)
|
static void ProcExecuteCommandList(const RenderCommand& cmd)
|
||||||
{
|
{
|
||||||
if (g_swapChainValid)
|
if (g_swapChainValid)
|
||||||
{
|
{
|
||||||
auto swapChainTexture = g_swapChain->getTexture(g_backBufferIndex);
|
auto swapChainTexture = g_swapChain->getTexture(g_backBufferIndex);
|
||||||
|
|
@ -2795,16 +2804,13 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for
|
||||||
surface->guestFormat = format;
|
surface->guestFormat = format;
|
||||||
surface->sampleCount = desc.multisampling.sampleCount;
|
surface->sampleCount = desc.multisampling.sampleCount;
|
||||||
|
|
||||||
if (desc.multisampling.sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT)
|
RenderTextureViewDesc viewDesc;
|
||||||
{
|
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
|
||||||
RenderTextureViewDesc viewDesc;
|
viewDesc.format = desc.format;
|
||||||
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
|
viewDesc.mipLevels = 1;
|
||||||
viewDesc.format = RenderFormat::D32_FLOAT;
|
surface->textureView = surface->textureHolder->createTextureView(viewDesc);
|
||||||
viewDesc.mipLevels = 1;
|
surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
|
||||||
surface->textureView = surface->textureHolder->createTextureView(viewDesc);
|
g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
|
||||||
surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
|
|
||||||
g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface)));
|
surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface)));
|
||||||
|
|
@ -2820,11 +2826,8 @@ static void FlushViewport()
|
||||||
if (g_dirtyStates.viewport)
|
if (g_dirtyStates.viewport)
|
||||||
{
|
{
|
||||||
auto viewport = g_viewport;
|
auto viewport = g_viewport;
|
||||||
if (g_halfPixel)
|
viewport.x += 0.5f;
|
||||||
{
|
viewport.y += 0.5f;
|
||||||
viewport.x += 0.5f;
|
|
||||||
viewport.y += 0.5f;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (viewport.minDepth > viewport.maxDepth)
|
if (viewport.minDepth > viewport.maxDepth)
|
||||||
std::swap(viewport.minDepth, viewport.maxDepth);
|
std::swap(viewport.minDepth, viewport.maxDepth);
|
||||||
|
|
@ -2848,13 +2851,6 @@ static void FlushViewport()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool SetHalfPixel(bool enable)
|
|
||||||
{
|
|
||||||
bool oldValue = g_halfPixel;
|
|
||||||
SetDirtyValue(g_dirtyStates.viewport, g_halfPixel, enable);
|
|
||||||
return oldValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture)
|
static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture)
|
||||||
{
|
{
|
||||||
RenderCommand cmd;
|
RenderCommand cmd;
|
||||||
|
|
@ -2864,105 +2860,43 @@ static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestText
|
||||||
g_renderQueue.enqueue(cmd);
|
g_renderQueue.enqueue(cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture);
|
||||||
|
static void SetSurface(uint32_t index, GuestSurface* surface);
|
||||||
|
|
||||||
static void ProcStretchRect(const RenderCommand& cmd)
|
static void ProcStretchRect(const RenderCommand& cmd)
|
||||||
{
|
{
|
||||||
const auto& args = cmd.stretchRect;
|
const auto& args = cmd.stretchRect;
|
||||||
|
|
||||||
const bool isDepthStencil = (args.flags & 0x4) != 0;
|
const bool isDepthStencil = (args.flags & 0x4) != 0;
|
||||||
const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget;
|
const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget;
|
||||||
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
|
|
||||||
|
|
||||||
RenderTextureLayout srcLayout;
|
// Erase previous pending command so it doesn't cause the texture to be overridden.
|
||||||
RenderTextureLayout dstLayout;
|
if (args.texture->sourceSurface != nullptr)
|
||||||
|
args.texture->sourceSurface->destinationTextures.erase(args.texture);
|
||||||
|
|
||||||
if (multiSampling)
|
args.texture->sourceSurface = surface;
|
||||||
|
surface->destinationTextures.emplace(args.texture);
|
||||||
|
|
||||||
|
// If the texture is assigned to any slots, set it again. This'll also push the barrier.
|
||||||
|
for (uint32_t i = 0; i < std::size(g_textures); i++)
|
||||||
{
|
{
|
||||||
if (isDepthStencil)
|
if (g_textures[i] == args.texture)
|
||||||
{
|
{
|
||||||
srcLayout = RenderTextureLayout::SHADER_READ;
|
// Set the original texture for MSAA textures as they always get resolved.
|
||||||
dstLayout = RenderTextureLayout::DEPTH_WRITE;
|
if (surface->sampleCount != RenderSampleCount::COUNT_1)
|
||||||
}
|
{
|
||||||
else
|
SetTextureInRenderThread(i, args.texture);
|
||||||
{
|
g_pendingMsaaResolves.emplace(surface);
|
||||||
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
|
}
|
||||||
dstLayout = RenderTextureLayout::RESOLVE_DEST;
|
else
|
||||||
|
{
|
||||||
|
SetSurface(i, surface);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
srcLayout = RenderTextureLayout::COPY_SOURCE;
|
|
||||||
dstLayout = RenderTextureLayout::COPY_DEST;
|
|
||||||
}
|
|
||||||
|
|
||||||
AddBarrier(surface, srcLayout);
|
// Remember to clear later.
|
||||||
AddBarrier(args.texture, dstLayout);
|
g_pendingSurfaceCopies.emplace(surface);
|
||||||
FlushBarriers();
|
|
||||||
|
|
||||||
auto& commandList = g_commandLists[g_frame];
|
|
||||||
if (multiSampling)
|
|
||||||
{
|
|
||||||
if (isDepthStencil)
|
|
||||||
{
|
|
||||||
uint32_t pipelineIndex = 0;
|
|
||||||
|
|
||||||
switch (g_depthStencil->sampleCount)
|
|
||||||
{
|
|
||||||
case RenderSampleCount::COUNT_2:
|
|
||||||
pipelineIndex = 0;
|
|
||||||
break;
|
|
||||||
case RenderSampleCount::COUNT_4:
|
|
||||||
pipelineIndex = 1;
|
|
||||||
break;
|
|
||||||
case RenderSampleCount::COUNT_8:
|
|
||||||
pipelineIndex = 2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(false && "Unsupported MSAA sample count");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (args.texture->framebuffer == nullptr)
|
|
||||||
{
|
|
||||||
RenderFramebufferDesc desc;
|
|
||||||
desc.depthAttachment = args.texture->texture;
|
|
||||||
args.texture->framebuffer = g_device->createFramebuffer(desc);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_framebuffer != args.texture->framebuffer.get())
|
|
||||||
{
|
|
||||||
commandList->setFramebuffer(args.texture->framebuffer.get());
|
|
||||||
g_framebuffer = args.texture->framebuffer.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool oldHalfPixel = SetHalfPixel(false);
|
|
||||||
FlushViewport();
|
|
||||||
|
|
||||||
commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
|
|
||||||
commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t));
|
|
||||||
commandList->drawInstanced(6, 1, 0, 0);
|
|
||||||
|
|
||||||
g_dirtyStates.renderTargetAndDepthStencil = true;
|
|
||||||
g_dirtyStates.pipelineState = true;
|
|
||||||
|
|
||||||
if (g_vulkan)
|
|
||||||
{
|
|
||||||
g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
|
|
||||||
g_dirtyStates.vertexShaderConstants = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
SetHalfPixel(oldHalfPixel);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
commandList->resolveTexture(args.texture->texture, surface->texture);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
commandList->copyTexture(args.texture->texture, surface->texture);
|
|
||||||
}
|
|
||||||
|
|
||||||
AddBarrier(args.texture, RenderTextureLayout::SHADER_READ);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetDefaultViewport(GuestDevice* device, GuestSurface* surface)
|
static void SetDefaultViewport(GuestDevice* device, GuestSurface* surface)
|
||||||
|
|
@ -3028,6 +2962,170 @@ static void ProcSetDepthStencilSurface(const RenderCommand& cmd)
|
||||||
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN);
|
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurface* depthStencil)
|
||||||
|
{
|
||||||
|
bool addedAny = false;
|
||||||
|
|
||||||
|
for (const auto surface : { renderTarget, depthStencil })
|
||||||
|
{
|
||||||
|
if (surface != nullptr && !surface->destinationTextures.empty())
|
||||||
|
{
|
||||||
|
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
|
||||||
|
|
||||||
|
RenderTextureLayout srcLayout;
|
||||||
|
RenderTextureLayout dstLayout;
|
||||||
|
|
||||||
|
if (multiSampling)
|
||||||
|
{
|
||||||
|
if (surface == depthStencil)
|
||||||
|
{
|
||||||
|
srcLayout = RenderTextureLayout::SHADER_READ;
|
||||||
|
dstLayout = RenderTextureLayout::DEPTH_WRITE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
|
||||||
|
dstLayout = RenderTextureLayout::RESOLVE_DEST;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
srcLayout = RenderTextureLayout::COPY_SOURCE;
|
||||||
|
dstLayout = RenderTextureLayout::COPY_DEST;
|
||||||
|
}
|
||||||
|
|
||||||
|
AddBarrier(surface, srcLayout);
|
||||||
|
|
||||||
|
for (const auto texture : surface->destinationTextures)
|
||||||
|
AddBarrier(texture, dstLayout);
|
||||||
|
|
||||||
|
addedAny = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return addedAny;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestSurface* depthStencil)
|
||||||
|
{
|
||||||
|
auto& commandList = g_commandLists[g_frame];
|
||||||
|
|
||||||
|
for (const auto surface : { renderTarget, depthStencil })
|
||||||
|
{
|
||||||
|
if (surface != nullptr && !surface->destinationTextures.empty())
|
||||||
|
{
|
||||||
|
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
|
||||||
|
|
||||||
|
for (const auto texture : surface->destinationTextures)
|
||||||
|
{
|
||||||
|
if (multiSampling)
|
||||||
|
{
|
||||||
|
if (surface == depthStencil)
|
||||||
|
{
|
||||||
|
uint32_t pipelineIndex = 0;
|
||||||
|
|
||||||
|
switch (surface->sampleCount)
|
||||||
|
{
|
||||||
|
case RenderSampleCount::COUNT_2:
|
||||||
|
pipelineIndex = 0;
|
||||||
|
break;
|
||||||
|
case RenderSampleCount::COUNT_4:
|
||||||
|
pipelineIndex = 1;
|
||||||
|
break;
|
||||||
|
case RenderSampleCount::COUNT_8:
|
||||||
|
pipelineIndex = 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false && "Unsupported MSAA sample count");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (texture->framebuffer == nullptr)
|
||||||
|
{
|
||||||
|
RenderFramebufferDesc desc;
|
||||||
|
desc.depthAttachment = texture->texture;
|
||||||
|
texture->framebuffer = g_device->createFramebuffer(desc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g_framebuffer != texture->framebuffer.get())
|
||||||
|
{
|
||||||
|
commandList->setFramebuffer(texture->framebuffer.get());
|
||||||
|
g_framebuffer = texture->framebuffer.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
|
||||||
|
commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f));
|
||||||
|
commandList->setScissors(RenderRect(0, 0, texture->width, texture->height));
|
||||||
|
commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t));
|
||||||
|
commandList->drawInstanced(6, 1, 0, 0);
|
||||||
|
|
||||||
|
g_dirtyStates.renderTargetAndDepthStencil = true;
|
||||||
|
g_dirtyStates.viewport = true;
|
||||||
|
g_dirtyStates.pipelineState = true;
|
||||||
|
g_dirtyStates.scissorRect = true;
|
||||||
|
|
||||||
|
if (g_vulkan)
|
||||||
|
{
|
||||||
|
g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
|
||||||
|
g_dirtyStates.vertexShaderConstants = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
commandList->resolveTexture(texture->texture, surface->texture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
commandList->copyTexture(texture->texture, surface->texture);
|
||||||
|
}
|
||||||
|
|
||||||
|
texture->sourceSurface = nullptr;
|
||||||
|
|
||||||
|
// Check if any texture slots had this texture assigned, and make them point back at the original texture.
|
||||||
|
for (uint32_t i = 0; i < std::size(g_textures); i++)
|
||||||
|
{
|
||||||
|
if (g_textures[i] == texture)
|
||||||
|
SetTextureInRenderThread(i, texture);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
surface->destinationTextures.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ProcExecutePendingStretchRectCommands(const RenderCommand& cmd)
|
||||||
|
{
|
||||||
|
bool foundAny = false;
|
||||||
|
|
||||||
|
for (const auto surface : g_pendingSurfaceCopies)
|
||||||
|
{
|
||||||
|
// Depth stencil textures in this game are guaranteed to be transient.
|
||||||
|
if (surface->format != RenderFormat::D32_FLOAT)
|
||||||
|
foundAny |= PopulateBarriersForStretchRect(surface, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (foundAny)
|
||||||
|
{
|
||||||
|
FlushBarriers();
|
||||||
|
|
||||||
|
for (const auto surface : g_pendingSurfaceCopies)
|
||||||
|
{
|
||||||
|
if (surface->format != RenderFormat::D32_FLOAT)
|
||||||
|
ExecutePendingStretchRectCommands(surface, nullptr);
|
||||||
|
|
||||||
|
for (const auto texture : surface->destinationTextures)
|
||||||
|
texture->sourceSurface = nullptr;
|
||||||
|
|
||||||
|
surface->destinationTextures.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_pendingSurfaceCopies.clear();
|
||||||
|
g_pendingMsaaResolves.clear();
|
||||||
|
}
|
||||||
|
|
||||||
static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear)
|
static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear)
|
||||||
{
|
{
|
||||||
if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil)
|
if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil)
|
||||||
|
|
@ -3106,6 +3204,12 @@ static void ProcClear(const RenderCommand& cmd)
|
||||||
{
|
{
|
||||||
const auto& args = cmd.clear;
|
const auto& args = cmd.clear;
|
||||||
|
|
||||||
|
if (PopulateBarriersForStretchRect(g_renderTarget, g_depthStencil))
|
||||||
|
{
|
||||||
|
FlushBarriers();
|
||||||
|
ExecutePendingStretchRectCommands(g_renderTarget, g_depthStencil);
|
||||||
|
}
|
||||||
|
|
||||||
AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE);
|
AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE);
|
||||||
AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE);
|
AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE);
|
||||||
FlushBarriers();
|
FlushBarriers();
|
||||||
|
|
@ -3194,22 +3298,55 @@ static void SetTexture(GuestDevice* device, uint32_t index, GuestTexture* textur
|
||||||
g_renderQueue.enqueue(cmd);
|
g_renderQueue.enqueue(cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture)
|
||||||
|
{
|
||||||
|
AddBarrier(texture, RenderTextureLayout::SHADER_READ);
|
||||||
|
|
||||||
|
auto viewDimension = texture != nullptr ? texture->viewDimension : RenderTextureViewDimension::UNKNOWN;
|
||||||
|
|
||||||
|
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index],
|
||||||
|
viewDimension == RenderTextureViewDimension::TEXTURE_2D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D);
|
||||||
|
|
||||||
|
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], texture != nullptr &&
|
||||||
|
viewDimension == RenderTextureViewDimension::TEXTURE_3D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);
|
||||||
|
|
||||||
|
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], texture != nullptr &&
|
||||||
|
viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetSurface(uint32_t index, GuestSurface* surface)
|
||||||
|
{
|
||||||
|
AddBarrier(surface, RenderTextureLayout::SHADER_READ);
|
||||||
|
|
||||||
|
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index], surface->descriptorIndex);
|
||||||
|
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D));
|
||||||
|
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE));
|
||||||
|
}
|
||||||
|
|
||||||
static void ProcSetTexture(const RenderCommand& cmd)
|
static void ProcSetTexture(const RenderCommand& cmd)
|
||||||
{
|
{
|
||||||
const auto& args = cmd.setTexture;
|
const auto& args = cmd.setTexture;
|
||||||
|
|
||||||
AddBarrier(args.texture, RenderTextureLayout::SHADER_READ);
|
// If a pending copy operation is detected, set the source surface. The indices will be fixed later if flushing is necessary.
|
||||||
|
bool shouldSetTexture = true;
|
||||||
auto viewDimension = args.texture != nullptr ? args.texture->viewDimension : RenderTextureViewDimension::UNKNOWN;
|
if (args.texture != nullptr && args.texture->sourceSurface != nullptr)
|
||||||
|
{
|
||||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[args.index],
|
// MSAA surfaces need to be resolved and cannot be used directly.
|
||||||
viewDimension == RenderTextureViewDimension::TEXTURE_2D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D);
|
if (args.texture->sourceSurface->sampleCount != RenderSampleCount::COUNT_1)
|
||||||
|
{
|
||||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[args.index], args.texture != nullptr &&
|
g_pendingMsaaResolves.emplace(args.texture->sourceSurface);
|
||||||
viewDimension == RenderTextureViewDimension::TEXTURE_3D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetSurface(args.index, args.texture->sourceSurface);
|
||||||
|
shouldSetTexture = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[args.index], args.texture != nullptr &&
|
if (shouldSetTexture)
|
||||||
viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE);
|
SetTextureInRenderThread(args.index, args.texture);
|
||||||
|
|
||||||
|
g_textures[args.index] = args.texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetScissorRect(GuestDevice* device, GuestRect* rect)
|
static void SetScissorRect(GuestDevice* device, GuestRect* rect)
|
||||||
|
|
@ -3815,9 +3952,34 @@ static void FlushRenderStateForRenderThread()
|
||||||
auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr;
|
auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr;
|
||||||
auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr;
|
auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr;
|
||||||
|
|
||||||
|
bool foundAny = PopulateBarriersForStretchRect(renderTarget, depthStencil);
|
||||||
|
|
||||||
|
for (const auto surface : g_pendingMsaaResolves)
|
||||||
|
{
|
||||||
|
bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT);
|
||||||
|
foundAny |= PopulateBarriersForStretchRect(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (foundAny)
|
||||||
|
{
|
||||||
|
FlushBarriers();
|
||||||
|
ExecutePendingStretchRectCommands(renderTarget, depthStencil);
|
||||||
|
|
||||||
|
for (const auto surface : g_pendingMsaaResolves)
|
||||||
|
{
|
||||||
|
bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT);
|
||||||
|
ExecutePendingStretchRectCommands(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!g_pendingMsaaResolves.empty())
|
||||||
|
g_pendingMsaaResolves.clear();
|
||||||
|
|
||||||
AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE);
|
AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE);
|
||||||
AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE);
|
AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE);
|
||||||
|
|
||||||
FlushBarriers();
|
FlushBarriers();
|
||||||
|
|
||||||
SetFramebuffer(renderTarget, depthStencil, false);
|
SetFramebuffer(renderTarget, depthStencil, false);
|
||||||
FlushViewport();
|
FlushViewport();
|
||||||
|
|
||||||
|
|
@ -4580,35 +4742,36 @@ static std::thread g_renderThread([]
|
||||||
auto& cmd = commands[i];
|
auto& cmd = commands[i];
|
||||||
switch (cmd.type)
|
switch (cmd.type)
|
||||||
{
|
{
|
||||||
case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break;
|
case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break;
|
||||||
case RenderCommandType::DestructResource: ProcDestructResource(cmd); break;
|
case RenderCommandType::DestructResource: ProcDestructResource(cmd); break;
|
||||||
case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break;
|
case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break;
|
||||||
case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break;
|
case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break;
|
||||||
case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break;
|
case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break;
|
||||||
case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break;
|
case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break;
|
||||||
case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break;
|
case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break;
|
||||||
case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break;
|
case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break;
|
||||||
case RenderCommandType::StretchRect: ProcStretchRect(cmd); break;
|
case RenderCommandType::StretchRect: ProcStretchRect(cmd); break;
|
||||||
case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break;
|
case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break;
|
||||||
case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break;
|
case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break;
|
||||||
case RenderCommandType::Clear: ProcClear(cmd); break;
|
case RenderCommandType::ExecutePendingStretchRectCommands: ProcExecutePendingStretchRectCommands(cmd); break;
|
||||||
case RenderCommandType::SetViewport: ProcSetViewport(cmd); break;
|
case RenderCommandType::Clear: ProcClear(cmd); break;
|
||||||
case RenderCommandType::SetTexture: ProcSetTexture(cmd); break;
|
case RenderCommandType::SetViewport: ProcSetViewport(cmd); break;
|
||||||
case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break;
|
case RenderCommandType::SetTexture: ProcSetTexture(cmd); break;
|
||||||
case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break;
|
case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break;
|
||||||
case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break;
|
case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break;
|
||||||
case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break;
|
case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break;
|
||||||
case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break;
|
case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break;
|
||||||
case RenderCommandType::AddPipeline: ProcAddPipeline(cmd); break;
|
case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break;
|
||||||
case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break;
|
case RenderCommandType::AddPipeline: ProcAddPipeline(cmd); break;
|
||||||
case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break;
|
case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break;
|
||||||
case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break;
|
case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break;
|
||||||
case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break;
|
case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break;
|
||||||
case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break;
|
case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break;
|
||||||
case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break;
|
case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break;
|
||||||
case RenderCommandType::SetIndices: ProcSetIndices(cmd); break;
|
case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break;
|
||||||
case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break;
|
case RenderCommandType::SetIndices: ProcSetIndices(cmd); break;
|
||||||
default: assert(false && "Unrecognized render command type."); break;
|
case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break;
|
||||||
|
default: assert(false && "Unrecognized render command type."); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6772,6 +6935,52 @@ PPC_FUNC(sub_825E2F78)
|
||||||
__imp__sub_825E2F78(ctx, base);
|
__imp__sub_825E2F78(ctx, base);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Game shares surfaces with identical descriptions. We don't want to share shadow maps,
|
||||||
|
// so we can set its format to a depth format that still resolves to the same type in recomp,
|
||||||
|
// but manages to keep the surfaces actually separated in guest code.
|
||||||
|
void FxShadowMapInitMidAsmHook(PPCRegister& r11)
|
||||||
|
{
|
||||||
|
uint8_t* base = g_memory.base;
|
||||||
|
|
||||||
|
uint32_t surface = PPC_LOAD_U32(PPC_LOAD_U32(PPC_LOAD_U32(r11.u32 + 0x24) + 0x4));
|
||||||
|
PPC_STORE_U32(surface + 0x20, D3DFMT_D24FS8);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-render objects in the terrain shadow map instead of copying the texture.
|
||||||
|
static bool g_jumpOverStretchRect;
|
||||||
|
|
||||||
|
void FxShadowMapNoTerrainMidAsmHook(PPCRegister& r4, PPCRegister& r30)
|
||||||
|
{
|
||||||
|
// Set the no terrain shadow map as the render target.
|
||||||
|
uint8_t* base = g_memory.base;
|
||||||
|
r4.u64 = PPC_LOAD_U32(r30.u32 + 0x58);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FxShadowMapMidAsmHook(PPCRegister& r4, PPCRegister& r5, PPCRegister& r6, PPCRegister& r30)
|
||||||
|
{
|
||||||
|
if (g_jumpOverStretchRect)
|
||||||
|
{
|
||||||
|
// Reset for the next time shadow maps get rendered.
|
||||||
|
g_jumpOverStretchRect = false;
|
||||||
|
|
||||||
|
// Jump over the stretch rect call.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Mark to jump over the stretch rect call the next time.
|
||||||
|
g_jumpOverStretchRect = true;
|
||||||
|
|
||||||
|
// Jump to the beginning. Set registers accordingly to set the terrain shadow map as the render target.
|
||||||
|
uint8_t* base = g_memory.base;
|
||||||
|
r6.u64 = 0;
|
||||||
|
r5.u64 = 0;
|
||||||
|
r4.u64 = PPC_LOAD_U32(r30.u32 + 0x50);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice);
|
GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice);
|
||||||
|
|
||||||
GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource);
|
GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource);
|
||||||
|
|
|
||||||
|
|
@ -158,6 +158,7 @@ struct GuestTexture : GuestBaseTexture
|
||||||
void* mappedMemory = nullptr;
|
void* mappedMemory = nullptr;
|
||||||
std::unique_ptr<RenderFramebuffer> framebuffer;
|
std::unique_ptr<RenderFramebuffer> framebuffer;
|
||||||
std::unique_ptr<GuestTexture> patchedTexture;
|
std::unique_ptr<GuestTexture> patchedTexture;
|
||||||
|
struct GuestSurface* sourceSurface = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GuestLockedRect
|
struct GuestLockedRect
|
||||||
|
|
@ -205,6 +206,7 @@ struct GuestSurface : GuestBaseTexture
|
||||||
uint32_t guestFormat = 0;
|
uint32_t guestFormat = 0;
|
||||||
ankerl::unordered_dense::map<const RenderTexture*, std::unique_ptr<RenderFramebuffer>> framebuffers;
|
ankerl::unordered_dense::map<const RenderTexture*, std::unique_ptr<RenderFramebuffer>> framebuffers;
|
||||||
RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1;
|
RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1;
|
||||||
|
ankerl::unordered_dense::set<GuestTexture*> destinationTextures;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum GuestDeclType
|
enum GuestDeclType
|
||||||
|
|
|
||||||
|
|
@ -918,3 +918,21 @@ jump_address = 0x822C111C
|
||||||
[[midasm_hook]]
|
[[midasm_hook]]
|
||||||
name = "PressStartSaveLoadThreadMidAsmHook"
|
name = "PressStartSaveLoadThreadMidAsmHook"
|
||||||
address = 0x822C4358
|
address = 0x822C4358
|
||||||
|
|
||||||
|
[[midasm_hook]]
|
||||||
|
name = "FxShadowMapInitMidAsmHook"
|
||||||
|
address = 0x82BAD8F4
|
||||||
|
registers = ["r11"]
|
||||||
|
|
||||||
|
[[midasm_hook]]
|
||||||
|
name = "FxShadowMapNoTerrainMidAsmHook"
|
||||||
|
address = 0x82BAD9EC
|
||||||
|
registers = ["r4", "r30"]
|
||||||
|
after_instruction = true
|
||||||
|
|
||||||
|
[[midasm_hook]]
|
||||||
|
name = "FxShadowMapMidAsmHook"
|
||||||
|
address = 0x82BADADC
|
||||||
|
registers = ["r4", "r5", "r6", "r30"]
|
||||||
|
jump_address_on_true = 0x82BAD9F0
|
||||||
|
jump_address_on_false = 0x82BADAFC
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue