mirror of
https://github.com/hedge-dev/UnleashedRecomp.git
synced 2025-12-18 22:12:18 +00:00
Implement copy bypass optimization. (#262)
* Initial work on copy bypass optimization. * Force depth stencil textures to be transient. * Get rid of texture copying for shadow maps. * Move barrier populate function. * Set viewport/scissor rect explicitly for MSAA depth resolve.
This commit is contained in:
parent
342d696f99
commit
aaad10d797
3 changed files with 381 additions and 152 deletions
|
|
@ -155,11 +155,11 @@ static GuestSurface* g_renderTarget;
|
|||
static GuestSurface* g_depthStencil;
|
||||
static RenderFramebuffer* g_framebuffer;
|
||||
static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f);
|
||||
static bool g_halfPixel = true;
|
||||
static PipelineState g_pipelineState;
|
||||
static int32_t g_depthBias;
|
||||
static float g_slopeScaledDepthBias;
|
||||
static SharedConstants g_sharedConstants;
|
||||
static GuestTexture* g_textures[16];
|
||||
static RenderSamplerDesc g_samplerDescs[16];
|
||||
static bool g_scissorTestEnable = false;
|
||||
static RenderRect g_scissorRect;
|
||||
|
|
@ -681,6 +681,9 @@ enum class CsdFilterState
|
|||
|
||||
static CsdFilterState g_csdFilterState;
|
||||
|
||||
static ankerl::unordered_dense::set<GuestSurface*> g_pendingSurfaceCopies;
|
||||
static ankerl::unordered_dense::set<GuestSurface*> g_pendingMsaaResolves;
|
||||
|
||||
enum class RenderCommandType
|
||||
{
|
||||
SetRenderState,
|
||||
|
|
@ -694,6 +697,7 @@ enum class RenderCommandType
|
|||
StretchRect,
|
||||
SetRenderTarget,
|
||||
SetDepthStencilSurface,
|
||||
ExecutePendingStretchRectCommands,
|
||||
Clear,
|
||||
SetViewport,
|
||||
SetTexture,
|
||||
|
|
@ -710,7 +714,7 @@ enum class RenderCommandType
|
|||
SetVertexShader,
|
||||
SetStreamSource,
|
||||
SetIndices,
|
||||
SetPixelShader
|
||||
SetPixelShader,
|
||||
};
|
||||
|
||||
struct RenderCommand
|
||||
|
|
@ -1465,6 +1469,8 @@ static void BeginCommandList()
|
|||
g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE;
|
||||
}
|
||||
|
||||
memset(g_textures, 0, sizeof(g_textures));
|
||||
|
||||
if (Config::GITextureFiltering == EGITextureFiltering::Bicubic)
|
||||
g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER;
|
||||
else
|
||||
|
|
@ -2409,9 +2415,12 @@ static std::atomic<bool> g_executedCommandList;
|
|||
|
||||
void Video::Present()
|
||||
{
|
||||
RenderCommand cmd;
|
||||
cmd.type = RenderCommandType::ExecutePendingStretchRectCommands;
|
||||
g_renderQueue.enqueue(cmd);
|
||||
|
||||
DrawImGui();
|
||||
|
||||
RenderCommand cmd;
|
||||
cmd.type = RenderCommandType::ExecuteCommandList;
|
||||
g_renderQueue.enqueue(cmd);
|
||||
|
||||
|
|
@ -2497,7 +2506,7 @@ static void SetRootDescriptor(const UploadAllocation& allocation, size_t index)
|
|||
}
|
||||
|
||||
static void ProcExecuteCommandList(const RenderCommand& cmd)
|
||||
{
|
||||
{
|
||||
if (g_swapChainValid)
|
||||
{
|
||||
auto swapChainTexture = g_swapChain->getTexture(g_backBufferIndex);
|
||||
|
|
@ -2795,16 +2804,13 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for
|
|||
surface->guestFormat = format;
|
||||
surface->sampleCount = desc.multisampling.sampleCount;
|
||||
|
||||
if (desc.multisampling.sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT)
|
||||
{
|
||||
RenderTextureViewDesc viewDesc;
|
||||
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
|
||||
viewDesc.format = RenderFormat::D32_FLOAT;
|
||||
viewDesc.mipLevels = 1;
|
||||
surface->textureView = surface->textureHolder->createTextureView(viewDesc);
|
||||
surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
|
||||
g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
|
||||
}
|
||||
RenderTextureViewDesc viewDesc;
|
||||
viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D;
|
||||
viewDesc.format = desc.format;
|
||||
viewDesc.mipLevels = 1;
|
||||
surface->textureView = surface->textureHolder->createTextureView(viewDesc);
|
||||
surface->descriptorIndex = g_textureDescriptorAllocator.allocate();
|
||||
g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get());
|
||||
|
||||
#ifdef _DEBUG
|
||||
surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface)));
|
||||
|
|
@ -2820,11 +2826,8 @@ static void FlushViewport()
|
|||
if (g_dirtyStates.viewport)
|
||||
{
|
||||
auto viewport = g_viewport;
|
||||
if (g_halfPixel)
|
||||
{
|
||||
viewport.x += 0.5f;
|
||||
viewport.y += 0.5f;
|
||||
}
|
||||
viewport.x += 0.5f;
|
||||
viewport.y += 0.5f;
|
||||
|
||||
if (viewport.minDepth > viewport.maxDepth)
|
||||
std::swap(viewport.minDepth, viewport.maxDepth);
|
||||
|
|
@ -2848,13 +2851,6 @@ static void FlushViewport()
|
|||
}
|
||||
}
|
||||
|
||||
static bool SetHalfPixel(bool enable)
|
||||
{
|
||||
bool oldValue = g_halfPixel;
|
||||
SetDirtyValue(g_dirtyStates.viewport, g_halfPixel, enable);
|
||||
return oldValue;
|
||||
}
|
||||
|
||||
static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture)
|
||||
{
|
||||
RenderCommand cmd;
|
||||
|
|
@ -2864,105 +2860,43 @@ static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestText
|
|||
g_renderQueue.enqueue(cmd);
|
||||
}
|
||||
|
||||
static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture);
|
||||
static void SetSurface(uint32_t index, GuestSurface* surface);
|
||||
|
||||
static void ProcStretchRect(const RenderCommand& cmd)
|
||||
{
|
||||
const auto& args = cmd.stretchRect;
|
||||
|
||||
const bool isDepthStencil = (args.flags & 0x4) != 0;
|
||||
const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget;
|
||||
const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1;
|
||||
|
||||
RenderTextureLayout srcLayout;
|
||||
RenderTextureLayout dstLayout;
|
||||
// Erase the previous pending command so it doesn't cause the texture to be overridden.
|
||||
if (args.texture->sourceSurface != nullptr)
|
||||
args.texture->sourceSurface->destinationTextures.erase(args.texture);
|
||||
|
||||
if (multiSampling)
|
||||
args.texture->sourceSurface = surface;
|
||||
surface->destinationTextures.emplace(args.texture);
|
||||
|
||||
// If the texture is assigned to any slots, set it again. This'll also push the barrier.
|
||||
for (uint32_t i = 0; i < std::size(g_textures); i++)
|
||||
{
|
||||
if (isDepthStencil)
|
||||
if (g_textures[i] == args.texture)
|
||||
{
|
||||
srcLayout = RenderTextureLayout::SHADER_READ;
|
||||
dstLayout = RenderTextureLayout::DEPTH_WRITE;
|
||||
}
|
||||
else
|
||||
{
|
||||
srcLayout = RenderTextureLayout::RESOLVE_SOURCE;
|
||||
dstLayout = RenderTextureLayout::RESOLVE_DEST;
|
||||
// Set the original texture for MSAA textures as they always get resolved.
|
||||
if (surface->sampleCount != RenderSampleCount::COUNT_1)
|
||||
{
|
||||
SetTextureInRenderThread(i, args.texture);
|
||||
g_pendingMsaaResolves.emplace(surface);
|
||||
}
|
||||
else
|
||||
{
|
||||
SetSurface(i, surface);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
srcLayout = RenderTextureLayout::COPY_SOURCE;
|
||||
dstLayout = RenderTextureLayout::COPY_DEST;
|
||||
}
|
||||
|
||||
AddBarrier(surface, srcLayout);
|
||||
AddBarrier(args.texture, dstLayout);
|
||||
FlushBarriers();
|
||||
|
||||
auto& commandList = g_commandLists[g_frame];
|
||||
if (multiSampling)
|
||||
{
|
||||
if (isDepthStencil)
|
||||
{
|
||||
uint32_t pipelineIndex = 0;
|
||||
|
||||
switch (g_depthStencil->sampleCount)
|
||||
{
|
||||
case RenderSampleCount::COUNT_2:
|
||||
pipelineIndex = 0;
|
||||
break;
|
||||
case RenderSampleCount::COUNT_4:
|
||||
pipelineIndex = 1;
|
||||
break;
|
||||
case RenderSampleCount::COUNT_8:
|
||||
pipelineIndex = 2;
|
||||
break;
|
||||
default:
|
||||
assert(false && "Unsupported MSAA sample count");
|
||||
break;
|
||||
}
|
||||
|
||||
if (args.texture->framebuffer == nullptr)
|
||||
{
|
||||
RenderFramebufferDesc desc;
|
||||
desc.depthAttachment = args.texture->texture;
|
||||
args.texture->framebuffer = g_device->createFramebuffer(desc);
|
||||
}
|
||||
|
||||
if (g_framebuffer != args.texture->framebuffer.get())
|
||||
{
|
||||
commandList->setFramebuffer(args.texture->framebuffer.get());
|
||||
g_framebuffer = args.texture->framebuffer.get();
|
||||
}
|
||||
|
||||
bool oldHalfPixel = SetHalfPixel(false);
|
||||
FlushViewport();
|
||||
|
||||
commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get());
|
||||
commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t));
|
||||
commandList->drawInstanced(6, 1, 0, 0);
|
||||
|
||||
g_dirtyStates.renderTargetAndDepthStencil = true;
|
||||
g_dirtyStates.pipelineState = true;
|
||||
|
||||
if (g_vulkan)
|
||||
{
|
||||
g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
|
||||
g_dirtyStates.vertexShaderConstants = true;
|
||||
}
|
||||
|
||||
SetHalfPixel(oldHalfPixel);
|
||||
}
|
||||
else
|
||||
{
|
||||
commandList->resolveTexture(args.texture->texture, surface->texture);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
commandList->copyTexture(args.texture->texture, surface->texture);
|
||||
}
|
||||
|
||||
AddBarrier(args.texture, RenderTextureLayout::SHADER_READ);
|
||||
// Remember to clear later.
|
||||
g_pendingSurfaceCopies.emplace(surface);
|
||||
}
|
||||
|
||||
static void SetDefaultViewport(GuestDevice* device, GuestSurface* surface)
|
||||
|
|
@ -3028,6 +2962,170 @@ static void ProcSetDepthStencilSurface(const RenderCommand& cmd)
|
|||
SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN);
|
||||
}
|
||||
|
||||
// Queues the layout transitions needed to execute the pending StretchRect operations
// of the given surfaces. Either argument may be null. A surface is only considered when
// it has at least one destination texture registered.
//
// Layouts used for (source surface, destination textures):
//  - single sampled:          COPY_SOURCE    -> COPY_DEST
//  - multisampled color:      RESOLVE_SOURCE -> RESOLVE_DEST
//  - multisampled depth:      SHADER_READ    -> DEPTH_WRITE
//
// A surface is treated as depth when it was passed through the depthStencil parameter.
// Returns true if any barrier was added. The barriers are not flushed here.
static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurface* depthStencil)
{
    bool anyQueued = false;

    for (GuestSurface* const source : { renderTarget, depthStencil })
    {
        // Skip unset surfaces and surfaces with nothing pending.
        if (source == nullptr || source->destinationTextures.empty())
            continue;

        // Plain copy layouts by default; multisampled sources override them below.
        RenderTextureLayout sourceLayout = RenderTextureLayout::COPY_SOURCE;
        RenderTextureLayout destinationLayout = RenderTextureLayout::COPY_DEST;

        if (source->sampleCount != RenderSampleCount::COUNT_1)
        {
            const bool isDepthSource = (source == depthStencil);

            sourceLayout = isDepthSource ? RenderTextureLayout::SHADER_READ : RenderTextureLayout::RESOLVE_SOURCE;
            destinationLayout = isDepthSource ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::RESOLVE_DEST;
        }

        AddBarrier(source, sourceLayout);

        for (GuestTexture* const destination : source->destinationTextures)
            AddBarrier(destination, destinationLayout);

        anyQueued = true;
    }

    return anyQueued;
}
|
||||
|
||||
// Executes the StretchRect operations that were deferred for the given surfaces, writing
// each surface into every destination texture registered on it:
//  - single sampled surfaces are copied with copyTexture,
//  - multisampled color surfaces are resolved with resolveTexture,
//  - multisampled depth surfaces are resolved by drawing with one of the
//    g_resolveMsaaDepthPipelines into a framebuffer wrapping the destination texture.
//
// Each processed destination texture is detached from its source surface, and any texture
// slot that has it assigned is set again so the slot refers to the texture itself.
// Either argument may be null. This function adds no barriers on its own; the callers
// visible in this file run PopulateBarriersForStretchRect and FlushBarriers beforehand.
static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestSurface* depthStencil)
{
    auto& commandList = g_commandLists[g_frame];

    for (GuestSurface* const source : { renderTarget, depthStencil })
    {
        // Skip unset surfaces and surfaces with nothing pending.
        if (source == nullptr || source->destinationTextures.empty())
            continue;

        const bool isMultiSampled = source->sampleCount != RenderSampleCount::COUNT_1;
        const bool isDepthSource = (source == depthStencil);

        for (GuestTexture* const destination : source->destinationTextures)
        {
            if (!isMultiSampled)
            {
                commandList->copyTexture(destination->texture, source->texture);
            }
            else if (!isDepthSource)
            {
                commandList->resolveTexture(destination->texture, source->texture);
            }
            else
            {
                // Pick the resolve pipeline that matches the sample count of the source.
                uint32_t resolvePipeline = 0;

                switch (source->sampleCount)
                {
                case RenderSampleCount::COUNT_2:
                    resolvePipeline = 0;
                    break;
                case RenderSampleCount::COUNT_4:
                    resolvePipeline = 1;
                    break;
                case RenderSampleCount::COUNT_8:
                    resolvePipeline = 2;
                    break;
                default:
                    assert(false && "Unsupported MSAA sample count");
                    break;
                }

                // The destination is rendered to as a depth attachment; create its framebuffer on first use.
                if (destination->framebuffer == nullptr)
                {
                    RenderFramebufferDesc framebufferDesc;
                    framebufferDesc.depthAttachment = destination->texture;
                    destination->framebuffer = g_device->createFramebuffer(framebufferDesc);
                }

                RenderFramebuffer* const resolveTarget = destination->framebuffer.get();

                if (g_framebuffer != resolveTarget)
                {
                    commandList->setFramebuffer(resolveTarget);
                    g_framebuffer = resolveTarget;
                }

                // Viewport and scissor are set explicitly to cover the whole destination texture.
                commandList->setPipeline(g_resolveMsaaDepthPipelines[resolvePipeline].get());
                commandList->setViewports(RenderViewport(0.0f, 0.0f, static_cast<float>(destination->width), static_cast<float>(destination->height), 0.0f, 1.0f));
                commandList->setScissors(RenderRect(0, 0, destination->width, destination->height));
                commandList->setGraphicsPushConstants(0, &source->descriptorIndex, 0, sizeof(uint32_t));
                commandList->drawInstanced(6, 1, 0, 0);

                // The state changed above must be applied again before the next regular draw.
                g_dirtyStates.renderTargetAndDepthStencil = true;
                g_dirtyStates.viewport = true;
                g_dirtyStates.pipelineState = true;
                g_dirtyStates.scissorRect = true;

                if (g_vulkan)
                {
                    g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
                    g_dirtyStates.vertexShaderConstants = true;
                }
            }

            // The operation is no longer pending for this texture.
            destination->sourceSurface = nullptr;

            // Check if any texture slots had this texture assigned, and make them point back at the original texture.
            for (uint32_t slot = 0; slot < std::size(g_textures); ++slot)
            {
                if (g_textures[slot] == destination)
                    SetTextureInRenderThread(slot, destination);
            }
        }

        source->destinationTextures.clear();
    }
}
|
||||
|
||||
static void ProcExecutePendingStretchRectCommands(const RenderCommand& cmd)
|
||||
{
|
||||
bool foundAny = false;
|
||||
|
||||
for (const auto surface : g_pendingSurfaceCopies)
|
||||
{
|
||||
// Depth stencil textures in this game are guaranteed to be transient.
|
||||
if (surface->format != RenderFormat::D32_FLOAT)
|
||||
foundAny |= PopulateBarriersForStretchRect(surface, nullptr);
|
||||
}
|
||||
|
||||
if (foundAny)
|
||||
{
|
||||
FlushBarriers();
|
||||
|
||||
for (const auto surface : g_pendingSurfaceCopies)
|
||||
{
|
||||
if (surface->format != RenderFormat::D32_FLOAT)
|
||||
ExecutePendingStretchRectCommands(surface, nullptr);
|
||||
|
||||
for (const auto texture : surface->destinationTextures)
|
||||
texture->sourceSurface = nullptr;
|
||||
|
||||
surface->destinationTextures.clear();
|
||||
}
|
||||
}
|
||||
|
||||
g_pendingSurfaceCopies.clear();
|
||||
g_pendingMsaaResolves.clear();
|
||||
}
|
||||
|
||||
static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear)
|
||||
{
|
||||
if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil)
|
||||
|
|
@ -3106,6 +3204,12 @@ static void ProcClear(const RenderCommand& cmd)
|
|||
{
|
||||
const auto& args = cmd.clear;
|
||||
|
||||
if (PopulateBarriersForStretchRect(g_renderTarget, g_depthStencil))
|
||||
{
|
||||
FlushBarriers();
|
||||
ExecutePendingStretchRectCommands(g_renderTarget, g_depthStencil);
|
||||
}
|
||||
|
||||
AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE);
|
||||
AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE);
|
||||
FlushBarriers();
|
||||
|
|
@ -3194,22 +3298,55 @@ static void SetTexture(GuestDevice* device, uint32_t index, GuestTexture* textur
|
|||
g_renderQueue.enqueue(cmd);
|
||||
}
|
||||
|
||||
static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture)
|
||||
{
|
||||
AddBarrier(texture, RenderTextureLayout::SHADER_READ);
|
||||
|
||||
auto viewDimension = texture != nullptr ? texture->viewDimension : RenderTextureViewDimension::UNKNOWN;
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index],
|
||||
viewDimension == RenderTextureViewDimension::TEXTURE_2D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D);
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], texture != nullptr &&
|
||||
viewDimension == RenderTextureViewDimension::TEXTURE_3D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], texture != nullptr &&
|
||||
viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE);
|
||||
}
|
||||
|
||||
static void SetSurface(uint32_t index, GuestSurface* surface)
|
||||
{
|
||||
AddBarrier(surface, RenderTextureLayout::SHADER_READ);
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index], surface->descriptorIndex);
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D));
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE));
|
||||
}
|
||||
|
||||
static void ProcSetTexture(const RenderCommand& cmd)
|
||||
{
|
||||
const auto& args = cmd.setTexture;
|
||||
|
||||
AddBarrier(args.texture, RenderTextureLayout::SHADER_READ);
|
||||
|
||||
auto viewDimension = args.texture != nullptr ? args.texture->viewDimension : RenderTextureViewDimension::UNKNOWN;
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[args.index],
|
||||
viewDimension == RenderTextureViewDimension::TEXTURE_2D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D);
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[args.index], args.texture != nullptr &&
|
||||
viewDimension == RenderTextureViewDimension::TEXTURE_3D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);
|
||||
// If a pending copy operation is detected, set the source surface. The indices will be fixed later if flushing is necessary.
|
||||
bool shouldSetTexture = true;
|
||||
if (args.texture != nullptr && args.texture->sourceSurface != nullptr)
|
||||
{
|
||||
// MSAA surfaces need to be resolved and cannot be used directly.
|
||||
if (args.texture->sourceSurface->sampleCount != RenderSampleCount::COUNT_1)
|
||||
{
|
||||
g_pendingMsaaResolves.emplace(args.texture->sourceSurface);
|
||||
}
|
||||
else
|
||||
{
|
||||
SetSurface(args.index, args.texture->sourceSurface);
|
||||
shouldSetTexture = false;
|
||||
}
|
||||
}
|
||||
|
||||
SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[args.index], args.texture != nullptr &&
|
||||
viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE);
|
||||
if (shouldSetTexture)
|
||||
SetTextureInRenderThread(args.index, args.texture);
|
||||
|
||||
g_textures[args.index] = args.texture;
|
||||
}
|
||||
|
||||
static void SetScissorRect(GuestDevice* device, GuestRect* rect)
|
||||
|
|
@ -3815,9 +3952,34 @@ static void FlushRenderStateForRenderThread()
|
|||
auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr;
|
||||
auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr;
|
||||
|
||||
bool foundAny = PopulateBarriersForStretchRect(renderTarget, depthStencil);
|
||||
|
||||
for (const auto surface : g_pendingMsaaResolves)
|
||||
{
|
||||
bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT);
|
||||
foundAny |= PopulateBarriersForStretchRect(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr);
|
||||
}
|
||||
|
||||
if (foundAny)
|
||||
{
|
||||
FlushBarriers();
|
||||
ExecutePendingStretchRectCommands(renderTarget, depthStencil);
|
||||
|
||||
for (const auto surface : g_pendingMsaaResolves)
|
||||
{
|
||||
bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT);
|
||||
ExecutePendingStretchRectCommands(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
if (!g_pendingMsaaResolves.empty())
|
||||
g_pendingMsaaResolves.clear();
|
||||
|
||||
AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE);
|
||||
AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE);
|
||||
|
||||
FlushBarriers();
|
||||
|
||||
SetFramebuffer(renderTarget, depthStencil, false);
|
||||
FlushViewport();
|
||||
|
||||
|
|
@ -4580,35 +4742,36 @@ static std::thread g_renderThread([]
|
|||
auto& cmd = commands[i];
|
||||
switch (cmd.type)
|
||||
{
|
||||
case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break;
|
||||
case RenderCommandType::DestructResource: ProcDestructResource(cmd); break;
|
||||
case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break;
|
||||
case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break;
|
||||
case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break;
|
||||
case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break;
|
||||
case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break;
|
||||
case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break;
|
||||
case RenderCommandType::StretchRect: ProcStretchRect(cmd); break;
|
||||
case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break;
|
||||
case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break;
|
||||
case RenderCommandType::Clear: ProcClear(cmd); break;
|
||||
case RenderCommandType::SetViewport: ProcSetViewport(cmd); break;
|
||||
case RenderCommandType::SetTexture: ProcSetTexture(cmd); break;
|
||||
case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break;
|
||||
case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break;
|
||||
case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break;
|
||||
case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break;
|
||||
case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break;
|
||||
case RenderCommandType::AddPipeline: ProcAddPipeline(cmd); break;
|
||||
case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break;
|
||||
case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break;
|
||||
case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break;
|
||||
case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break;
|
||||
case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break;
|
||||
case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break;
|
||||
case RenderCommandType::SetIndices: ProcSetIndices(cmd); break;
|
||||
case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break;
|
||||
default: assert(false && "Unrecognized render command type."); break;
|
||||
case RenderCommandType::SetRenderState: ProcSetRenderState(cmd); break;
|
||||
case RenderCommandType::DestructResource: ProcDestructResource(cmd); break;
|
||||
case RenderCommandType::UnlockTextureRect: ProcUnlockTextureRect(cmd); break;
|
||||
case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break;
|
||||
case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break;
|
||||
case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break;
|
||||
case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break;
|
||||
case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break;
|
||||
case RenderCommandType::StretchRect: ProcStretchRect(cmd); break;
|
||||
case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break;
|
||||
case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break;
|
||||
case RenderCommandType::ExecutePendingStretchRectCommands: ProcExecutePendingStretchRectCommands(cmd); break;
|
||||
case RenderCommandType::Clear: ProcClear(cmd); break;
|
||||
case RenderCommandType::SetViewport: ProcSetViewport(cmd); break;
|
||||
case RenderCommandType::SetTexture: ProcSetTexture(cmd); break;
|
||||
case RenderCommandType::SetScissorRect: ProcSetScissorRect(cmd); break;
|
||||
case RenderCommandType::SetSamplerState: ProcSetSamplerState(cmd); break;
|
||||
case RenderCommandType::SetBooleans: ProcSetBooleans(cmd); break;
|
||||
case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break;
|
||||
case RenderCommandType::SetPixelShaderConstants: ProcSetPixelShaderConstants(cmd); break;
|
||||
case RenderCommandType::AddPipeline: ProcAddPipeline(cmd); break;
|
||||
case RenderCommandType::DrawPrimitive: ProcDrawPrimitive(cmd); break;
|
||||
case RenderCommandType::DrawIndexedPrimitive: ProcDrawIndexedPrimitive(cmd); break;
|
||||
case RenderCommandType::DrawPrimitiveUP: ProcDrawPrimitiveUP(cmd); break;
|
||||
case RenderCommandType::SetVertexDeclaration: ProcSetVertexDeclaration(cmd); break;
|
||||
case RenderCommandType::SetVertexShader: ProcSetVertexShader(cmd); break;
|
||||
case RenderCommandType::SetStreamSource: ProcSetStreamSource(cmd); break;
|
||||
case RenderCommandType::SetIndices: ProcSetIndices(cmd); break;
|
||||
case RenderCommandType::SetPixelShader: ProcSetPixelShader(cmd); break;
|
||||
default: assert(false && "Unrecognized render command type."); break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -6772,6 +6935,52 @@ PPC_FUNC(sub_825E2F78)
|
|||
__imp__sub_825E2F78(ctx, base);
|
||||
}
|
||||
|
||||
// Game shares surfaces with identical descriptions. We don't want to share shadow maps,
|
||||
// so we can set its format to a depth format that still resolves to the same type in recomp,
|
||||
// but manages to keep the surfaces actually separated in guest code.
|
||||
void FxShadowMapInitMidAsmHook(PPCRegister& r11)
|
||||
{
|
||||
uint8_t* base = g_memory.base;
|
||||
|
||||
uint32_t surface = PPC_LOAD_U32(PPC_LOAD_U32(PPC_LOAD_U32(r11.u32 + 0x24) + 0x4));
|
||||
PPC_STORE_U32(surface + 0x20, D3DFMT_D24FS8);
|
||||
}
|
||||
|
||||
// Re-render objects in the terrain shadow map instead of copying the texture.
|
||||
static bool g_jumpOverStretchRect;
|
||||
|
||||
void FxShadowMapNoTerrainMidAsmHook(PPCRegister& r4, PPCRegister& r30)
{
    uint8_t* base = g_memory.base;

    // Set the no terrain shadow map as the render target by loading
    // the value stored at r30 + 0x58 into r4.
    const uint32_t noTerrainShadowMap = PPC_LOAD_U32(r30.u32 + 0x58);
    r4.u64 = noTerrainShadowMap;
}
|
||||
|
||||
// Alternates between two outcomes on successive calls, tracked by g_jumpOverStretchRect:
//  - flag clear: sets the flag, zeroes r5 and r6, loads the terrain shadow map
//    (value at r30 + 0x50) into r4 and returns true to jump back to the beginning;
//  - flag set: clears the flag and returns false to jump over the stretch rect call.
bool FxShadowMapMidAsmHook(PPCRegister& r4, PPCRegister& r5, PPCRegister& r6, PPCRegister& r30)
{
    // Flip the flag so the next call takes the other path. When it was set, this resets it
    // for the next time shadow maps get rendered.
    const bool skipStretchRect = g_jumpOverStretchRect;
    g_jumpOverStretchRect = !skipStretchRect;

    // Jump over the stretch rect call.
    if (skipStretchRect)
        return false;

    // Jump to the beginning. Set registers accordingly to set the terrain shadow map as the render target.
    uint8_t* base = g_memory.base;
    r6.u64 = 0;
    r5.u64 = 0;
    r4.u64 = PPC_LOAD_U32(r30.u32 + 0x50);

    return true;
}
|
||||
|
||||
GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice);
|
||||
|
||||
GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource);
|
||||
|
|
|
|||
|
|
@ -158,6 +158,7 @@ struct GuestTexture : GuestBaseTexture
|
|||
void* mappedMemory = nullptr;
|
||||
std::unique_ptr<RenderFramebuffer> framebuffer;
|
||||
std::unique_ptr<GuestTexture> patchedTexture;
|
||||
struct GuestSurface* sourceSurface = nullptr;
|
||||
};
|
||||
|
||||
struct GuestLockedRect
|
||||
|
|
@ -205,6 +206,7 @@ struct GuestSurface : GuestBaseTexture
|
|||
uint32_t guestFormat = 0;
|
||||
ankerl::unordered_dense::map<const RenderTexture*, std::unique_ptr<RenderFramebuffer>> framebuffers;
|
||||
RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1;
|
||||
ankerl::unordered_dense::set<GuestTexture*> destinationTextures;
|
||||
};
|
||||
|
||||
enum GuestDeclType
|
||||
|
|
|
|||
|
|
@ -918,3 +918,21 @@ jump_address = 0x822C111C
|
|||
[[midasm_hook]]
|
||||
name = "PressStartSaveLoadThreadMidAsmHook"
|
||||
address = 0x822C4358
|
||||
|
||||
[[midasm_hook]]
|
||||
name = "FxShadowMapInitMidAsmHook"
|
||||
address = 0x82BAD8F4
|
||||
registers = ["r11"]
|
||||
|
||||
[[midasm_hook]]
|
||||
name = "FxShadowMapNoTerrainMidAsmHook"
|
||||
address = 0x82BAD9EC
|
||||
registers = ["r4", "r30"]
|
||||
after_instruction = true
|
||||
|
||||
[[midasm_hook]]
|
||||
name = "FxShadowMapMidAsmHook"
|
||||
address = 0x82BADADC
|
||||
registers = ["r4", "r5", "r6", "r30"]
|
||||
jump_address_on_true = 0x82BAD9F0
|
||||
jump_address_on_false = 0x82BADAFC
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue