mirror of
				https://github.com/hedge-dev/UnleashedRecomp.git
				synced 2025-10-30 07:11:05 +00:00 
			
		
		
		
	Implement copy bypass optimization. (#262)
* Initial work on copy bypass optimization.
* Force depth stencil textures to be transient.
* Get rid of texture copying for shadow maps.
* Move barrier populate function.
* Set viewport/scissor rect explicitly for MSAA depth resolve.
This commit is contained in:
		
							parent
							
								
									342d696f99
								
							
						
					
					
						commit
						aaad10d797
					
				
					 3 changed files with 381 additions and 152 deletions
				
			
		|  | @ -155,11 +155,11 @@ static GuestSurface* g_renderTarget; | |||
| static GuestSurface* g_depthStencil; | ||||
| static RenderFramebuffer* g_framebuffer; | ||||
| static RenderViewport g_viewport(0.0f, 0.0f, 1280.0f, 720.0f); | ||||
| static bool g_halfPixel = true; | ||||
| static PipelineState g_pipelineState; | ||||
| static int32_t g_depthBias; | ||||
| static float g_slopeScaledDepthBias; | ||||
| static SharedConstants g_sharedConstants; | ||||
| static GuestTexture* g_textures[16]; | ||||
| static RenderSamplerDesc g_samplerDescs[16]; | ||||
| static bool g_scissorTestEnable = false; | ||||
| static RenderRect g_scissorRect; | ||||
|  | @ -681,6 +681,9 @@ enum class CsdFilterState | |||
| 
 | ||||
| static CsdFilterState g_csdFilterState; | ||||
| 
 | ||||
| static ankerl::unordered_dense::set<GuestSurface*> g_pendingSurfaceCopies; | ||||
| static ankerl::unordered_dense::set<GuestSurface*> g_pendingMsaaResolves; | ||||
| 
 | ||||
| enum class RenderCommandType | ||||
| { | ||||
|     SetRenderState, | ||||
|  | @ -694,6 +697,7 @@ enum class RenderCommandType | |||
|     StretchRect, | ||||
|     SetRenderTarget, | ||||
|     SetDepthStencilSurface, | ||||
|     ExecutePendingStretchRectCommands, | ||||
|     Clear, | ||||
|     SetViewport, | ||||
|     SetTexture, | ||||
|  | @ -710,7 +714,7 @@ enum class RenderCommandType | |||
|     SetVertexShader, | ||||
|     SetStreamSource, | ||||
|     SetIndices, | ||||
|     SetPixelShader | ||||
|     SetPixelShader, | ||||
| }; | ||||
| 
 | ||||
| struct RenderCommand | ||||
|  | @ -1465,6 +1469,8 @@ static void BeginCommandList() | |||
|         g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE; | ||||
|     } | ||||
| 
 | ||||
|     memset(g_textures, 0, sizeof(g_textures)); | ||||
| 
 | ||||
|     if (Config::GITextureFiltering == EGITextureFiltering::Bicubic) | ||||
|         g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER; | ||||
|     else | ||||
|  | @ -2409,9 +2415,12 @@ static std::atomic<bool> g_executedCommandList; | |||
| 
 | ||||
| void Video::Present()  | ||||
| { | ||||
|     RenderCommand cmd; | ||||
|     cmd.type = RenderCommandType::ExecutePendingStretchRectCommands; | ||||
|     g_renderQueue.enqueue(cmd); | ||||
| 
 | ||||
|     DrawImGui(); | ||||
| 
 | ||||
|     RenderCommand cmd; | ||||
|     cmd.type = RenderCommandType::ExecuteCommandList; | ||||
|     g_renderQueue.enqueue(cmd); | ||||
| 
 | ||||
|  | @ -2497,7 +2506,7 @@ static void SetRootDescriptor(const UploadAllocation& allocation, size_t index) | |||
| } | ||||
| 
 | ||||
| static void ProcExecuteCommandList(const RenderCommand& cmd) | ||||
| { | ||||
| {     | ||||
|     if (g_swapChainValid) | ||||
|     { | ||||
|         auto swapChainTexture = g_swapChain->getTexture(g_backBufferIndex); | ||||
|  | @ -2795,16 +2804,13 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for | |||
|     surface->guestFormat = format; | ||||
|     surface->sampleCount = desc.multisampling.sampleCount; | ||||
| 
 | ||||
|     if (desc.multisampling.sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT) | ||||
|     { | ||||
|         RenderTextureViewDesc viewDesc; | ||||
|         viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; | ||||
|         viewDesc.format = RenderFormat::D32_FLOAT; | ||||
|         viewDesc.mipLevels = 1; | ||||
|         surface->textureView = surface->textureHolder->createTextureView(viewDesc); | ||||
|         surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); | ||||
|         g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); | ||||
|     } | ||||
|     RenderTextureViewDesc viewDesc; | ||||
|     viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; | ||||
|     viewDesc.format = desc.format; | ||||
|     viewDesc.mipLevels = 1; | ||||
|     surface->textureView = surface->textureHolder->createTextureView(viewDesc); | ||||
|     surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); | ||||
|     g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); | ||||
| 
 | ||||
| #ifdef _DEBUG  | ||||
|     surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface))); | ||||
|  | @ -2820,11 +2826,8 @@ static void FlushViewport() | |||
|     if (g_dirtyStates.viewport) | ||||
|     { | ||||
|         auto viewport = g_viewport; | ||||
|         if (g_halfPixel) | ||||
|         { | ||||
|             viewport.x += 0.5f; | ||||
|             viewport.y += 0.5f; | ||||
|         } | ||||
|         viewport.x += 0.5f; | ||||
|         viewport.y += 0.5f; | ||||
| 
 | ||||
|         if (viewport.minDepth > viewport.maxDepth) | ||||
|             std::swap(viewport.minDepth, viewport.maxDepth); | ||||
|  | @ -2848,13 +2851,6 @@ static void FlushViewport() | |||
|     } | ||||
| } | ||||
| 
 | ||||
| static bool SetHalfPixel(bool enable) | ||||
| { | ||||
|     bool oldValue = g_halfPixel; | ||||
|     SetDirtyValue(g_dirtyStates.viewport, g_halfPixel, enable); | ||||
|     return oldValue; | ||||
| } | ||||
| 
 | ||||
| static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture) | ||||
| { | ||||
|     RenderCommand cmd; | ||||
|  | @ -2864,105 +2860,43 @@ static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestText | |||
|     g_renderQueue.enqueue(cmd); | ||||
| } | ||||
| 
 | ||||
| static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture); | ||||
| static void SetSurface(uint32_t index, GuestSurface* surface); | ||||
| 
 | ||||
| static void ProcStretchRect(const RenderCommand& cmd) | ||||
| { | ||||
|     const auto& args = cmd.stretchRect; | ||||
| 
 | ||||
|     const bool isDepthStencil = (args.flags & 0x4) != 0; | ||||
|     const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget; | ||||
|     const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; | ||||
| 
 | ||||
|     RenderTextureLayout srcLayout; | ||||
|     RenderTextureLayout dstLayout; | ||||
|     // Erase previous pending command so it doesn't cause the texture to be overridden.
 | ||||
|     if (args.texture->sourceSurface != nullptr) | ||||
|         args.texture->sourceSurface->destinationTextures.erase(args.texture); | ||||
| 
 | ||||
|     if (multiSampling) | ||||
|     args.texture->sourceSurface = surface; | ||||
|     surface->destinationTextures.emplace(args.texture); | ||||
| 
 | ||||
|     // If the texture is assigned to any slots, set it again. This'll also push the barrier.
 | ||||
|     for (uint32_t i = 0; i < std::size(g_textures); i++) | ||||
|     { | ||||
|         if (isDepthStencil) | ||||
|         if (g_textures[i] == args.texture) | ||||
|         { | ||||
|             srcLayout = RenderTextureLayout::SHADER_READ; | ||||
|             dstLayout = RenderTextureLayout::DEPTH_WRITE; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             srcLayout = RenderTextureLayout::RESOLVE_SOURCE; | ||||
|             dstLayout = RenderTextureLayout::RESOLVE_DEST; | ||||
|             // Set the original texture for MSAA textures as they always get resolved.
 | ||||
|             if (surface->sampleCount != RenderSampleCount::COUNT_1) | ||||
|             { | ||||
|                 SetTextureInRenderThread(i, args.texture); | ||||
|                 g_pendingMsaaResolves.emplace(surface); | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 SetSurface(i, surface); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         srcLayout = RenderTextureLayout::COPY_SOURCE; | ||||
|         dstLayout = RenderTextureLayout::COPY_DEST; | ||||
|     } | ||||
| 
 | ||||
|     AddBarrier(surface, srcLayout); | ||||
|     AddBarrier(args.texture, dstLayout); | ||||
|     FlushBarriers(); | ||||
| 
 | ||||
|     auto& commandList = g_commandLists[g_frame]; | ||||
|     if (multiSampling) | ||||
|     { | ||||
|         if (isDepthStencil) | ||||
|         { | ||||
|             uint32_t pipelineIndex = 0; | ||||
| 
 | ||||
|             switch (g_depthStencil->sampleCount) | ||||
|             { | ||||
|             case RenderSampleCount::COUNT_2: | ||||
|                 pipelineIndex = 0; | ||||
|                 break; | ||||
|             case RenderSampleCount::COUNT_4: | ||||
|                 pipelineIndex = 1; | ||||
|                 break; | ||||
|             case RenderSampleCount::COUNT_8: | ||||
|                 pipelineIndex = 2; | ||||
|                 break; | ||||
|             default: | ||||
|                 assert(false && "Unsupported MSAA sample count"); | ||||
|                 break; | ||||
|             } | ||||
| 
 | ||||
|             if (args.texture->framebuffer == nullptr) | ||||
|             { | ||||
|                 RenderFramebufferDesc desc; | ||||
|                 desc.depthAttachment = args.texture->texture; | ||||
|                 args.texture->framebuffer = g_device->createFramebuffer(desc); | ||||
|             } | ||||
| 
 | ||||
|             if (g_framebuffer != args.texture->framebuffer.get()) | ||||
|             { | ||||
|                 commandList->setFramebuffer(args.texture->framebuffer.get()); | ||||
|                 g_framebuffer = args.texture->framebuffer.get(); | ||||
|             } | ||||
| 
 | ||||
|             bool oldHalfPixel = SetHalfPixel(false); | ||||
|             FlushViewport(); | ||||
| 
 | ||||
|             commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); | ||||
|             commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t)); | ||||
|             commandList->drawInstanced(6, 1, 0, 0); | ||||
| 
 | ||||
|             g_dirtyStates.renderTargetAndDepthStencil = true; | ||||
|             g_dirtyStates.pipelineState = true; | ||||
| 
 | ||||
|             if (g_vulkan) | ||||
|             { | ||||
|                 g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
 | ||||
|                 g_dirtyStates.vertexShaderConstants = true; | ||||
|             } | ||||
| 
 | ||||
|             SetHalfPixel(oldHalfPixel); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             commandList->resolveTexture(args.texture->texture, surface->texture); | ||||
|         } | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         commandList->copyTexture(args.texture->texture, surface->texture); | ||||
|     } | ||||
| 
 | ||||
|     AddBarrier(args.texture, RenderTextureLayout::SHADER_READ); | ||||
|     // Remember to clear later.
 | ||||
|     g_pendingSurfaceCopies.emplace(surface); | ||||
| } | ||||
| 
 | ||||
| static void SetDefaultViewport(GuestDevice* device, GuestSurface* surface) | ||||
|  | @ -3028,6 +2962,170 @@ static void ProcSetDepthStencilSurface(const RenderCommand& cmd) | |||
|     SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN); | ||||
| } | ||||
| 
 | ||||
| static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurface* depthStencil) | ||||
| { | ||||
|     bool addedAny = false; | ||||
| 
 | ||||
|     for (const auto surface : { renderTarget, depthStencil }) | ||||
|     { | ||||
|         if (surface != nullptr && !surface->destinationTextures.empty()) | ||||
|         { | ||||
|             const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; | ||||
| 
 | ||||
|             RenderTextureLayout srcLayout; | ||||
|             RenderTextureLayout dstLayout; | ||||
| 
 | ||||
|             if (multiSampling) | ||||
|             { | ||||
|                 if (surface == depthStencil) | ||||
|                 { | ||||
|                     srcLayout = RenderTextureLayout::SHADER_READ; | ||||
|                     dstLayout = RenderTextureLayout::DEPTH_WRITE; | ||||
|                 } | ||||
|                 else | ||||
|                 { | ||||
|                     srcLayout = RenderTextureLayout::RESOLVE_SOURCE; | ||||
|                     dstLayout = RenderTextureLayout::RESOLVE_DEST; | ||||
|                 } | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 srcLayout = RenderTextureLayout::COPY_SOURCE; | ||||
|                 dstLayout = RenderTextureLayout::COPY_DEST; | ||||
|             } | ||||
| 
 | ||||
|             AddBarrier(surface, srcLayout); | ||||
| 
 | ||||
|             for (const auto texture : surface->destinationTextures) | ||||
|                 AddBarrier(texture, dstLayout); | ||||
| 
 | ||||
|             addedAny = true; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return addedAny; | ||||
| } | ||||
| 
 | ||||
| static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestSurface* depthStencil) | ||||
| { | ||||
|     auto& commandList = g_commandLists[g_frame]; | ||||
| 
 | ||||
|     for (const auto surface : { renderTarget, depthStencil }) | ||||
|     { | ||||
|         if (surface != nullptr && !surface->destinationTextures.empty()) | ||||
|         { | ||||
|             const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; | ||||
| 
 | ||||
|             for (const auto texture : surface->destinationTextures) | ||||
|             { | ||||
|                 if (multiSampling) | ||||
|                 { | ||||
|                     if (surface == depthStencil) | ||||
|                     { | ||||
|                         uint32_t pipelineIndex = 0; | ||||
| 
 | ||||
|                         switch (surface->sampleCount) | ||||
|                         { | ||||
|                         case RenderSampleCount::COUNT_2: | ||||
|                             pipelineIndex = 0; | ||||
|                             break; | ||||
|                         case RenderSampleCount::COUNT_4: | ||||
|                             pipelineIndex = 1; | ||||
|                             break; | ||||
|                         case RenderSampleCount::COUNT_8: | ||||
|                             pipelineIndex = 2; | ||||
|                             break; | ||||
|                         default: | ||||
|                             assert(false && "Unsupported MSAA sample count"); | ||||
|                             break; | ||||
|                         } | ||||
| 
 | ||||
|                         if (texture->framebuffer == nullptr) | ||||
|                         { | ||||
|                             RenderFramebufferDesc desc; | ||||
|                             desc.depthAttachment = texture->texture; | ||||
|                             texture->framebuffer = g_device->createFramebuffer(desc); | ||||
|                         } | ||||
| 
 | ||||
|                         if (g_framebuffer != texture->framebuffer.get()) | ||||
|                         { | ||||
|                             commandList->setFramebuffer(texture->framebuffer.get()); | ||||
|                             g_framebuffer = texture->framebuffer.get(); | ||||
|                         } | ||||
| 
 | ||||
|                         commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); | ||||
|                         commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); | ||||
|                         commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); | ||||
|                         commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); | ||||
|                         commandList->drawInstanced(6, 1, 0, 0); | ||||
| 
 | ||||
|                         g_dirtyStates.renderTargetAndDepthStencil = true; | ||||
|                         g_dirtyStates.viewport = true; | ||||
|                         g_dirtyStates.pipelineState = true; | ||||
|                         g_dirtyStates.scissorRect = true; | ||||
| 
 | ||||
|                         if (g_vulkan) | ||||
|                         { | ||||
|                             g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias.
 | ||||
|                             g_dirtyStates.vertexShaderConstants = true; | ||||
|                         } | ||||
|                     } | ||||
|                     else | ||||
|                     { | ||||
|                         commandList->resolveTexture(texture->texture, surface->texture); | ||||
|                     } | ||||
|                 } | ||||
|                 else | ||||
|                 { | ||||
|                     commandList->copyTexture(texture->texture, surface->texture); | ||||
|                 } | ||||
| 
 | ||||
|                 texture->sourceSurface = nullptr; | ||||
| 
 | ||||
|                 // Check if any texture slots had this texture assigned, and make it point back at the original texture.
 | ||||
|                 for (uint32_t i = 0; i < std::size(g_textures); i++) | ||||
|                 { | ||||
|                     if (g_textures[i] == texture) | ||||
|                         SetTextureInRenderThread(i, texture); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             surface->destinationTextures.clear(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void ProcExecutePendingStretchRectCommands(const RenderCommand& cmd) | ||||
| { | ||||
|     bool foundAny = false; | ||||
| 
 | ||||
|     for (const auto surface : g_pendingSurfaceCopies) | ||||
|     { | ||||
|         // Depth stencil textures in this game are guaranteed to be transient.
 | ||||
|         if (surface->format != RenderFormat::D32_FLOAT) | ||||
|             foundAny |= PopulateBarriersForStretchRect(surface, nullptr); | ||||
|     } | ||||
| 
 | ||||
|     if (foundAny) | ||||
|     { | ||||
|         FlushBarriers(); | ||||
| 
 | ||||
|         for (const auto surface : g_pendingSurfaceCopies) | ||||
|         { | ||||
|             if (surface->format != RenderFormat::D32_FLOAT) | ||||
|                 ExecutePendingStretchRectCommands(surface, nullptr); | ||||
| 
 | ||||
|             for (const auto texture : surface->destinationTextures) | ||||
|                 texture->sourceSurface = nullptr; | ||||
| 
 | ||||
|             surface->destinationTextures.clear(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     g_pendingSurfaceCopies.clear(); | ||||
|     g_pendingMsaaResolves.clear(); | ||||
| } | ||||
| 
 | ||||
| static void SetFramebuffer(GuestSurface* renderTarget, GuestSurface* depthStencil, bool settingForClear) | ||||
| { | ||||
|     if (settingForClear || g_dirtyStates.renderTargetAndDepthStencil) | ||||
|  | @ -3106,6 +3204,12 @@ static void ProcClear(const RenderCommand& cmd) | |||
| { | ||||
|     const auto& args = cmd.clear; | ||||
| 
 | ||||
|     if (PopulateBarriersForStretchRect(g_renderTarget, g_depthStencil)) | ||||
|     { | ||||
|         FlushBarriers(); | ||||
|         ExecutePendingStretchRectCommands(g_renderTarget, g_depthStencil); | ||||
|     } | ||||
| 
 | ||||
|     AddBarrier(g_renderTarget, RenderTextureLayout::COLOR_WRITE); | ||||
|     AddBarrier(g_depthStencil, RenderTextureLayout::DEPTH_WRITE); | ||||
|     FlushBarriers(); | ||||
|  | @ -3194,22 +3298,55 @@ static void SetTexture(GuestDevice* device, uint32_t index, GuestTexture* textur | |||
|     g_renderQueue.enqueue(cmd); | ||||
| } | ||||
| 
 | ||||
| static void SetTextureInRenderThread(uint32_t index, GuestTexture* texture) | ||||
| { | ||||
|     AddBarrier(texture, RenderTextureLayout::SHADER_READ); | ||||
| 
 | ||||
|     auto viewDimension = texture != nullptr ? texture->viewDimension : RenderTextureViewDimension::UNKNOWN; | ||||
| 
 | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index], | ||||
|         viewDimension == RenderTextureViewDimension::TEXTURE_2D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D); | ||||
| 
 | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], texture != nullptr && | ||||
|         viewDimension == RenderTextureViewDimension::TEXTURE_3D ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D); | ||||
| 
 | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], texture != nullptr && | ||||
|         viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE); | ||||
| } | ||||
| 
 | ||||
| static void SetSurface(uint32_t index, GuestSurface* surface) | ||||
| { | ||||
|     AddBarrier(surface, RenderTextureLayout::SHADER_READ); | ||||
| 
 | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[index], surface->descriptorIndex); | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D)); | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[index], uint32_t(TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE)); | ||||
| } | ||||
| 
 | ||||
| static void ProcSetTexture(const RenderCommand& cmd) | ||||
| { | ||||
|     const auto& args = cmd.setTexture; | ||||
| 
 | ||||
|     AddBarrier(args.texture, RenderTextureLayout::SHADER_READ); | ||||
| 
 | ||||
|     auto viewDimension = args.texture != nullptr ? args.texture->viewDimension : RenderTextureViewDimension::UNKNOWN; | ||||
| 
 | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture2DIndices[args.index], | ||||
|         viewDimension == RenderTextureViewDimension::TEXTURE_2D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D); | ||||
| 
 | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.texture3DIndices[args.index], args.texture != nullptr && | ||||
|         viewDimension == RenderTextureViewDimension::TEXTURE_3D ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D);  | ||||
|     // If a pending copy operation is detected, set the source surface. The indices will be fixed later if flushing is necessary.
 | ||||
|     bool shouldSetTexture = true; | ||||
|     if (args.texture != nullptr && args.texture->sourceSurface != nullptr) | ||||
|     { | ||||
|         // MSAA surfaces need to be resolved and cannot be used directly.
 | ||||
|         if (args.texture->sourceSurface->sampleCount != RenderSampleCount::COUNT_1) | ||||
|         { | ||||
|             g_pendingMsaaResolves.emplace(args.texture->sourceSurface); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             SetSurface(args.index, args.texture->sourceSurface); | ||||
|             shouldSetTexture = false; | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.textureCubeIndices[args.index], args.texture != nullptr && | ||||
|         viewDimension == RenderTextureViewDimension::TEXTURE_CUBE ? args.texture->descriptorIndex : TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE); | ||||
|     if (shouldSetTexture) | ||||
|         SetTextureInRenderThread(args.index, args.texture); | ||||
|      | ||||
|     g_textures[args.index] = args.texture; | ||||
| } | ||||
| 
 | ||||
| static void SetScissorRect(GuestDevice* device, GuestRect* rect) | ||||
|  | @ -3815,9 +3952,34 @@ static void FlushRenderStateForRenderThread() | |||
|     auto renderTarget = g_pipelineState.colorWriteEnable ? g_renderTarget : nullptr; | ||||
|     auto depthStencil = g_pipelineState.zEnable ? g_depthStencil : nullptr; | ||||
| 
 | ||||
|     bool foundAny = PopulateBarriersForStretchRect(renderTarget, depthStencil); | ||||
| 
 | ||||
|     for (const auto surface : g_pendingMsaaResolves) | ||||
|     { | ||||
|         bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT); | ||||
|         foundAny |= PopulateBarriersForStretchRect(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr); | ||||
|     } | ||||
| 
 | ||||
|     if (foundAny) | ||||
|     { | ||||
|         FlushBarriers(); | ||||
|         ExecutePendingStretchRectCommands(renderTarget, depthStencil); | ||||
| 
 | ||||
|         for (const auto surface : g_pendingMsaaResolves) | ||||
|         { | ||||
|             bool isDepthStencil = (surface->format == RenderFormat::D32_FLOAT); | ||||
|             ExecutePendingStretchRectCommands(isDepthStencil ? nullptr : surface, isDepthStencil ? surface : nullptr); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (!g_pendingMsaaResolves.empty()) | ||||
|         g_pendingMsaaResolves.clear(); | ||||
| 
 | ||||
|     AddBarrier(renderTarget, RenderTextureLayout::COLOR_WRITE); | ||||
|     AddBarrier(depthStencil, RenderTextureLayout::DEPTH_WRITE); | ||||
| 
 | ||||
|     FlushBarriers(); | ||||
| 
 | ||||
|     SetFramebuffer(renderTarget, depthStencil, false); | ||||
|     FlushViewport(); | ||||
| 
 | ||||
|  | @ -4580,35 +4742,36 @@ static std::thread g_renderThread([] | |||
|                 auto& cmd = commands[i]; | ||||
|                 switch (cmd.type) | ||||
|                 { | ||||
|                 case RenderCommandType::SetRenderState:           ProcSetRenderState(cmd); break; | ||||
|                 case RenderCommandType::DestructResource:         ProcDestructResource(cmd); break; | ||||
|                 case RenderCommandType::UnlockTextureRect:        ProcUnlockTextureRect(cmd); break; | ||||
|                 case RenderCommandType::UnlockBuffer16:           ProcUnlockBuffer16(cmd); break; | ||||
|                 case RenderCommandType::UnlockBuffer32:           ProcUnlockBuffer32(cmd); break; | ||||
|                 case RenderCommandType::DrawImGui:                ProcDrawImGui(cmd); break; | ||||
|                 case RenderCommandType::ExecuteCommandList:       ProcExecuteCommandList(cmd); break; | ||||
|                 case RenderCommandType::BeginCommandList:         ProcBeginCommandList(cmd); break; | ||||
|                 case RenderCommandType::StretchRect:              ProcStretchRect(cmd); break; | ||||
|                 case RenderCommandType::SetRenderTarget:          ProcSetRenderTarget(cmd); break; | ||||
|                 case RenderCommandType::SetDepthStencilSurface:   ProcSetDepthStencilSurface(cmd); break; | ||||
|                 case RenderCommandType::Clear:                    ProcClear(cmd); break; | ||||
|                 case RenderCommandType::SetViewport:              ProcSetViewport(cmd); break; | ||||
|                 case RenderCommandType::SetTexture:               ProcSetTexture(cmd); break; | ||||
|                 case RenderCommandType::SetScissorRect:           ProcSetScissorRect(cmd); break; | ||||
|                 case RenderCommandType::SetSamplerState:          ProcSetSamplerState(cmd); break; | ||||
|                 case RenderCommandType::SetBooleans:              ProcSetBooleans(cmd); break; | ||||
|                 case RenderCommandType::SetVertexShaderConstants: ProcSetVertexShaderConstants(cmd); break; | ||||
|                 case RenderCommandType::SetPixelShaderConstants:  ProcSetPixelShaderConstants(cmd); break; | ||||
|                 case RenderCommandType::AddPipeline:              ProcAddPipeline(cmd); break; | ||||
|                 case RenderCommandType::DrawPrimitive:            ProcDrawPrimitive(cmd); break; | ||||
|                 case RenderCommandType::DrawIndexedPrimitive:     ProcDrawIndexedPrimitive(cmd); break; | ||||
|                 case RenderCommandType::DrawPrimitiveUP:          ProcDrawPrimitiveUP(cmd); break; | ||||
|                 case RenderCommandType::SetVertexDeclaration:     ProcSetVertexDeclaration(cmd); break; | ||||
|                 case RenderCommandType::SetVertexShader:          ProcSetVertexShader(cmd); break; | ||||
|                 case RenderCommandType::SetStreamSource:          ProcSetStreamSource(cmd); break; | ||||
|                 case RenderCommandType::SetIndices:               ProcSetIndices(cmd); break; | ||||
|                 case RenderCommandType::SetPixelShader:           ProcSetPixelShader(cmd); break; | ||||
|                 default:                                          assert(false && "Unrecognized render command type."); break; | ||||
|                 case RenderCommandType::SetRenderState:                    ProcSetRenderState(cmd); break; | ||||
|                 case RenderCommandType::DestructResource:                  ProcDestructResource(cmd); break; | ||||
|                 case RenderCommandType::UnlockTextureRect:                 ProcUnlockTextureRect(cmd); break; | ||||
|                 case RenderCommandType::UnlockBuffer16:                    ProcUnlockBuffer16(cmd); break; | ||||
|                 case RenderCommandType::UnlockBuffer32:                    ProcUnlockBuffer32(cmd); break; | ||||
|                 case RenderCommandType::DrawImGui:                         ProcDrawImGui(cmd); break; | ||||
|                 case RenderCommandType::ExecuteCommandList:                ProcExecuteCommandList(cmd); break; | ||||
|                 case RenderCommandType::BeginCommandList:                  ProcBeginCommandList(cmd); break; | ||||
|                 case RenderCommandType::StretchRect:                       ProcStretchRect(cmd); break; | ||||
|                 case RenderCommandType::SetRenderTarget:                   ProcSetRenderTarget(cmd); break; | ||||
|                 case RenderCommandType::SetDepthStencilSurface:            ProcSetDepthStencilSurface(cmd); break; | ||||
|                 case RenderCommandType::ExecutePendingStretchRectCommands: ProcExecutePendingStretchRectCommands(cmd); break; | ||||
|                 case RenderCommandType::Clear:                             ProcClear(cmd); break; | ||||
|                 case RenderCommandType::SetViewport:                       ProcSetViewport(cmd); break; | ||||
|                 case RenderCommandType::SetTexture:                        ProcSetTexture(cmd); break; | ||||
|                 case RenderCommandType::SetScissorRect:                    ProcSetScissorRect(cmd); break; | ||||
|                 case RenderCommandType::SetSamplerState:                   ProcSetSamplerState(cmd); break; | ||||
|                 case RenderCommandType::SetBooleans:                       ProcSetBooleans(cmd); break; | ||||
|                 case RenderCommandType::SetVertexShaderConstants:          ProcSetVertexShaderConstants(cmd); break; | ||||
|                 case RenderCommandType::SetPixelShaderConstants:           ProcSetPixelShaderConstants(cmd); break; | ||||
|                 case RenderCommandType::AddPipeline:                       ProcAddPipeline(cmd); break; | ||||
|                 case RenderCommandType::DrawPrimitive:                     ProcDrawPrimitive(cmd); break; | ||||
|                 case RenderCommandType::DrawIndexedPrimitive:              ProcDrawIndexedPrimitive(cmd); break; | ||||
|                 case RenderCommandType::DrawPrimitiveUP:                   ProcDrawPrimitiveUP(cmd); break; | ||||
|                 case RenderCommandType::SetVertexDeclaration:              ProcSetVertexDeclaration(cmd); break; | ||||
|                 case RenderCommandType::SetVertexShader:                   ProcSetVertexShader(cmd); break; | ||||
|                 case RenderCommandType::SetStreamSource:                   ProcSetStreamSource(cmd); break; | ||||
|                 case RenderCommandType::SetIndices:                        ProcSetIndices(cmd); break; | ||||
|                 case RenderCommandType::SetPixelShader:                    ProcSetPixelShader(cmd); break; | ||||
|                 default:                                                   assert(false && "Unrecognized render command type."); break; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|  | @ -6772,6 +6935,52 @@ PPC_FUNC(sub_825E2F78) | |||
|     __imp__sub_825E2F78(ctx, base); | ||||
| } | ||||
| 
 | ||||
| // The game shares surfaces that have identical descriptions, but shadow maps must not be shared.
 | ||||
| // To prevent that, overwrite the shadow map surface's format with D3DFMT_D24FS8: a depth format that still resolves to the same type in recomp,
 | ||||
| // but keeps the surfaces separated in guest code. NOTE(review): 0x24, 0x4 and 0x20 are guest structure offsets; their field names are not visible here.
 | ||||
| void FxShadowMapInitMidAsmHook(PPCRegister& r11) | ||||
| { | ||||
|     uint8_t* base = g_memory.base; | ||||
| 
 | ||||
|     uint32_t surface = PPC_LOAD_U32(PPC_LOAD_U32(PPC_LOAD_U32(r11.u32 + 0x24) + 0x4)); | ||||
|     PPC_STORE_U32(surface + 0x20, D3DFMT_D24FS8); | ||||
| } | ||||
| 
 | ||||
| // Re-render objects into the terrain shadow map instead of copying the texture. This flag alternates on every FxShadowMapMidAsmHook call: false means "jump back and render again", true means "skip the stretch rect call".
 | ||||
| static bool g_jumpOverStretchRect; | ||||
| 
 | ||||
| void FxShadowMapNoTerrainMidAsmHook(PPCRegister& r4, PPCRegister& r30) | ||||
| { | ||||
|     // Set the no terrain shadow map as the render target: r4 receives the 32-bit guest value read from r30 + 0x58. NOTE(review): `base` is presumably consumed by the PPC_LOAD_U32 macro - verify.
 | ||||
|     uint8_t* base = g_memory.base; | ||||
|     r4.u64 = PPC_LOAD_U32(r30.u32 + 0x58); | ||||
| } | ||||
| 
 | ||||
| bool FxShadowMapMidAsmHook(PPCRegister& r4, PPCRegister& r5, PPCRegister& r6, PPCRegister& r30) | ||||
| { | ||||
|     if (g_jumpOverStretchRect) | ||||
|     { | ||||
|         // Second visit: clear the flag so the next time shadow maps get rendered starts from the first-visit branch again.
 | ||||
|         g_jumpOverStretchRect = false; | ||||
| 
 | ||||
|         // Jump over the stretch rect call. Returning false selects jump_address_on_false (0x82BADAFC) in the midasm hook configuration.
 | ||||
|         return false; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         // First visit: mark the flag so the next call to this hook jumps over the stretch rect call.
 | ||||
|         g_jumpOverStretchRect = true; | ||||
| 
 | ||||
|         // Jump to the beginning. r4 receives the 32-bit guest value at r30 + 0x50 (the terrain shadow map) and r5/r6 are zeroed so it is set as the render target; returning true selects jump_address_on_true (0x82BAD9F0).
 | ||||
|         uint8_t* base = g_memory.base; | ||||
|         r6.u64 = 0; | ||||
|         r5.u64 = 0; | ||||
|         r4.u64 = PPC_LOAD_U32(r30.u32 + 0x50); | ||||
| 
 | ||||
|         return true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice); | ||||
| 
 | ||||
| GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource); | ||||
|  |  | |||
|  | @ -158,6 +158,7 @@ struct GuestTexture : GuestBaseTexture | |||
|     void* mappedMemory = nullptr; | ||||
|     std::unique_ptr<RenderFramebuffer> framebuffer; | ||||
|     std::unique_ptr<GuestTexture> patchedTexture; | ||||
|     struct GuestSurface* sourceSurface = nullptr; | ||||
| }; | ||||
| 
 | ||||
| struct GuestLockedRect | ||||
|  | @ -205,6 +206,7 @@ struct GuestSurface : GuestBaseTexture | |||
|     uint32_t guestFormat = 0; | ||||
|     ankerl::unordered_dense::map<const RenderTexture*, std::unique_ptr<RenderFramebuffer>> framebuffers; | ||||
|     RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; | ||||
|     ankerl::unordered_dense::set<GuestTexture*> destinationTextures; | ||||
| }; | ||||
| 
 | ||||
| enum GuestDeclType | ||||
|  |  | |||
|  | @ -918,3 +918,21 @@ jump_address = 0x822C111C | |||
| [[midasm_hook]] | ||||
| name = "PressStartSaveLoadThreadMidAsmHook" | ||||
| address = 0x822C4358 | ||||
| 
 | ||||
| [[midasm_hook]] | ||||
| name = "FxShadowMapInitMidAsmHook" | ||||
| address = 0x82BAD8F4 | ||||
| registers = ["r11"] | ||||
| 
 | ||||
| [[midasm_hook]] | ||||
| name = "FxShadowMapNoTerrainMidAsmHook" | ||||
| address = 0x82BAD9EC | ||||
| registers = ["r4", "r30"] | ||||
| after_instruction = true | ||||
| 
 | ||||
| [[midasm_hook]] | ||||
| name = "FxShadowMapMidAsmHook" | ||||
| address = 0x82BADADC | ||||
| registers = ["r4", "r5", "r6", "r30"] | ||||
| jump_address_on_true = 0x82BAD9F0 | ||||
| jump_address_on_false = 0x82BADAFC | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Skyth (Asilkan)
						Skyth (Asilkan)