diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp index 32aac2a8..a543b74e 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp @@ -27,6 +27,8 @@ //# define D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED #endif +//#define D3D12_DEBUG_SET_STABLE_POWER_STATE + // Old Windows SDK versions don't provide this macro, so we workaround it by making sure it is defined. #ifndef D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE #define D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) @@ -692,6 +694,20 @@ namespace plume { ); } + static D3D12_RESOLVE_MODE toD3D12(RenderResolveMode resolveMode) { /* Translates the backend-agnostic RenderResolveMode into the same-named D3D12_RESOLVE_MODE constant. */ + switch (resolveMode) { + case RenderResolveMode::MIN: + return D3D12_RESOLVE_MODE_MIN; + case RenderResolveMode::MAX: + return D3D12_RESOLVE_MODE_MAX; + case RenderResolveMode::AVERAGE: + return D3D12_RESOLVE_MODE_AVERAGE; + default: /* Unrecognized values trip the assert; if the assert is compiled out, AVERAGE is returned as the fallback. */ + assert(false && "Unknown resolve mode."); + return D3D12_RESOLVE_MODE_AVERAGE; + } + } + static void setObjectName(ID3D12Object *object, const std::string &name) { const std::wstring wideCharName = Utf8ToUtf16(name); object->SetName(wideCharName.c_str()); @@ -1916,7 +1932,7 @@ namespace plume { resetSamplePositions(); } - void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { + void D3D12CommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) { assert(dstTexture != nullptr); assert(srcTexture != nullptr); @@ -1931,7 +1947,7 @@ namespace plume { } setSamplePositions(interfaceDstTexture); - d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? 
&rect : nullptr, toDXGI(interfaceDstTexture->desc.format), D3D12_RESOLVE_MODE_AVERAGE); + d3d->ResolveSubresourceRegion(interfaceDstTexture->d3d, 0, dstX, dstY, interfaceSrcTexture->d3d, 0, (srcRect != nullptr) ? &rect : nullptr, toDXGI(interfaceDstTexture->desc.format), toD3D12(resolveMode)); resetSamplePositions(); } @@ -3373,6 +3389,10 @@ namespace plume { return; } + #ifdef D3D12_DEBUG_SET_STABLE_POWER_STATE + d3d->SetStablePowerState(TRUE); + #endif + D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; allocatorDesc.pDevice = d3d; allocatorDesc.pAdapter = adapter; diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.h b/UnleashedRecomp/gpu/rhi/plume_d3d12.h index b1a8645f..291a7d47 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.h +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.h @@ -192,7 +192,7 @@ namespace plume { void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override; void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; - void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override; + void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override; void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override; void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override; void discardTexture(const RenderTexture* texture) override; diff --git a/UnleashedRecomp/gpu/rhi/plume_render_interface.h 
b/UnleashedRecomp/gpu/rhi/plume_render_interface.h index ef2a5ed6..4b9e5339 100644 --- a/UnleashedRecomp/gpu/rhi/plume_render_interface.h +++ b/UnleashedRecomp/gpu/rhi/plume_render_interface.h @@ -143,7 +143,7 @@ namespace plume { virtual void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) = 0; virtual void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; virtual void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) = 0; - virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr) = 0; + virtual void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect = nullptr, RenderResolveMode resolveMode = RenderResolveMode::AVERAGE) = 0; /* resolveMode defaults to AVERAGE, the mode the D3D12 backend previously hard-coded. */ virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0; virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0; virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only. diff --git a/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h b/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h index 7352e863..b7551832 100644 --- a/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h +++ b/UnleashedRecomp/gpu/rhi/plume_render_interface_types.h @@ -483,6 +483,12 @@ namespace plume { CPU }; + enum class RenderResolveMode { /* Resolve operation passed to resolveTextureRegion. The D3D12 backend maps each value to the same-named D3D12_RESOLVE_MODE_* constant; the Vulkan backend asserts that only AVERAGE is requested. */ + MIN, + MAX, + AVERAGE + }; + // Global functions. 
constexpr uint32_t RenderFormatSize(RenderFormat format) { diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp index 192c7ed5..1ff395a8 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp @@ -3074,12 +3074,13 @@ namespace plume { } void VulkanCommandList::resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) { - resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr); + resolveTextureRegion(dstTexture, 0, 0, srcTexture, nullptr, RenderResolveMode::AVERAGE); /* Forwards to the region overload with destination offset (0, 0), a null source rectangle, and AVERAGE mode. */ } - void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) { + void VulkanCommandList::resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) { assert(dstTexture != nullptr); assert(srcTexture != nullptr); + assert(resolveMode == RenderResolveMode::AVERAGE && "Vulkan only supports AVERAGE resolve mode."); /* Non-AVERAGE modes are rejected by this assertion only; no other handling of resolveMode is visible in this hunk. */ thread_local std::vector imageResolves; imageResolves.clear(); diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.h b/UnleashedRecomp/gpu/rhi/plume_vulkan.h index 122ffcf3..469f0560 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.h +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.h @@ -315,7 +315,7 @@ namespace plume { void copyBuffer(const RenderBuffer *dstBuffer, const RenderBuffer *srcBuffer) override; void copyTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; void resolveTexture(const RenderTexture *dstTexture, const RenderTexture *srcTexture) override; - void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture *srcTexture, const RenderRect *srcRect) override; + void resolveTextureRegion(const RenderTexture *dstTexture, uint32_t dstX, uint32_t dstY, const RenderTexture 
*srcTexture, const RenderRect *srcRect, RenderResolveMode resolveMode) override; void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override; void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override; void discardTexture(const RenderTexture* texture) override; diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index da8fb8df..fa692ee0 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -235,6 +235,9 @@ static bool g_vulkan = false; static constexpr bool g_vulkan = true; #endif +static constexpr bool g_hardwareResolve = true; /* Gates the hardware resolve path for multisampled surfaces in the stretch-rect code; when false the resolve shader pipelines are used. */ +static constexpr bool g_hardwareDepthResolve = true; /* Additionally allows D32_FLOAT surfaces to take the hardware path; that also requires !g_vulkan and g_capabilities.sampleLocations. */ + static std::unique_ptr g_interface; static std::unique_ptr g_device; @@ -3024,10 +3027,35 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf { if (surface != nullptr && !surface->destinationTextures.empty()) { - AddBarrier(surface, RenderTextureLayout::SHADER_READ); + const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; + + RenderTextureLayout srcLayout; + RenderTextureLayout dstLayout; + bool shaderResolve = true; + + if (multiSampling && g_hardwareResolve) + { + // Hardware depth resolve is only supported on D3D12 when programmable sample positions are available. 
+ bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations; + + if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable) + { + srcLayout = RenderTextureLayout::RESOLVE_SOURCE; + dstLayout = RenderTextureLayout::RESOLVE_DEST; + shaderResolve = false; + } + } + + if (shaderResolve) + { + srcLayout = RenderTextureLayout::SHADER_READ; + dstLayout = (surface->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE); + } + + AddBarrier(surface, srcLayout); for (const auto texture : surface->destinationTextures) - AddBarrier(texture, texture->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE); + AddBarrier(texture, dstLayout); addedAny = true; } @@ -3048,113 +3076,133 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS for (const auto texture : surface->destinationTextures) { - RenderPipeline* pipeline = nullptr; + bool shaderResolve = true; - if (multiSampling) + if (multiSampling && g_hardwareResolve) { - uint32_t pipelineIndex = 0; + bool hardwareDepthResolveAvailable = g_hardwareDepthResolve && !g_vulkan && g_capabilities.sampleLocations; - switch (surface->sampleCount) + if (surface->format != RenderFormat::D32_FLOAT || hardwareDepthResolveAvailable) { - case RenderSampleCount::COUNT_2: - pipelineIndex = 0; - break; - case RenderSampleCount::COUNT_4: - pipelineIndex = 1; - break; - case RenderSampleCount::COUNT_8: - pipelineIndex = 2; - break; - default: - assert(false && "Unsupported MSAA sample count"); - break; - } + if (surface->format == RenderFormat::D32_FLOAT) + commandList->resolveTextureRegion(texture->texture, 0, 0, surface->texture, nullptr, RenderResolveMode::MIN); + else + commandList->resolveTexture(texture->texture, surface->texture); - if (texture->format == RenderFormat::D32_FLOAT) - { - pipeline = 
g_resolveMsaaDepthPipelines[pipelineIndex].get(); + shaderResolve = false; } - else + } + + if (shaderResolve) + { + RenderPipeline* pipeline = nullptr; + + if (multiSampling) { - auto& resolveMsaaColorPipeline = g_resolveMsaaColorPipelines[surface->format][pipelineIndex]; - if (resolveMsaaColorPipeline == nullptr) + uint32_t pipelineIndex = 0; + + switch (surface->sampleCount) { - RenderGraphicsPipelineDesc desc; - desc.pipelineLayout = g_pipelineLayout.get(); - desc.vertexShader = g_copyShader.get(); - desc.pixelShader = g_resolveMsaaColorShaders[pipelineIndex].get(); - desc.renderTargetFormat[0] = texture->format; - desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); - desc.renderTargetCount = 1; - resolveMsaaColorPipeline = g_device->createGraphicsPipeline(desc); + case RenderSampleCount::COUNT_2: + pipelineIndex = 0; + break; + case RenderSampleCount::COUNT_4: + pipelineIndex = 1; + break; + case RenderSampleCount::COUNT_8: + pipelineIndex = 2; + break; + default: + assert(false && "Unsupported MSAA sample count"); + break; } - pipeline = resolveMsaaColorPipeline.get(); - } - } - else - { - if (texture->format == RenderFormat::D32_FLOAT) - { - pipeline = g_copyDepthPipeline.get(); - } - else - { - auto& copyColorPipeline = g_copyColorPipelines[surface->format]; - if (copyColorPipeline == nullptr) + if (texture->format == RenderFormat::D32_FLOAT) { - RenderGraphicsPipelineDesc desc; - desc.pipelineLayout = g_pipelineLayout.get(); - desc.vertexShader = g_copyShader.get(); - desc.pixelShader = g_copyColorShader.get(); - desc.renderTargetFormat[0] = texture->format; - desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); - desc.renderTargetCount = 1; - copyColorPipeline = g_device->createGraphicsPipeline(desc); + pipeline = g_resolveMsaaDepthPipelines[pipelineIndex].get(); } + else + { + auto& resolveMsaaColorPipeline = g_resolveMsaaColorPipelines[surface->format][pipelineIndex]; + if (resolveMsaaColorPipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + 
desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = g_resolveMsaaColorShaders[pipelineIndex].get(); + desc.renderTargetFormat[0] = texture->format; + desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); + desc.renderTargetCount = 1; + resolveMsaaColorPipeline = g_device->createGraphicsPipeline(desc); + } - pipeline = copyColorPipeline.get(); - } - } - - if (texture->framebuffer == nullptr) - { - if (texture->format == RenderFormat::D32_FLOAT) - { - RenderFramebufferDesc desc; - desc.depthAttachment = texture->texture; - texture->framebuffer = g_device->createFramebuffer(desc); + pipeline = resolveMsaaColorPipeline.get(); + } } else { - RenderFramebufferDesc desc; - desc.colorAttachments = const_cast(&texture->texture); - desc.colorAttachmentsCount = 1; - texture->framebuffer = g_device->createFramebuffer(desc); + if (texture->format == RenderFormat::D32_FLOAT) + { + pipeline = g_copyDepthPipeline.get(); + } + else + { + auto& copyColorPipeline = g_copyColorPipelines[surface->format]; + if (copyColorPipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = g_copyColorShader.get(); + desc.renderTargetFormat[0] = texture->format; + desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); + desc.renderTargetCount = 1; + copyColorPipeline = g_device->createGraphicsPipeline(desc); + } + + pipeline = copyColorPipeline.get(); + } } - } - if (g_framebuffer != texture->framebuffer.get()) - { - commandList->setFramebuffer(texture->framebuffer.get()); - g_framebuffer = texture->framebuffer.get(); - } + if (texture->framebuffer == nullptr) + { + if (texture->format == RenderFormat::D32_FLOAT) + { + RenderFramebufferDesc desc; + desc.depthAttachment = texture->texture; + texture->framebuffer = g_device->createFramebuffer(desc); + } + else + { + RenderFramebufferDesc desc; + desc.colorAttachments = 
const_cast(&texture->texture); + desc.colorAttachmentsCount = 1; + texture->framebuffer = g_device->createFramebuffer(desc); + } + } - commandList->setPipeline(pipeline); - commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); - commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); - commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); - commandList->drawInstanced(6, 1, 0, 0); + if (g_framebuffer != texture->framebuffer.get()) + { + commandList->setFramebuffer(texture->framebuffer.get()); + g_framebuffer = texture->framebuffer.get(); + } - g_dirtyStates.renderTargetAndDepthStencil = true; - g_dirtyStates.viewport = true; - g_dirtyStates.pipelineState = true; - g_dirtyStates.scissorRect = true; + commandList->setPipeline(pipeline); + commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); + commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); + commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); + commandList->drawInstanced(6, 1, 0, 0); - if (g_vulkan) - { - g_dirtyStates.vertexShaderConstants = true; // The push constant call invalidates vertex shader constants. - g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias. + g_dirtyStates.renderTargetAndDepthStencil = true; + g_dirtyStates.viewport = true; + g_dirtyStates.pipelineState = true; + g_dirtyStates.scissorRect = true; + + if (g_vulkan) + { + g_dirtyStates.vertexShaderConstants = true; // The push constant call invalidates vertex shader constants. + g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias. + } } texture->sourceSurface = nullptr;