diff --git a/UnleashedRecomp/CMakeLists.txt b/UnleashedRecomp/CMakeLists.txt index c44c4b35..974345ba 100644 --- a/UnleashedRecomp/CMakeLists.txt +++ b/UnleashedRecomp/CMakeLists.txt @@ -414,7 +414,9 @@ function(compile_pixel_shader FILE_PATH) endfunction() compile_pixel_shader(blend_color_alpha_ps) -compile_vertex_shader(copy_vs) +compile_vertex_shader(copy_vs) +compile_pixel_shader(copy_color_ps) +compile_pixel_shader(copy_depth_ps) compile_pixel_shader(csd_filter_ps) compile_vertex_shader(csd_no_tex_vs) compile_vertex_shader(csd_vs) @@ -427,7 +429,10 @@ compile_pixel_shader(gamma_correction_ps) compile_pixel_shader(imgui_ps) compile_vertex_shader(imgui_vs) compile_pixel_shader(movie_ps) -compile_vertex_shader(movie_vs) +compile_vertex_shader(movie_vs) +compile_pixel_shader(resolve_msaa_color_2x) +compile_pixel_shader(resolve_msaa_color_4x) +compile_pixel_shader(resolve_msaa_color_8x) compile_pixel_shader(resolve_msaa_depth_2x) compile_pixel_shader(resolve_msaa_depth_4x) compile_pixel_shader(resolve_msaa_depth_8x) diff --git a/UnleashedRecomp/gpu/shader/copy_color_ps.hlsl b/UnleashedRecomp/gpu/shader/copy_color_ps.hlsl new file mode 100644 index 00000000..0559557a --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_color_ps.hlsl @@ -0,0 +1,8 @@ +#include "copy_common.hlsli" + +Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); + +float4 main(in float4 position : SV_Position) : SV_Target +{ + return g_Texture2DDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int3(position.xy, 0)); +} diff --git a/UnleashedRecomp/gpu/shader/copy_common.hlsli b/UnleashedRecomp/gpu/shader/copy_common.hlsli new file mode 100644 index 00000000..5984ad0b --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_common.hlsli @@ -0,0 +1,8 @@ +#pragma once + +struct PushConstants +{ + uint ResourceDescriptorIndex; +}; + +[[vk::push_constant]] ConstantBuffer g_PushConstants : register(b3, space4); diff --git a/UnleashedRecomp/gpu/shader/copy_depth_ps.hlsl b/UnleashedRecomp/gpu/shader/copy_depth_ps.hlsl new file mode 100644 index 00000000..251f893a --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_depth_ps.hlsl @@ -0,0 +1,8 @@ +#include "copy_common.hlsli" + +Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); + +float main(in float4 position : SV_Position) : SV_Depth +{ + return g_Texture2DDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int3(position.xy, 0)); +} diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color.hlsli b/UnleashedRecomp/gpu/shader/resolve_msaa_color.hlsli new file mode 100644 index 00000000..f9b029d9 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color.hlsli @@ -0,0 +1,15 @@ +#pragma once + +#include "copy_common.hlsli" + +Texture2DMS g_Texture2DMSDescriptorHeap[] : register(t0, space0); + +float4 main(in float4 position : SV_Position) : SV_Target +{ + float4 result = g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), 0); + + [unroll] for (int i = 1; i < SAMPLE_COUNT; i++) + result += g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), i); + + return result / SAMPLE_COUNT; +} diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color_2x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_color_2x.hlsl new file mode 100644 index 00000000..95338bfa --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color_2x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 2 +#include "resolve_msaa_color.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color_4x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_color_4x.hlsl new file mode 100644 index 00000000..71b8b8a4 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color_4x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 4 +#include "resolve_msaa_color.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_color_8x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_color_8x.hlsl new file mode 100644 index 00000000..9a0f8ac4 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_color_8x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 8 +#include "resolve_msaa_color.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli b/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli index d413717b..a06c7bac 100644 --- a/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli @@ -1,11 +1,6 @@ #pragma once -struct PushConstants -{ - uint ResourceDescriptorIndex; -}; - -[[vk::push_constant]] ConstantBuffer g_PushConstants : register(b3, space4); +#include "copy_common.hlsli" Texture2DMS g_Texture2DMSDescriptorHeap[] : register(t0, space0); diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index fdca431f..9fabfc30 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -40,6 +40,8 @@ #ifdef UNLEASHED_RECOMP_D3D12 #include "shader/blend_color_alpha_ps.hlsl.dxil.h" #include "shader/copy_vs.hlsl.dxil.h" +#include "shader/copy_color_ps.hlsl.dxil.h" +#include "shader/copy_depth_ps.hlsl.dxil.h" #include "shader/csd_filter_ps.hlsl.dxil.h" #include "shader/csd_no_tex_vs.hlsl.dxil.h" #include "shader/csd_vs.hlsl.dxil.h" @@ -53,6 +55,9 @@ #include "shader/imgui_vs.hlsl.dxil.h" #include "shader/movie_ps.hlsl.dxil.h" #include "shader/movie_vs.hlsl.dxil.h" +#include "shader/resolve_msaa_color_2x.hlsl.dxil.h" +#include "shader/resolve_msaa_color_4x.hlsl.dxil.h" +#include "shader/resolve_msaa_color_8x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_2x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_4x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_8x.hlsl.dxil.h" @@ -60,6 +65,8 @@ #include "shader/blend_color_alpha_ps.hlsl.spirv.h" #include "shader/copy_vs.hlsl.spirv.h" +#include "shader/copy_color_ps.hlsl.spirv.h" +#include "shader/copy_depth_ps.hlsl.spirv.h" #include "shader/csd_filter_ps.hlsl.spirv.h" #include "shader/csd_no_tex_vs.hlsl.spirv.h" #include "shader/csd_vs.hlsl.spirv.h" @@ -73,6 +80,9 @@ #include "shader/imgui_vs.hlsl.spirv.h" #include "shader/movie_ps.hlsl.spirv.h" #include "shader/movie_vs.hlsl.spirv.h" +#include "shader/resolve_msaa_color_2x.hlsl.spirv.h" +#include "shader/resolve_msaa_color_4x.hlsl.spirv.h" +#include "shader/resolve_msaa_color_8x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_2x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_4x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_8x.hlsl.spirv.h" @@ -1139,6 +1149,14 @@ static const std::pair g_setRenderStateFunctions[] = { D3DRS_COLORWRITEENABLE, HostToGuestFunction> } }; +static std::unique_ptr g_copyShader; + +static std::unique_ptr g_copyColorShader; +static ankerl::unordered_dense::map> g_copyColorPipelines; +static std::unique_ptr g_copyDepthPipeline; + +static std::unique_ptr g_resolveMsaaColorShaders[3]; +static ankerl::unordered_dense::map, 3>> g_resolveMsaaColorPipelines; static std::unique_ptr g_resolveMsaaDepthPipelines[3]; enum @@ -1711,7 +1729,23 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver) g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get()); - auto copyShader = CREATE_SHADER(copy_vs); + g_copyShader = CREATE_SHADER(copy_vs); + g_copyColorShader = CREATE_SHADER(copy_color_ps); + auto copyDepthShader = CREATE_SHADER(copy_depth_ps); + + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = copyDepthShader.get(); + desc.depthFunction = RenderComparisonFunction::ALWAYS; + desc.depthEnabled = true; + desc.depthWriteEnabled = true; + desc.depthTargetFormat = RenderFormat::D32_FLOAT; + g_copyDepthPipeline = g_device->createGraphicsPipeline(desc); + + g_resolveMsaaColorShaders[0] = CREATE_SHADER(resolve_msaa_color_2x); + g_resolveMsaaColorShaders[1] = CREATE_SHADER(resolve_msaa_color_4x); + g_resolveMsaaColorShaders[2] = CREATE_SHADER(resolve_msaa_color_8x); for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++) { @@ -1729,9 +1763,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver) break; } - RenderGraphicsPipelineDesc desc; + desc = {}; desc.pipelineLayout = g_pipelineLayout.get(); - desc.vertexShader = copyShader.get(); + desc.vertexShader = g_copyShader.get(); desc.pixelShader = pixelShader.get(); desc.depthFunction = RenderComparisonFunction::ALWAYS; desc.depthEnabled = true; @@ -1758,9 +1792,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver) auto gammaCorrectionShader = CREATE_SHADER(gamma_correction_ps); - RenderGraphicsPipelineDesc desc; + desc = {}; desc.pipelineLayout = g_pipelineLayout.get(); - desc.vertexShader = copyShader.get(); + desc.vertexShader = g_copyShader.get(); desc.pixelShader = gammaCorrectionShader.get(); desc.renderTargetFormat[0] = BACKBUFFER_FORMAT; desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); @@ -2714,7 +2748,13 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep desc.mipLevels = levels; desc.arraySize = 1; desc.format = ConvertFormat(format); - desc.flags = (desc.format == RenderFormat::D32_FLOAT) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::NONE; + + if (desc.format == RenderFormat::D32_FLOAT) + desc.flags = RenderTextureFlag::DEPTH_TARGET; + else if (usage != 0) + desc.flags = RenderTextureFlag::RENDER_TARGET; + else + desc.flags = RenderTextureFlag::NONE; texture->textureHolder = g_device->createTexture(desc); texture->texture = texture->textureHolder.get(); @@ -2970,34 +3010,10 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf { if (surface != nullptr && !surface->destinationTextures.empty()) { - const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; - - RenderTextureLayout srcLayout; - RenderTextureLayout dstLayout; - - if (multiSampling) - { - if (surface == depthStencil) - { - srcLayout = RenderTextureLayout::SHADER_READ; - dstLayout = RenderTextureLayout::DEPTH_WRITE; - } - else - { - srcLayout = RenderTextureLayout::RESOLVE_SOURCE; - dstLayout = RenderTextureLayout::RESOLVE_DEST; - } - } - else - { - srcLayout = RenderTextureLayout::COPY_SOURCE; - dstLayout = RenderTextureLayout::COPY_DEST; - } - - AddBarrier(surface, srcLayout); + AddBarrier(surface, RenderTextureLayout::SHADER_READ); for (const auto texture : surface->destinationTextures) - AddBarrier(texture, dstLayout); + AddBarrier(texture, texture->format == RenderFormat::D32_FLOAT ? RenderTextureLayout::DEPTH_WRITE : RenderTextureLayout::COLOR_WRITE); addedAny = true; } @@ -3018,66 +3034,113 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS for (const auto texture : surface->destinationTextures) { + RenderPipeline* pipeline = nullptr; + if (multiSampling) { - if (surface == depthStencil) + uint32_t pipelineIndex = 0; + + switch (surface->sampleCount) { - uint32_t pipelineIndex = 0; + case RenderSampleCount::COUNT_2: + pipelineIndex = 0; + break; + case RenderSampleCount::COUNT_4: + pipelineIndex = 1; + break; + case RenderSampleCount::COUNT_8: + pipelineIndex = 2; + break; + default: + assert(false && "Unsupported MSAA sample count"); + break; + } - switch (surface->sampleCount) - { - case RenderSampleCount::COUNT_2: - pipelineIndex = 0; - break; - case RenderSampleCount::COUNT_4: - pipelineIndex = 1; - break; - case RenderSampleCount::COUNT_8: - pipelineIndex = 2; - break; - default: - assert(false && "Unsupported MSAA sample count"); - break; - } - - if (texture->framebuffer == nullptr) - { - RenderFramebufferDesc desc; - desc.depthAttachment = texture->texture; - texture->framebuffer = g_device->createFramebuffer(desc); - } - - if (g_framebuffer != texture->framebuffer.get()) - { - commandList->setFramebuffer(texture->framebuffer.get()); - g_framebuffer = texture->framebuffer.get(); - } - - commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); - commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); - commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); - commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); - commandList->drawInstanced(6, 1, 0, 0); - - g_dirtyStates.renderTargetAndDepthStencil = true; - g_dirtyStates.viewport = true; - g_dirtyStates.pipelineState = true; - g_dirtyStates.scissorRect = true; - - if (g_vulkan) - { - g_dirtyStates.depthBias = true; // Static depth bias in MSAA pipeline invalidates dynamic depth bias. - g_dirtyStates.vertexShaderConstants = true; - } + if (texture->format == RenderFormat::D32_FLOAT) + { + pipeline = g_resolveMsaaDepthPipelines[pipelineIndex].get(); } else { - commandList->resolveTexture(texture->texture, surface->texture); + auto& resolveMsaaColorPipeline = g_resolveMsaaColorPipelines[surface->format][pipelineIndex]; + if (resolveMsaaColorPipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = g_resolveMsaaColorShaders[pipelineIndex].get(); + desc.renderTargetFormat[0] = texture->format; + desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); + desc.renderTargetCount = 1; + resolveMsaaColorPipeline = g_device->createGraphicsPipeline(desc); + } + + pipeline = resolveMsaaColorPipeline.get(); } } else { - commandList->copyTexture(texture->texture, surface->texture); + if (texture->format == RenderFormat::D32_FLOAT) + { + pipeline = g_copyDepthPipeline.get(); + } + else + { + auto& copyColorPipeline = g_copyColorPipelines[surface->format]; + if (copyColorPipeline == nullptr) + { + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = g_copyShader.get(); + desc.pixelShader = g_copyColorShader.get(); + desc.renderTargetFormat[0] = texture->format; + desc.renderTargetBlend[0] = RenderBlendDesc::Copy(); + desc.renderTargetCount = 1; + copyColorPipeline = g_device->createGraphicsPipeline(desc); + } + + pipeline = copyColorPipeline.get(); + } + } + + if (texture->framebuffer == nullptr) + { + if (texture->format == RenderFormat::D32_FLOAT) + { + RenderFramebufferDesc desc; + desc.depthAttachment = texture->texture; + texture->framebuffer = g_device->createFramebuffer(desc); + } + else + { + RenderFramebufferDesc desc; + desc.colorAttachments = const_cast(&texture->texture); + desc.colorAttachmentsCount = 1; + texture->framebuffer = g_device->createFramebuffer(desc); + } + } + + if (g_framebuffer != texture->framebuffer.get()) + { + commandList->setFramebuffer(texture->framebuffer.get()); + g_framebuffer = texture->framebuffer.get(); + } + + commandList->setPipeline(pipeline); + commandList->setViewports(RenderViewport(0.0f, 0.0f, float(texture->width), float(texture->height), 0.0f, 1.0f)); + commandList->setScissors(RenderRect(0, 0, texture->width, texture->height)); + commandList->setGraphicsPushConstants(0, &surface->descriptorIndex, 0, sizeof(uint32_t)); + commandList->drawInstanced(6, 1, 0, 0); + + g_dirtyStates.renderTargetAndDepthStencil = true; + g_dirtyStates.viewport = true; + g_dirtyStates.pipelineState = true; + g_dirtyStates.scissorRect = true; + + if (g_vulkan) + { + g_dirtyStates.depthBias = true; // Static depth bias in copy pipeline invalidates dynamic depth bias. + g_dirtyStates.vertexShaderConstants = true; } texture->sourceSurface = nullptr; diff --git a/tools/XenosRecomp b/tools/XenosRecomp index 96458eb7..855a5a8c 160000 --- a/tools/XenosRecomp +++ b/tools/XenosRecomp @@ -1 +1 @@ -Subproject commit 96458eb7bc01798c951bce9d627852c2870bc54d +Subproject commit 855a5a8c51ea5f84baecbf4fc87c182795d482c9