From c2ce01215554b6570cbb9eb323ba0d1aa7357272 Mon Sep 17 00:00:00 2001 From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Sat, 19 Oct 2024 22:22:09 +0300 Subject: [PATCH] Implement MSAA properly for Vulkan. --- .gitmodules | 3 + UnleashedRecomp/CMakeLists.txt | 30 +++ UnleashedRecomp/gpu/shader/.gitignore | 1 + UnleashedRecomp/gpu/shader/copy_vs.hlsl | 5 + .../gpu/shader/resolve_msaa_depth.hlsli | 18 ++ .../gpu/shader/resolve_msaa_depth_2x.hlsl | 2 + .../gpu/shader/resolve_msaa_depth_4x.hlsl | 2 + .../gpu/shader/resolve_msaa_depth_8x.hlsl | 2 + UnleashedRecomp/gpu/video.cpp | 241 ++++++++++++++---- UnleashedRecomp/gpu/video.h | 3 + thirdparty/ShaderRecomp | 1 + vcpkg.json | 1 + 12 files changed, 258 insertions(+), 51 deletions(-) create mode 100644 UnleashedRecomp/gpu/shader/.gitignore create mode 100644 UnleashedRecomp/gpu/shader/copy_vs.hlsl create mode 100644 UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli create mode 100644 UnleashedRecomp/gpu/shader/resolve_msaa_depth_2x.hlsl create mode 100644 UnleashedRecomp/gpu/shader/resolve_msaa_depth_4x.hlsl create mode 100644 UnleashedRecomp/gpu/shader/resolve_msaa_depth_8x.hlsl create mode 160000 thirdparty/ShaderRecomp diff --git a/.gitmodules b/.gitmodules index d63944f..11693b9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "thirdparty/ddspp"] path = thirdparty/ddspp url = https://github.com/redorav/ddspp.git +[submodule "thirdparty/ShaderRecomp"] + path = thirdparty/ShaderRecomp + url = https://github.com/hedge-dev/ShaderRecomp.git diff --git a/UnleashedRecomp/CMakeLists.txt b/UnleashedRecomp/CMakeLists.txt index f07ba1a..1da444d 100644 --- a/UnleashedRecomp/CMakeLists.txt +++ b/UnleashedRecomp/CMakeLists.txt @@ -95,6 +95,7 @@ find_package(VulkanMemoryAllocator CONFIG REQUIRED) find_package(xxHash CONFIG REQUIRED) find_package(PkgConfig REQUIRED) pkg_check_modules(tomlplusplus REQUIRED IMPORTED_TARGET tomlplusplus) +find_package(directx-dxc REQUIRED) 
target_link_libraries(UnleashedRecomp PRIVATE comctl32 @@ -122,3 +123,32 @@ target_include_directories(UnleashedRecomp PRIVATE ) target_precompile_headers(UnleashedRecomp PUBLIC ${SWA_PRECOMPILED_HEADERS}) + +function(compile_shader FILE_PATH TARGET_NAME) + set(FILE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/gpu/shader/${FILE_PATH}.hlsl) + cmake_path(GET FILE_PATH STEM VARIABLE_NAME) + add_custom_command( + OUTPUT ${FILE_PATH}.dxil.h + COMMAND ${DIRECTX_DXC_TOOL} -T ${TARGET_NAME} -HV 2021 -all-resources-bound -Wno-ignored-attributes -Fh ${FILE_PATH}.dxil.h ${FILE_PATH} -Vn g_${VARIABLE_NAME}_dxil + DEPENDS ${FILE_PATH} + ) + add_custom_command( + OUTPUT ${FILE_PATH}.spirv.h + COMMAND ${DIRECTX_DXC_TOOL} -T ${TARGET_NAME} -HV 2021 -all-resources-bound -spirv -fvk-use-dx-layout ${ARGN} -Fh ${FILE_PATH}.spirv.h ${FILE_PATH} -Vn g_${VARIABLE_NAME}_spirv + DEPENDS ${FILE_PATH} + ) + target_sources(UnleashedRecomp PRIVATE ${FILE_PATH}.dxil.h ${FILE_PATH}.spirv.h) +endfunction() + +function(compile_vertex_shader FILE_PATH) + compile_shader(${FILE_PATH} vs_6_0 -fvk-invert-y) +endfunction() + +function(compile_pixel_shader FILE_PATH) + compile_shader(${FILE_PATH} ps_6_0) +endfunction() + +compile_vertex_shader(copy_vs) +compile_pixel_shader(resolve_msaa_depth_2x) +compile_pixel_shader(resolve_msaa_depth_4x) +compile_pixel_shader(resolve_msaa_depth_8x) diff --git a/UnleashedRecomp/gpu/shader/.gitignore b/UnleashedRecomp/gpu/shader/.gitignore new file mode 100644 index 0000000..b8f26ec --- /dev/null +++ b/UnleashedRecomp/gpu/shader/.gitignore @@ -0,0 +1 @@ +*.hlsl.*.h diff --git a/UnleashedRecomp/gpu/shader/copy_vs.hlsl b/UnleashedRecomp/gpu/shader/copy_vs.hlsl new file mode 100644 index 0000000..5a2576d --- /dev/null +++ b/UnleashedRecomp/gpu/shader/copy_vs.hlsl @@ -0,0 +1,5 @@ +void main(in uint vertexId : SV_VertexID, out float4 position : SV_Position, out float2 texCoord : TEXCOORD) +{ + texCoord = float2((vertexId << 1) & 2, vertexId & 2); + position = float4(texCoord * 
float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); +} diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli b/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli new file mode 100644 index 0000000..d06c83d --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_depth.hlsli @@ -0,0 +1,18 @@ +struct PushConstants +{ + uint ResourceDescriptorIndex; +}; + +[[vk::push_constant]] ConstantBuffer g_PushConstants : register(b3, space4); + +Texture2DMS g_Texture2DMSDescriptorHeap[] : register(t0, space0); + +float main(in float4 position : SV_Position) : SV_Depth +{ + float result = g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), 0); + + [unroll] for (int i = 1; i < SAMPLE_COUNT; i++) + result = max(result, g_Texture2DMSDescriptorHeap[g_PushConstants.ResourceDescriptorIndex].Load(int2(position.xy), i)); + + return result; +} diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_depth_2x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_depth_2x.hlsl new file mode 100644 index 0000000..dab77b2 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_depth_2x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 2 +#include "resolve_msaa_depth.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_depth_4x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_depth_4x.hlsl new file mode 100644 index 0000000..f7dd8be --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_depth_4x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 4 +#include "resolve_msaa_depth.hlsli" diff --git a/UnleashedRecomp/gpu/shader/resolve_msaa_depth_8x.hlsl b/UnleashedRecomp/gpu/shader/resolve_msaa_depth_8x.hlsl new file mode 100644 index 0000000..3a4e9fb --- /dev/null +++ b/UnleashedRecomp/gpu/shader/resolve_msaa_depth_8x.hlsl @@ -0,0 +1,2 @@ +#define SAMPLE_COUNT 8 +#include "resolve_msaa_depth.hlsli" diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index 23b07e6..980a1ea 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ 
b/UnleashedRecomp/gpu/video.cpp @@ -10,6 +10,15 @@ #include "video.h" #include "ui/window.h" +#include "shader/copy_vs.hlsl.dxil.h" +#include "shader/copy_vs.hlsl.spirv.h" +#include "shader/resolve_msaa_depth_2x.hlsl.dxil.h" +#include "shader/resolve_msaa_depth_2x.hlsl.spirv.h" +#include "shader/resolve_msaa_depth_4x.hlsl.dxil.h" +#include "shader/resolve_msaa_depth_4x.hlsl.spirv.h" +#include "shader/resolve_msaa_depth_8x.hlsl.dxil.h" +#include "shader/resolve_msaa_depth_8x.hlsl.spirv.h" + namespace RT64 { extern std::unique_ptr CreateD3D12Interface(); @@ -532,6 +541,8 @@ static const std::pair g_setRenderStateFunctions[] = { D3DRS_COLORWRITEENABLE, GuestFunction } }; +static std::unique_ptr g_resolveMsaaDepthPipelines[3]; + static void CreateHostDevice() { for (uint32_t i = 0; i < 16; i++) @@ -595,10 +606,49 @@ static void CreateHostDevice() pipelineLayoutBuilder.addRootDescriptor(0, 4, RenderRootDescriptorType::CONSTANT_BUFFER); pipelineLayoutBuilder.addRootDescriptor(1, 4, RenderRootDescriptorType::CONSTANT_BUFFER); pipelineLayoutBuilder.addRootDescriptor(2, 4, RenderRootDescriptorType::CONSTANT_BUFFER); + pipelineLayoutBuilder.addPushConstant(3, 4, 4, RenderShaderStageFlag::PIXEL); // For copy/resolve shaders. } pipelineLayoutBuilder.end(); g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get()); + +#define CREATE_SHADER(NAME) \ + g_device->createShader( \ + g_vulkan ? g_##NAME##_spirv : g_##NAME##_dxil, \ + g_vulkan ? sizeof(g_##NAME##_spirv) : sizeof(g_##NAME##_dxil), \ + "main", \ + g_vulkan ? 
RenderShaderFormat::SPIRV : RenderShaderFormat::DXIL) + + auto copyShader = CREATE_SHADER(copy_vs); + + for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++) + { + std::unique_ptr pixelShader; + switch (i) + { + case 0: + pixelShader = CREATE_SHADER(resolve_msaa_depth_2x); + break; + case 1: + pixelShader = CREATE_SHADER(resolve_msaa_depth_4x); + break; + case 2: + pixelShader = CREATE_SHADER(resolve_msaa_depth_8x); + break; + } + + RenderGraphicsPipelineDesc desc; + desc.pipelineLayout = g_pipelineLayout.get(); + desc.vertexShader = copyShader.get(); + desc.pixelShader = pixelShader.get(); + desc.depthFunction = RenderComparisonFunction::ALWAYS; + desc.depthEnabled = true; + desc.depthWriteEnabled = true; + desc.depthTargetFormat = RenderFormat::D32_FLOAT; + g_resolveMsaaDepthPipelines[i] = g_device->createGraphicsPipeline(desc); + } + +#undef CREATE_SHADER } static void WaitForGPU() @@ -768,6 +818,9 @@ static void DestructResource(GuestResource* resource) { std::lock_guard lock(g_tempMutex); g_tempTextures[g_frame].emplace_back(std::move(surface->textureHolder)); + + if (surface->descriptorIndex != 0) /* descriptorIndex is a uint32_t that defaults to 0; compare with an integer, not the pointer macro NULL */ + g_tempDescriptorIndices[g_frame].push_back(surface->descriptorIndex); } surface->~GuestSurface(); @@ -1063,7 +1116,7 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for desc.depth = 1; desc.mipLevels = 1; desc.arraySize = 1; - //desc.multisampling.sampleCount = (desc.format != RenderFormat::D32_FLOAT && multiSample != 0) ? RenderSampleCount::COUNT_2 : RenderSampleCount::COUNT_1; + desc.multisampling.sampleCount = multiSample != 0 && Config::MSAA > 1 ? Config::MSAA : RenderSampleCount::COUNT_1; desc.format = ConvertFormat(format); desc.flags = desc.format == RenderFormat::D32_FLOAT ? 
RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET; @@ -1077,24 +1130,155 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for surface->format = desc.format; surface->sampleCount = desc.multisampling.sampleCount; + if (surface->sampleCount != RenderSampleCount::COUNT_1 && desc.format == RenderFormat::D32_FLOAT) /* the view/descriptor is only read by the MSAA depth resolve in StretchRect, which runs only when sampleCount != COUNT_1 */ + { + RenderTextureViewDesc viewDesc; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; + viewDesc.format = RenderFormat::D32_FLOAT; + viewDesc.mipLevels = 1; + surface->textureView = surface->textureHolder->createTextureView(viewDesc); + surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); + g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); + } + return surface; } +static void FlushViewport() +{ + bool renderingToBackBuffer = g_renderTarget == g_backBuffer && + g_backBuffer->texture != g_backBuffer->textureHolder.get(); + + auto& commandList = g_commandLists[g_frame]; + + if (g_dirtyStates.viewport) + { + if (renderingToBackBuffer) + { + uint32_t width = g_swapChain->getWidth(); + uint32_t height = g_swapChain->getHeight(); + + commandList->setViewports(RenderViewport( + g_viewport.x * width / 1280.0f, + g_viewport.y * height / 720.0f, + g_viewport.width * width / 1280.0f, + g_viewport.height * height / 720.0f, + g_viewport.minDepth, + g_viewport.maxDepth)); + } + else + { + commandList->setViewports(g_viewport); + } + } + + if (g_dirtyStates.scissorRect) + { + auto scissorRect = g_scissorTestEnable ? 
g_scissorRect : RenderRect( + g_viewport.x, + g_viewport.y, + g_viewport.x + g_viewport.width, + g_viewport.y + g_viewport.height); + + if (renderingToBackBuffer) + { + uint32_t width = g_swapChain->getWidth(); + uint32_t height = g_swapChain->getHeight(); + + scissorRect.left = scissorRect.left * width / 1280; + scissorRect.top = scissorRect.top * height / 720; + scissorRect.right = scissorRect.right * width / 1280; + scissorRect.bottom = scissorRect.bottom * height / 720; + } + + commandList->setScissors(scissorRect); + } +} + static void StretchRect(GuestDevice* device, uint32_t flags, uint32_t, GuestTexture* texture) { const bool isDepthStencil = (flags & 0x4) != 0; const auto surface = isDepthStencil ? g_depthStencil : g_renderTarget; const bool multiSampling = surface->sampleCount != RenderSampleCount::COUNT_1; - g_barriers.emplace_back(surface->texture, multiSampling ? RenderTextureLayout::RESOLVE_SOURCE : RenderTextureLayout::COPY_SOURCE); - g_barriers.emplace_back(texture->texture.get(), multiSampling ? 
RenderTextureLayout::RESOLVE_DEST : RenderTextureLayout::COPY_DEST); + RenderTextureLayout srcLayout; + RenderTextureLayout dstLayout; + + if (multiSampling) + { + if (isDepthStencil) + { + srcLayout = RenderTextureLayout::SHADER_READ; + dstLayout = RenderTextureLayout::DEPTH_WRITE; + } + else + { + srcLayout = RenderTextureLayout::RESOLVE_SOURCE; + dstLayout = RenderTextureLayout::RESOLVE_DEST; + } + } + else + { + srcLayout = RenderTextureLayout::COPY_SOURCE; + dstLayout = RenderTextureLayout::COPY_DEST; + } + + g_barriers.emplace_back(surface->texture, srcLayout); + g_barriers.emplace_back(texture->texture.get(), dstLayout); FlushBarriers(); auto& commandList = g_commandLists[g_frame]; if (multiSampling) - commandList->resolveTexture(texture->texture.get(), surface->texture); - else + { + if (isDepthStencil) + { + uint32_t pipelineIndex = 0; + + switch (g_depthStencil->sampleCount) + { + case RenderSampleCount::COUNT_2: + pipelineIndex = 0; + break; + case RenderSampleCount::COUNT_4: + pipelineIndex = 1; + break; + case RenderSampleCount::COUNT_8: + pipelineIndex = 2; + break; + default: + assert(false && "Unsupported MSAA sample count"); + break; + } + + if (texture->framebuffer == nullptr) + { + RenderFramebufferDesc desc; + desc.depthAttachment = texture->texture.get(); + texture->framebuffer = g_device->createFramebuffer(desc); + } + + FlushViewport(); + + commandList->setFramebuffer(texture->framebuffer.get()); + commandList->setPipeline(g_resolveMsaaDepthPipelines[pipelineIndex].get()); + commandList->setGraphicsPushConstants(0, &g_depthStencil->descriptorIndex, 0, sizeof(uint32_t)); + commandList->drawInstanced(3, 1, 0, 0); /* copy_vs builds one full-screen triangle from SV_VertexID 0..2; vertices 3..5 would only redraw part of the target */ + + g_dirtyStates.renderTargetAndDepthStencil = true; + g_dirtyStates.pipelineState = true; + + if (g_vulkan) + g_dirtyStates.vertexShaderConstants = true; + } + else + { + commandList->resolveTexture(texture->texture.get(), surface->texture); + } + } + else + { commandList->copyTexture(texture->texture.get(), surface->texture); + } 
surface->pendingBarrier = true; texture->pendingBarrier = true; @@ -1356,33 +1540,10 @@ static RenderBorderColor ConvertBorderColor(uint32_t value) static void FlushRenderState(GuestDevice* device) { FlushFramebuffer(); + FlushViewport(); auto& commandList = g_commandLists[g_frame]; - bool renderingToBackBuffer = g_renderTarget == g_backBuffer && - g_backBuffer->texture != g_backBuffer->textureHolder.get(); - - if (g_dirtyStates.viewport) - { - if (renderingToBackBuffer) - { - uint32_t width = g_swapChain->getWidth(); - uint32_t height = g_swapChain->getHeight(); - - commandList->setViewports(RenderViewport( - g_viewport.x * width / 1280.0f, - g_viewport.y * height / 720.0f, - g_viewport.width * width / 1280.0f, - g_viewport.height * height / 720.0f, - g_viewport.minDepth, - g_viewport.maxDepth)); - } - else - { - commandList->setViewports(g_viewport); - } - } - if (g_dirtyStates.pipelineState) commandList->setPipeline(CreateGraphicsPipeline(g_pipelineState)); @@ -1464,28 +1625,6 @@ static void FlushRenderState(GuestDevice* device) setRootDescriptor(sharedConstants, 2); } - if (g_dirtyStates.scissorRect) - { - auto scissorRect = g_scissorTestEnable ? 
g_scissorRect : RenderRect( - g_viewport.x, - g_viewport.y, - g_viewport.x + g_viewport.width, - g_viewport.y + g_viewport.height); - - if (renderingToBackBuffer) - { - uint32_t width = g_swapChain->getWidth(); - uint32_t height = g_swapChain->getHeight(); - - scissorRect.left = scissorRect.left * width / 1280; - scissorRect.top = scissorRect.top * height / 720; - scissorRect.right = scissorRect.right * width / 1280; - scissorRect.bottom = scissorRect.bottom * height / 720; - } - - commandList->setScissors(scissorRect); - } - if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0) { auto vertexShaderConstants = uploadAllocator.allocate(device->vertexShaderFloatConstants, 0x1000, 0x100); diff --git a/UnleashedRecomp/gpu/video.h b/UnleashedRecomp/gpu/video.h index e48c4b9..9c62087 100644 --- a/UnleashedRecomp/gpu/video.h +++ b/UnleashedRecomp/gpu/video.h @@ -98,6 +98,7 @@ struct GuestTexture : GuestResource void* mappedMemory = nullptr; uint32_t descriptorIndex = 0; bool pendingBarrier = true; + std::unique_ptr framebuffer; }; struct GuestLockedRect @@ -144,12 +145,14 @@ struct GuestSurface : GuestResource { std::unique_ptr textureHolder; RenderTexture* texture = nullptr; + std::unique_ptr textureView; uint32_t width = 0; uint32_t height = 0; RenderFormat format = RenderFormat::UNKNOWN; ankerl::unordered_dense::map> framebuffers; bool pendingBarrier = true; RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; + uint32_t descriptorIndex = 0; }; enum GuestDeclType diff --git a/thirdparty/ShaderRecomp b/thirdparty/ShaderRecomp new file mode 160000 index 0000000..9da6b59 --- /dev/null +++ b/thirdparty/ShaderRecomp @@ -0,0 +1 @@ +Subproject commit 9da6b59ce51c5becc919c2f1aed7c5e5f3b86f31 diff --git a/vcpkg.json b/vcpkg.json index db5fda7..0526b52 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -2,6 +2,7 @@ "builtin-baseline": "e63bd09dc0b7204467705c1c7c71d0e2a3f8860b", "dependencies": [ "d3d12-memory-allocator", + "directx-dxc", "sdl2", 
"unordered-dense", "volk",