From f936ed2212d8291439003eb0c0d8edc0ecafd24d Mon Sep 17 00:00:00 2001 From: "Skyth (Asilkan)" <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Fri, 29 Nov 2024 23:14:08 +0300 Subject: [PATCH] Specialization constants, reverse Z and smol-v implementation. (#2) * Specialization constant & DXIL library implementation. * Fix alpha to coverage. * Add reverse Z implementation. * Avoid dynamic branches when loading array constants. * Remove "has bone" specialization constant. * Integrate smol-v. --- .gitmodules | 3 + ShaderRecomp/CMakeLists.txt | 7 +- ShaderRecomp/dxc_compiler.cpp | 25 +++- ShaderRecomp/dxc_compiler.h | 4 +- ShaderRecomp/main.cpp | 25 ++-- ShaderRecomp/pch.h | 1 + .../{shader_common.hlsli => shader_common.h} | 47 +++++--- ShaderRecomp/shader_recompiler.cpp | 111 +++++++++++++++--- ShaderRecomp/shader_recompiler.h | 3 + thirdparty/smol-v | 1 + 10 files changed, 179 insertions(+), 48 deletions(-) create mode 100644 .gitmodules rename ShaderRecomp/{shader_common.hlsli => shader_common.h} (85%) create mode 160000 thirdparty/smol-v diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..47afc81 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "thirdparty/smol-v"] + path = thirdparty/smol-v + url = https://github.com/aras-p/smol-v diff --git a/ShaderRecomp/CMakeLists.txt b/ShaderRecomp/CMakeLists.txt index 8fbef91..a7a72b0 100644 --- a/ShaderRecomp/CMakeLists.txt +++ b/ShaderRecomp/CMakeLists.txt @@ -1,5 +1,7 @@ project(ShaderRecomp) +set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source") + add_executable(ShaderRecomp constant_table.h dxc_compiler.cpp @@ -9,7 +11,8 @@ add_executable(ShaderRecomp shader.h shader_code.h shader_recompiler.cpp - shader_recompiler.h) + shader_recompiler.h + "${SMOLV_SOURCE_DIR}/smolv.cpp") find_package(directx-dxc CONFIG REQUIRED) find_package(xxhash CONFIG REQUIRED) @@ -20,6 +23,8 @@ target_link_libraries(ShaderRecomp PRIVATE xxHash::xxhash $,zstd::libzstd_shared,zstd::libzstd_static>) +target_include_directories(ShaderRecomp PRIVATE ${SMOLV_SOURCE_DIR}) + target_precompile_headers(ShaderRecomp PRIVATE pch.h) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/ShaderRecomp/dxc_compiler.cpp b/ShaderRecomp/dxc_compiler.cpp index 4259f8e..bde73ad 100644 --- a/ShaderRecomp/dxc_compiler.cpp +++ b/ShaderRecomp/dxc_compiler.cpp @@ -11,16 +11,30 @@ DxcCompiler::~DxcCompiler() dxcCompiler->Release(); } -IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool isPixelShader, bool compileSpirv) +IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixelShader, bool compileLibrary, bool compileSpirv) { DxcBuffer source{}; source.Ptr = shaderSource.c_str(); source.Size = shaderSource.size(); - const wchar_t* args[16]{}; + const wchar_t* args[32]{}; uint32_t argCount = 0; - args[argCount++] = isPixelShader ? L"-T ps_6_0" : L"-T vs_6_0"; + const wchar_t* target = nullptr; + if (compileLibrary) + { + assert(!compileSpirv); + target = L"-T lib_6_3"; + } + else + { + if (compilePixelShader) + target = L"-T ps_6_0"; + else + target = L"-T vs_6_0"; + } + + args[argCount++] = target; args[argCount++] = L"-HV 2021"; args[argCount++] = L"-all-resources-bound"; @@ -29,14 +43,17 @@ IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool isPixelShad args[argCount++] = L"-spirv"; args[argCount++] = L"-fvk-use-dx-layout"; - if (!isPixelShader) + if (!compilePixelShader) args[argCount++] = L"-fvk-invert-y"; } else { args[argCount++] = L"-Wno-ignored-attributes"; + args[argCount++] = L"-Qstrip_reflect"; } + args[argCount++] = L"-Qstrip_debug"; + IDxcResult* result = nullptr; HRESULT hr = dxcCompiler->Compile(&source, args, argCount, nullptr, IID_PPV_ARGS(&result)); diff --git a/ShaderRecomp/dxc_compiler.h b/ShaderRecomp/dxc_compiler.h index 49a579f..d49bd32 100644 --- a/ShaderRecomp/dxc_compiler.h +++ b/ShaderRecomp/dxc_compiler.h @@ -7,5 +7,5 @@ struct DxcCompiler DxcCompiler(); ~DxcCompiler(); - IDxcBlob* compile(const std::string& shaderSource, bool isPixelShader, bool compileSpirv); -}; \ No newline at end of file + IDxcBlob* compile(const std::string& shaderSource, bool compilePixelShader, bool compileLibrary, bool compileSpirv); +}; diff --git a/ShaderRecomp/main.cpp b/ShaderRecomp/main.cpp index 9df5309..dce0323 100644 --- a/ShaderRecomp/main.cpp +++ b/ShaderRecomp/main.cpp @@ -25,7 +25,8 @@ struct RecompiledShader { uint8_t* data = nullptr; IDxcBlob* dxil = nullptr; - IDxcBlob* spirv = nullptr; + std::vector spirv; + uint32_t specConstantsMask = 0; }; int main(int argc, char** argv) @@ -109,13 +110,20 @@ int main(int argc, char** argv) recompiler = {}; recompiler.recompile(shader.data, include); - thread_local DxcCompiler dxcCompiler; - shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false); - shader.spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, true); + shader.specConstantsMask = recompiler.specConstantsMask; - assert(shader.dxil != nullptr && shader.spirv != nullptr); + thread_local DxcCompiler dxcCompiler; + shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false); + IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true); + + assert(shader.dxil != nullptr && spirv != nullptr); assert(*(reinterpret_cast(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); + bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo); + assert(result); + + spirv->Release(); + size_t currentProgress = ++progress; if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1)) std::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f); @@ -132,14 +140,13 @@ int main(int argc, char** argv) for (auto& [hash, shader] : shaders) { - f.println("\t{{ 0x{:X}, {}, {}, {}, {} }},", - hash, dxil.size(), shader.dxil->GetBufferSize(), spirv.size(), shader.spirv->GetBufferSize()); + f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},", + hash, dxil.size(), shader.dxil->GetBufferSize(), spirv.size(), shader.spirv.size(), shader.specConstantsMask); dxil.insert(dxil.end(), reinterpret_cast(shader.dxil->GetBufferPointer()), reinterpret_cast(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize()); - spirv.insert(spirv.end(), reinterpret_cast(shader.spirv->GetBufferPointer()), - reinterpret_cast(shader.spirv->GetBufferPointer()) + shader.spirv->GetBufferSize()); + spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end()); } f.println("}};"); diff --git a/ShaderRecomp/pch.h b/ShaderRecomp/pch.h index 829eb2b..7619813 100644 --- a/ShaderRecomp/pch.h +++ b/ShaderRecomp/pch.h @@ -15,6 +15,7 @@ #include #include #include +#include template struct be diff --git a/ShaderRecomp/shader_common.hlsli b/ShaderRecomp/shader_common.h similarity index 85% rename from ShaderRecomp/shader_common.hlsli rename to ShaderRecomp/shader_common.h index af37837..7ce0d8a 100644 --- a/ShaderRecomp/shader_common.hlsli +++ b/ShaderRecomp/shader_common.h @@ -1,8 +1,17 @@ +#ifndef SHADER_COMMON_H_INCLUDED +#define SHADER_COMMON_H_INCLUDED + +#define SPEC_CONSTANT_R11G11B10_NORMAL (1 << 0) +#define SPEC_CONSTANT_BICUBIC_GI_FILTER (1 << 1) +#define SPEC_CONSTANT_ALPHA_TEST (1 << 2) +#define SPEC_CONSTANT_ALPHA_TO_COVERAGE (1 << 3) +#define SPEC_CONSTANT_REVERSE_Z (1 << 4) + +#if !defined(__cplusplus) || defined(__INTELLISENSE__) + #define FLT_MIN asfloat(0xff7fffff) #define FLT_MAX asfloat(0x7f7fffff) -#define INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL (1 << 0) - #ifdef __spirv__ struct PushConstants @@ -14,22 +23,22 @@ struct PushConstants [[vk::push_constant]] ConstantBuffer g_PushConstants; -#define g_AlphaTestMode vk::RawBufferLoad(g_PushConstants.SharedConstants + 256) -#define g_AlphaThreshold vk::RawBufferLoad(g_PushConstants.SharedConstants + 260) -#define g_Booleans vk::RawBufferLoad(g_PushConstants.SharedConstants + 264) -#define g_SwappedTexcoords vk::RawBufferLoad(g_PushConstants.SharedConstants + 268) -#define g_InputLayoutFlags vk::RawBufferLoad(g_PushConstants.SharedConstants + 272) -#define g_EnableGIBicubicFiltering vk::RawBufferLoad(g_PushConstants.SharedConstants + 276) +#define g_Booleans vk::RawBufferLoad(g_PushConstants.SharedConstants + 256) +#define g_SwappedTexcoords vk::RawBufferLoad(g_PushConstants.SharedConstants + 260) +#define g_AlphaThreshold vk::RawBufferLoad(g_PushConstants.SharedConstants + 264) + +[[vk::constant_id(0)]] const uint g_SpecConstants = 0; + +#define g_SpecConstants() g_SpecConstants #else #define DEFINE_SHARED_CONSTANTS() \ - uint g_AlphaTestMode : packoffset(c16.x); \ - float g_AlphaThreshold : packoffset(c16.y); \ - uint g_Booleans : packoffset(c16.z); \ - uint g_SwappedTexcoords : packoffset(c16.w); \ - uint g_InputLayoutFlags : packoffset(c17.x); \ - bool g_EnableGIBicubicFiltering : packoffset(c17.y) + uint g_Booleans : packoffset(c16.x); \ + uint g_SwappedTexcoords : packoffset(c16.y); \ + float g_AlphaThreshold : packoffset(c16.z) \ + +uint g_SpecConstants(); #endif @@ -145,9 +154,9 @@ float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, flo return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]); } -float4 tfetchR11G11B10(uint inputLayoutFlags, uint4 value) +float4 tfetchR11G11B10(uint4 value) { - if (inputLayoutFlags & INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL) + if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) { return float4( (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), @@ -163,7 +172,7 @@ float4 tfetchR11G11B10(uint inputLayoutFlags, uint4 value) float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex) { - return (swappedTexcoords & (1 << semanticIndex)) != 0 ? value.yxwz : value; + return (swappedTexcoords & (1ull << semanticIndex)) != 0 ? value.yxwz : value; } float4 cube(float4 value, inout CubeMapData cubeMapData) @@ -202,3 +211,7 @@ float computeMipLevel(float2 pixelCoord) float deltaMaxSqr = max(dot(dx, dx), dot(dy, dy)); return max(0.0, 0.5 * log2(deltaMaxSqr)); } + +#endif + +#endif diff --git a/ShaderRecomp/shader_recompiler.cpp b/ShaderRecomp/shader_recompiler.cpp index 79b537b..779b5f0 100644 --- a/ShaderRecomp/shader_recompiler.cpp +++ b/ShaderRecomp/shader_recompiler.cpp @@ -1,4 +1,5 @@ #include "shader_recompiler.h" +#include "shader_common.h" static constexpr char SWIZZLES[] = { @@ -182,7 +183,8 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a case DeclUsage::Normal: case DeclUsage::Tangent: case DeclUsage::Binormal: - print("tfetchR11G11B10(g_InputLayoutFlags, "); + specConstantsMask |= SPEC_CONSTANT_R11G11B10_NORMAL; + print("tfetchR11G11B10("); break; case DeclUsage::TexCoord: @@ -251,8 +253,7 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu if (findResult != samplers.end()) { constNamePtr = findResult->second; - subtractFromOne = strcmp(constNamePtr, "sampZBuffer") == 0 || - strcmp(constNamePtr, "g_DepthSampler") == 0; + subtractFromOne = hasMtxPrevInvViewProjection && strcmp(constNamePtr, "sampZBuffer") == 0; } else { @@ -446,8 +447,16 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) const char* constantName = reinterpret_cast(constantTableData + findResult->second->name); if (findResult->second->registerCount > 1) { - regFormatted = std::format("{}({}{})", constantName, - reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : ""); + if (hasMtxProjection && strcmp(constantName, "g_MtxProjection") == 0) + { + regFormatted = std::format("(iterationIndex == 0 ? mtxProjectionReverseZ[{0}] : mtxProjection[{0}])", + reg - findResult->second->registerIndex); + } + else + { + regFormatted = std::format("{}({}{})", constantName, + reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : ""); + } } else { @@ -550,6 +559,8 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) break; } + bool closeIfBracket = false; + std::string_view exportRegister; if (instr.exportData) { @@ -580,6 +591,18 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) { case ExportRegister::VSPosition: exportRegister = "oPos"; + + if (hasMtxProjection) + { + indent(); + out += "if ((g_SpecConstants() & SPEC_CONSTANT_REVERSE_Z) == 0 || iterationIndex == 0)\n"; + indent(); + out += "{\n"; + ++indentation; + + closeIfBracket = true; + } + break; default: @@ -1042,6 +1065,13 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) out += "clip(ps != 0.0 ? 1 : -1);\n"; } + if (closeIfBracket) + { + --indentation; + indent(); + out += "}\n"; + } + if (instr.isPredicated) { --indentation; @@ -1079,11 +1109,18 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (!isPixelShader) { - if (strcmp(constantName, "g_InstanceTypes") == 0) + if (strcmp(constantName, "g_MtxProjection") == 0) + hasMtxProjection = true; + else if (strcmp(constantName, "g_InstanceTypes") == 0) isMetaInstancer = true; else if (strcmp(constantName, "g_IndexCount") == 0) hasIndexCount = true; } + else + { + if (strcmp(constantName, "g_MtxPrevInvViewProjection") == 0) + hasMtxPrevInvViewProjection = true; + } switch (constantInfo->registerSet) { @@ -1093,8 +1130,10 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (constantInfo->registerCount > 1) { - println("#define {}(INDEX) ((INDEX) < {} ? vk::RawBufferLoad(g_PushConstants.{}ShaderConstants + ({} + (INDEX)) * 16, 0x10) : 0.0)", - constantName, constantInfo->registerCount.get(), shaderName, constantInfo->registerIndex.get()); + uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; + + println("#define {}(INDEX) select((INDEX) < {}, vk::RawBufferLoad(g_PushConstants.{}ShaderConstants + ({} + min(INDEX, {})) * 16, 0x10), 0.0)", + constantName, tailCount, shaderName, constantInfo->registerIndex.get(), tailCount - 1); } else { @@ -1148,7 +1187,10 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi println(" : packoffset(c{});", constantInfo->registerIndex.get()); if (constantInfo->registerCount > 1) - println("#define {0}(INDEX) ((INDEX) < {1} ? {0}[INDEX] : 0.0)", constantName, constantInfo->registerCount.get()); + { + uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; + println("#define {0}(INDEX) select((INDEX) < {1}, {0}[min(INDEX, {2})], 0.0)", constantName, tailCount, tailCount - 1); + } } } @@ -1199,6 +1241,15 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi const auto shader = reinterpret_cast(shaderData + shaderContainer->shaderOffset); + out += "#ifndef __spirv__\n"; + + if (isPixelShader) + out += "[shader(\"pixel\")]\n"; + else + out += "[shader(\"vertex\")]\n"; + + out += "#endif\n"; + out += "void main(\n"; if (isPixelShader) @@ -1208,7 +1259,11 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi for (auto& [usage, usageIndex] : INTERPOLATORS) println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); - out += "\tin bool iFace : SV_IsFrontFace"; + out += "#ifdef __spirv__\n"; + out += "\tin bool iFace : SV_IsFrontFace\n"; + out += "#else\n"; + out += "\tin uint iFace : SV_IsFrontFace\n"; + out += "#endif\n"; auto pixelShader = reinterpret_cast(shader); if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) @@ -1270,6 +1325,20 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi out += ")\n"; out += "{\n"; + + if (hasMtxProjection) + { + specConstantsMask |= SPEC_CONSTANT_REVERSE_Z; + + out += "\toPos = 0.0;\n"; + + out += "\tfloat4x4 mtxProjection = float4x4(g_MtxProjection(0), g_MtxProjection(1), g_MtxProjection(2), g_MtxProjection(3));\n"; + out += "\tfloat4x4 mtxProjectionReverseZ = mul(mtxProjection, float4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0, 1, 1));\n"; + + out += "\t[unroll] for (int iterationIndex = 0; iterationIndex < 2; iterationIndex++)\n"; + out += "\t{\n"; + } + if (shaderContainer->definitionTableOffset != NULL) { auto definitionTable = reinterpret_cast(shaderData + shaderContainer->definitionTableOffset); @@ -1345,7 +1414,9 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (!isPixelShader) { - out += "\toPos = 0.0;\n"; + if (!hasMtxProjection) + out += "\toPos = 0.0;\n"; + for (auto& [usage, usageIndex] : INTERPOLATORS) println("\to{}{} = 0.0;", USAGE_VARIABLES[uint32_t(usage)], usageIndex); @@ -1638,8 +1709,10 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi { if (textureFetch.constIndex == 10) // g_GISampler { + specConstantsMask |= SPEC_CONSTANT_BICUBIC_GI_FILTER; + indent(); - out += "[branch] if (g_EnableGIBicubicFiltering)"; + out += "if (g_SpecConstants() & SPEC_CONSTANT_BICUBIC_GI_FILTER)"; indent(); out += '{'; @@ -1680,8 +1753,10 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi { if (isPixelShader) { + specConstantsMask |= (SPEC_CONSTANT_ALPHA_TEST | SPEC_CONSTANT_ALPHA_TO_COVERAGE); + indent(); - out += "[branch] if (g_AlphaTestMode == 1)"; + out += "[branch] if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TEST)"; indent(); out += '{'; @@ -1691,7 +1766,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi indent(); out += "}"; indent(); - out += "else if (g_AlphaTestMode == 2)"; + out += "else if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TO_COVERAGE)"; indent(); out += '{'; @@ -1707,7 +1782,10 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (simpleControlFlow) { indent(); - out += "return;\n"; + if (hasMtxProjection) + out += "continue;\n"; + else + out += "return;\n"; } else { @@ -1735,5 +1813,8 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi out += "\t}\n"; } + if (hasMtxProjection) + out += "\t}\n"; + out += "}"; } diff --git a/ShaderRecomp/shader_recompiler.h b/ShaderRecomp/shader_recompiler.h index a59a8b4..4603fcf 100644 --- a/ShaderRecomp/shader_recompiler.h +++ b/ShaderRecomp/shader_recompiler.h @@ -32,6 +32,9 @@ struct ShaderRecompiler : StringBuffer std::unordered_map boolConstants; std::unordered_map samplers; std::unordered_map ifEndLabels; + uint32_t specConstantsMask = 0; + bool hasMtxProjection = false; + bool hasMtxPrevInvViewProjection = false; void indent() { diff --git a/thirdparty/smol-v b/thirdparty/smol-v new file mode 160000 index 0000000..9dd54c3 --- /dev/null +++ b/thirdparty/smol-v @@ -0,0 +1 @@ +Subproject commit 9dd54c379ac29fa148cb1b829bb939ba7381d8f4