From 1d52a81ba97fac4e056e1208511cc0bbf5c750c8 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Sat, 2 Aug 2025 19:26:16 -0700 Subject: [PATCH] MSL shader support Co-authored-by: Isaac Marovitz --- XenosRecomp/CMakeLists.txt | 18 +- XenosRecomp/air_compiler.cpp | 75 +++ XenosRecomp/air_compiler.h | 10 + XenosRecomp/dxc_compiler.cpp | 5 + XenosRecomp/main.cpp | 142 ++++-- XenosRecomp/pch.h | 1 + XenosRecomp/shader_common.h | 302 +++++++++-- XenosRecomp/shader_recompiler.cpp | 809 +++++++++++++++++++++++------- XenosRecomp/shader_recompiler.h | 2 +- 9 files changed, 1085 insertions(+), 279 deletions(-) create mode 100644 XenosRecomp/air_compiler.cpp create mode 100644 XenosRecomp/air_compiler.h diff --git a/XenosRecomp/CMakeLists.txt b/XenosRecomp/CMakeLists.txt index e505a5e..b187f59 100644 --- a/XenosRecomp/CMakeLists.txt +++ b/XenosRecomp/CMakeLists.txt @@ -4,9 +4,13 @@ if (WIN32) option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON) endif() +if (APPLE) + option(XENOS_RECOMP_AIR "Generate Metal AIR shader cache" ON) +endif() + set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source") -add_executable(XenosRecomp +add_executable(XenosRecomp constant_table.h dxc_compiler.cpp dxc_compiler.h @@ -30,13 +34,6 @@ target_precompile_headers(XenosRecomp PRIVATE pch.h) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") target_compile_options(XenosRecomp PRIVATE -Wno-switch -Wno-unused-variable -Wno-null-arithmetic -fms-extensions) - - include(CheckCXXSymbolExists) - check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) - if(LIBCPP) - # Allows using std::execution - target_compile_options(XenosRecomp PRIVATE -fexperimental-library) - endif() endif() if (WIN32) @@ -51,3 +48,8 @@ if (XENOS_RECOMP_DXIL) target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL) target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL) endif() + +if (XENOS_RECOMP_AIR) + target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_AIR) + target_sources(XenosRecomp PRIVATE air_compiler.cpp air_compiler.h) +endif() diff --git a/XenosRecomp/air_compiler.cpp b/XenosRecomp/air_compiler.cpp new file mode 100644 index 0000000..7532569 --- /dev/null +++ b/XenosRecomp/air_compiler.cpp @@ -0,0 +1,75 @@ +#include "air_compiler.h" + +#include +#include +#include +#include + +struct TemporaryPath +{ + const std::string path; + + explicit TemporaryPath(std::string_view path) : path(path) {} + + ~TemporaryPath() + { + unlink(path.c_str()); + } +}; + +static int executeCommand(const char** argv) +{ + pid_t pid; + if (posix_spawn(&pid, argv[0], nullptr, nullptr, const_cast(argv), nullptr) != 0) + return -1; + + int status; + if (waitpid(pid, &status, 0) == -1) + return -1; + + return status; +} + +std::vector AirCompiler::compile(const std::string& shaderSource) +{ + // Save source to a location on disk for the compiler to read. + char sourcePathTemplate[PATH_MAX] = "/tmp/xenos_metal_XXXXXX.metal"; + const int sourceFd = mkstemps(sourcePathTemplate, 6); + if (sourceFd == -1) + { + fmt::println("Failed to create temporary file for shader source: {}", strerror(errno)); + std::exit(1); + } + + const TemporaryPath sourcePath(sourcePathTemplate); + const TemporaryPath irPath(sourcePath.path + ".ir"); + const TemporaryPath metalLibPath(sourcePath.path + ".metallib"); + + const ssize_t sourceWritten = write(sourceFd, shaderSource.data(), shaderSource.size()); + close(sourceFd); + if (sourceWritten < 0) + { + fmt::println("Failed to write shader source to disk: {}", strerror(errno)); + std::exit(1); + } + + const char* compileCommand[] = { "/usr/bin/xcrun", "-sdk", "macosx", "metal", "-o", irPath.path.c_str(), "-c", sourcePath.path.c_str(), "-D__air__", "-DUNLEASHED_RECOMP", "-Wno-unused-variable", "-frecord-sources", "-gline-tables-only", nullptr }; + if (const int compileStatus = executeCommand(compileCommand); compileStatus != 0) + { + fmt::println("Metal compiler exited with status: {}", compileStatus); + fmt::println("Generated source:\n{}", shaderSource); + std::exit(1); + } + + const char* linkCommand[] = { "/usr/bin/xcrun", "-sdk", "macosx", "metallib", "-o", metalLibPath.path.c_str(), irPath.path.c_str(), nullptr }; + if (const int linkStatus = executeCommand(linkCommand); linkStatus != 0) + { + fmt::println("Metal linker exited with status: {}", linkStatus); + fmt::println("Generated source:\n{}", shaderSource); + std::exit(1); + } + + std::ifstream libStream(metalLibPath.path, std::ios::binary); + std::vector data((std::istreambuf_iterator(libStream)), std::istreambuf_iterator()); + return data; +} diff --git a/XenosRecomp/air_compiler.h b/XenosRecomp/air_compiler.h new file mode 100644 index 0000000..66a0326 --- /dev/null +++ b/XenosRecomp/air_compiler.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +class AirCompiler +{ +public: + [[nodiscard]] static std::vector compile(const std::string& shaderSource); +}; \ No newline at end of file diff --git a/XenosRecomp/dxc_compiler.cpp b/XenosRecomp/dxc_compiler.cpp index fcbc43a..7c4b433 100644 --- a/XenosRecomp/dxc_compiler.cpp +++ b/XenosRecomp/dxc_compiler.cpp @@ -34,6 +34,11 @@ IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixe target = L"-T vs_6_0"; } + if (!compileLibrary) + { + args[argCount++] = L"-E shaderMain"; + } + args[argCount++] = target; args[argCount++] = L"-HV 2021"; args[argCount++] = L"-all-resources-bound"; diff --git a/XenosRecomp/main.cpp b/XenosRecomp/main.cpp index d015145..a8b87a6 100644 --- a/XenosRecomp/main.cpp +++ b/XenosRecomp/main.cpp @@ -1,7 +1,15 @@ +#include +#include +#include + #include "shader.h" #include "shader_recompiler.h" #include "dxc_compiler.h" +#ifdef XENOS_RECOMP_AIR +#include "air_compiler.h" +#endif + static std::unique_ptr readAllBytes(const char* filePath, size_t& fileSize) { FILE* file = fopen(filePath, "rb"); @@ -26,9 +34,43 @@ struct RecompiledShader uint8_t* data = nullptr; IDxcBlob* dxil = nullptr; std::vector spirv; + std::vector air; uint32_t specConstantsMask = 0; }; +void recompileShader(RecompiledShader& shader, const std::string_view include, std::atomic& progress, uint32_t numShaders) +{ + thread_local ShaderRecompiler recompiler; + recompiler = {}; + recompiler.recompile(shader.data, include); + + shader.specConstantsMask = recompiler.specConstantsMask; + + thread_local DxcCompiler dxcCompiler; + +#ifdef XENOS_RECOMP_DXIL + shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false); + assert(shader.dxil != nullptr); + assert(*(reinterpret_cast(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); +#endif + +#ifdef XENOS_RECOMP_AIR + shader.air = AirCompiler::compile(recompiler.out); +#endif + + IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true); + assert(spirv != nullptr); + + bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo); + assert(result); + + spirv->Release(); + + size_t currentProgress = ++progress; + if ((currentProgress % 10) == 0 || (currentProgress == numShaders - 1)) + fmt::println("Recompiling shaders... {}%", currentProgress / float(numShaders) * 100.0f); +} + int main(int argc, char** argv) { #ifndef XENOS_RECOMP_INPUT @@ -71,6 +113,7 @@ int main(int argc, char** argv) { std::vector> files; std::map shaders; + std::map shaderFilenames; for (auto& file : std::filesystem::recursive_directory_iterator(input)) { @@ -99,6 +142,7 @@ int main(int argc, char** argv) { shader.first->second.data = fileData.get() + i; foundAny = true; + shaderFilenames[hash] = file.path().string(); } i += dataSize; @@ -113,38 +157,42 @@ int main(int argc, char** argv) files.emplace_back(std::move(fileData)); } + std::mutex shaderQueueMutex; + std::deque shaderQueue; + for (const auto& [hash, _] : shaders) + { + shaderQueue.emplace_back(hash); + } + + const uint32_t numThreads = std::max(std::thread::hardware_concurrency(), 1u); + fmt::println("Recompiling shaders with {} threads", numThreads); + std::atomic progress = 0; - - std::for_each(std::execution::par_unseq, shaders.begin(), shaders.end(), [&](auto& hashShaderPair) + std::vector threads; + threads.reserve(numThreads); + for (uint32_t i = 0; i < numThreads; i++) + { + threads.emplace_back([&] { - auto& shader = hashShaderPair.second; - - thread_local ShaderRecompiler recompiler; - recompiler = {}; - recompiler.recompile(shader.data, include); - - shader.specConstantsMask = recompiler.specConstantsMask; - - thread_local DxcCompiler dxcCompiler; - -#ifdef XENOS_RECOMP_DXIL - shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false); - assert(shader.dxil != nullptr); - assert(*(reinterpret_cast(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); -#endif - - IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true); - assert(spirv != nullptr); - - bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo); - assert(result); - - spirv->Release(); - - size_t currentProgress = ++progress; - if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1)) - fmt::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f); + while (true) + { + XXH64_hash_t shaderHash; + { + std::lock_guard lock(shaderQueueMutex); + if (shaderQueue.empty()) { + return; + } + shaderHash = shaderQueue.front(); + shaderQueue.pop_front(); + } + recompileShader(shaders[shaderHash], include, progress, shaders.size()); + } }); + } + for (auto& thread : threads) + { + thread.join(); + } fmt::println("Creating shader cache..."); @@ -154,18 +202,32 @@ int main(int argc, char** argv) std::vector dxil; std::vector spirv; + std::vector air; for (auto& [hash, shader] : shaders) { - f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},", - hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, spirv.size(), shader.spirv.size(), shader.specConstantsMask); + const std::string& fullFilename = shaderFilenames[hash]; + std::string filename = fullFilename; + size_t shaderPos = filename.find("shader"); + if (shaderPos != std::string::npos) { + filename = filename.substr(shaderPos); + // Prevent bad escape sequences in Windows shader path. + std::replace(filename.begin(), filename.end(), '\\', '/'); + } + f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {}, {}, {}, \"{}\" }},", + hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, + spirv.size(), shader.spirv.size(), air.size(), shader.air.size(), shader.specConstantsMask, filename); if (shader.dxil != nullptr) { dxil.insert(dxil.end(), reinterpret_cast(shader.dxil->GetBufferPointer()), reinterpret_cast(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize()); } - + +#ifdef XENOS_RECOMP_AIR + air.insert(air.end(), shader.air.begin(), shader.air.end()); +#endif + spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end()); } @@ -189,6 +251,22 @@ int main(int argc, char** argv) f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size()); #endif +#ifdef XENOS_RECOMP_AIR + fmt::println("Compressing AIR cache..."); + + std::vector airCompressed(ZSTD_compressBound(air.size())); + airCompressed.resize(ZSTD_compress(airCompressed.data(), airCompressed.size(), air.data(), air.size(), level)); + + f.print("const uint8_t g_compressedAirCache[] = {{"); + + for (auto data : airCompressed) + f.print("{},", data); + + f.println("}};"); + f.println("const size_t g_airCacheCompressedSize = {};", airCompressed.size()); + f.println("const size_t g_airCacheDecompressedSize = {};", air.size()); +#endif + fmt::println("Compressing SPIRV cache..."); std::vector spirvCompressed(ZSTD_compressBound(spirv.size())); diff --git a/XenosRecomp/pch.h b/XenosRecomp/pch.h index e59682c..b080943 100644 --- a/XenosRecomp/pch.h +++ b/XenosRecomp/pch.h @@ -1,6 +1,7 @@ #pragma once #ifdef _WIN32 +#define NOMINMAX #include #endif diff --git a/XenosRecomp/shader_common.h b/XenosRecomp/shader_common.h index 1c74034..9191a28 100644 --- a/XenosRecomp/shader_common.h +++ b/XenosRecomp/shader_common.h @@ -10,10 +10,12 @@ #define SPEC_CONSTANT_REVERSE_Z (1 << 4) #endif -#if !defined(__cplusplus) || defined(__INTELLISENSE__) +#if defined(__air__) || !defined(__cplusplus) || defined(__INTELLISENSE__) +#ifndef __air__ #define FLT_MIN asfloat(0xff7fffff) #define FLT_MAX asfloat(0x7f7fffff) +#endif #ifdef __spirv__ @@ -35,6 +37,32 @@ struct PushConstants #define g_SpecConstants() g_SpecConstants +#elif defined(__air__) + +#include + +using namespace metal; + +constant uint G_SPEC_CONSTANTS [[function_constant(0)]]; +constant uint G_SPEC_CONSTANTS_VAL = is_function_constant_defined(G_SPEC_CONSTANTS) ? G_SPEC_CONSTANTS : 0; + +uint g_SpecConstants() +{ + return G_SPEC_CONSTANTS_VAL; +} + +struct PushConstants +{ + ulong VertexShaderConstants; + ulong PixelShaderConstants; + ulong SharedConstants; +}; + +#define g_Booleans (*(reinterpret_cast(g_PushConstants.SharedConstants + 256))) +#define g_SwappedTexcoords (*(reinterpret_cast(g_PushConstants.SharedConstants + 260))) +#define g_HalfPixelOffset (*(reinterpret_cast(g_PushConstants.SharedConstants + 264))) +#define g_AlphaThreshold (*(reinterpret_cast(g_PushConstants.SharedConstants + 272))) + #else #define DEFINE_SHARED_CONSTANTS() \ @@ -47,6 +75,93 @@ uint g_SpecConstants(); #endif +struct CubeMapData +{ + float3 cubeMapDirections[2]; + uint cubeMapIndex; +}; + +#ifdef __air__ + +struct Texture2DDescriptorHeap +{ + texture2d tex; +}; + +struct Texture3DDescriptorHeap +{ + texture3d tex; +}; + +struct TextureCubeDescriptorHeap +{ + texturecube tex; +}; + +struct SamplerDescriptorHeap +{ + sampler samp; +}; + +uint2 getTexture2DDimensions(texture2d texture) +{ + return uint2(texture.get_width(), texture.get_height()); +} + +float4 tfetch2D(constant Texture2DDescriptorHeap* textureHeap, + constant SamplerDescriptorHeap* samplerHeap, + uint resourceDescriptorIndex, + uint samplerDescriptorIndex, + float2 texCoord, float2 offset) +{ + texture2d texture = textureHeap[resourceDescriptorIndex].tex; + sampler sampler = samplerHeap[samplerDescriptorIndex].samp; + return texture.sample(sampler, texCoord + offset / (float2)getTexture2DDimensions(texture)); +} + +float2 getWeights2D(constant Texture2DDescriptorHeap* textureHeap, + constant SamplerDescriptorHeap* samplerHeap, + uint resourceDescriptorIndex, + uint samplerDescriptorIndex, + float2 texCoord, float2 offset) +{ + texture2d texture = textureHeap[resourceDescriptorIndex].tex; + return select(fract(texCoord * float2(getTexture2DDimensions(texture)) + offset - 0.5), 0.0, isnan(texCoord)); +} + +float4 tfetch3D(constant Texture3DDescriptorHeap* textureHeap, + constant SamplerDescriptorHeap* samplerHeap, + uint resourceDescriptorIndex, + uint samplerDescriptorIndex, + float3 texCoord) +{ + texture3d texture = textureHeap[resourceDescriptorIndex].tex; + sampler sampler = samplerHeap[samplerDescriptorIndex].samp; + return texture.sample(sampler, texCoord); +} + +float4 tfetchCube(constant TextureCubeDescriptorHeap* textureHeap, + constant SamplerDescriptorHeap* samplerHeap, + uint resourceDescriptorIndex, + uint samplerDescriptorIndex, + float3 texCoord, thread CubeMapData* cubeMapData) +{ + texturecube texture = textureHeap[resourceDescriptorIndex].tex; + sampler sampler = samplerHeap[samplerDescriptorIndex].samp; + return texture.sample(sampler, cubeMapData->cubeMapDirections[(uint)texCoord.z]); +} + +float4 cube(float4 value, thread CubeMapData* cubeMapData) +{ + uint index = cubeMapData->cubeMapIndex; + cubeMapData->cubeMapDirections[index] = value.xyz; + ++cubeMapData->cubeMapIndex; + + return float4(0.0, 0.0, 0.0, index); +} + +#else + Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); Texture3D g_Texture3DDescriptorHeap[] : register(t0, space1); TextureCube g_TextureCubeDescriptorHeap[] : register(t0, space2); @@ -71,6 +186,85 @@ float2 getWeights2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, f return select(isnan(texCoord), 0.0, frac(texCoord * getTexture2DDimensions(texture) + offset - 0.5)); } +float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord) +{ + return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); +} + +float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData) +{ + return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]); +} + +float4 cube(float4 value, inout CubeMapData cubeMapData) +{ + uint index = cubeMapData.cubeMapIndex; + cubeMapData.cubeMapDirections[index] = value.xyz; + ++cubeMapData.cubeMapIndex; + + return float4(0.0, 0.0, 0.0, index); +} + +#endif + +float4 tfetchR11G11B10(uint4 value) +{ + if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) + { + return float4( + (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), + (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0), + (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0), + 0.0); + } + else + { +#ifdef __air__ + return as_type(value); +#else + return asfloat(value); +#endif + } +} + +#ifdef __air__ +#define selectWrapper(a, b, c) select(c, b, a) +#else +#define selectWrapper(a, b, c) select(a, b, c) +#endif + +#ifdef __air__ +#define frac(X) fract(X) + +template +void clip(T a) +{ + if (a < 0.0) { + discard_fragment(); + } +} + +template +float rcp(T a) +{ + return 1.0 / a; +} + +template +float4x4 mul(T a, T b) +{ + return b * a; +} +#endif + +#ifdef __air__ +#define UNROLL +#define BRANCH +#else +#define UNROLL [unroll] +#define BRANCH [branch] +#endif + float w0(float a) { return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); @@ -111,12 +305,52 @@ float h1(float a) return 1.0f + w3(a) / (w2(a) + w3(a)) + 0.5f; } +#ifdef __air__ + +float4 tfetch2DBicubic(constant Texture2DDescriptorHeap* textureHeap, + constant SamplerDescriptorHeap* samplerHeap, + uint resourceDescriptorIndex, + uint samplerDescriptorIndex, + float2 texCoord, float2 offset) +{ + texture2d texture = textureHeap[resourceDescriptorIndex].tex; + sampler sampler = samplerHeap[samplerDescriptorIndex].samp; + uint2 dimensions = getTexture2DDimensions(texture); + + float x = texCoord.x * dimensions.x + offset.x; + float y = texCoord.y * dimensions.y + offset.y; + + x -= 0.5f; + y -= 0.5f; + float px = floor(x); + float py = floor(y); + float fx = x - px; + float fy = y - py; + + float g0x = g0(fx); + float g1x = g1(fx); + float h0x = h0(fx); + float h1x = h1(fx); + float h0y = h0(fy); + float h1y = h1(fy); + + float4 r = + g0(fy) * (g0x * texture.sample(sampler, float2(px + h0x, py + h0y) / float2(dimensions)) + + g1x * texture.sample(sampler, float2(px + h1x, py + h0y) / float2(dimensions))) + + g1(fy) * (g0x * texture.sample(sampler, float2(px + h0x, py + h1y) / float2(dimensions)) + + g1x * texture.sample(sampler, float2(px + h1x, py + h1y) / float2(dimensions))); + + return r; +} + +#else + float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset) { Texture2D texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex]; SamplerState samplerState = g_SamplerDescriptorHeap[samplerDescriptorIndex]; uint2 dimensions = getTexture2DDimensions(texture); - + float x = texCoord.x * dimensions.x + offset.x; float y = texCoord.y * dimensions.y + offset.y; @@ -143,50 +377,11 @@ float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex return r; } -float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord) -{ - return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); -} +#endif -struct CubeMapData +float4 swapFloats(uint swappedFloats, float4 value, uint semanticIndex) { - float3 cubeMapDirections[2]; - uint cubeMapIndex; -}; - -float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData) -{ - return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]); -} - -float4 tfetchR11G11B10(uint4 value) -{ - if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) - { - return float4( - (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), - (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0), - (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0), - 0.0); - } - else - { - return asfloat(value); - } -} - -float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex) -{ - return (swappedTexcoords & (1ull << semanticIndex)) != 0 ? value.yxwz : value; -} - -float4 cube(float4 value, inout CubeMapData cubeMapData) -{ - uint index = cubeMapData.cubeMapIndex; - cubeMapData.cubeMapDirections[index] = value.xyz; - ++cubeMapData.cubeMapIndex; - - return float4(0.0, 0.0, 0.0, index); + return (swappedFloats & (1ull << semanticIndex)) != 0 ? value.yxwz : value; } float4 dst(float4 src0, float4 src1) @@ -204,15 +399,34 @@ float4 max4(float4 src0) return max(max(src0.x, src0.y), max(src0.z, src0.w)); } +#ifdef __air__ + +float2 getPixelCoord(constant Texture2DDescriptorHeap* textureHeap, + uint resourceDescriptorIndex, + float2 texCoord) +{ + texture2d texture = textureHeap[resourceDescriptorIndex].tex; + return (float2)getTexture2DDimensions(texture) * texCoord; +} + +#else + float2 getPixelCoord(uint resourceDescriptorIndex, float2 texCoord) { return getTexture2DDimensions(g_Texture2DDescriptorHeap[resourceDescriptorIndex]) * texCoord; } +#endif + float computeMipLevel(float2 pixelCoord) { +#ifdef __air__ + float2 dx = dfdx(pixelCoord); + float2 dy = dfdy(pixelCoord); +#else float2 dx = ddx(pixelCoord); float2 dy = ddy(pixelCoord); +#endif float deltaMaxSqr = max(dot(dx, dx), dot(dy, dy)); return max(0.0, 0.5 * log2(deltaMaxSqr)); } diff --git a/XenosRecomp/shader_recompiler.cpp b/XenosRecomp/shader_recompiler.cpp index 698a418..fd59693 100644 --- a/XenosRecomp/shader_recompiler.cpp +++ b/XenosRecomp/shader_recompiler.cpp @@ -121,7 +121,7 @@ static constexpr std::pair INTERPOLATORS[] = static constexpr std::string_view TEXTURE_DIMENSIONS[] = { "2D", - "3D", + "3D", "Cube" }; @@ -130,14 +130,21 @@ static FetchDestinationSwizzle getDestSwizzle(uint32_t dstSwizzle, uint32_t inde return FetchDestinationSwizzle((dstSwizzle >> (index * 3)) & 0x7); } -void ShaderRecompiler::printDstSwizzle(uint32_t dstSwizzle, bool operand) +uint32_t ShaderRecompiler::printDstSwizzle(uint32_t dstSwizzle, bool operand) { + uint32_t size = 0; + for (size_t i = 0; i < 4; i++) { const auto swizzle = getDestSwizzle(dstSwizzle, i); if (swizzle >= FetchDestinationSwizzle::X && swizzle <= FetchDestinationSwizzle::W) + { out += SWIZZLES[operand ? uint32_t(swizzle) : i]; + size++; + } } + + return size; } void ShaderRecompiler::printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle) @@ -172,10 +179,15 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a indent(); print("r{}.", instr.dstRegister); - printDstSwizzle(instr.dstSwizzle, false); + uint32_t size = printDstSwizzle(instr.dstSwizzle, false); out += " = "; + if (size <= 1) + out += "(float)("; + else + print("(float{})(", size); + auto findResult = vertexElements.find(address); assert(findResult != vertexElements.end()); @@ -189,11 +201,11 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a break; case DeclUsage::TexCoord: - print("tfetchTexcoord(g_SwappedTexcoords, "); + print("swapFloats(g_SwappedTexcoords, (float4)"); break; } - print("i{}{}", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex)); + print("(input.i{}{})", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex)); switch (findResult->second.usage) { @@ -208,7 +220,7 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a break; } - out += '.'; + out += ")."; printDstSwizzle(instr.dstSwizzle, true); out += ";\n"; @@ -271,7 +283,13 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu if (instr.constIndex == 0 && instr.dimension == TextureDimension::Texture2D) { indent(); - print("pixelCoord = getPixelCoord({}_Texture2DDescriptorIndex, ", constNamePtr); + println("pixelCoord = getPixelCoord("); + println("#ifdef __air__"); + indent(); + println("g_Texture2DDescriptorHeap,"); + println("#endif"); + indent(); + print("{}_Texture2DDescriptorIndex, ", constNamePtr); printSrcRegister(2); out += ");\n"; } @@ -331,7 +349,17 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu out += "Bicubic"; #endif - print("({0}_Texture{1}DescriptorIndex, {0}_SamplerDescriptorIndex, ", constNamePtr, dimension); + println("("); + + println("#ifdef __air__"); + indent(); + println("\tg_Texture{}DescriptorHeap,", dimension); + indent(); + println("\tg_SamplerDescriptorHeap,"); + println("#endif"); + + indent(); + print("\t{0}_Texture{1}DescriptorIndex, {0}_SamplerDescriptorIndex, ", constNamePtr, dimension); printSrcRegister(componentCount); switch (instr.dimension) @@ -340,7 +368,13 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu print(", float2({}, {})", instr.offsetX * 0.5f, instr.offsetY * 0.5f); break; case TextureDimension::TextureCube: - out += ", cubeMapData"; + println("\n#ifdef __air__"); + indent(); + println(", &cubeMapData"); + println("#else"); + indent(); + println(", cubeMapData"); + println("#endif"); break; } @@ -383,6 +417,12 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) SCALAR_CONSTANT_1 }; + struct OperationResult + { + std::string expression; + size_t componentCount; + }; + auto op = [&](size_t operand) { size_t reg = 0; @@ -487,16 +527,16 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) } } - std::string result; + OperationResult opResult {}; if (negate) - result += '-'; + opResult.expression += '-'; if (abs) - result += "abs("; + opResult.expression += "abs("; - result += regFormatted; - result += '.'; + opResult.expression += regFormatted; + opResult.expression += '.'; switch (operand) { @@ -528,8 +568,10 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) for (size_t i = 0; i < 4; i++) { - if ((mask >> i) & 0x1) - result += SWIZZLES[((swizzle >> (i * 2)) + i) & 0x3]; + if ((mask >> i) & 0x1) { + opResult.componentCount++; + opResult.expression += SWIZZLES[((swizzle >> (i * 2)) + i) & 0x3]; + } } break; @@ -537,47 +579,51 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) case SCALAR_0: case SCALAR_CONSTANT_0: - result += SWIZZLES[((swizzle >> 6) + 3) & 0x3]; + opResult.componentCount = 1; + opResult.expression += SWIZZLES[((swizzle >> 6) + 3) & 0x3]; break; case SCALAR_1: case SCALAR_CONSTANT_1: - result += SWIZZLES[swizzle & 0x3]; + opResult.componentCount = 1; + opResult.expression += SWIZZLES[swizzle & 0x3]; break; } if (abs) - result += ")"; + opResult.expression += ")"; - return result; + return opResult; }; switch (instr.vectorOpcode) { case AluVectorOpcode::KillEq: indent(); - println("clip(any({} == {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); + println("clip(any({} == {}) ? -1 : 1);", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::KillGt: indent(); - println("clip(any({} > {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); + println("clip(any({} > {}) ? -1 : 1);", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::KillGe: indent(); - println("clip(any({} >= {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); + println("clip(any({} >= {}) ? -1 : 1);", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::KillNe: indent(); - println("clip(any({} != {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); + println("clip(any({} != {}) ? -1 : 1);", op(VECTOR_0).expression, op(VECTOR_1).expression); break; } bool closeIfBracket = false; std::string_view exportRegister; + bool vectorRegister = true; + if (instr.exportData) { if (isPixelShader) @@ -585,19 +631,20 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) switch (ExportRegister(instr.vectorDest)) { case ExportRegister::PSColor0: - exportRegister = "oC0"; + exportRegister = "output.oC0"; break; case ExportRegister::PSColor1: - exportRegister = "oC1"; + exportRegister = "output.oC1"; break; case ExportRegister::PSColor2: - exportRegister = "oC2"; + exportRegister = "output.oC2"; break; case ExportRegister::PSColor3: - exportRegister = "oC3"; + exportRegister = "output.oC3"; break; case ExportRegister::PSDepth: - exportRegister = "oDepth"; + exportRegister = "output.oDepth"; + vectorRegister = false; break; } } @@ -606,7 +653,7 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) switch (ExportRegister(instr.vectorDest)) { case ExportRegister::VSPosition: - exportRegister = "oPos"; + exportRegister = "output.oPos"; #ifdef UNLEASHED_RECOMP if (hasMtxProjection) @@ -637,7 +684,7 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) if (instr.vectorOpcode >= AluVectorOpcode::SetpEqPush && instr.vectorOpcode <= AluVectorOpcode::SetpGePush) { indent(); - print("p0 = {} == 0.0 && {} ", op(VECTOR_0), op(VECTOR_1)); + print("p0 = {} == 0.0 && {} ", op(VECTOR_0).expression, op(VECTOR_1).expression); switch (instr.vectorOpcode) { @@ -660,7 +707,7 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) else if (instr.vectorOpcode >= AluVectorOpcode::MaxA) { indent(); - println("a0 = (int)clamp(floor(({}).w + 0.5), -256.0, 255.0);", op(VECTOR_0)); + println("a0 = (int)clamp(floor(({}).w + 0.5), -256.0, 255.0);", op(VECTOR_0).expression); } uint32_t vectorWriteMask = instr.vectorWriteMask; @@ -673,132 +720,275 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) if (!exportRegister.empty()) { out += exportRegister; - out += '.'; + if (vectorRegister) + out += '.'; } else { print("r{}.", instr.vectorDest); } + uint32_t vectorWriteSize = 0; + for (size_t i = 0; i < 4; i++) { if ((vectorWriteMask >> i) & 0x1) - out += SWIZZLES[i]; + { + if (vectorRegister) + out += SWIZZLES[i]; + vectorWriteSize++; + } } out += " = "; + if (vectorWriteSize > 1) + print("(float{})((", vectorWriteSize); + else + out += "(float)(("; + if (instr.vectorSaturate) out += "saturate("; + size_t operationResultComponentCount; + switch (instr.vectorOpcode) { case AluVectorOpcode::Add: - print("{} + {}", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("{} + {}", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Mul: - print("{} * {}", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("{} * {}", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Max: case AluVectorOpcode::MaxA: - print("max({}, {})", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("max({}, {})", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Min: - print("min({}, {})", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("min({}, {})", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Seq: - print("{} == {}", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("{} == {}", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Sgt: - print("{} > {}", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("{} > {}", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Sge: - print("{} >= {}", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("{} >= {}", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Sne: - print("{} != {}", op(VECTOR_0), op(VECTOR_1)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + operationResultComponentCount = std::max(v0.componentCount, v1.componentCount); + + print("{} != {}", v0.expression, v1.expression); + break; + } case AluVectorOpcode::Frc: - print("frac({})", op(VECTOR_0)); - break; + { + auto v0 = op(VECTOR_0); + operationResultComponentCount = v0.componentCount; + + print("frac({})", v0.expression); + break; + } case AluVectorOpcode::Trunc: - print("trunc({})", op(VECTOR_0)); - break; + { + auto v0 = op(VECTOR_0); + operationResultComponentCount = v0.componentCount; + + print("trunc({})", v0.expression); + break; + } case AluVectorOpcode::Floor: - print("floor({})", op(VECTOR_0)); - break; + { + auto v0 = op(VECTOR_0); + operationResultComponentCount = v0.componentCount; + + print("floor({})", v0.expression); + break; + } case AluVectorOpcode::Mad: - print("{} * {} + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + auto v2 = op(VECTOR_2); + operationResultComponentCount = std::max(std::max(v0.componentCount, v1.componentCount), v2.componentCount); + + print("{} * {} + {}", v0.expression, v1.expression, v2.expression); + break; + } case AluVectorOpcode::CndEq: - print("select({} == 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + auto v2 = op(VECTOR_2); + operationResultComponentCount = std::max(v1.componentCount, v2.componentCount); + + print("selectWrapper({} == 0.0, {}, {})", v0.expression, v1.expression, v2.expression); + break; + } case AluVectorOpcode::CndGe: - print("select({} >= 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + auto v2 = op(VECTOR_2); + operationResultComponentCount = std::max(v1.componentCount, v2.componentCount); + + print("selectWrapper({} >= 0.0, {}, {})", v0.expression, v1.expression, v2.expression); + break; + } case AluVectorOpcode::CndGt: - print("select({} > 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); - break; + { + auto v0 = op(VECTOR_0); + auto v1 = op(VECTOR_1); + auto v2 = op(VECTOR_2); + operationResultComponentCount = std::max(v1.componentCount, v2.componentCount); + + print("selectWrapper({} > 0.0, {}, {})", v0.expression, v1.expression, v2.expression); + break; + } case AluVectorOpcode::Dp4: case AluVectorOpcode::Dp3: - print("dot({}, {})", op(VECTOR_0), op(VECTOR_1)); + operationResultComponentCount = 1; + print("dot({}, {})", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::Dp2Add: - print("dot({}, {}) + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); - break; + { + auto v2 = op(VECTOR_2); + operationResultComponentCount = v2.componentCount; + + print("dot({}, {}) + {}", op(VECTOR_0).expression, op(VECTOR_1).expression, v2.expression); + break; + } case AluVectorOpcode::Cube: + operationResultComponentCount = 4; + println("\n#ifdef __air__"); + indent(); + print("cube(r{}, &cubeMapData)", instr.src1Register); + println("\n#else"); + indent(); print("cube(r{}, cubeMapData)", instr.src1Register); + println("\n#endif"); break; case AluVectorOpcode::Max4: - print("max4({})", op(VECTOR_0)); + operationResultComponentCount = 4; + print("max4({})", op(VECTOR_0).expression); break; case AluVectorOpcode::SetpEqPush: case AluVectorOpcode::SetpNePush: case AluVectorOpcode::SetpGtPush: case AluVectorOpcode::SetpGePush: - print("p0 ? 0.0 : {} + 1.0", op(VECTOR_0)); - break; + { + auto v0 = op(VECTOR_0); + operationResultComponentCount = v0.componentCount; + + print("p0 ? 0.0 : {} + 1.0", v0.expression); + break; + } case AluVectorOpcode::KillEq: - print("any({} == {})", op(VECTOR_0), op(VECTOR_1)); + operationResultComponentCount = 1; + print("any({} == {})", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::KillGt: - print("any({} > {})", op(VECTOR_0), op(VECTOR_1)); + operationResultComponentCount = 1; + print("any({} > {})", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::KillGe: - print("any({} >= {})", op(VECTOR_0), op(VECTOR_1)); + operationResultComponentCount = 1; + print("any({} >= {})", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::KillNe: - print("any({} != {})", op(VECTOR_0), op(VECTOR_1)); + operationResultComponentCount = 1; + print("any({} != {})", op(VECTOR_0).expression, op(VECTOR_1).expression); break; case AluVectorOpcode::Dst: - print("dst({}, {})", op(VECTOR_0), op(VECTOR_1)); + operationResultComponentCount = 4; + print("dst({}, {})", op(VECTOR_0).expression, op(VECTOR_1).expression); break; } + out += ")"; + + if (operationResultComponentCount > vectorWriteSize) { + if (vectorWriteSize == 1) { + out += ".x"; + } else if (vectorWriteSize == 2) { + out += ".xy"; + } else if (vectorWriteSize == 3) { + out += ".xyz"; + } + } + + out += ")"; + if (instr.vectorSaturate) out += ')'; @@ -815,27 +1005,27 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) switch (instr.scalarOpcode) { case AluScalarOpcode::SetpEq: - print("{} == 0.0", op(SCALAR_0)); + print("{} == 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpNe: - print("{} != 0.0", op(SCALAR_0)); + print("{} != 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpGt: - print("{} > 0.0", op(SCALAR_0)); + print("{} > 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpGe: - print("{} >= 0.0", op(SCALAR_0)); + print("{} >= 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpInv: - print("{} == 1.0", op(SCALAR_0)); + print("{} == 1.0", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpPop: - print("{} - 1.0 <= 0.0", op(SCALAR_0)); + print("{} - 1.0 <= 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpClr: @@ -843,7 +1033,7 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) break; case AluScalarOpcode::SetpRstr: - print("{} == 0.0", op(SCALAR_0)); + print("{} == 0.0", op(SCALAR_0).expression); break; } @@ -853,92 +1043,92 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) indent(); out += "ps = "; if (instr.scalarSaturate) - out += "saturate("; + out += "saturate((float)("; switch (instr.scalarOpcode) { case AluScalarOpcode::Adds: - print("{} + {}", op(SCALAR_0), op(SCALAR_1)); + print("{} + {}", op(SCALAR_0).expression, op(SCALAR_1).expression); break; case AluScalarOpcode::AddsPrev: - print("{} + ps", op(SCALAR_0)); + print("{} + ps", op(SCALAR_0).expression); break; case AluScalarOpcode::Muls: - print("{} * {}", op(SCALAR_0), op(SCALAR_1)); + print("{} * {}", op(SCALAR_0).expression, op(SCALAR_1).expression); break; case AluScalarOpcode::MulsPrev: case AluScalarOpcode::MulsPrev2: - print("{} * ps", op(SCALAR_0)); + print("{} * ps", op(SCALAR_0).expression); break; case AluScalarOpcode::Maxs: case AluScalarOpcode::MaxAs: case AluScalarOpcode::MaxAsf: - print("max({}, {})", op(SCALAR_0), op(SCALAR_1)); + print("max({}, {})", op(SCALAR_0).expression, op(SCALAR_1).expression); break; case AluScalarOpcode::Mins: - print("min({}, {})", op(SCALAR_0), op(SCALAR_1)); + print("min({}, {})", op(SCALAR_0).expression, op(SCALAR_1).expression); break; case AluScalarOpcode::Seqs: - print("{} == 0.0", op(SCALAR_0)); + print("{} == 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::Sgts: - print("{} > 0.0", op(SCALAR_0)); + print("{} > 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::Sges: - print("{} >= 0.0", op(SCALAR_0)); + print("{} >= 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::Snes: - print("{} != 0.0", op(SCALAR_0)); + print("{} != 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::Frcs: - print("frac({})", op(SCALAR_0)); + print("frac({})", op(SCALAR_0).expression); break; case AluScalarOpcode::Truncs: - print("trunc({})", op(SCALAR_0)); + print("trunc({})", op(SCALAR_0).expression); break; case AluScalarOpcode::Floors: - print("floor({})", op(SCALAR_0)); + print("floor({})", op(SCALAR_0).expression); break; case AluScalarOpcode::Exp: - print("exp2({})", op(SCALAR_0)); + print("exp2({})", op(SCALAR_0).expression); break; case AluScalarOpcode::Logc: case AluScalarOpcode::Log: - print("clamp(log2({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); + print("clamp(log2({}), -FLT_MAX, FLT_MAX)", op(SCALAR_0).expression); break; case AluScalarOpcode::Rcpc: case AluScalarOpcode::Rcpf: case AluScalarOpcode::Rcp: - print("clamp(rcp({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); + print("clamp(rcp({}), -FLT_MAX, FLT_MAX)", op(SCALAR_0).expression); break; case AluScalarOpcode::Rsqc: case AluScalarOpcode::Rsqf: case AluScalarOpcode::Rsq: - print("clamp(rsqrt({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); + print("clamp(rsqrt({}), -FLT_MAX, FLT_MAX)", op(SCALAR_0).expression); break; case AluScalarOpcode::Subs: - print("{} - {}", op(SCALAR_0), op(SCALAR_1)); + print("{} - {}", op(SCALAR_0).expression, op(SCALAR_1).expression); break; case AluScalarOpcode::SubsPrev: - print("{} - ps", op(SCALAR_0)); + print("{} - ps", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpEq: @@ -949,11 +1139,11 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) break; case AluScalarOpcode::SetpInv: - print("{0} == 0.0 ? 1.0 : {0}", op(SCALAR_0)); + print("p0 ? 0.0 : {0} == 0.0 ? 1.0 : {0}", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpPop: - print("p0 ? 0.0 : ({} - 1.0)", op(SCALAR_0)); + print("p0 ? 0.0 : ({} - 1.0)", op(SCALAR_0).expression); break; case AluScalarOpcode::SetpClr: @@ -961,59 +1151,59 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) break; case AluScalarOpcode::SetpRstr: - print("p0 ? 0.0 : {}", op(SCALAR_0)); + print("p0 ? 0.0 : {}", op(SCALAR_0).expression); break; case AluScalarOpcode::KillsEq: - print("{} == 0.0", op(SCALAR_0)); + print("{} == 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::KillsGt: - print("{} > 0.0", op(SCALAR_0)); + print("{} > 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::KillsGe: - print("{} >= 0.0", op(SCALAR_0)); + print("{} >= 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::KillsNe: - print("{} != 0.0", op(SCALAR_0)); + print("{} != 0.0", op(SCALAR_0).expression); break; case AluScalarOpcode::KillsOne: - print("{} == 1.0", op(SCALAR_0)); + print("{} == 1.0", op(SCALAR_0).expression); break; case AluScalarOpcode::Sqrt: - print("sqrt({})", op(SCALAR_0)); + print("sqrt({})", op(SCALAR_0).expression); break; case AluScalarOpcode::Mulsc0: case AluScalarOpcode::Mulsc1: - print("{} * {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); + print("{} * {}", op(SCALAR_CONSTANT_0).expression, op(SCALAR_CONSTANT_1).expression); break; case AluScalarOpcode::Addsc0: case AluScalarOpcode::Addsc1: - print("{} + {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); + print("{} + {}", op(SCALAR_CONSTANT_0).expression, op(SCALAR_CONSTANT_1).expression); break; case AluScalarOpcode::Subsc0: case AluScalarOpcode::Subsc1: - print("{} - {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); + print("{} - {}", op(SCALAR_CONSTANT_0).expression, op(SCALAR_CONSTANT_1).expression); break; case AluScalarOpcode::Sin: - print("sin({})", op(SCALAR_0)); + print("sin({})", op(SCALAR_0).expression); break; case AluScalarOpcode::Cos: - print("cos({})", op(SCALAR_0)); + print("cos({})", op(SCALAR_0).expression); break; } if (instr.scalarSaturate) - out += ')'; + out += "))"; out += ";\n"; @@ -1021,11 +1211,11 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) { case AluScalarOpcode::MaxAs: indent(); - println("a0 = (int)clamp(floor({} + 0.5), -256.0, 255.0);", op(SCALAR_0)); + println("a0 = (int)clamp(floor({} + 0.5), -256.0, 255.0);", op(SCALAR_0).expression); break; case AluScalarOpcode::MaxAsf: indent(); - println("a0 = (int)clamp(floor({}), -256.0, 255.0);", op(SCALAR_0)); + println("a0 = (int)clamp(floor({}), -256.0, 255.0);", op(SCALAR_0).expression); break; } } @@ -1040,7 +1230,8 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) if (!exportRegister.empty()) { out += exportRegister; - out += '.'; + if (vectorRegister) + out += '.'; } else { @@ -1049,7 +1240,7 @@ void ShaderRecompiler::recompile(const AluInstruction& instr) for (size_t i = 0; i < 4; i++) { - if ((scalarWriteMask >> i) & 0x1) + if (((scalarWriteMask >> i) & 0x1) && vectorRegister) out += SWIZZLES[i]; } @@ -1154,7 +1345,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi { uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; - println("#define {}(INDEX) select((INDEX) < {}, vk::RawBufferLoad(g_PushConstants.{}ShaderConstants + ({} + min(INDEX, {})) * 16, 0x10), 0.0)", + println("#define {}(INDEX) selectWrapper((INDEX) < {}, vk::RawBufferLoad(g_PushConstants.{}ShaderConstants + ({} + min(INDEX, {})) * 16, 0x10), 0.0)", constantName, tailCount, shaderName, constantInfo->registerIndex.get(), tailCount - 1); } else @@ -1187,6 +1378,75 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi } } + out += "\n#elif defined(__air__)\n\n"; + + for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) + { + const auto constantInfo = reinterpret_cast( + constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); + + const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); + + #ifdef UNLEASHED_RECOMP + if (!isPixelShader) + { + if (strcmp(constantName, "g_MtxProjection") == 0) + hasMtxProjection = true; + else if (strcmp(constantName, "g_InstanceTypes") == 0) + isMetaInstancer = true; + else if (strcmp(constantName, "g_IndexCount") == 0) + hasIndexCount = true; + } + else + { + if (strcmp(constantName, "g_MtxPrevInvViewProjection") == 0) + hasMtxPrevInvViewProjection = true; + } + #endif + + switch (constantInfo->registerSet) + { + case RegisterSet::Float4: + { + const char* shaderName = isPixelShader ? "Pixel" : "Vertex"; + + if (constantInfo->registerCount > 1) + { + uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; + + println("#define {}(INDEX) selectWrapper((INDEX) < {}, (*(reinterpret_cast(g_PushConstants.{}ShaderConstants + ({} + min(INDEX, {})) * 16))), 0.0)", + constantName, tailCount, shaderName, constantInfo->registerIndex.get(), tailCount - 1); + } + else + { + println("#define {} (*(reinterpret_cast(g_PushConstants.{}ShaderConstants + {})))", + constantName, shaderName, constantInfo->registerIndex * 16); + } + + for (uint16_t j = 0; j < constantInfo->registerCount; j++) + float4Constants.emplace(constantInfo->registerIndex + j, constantInfo); + + break; + } + + case RegisterSet::Sampler: + { + for (size_t j = 0; j < std::size(TEXTURE_DIMENSIONS); j++) + { + println("#define {}_Texture{}DescriptorIndex (*(reinterpret_cast(g_PushConstants.SharedConstants + {})))", + constantName, TEXTURE_DIMENSIONS[j], j * 64 + constantInfo->registerIndex * 4); + } + + println("#define {}_SamplerDescriptorIndex (*(reinterpret_cast(g_PushConstants.SharedConstants + {})))", + constantName, std::size(TEXTURE_DIMENSIONS) * 64 + constantInfo->registerIndex * 4); + + samplers.emplace(constantInfo->registerIndex, constantName); + break; + } + + } + } + out += "\n#else\n\n"; println("cbuffer {}ShaderConstants : register(b{}, space4)", isPixelShader ? "Pixel" : "Vertex", isPixelShader ? 1 : 0); @@ -1211,7 +1471,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (constantInfo->registerCount > 1) { uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; - println("#define {0}(INDEX) select((INDEX) < {1}, {0}[min(INDEX, {2})], 0.0)", constantName, tailCount, tailCount - 1); + println("#define {0}(INDEX) selectWrapper((INDEX) < {1}, {0}[min(INDEX, {2})], 0.0)", constantName, tailCount, tailCount - 1); } } } @@ -1254,7 +1514,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (constantInfo->registerSet == RegisterSet::Bool) { const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); - println("\t#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0)); + println("#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0)); boolConstants.emplace(constantInfo->registerIndex, constantName); } } @@ -1263,45 +1523,79 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi const auto shader = reinterpret_cast(shaderData + shaderContainer->shaderOffset); - out += "#ifndef __spirv__\n"; - - if (isPixelShader) - out += "[shader(\"pixel\")]\n"; - else - out += "[shader(\"vertex\")]\n"; - - out += "#endif\n"; - - out += "void main(\n"; + println("struct {}", isPixelShader ? "Interpolators" : "VertexShaderInput"); + out += "{\n"; if (isPixelShader) { - out += "\tin float4 iPos : SV_Position,\n"; + out += "#ifdef __air__\n"; + + out += "\tfloat4 iPos [[position]];\n"; for (auto& [usage, usageIndex] : INTERPOLATORS) - println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); + println("\tfloat4 i{0}{1} [[user({2}{1})]];", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); - out += "#ifdef __spirv__\n"; - out += "\tin bool iFace : SV_IsFrontFace\n"; out += "#else\n"; - out += "\tin uint iFace : SV_IsFrontFace\n"; - out += "#endif\n"; - auto pixelShader = reinterpret_cast(shader); - if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) - out += ",\n\tout float4 oC0 : SV_Target0"; - if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) - out += ",\n\tout float4 oC1 : SV_Target1"; - if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2) - out += ",\n\tout float4 oC2 : SV_Target2"; - if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) - out += ",\n\tout float4 oC3 : SV_Target3"; - if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) - out += ",\n\tout float oDepth : SV_Depth"; + out += "\tfloat4 iPos : SV_Position;\n"; + + for (auto& [usage, usageIndex] : INTERPOLATORS) + println("\tfloat4 i{0}{1} : {2}{1};", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); + + out += "#endif\n"; } else { auto vertexShader = reinterpret_cast(shader); + + out += "#ifdef __air__\n"; + + for (uint32_t i = 0; i < vertexShader->vertexElementCount; i++) + { + union + { + VertexElement vertexElement; + uint32_t value; + }; + + value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + i]; + + const char* usageType = USAGE_TYPES[uint32_t(vertexElement.usage)]; + + #ifdef UNLEASHED_RECOMP + if ((vertexElement.usage == DeclUsage::TexCoord && vertexElement.usageIndex == 2 && isMetaInstancer) || + (vertexElement.usage == DeclUsage::Position && vertexElement.usageIndex == 1)) + { + usageType = "uint4"; + } + #endif + + out += '\t'; + + print("{0} i{1}{2}", usageType, USAGE_VARIABLES[uint32_t(vertexElement.usage)], + uint32_t(vertexElement.usageIndex)); + + bool foundUsage = false; + for (auto& usageLocation : USAGE_LOCATIONS) + { + if (usageLocation.usage == vertexElement.usage && usageLocation.usageIndex == vertexElement.usageIndex) + { + println(" [[attribute({})]];", usageLocation.location); + foundUsage = true; + break; + } + } + + if (!foundUsage) { + fmt::println("Missing mapping for vertex element usage: {} {}", USAGE_VARIABLES[uint32_t(vertexElement.usage)], uint32_t(vertexElement.usageIndex)); + exit(1); + } + + vertexElements.emplace(uint32_t(vertexElement.address), vertexElement); + } + + out += "#else\n"; + for (uint32_t i = 0; i < vertexShader->vertexElementCount; i++) { union @@ -1333,40 +1627,159 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi } } - println("in {0} i{1}{2} : {3}{2},", usageType, USAGE_VARIABLES[uint32_t(vertexElement.usage)], + println("{0} i{1}{2} : {3}{2};", usageType, USAGE_VARIABLES[uint32_t(vertexElement.usage)], uint32_t(vertexElement.usageIndex), USAGE_SEMANTICS[uint32_t(vertexElement.usage)]); - - vertexElements.emplace(uint32_t(vertexElement.address), vertexElement); } + out += "#endif\n"; + } + + out += "};\n"; + + println("struct {}", isPixelShader ? "PixelShaderOutput" : "Interpolators"); + out += "{\n"; + + if (isPixelShader) + { + out += "#ifdef __air__\n"; + + auto pixelShader = reinterpret_cast(shader); + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) + out += "\tfloat4 oC0 [[color(0)]];\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) + out += "\tfloat4 oC1 [[color(1)]];\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2) + out += "\tfloat4 oC2 [[color(2)]];\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) + out += "\tfloat4 oC3 [[color(3)]];\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) + out += "\tfloat oDepth [[depth(any)]];\n"; + + out += "#else\n"; + + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) + out += "\tfloat4 oC0 : SV_Target0;\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) + out += "\tfloat4 oC1 : SV_Target1;\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2) + out += "\tfloat4 oC2 : SV_Target2;\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) + out += "\tfloat4 oC3 : SV_Target3;\n"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) + out += "\tfloat oDepth : SV_Depth;\n"; + + out += "#endif\n"; + } + else + { + out += "#ifdef __air__\n"; + + out += "\tfloat4 oPos [[position]] [[invariant]];\n"; + + for (auto& [usage, usageIndex] : INTERPOLATORS) + print("\tfloat4 o{0}{1} [[user({2}{1})]];\n", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); + + out += "#else\n"; + + out += "\tprecise float4 oPos : SV_Position;\n"; + + for (auto& [usage, usageIndex] : INTERPOLATORS) + print("\tfloat4 o{0}{1} : {2}{1};\n", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); + + out += "#endif\n"; + } + + out += "};\n"; + + out += "#ifdef __air__\n"; + + if (isPixelShader) + out += "[[fragment]]\n"; + else + out += "[[vertex]]\n"; + + out += "#elif !defined(__spirv__)\n"; + + if (isPixelShader) + out += "[shader(\"pixel\")]\n"; + else + out += "[shader(\"vertex\")]\n"; + + out += "#endif\n"; + + println("{} shaderMain(", isPixelShader ? "PixelShaderOutput" : "Interpolators"); + + if (isPixelShader) + { + out += "#ifdef __air__\n"; + + out += "\tInterpolators input [[stage_in]],\n"; + out += "\tbool iFace [[front_facing]],\n"; + + out += "\tconstant Texture2DDescriptorHeap* g_Texture2DDescriptorHeap [[buffer(0)]],\n"; + out += "\tconstant Texture3DDescriptorHeap* g_Texture3DDescriptorHeap [[buffer(1)]],\n"; + out += "\tconstant TextureCubeDescriptorHeap* g_TextureCubeDescriptorHeap [[buffer(2)]],\n"; + out += "\tconstant SamplerDescriptorHeap* g_SamplerDescriptorHeap [[buffer(3)]],\n"; + out += "\tconstant PushConstants& g_PushConstants [[buffer(8)]]\n"; + + out += "#else\n"; + + out += "\tInterpolators input,\n"; + + out += "#ifdef __spirv__\n"; + out += "\tin bool iFace : SV_IsFrontFace\n"; + out += "#else\n"; + out += "\tin uint iFace : SV_IsFrontFace\n"; + out += "#endif\n"; + + out += "\n#endif\n"; + } + else + { + out += "#ifdef __air__\n"; + out += "\tconstant PushConstants& g_PushConstants [[buffer(8)]],\n"; + out += "\tVertexShaderInput input [[stage_in]]\n"; + out += "#else\n"; + out += "\tVertexShaderInput input\n"; + out += "#endif\n"; + #ifdef UNLEASHED_RECOMP if (hasIndexCount) { + out += "\t,\n"; + out += "#ifdef __air__\n"; + out += "\tuint iVertexId [[vertex_id]],\n"; + out += "\tuint iInstanceId [[instance_id]]\n"; + out += "#else\n"; out += "\tin uint iVertexId : SV_VertexID,\n"; - out += "\tin uint iInstanceId : SV_InstanceID,\n"; + out += "\tin uint iInstanceId : SV_InstanceID\n"; + out += "#endif\n"; } #endif - - out += "\tout float4 oPos : SV_Position"; - - for (auto& [usage, usageIndex] : INTERPOLATORS) - print(",\n\tout float4 o{0}{1} : {2}{1}", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); } out += ")\n"; out += "{\n"; + std::string outputName = isPixelShader ? "PixelShaderOutput" : "Interpolators"; + + out += "#ifdef __air__\n"; + println("\t{0} output = {0}{{}};", outputName); + out += "#else\n"; + println("\t{0} output = ({0})0;", outputName); + out += "#endif\n"; + #ifdef UNLEASHED_RECOMP if (hasMtxProjection) { specConstantsMask |= SPEC_CONSTANT_REVERSE_Z; - out += "\toPos = 0.0;\n"; + out += "\toutput.oPos = 0.0;\n"; out += "\tfloat4x4 mtxProjection = float4x4(g_MtxProjection(0), g_MtxProjection(1), g_MtxProjection(2), g_MtxProjection(3));\n"; out += "\tfloat4x4 mtxProjectionReverseZ = mul(mtxProjection, float4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0, 1, 1));\n"; - out += "\t[unroll] for (int iterationIndex = 0; iterationIndex < 2; iterationIndex++)\n"; + out += "\tUNROLL for (int iterationIndex = 0; iterationIndex < 2; iterationIndex++)\n"; out += "\t{\n"; } #endif @@ -1381,8 +1794,13 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi auto value = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + definition->physicalOffset); for (uint16_t i = 0; i < (definition->count + 3) / 4; i++) { + println("#ifdef __air__"); + println("\tfloat4 c{} = as_type(uint4(0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}));", + definition->registerIndex + i - (isPixelShader ? 256 : 0), value[0].get(), value[1].get(), value[2].get(), value[3].get()); + println("#else"); println("\tfloat4 c{} = asfloat(uint4(0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}));", definition->registerIndex + i - (isPixelShader ? 256 : 0), value[0].get(), value[1].get(), value[2].get(), value[3].get()); + println("#endif"); value += 4; } @@ -1433,14 +1851,14 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (isPixelShader) { value = reinterpret_cast(shader)->interpolators[i]; - println("\tfloat4 r{} = i{}{};", uint32_t(interpolator.reg), USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex)); + println("\tfloat4 r{} = input.i{}{};", uint32_t(interpolator.reg), USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex)); printedRegisters[interpolator.reg] = true; } else { auto vertexShader = reinterpret_cast(shader); value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + vertexShader->vertexElementCount + i]; - interpolators.emplace(i, fmt::format("o{}{}", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex))); + interpolators.emplace(i, fmt::format("output.o{}{}", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex))); } } @@ -1448,11 +1866,11 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi { #ifdef UNLEASHED_RECOMP if (!hasMtxProjection) - out += "\toPos = 0.0;\n"; + out += "\toutput.oPos = 0.0;\n"; #endif for (auto& [usage, usageIndex] : INTERPOLATORS) - println("\to{}{} = 0.0;", USAGE_VARIABLES[uint32_t(usage)], usageIndex); + println("\toutput.o{}{} = 0.0;", USAGE_VARIABLES[uint32_t(usage)], usageIndex); out += "\n"; } @@ -1464,7 +1882,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi print("\tfloat4 r{} = ", i); if (isPixelShader && i == ((shader->fieldC >> 8) & 0xFF)) { - out += "float4((iPos.xy - 0.5) * float2(iFace ? 1.0 : -1.0, 1.0), 0.0, 0.0);\n"; + out += "float4((input.iPos.xy - 0.5) * float2(iFace ? 1.0 : -1.0, 1.0), 0.0, 0.0);\n"; } #ifdef UNLEASHED_RECOMP else if (!isPixelShader && hasIndexCount && i == 0) @@ -1488,7 +1906,11 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi #ifdef UNLEASHED_RECOMP out += "\tfloat2 pixelCoord = 0.0;\n"; #endif + out += "#ifdef __air__\n"; + out += "\tCubeMapData cubeMapData = CubeMapData{};\n"; + out += "#else\n"; out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n"; + out += "#endif\n"; } const be* code = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset); @@ -1611,7 +2033,6 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi uint32_t count = 0; uint32_t sequence = 0; bool shouldReturn = false; - bool shouldCloseCurlyBracket = false; switch (cfInstr.opcode) { @@ -1646,7 +2067,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi { indent(); #ifdef UNLEASHED_RECOMP - print("[unroll] "); + print("UNROLL "); #endif println("for (aL = 0; aL < i{}.x; aL++)", uint32_t(cfInstr.loopStart.loopId)); indent(); @@ -1754,27 +2175,27 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi specConstantsMask |= SPEC_CONSTANT_BICUBIC_GI_FILTER; indent(); - out += "if (g_SpecConstants() & SPEC_CONSTANT_BICUBIC_GI_FILTER)"; + out += "if (g_SpecConstants() & SPEC_CONSTANT_BICUBIC_GI_FILTER)\n"; indent(); - out += '{'; + out += "{\n"; ++indentation; recompile(textureFetch, true); --indentation; indent(); - out += "}"; + out += "}\n"; indent(); - out += "else"; + out += "else\n"; indent(); - out += '{'; + out += "{\n"; ++indentation; recompile(textureFetch, false); --indentation; indent(); - out += '}'; + out += "}\n"; } else #endif @@ -1799,31 +2220,31 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi specConstantsMask |= SPEC_CONSTANT_ALPHA_TEST; indent(); - out += "[branch] if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TEST)"; + out += "BRANCH if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TEST)\n"; indent(); - out += '{'; + out += "{\n"; indent(); - out += "\tclip(oC0.w - g_AlphaThreshold);\n"; + out += "\tclip(output.oC0.w - g_AlphaThreshold);\n"; indent(); - out += "}"; + out += "}\n"; #ifdef UNLEASHED_RECOMP specConstantsMask |= SPEC_CONSTANT_ALPHA_TO_COVERAGE; indent(); - out += "else if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TO_COVERAGE)"; + out += "else if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TO_COVERAGE)\n"; indent(); - out += '{'; + out += "{\n"; indent(); - out += "\toC0.w *= 1.0 + computeMipLevel(pixelCoord) * 0.25;\n"; + out += "\toutput.oC0.w *= 1.0 + computeMipLevel(pixelCoord) * 0.25;\n"; indent(); - out += "\toC0.w = 0.5 + (oC0.w - g_AlphaThreshold) / max(fwidth(oC0.w), 1e-6);\n"; + out += "\toutput.oC0.w = 0.5 + (output.oC0.w - g_AlphaThreshold) / max(fwidth(output.oC0.w), 1e-6);\n"; indent(); - out += '}'; + out += "}\n"; #endif } else @@ -1832,7 +2253,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi if (!hasMtxProjection) #endif { - out += "\toPos.xy += g_HalfPixelOffset * oPos.w;\n"; + out += "\toutput.oPos.xy += g_HalfPixelOffset * output.oPos.w;\n"; } } @@ -1847,7 +2268,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi else #endif { - out += "return;\n"; + out += "return output;\n"; } } else @@ -1855,13 +2276,6 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi out += "\t\t\tbreak;\n"; } } - - if (shouldCloseCurlyBracket) - { - --indentation; - indent(); - out += "}\n"; - } } controlFlowCode += 3; @@ -1881,7 +2295,14 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi out += "\t}\n"; if (!isPixelShader && hasMtxProjection) - out += "\toPos.xy += g_HalfPixelOffset * oPos.w;\n"; + out += "\toutput.oPos.xy += g_HalfPixelOffset * output.oPos.w;\n"; +#endif + + if (!simpleControlFlow) + out += "\treturn output;\n"; +#ifdef UNLEASHED_RECOMP + else if (hasMtxProjection) + out += "\treturn output;\n"; #endif out += "}"; diff --git a/XenosRecomp/shader_recompiler.h b/XenosRecomp/shader_recompiler.h index b2e317d..4f468ec 100644 --- a/XenosRecomp/shader_recompiler.h +++ b/XenosRecomp/shader_recompiler.h @@ -45,7 +45,7 @@ struct ShaderRecompiler : StringBuffer out += '\t'; } - void printDstSwizzle(uint32_t dstSwizzle, bool operand); + uint32_t printDstSwizzle(uint32_t dstSwizzle, bool operand); void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle); void recompile(const VertexFetchInstruction& instr, uint32_t address);