mirror of
				https://github.com/hedge-dev/XenosRecomp.git
				synced 2025-10-30 07:12:17 +00:00 
			
		
		
		
	MSL shader support
Co-authored-by: Isaac Marovitz <isaacryu@icloud.com>
This commit is contained in:
		
							parent
							
								
									990d03b28a
								
							
						
					
					
						commit
						4906992039
					
				
					 9 changed files with 1091 additions and 279 deletions
				
			
		|  | @ -4,6 +4,10 @@ if (WIN32) | ||||||
|     option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON) |     option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON) | ||||||
| endif() | endif() | ||||||
| 
 | 
 | ||||||
|  | if (APPLE) | ||||||
|  |     option(XENOS_RECOMP_AIR "Generate Metal AIR shader cache" ON) | ||||||
|  | endif() | ||||||
|  | 
 | ||||||
| set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source") | set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source") | ||||||
| 
 | 
 | ||||||
| add_executable(XenosRecomp | add_executable(XenosRecomp | ||||||
|  | @ -30,13 +34,6 @@ target_precompile_headers(XenosRecomp PRIVATE pch.h) | ||||||
| 
 | 
 | ||||||
| if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") | if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") | ||||||
|     target_compile_options(XenosRecomp PRIVATE -Wno-switch -Wno-unused-variable -Wno-null-arithmetic -fms-extensions) |     target_compile_options(XenosRecomp PRIVATE -Wno-switch -Wno-unused-variable -Wno-null-arithmetic -fms-extensions) | ||||||
| 
 |  | ||||||
|     include(CheckCXXSymbolExists) |  | ||||||
|     check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) |  | ||||||
|     if(LIBCPP) |  | ||||||
|         # Allows using std::execution |  | ||||||
|         target_compile_options(XenosRecomp PRIVATE -fexperimental-library) |  | ||||||
|     endif() |  | ||||||
| endif() | endif() | ||||||
| 
 | 
 | ||||||
| if (WIN32) | if (WIN32) | ||||||
|  | @ -51,3 +48,8 @@ if (XENOS_RECOMP_DXIL) | ||||||
|     target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL) |     target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL) | ||||||
|     target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL) |     target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL) | ||||||
| endif() | endif() | ||||||
|  | 
 | ||||||
|  | if (XENOS_RECOMP_AIR) | ||||||
|  |     target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_AIR) | ||||||
|  |     target_sources(XenosRecomp PRIVATE air_compiler.cpp air_compiler.h) | ||||||
|  | endif() | ||||||
|  |  | ||||||
							
								
								
									
										81
									
								
								XenosRecomp/air_compiler.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								XenosRecomp/air_compiler.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,81 @@ | ||||||
|  | #include "air_compiler.h" | ||||||
|  | 
 | ||||||
|  | #include <fstream> | ||||||
|  | #include <iterator> | ||||||
|  | #include <spawn.h> | ||||||
|  | #include <unistd.h> | ||||||
|  | 
 | ||||||
/// Scope guard for a temporary file: stores a path and unlinks it when the
/// guard is destroyed. The file does not need to exist at destruction time;
/// the result of unlink() is deliberately ignored.
struct TemporaryPath
{
    const std::string path;

    explicit TemporaryPath(std::string_view path) : path(path) {}

    // A copy would be a second guard unlinking the same path, possibly removing
    // a file that some other code has re-created in the meantime. Deleting the
    // copy operations also suppresses the implicit move operations.
    TemporaryPath(const TemporaryPath&) = delete;
    TemporaryPath& operator=(const TemporaryPath&) = delete;

    ~TemporaryPath()
    {
        unlink(path.c_str());
    }
};
|  | 
 | ||||||
/// Runs a program and blocks until it finishes.
///
/// @param argv Null-terminated argument vector. argv[0] is used as the path of
///             the executable to spawn (no search of PATH is requested here).
/// @return The child's exit code when it exited normally; -1 when the process
///         could not be spawned, could not be waited for, or did not exit
///         normally (for example, it was terminated by a signal).
static int executeCommand(const char** argv)
{
    pid_t pid;
    // posix_spawn() takes a non-const argv, hence the const_cast.
    if (posix_spawn(&pid, argv[0], nullptr, nullptr, const_cast<char**>(argv), nullptr) != 0)
        return -1;

    int status;
    // A signal delivered to this process can interrupt the wait. Retry in that
    // case so the child is still reaped and its real result is reported.
    while (waitpid(pid, &status, 0) == -1)
    {
        if (errno != EINTR)
            return -1;
    }

    // The value filled in by waitpid() is an encoded status word, not the exit
    // code itself; decode it so callers log a meaningful number.
    return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
|  | 
 | ||||||
|  | std::vector<uint8_t> AirCompiler::compile(const std::string& shaderSource) | ||||||
|  | { | ||||||
|  |     // Save source to a location on disk for the compiler to read.
 | ||||||
|  |     char sourcePathTemplate[PATH_MAX] = "/tmp/xenos_metal_XXXXXX.metal"; | ||||||
|  |     const int sourceFd = mkstemps(sourcePathTemplate, 6); | ||||||
|  |     if (sourceFd == -1) | ||||||
|  |     { | ||||||
|  |         fmt::println("Failed to create temporary file for shader source: {}", strerror(errno)); | ||||||
|  |         std::exit(1); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const TemporaryPath sourcePath(sourcePathTemplate); | ||||||
|  |     const TemporaryPath irPath(sourcePath.path + ".ir"); | ||||||
|  |     const TemporaryPath metalLibPath(sourcePath.path + ".metallib"); | ||||||
|  | 
 | ||||||
|  |     const ssize_t sourceWritten = write(sourceFd, shaderSource.data(), shaderSource.size()); | ||||||
|  |     close(sourceFd); | ||||||
|  |     if (sourceWritten < 0) | ||||||
|  |     { | ||||||
|  |         fmt::println("Failed to write shader source to disk: {}", strerror(errno)); | ||||||
|  |         std::exit(1); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const char* compileCommand[] = { | ||||||
|  |         "/usr/bin/xcrun", "-sdk", "macosx", "metal", "-o", irPath.path.c_str(), "-c", sourcePath.path.c_str(), "-Wno-unused-variable", "-frecord-sources", "-gline-tables-only", "-fmetal-math-mode=relaxed", "-D__air__", | ||||||
|  | #ifdef UNLEASHED_RECOMP | ||||||
|  |         "-DUNLEASHED_RECOMP", | ||||||
|  | #endif | ||||||
|  |         nullptr | ||||||
|  |     }; | ||||||
|  |     if (const int compileStatus = executeCommand(compileCommand); compileStatus != 0) | ||||||
|  |     { | ||||||
|  |         fmt::println("Metal compiler exited with status: {}", compileStatus); | ||||||
|  |         fmt::println("Generated source:\n{}", shaderSource); | ||||||
|  |         std::exit(1); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const char* linkCommand[] = { "/usr/bin/xcrun", "-sdk", "macosx", "metallib", "-o", metalLibPath.path.c_str(), irPath.path.c_str(), nullptr }; | ||||||
|  |     if (const int linkStatus = executeCommand(linkCommand); linkStatus != 0) | ||||||
|  |     { | ||||||
|  |         fmt::println("Metal linker exited with status: {}", linkStatus); | ||||||
|  |         fmt::println("Generated source:\n{}", shaderSource); | ||||||
|  |         std::exit(1); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::ifstream libStream(metalLibPath.path, std::ios::binary); | ||||||
|  |     std::vector<uint8_t> data((std::istreambuf_iterator(libStream)), std::istreambuf_iterator<char>()); | ||||||
|  |     return data; | ||||||
|  | } | ||||||
							
								
								
									
										10
									
								
								XenosRecomp/air_compiler.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								XenosRecomp/air_compiler.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | class AirCompiler | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     [[nodiscard]] static std::vector<uint8_t> compile(const std::string& shaderSource); | ||||||
|  | }; | ||||||
|  | @ -34,6 +34,11 @@ IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixe | ||||||
|             target = L"-T vs_6_0"; |             target = L"-T vs_6_0"; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     if (!compileLibrary) | ||||||
|  |     { | ||||||
|  |         args[argCount++] = L"-E shaderMain"; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     args[argCount++] = target; |     args[argCount++] = target; | ||||||
|     args[argCount++] = L"-HV 2021"; |     args[argCount++] = L"-HV 2021"; | ||||||
|     args[argCount++] = L"-all-resources-bound"; |     args[argCount++] = L"-all-resources-bound"; | ||||||
|  |  | ||||||
|  | @ -1,7 +1,15 @@ | ||||||
|  | #include <deque> | ||||||
|  | #include <mutex> | ||||||
|  | #include <thread> | ||||||
|  | 
 | ||||||
| #include "shader.h" | #include "shader.h" | ||||||
| #include "shader_recompiler.h" | #include "shader_recompiler.h" | ||||||
| #include "dxc_compiler.h" | #include "dxc_compiler.h" | ||||||
| 
 | 
 | ||||||
|  | #ifdef XENOS_RECOMP_AIR | ||||||
|  | #include "air_compiler.h" | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| static std::unique_ptr<uint8_t[]> readAllBytes(const char* filePath, size_t& fileSize) | static std::unique_ptr<uint8_t[]> readAllBytes(const char* filePath, size_t& fileSize) | ||||||
| { | { | ||||||
|     FILE* file = fopen(filePath, "rb"); |     FILE* file = fopen(filePath, "rb"); | ||||||
|  | @ -26,9 +34,43 @@ struct RecompiledShader | ||||||
|     uint8_t* data = nullptr; |     uint8_t* data = nullptr; | ||||||
|     IDxcBlob* dxil = nullptr; |     IDxcBlob* dxil = nullptr; | ||||||
|     std::vector<uint8_t> spirv; |     std::vector<uint8_t> spirv; | ||||||
|  |     std::vector<uint8_t> air; | ||||||
|     uint32_t specConstantsMask = 0; |     uint32_t specConstantsMask = 0; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | void recompileShader(RecompiledShader& shader, const std::string_view include, std::atomic<uint32_t>& progress, uint32_t numShaders) | ||||||
|  | { | ||||||
|  |     thread_local ShaderRecompiler recompiler; | ||||||
|  |     recompiler = {}; | ||||||
|  |     recompiler.recompile(shader.data, include); | ||||||
|  | 
 | ||||||
|  |     shader.specConstantsMask = recompiler.specConstantsMask; | ||||||
|  | 
 | ||||||
|  |     thread_local DxcCompiler dxcCompiler; | ||||||
|  | 
 | ||||||
|  | #ifdef XENOS_RECOMP_DXIL | ||||||
|  |     shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false); | ||||||
|  |     assert(shader.dxil != nullptr); | ||||||
|  |     assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifdef XENOS_RECOMP_AIR | ||||||
|  |     shader.air = AirCompiler::compile(recompiler.out); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  |     IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true); | ||||||
|  |     assert(spirv != nullptr); | ||||||
|  | 
 | ||||||
|  |     bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo); | ||||||
|  |     assert(result); | ||||||
|  | 
 | ||||||
|  |     spirv->Release(); | ||||||
|  | 
 | ||||||
|  |     size_t currentProgress = ++progress; | ||||||
|  |     if ((currentProgress % 10) == 0 || (currentProgress == numShaders - 1)) | ||||||
|  |         fmt::println("Recompiling shaders... {}%", currentProgress / float(numShaders) * 100.0f); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| int main(int argc, char** argv) | int main(int argc, char** argv) | ||||||
| { | { | ||||||
| #ifndef XENOS_RECOMP_INPUT | #ifndef XENOS_RECOMP_INPUT | ||||||
|  | @ -71,6 +113,7 @@ int main(int argc, char** argv) | ||||||
|     { |     { | ||||||
|         std::vector<std::unique_ptr<uint8_t[]>> files; |         std::vector<std::unique_ptr<uint8_t[]>> files; | ||||||
|         std::map<XXH64_hash_t, RecompiledShader> shaders; |         std::map<XXH64_hash_t, RecompiledShader> shaders; | ||||||
|  |         std::map<XXH64_hash_t, std::string> shaderFilenames; | ||||||
| 
 | 
 | ||||||
|         for (auto& file : std::filesystem::recursive_directory_iterator(input)) |         for (auto& file : std::filesystem::recursive_directory_iterator(input)) | ||||||
|         { |         { | ||||||
|  | @ -99,6 +142,7 @@ int main(int argc, char** argv) | ||||||
|                     { |                     { | ||||||
|                         shader.first->second.data = fileData.get() + i; |                         shader.first->second.data = fileData.get() + i; | ||||||
|                         foundAny = true; |                         foundAny = true; | ||||||
|  |                         shaderFilenames[hash] = file.path().string(); | ||||||
|                     } |                     } | ||||||
| 
 | 
 | ||||||
|                     i += dataSize; |                     i += dataSize; | ||||||
|  | @ -113,38 +157,42 @@ int main(int argc, char** argv) | ||||||
|                 files.emplace_back(std::move(fileData)); |                 files.emplace_back(std::move(fileData)); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         std::atomic<uint32_t> progress = 0; |         std::mutex shaderQueueMutex; | ||||||
| 
 |         std::deque<XXH64_hash_t> shaderQueue; | ||||||
|         std::for_each(std::execution::par_unseq, shaders.begin(), shaders.end(), [&](auto& hashShaderPair) |         for (const auto& [hash, _] : shaders) | ||||||
|         { |         { | ||||||
|                 auto& shader = hashShaderPair.second; |             shaderQueue.emplace_back(hash); | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|                 thread_local ShaderRecompiler recompiler; |         const uint32_t numThreads = std::max(std::thread::hardware_concurrency(), 1u); | ||||||
|                 recompiler = {}; |         fmt::println("Recompiling shaders with {} threads", numThreads); | ||||||
|                 recompiler.recompile(shader.data, include); |  | ||||||
| 
 | 
 | ||||||
|                 shader.specConstantsMask = recompiler.specConstantsMask; |         std::atomic<uint32_t> progress = 0; | ||||||
| 
 |         std::vector<std::thread> threads; | ||||||
|                 thread_local DxcCompiler dxcCompiler; |         threads.reserve(numThreads); | ||||||
| 
 |         for (uint32_t i = 0; i < numThreads; i++) | ||||||
| #ifdef XENOS_RECOMP_DXIL |         { | ||||||
|                 shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false); |             threads.emplace_back([&] | ||||||
|                 assert(shader.dxil != nullptr); |             { | ||||||
|                 assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); |                 while (true) | ||||||
| #endif |                 { | ||||||
| 
 |                     XXH64_hash_t shaderHash; | ||||||
|                 IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true); |                     { | ||||||
|                 assert(spirv != nullptr); |                         std::lock_guard lock(shaderQueueMutex); | ||||||
| 
 |                         if (shaderQueue.empty()) { | ||||||
|                 bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo); |                             return; | ||||||
|                 assert(result); |                         } | ||||||
| 
 |                         shaderHash = shaderQueue.front(); | ||||||
|                 spirv->Release(); |                         shaderQueue.pop_front(); | ||||||
| 
 |                     } | ||||||
|                 size_t currentProgress = ++progress; |                     recompileShader(shaders[shaderHash], include, progress, shaders.size()); | ||||||
|                 if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1)) |                 } | ||||||
|                     fmt::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f); |  | ||||||
|             }); |             }); | ||||||
|  |         } | ||||||
|  |         for (auto& thread : threads) | ||||||
|  |         { | ||||||
|  |             thread.join(); | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         fmt::println("Creating shader cache..."); |         fmt::println("Creating shader cache..."); | ||||||
| 
 | 
 | ||||||
|  | @ -154,11 +202,21 @@ int main(int argc, char** argv) | ||||||
| 
 | 
 | ||||||
|         std::vector<uint8_t> dxil; |         std::vector<uint8_t> dxil; | ||||||
|         std::vector<uint8_t> spirv; |         std::vector<uint8_t> spirv; | ||||||
|  |         std::vector<uint8_t> air; | ||||||
| 
 | 
 | ||||||
|         for (auto& [hash, shader] : shaders) |         for (auto& [hash, shader] : shaders) | ||||||
|         { |         { | ||||||
|             f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},", |             const std::string& fullFilename = shaderFilenames[hash]; | ||||||
|                 hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, spirv.size(), shader.spirv.size(), shader.specConstantsMask); |             std::string filename = fullFilename; | ||||||
|  |             size_t shaderPos = filename.find("shader"); | ||||||
|  |             if (shaderPos != std::string::npos) { | ||||||
|  |                 filename = filename.substr(shaderPos); | ||||||
|  |                 // Prevent bad escape sequences in Windows shader path.
 | ||||||
|  |                 std::replace(filename.begin(), filename.end(), '\\', '/'); | ||||||
|  |             } | ||||||
|  |             f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {}, {}, {}, \"{}\" }},", | ||||||
|  |                 hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, | ||||||
|  |                 spirv.size(), shader.spirv.size(), air.size(), shader.air.size(), shader.specConstantsMask, filename); | ||||||
| 
 | 
 | ||||||
|             if (shader.dxil != nullptr) |             if (shader.dxil != nullptr) | ||||||
|             { |             { | ||||||
|  | @ -166,6 +224,10 @@ int main(int argc, char** argv) | ||||||
|                     reinterpret_cast<uint8_t *>(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize()); |                     reinterpret_cast<uint8_t *>(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize()); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  | #ifdef XENOS_RECOMP_AIR | ||||||
|  |             air.insert(air.end(), shader.air.begin(), shader.air.end()); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|             spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end()); |             spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -189,6 +251,22 @@ int main(int argc, char** argv) | ||||||
|         f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size()); |         f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size()); | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #ifdef XENOS_RECOMP_AIR | ||||||
|  |         fmt::println("Compressing AIR cache..."); | ||||||
|  | 
 | ||||||
|  |         std::vector<uint8_t> airCompressed(ZSTD_compressBound(air.size())); | ||||||
|  |         airCompressed.resize(ZSTD_compress(airCompressed.data(), airCompressed.size(), air.data(), air.size(), level)); | ||||||
|  | 
 | ||||||
|  |         f.print("const uint8_t g_compressedAirCache[] = {{"); | ||||||
|  | 
 | ||||||
|  |         for (auto data : airCompressed) | ||||||
|  |             f.print("{},", data); | ||||||
|  | 
 | ||||||
|  |         f.println("}};"); | ||||||
|  |         f.println("const size_t g_airCacheCompressedSize = {};", airCompressed.size()); | ||||||
|  |         f.println("const size_t g_airCacheDecompressedSize = {};", air.size()); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|         fmt::println("Compressing SPIRV cache..."); |         fmt::println("Compressing SPIRV cache..."); | ||||||
| 
 | 
 | ||||||
|         std::vector<uint8_t> spirvCompressed(ZSTD_compressBound(spirv.size())); |         std::vector<uint8_t> spirvCompressed(ZSTD_compressBound(spirv.size())); | ||||||
|  |  | ||||||
|  | @ -1,6 +1,7 @@ | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
|  | #define NOMINMAX | ||||||
| #include <Windows.h> | #include <Windows.h> | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,10 +10,12 @@ | ||||||
|     #define SPEC_CONSTANT_REVERSE_Z         (1 << 4) |     #define SPEC_CONSTANT_REVERSE_Z         (1 << 4) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #if !defined(__cplusplus) || defined(__INTELLISENSE__) | #if defined(__air__) || !defined(__cplusplus) || defined(__INTELLISENSE__) | ||||||
| 
 | 
 | ||||||
|  | #ifndef __air__ | ||||||
| #define FLT_MIN asfloat(0xff7fffff) | #define FLT_MIN asfloat(0xff7fffff) | ||||||
| #define FLT_MAX asfloat(0x7f7fffff) | #define FLT_MAX asfloat(0x7f7fffff) | ||||||
|  | #endif | ||||||
| 
 | 
 | ||||||
| #ifdef __spirv__ | #ifdef __spirv__ | ||||||
| 
 | 
 | ||||||
|  | @ -35,6 +37,32 @@ struct PushConstants | ||||||
| 
 | 
 | ||||||
| #define g_SpecConstants() g_SpecConstants | #define g_SpecConstants() g_SpecConstants | ||||||
| 
 | 
 | ||||||
|  | #elif defined(__air__) | ||||||
|  | 
 | ||||||
|  | #include <metal_stdlib> | ||||||
|  | 
 | ||||||
|  | using namespace metal; | ||||||
|  | 
 | ||||||
|  | constant uint G_SPEC_CONSTANTS [[function_constant(0)]]; | ||||||
|  | constant uint G_SPEC_CONSTANTS_VAL = is_function_constant_defined(G_SPEC_CONSTANTS) ? G_SPEC_CONSTANTS : 0; | ||||||
|  | 
 | ||||||
|  | uint g_SpecConstants() | ||||||
|  | { | ||||||
|  |     return G_SPEC_CONSTANTS_VAL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | struct PushConstants | ||||||
|  | { | ||||||
|  |     ulong VertexShaderConstants; | ||||||
|  |     ulong PixelShaderConstants; | ||||||
|  |     ulong SharedConstants; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | #define g_Booleans (*(reinterpret_cast<device uint*>(g_PushConstants.SharedConstants + 256))) | ||||||
|  | #define g_SwappedTexcoords (*(reinterpret_cast<device uint*>(g_PushConstants.SharedConstants + 260))) | ||||||
|  | #define g_HalfPixelOffset (*(reinterpret_cast<device float2*>(g_PushConstants.SharedConstants + 264))) | ||||||
|  | #define g_AlphaThreshold (*(reinterpret_cast<device float*>(g_PushConstants.SharedConstants + 272))) | ||||||
|  | 
 | ||||||
| #else | #else | ||||||
| 
 | 
 | ||||||
| #define DEFINE_SHARED_CONSTANTS() \ | #define DEFINE_SHARED_CONSTANTS() \ | ||||||
|  | @ -47,6 +75,93 @@ uint g_SpecConstants(); | ||||||
| 
 | 
 | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | struct CubeMapData | ||||||
|  | { | ||||||
|  |     float3 cubeMapDirections[2]; | ||||||
|  |     uint cubeMapIndex; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | #ifdef __air__ | ||||||
|  | 
 | ||||||
|  | struct Texture2DDescriptorHeap | ||||||
|  | { | ||||||
|  |     texture2d<float> tex; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct Texture3DDescriptorHeap | ||||||
|  | { | ||||||
|  |     texture3d<float> tex; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct TextureCubeDescriptorHeap | ||||||
|  | { | ||||||
|  |     texturecube<float> tex; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct SamplerDescriptorHeap | ||||||
|  | { | ||||||
|  |     sampler samp; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | uint2 getTexture2DDimensions(texture2d<float> texture) | ||||||
|  | { | ||||||
|  |     return uint2(texture.get_width(), texture.get_height()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float4 tfetch2D(constant Texture2DDescriptorHeap* textureHeap, | ||||||
|  |                 constant SamplerDescriptorHeap* samplerHeap, | ||||||
|  |                 uint resourceDescriptorIndex, | ||||||
|  |                 uint samplerDescriptorIndex, | ||||||
|  |                 float2 texCoord, float2 offset) | ||||||
|  | { | ||||||
|  |     texture2d<float> texture = textureHeap[resourceDescriptorIndex].tex; | ||||||
|  |     sampler sampler = samplerHeap[samplerDescriptorIndex].samp; | ||||||
|  |     return texture.sample(sampler, texCoord + offset / (float2)getTexture2DDimensions(texture)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float2 getWeights2D(constant Texture2DDescriptorHeap* textureHeap, | ||||||
|  |                     constant SamplerDescriptorHeap* samplerHeap, | ||||||
|  |                     uint resourceDescriptorIndex, | ||||||
|  |                     uint samplerDescriptorIndex, | ||||||
|  |                     float2 texCoord, float2 offset) | ||||||
|  | { | ||||||
|  |     texture2d<float> texture = textureHeap[resourceDescriptorIndex].tex; | ||||||
|  |     return select(fract(texCoord * float2(getTexture2DDimensions(texture)) + offset - 0.5), 0.0, isnan(texCoord)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float4 tfetch3D(constant Texture3DDescriptorHeap* textureHeap, | ||||||
|  |                 constant SamplerDescriptorHeap* samplerHeap, | ||||||
|  |                 uint resourceDescriptorIndex, | ||||||
|  |                 uint samplerDescriptorIndex, | ||||||
|  |                 float3 texCoord) | ||||||
|  | { | ||||||
|  |     texture3d<float> texture = textureHeap[resourceDescriptorIndex].tex; | ||||||
|  |     sampler sampler = samplerHeap[samplerDescriptorIndex].samp; | ||||||
|  |     return texture.sample(sampler, texCoord); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float4 tfetchCube(constant TextureCubeDescriptorHeap* textureHeap, | ||||||
|  |                   constant SamplerDescriptorHeap* samplerHeap, | ||||||
|  |                   uint resourceDescriptorIndex, | ||||||
|  |                   uint samplerDescriptorIndex, | ||||||
|  |                   float3 texCoord, thread CubeMapData* cubeMapData) | ||||||
|  | { | ||||||
|  |     texturecube<float> texture = textureHeap[resourceDescriptorIndex].tex; | ||||||
|  |     sampler sampler = samplerHeap[samplerDescriptorIndex].samp; | ||||||
|  |     return texture.sample(sampler, cubeMapData->cubeMapDirections[(uint)texCoord.z]); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float4 cube(float4 value, thread CubeMapData* cubeMapData) | ||||||
|  | { | ||||||
|  |     uint index = cubeMapData->cubeMapIndex; | ||||||
|  |     cubeMapData->cubeMapDirections[index] = value.xyz; | ||||||
|  |     ++cubeMapData->cubeMapIndex; | ||||||
|  | 
 | ||||||
|  |     return float4(0.0, 0.0, 0.0, index); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #else | ||||||
|  | 
 | ||||||
| Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0); | Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0); | ||||||
| Texture3D<float4> g_Texture3DDescriptorHeap[] : register(t0, space1); | Texture3D<float4> g_Texture3DDescriptorHeap[] : register(t0, space1); | ||||||
| TextureCube<float4> g_TextureCubeDescriptorHeap[] : register(t0, space2); | TextureCube<float4> g_TextureCubeDescriptorHeap[] : register(t0, space2); | ||||||
|  | @ -71,6 +186,85 @@ float2 getWeights2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, f | ||||||
|     return select(isnan(texCoord), 0.0, frac(texCoord * getTexture2DDimensions(texture) + offset - 0.5)); |     return select(isnan(texCoord), 0.0, frac(texCoord * getTexture2DDimensions(texture) + offset - 0.5)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord) | ||||||
|  | { | ||||||
|  |     return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData) | ||||||
|  | { | ||||||
|  |     return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | float4 cube(float4 value, inout CubeMapData cubeMapData) | ||||||
|  | { | ||||||
|  |     uint index = cubeMapData.cubeMapIndex; | ||||||
|  |     cubeMapData.cubeMapDirections[index] = value.xyz; | ||||||
|  |     ++cubeMapData.cubeMapIndex; | ||||||
|  | 
 | ||||||
|  |     return float4(0.0, 0.0, 0.0, index); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | float4 tfetchR11G11B10(uint4 value) | ||||||
|  | { | ||||||
|  |     if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) | ||||||
|  |     { | ||||||
|  |         return float4( | ||||||
|  |             (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), | ||||||
|  |             (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0), | ||||||
|  |             (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0), | ||||||
|  |             0.0); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  | #ifdef __air__ | ||||||
|  |         return as_type<float4>(value); | ||||||
|  | #else | ||||||
|  |         return asfloat(value); | ||||||
|  | #endif | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #ifdef __air__ | ||||||
|  | #define selectWrapper(a, b, c) select(c, b, a) | ||||||
|  | #else | ||||||
|  | #define selectWrapper(a, b, c) select(a, b, c) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifdef __air__ | ||||||
|  | #define frac(X) fract(X) | ||||||
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | void clip(T a) | ||||||
|  | { | ||||||
|  |     if (a < 0.0) { | ||||||
|  |         discard_fragment(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
// Reciprocal helper for the Metal (__air__) build. The return type follows the
// division result, so a vector operand yields a vector instead of being forced
// into a single float; scalar callers get the same result as before.
template<typename T>
auto rcp(T a) -> decltype(1.0f / a)
{
    return 1.0f / a;
}
|  | 
 | ||||||
|  | template<typename T> | ||||||
|  | float4x4 mul(T a, T b) | ||||||
|  | { | ||||||
|  |     return b * a; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifdef __air__ | ||||||
|  | #define UNROLL | ||||||
|  | #define BRANCH | ||||||
|  | #else | ||||||
|  | #define UNROLL [unroll] | ||||||
|  | #define BRANCH [branch] | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| float w0(float a) | float w0(float a) | ||||||
| { | { | ||||||
|     return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); |     return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); | ||||||
|  | @ -111,6 +305,46 @@ float h1(float a) | ||||||
|     return 1.0f + w3(a) / (w2(a) + w3(a)) + 0.5f; |     return 1.0f + w3(a) / (w2(a) + w3(a)) + 0.5f; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef __air__ | ||||||
|  | 
 | ||||||
// Bicubic 2D texture fetch (variant compiled when __air__ is defined).
//
// Looks up the texture and sampler in the given descriptor heaps, converts
// texCoord to texel space (adding `offset`, which is expressed in texels),
// and blends four taps taken through the supplied sampler. The tap positions
// and blend weights come from the g0/g1/h0/h1 helpers evaluated on the
// fractional texel position.
float4 tfetch2DBicubic(constant Texture2DDescriptorHeap* textureHeap,
                       constant SamplerDescriptorHeap* samplerHeap,
                       uint resourceDescriptorIndex,
                       uint samplerDescriptorIndex,
                       float2 texCoord, float2 offset)
{
    texture2d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    sampler samp = samplerHeap[samplerDescriptorIndex].samp;
    uint2 dimensions = getTexture2DDimensions(tex);
    float2 size = float2(dimensions);

    // Texel-space position, shifted back by half a texel and split into
    // its integer and fractional parts.
    float2 texel = texCoord * size + offset;
    texel -= 0.5f;
    float2 texelBase = floor(texel);
    float2 texelFract = texel - texelBase;

    // Blend weights along each axis.
    float g0x = g0(texelFract.x);
    float g1x = g1(texelFract.x);
    float g0y = g0(texelFract.y);
    float g1y = g1(texelFract.y);

    // Tap positions along each axis, still in texel space.
    float x0 = texelBase.x + h0(texelFract.x);
    float x1 = texelBase.x + h1(texelFract.x);
    float y0 = texelBase.y + h0(texelFract.y);
    float y1 = texelBase.y + h1(texelFract.y);

    // Four taps; dividing by the texture size converts back to
    // normalized coordinates.
    float4 s00 = tex.sample(samp, float2(x0, y0) / size);
    float4 s10 = tex.sample(samp, float2(x1, y0) / size);
    float4 s01 = tex.sample(samp, float2(x0, y1) / size);
    float4 s11 = tex.sample(samp, float2(x1, y1) / size);

    return g0y * (g0x * s00 + g1x * s10) +
           g1y * (g0x * s01 + g1x * s11);
}
|  | 
 | ||||||
|  | #else | ||||||
|  | 
 | ||||||
| float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset) | float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset) | ||||||
| { | { | ||||||
|     Texture2D<float4> texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex]; |     Texture2D<float4> texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex]; | ||||||
|  | @ -143,50 +377,11 @@ float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex | ||||||
|     return r; |     return r; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord) | #endif | ||||||
| { |  | ||||||
|     return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| struct CubeMapData | float4 swapFloats(uint swappedFloats, float4 value, uint semanticIndex) | ||||||
| { | { | ||||||
|     float3 cubeMapDirections[2]; |     return (swappedFloats & (1ull << semanticIndex)) != 0 ? value.yxwz : value; | ||||||
|     uint cubeMapIndex; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData) |  | ||||||
| { |  | ||||||
|     return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| float4 tfetchR11G11B10(uint4 value) |  | ||||||
| { |  | ||||||
|     if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) |  | ||||||
|     { |  | ||||||
|         return float4( |  | ||||||
|             (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), |  | ||||||
|             (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0), |  | ||||||
|             (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0), |  | ||||||
|             0.0); |  | ||||||
|     } |  | ||||||
|     else |  | ||||||
|     { |  | ||||||
|         return asfloat(value); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex) |  | ||||||
| { |  | ||||||
|     return (swappedTexcoords & (1ull << semanticIndex)) != 0 ? value.yxwz : value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| float4 cube(float4 value, inout CubeMapData cubeMapData) |  | ||||||
| { |  | ||||||
|     uint index = cubeMapData.cubeMapIndex; |  | ||||||
|     cubeMapData.cubeMapDirections[index] = value.xyz; |  | ||||||
|     ++cubeMapData.cubeMapIndex; |  | ||||||
|      |  | ||||||
|     return float4(0.0, 0.0, 0.0, index); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| float4 dst(float4 src0, float4 src1) | float4 dst(float4 src0, float4 src1) | ||||||
|  | @ -204,15 +399,34 @@ float4 max4(float4 src0) | ||||||
|     return max(max(src0.x, src0.y), max(src0.z, src0.w)); |     return max(max(src0.x, src0.y), max(src0.z, src0.w)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef __air__ | ||||||
|  | 
 | ||||||
// Scales a normalized texture coordinate by the dimensions of the 2D texture
// found at resourceDescriptorIndex in textureHeap (variant compiled when
// __air__ is defined).
float2 getPixelCoord(constant Texture2DDescriptorHeap* textureHeap,
                     uint resourceDescriptorIndex,
                     float2 texCoord)
{
    texture2d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    float2 dimensions = float2(getTexture2DDimensions(tex));
    return dimensions * texCoord;
}
|  | 
 | ||||||
|  | #else | ||||||
|  | 
 | ||||||
// Scales a normalized texture coordinate by the dimensions of the 2D texture
// stored at resourceDescriptorIndex in g_Texture2DDescriptorHeap.
float2 getPixelCoord(uint resourceDescriptorIndex, float2 texCoord)
{
    Texture2D<float4> tex = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
    return getTexture2DDimensions(tex) * texCoord;
}
| 
 | 
 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
// Derives a mip level from the screen-space derivatives of pixelCoord.
// The result is clamped so it is never below 0.
float computeMipLevel(float2 pixelCoord)
{
    // The derivative intrinsics have different names when __air__ is defined.
#ifndef __air__
    float2 gradX = ddx(pixelCoord);
    float2 gradY = ddy(pixelCoord);
#else
    float2 gradX = dfdx(pixelCoord);
    float2 gradY = dfdy(pixelCoord);
#endif
    float lengthSqrX = dot(gradX, gradX);
    float lengthSqrY = dot(gradY, gradY);

    // 0.5 * log2(d * d) is log2(d), taken on the larger of the two gradients.
    return max(0.0, 0.5 * log2(max(lengthSqrX, lengthSqrY)));
}
|  |  | ||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -45,7 +45,7 @@ struct ShaderRecompiler : StringBuffer | ||||||
|             out += '\t'; |             out += '\t'; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void printDstSwizzle(uint32_t dstSwizzle, bool operand); |     uint32_t printDstSwizzle(uint32_t dstSwizzle, bool operand); | ||||||
|     void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle); |     void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle); | ||||||
| 
 | 
 | ||||||
|     void recompile(const VertexFetchInstruction& instr, uint32_t address); |     void recompile(const VertexFetchInstruction& instr, uint32_t address); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 squidbus
						squidbus