mirror of
				https://github.com/hedge-dev/XenosRecomp.git
				synced 2025-10-30 07:12:17 +00:00 
			
		
		
		
	Compare commits
	
		
			2 commits
		
	
	
		
			99db834f5c
			...
			12f588ae35
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
							 | 
						12f588ae35 | ||
| 
							 | 
						1d52a81ba9 | 
					 9 changed files with 1085 additions and 279 deletions
				
			
		| 
						 | 
				
			
			@ -4,6 +4,10 @@ if (WIN32)
 | 
			
		|||
    option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
if (APPLE)
 | 
			
		||||
    option(XENOS_RECOMP_AIR "Generate Metal AIR shader cache" ON)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source")
 | 
			
		||||
 | 
			
		||||
add_executable(XenosRecomp
 | 
			
		||||
| 
						 | 
				
			
			@ -30,13 +34,6 @@ target_precompile_headers(XenosRecomp PRIVATE pch.h)
 | 
			
		|||
 | 
			
		||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
 | 
			
		||||
    target_compile_options(XenosRecomp PRIVATE -Wno-switch -Wno-unused-variable -Wno-null-arithmetic -fms-extensions)
 | 
			
		||||
 | 
			
		||||
    include(CheckCXXSymbolExists)
 | 
			
		||||
    check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP)
 | 
			
		||||
    if(LIBCPP)
 | 
			
		||||
        # Allows using std::execution
 | 
			
		||||
        target_compile_options(XenosRecomp PRIVATE -fexperimental-library)
 | 
			
		||||
    endif()
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
if (WIN32)
 | 
			
		||||
| 
						 | 
				
			
			@ -51,3 +48,8 @@ if (XENOS_RECOMP_DXIL)
 | 
			
		|||
    target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL)
 | 
			
		||||
    target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
if (XENOS_RECOMP_AIR)
 | 
			
		||||
    target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_AIR)
 | 
			
		||||
    target_sources(XenosRecomp PRIVATE air_compiler.cpp air_compiler.h)
 | 
			
		||||
endif()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										75
									
								
								XenosRecomp/air_compiler.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								XenosRecomp/air_compiler.cpp
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,75 @@
 | 
			
		|||
#include "air_compiler.h"
 | 
			
		||||
 | 
			
		||||
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iterator>
#include <spawn.h>
#include <sys/wait.h>
#include <unistd.h>
 | 
			
		||||
 | 
			
		||||
/// Remembers a filesystem path and removes the file at that path when the
/// object is destroyed.
///
/// The file does not have to exist: the result of unlink() is ignored, so a
/// path that was never created is simply skipped during cleanup.
struct TemporaryPath
{
    const std::string path;

    explicit TemporaryPath(std::string_view path) : path(path) {}

    // The destructor deletes the file, so a copy would try to delete the same
    // path a second time. Forbid copying (which also suppresses moving) so
    // each path has exactly one owner.
    TemporaryPath(const TemporaryPath&) = delete;
    TemporaryPath& operator=(const TemporaryPath&) = delete;

    ~TemporaryPath()
    {
        unlink(path.c_str());
    }
};
 | 
			
		||||
 | 
			
		||||
/// Spawns the program at argv[0] with the given null-terminated argument
/// vector and blocks until it terminates.
///
/// @param argv  Null-terminated argument vector; argv[0] is the path of the
///              executable to run.
/// @return The child's exit code when it exits normally (0 means success),
///         128 + the signal number when it is terminated by a signal, and -1
///         when the process cannot be spawned or waited on.
static int executeCommand(const char** argv)
{
    pid_t pid;
    // posix_spawn() takes a `char* const[]`; the const_cast only adapts the
    // pointer type to that signature.
    if (posix_spawn(&pid, argv[0], nullptr, nullptr, const_cast<char**>(argv), nullptr) != 0)
        return -1;

    int status;
    while (waitpid(pid, &status, 0) == -1)
    {
        // An interrupted wait does not mean the child is gone; try again.
        if (errno != EINTR)
            return -1;
    }

    // waitpid() yields an encoded wait status, not an exit code: a child that
    // exits with code 1 is reported as 256. Decode it so callers can log and
    // compare the real exit code.
    if (WIFEXITED(status))
        return WEXITSTATUS(status);

    // Follow the shell convention for children killed by a signal.
    if (WIFSIGNALED(status))
        return 128 + WTERMSIG(status);

    return -1;
}
 | 
			
		||||
 | 
			
		||||
std::vector<uint8_t> AirCompiler::compile(const std::string& shaderSource)
 | 
			
		||||
{
 | 
			
		||||
    // Save source to a location on disk for the compiler to read.
 | 
			
		||||
    char sourcePathTemplate[PATH_MAX] = "/tmp/xenos_metal_XXXXXX.metal";
 | 
			
		||||
    const int sourceFd = mkstemps(sourcePathTemplate, 6);
 | 
			
		||||
    if (sourceFd == -1)
 | 
			
		||||
    {
 | 
			
		||||
        fmt::println("Failed to create temporary file for shader source: {}", strerror(errno));
 | 
			
		||||
        std::exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const TemporaryPath sourcePath(sourcePathTemplate);
 | 
			
		||||
    const TemporaryPath irPath(sourcePath.path + ".ir");
 | 
			
		||||
    const TemporaryPath metalLibPath(sourcePath.path + ".metallib");
 | 
			
		||||
 | 
			
		||||
    const ssize_t sourceWritten = write(sourceFd, shaderSource.data(), shaderSource.size());
 | 
			
		||||
    close(sourceFd);
 | 
			
		||||
    if (sourceWritten < 0)
 | 
			
		||||
    {
 | 
			
		||||
        fmt::println("Failed to write shader source to disk: {}", strerror(errno));
 | 
			
		||||
        std::exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const char* compileCommand[] = { "/usr/bin/xcrun", "-sdk", "macosx", "metal", "-o", irPath.path.c_str(), "-c", sourcePath.path.c_str(), "-D__air__", "-DUNLEASHED_RECOMP", "-Wno-unused-variable", "-frecord-sources", "-gline-tables-only", nullptr };
 | 
			
		||||
    if (const int compileStatus = executeCommand(compileCommand); compileStatus != 0)
 | 
			
		||||
    {
 | 
			
		||||
        fmt::println("Metal compiler exited with status: {}", compileStatus);
 | 
			
		||||
        fmt::println("Generated source:\n{}", shaderSource);
 | 
			
		||||
        std::exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const char* linkCommand[] = { "/usr/bin/xcrun", "-sdk", "macosx", "metallib", "-o", metalLibPath.path.c_str(), irPath.path.c_str(), nullptr };
 | 
			
		||||
    if (const int linkStatus = executeCommand(linkCommand); linkStatus != 0)
 | 
			
		||||
    {
 | 
			
		||||
        fmt::println("Metal linker exited with status: {}", linkStatus);
 | 
			
		||||
        fmt::println("Generated source:\n{}", shaderSource);
 | 
			
		||||
        std::exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::ifstream libStream(metalLibPath.path, std::ios::binary);
 | 
			
		||||
    std::vector<uint8_t> data((std::istreambuf_iterator(libStream)), std::istreambuf_iterator<char>());
 | 
			
		||||
    return data;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										10
									
								
								XenosRecomp/air_compiler.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								XenosRecomp/air_compiler.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,10 @@
 | 
			
		|||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
class AirCompiler
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    /// Compiles Metal shader source into a Metal library and returns the
    /// bytes of the resulting .metallib file.
    ///
    /// The implementation writes the source to a temporary file and runs the
    /// `metal` and `metallib` tools through /usr/bin/xcrun. It terminates the
    /// process with exit code 1 if the temporary file cannot be written or
    /// either tool reports failure.
    [[nodiscard]] static std::vector<uint8_t> compile(const std::string& shaderSource);
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			@ -34,6 +34,11 @@ IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixe
 | 
			
		|||
            target = L"-T vs_6_0";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!compileLibrary)
 | 
			
		||||
    {
 | 
			
		||||
        args[argCount++] = L"-E shaderMain";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    args[argCount++] = target;
 | 
			
		||||
    args[argCount++] = L"-HV 2021";
 | 
			
		||||
    args[argCount++] = L"-all-resources-bound";
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,15 @@
 | 
			
		|||
#include <deque>
 | 
			
		||||
#include <mutex>
 | 
			
		||||
#include <thread>
 | 
			
		||||
 | 
			
		||||
#include "shader.h"
 | 
			
		||||
#include "shader_recompiler.h"
 | 
			
		||||
#include "dxc_compiler.h"
 | 
			
		||||
 | 
			
		||||
#ifdef XENOS_RECOMP_AIR
 | 
			
		||||
#include "air_compiler.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
static std::unique_ptr<uint8_t[]> readAllBytes(const char* filePath, size_t& fileSize)
 | 
			
		||||
{
 | 
			
		||||
    FILE* file = fopen(filePath, "rb");
 | 
			
		||||
| 
						 | 
				
			
			@ -26,9 +34,43 @@ struct RecompiledShader
 | 
			
		|||
    uint8_t* data = nullptr;
 | 
			
		||||
    IDxcBlob* dxil = nullptr;
 | 
			
		||||
    std::vector<uint8_t> spirv;
 | 
			
		||||
    std::vector<uint8_t> air;
 | 
			
		||||
    uint32_t specConstantsMask = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Recompiles a single shader and stores every enabled output format in it.
///
/// Fills in shader.specConstantsMask, shader.dxil (when XENOS_RECOMP_DXIL is
/// defined), shader.air (when XENOS_RECOMP_AIR is defined) and shader.spirv
/// (SMOL-V encoded), then bumps the shared progress counter and occasionally
/// prints a progress line.
///
/// @param shader      Shader to recompile; shader.data is the input.
/// @param include     Passed through to ShaderRecompiler::recompile.
/// @param progress    Counter of finished shaders, shared between threads.
/// @param numShaders  Total number of shaders, used for the percentage.
void recompileShader(RecompiledShader& shader, const std::string_view include, std::atomic<uint32_t>& progress, uint32_t numShaders)
{
    // One recompiler per thread, reset to a fresh state for every shader.
    thread_local ShaderRecompiler recompiler;
    recompiler = {};
    recompiler.recompile(shader.data, include);

    shader.specConstantsMask = recompiler.specConstantsMask;

    thread_local DxcCompiler dxcCompiler;

#ifdef XENOS_RECOMP_DXIL
    shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false);
    assert(shader.dxil != nullptr);
    assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!");
#endif

#ifdef XENOS_RECOMP_AIR
    shader.air = AirCompiler::compile(recompiler.out);
#endif

    IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true);
    assert(spirv != nullptr);

    bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo);
    assert(result);

    spirv->Release();

    // ++progress yields 1..numShaders, so the final shader is the one whose
    // count equals numShaders. Comparing against numShaders - 1 reported the
    // second-to-last shader and never printed the final 100%.
    size_t currentProgress = ++progress;
    if ((currentProgress % 10) == 0 || (currentProgress == numShaders))
        fmt::println("Recompiling shaders... {}%", currentProgress / float(numShaders) * 100.0f);
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char** argv)
 | 
			
		||||
{
 | 
			
		||||
#ifndef XENOS_RECOMP_INPUT
 | 
			
		||||
| 
						 | 
				
			
			@ -71,6 +113,7 @@ int main(int argc, char** argv)
 | 
			
		|||
    {
 | 
			
		||||
        std::vector<std::unique_ptr<uint8_t[]>> files;
 | 
			
		||||
        std::map<XXH64_hash_t, RecompiledShader> shaders;
 | 
			
		||||
        std::map<XXH64_hash_t, std::string> shaderFilenames;
 | 
			
		||||
 | 
			
		||||
        for (auto& file : std::filesystem::recursive_directory_iterator(input))
 | 
			
		||||
        {
 | 
			
		||||
| 
						 | 
				
			
			@ -99,6 +142,7 @@ int main(int argc, char** argv)
 | 
			
		|||
                    {
 | 
			
		||||
                        shader.first->second.data = fileData.get() + i;
 | 
			
		||||
                        foundAny = true;
 | 
			
		||||
                        shaderFilenames[hash] = file.path().string();
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    i += dataSize;
 | 
			
		||||
| 
						 | 
				
			
			@ -113,38 +157,42 @@ int main(int argc, char** argv)
 | 
			
		|||
                files.emplace_back(std::move(fileData));
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        std::mutex shaderQueueMutex;
 | 
			
		||||
        std::deque<XXH64_hash_t> shaderQueue;
 | 
			
		||||
        for (const auto& [hash, _] : shaders)
 | 
			
		||||
        {
 | 
			
		||||
            shaderQueue.emplace_back(hash);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const uint32_t numThreads = std::max(std::thread::hardware_concurrency(), 1u);
 | 
			
		||||
        fmt::println("Recompiling shaders with {} threads", numThreads);
 | 
			
		||||
 | 
			
		||||
        std::atomic<uint32_t> progress = 0;
 | 
			
		||||
 | 
			
		||||
        std::for_each(std::execution::par_unseq, shaders.begin(), shaders.end(), [&](auto& hashShaderPair)
 | 
			
		||||
        std::vector<std::thread> threads;
 | 
			
		||||
        threads.reserve(numThreads);
 | 
			
		||||
        for (uint32_t i = 0; i < numThreads; i++)
 | 
			
		||||
        {
 | 
			
		||||
            threads.emplace_back([&]
 | 
			
		||||
            {
 | 
			
		||||
                auto& shader = hashShaderPair.second;
 | 
			
		||||
 | 
			
		||||
                thread_local ShaderRecompiler recompiler;
 | 
			
		||||
                recompiler = {};
 | 
			
		||||
                recompiler.recompile(shader.data, include);
 | 
			
		||||
 | 
			
		||||
                shader.specConstantsMask = recompiler.specConstantsMask;
 | 
			
		||||
 | 
			
		||||
                thread_local DxcCompiler dxcCompiler;
 | 
			
		||||
 | 
			
		||||
#ifdef XENOS_RECOMP_DXIL
 | 
			
		||||
                shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false);
 | 
			
		||||
                assert(shader.dxil != nullptr);
 | 
			
		||||
                assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
                IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true);
 | 
			
		||||
                assert(spirv != nullptr);
 | 
			
		||||
 | 
			
		||||
                bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo);
 | 
			
		||||
                assert(result);
 | 
			
		||||
 | 
			
		||||
                spirv->Release();
 | 
			
		||||
 | 
			
		||||
                size_t currentProgress = ++progress;
 | 
			
		||||
                if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1))
 | 
			
		||||
                    fmt::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f);
 | 
			
		||||
                while (true)
 | 
			
		||||
                {
 | 
			
		||||
                    XXH64_hash_t shaderHash;
 | 
			
		||||
                    {
 | 
			
		||||
                        std::lock_guard lock(shaderQueueMutex);
 | 
			
		||||
                        if (shaderQueue.empty()) {
 | 
			
		||||
                            return;
 | 
			
		||||
                        }
 | 
			
		||||
                        shaderHash = shaderQueue.front();
 | 
			
		||||
                        shaderQueue.pop_front();
 | 
			
		||||
                    }
 | 
			
		||||
                    recompileShader(shaders[shaderHash], include, progress, shaders.size());
 | 
			
		||||
                }
 | 
			
		||||
            });
 | 
			
		||||
        }
 | 
			
		||||
        for (auto& thread : threads)
 | 
			
		||||
        {
 | 
			
		||||
            thread.join();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        fmt::println("Creating shader cache...");
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -154,11 +202,21 @@ int main(int argc, char** argv)
 | 
			
		|||
 | 
			
		||||
        std::vector<uint8_t> dxil;
 | 
			
		||||
        std::vector<uint8_t> spirv;
 | 
			
		||||
        std::vector<uint8_t> air;
 | 
			
		||||
 | 
			
		||||
        for (auto& [hash, shader] : shaders)
 | 
			
		||||
        {
 | 
			
		||||
            f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},",
 | 
			
		||||
                hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, spirv.size(), shader.spirv.size(), shader.specConstantsMask);
 | 
			
		||||
            const std::string& fullFilename = shaderFilenames[hash];
 | 
			
		||||
            std::string filename = fullFilename;
 | 
			
		||||
            size_t shaderPos = filename.find("shader");
 | 
			
		||||
            if (shaderPos != std::string::npos) {
 | 
			
		||||
                filename = filename.substr(shaderPos);
 | 
			
		||||
                // Prevent bad escape sequences in Windows shader path.
 | 
			
		||||
                std::replace(filename.begin(), filename.end(), '\\', '/');
 | 
			
		||||
            }
 | 
			
		||||
            f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {}, {}, {}, \"{}\" }},",
 | 
			
		||||
                hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0,
 | 
			
		||||
                spirv.size(), shader.spirv.size(), air.size(), shader.air.size(), shader.specConstantsMask, filename);
 | 
			
		||||
 | 
			
		||||
            if (shader.dxil != nullptr)
 | 
			
		||||
            {
 | 
			
		||||
| 
						 | 
				
			
			@ -166,6 +224,10 @@ int main(int argc, char** argv)
 | 
			
		|||
                    reinterpret_cast<uint8_t *>(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize());
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
#ifdef XENOS_RECOMP_AIR
 | 
			
		||||
            air.insert(air.end(), shader.air.begin(), shader.air.end());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
            spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -189,6 +251,22 @@ int main(int argc, char** argv)
 | 
			
		|||
        f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef XENOS_RECOMP_AIR
 | 
			
		||||
        fmt::println("Compressing AIR cache...");
 | 
			
		||||
 | 
			
		||||
        std::vector<uint8_t> airCompressed(ZSTD_compressBound(air.size()));
 | 
			
		||||
        airCompressed.resize(ZSTD_compress(airCompressed.data(), airCompressed.size(), air.data(), air.size(), level));
 | 
			
		||||
 | 
			
		||||
        f.print("const uint8_t g_compressedAirCache[] = {{");
 | 
			
		||||
 | 
			
		||||
        for (auto data : airCompressed)
 | 
			
		||||
            f.print("{},", data);
 | 
			
		||||
 | 
			
		||||
        f.println("}};");
 | 
			
		||||
        f.println("const size_t g_airCacheCompressedSize = {};", airCompressed.size());
 | 
			
		||||
        f.println("const size_t g_airCacheDecompressedSize = {};", air.size());
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
        fmt::println("Compressing SPIRV cache...");
 | 
			
		||||
 | 
			
		||||
        std::vector<uint8_t> spirvCompressed(ZSTD_compressBound(spirv.size()));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
#pragma once
 | 
			
		||||
 | 
			
		||||
#ifdef _WIN32
 | 
			
		||||
#define NOMINMAX
 | 
			
		||||
#include <Windows.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,10 +10,12 @@
 | 
			
		|||
    #define SPEC_CONSTANT_REVERSE_Z         (1 << 4)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(__cplusplus) || defined(__INTELLISENSE__)
 | 
			
		||||
#if defined(__air__) || !defined(__cplusplus) || defined(__INTELLISENSE__)
 | 
			
		||||
 | 
			
		||||
#ifndef __air__
 | 
			
		||||
#define FLT_MIN asfloat(0xff7fffff)
 | 
			
		||||
#define FLT_MAX asfloat(0x7f7fffff)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __spirv__
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -35,6 +37,32 @@ struct PushConstants
 | 
			
		|||
 | 
			
		||||
#define g_SpecConstants() g_SpecConstants
 | 
			
		||||
 | 
			
		||||
#elif defined(__air__)
 | 
			
		||||
 | 
			
		||||
#include <metal_stdlib>
 | 
			
		||||
 | 
			
		||||
using namespace metal;
 | 
			
		||||
 | 
			
		||||
constant uint G_SPEC_CONSTANTS [[function_constant(0)]];
 | 
			
		||||
constant uint G_SPEC_CONSTANTS_VAL = is_function_constant_defined(G_SPEC_CONSTANTS) ? G_SPEC_CONSTANTS : 0;
 | 
			
		||||
 | 
			
		||||
uint g_SpecConstants()
 | 
			
		||||
{
 | 
			
		||||
    return G_SPEC_CONSTANTS_VAL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct PushConstants
 | 
			
		||||
{
 | 
			
		||||
    ulong VertexShaderConstants;
 | 
			
		||||
    ulong PixelShaderConstants;
 | 
			
		||||
    ulong SharedConstants;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define g_Booleans (*(reinterpret_cast<device uint*>(g_PushConstants.SharedConstants + 256)))
 | 
			
		||||
#define g_SwappedTexcoords (*(reinterpret_cast<device uint*>(g_PushConstants.SharedConstants + 260)))
 | 
			
		||||
#define g_HalfPixelOffset (*(reinterpret_cast<device float2*>(g_PushConstants.SharedConstants + 264)))
 | 
			
		||||
#define g_AlphaThreshold (*(reinterpret_cast<device float*>(g_PushConstants.SharedConstants + 272)))
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#define DEFINE_SHARED_CONSTANTS() \
 | 
			
		||||
| 
						 | 
				
			
			@ -47,6 +75,93 @@ uint g_SpecConstants();
 | 
			
		|||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
struct CubeMapData
 | 
			
		||||
{
 | 
			
		||||
    float3 cubeMapDirections[2];
 | 
			
		||||
    uint cubeMapIndex;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
 | 
			
		||||
struct Texture2DDescriptorHeap
 | 
			
		||||
{
 | 
			
		||||
    texture2d<float> tex;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct Texture3DDescriptorHeap
 | 
			
		||||
{
 | 
			
		||||
    texture3d<float> tex;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct TextureCubeDescriptorHeap
 | 
			
		||||
{
 | 
			
		||||
    texturecube<float> tex;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct SamplerDescriptorHeap
 | 
			
		||||
{
 | 
			
		||||
    sampler samp;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
uint2 getTexture2DDimensions(texture2d<float> texture)
 | 
			
		||||
{
 | 
			
		||||
    return uint2(texture.get_width(), texture.get_height());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetch2D(constant Texture2DDescriptorHeap* textureHeap,
 | 
			
		||||
                constant SamplerDescriptorHeap* samplerHeap,
 | 
			
		||||
                uint resourceDescriptorIndex,
 | 
			
		||||
                uint samplerDescriptorIndex,
 | 
			
		||||
                float2 texCoord, float2 offset)
 | 
			
		||||
{
 | 
			
		||||
    texture2d<float> texture = textureHeap[resourceDescriptorIndex].tex;
 | 
			
		||||
    sampler sampler = samplerHeap[samplerDescriptorIndex].samp;
 | 
			
		||||
    return texture.sample(sampler, texCoord + offset / (float2)getTexture2DDimensions(texture));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float2 getWeights2D(constant Texture2DDescriptorHeap* textureHeap,
 | 
			
		||||
                    constant SamplerDescriptorHeap* samplerHeap,
 | 
			
		||||
                    uint resourceDescriptorIndex,
 | 
			
		||||
                    uint samplerDescriptorIndex,
 | 
			
		||||
                    float2 texCoord, float2 offset)
 | 
			
		||||
{
 | 
			
		||||
    texture2d<float> texture = textureHeap[resourceDescriptorIndex].tex;
 | 
			
		||||
    return select(fract(texCoord * float2(getTexture2DDimensions(texture)) + offset - 0.5), 0.0, isnan(texCoord));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetch3D(constant Texture3DDescriptorHeap* textureHeap,
 | 
			
		||||
                constant SamplerDescriptorHeap* samplerHeap,
 | 
			
		||||
                uint resourceDescriptorIndex,
 | 
			
		||||
                uint samplerDescriptorIndex,
 | 
			
		||||
                float3 texCoord)
 | 
			
		||||
{
 | 
			
		||||
    texture3d<float> texture = textureHeap[resourceDescriptorIndex].tex;
 | 
			
		||||
    sampler sampler = samplerHeap[samplerDescriptorIndex].samp;
 | 
			
		||||
    return texture.sample(sampler, texCoord);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetchCube(constant TextureCubeDescriptorHeap* textureHeap,
 | 
			
		||||
                  constant SamplerDescriptorHeap* samplerHeap,
 | 
			
		||||
                  uint resourceDescriptorIndex,
 | 
			
		||||
                  uint samplerDescriptorIndex,
 | 
			
		||||
                  float3 texCoord, thread CubeMapData* cubeMapData)
 | 
			
		||||
{
 | 
			
		||||
    texturecube<float> texture = textureHeap[resourceDescriptorIndex].tex;
 | 
			
		||||
    sampler sampler = samplerHeap[samplerDescriptorIndex].samp;
 | 
			
		||||
    return texture.sample(sampler, cubeMapData->cubeMapDirections[(uint)texCoord.z]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 cube(float4 value, thread CubeMapData* cubeMapData)
 | 
			
		||||
{
 | 
			
		||||
    uint index = cubeMapData->cubeMapIndex;
 | 
			
		||||
    cubeMapData->cubeMapDirections[index] = value.xyz;
 | 
			
		||||
    ++cubeMapData->cubeMapIndex;
 | 
			
		||||
 | 
			
		||||
    return float4(0.0, 0.0, 0.0, index);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0);
 | 
			
		||||
Texture3D<float4> g_Texture3DDescriptorHeap[] : register(t0, space1);
 | 
			
		||||
TextureCube<float4> g_TextureCubeDescriptorHeap[] : register(t0, space2);
 | 
			
		||||
| 
						 | 
				
			
			@ -71,6 +186,85 @@ float2 getWeights2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, f
 | 
			
		|||
    return select(isnan(texCoord), 0.0, frac(texCoord * getTexture2DDimensions(texture) + offset - 0.5));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord)
 | 
			
		||||
{
 | 
			
		||||
    return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData)
 | 
			
		||||
{
 | 
			
		||||
    return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 cube(float4 value, inout CubeMapData cubeMapData)
 | 
			
		||||
{
 | 
			
		||||
    uint index = cubeMapData.cubeMapIndex;
 | 
			
		||||
    cubeMapData.cubeMapDirections[index] = value.xyz;
 | 
			
		||||
    ++cubeMapData.cubeMapIndex;
 | 
			
		||||
 | 
			
		||||
    return float4(0.0, 0.0, 0.0, index);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
float4 tfetchR11G11B10(uint4 value)
 | 
			
		||||
{
 | 
			
		||||
    if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL)
 | 
			
		||||
    {
 | 
			
		||||
        return float4(
 | 
			
		||||
            (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0),
 | 
			
		||||
            (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0),
 | 
			
		||||
            (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0),
 | 
			
		||||
            0.0);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
        return as_type<float4>(value);
 | 
			
		||||
#else
 | 
			
		||||
        return asfloat(value);
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
#define selectWrapper(a, b, c) select(c, b, a)
 | 
			
		||||
#else
 | 
			
		||||
#define selectWrapper(a, b, c) select(a, b, c)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
#define frac(X) fract(X)
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
void clip(T a)
 | 
			
		||||
{
 | 
			
		||||
    if (a < 0.0) {
 | 
			
		||||
        discard_fragment();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
float rcp(T a)
 | 
			
		||||
{
 | 
			
		||||
    return 1.0 / a;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
float4x4 mul(T a, T b)
 | 
			
		||||
{
 | 
			
		||||
    return b * a;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
#define UNROLL
 | 
			
		||||
#define BRANCH
 | 
			
		||||
#else
 | 
			
		||||
#define UNROLL [unroll]
 | 
			
		||||
#define BRANCH [branch]
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
float w0(float a)
 | 
			
		||||
{
 | 
			
		||||
    return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
 | 
			
		||||
| 
						 | 
				
			
			@ -111,6 +305,46 @@ float h1(float a)
 | 
			
		|||
    return 1.0f + w3(a) / (w2(a) + w3(a)) + 0.5f;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
 | 
			
		||||
float4 tfetch2DBicubic(constant Texture2DDescriptorHeap* textureHeap,
 | 
			
		||||
                       constant SamplerDescriptorHeap* samplerHeap,
 | 
			
		||||
                       uint resourceDescriptorIndex,
 | 
			
		||||
                       uint samplerDescriptorIndex,
 | 
			
		||||
                       float2 texCoord, float2 offset)
 | 
			
		||||
{
 | 
			
		||||
    texture2d<float> texture = textureHeap[resourceDescriptorIndex].tex;
 | 
			
		||||
    sampler sampler = samplerHeap[samplerDescriptorIndex].samp;
 | 
			
		||||
    uint2 dimensions = getTexture2DDimensions(texture);
 | 
			
		||||
 | 
			
		||||
    float x = texCoord.x * dimensions.x + offset.x;
 | 
			
		||||
    float y = texCoord.y * dimensions.y + offset.y;
 | 
			
		||||
 | 
			
		||||
    x -= 0.5f;
 | 
			
		||||
    y -= 0.5f;
 | 
			
		||||
    float px = floor(x);
 | 
			
		||||
    float py = floor(y);
 | 
			
		||||
    float fx = x - px;
 | 
			
		||||
    float fy = y - py;
 | 
			
		||||
 | 
			
		||||
    float g0x = g0(fx);
 | 
			
		||||
    float g1x = g1(fx);
 | 
			
		||||
    float h0x = h0(fx);
 | 
			
		||||
    float h1x = h1(fx);
 | 
			
		||||
    float h0y = h0(fy);
 | 
			
		||||
    float h1y = h1(fy);
 | 
			
		||||
 | 
			
		||||
    float4 r =
 | 
			
		||||
        g0(fy) * (g0x * texture.sample(sampler, float2(px + h0x, py + h0y) / float2(dimensions)) +
 | 
			
		||||
              g1x * texture.sample(sampler, float2(px + h1x, py + h0y) / float2(dimensions))) +
 | 
			
		||||
        g1(fy) * (g0x * texture.sample(sampler, float2(px + h0x, py + h1y) / float2(dimensions)) +
 | 
			
		||||
              g1x * texture.sample(sampler, float2(px + h1x, py + h1y) / float2(dimensions)));
 | 
			
		||||
 | 
			
		||||
    return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset)
 | 
			
		||||
{
 | 
			
		||||
    Texture2D<float4> texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
 | 
			
		||||
| 
						 | 
				
			
			@ -143,50 +377,11 @@ float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex
 | 
			
		|||
    return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord)
 | 
			
		||||
{
 | 
			
		||||
    return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
struct CubeMapData
 | 
			
		||||
float4 swapFloats(uint swappedFloats, float4 value, uint semanticIndex)
 | 
			
		||||
{
 | 
			
		||||
    float3 cubeMapDirections[2];
 | 
			
		||||
    uint cubeMapIndex;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData)
 | 
			
		||||
{
 | 
			
		||||
    return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetchR11G11B10(uint4 value)
 | 
			
		||||
{
 | 
			
		||||
    if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL)
 | 
			
		||||
    {
 | 
			
		||||
        return float4(
 | 
			
		||||
            (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0),
 | 
			
		||||
            (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0),
 | 
			
		||||
            (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0),
 | 
			
		||||
            0.0);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
        return asfloat(value);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex)
 | 
			
		||||
{
 | 
			
		||||
    return (swappedTexcoords & (1ull << semanticIndex)) != 0 ? value.yxwz : value;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 cube(float4 value, inout CubeMapData cubeMapData)
 | 
			
		||||
{
 | 
			
		||||
    uint index = cubeMapData.cubeMapIndex;
 | 
			
		||||
    cubeMapData.cubeMapDirections[index] = value.xyz;
 | 
			
		||||
    ++cubeMapData.cubeMapIndex;
 | 
			
		||||
    
 | 
			
		||||
    return float4(0.0, 0.0, 0.0, index);
 | 
			
		||||
    return (swappedFloats & (1ull << semanticIndex)) != 0 ? value.yxwz : value;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float4 dst(float4 src0, float4 src1)
 | 
			
		||||
| 
						 | 
				
			
			@ -204,15 +399,34 @@ float4 max4(float4 src0)
 | 
			
		|||
    return max(max(src0.x, src0.y), max(src0.z, src0.w));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
 | 
			
		||||
float2 getPixelCoord(constant Texture2DDescriptorHeap* textureHeap,
 | 
			
		||||
                     uint resourceDescriptorIndex,
 | 
			
		||||
                     float2 texCoord)
 | 
			
		||||
{
 | 
			
		||||
    texture2d<float> texture = textureHeap[resourceDescriptorIndex].tex;
 | 
			
		||||
    return (float2)getTexture2DDimensions(texture) * texCoord;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
float2 getPixelCoord(uint resourceDescriptorIndex, float2 texCoord)
 | 
			
		||||
{
 | 
			
		||||
    return getTexture2DDimensions(g_Texture2DDescriptorHeap[resourceDescriptorIndex]) * texCoord;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
float computeMipLevel(float2 pixelCoord)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __air__
 | 
			
		||||
    float2 dx = dfdx(pixelCoord);
 | 
			
		||||
    float2 dy = dfdy(pixelCoord);
 | 
			
		||||
#else
 | 
			
		||||
    float2 dx = ddx(pixelCoord);
 | 
			
		||||
    float2 dy = ddy(pixelCoord);
 | 
			
		||||
#endif
 | 
			
		||||
    float deltaMaxSqr = max(dot(dx, dx), dot(dy, dy));
 | 
			
		||||
    return max(0.0, 0.5 * log2(deltaMaxSqr));
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
				
			
			@ -45,7 +45,7 @@ struct ShaderRecompiler : StringBuffer
 | 
			
		|||
            out += '\t';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void printDstSwizzle(uint32_t dstSwizzle, bool operand);
 | 
			
		||||
    uint32_t printDstSwizzle(uint32_t dstSwizzle, bool operand);
 | 
			
		||||
    void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle);
 | 
			
		||||
 | 
			
		||||
    void recompile(const VertexFetchInstruction& instr, uint32_t address);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue