mirror of
https://github.com/hedge-dev/XenosRecomp.git
synced 2025-10-30 07:12:17 +00:00
Compare commits
2 commits
99db834f5c
...
8f5afbf877
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8f5afbf877 | ||
|
|
4906992039 |
9 changed files with 1091 additions and 279 deletions
|
|
@ -4,6 +4,10 @@ if (WIN32)
|
|||
option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON)
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
option(XENOS_RECOMP_AIR "Generate Metal AIR shader cache" ON)
|
||||
endif()
|
||||
|
||||
set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source")
|
||||
|
||||
add_executable(XenosRecomp
|
||||
|
|
@ -30,13 +34,6 @@ target_precompile_headers(XenosRecomp PRIVATE pch.h)
|
|||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_compile_options(XenosRecomp PRIVATE -Wno-switch -Wno-unused-variable -Wno-null-arithmetic -fms-extensions)
|
||||
|
||||
include(CheckCXXSymbolExists)
|
||||
check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP)
|
||||
if(LIBCPP)
|
||||
# Allows using std::execution
|
||||
target_compile_options(XenosRecomp PRIVATE -fexperimental-library)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
|
|
@ -51,3 +48,8 @@ if (XENOS_RECOMP_DXIL)
|
|||
target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL)
|
||||
target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL)
|
||||
endif()
|
||||
|
||||
if (XENOS_RECOMP_AIR)
|
||||
target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_AIR)
|
||||
target_sources(XenosRecomp PRIVATE air_compiler.cpp air_compiler.h)
|
||||
endif()
|
||||
|
|
|
|||
81
XenosRecomp/air_compiler.cpp
Normal file
81
XenosRecomp/air_compiler.cpp
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#include "air_compiler.h"

#include <cerrno>
#include <fstream>
#include <iterator>
#include <spawn.h>
#include <sys/wait.h>
#include <unistd.h>
|
||||
|
||||
// Scope guard for a file on disk: remembers a path and unlinks it when the
// guard is destroyed. The file itself is created (or not) by the caller.
struct TemporaryPath
{
    const std::string path;

    explicit TemporaryPath(std::string_view path) : path(path) {}

    // A copy would unlink the same file a second time, possibly while the
    // original guard is still expected to keep it alive, so copying is disabled.
    TemporaryPath(const TemporaryPath&) = delete;
    TemporaryPath& operator=(const TemporaryPath&) = delete;

    ~TemporaryPath()
    {
        // Result deliberately ignored: the file may never have been created.
        unlink(path.c_str());
    }
};
|
||||
|
||||
// Spawns the program at argv[0] (an explicit path; posix_spawn does not search
// PATH) with the given null-terminated argument vector and blocks until it ends.
//
// Returns:
//   - the child's exit code when it exited normally (0 means success),
//   - 128 + signal number when it was terminated by a signal,
//   - -1 when the process could not be spawned or waited for.
static int executeCommand(const char** argv)
{
    pid_t pid;
    // posix_spawn takes `char* const[]`; the cast only satisfies the signature.
    if (posix_spawn(&pid, argv[0], nullptr, nullptr, const_cast<char**>(argv), nullptr) != 0)
        return -1;

    int status = 0;
    pid_t waited;
    do
    {
        // A signal delivered to this process can interrupt the wait; retry then.
        waited = waitpid(pid, &status, 0);
    } while (waited == -1 && errno == EINTR);

    if (waited == -1)
        return -1;

    // Decode the wait status so callers log a real exit code instead of the
    // packed value (e.g. 256 for "exit 1").
    if (WIFEXITED(status))
        return WEXITSTATUS(status);

    if (WIFSIGNALED(status))
        return 128 + WTERMSIG(status);

    return -1;
}
|
||||
|
||||
std::vector<uint8_t> AirCompiler::compile(const std::string& shaderSource)
|
||||
{
|
||||
// Save source to a location on disk for the compiler to read.
|
||||
char sourcePathTemplate[PATH_MAX] = "/tmp/xenos_metal_XXXXXX.metal";
|
||||
const int sourceFd = mkstemps(sourcePathTemplate, 6);
|
||||
if (sourceFd == -1)
|
||||
{
|
||||
fmt::println("Failed to create temporary file for shader source: {}", strerror(errno));
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
const TemporaryPath sourcePath(sourcePathTemplate);
|
||||
const TemporaryPath irPath(sourcePath.path + ".ir");
|
||||
const TemporaryPath metalLibPath(sourcePath.path + ".metallib");
|
||||
|
||||
const ssize_t sourceWritten = write(sourceFd, shaderSource.data(), shaderSource.size());
|
||||
close(sourceFd);
|
||||
if (sourceWritten < 0)
|
||||
{
|
||||
fmt::println("Failed to write shader source to disk: {}", strerror(errno));
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
const char* compileCommand[] = {
|
||||
"/usr/bin/xcrun", "-sdk", "macosx", "metal", "-o", irPath.path.c_str(), "-c", sourcePath.path.c_str(), "-Wno-unused-variable", "-frecord-sources", "-gline-tables-only", "-fmetal-math-mode=relaxed", "-D__air__",
|
||||
#ifdef UNLEASHED_RECOMP
|
||||
"-DUNLEASHED_RECOMP",
|
||||
#endif
|
||||
nullptr
|
||||
};
|
||||
if (const int compileStatus = executeCommand(compileCommand); compileStatus != 0)
|
||||
{
|
||||
fmt::println("Metal compiler exited with status: {}", compileStatus);
|
||||
fmt::println("Generated source:\n{}", shaderSource);
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
const char* linkCommand[] = { "/usr/bin/xcrun", "-sdk", "macosx", "metallib", "-o", metalLibPath.path.c_str(), irPath.path.c_str(), nullptr };
|
||||
if (const int linkStatus = executeCommand(linkCommand); linkStatus != 0)
|
||||
{
|
||||
fmt::println("Metal linker exited with status: {}", linkStatus);
|
||||
fmt::println("Generated source:\n{}", shaderSource);
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
std::ifstream libStream(metalLibPath.path, std::ios::binary);
|
||||
std::vector<uint8_t> data((std::istreambuf_iterator(libStream)), std::istreambuf_iterator<char>());
|
||||
return data;
|
||||
}
|
||||
10
XenosRecomp/air_compiler.h
Normal file
10
XenosRecomp/air_compiler.h
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Stateless entry point for turning Metal shader source into a compiled
// Metal library blob.
class AirCompiler
|
||||
{
|
||||
public:
|
||||
// Compiles `shaderSource` (Metal shader source text) and returns the raw
// bytes of the resulting library. The result must not be discarded.
[[nodiscard]] static std::vector<uint8_t> compile(const std::string& shaderSource);
|
||||
};
|
||||
|
|
@ -34,6 +34,11 @@ IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixe
|
|||
target = L"-T vs_6_0";
|
||||
}
|
||||
|
||||
if (!compileLibrary)
|
||||
{
|
||||
args[argCount++] = L"-E shaderMain";
|
||||
}
|
||||
|
||||
args[argCount++] = target;
|
||||
args[argCount++] = L"-HV 2021";
|
||||
args[argCount++] = L"-all-resources-bound";
|
||||
|
|
|
|||
|
|
@ -1,7 +1,15 @@
|
|||
#include <deque>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#include "shader.h"
|
||||
#include "shader_recompiler.h"
|
||||
#include "dxc_compiler.h"
|
||||
|
||||
#ifdef XENOS_RECOMP_AIR
|
||||
#include "air_compiler.h"
|
||||
#endif
|
||||
|
||||
static std::unique_ptr<uint8_t[]> readAllBytes(const char* filePath, size_t& fileSize)
|
||||
{
|
||||
FILE* file = fopen(filePath, "rb");
|
||||
|
|
@ -26,9 +34,43 @@ struct RecompiledShader
|
|||
uint8_t* data = nullptr;
|
||||
IDxcBlob* dxil = nullptr;
|
||||
std::vector<uint8_t> spirv;
|
||||
std::vector<uint8_t> air;
|
||||
uint32_t specConstantsMask = 0;
|
||||
};
|
||||
|
||||
// Recompiles one shader and stores every enabled backend's output in `shader`.
//
//   shader      - in: `data` points at the shader to translate;
//                 out: `specConstantsMask`, `spirv`, and (when enabled) `dxil` / `air`.
//   include     - text forwarded to ShaderRecompiler::recompile.
//   progress    - shared counter of finished shaders, incremented once per call.
//   numShaders  - total number of shaders, used for the progress percentage.
void recompileShader(RecompiledShader& shader, const std::string_view include, std::atomic<uint32_t>& progress, uint32_t numShaders)
{
    // thread_local: each calling thread keeps and reuses its own instances.
    thread_local ShaderRecompiler recompiler;
    recompiler = {};
    recompiler.recompile(shader.data, include);

    shader.specConstantsMask = recompiler.specConstantsMask;

    thread_local DxcCompiler dxcCompiler;

#ifdef XENOS_RECOMP_DXIL
    shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false);
    assert(shader.dxil != nullptr);
    // The second 32-bit word of the container is expected to be non-zero once signed.
    assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!");
#endif

#ifdef XENOS_RECOMP_AIR
    shader.air = AirCompiler::compile(recompiler.out);
#endif

    IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true);
    assert(spirv != nullptr);

    // [[maybe_unused]]: only read by the assert, which vanishes under NDEBUG.
    [[maybe_unused]] const bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo);
    assert(result);

    spirv->Release();

    // The counter is pre-incremented, so it runs 1..numShaders; compare against
    // numShaders (not numShaders - 1) so the final 100% line is always printed.
    const size_t currentProgress = ++progress;
    if ((currentProgress % 10) == 0 || (currentProgress == numShaders))
        fmt::println("Recompiling shaders... {}%", currentProgress / float(numShaders) * 100.0f);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#ifndef XENOS_RECOMP_INPUT
|
||||
|
|
@ -71,6 +113,7 @@ int main(int argc, char** argv)
|
|||
{
|
||||
std::vector<std::unique_ptr<uint8_t[]>> files;
|
||||
std::map<XXH64_hash_t, RecompiledShader> shaders;
|
||||
std::map<XXH64_hash_t, std::string> shaderFilenames;
|
||||
|
||||
for (auto& file : std::filesystem::recursive_directory_iterator(input))
|
||||
{
|
||||
|
|
@ -99,6 +142,7 @@ int main(int argc, char** argv)
|
|||
{
|
||||
shader.first->second.data = fileData.get() + i;
|
||||
foundAny = true;
|
||||
shaderFilenames[hash] = file.path().string();
|
||||
}
|
||||
|
||||
i += dataSize;
|
||||
|
|
@ -113,38 +157,42 @@ int main(int argc, char** argv)
|
|||
files.emplace_back(std::move(fileData));
|
||||
}
|
||||
|
||||
std::atomic<uint32_t> progress = 0;
|
||||
|
||||
std::for_each(std::execution::par_unseq, shaders.begin(), shaders.end(), [&](auto& hashShaderPair)
|
||||
std::mutex shaderQueueMutex;
|
||||
std::deque<XXH64_hash_t> shaderQueue;
|
||||
for (const auto& [hash, _] : shaders)
|
||||
{
|
||||
auto& shader = hashShaderPair.second;
|
||||
shaderQueue.emplace_back(hash);
|
||||
}
|
||||
|
||||
thread_local ShaderRecompiler recompiler;
|
||||
recompiler = {};
|
||||
recompiler.recompile(shader.data, include);
|
||||
const uint32_t numThreads = std::max(std::thread::hardware_concurrency(), 1u);
|
||||
fmt::println("Recompiling shaders with {} threads", numThreads);
|
||||
|
||||
shader.specConstantsMask = recompiler.specConstantsMask;
|
||||
|
||||
thread_local DxcCompiler dxcCompiler;
|
||||
|
||||
#ifdef XENOS_RECOMP_DXIL
|
||||
shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false);
|
||||
assert(shader.dxil != nullptr);
|
||||
assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!");
|
||||
#endif
|
||||
|
||||
IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true);
|
||||
assert(spirv != nullptr);
|
||||
|
||||
bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo);
|
||||
assert(result);
|
||||
|
||||
spirv->Release();
|
||||
|
||||
size_t currentProgress = ++progress;
|
||||
if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1))
|
||||
fmt::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f);
|
||||
std::atomic<uint32_t> progress = 0;
|
||||
std::vector<std::thread> threads;
|
||||
threads.reserve(numThreads);
|
||||
for (uint32_t i = 0; i < numThreads; i++)
|
||||
{
|
||||
threads.emplace_back([&]
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
XXH64_hash_t shaderHash;
|
||||
{
|
||||
std::lock_guard lock(shaderQueueMutex);
|
||||
if (shaderQueue.empty()) {
|
||||
return;
|
||||
}
|
||||
shaderHash = shaderQueue.front();
|
||||
shaderQueue.pop_front();
|
||||
}
|
||||
recompileShader(shaders[shaderHash], include, progress, shaders.size());
|
||||
}
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads)
|
||||
{
|
||||
thread.join();
|
||||
}
|
||||
|
||||
fmt::println("Creating shader cache...");
|
||||
|
||||
|
|
@ -154,11 +202,21 @@ int main(int argc, char** argv)
|
|||
|
||||
std::vector<uint8_t> dxil;
|
||||
std::vector<uint8_t> spirv;
|
||||
std::vector<uint8_t> air;
|
||||
|
||||
for (auto& [hash, shader] : shaders)
|
||||
{
|
||||
f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},",
|
||||
hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, spirv.size(), shader.spirv.size(), shader.specConstantsMask);
|
||||
const std::string& fullFilename = shaderFilenames[hash];
|
||||
std::string filename = fullFilename;
|
||||
size_t shaderPos = filename.find("shader");
|
||||
if (shaderPos != std::string::npos) {
|
||||
filename = filename.substr(shaderPos);
|
||||
// Prevent bad escape sequences in Windows shader path.
|
||||
std::replace(filename.begin(), filename.end(), '\\', '/');
|
||||
}
|
||||
f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {}, {}, {}, \"{}\" }},",
|
||||
hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0,
|
||||
spirv.size(), shader.spirv.size(), air.size(), shader.air.size(), shader.specConstantsMask, filename);
|
||||
|
||||
if (shader.dxil != nullptr)
|
||||
{
|
||||
|
|
@ -166,6 +224,10 @@ int main(int argc, char** argv)
|
|||
reinterpret_cast<uint8_t *>(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize());
|
||||
}
|
||||
|
||||
#ifdef XENOS_RECOMP_AIR
|
||||
air.insert(air.end(), shader.air.begin(), shader.air.end());
|
||||
#endif
|
||||
|
||||
spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end());
|
||||
}
|
||||
|
||||
|
|
@ -189,6 +251,22 @@ int main(int argc, char** argv)
|
|||
f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size());
|
||||
#endif
|
||||
|
||||
#ifdef XENOS_RECOMP_AIR
|
||||
fmt::println("Compressing AIR cache...");
|
||||
|
||||
std::vector<uint8_t> airCompressed(ZSTD_compressBound(air.size()));
|
||||
airCompressed.resize(ZSTD_compress(airCompressed.data(), airCompressed.size(), air.data(), air.size(), level));
|
||||
|
||||
f.print("const uint8_t g_compressedAirCache[] = {{");
|
||||
|
||||
for (auto data : airCompressed)
|
||||
f.print("{},", data);
|
||||
|
||||
f.println("}};");
|
||||
f.println("const size_t g_airCacheCompressedSize = {};", airCompressed.size());
|
||||
f.println("const size_t g_airCacheDecompressedSize = {};", air.size());
|
||||
#endif
|
||||
|
||||
fmt::println("Compressing SPIRV cache...");
|
||||
|
||||
std::vector<uint8_t> spirvCompressed(ZSTD_compressBound(spirv.size()));
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef _WIN32
|
||||
#define NOMINMAX
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -10,10 +10,12 @@
|
|||
#define SPEC_CONSTANT_REVERSE_Z (1 << 4)
|
||||
#endif
|
||||
|
||||
#if !defined(__cplusplus) || defined(__INTELLISENSE__)
|
||||
#if defined(__air__) || !defined(__cplusplus) || defined(__INTELLISENSE__)
|
||||
|
||||
#ifndef __air__
|
||||
#define FLT_MIN asfloat(0xff7fffff)
|
||||
#define FLT_MAX asfloat(0x7f7fffff)
|
||||
#endif
|
||||
|
||||
#ifdef __spirv__
|
||||
|
||||
|
|
@ -35,6 +37,32 @@ struct PushConstants
|
|||
|
||||
#define g_SpecConstants() g_SpecConstants
|
||||
|
||||
#elif defined(__air__)
|
||||
|
||||
#include <metal_stdlib>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
constant uint G_SPEC_CONSTANTS [[function_constant(0)]];
|
||||
constant uint G_SPEC_CONSTANTS_VAL = is_function_constant_defined(G_SPEC_CONSTANTS) ? G_SPEC_CONSTANTS : 0;
|
||||
|
||||
// Metal accessor for the specialization-constant bit mask: returns
// G_SPEC_CONSTANTS_VAL, which is the function constant at index 0 when it is
// defined and 0 otherwise.
uint g_SpecConstants()
|
||||
{
|
||||
return G_SPEC_CONSTANTS_VAL;
|
||||
}
|
||||
|
||||
// Metal layout of the push constants: three 64-bit values. SharedConstants is
// used as a raw address by the g_Booleans / g_SwappedTexcoords /
// g_HalfPixelOffset / g_AlphaThreshold macros, which reinterpret
// (SharedConstants + byte offset) as a device pointer.
// NOTE(review): the other two fields are presumably addresses as well — confirm.
struct PushConstants
|
||||
{
|
||||
ulong VertexShaderConstants;
|
||||
ulong PixelShaderConstants;
|
||||
ulong SharedConstants;
|
||||
};
|
||||
|
||||
#define g_Booleans (*(reinterpret_cast<device uint*>(g_PushConstants.SharedConstants + 256)))
|
||||
#define g_SwappedTexcoords (*(reinterpret_cast<device uint*>(g_PushConstants.SharedConstants + 260)))
|
||||
#define g_HalfPixelOffset (*(reinterpret_cast<device float2*>(g_PushConstants.SharedConstants + 264)))
|
||||
#define g_AlphaThreshold (*(reinterpret_cast<device float*>(g_PushConstants.SharedConstants + 272)))
|
||||
|
||||
#else
|
||||
|
||||
#define DEFINE_SHARED_CONSTANTS() \
|
||||
|
|
@ -47,6 +75,93 @@ uint g_SpecConstants();
|
|||
|
||||
#endif
|
||||
|
||||
// Scratch state shared by cube() and tfetchCube(): cube() stores a direction
// and advances the index, tfetchCube() reads a stored direction back.
struct CubeMapData
|
||||
{
|
||||
// Directions recorded by cube(); only two slots are available.
float3 cubeMapDirections[2];
|
||||
// Slot the next cube() call will write to.
uint cubeMapIndex;
|
||||
};
|
||||
|
||||
#ifdef __air__
|
||||
|
||||
// One entry of the Metal 2D texture heap; helpers index an array of these by
// resourceDescriptorIndex and read `tex`.
struct Texture2DDescriptorHeap
|
||||
{
|
||||
texture2d<float> tex;
|
||||
};
|
||||
|
||||
// One entry of the Metal 3D texture heap; tfetch3D indexes an array of these
// by resourceDescriptorIndex and reads `tex`.
struct Texture3DDescriptorHeap
|
||||
{
|
||||
texture3d<float> tex;
|
||||
};
|
||||
|
||||
// One entry of the Metal cube texture heap; tfetchCube indexes an array of
// these by resourceDescriptorIndex and reads `tex`.
struct TextureCubeDescriptorHeap
|
||||
{
|
||||
texturecube<float> tex;
|
||||
};
|
||||
|
||||
// One entry of the Metal sampler heap; the tfetch* helpers index an array of
// these by samplerDescriptorIndex and read `samp`.
struct SamplerDescriptorHeap
|
||||
{
|
||||
sampler samp;
|
||||
};
|
||||
|
||||
// Returns (width, height) of `texture` as reported by get_width() / get_height().
uint2 getTexture2DDimensions(texture2d<float> texture)
{
    const uint width = texture.get_width();
    const uint height = texture.get_height();
    return uint2(width, height);
}
|
||||
|
||||
// Samples a 2D texture from the heap at `texCoord`, shifted by `offset`
// expressed in texels (the offset is divided by the texture dimensions).
float4 tfetch2D(constant Texture2DDescriptorHeap* textureHeap,
                constant SamplerDescriptorHeap* samplerHeap,
                uint resourceDescriptorIndex,
                uint samplerDescriptorIndex,
                float2 texCoord, float2 offset)
{
    texture2d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    sampler samp = samplerHeap[samplerDescriptorIndex].samp;

    // Convert the texel offset into normalized coordinate units.
    const float2 normalizedOffset = offset / (float2)getTexture2DDimensions(tex);
    return tex.sample(samp, texCoord + normalizedOffset);
}
|
||||
|
||||
// Returns the fractional part of the texel-space position
// (texCoord * dimensions + offset - 0.5) per component, or 0 for components
// of `texCoord` that are NaN. The sampler arguments are accepted but unused.
float2 getWeights2D(constant Texture2DDescriptorHeap* textureHeap,
                    constant SamplerDescriptorHeap* samplerHeap,
                    uint resourceDescriptorIndex,
                    uint samplerDescriptorIndex,
                    float2 texCoord, float2 offset)
{
    texture2d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    const float2 weights = fract(texCoord * float2(getTexture2DDimensions(tex)) + offset - 0.5);

    // Metal select(a, b, c) picks b where c is true, a otherwise.
    return select(weights, 0.0, isnan(texCoord));
}
|
||||
|
||||
// Samples the 3D texture selected by `resourceDescriptorIndex` at `texCoord`
// using the sampler selected by `samplerDescriptorIndex`.
float4 tfetch3D(constant Texture3DDescriptorHeap* textureHeap,
                constant SamplerDescriptorHeap* samplerHeap,
                uint resourceDescriptorIndex,
                uint samplerDescriptorIndex,
                float3 texCoord)
{
    sampler samp = samplerHeap[samplerDescriptorIndex].samp;
    texture3d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    return tex.sample(samp, texCoord);
}
|
||||
|
||||
// Samples a cube texture along a direction previously stored by cube():
// texCoord.z (truncated to uint) selects which stored direction to use.
float4 tfetchCube(constant TextureCubeDescriptorHeap* textureHeap,
                  constant SamplerDescriptorHeap* samplerHeap,
                  uint resourceDescriptorIndex,
                  uint samplerDescriptorIndex,
                  float3 texCoord, thread CubeMapData* cubeMapData)
{
    sampler samp = samplerHeap[samplerDescriptorIndex].samp;
    texturecube<float> tex = textureHeap[resourceDescriptorIndex].tex;

    const float3 direction = cubeMapData->cubeMapDirections[(uint)texCoord.z];
    return tex.sample(samp, direction);
}
|
||||
|
||||
// Records value.xyz in the next free direction slot of `cubeMapData`, advances
// the slot index, and returns (0, 0, 0, slot) so a later tfetchCube can find it.
// No bounds check is performed; the direction array has two slots.
float4 cube(float4 value, thread CubeMapData* cubeMapData)
{
    const uint slot = cubeMapData->cubeMapIndex;
    cubeMapData->cubeMapDirections[slot] = value.xyz;
    cubeMapData->cubeMapIndex = slot + 1;

    return float4(0.0, 0.0, 0.0, slot);
}
|
||||
|
||||
#else
|
||||
|
||||
Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0);
|
||||
Texture3D<float4> g_Texture3DDescriptorHeap[] : register(t0, space1);
|
||||
TextureCube<float4> g_TextureCubeDescriptorHeap[] : register(t0, space2);
|
||||
|
|
@ -71,6 +186,85 @@ float2 getWeights2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, f
|
|||
return select(isnan(texCoord), 0.0, frac(texCoord * getTexture2DDimensions(texture) + offset - 0.5));
|
||||
}
|
||||
|
||||
// Samples the 3D texture selected by `resourceDescriptorIndex` at `texCoord`
// using the sampler selected by `samplerDescriptorIndex`.
float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord)
{
    Texture3D<float4> tex = g_Texture3DDescriptorHeap[resourceDescriptorIndex];
    return tex.Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord);
}
|
||||
|
||||
// Samples a cube texture along a direction previously stored by cube():
// texCoord.z selects which stored direction to use.
float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData)
{
    TextureCube<float4> tex = g_TextureCubeDescriptorHeap[resourceDescriptorIndex];
    return tex.Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]);
}
|
||||
|
||||
// Records value.xyz in the next free direction slot of `cubeMapData`, advances
// the slot index, and returns (0, 0, 0, slot) so a later tfetchCube can find it.
// No bounds check is performed; the direction array has two slots.
float4 cube(float4 value, inout CubeMapData cubeMapData)
{
    const uint slot = cubeMapData.cubeMapIndex;
    cubeMapData.cubeMapDirections[slot] = value.xyz;
    cubeMapData.cubeMapIndex = slot + 1;

    return float4(0.0, 0.0, 0.0, slot);
}
|
||||
|
||||
#endif
|
||||
|
||||
// Decodes fetched vertex data. Without SPEC_CONSTANT_R11G11B10_NORMAL the four
// 32-bit values are simply reinterpreted as floats. With it, value.x is
// unpacked as three signed fixed-point fields:
//   x: bits 0-9 / 1024, minus 1 when bit 10 is set
//   y: bits 11-20 / 1024, minus 1 when bit 21 is set
//   z: bits 22-30 / 512, minus 1 when bit 31 is set
// and w is 0.
float4 tfetchR11G11B10(uint4 value)
{
    if (!(g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL))
    {
#ifdef __air__
        return as_type<float4>(value);
#else
        return asfloat(value);
#endif
    }

    return float4(
        (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0),
        (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0),
        (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0),
        0.0);
}
|
||||
|
||||
#ifdef __air__
|
||||
#define selectWrapper(a, b, c) select(c, b, a)
|
||||
#else
|
||||
#define selectWrapper(a, b, c) select(a, b, c)
|
||||
#endif
|
||||
|
||||
#ifdef __air__
|
||||
#define frac(X) fract(X)
|
||||
|
||||
// Metal stand-in for HLSL clip(): discards the fragment when `a` is below zero.
template<typename T>
void clip(T a)
{
    // Written as a negated '<' so a NaN input still falls through without discarding.
    if (!(a < 0.0))
        return;

    discard_fragment();
}
|
||||
|
||||
// Metal stand-in for HLSL rcp(): returns 1 / a as a scalar float.
template<typename T>
float rcp(T a)
{
    const float reciprocal = 1.0 / a;
    return reciprocal;
}
|
||||
|
||||
// Metal stand-in for HLSL mul(a, b): evaluates b * a (operands swapped relative
// to the call) and returns the result as a float4x4.
template<typename T>
float4x4 mul(T a, T b)
{
    const float4x4 product = b * a;
    return product;
}
|
||||
#endif
|
||||
|
||||
#ifdef __air__
|
||||
#define UNROLL
|
||||
#define BRANCH
|
||||
#else
|
||||
#define UNROLL [unroll]
|
||||
#define BRANCH [branch]
|
||||
#endif
|
||||
|
||||
float w0(float a)
|
||||
{
|
||||
return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
|
||||
|
|
@ -111,6 +305,46 @@ float h1(float a)
|
|||
return 1.0f + w3(a) / (w2(a) + w3(a)) + 0.5f;
|
||||
}
|
||||
|
||||
#ifdef __air__
|
||||
|
||||
// Bicubic texture fetch built from four samples. The texel-space position
// (texCoord * dimensions + offset - 0.5) is split into integer and fractional
// parts; the fractional parts drive the g0/g1 blend weights and the h0/h1
// sample offsets.
float4 tfetch2DBicubic(constant Texture2DDescriptorHeap* textureHeap,
                       constant SamplerDescriptorHeap* samplerHeap,
                       uint resourceDescriptorIndex,
                       uint samplerDescriptorIndex,
                       float2 texCoord, float2 offset)
{
    texture2d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    sampler samp = samplerHeap[samplerDescriptorIndex].samp;
    uint2 dimensions = getTexture2DDimensions(tex);

    // Texel-space sample position.
    float texelX = texCoord.x * dimensions.x + offset.x;
    float texelY = texCoord.y * dimensions.y + offset.y;

    texelX -= 0.5f;
    texelY -= 0.5f;

    // Integer texel and fractional position inside it.
    float px = floor(texelX);
    float py = floor(texelY);
    float fx = texelX - px;
    float fy = texelY - py;

    // Horizontal and vertical weights and offsets.
    float g0x = g0(fx);
    float g1x = g1(fx);
    float h0x = h0(fx);
    float h1x = h1(fx);
    float h0y = h0(fy);
    float h1y = h1(fy);

    // The four taps, in normalized coordinates.
    float4 tap00 = tex.sample(samp, float2(px + h0x, py + h0y) / float2(dimensions));
    float4 tap10 = tex.sample(samp, float2(px + h1x, py + h0y) / float2(dimensions));
    float4 tap01 = tex.sample(samp, float2(px + h0x, py + h1y) / float2(dimensions));
    float4 tap11 = tex.sample(samp, float2(px + h1x, py + h1y) / float2(dimensions));

    float4 r =
        g0(fy) * (g0x * tap00 +
                  g1x * tap10) +
        g1(fy) * (g0x * tap01 +
                  g1x * tap11);

    return r;
}
|
||||
|
||||
#else
|
||||
|
||||
float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset)
|
||||
{
|
||||
Texture2D<float4> texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
|
||||
|
|
@ -143,50 +377,11 @@ float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex
|
|||
return r;
|
||||
}
|
||||
|
||||
float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord)
|
||||
{
|
||||
return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct CubeMapData
|
||||
float4 swapFloats(uint swappedFloats, float4 value, uint semanticIndex)
|
||||
{
|
||||
float3 cubeMapDirections[2];
|
||||
uint cubeMapIndex;
|
||||
};
|
||||
|
||||
float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData)
|
||||
{
|
||||
return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]);
|
||||
}
|
||||
|
||||
float4 tfetchR11G11B10(uint4 value)
|
||||
{
|
||||
if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL)
|
||||
{
|
||||
return float4(
|
||||
(value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0),
|
||||
(value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0),
|
||||
(value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0),
|
||||
0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
return asfloat(value);
|
||||
}
|
||||
}
|
||||
|
||||
float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex)
|
||||
{
|
||||
return (swappedTexcoords & (1ull << semanticIndex)) != 0 ? value.yxwz : value;
|
||||
}
|
||||
|
||||
float4 cube(float4 value, inout CubeMapData cubeMapData)
|
||||
{
|
||||
uint index = cubeMapData.cubeMapIndex;
|
||||
cubeMapData.cubeMapDirections[index] = value.xyz;
|
||||
++cubeMapData.cubeMapIndex;
|
||||
|
||||
return float4(0.0, 0.0, 0.0, index);
|
||||
return (swappedFloats & (1ull << semanticIndex)) != 0 ? value.yxwz : value;
|
||||
}
|
||||
|
||||
float4 dst(float4 src0, float4 src1)
|
||||
|
|
@ -204,15 +399,34 @@ float4 max4(float4 src0)
|
|||
return max(max(src0.x, src0.y), max(src0.z, src0.w));
|
||||
}
|
||||
|
||||
#ifdef __air__
|
||||
|
||||
// Converts a normalized texture coordinate into texel units for the 2D texture
// selected by `resourceDescriptorIndex` (dimensions * texCoord).
float2 getPixelCoord(constant Texture2DDescriptorHeap* textureHeap,
                     uint resourceDescriptorIndex,
                     float2 texCoord)
{
    texture2d<float> tex = textureHeap[resourceDescriptorIndex].tex;
    const float2 dimensions = (float2)getTexture2DDimensions(tex);
    return dimensions * texCoord;
}
|
||||
|
||||
#else
|
||||
|
||||
// Converts a normalized texture coordinate into texel units for the 2D texture
// selected by `resourceDescriptorIndex` (dimensions * texCoord).
float2 getPixelCoord(uint resourceDescriptorIndex, float2 texCoord)
{
    Texture2D<float4> tex = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
    return getTexture2DDimensions(tex) * texCoord;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Estimates a mip level from screen-space derivatives of a texel-space
// coordinate: 0.5 * log2 of the larger squared derivative length, clamped so
// it never goes below 0.
float computeMipLevel(float2 pixelCoord)
{
#ifdef __air__
    float2 gradX = dfdx(pixelCoord);
    float2 gradY = dfdy(pixelCoord);
#else
    float2 gradX = ddx(pixelCoord);
    float2 gradY = ddy(pixelCoord);
#endif
    float lengthSqrX = dot(gradX, gradX);
    float lengthSqrY = dot(gradY, gradY);
    float deltaMaxSqr = max(lengthSqrX, lengthSqrY);

    return max(0.0, 0.5 * log2(deltaMaxSqr));
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -45,7 +45,7 @@ struct ShaderRecompiler : StringBuffer
|
|||
out += '\t';
|
||||
}
|
||||
|
||||
void printDstSwizzle(uint32_t dstSwizzle, bool operand);
|
||||
uint32_t printDstSwizzle(uint32_t dstSwizzle, bool operand);
|
||||
void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle);
|
||||
|
||||
void recompile(const VertexFetchInstruction& instr, uint32_t address);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue