D3D9 shader generation.

This commit is contained in:
Skyth 2025-07-24 18:26:58 +03:00
parent 421e3b3e79
commit be84413dbd
10 changed files with 140 additions and 971 deletions

View file

@ -1,31 +1,20 @@
project(XenosRecomp) project(XenosRecomp)
if (WIN32)
option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON)
endif()
set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source")
add_executable(XenosRecomp add_executable(XenosRecomp
constant_table.h constant_table.h
dxc_compiler.cpp fxc_compiler.cpp
dxc_compiler.h fxc_compiler.h
main.cpp main.cpp
pch.h pch.h
shader.h shader.h
shader_code.h shader_code.h
shader_recompiler.cpp shader_recompiler.cpp
shader_recompiler.h shader_recompiler.h)
"${SMOLV_SOURCE_DIR}/smolv.cpp")
target_link_libraries(XenosRecomp PRIVATE target_link_libraries(XenosRecomp PRIVATE
Microsoft::DirectXShaderCompiler d3dcompiler.lib
xxHash::xxhash
libzstd_static
fmt::fmt) fmt::fmt)
target_include_directories(XenosRecomp PRIVATE ${SMOLV_SOURCE_DIR})
target_precompile_headers(XenosRecomp PRIVATE pch.h) target_precompile_headers(XenosRecomp PRIVATE pch.h)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
@ -41,13 +30,4 @@ endif()
if (WIN32) if (WIN32)
target_compile_definitions(XenosRecomp PRIVATE _CRT_SECURE_NO_WARNINGS) target_compile_definitions(XenosRecomp PRIVATE _CRT_SECURE_NO_WARNINGS)
add_custom_command(TARGET XenosRecomp POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_RUNTIME_DLLS:XenosRecomp> $<TARGET_FILE_DIR:XenosRecomp>
COMMAND_EXPAND_LISTS
)
endif()
if (XENOS_RECOMP_DXIL)
target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL)
target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL)
endif() endif()

View file

@ -1,100 +0,0 @@
#include "dxc_compiler.h"
// Creates the DXC compiler instance stored in dxcCompiler.
DxcCompiler::DxcCompiler()
{
    // IID_PPV_ARGS supplies both the interface id and the out-pointer for dxcCompiler.
    const HRESULT creationResult = DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler));
    assert(SUCCEEDED(creationResult));
}
// Releases the DXC compiler instance created by the constructor.
DxcCompiler::~DxcCompiler()
{
    // The constructor only asserts that creation succeeded, so with asserts compiled out
    // (NDEBUG) dxcCompiler may still be null here; guard instead of dereferencing it.
    if (dxcCompiler != nullptr)
    {
        dxcCompiler->Release();
        dxcCompiler = nullptr;
    }
}
// Compiles HLSL source with DXC and returns the resulting object blob.
//
// shaderSource       - HLSL text to compile.
// compilePixelShader - when not compiling a library, selects ps_6_0 (true) or vs_6_0 (false);
//                      for SPIR-V output, vertex shaders additionally get -fvk-invert-y.
// compileLibrary     - compiles with the lib_6_3 profile; must not be combined with compileSpirv.
// compileSpirv       - emits SPIR-V (-spirv, -fvk-use-dx-layout) instead of DXIL.
//
// Returns the compiled blob, which the caller must Release(), or nullptr when compilation
// fails. Compiler error output for a failed compilation is written to stderr.
IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixelShader, bool compileLibrary, bool compileSpirv)
{
    DxcBuffer source{};
    source.Ptr = shaderSource.c_str();
    source.Size = shaderSource.size();

    const wchar_t* args[32]{};
    uint32_t argCount = 0;

    const wchar_t* target = nullptr;
    if (compileLibrary)
    {
        assert(!compileSpirv);
        target = L"-T lib_6_3";
    }
    else
    {
        target = compilePixelShader ? L"-T ps_6_0" : L"-T vs_6_0";
    }

    args[argCount++] = target;
    args[argCount++] = L"-HV 2021";
    args[argCount++] = L"-all-resources-bound";

    if (compileSpirv)
    {
        args[argCount++] = L"-spirv";
        args[argCount++] = L"-fvk-use-dx-layout";

        if (!compilePixelShader)
            args[argCount++] = L"-fvk-invert-y";
    }
    else
    {
        args[argCount++] = L"-Wno-ignored-attributes";
        args[argCount++] = L"-Qstrip_reflect";
    }

    args[argCount++] = L"-Qstrip_debug";

#ifdef UNLEASHED_RECOMP
    args[argCount++] = L"-DUNLEASHED_RECOMP";
#endif

    IDxcResult* result = nullptr;
    HRESULT hr = dxcCompiler->Compile(&source, args, argCount, nullptr, IID_PPV_ARGS(&result));

    // A successful call is expected to yield a result object and a failed one none.
    assert(SUCCEEDED(hr) == (result != nullptr));

    if (FAILED(hr) || result == nullptr)
    {
        // Do not leak a result object that was handed back alongside a failure code.
        if (result != nullptr)
            result->Release();

        return nullptr;
    }

    IDxcBlob* object = nullptr;

    // Start from a failure code so a failed GetStatus() is never read as an uninitialized value.
    HRESULT status = E_FAIL;
    hr = result->GetStatus(&status);
    assert(SUCCEEDED(hr));

    if (SUCCEEDED(hr) && SUCCEEDED(status))
    {
        hr = result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&object), nullptr);
        assert(SUCCEEDED(hr) && object != nullptr);
    }
    else if (result->HasOutput(DXC_OUT_ERRORS))
    {
        IDxcBlobUtf8* errors = nullptr;
        hr = result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr);
        assert(SUCCEEDED(hr) && errors != nullptr);

        // With asserts disabled the error blob may be missing; only print what exists.
        if (errors != nullptr)
        {
            fputs(errors->GetStringPointer(), stderr);
            errors->Release();
        }
    }

    result->Release();
    return object;
}

View file

@ -1,11 +0,0 @@
#pragma once
// Owns a DXC compiler instance and exposes a single compile entry point.
struct DxcCompiler
{
    // Created by the constructor and released by the destructor.
    IDxcCompiler3* dxcCompiler = nullptr;

    DxcCompiler();
    ~DxcCompiler();

    // The destructor releases dxcCompiler, so a copy would release the same interface a
    // second time without having added a reference; forbid copying.
    DxcCompiler(const DxcCompiler&) = delete;
    DxcCompiler& operator=(const DxcCompiler&) = delete;

    // Compiles shaderSource as a pixel/vertex shader or library, to DXIL or SPIR-V.
    // Returns a blob the caller must Release(), or nullptr on failure.
    IDxcBlob* compile(const std::string& shaderSource, bool compilePixelShader, bool compileLibrary, bool compileSpirv);
};

View file

@ -0,0 +1,28 @@
#include "fxc_compiler.h"
// Compiles HLSL source with D3DCompile for Shader Model 3.0.
//
// shaderSource       - HLSL text; the entry point compiled is "main".
// compilePixelShader - true compiles with the ps_3_0 profile, false with vs_3_0.
//
// Returns the compiled bytecode blob, which the caller must Release(), or nullptr when
// compilation fails. Failure diagnostics are written to stderr.
ID3DBlob* FxcCompiler::compile(const std::string& shaderSource, bool compilePixelShader)
{
    ID3DBlob* code = nullptr;
    ID3DBlob* errorMsgs = nullptr;

    const HRESULT result = D3DCompile(
        shaderSource.data(),
        shaderSource.size(),
        nullptr,
        nullptr,
        nullptr,
        "main",
        compilePixelShader ? "ps_3_0" : "vs_3_0",
        0,
        0,
        &code,
        &errorMsgs);

    if (FAILED(result))
    {
        if (errorMsgs != nullptr)
        {
            fputs(static_cast<const char*>(errorMsgs->GetBufferPointer()), stderr);
        }
        else
        {
            // Without an error blob the failure would otherwise go completely unreported.
            fprintf(stderr, "D3DCompile failed with HRESULT 0x%08lX\n", static_cast<unsigned long>(result));
        }

        // Never hand back a blob for a compilation that reported failure.
        if (code != nullptr)
        {
            code->Release();
            code = nullptr;
        }
    }

    if (errorMsgs != nullptr)
        errorMsgs->Release();

    return code;
}

View file

@ -0,0 +1,6 @@
#pragma once
// Stateless wrapper around the D3DCompile-based shader compiler.
struct FxcCompiler
{
    // Compiles shaderSource as a pixel shader (compilePixelShader == true) or a vertex shader.
    // The returned blob belongs to the caller and must be Release()d; [[nodiscard]] flags
    // call sites that would drop it and leak the blob.
    [[nodiscard]] static ID3DBlob* compile(const std::string& shaderSource, bool compilePixelShader);
};

View file

@ -1,6 +1,6 @@
#include "shader.h" #include "shader.h"
#include "shader_recompiler.h" #include "shader_recompiler.h"
#include "dxc_compiler.h" #include "fxc_compiler.h"
static std::unique_ptr<uint8_t[]> readAllBytes(const char* filePath, size_t& fileSize) static std::unique_ptr<uint8_t[]> readAllBytes(const char* filePath, size_t& fileSize)
{ {
@ -21,198 +21,37 @@ static void writeAllBytes(const char* filePath, const void* data, size_t dataSiz
fclose(file); fclose(file);
} }
// Per-shader record stored in the hash-keyed shader map while the cache is built.
struct RecompiledShader
{
// Points at the shader container inside a loaded input file buffer; not owned by this struct.
uint8_t* data = nullptr;
// DXIL blob returned by the DXC compile; only assigned when XENOS_RECOMP_DXIL is defined.
IDxcBlob* dxil = nullptr;
// SPIR-V output after smolv::Encode.
std::vector<uint8_t> spirv;
// Copied from ShaderRecompiler::specConstantsMask after recompilation.
uint32_t specConstantsMask = 0;
};
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
#ifndef XENOS_RECOMP_INPUT std::filesystem::path input(argv[1]);
if (argc < 4) std::filesystem::path output(argv[2]);
{ const char* includeInput = argv[3];
printf("Usage: XenosRecomp [input path] [output path] [shader common header file path]");
return 0;
}
#endif
const char* input =
#ifdef XENOS_RECOMP_INPUT
XENOS_RECOMP_INPUT
#else
argv[1]
#endif
;
const char* output =
#ifdef XENOS_RECOMP_OUTPUT
XENOS_RECOMP_OUTPUT
#else
argv[2]
#endif
;
const char* includeInput =
#ifdef XENOS_RECOMP_INCLUDE_INPUT
XENOS_RECOMP_INCLUDE_INPUT
#else
argv[3]
#endif
;
size_t includeSize = 0; size_t includeSize = 0;
auto includeData = readAllBytes(includeInput, includeSize); auto includeData = readAllBytes(includeInput, includeSize);
std::string_view include(reinterpret_cast<const char*>(includeData.get()), includeSize); std::string_view include(reinterpret_cast<const char*>(includeData.get()), includeSize);
if (std::filesystem::is_directory(input)) for (auto& inputFile : std::filesystem::directory_iterator(input))
{ {
std::vector<std::unique_ptr<uint8_t[]>> files; if ((inputFile.path().extension() == ".xvu") || (inputFile.path().extension() == ".xpu"))
std::map<XXH64_hash_t, RecompiledShader> shaders;
for (auto& file : std::filesystem::recursive_directory_iterator(input))
{ {
if (std::filesystem::is_directory(file)) fmt::println("{}", inputFile.path().string());
auto outputFile = (output / inputFile.path().filename()).string();
outputFile[outputFile.size() - 3] = 'w';
ShaderRecompiler recompiler;
size_t fileSize;
recompiler.recompile(readAllBytes(inputFile.path().string().c_str(), fileSize).get(), include);
ID3DBlob* blob = FxcCompiler::compile(recompiler.out, recompiler.isPixelShader);
if (blob != nullptr)
{ {
continue; writeAllBytes(outputFile.c_str(), blob->GetBufferPointer(), blob->GetBufferSize());
blob->Release();
} }
size_t fileSize = 0; //writeAllBytes((outputFile + ".hlsl").c_str(), recompiler.out.data(), recompiler.out.size());
auto fileData = readAllBytes(file.path().string().c_str(), fileSize);
bool foundAny = false;
for (size_t i = 0; fileSize > sizeof(ShaderContainer) && i < fileSize - sizeof(ShaderContainer) - 1;)
{
auto shaderContainer = reinterpret_cast<const ShaderContainer*>(fileData.get() + i);
size_t dataSize = shaderContainer->virtualSize + shaderContainer->physicalSize;
if ((shaderContainer->flags & 0xFFFFFF00) == 0x102A1100 &&
dataSize <= (fileSize - i) &&
shaderContainer->field1C == 0 &&
shaderContainer->field20 == 0)
{
XXH64_hash_t hash = XXH3_64bits(shaderContainer, dataSize);
auto shader = shaders.try_emplace(hash);
if (shader.second)
{
shader.first->second.data = fileData.get() + i;
foundAny = true;
}
i += dataSize;
}
else
{
i += sizeof(uint32_t);
}
}
if (foundAny)
files.emplace_back(std::move(fileData));
} }
std::atomic<uint32_t> progress = 0;
std::for_each(std::execution::par_unseq, shaders.begin(), shaders.end(), [&](auto& hashShaderPair)
{
auto& shader = hashShaderPair.second;
thread_local ShaderRecompiler recompiler;
recompiler = {};
recompiler.recompile(shader.data, include);
shader.specConstantsMask = recompiler.specConstantsMask;
thread_local DxcCompiler dxcCompiler;
#ifdef XENOS_RECOMP_DXIL
shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false);
assert(shader.dxil != nullptr);
assert(*(reinterpret_cast<uint32_t *>(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!");
#endif
IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true);
assert(spirv != nullptr);
bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo);
assert(result);
spirv->Release();
size_t currentProgress = ++progress;
if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1))
fmt::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f);
});
fmt::println("Creating shader cache...");
StringBuffer f;
f.println("#include \"shader_cache.h\"");
f.println("ShaderCacheEntry g_shaderCacheEntries[] = {{");
std::vector<uint8_t> dxil;
std::vector<uint8_t> spirv;
for (auto& [hash, shader] : shaders)
{
f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},",
hash, dxil.size(), (shader.dxil != nullptr) ? shader.dxil->GetBufferSize() : 0, spirv.size(), shader.spirv.size(), shader.specConstantsMask);
if (shader.dxil != nullptr)
{
dxil.insert(dxil.end(), reinterpret_cast<uint8_t *>(shader.dxil->GetBufferPointer()),
reinterpret_cast<uint8_t *>(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize());
}
spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end());
}
f.println("}};");
fmt::println("Compressing DXIL cache...");
int level = ZSTD_maxCLevel();
#ifdef XENOS_RECOMP_DXIL
std::vector<uint8_t> dxilCompressed(ZSTD_compressBound(dxil.size()));
dxilCompressed.resize(ZSTD_compress(dxilCompressed.data(), dxilCompressed.size(), dxil.data(), dxil.size(), level));
f.print("const uint8_t g_compressedDxilCache[] = {{");
for (auto data : dxilCompressed)
f.print("{},", data);
f.println("}};");
f.println("const size_t g_dxilCacheCompressedSize = {};", dxilCompressed.size());
f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size());
#endif
fmt::println("Compressing SPIRV cache...");
std::vector<uint8_t> spirvCompressed(ZSTD_compressBound(spirv.size()));
spirvCompressed.resize(ZSTD_compress(spirvCompressed.data(), spirvCompressed.size(), spirv.data(), spirv.size(), level));
f.print("const uint8_t g_compressedSpirvCache[] = {{");
for (auto data : spirvCompressed)
f.print("{},", data);
f.println("}};");
f.println("const size_t g_spirvCacheCompressedSize = {};", spirvCompressed.size());
f.println("const size_t g_spirvCacheDecompressedSize = {};", spirv.size());
f.println("const size_t g_shaderCacheEntryCount = {};", shaders.size());
writeAllBytes(output, f.out.data(), f.out.size());
}
else
{
ShaderRecompiler recompiler;
size_t fileSize;
recompiler.recompile(readAllBytes(input, fileSize).get(), include);
writeAllBytes(output, recompiler.out.data(), recompiler.out.size());
} }
return 0; return 0;

View file

@ -4,7 +4,7 @@
#include <Windows.h> #include <Windows.h>
#endif #endif
#include <dxcapi.h> #include <d3dcompiler.h>
#include <bit> #include <bit>
#include <cassert> #include <cassert>
@ -12,12 +12,9 @@
#include <execution> #include <execution>
#include <filesystem> #include <filesystem>
#include <map> #include <map>
#include <smolv.h>
#include <fmt/core.h> #include <fmt/core.h>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <xxhash.h>
#include <zstd.h>
template<typename T> template<typename T>
static T byteSwap(T value) static T byteSwap(T value)

View file

@ -1,183 +1,15 @@
#ifndef SHADER_COMMON_H_INCLUDED #ifndef SHADER_COMMON_H_INCLUDED
#define SHADER_COMMON_H_INCLUDED #define SHADER_COMMON_H_INCLUDED
#define SPEC_CONSTANT_R11G11B10_NORMAL (1 << 0)
#define SPEC_CONSTANT_ALPHA_TEST (1 << 1)
#ifdef UNLEASHED_RECOMP
#define SPEC_CONSTANT_BICUBIC_GI_FILTER (1 << 2)
#define SPEC_CONSTANT_ALPHA_TO_COVERAGE (1 << 3)
#define SPEC_CONSTANT_REVERSE_Z (1 << 4)
#endif
#if !defined(__cplusplus) || defined(__INTELLISENSE__)
#define FLT_MIN asfloat(0xff7fffff)
#define FLT_MAX asfloat(0x7f7fffff)
#ifdef __spirv__
// Push-constant block for the SPIR-V build (bound as g_PushConstants right after this struct).
// Each member is a 64-bit value used as the base address of a vk::RawBufferLoad.
struct PushConstants
{
uint64_t VertexShaderConstants;
uint64_t PixelShaderConstants;
// Base for g_Booleans (+256), g_SwappedTexcoords (+260), g_HalfPixelOffset (+264) and g_AlphaThreshold (+272).
uint64_t SharedConstants;
};
[[vk::push_constant]] ConstantBuffer<PushConstants> g_PushConstants;
#define g_Booleans vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + 256)
#define g_SwappedTexcoords vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + 260)
#define g_HalfPixelOffset vk::RawBufferLoad<float2>(g_PushConstants.SharedConstants + 264)
#define g_AlphaThreshold vk::RawBufferLoad<float>(g_PushConstants.SharedConstants + 272)
[[vk::constant_id(0)]] const uint g_SpecConstants = 0;
#define g_SpecConstants() g_SpecConstants
#else
#define DEFINE_SHARED_CONSTANTS() \
uint g_Booleans : packoffset(c16.x); \
uint g_SwappedTexcoords : packoffset(c16.y); \
float2 g_HalfPixelOffset : packoffset(c16.z); \
float g_AlphaThreshold : packoffset(c17.x);
uint g_SpecConstants();
#endif
Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0);
Texture3D<float4> g_Texture3DDescriptorHeap[] : register(t0, space1);
TextureCube<float4> g_TextureCubeDescriptorHeap[] : register(t0, space2);
SamplerState g_SamplerDescriptorHeap[] : register(s0, space3);
// Returns the width and height of a 2D texture as reported by GetDimensions.
uint2 getTexture2DDimensions(Texture2D<float4> texture)
{
    uint width;
    uint height;
    texture.GetDimensions(width, height);
    return uint2(width, height);
}
// Samples the 2D texture at resourceDescriptorIndex with the sampler at samplerDescriptorIndex.
// The offset is divided by the texture dimensions before being added to texCoord.
float4 tfetch2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset)
{
    Texture2D<float4> source = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
    SamplerState sourceSampler = g_SamplerDescriptorHeap[samplerDescriptorIndex];
    float2 shiftedCoord = texCoord + offset / getTexture2DDimensions(source);
    return source.Sample(sourceSampler, shiftedCoord);
}
// Returns the fractional part of the texel-space coordinate (texCoord scaled by the texture
// dimensions, plus offset, minus half a texel). Components whose texCoord is NaN yield 0.
float2 getWeights2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset)
{
    Texture2D<float4> source = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
    float2 weights = frac(texCoord * getTexture2DDimensions(source) + offset - 0.5);
    return select(isnan(texCoord), 0.0, weights);
}
// Cubic weight polynomials evaluated at the fractional position `a`; each is scaled by 1/6.
float w0(float a)
{
    float inner = a * (-a + 3.0f) - 3.0f;
    return (1.0f / 6.0f) * (a * inner + 1.0f);
}

float w1(float a)
{
    float squared = a * a;
    return (1.0f / 6.0f) * (squared * (3.0f * a - 6.0f) + 4.0f);
}

float w2(float a)
{
    float inner = a * (-3.0f * a + 3.0f) + 3.0f;
    return (1.0f / 6.0f) * (a * inner + 1.0f);
}

float w3(float a)
{
    float cubed = a * a * a;
    return (1.0f / 6.0f) * cubed;
}

// Combined weight of the first pair of taps (w0 + w1).
float g0(float a)
{
    float first = w0(a);
    float second = w1(a);
    return first + second;
}

// Combined weight of the second pair of taps (w2 + w3).
float g1(float a)
{
    float third = w2(a);
    float fourth = w3(a);
    return third + fourth;
}

// Sampling offset for the first pair of taps: -1 + w1 / (w0 + w1) + 0.5.
float h0(float a)
{
    float ratio = w1(a) / (w0(a) + w1(a));
    return -1.0f + ratio + 0.5f;
}

// Sampling offset for the second pair of taps: 1 + w3 / (w2 + w3) + 0.5.
float h1(float a)
{
    float ratio = w3(a) / (w2(a) + w3(a));
    return 1.0f + ratio + 0.5f;
}
// Bicubic fetch built from four Sample calls: the texel-space position is split into an
// integer base and a fraction, the fraction drives the g0/g1 weights and h0/h1 offsets,
// and the four weighted samples are summed.
float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset)
{
    Texture2D<float4> texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex];
    SamplerState samplerState = g_SamplerDescriptorHeap[samplerDescriptorIndex];
    float2 size = float2(getTexture2DDimensions(texture));

    // Texel-space position, shifted by half a texel.
    float2 texel = texCoord * size + offset - 0.5f;
    float2 base = floor(texel);
    float2 fraction = texel - base;

    float weightX0 = g0(fraction.x);
    float weightX1 = g1(fraction.x);

    // Sample positions along each axis, converted back to normalized coordinates.
    float2 coordLow = (base + float2(h0(fraction.x), h0(fraction.y))) / size;
    float2 coordHigh = (base + float2(h1(fraction.x), h1(fraction.y))) / size;

    float4 sample00 = texture.Sample(samplerState, float2(coordLow.x, coordLow.y));
    float4 sample10 = texture.Sample(samplerState, float2(coordHigh.x, coordLow.y));
    float4 sample01 = texture.Sample(samplerState, float2(coordLow.x, coordHigh.y));
    float4 sample11 = texture.Sample(samplerState, float2(coordHigh.x, coordHigh.y));

    float4 rowLow = weightX0 * sample00 + weightX1 * sample10;
    float4 rowHigh = weightX0 * sample01 + weightX1 * sample11;

    return g0(fraction.y) * rowLow + g1(fraction.y) * rowHigh;
}
// Samples the 3D texture at resourceDescriptorIndex with the sampler at samplerDescriptorIndex.
float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord)
{
    Texture3D<float4> volume = g_Texture3DDescriptorHeap[resourceDescriptorIndex];
    SamplerState volumeSampler = g_SamplerDescriptorHeap[samplerDescriptorIndex];
    return volume.Sample(volumeSampler, texCoord);
}
struct CubeMapData struct CubeMapData
{ {
float3 cubeMapDirections[2]; float3 cubeMapDirections[2];
uint cubeMapIndex; uint cubeMapIndex;
}; };
float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData) float4 texCUBE(samplerCUBE s, float3 texCoord, inout CubeMapData cubeMapData)
{ {
return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], cubeMapData.cubeMapDirections[texCoord.z]); return texCUBE(s, cubeMapData.cubeMapDirections[texCoord.z]);
}
// Decodes a vertex input. With SPEC_CONSTANT_R11G11B10_NORMAL set, value.x is unpacked as
// three signed fields (11, 11 and 10 bits); otherwise the raw bits are reinterpreted as floats.
float4 tfetchR11G11B10(uint4 value)
{
    if ((g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) == 0)
        return asfloat(value);

    uint packed = value.x;

    // Each field contributes -1.0 when its top (sign) bit is set, plus its scaled low bits.
    return float4(
        (packed & 0x00000400 ? -1.0 : 0.0) + ((packed & 0x3FF) / 1024.0),
        (packed & 0x00200000 ? -1.0 : 0.0) + (((packed >> 11) & 0x3FF) / 1024.0),
        (packed & 0x80000000 ? -1.0 : 0.0) + (((packed >> 22) & 0x1FF) / 512.0),
        0.0);
}
// Returns value with x/y and z/w exchanged when bit `semanticIndex` of swappedTexcoords is
// set; otherwise returns value unchanged.
float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex)
{
    if ((swappedTexcoords & (1ull << semanticIndex)) != 0)
        return value.yxwz;

    return value;
}
float4 cube(float4 value, inout CubeMapData cubeMapData) float4 cube(float4 value, inout CubeMapData cubeMapData)
@ -204,19 +36,4 @@ float4 max4(float4 src0)
return max(max(src0.x, src0.y), max(src0.z, src0.w)); return max(max(src0.x, src0.y), max(src0.z, src0.w));
} }
// Scales texCoord by the dimensions of the 2D texture at resourceDescriptorIndex.
float2 getPixelCoord(uint resourceDescriptorIndex, float2 texCoord)
{
    uint2 dimensions = getTexture2DDimensions(g_Texture2DDescriptorHeap[resourceDescriptorIndex]);
    return dimensions * texCoord;
}
// Derives a mip level from the screen-space derivatives of pixelCoord: half the log2 of the
// larger squared derivative length, clamped so it never goes below 0.
float computeMipLevel(float2 pixelCoord)
{
    float2 horizontal = ddx(pixelCoord);
    float2 vertical = ddy(pixelCoord);
    float horizontalSqr = dot(horizontal, horizontal);
    float verticalSqr = dot(vertical, vertical);
    return max(0.0, 0.5 * log2(max(horizontalSqr, verticalSqr)));
}
#endif
#endif #endif

View file

@ -1,5 +1,4 @@
#include "shader_recompiler.h" #include "shader_recompiler.h"
#include "shader_common.h"
static constexpr char SWIZZLES[] = static constexpr char SWIZZLES[] =
{ {
@ -13,24 +12,6 @@ static constexpr char SWIZZLES[] =
'_' '_'
}; };
// HLSL type name for each vertex declaration usage; the trailing comment on each entry names
// the usage it belongs to.
// NOTE(review): presumably indexed by the DeclUsage value like USAGE_VARIABLES — confirm at the use site.
static constexpr const char* USAGE_TYPES[] =
{
"float4", // POSITION
"float4", // BLENDWEIGHT
"uint4", // BLENDINDICES
"uint4", // NORMAL
"float4", // PSIZE
"float4", // TEXCOORD
"uint4", // TANGENT
"uint4", // BINORMAL
"float4", // TESSFACTOR
"float4", // POSITIONT
"float4", // COLOR
"float4", // FOG
"float4", // DEPTH
"float4", // SAMPLE
};
static constexpr const char* USAGE_VARIABLES[] = static constexpr const char* USAGE_VARIABLES[] =
{ {
"Position", "Position",
@ -67,57 +48,6 @@ static constexpr const char* USAGE_SEMANTICS[] =
"SAMPLE" "SAMPLE"
}; };
// One row of the USAGE_LOCATIONS table: a (usage, usageIndex) pair and the location assigned to it.
struct DeclUsageLocation
{
DeclUsage usage;
uint32_t usageIndex;
uint32_t location;
};
// NOTE: These are specialized Vulkan locations for Unleashed Recompiled. Change as necessary. Likely not going to work with other games.
// Each row is { usage, usageIndex, location }.
// TexCoord 7 and Position 1 are both assigned location 15.
// NOTE(review): the shared location looks deliberate given the note above — confirm before changing.
static constexpr DeclUsageLocation USAGE_LOCATIONS[] =
{
{ DeclUsage::Position, 0, 0 },
{ DeclUsage::Normal, 0, 1 },
{ DeclUsage::Tangent, 0, 2 },
{ DeclUsage::Binormal, 0, 3 },
{ DeclUsage::TexCoord, 0, 4 },
{ DeclUsage::TexCoord, 1, 5 },
{ DeclUsage::TexCoord, 2, 6 },
{ DeclUsage::TexCoord, 3, 7 },
{ DeclUsage::Color, 0, 8 },
{ DeclUsage::BlendIndices, 0, 9 },
{ DeclUsage::BlendWeight, 0, 10 },
{ DeclUsage::Color, 1, 11 },
{ DeclUsage::TexCoord, 4, 12 },
{ DeclUsage::TexCoord, 5, 13 },
{ DeclUsage::TexCoord, 6, 14 },
{ DeclUsage::TexCoord, 7, 15 },
{ DeclUsage::Position, 1, 15 },
};
// Ordered list of (usage, usage index) pairs: TexCoord 0 through 15 followed by Color 0 and 1.
// NOTE(review): presumably the position in this array identifies an interpolator slot — confirm at the use site.
static constexpr std::pair<DeclUsage, size_t> INTERPOLATORS[] =
{
{ DeclUsage::TexCoord, 0 },
{ DeclUsage::TexCoord, 1 },
{ DeclUsage::TexCoord, 2 },
{ DeclUsage::TexCoord, 3 },
{ DeclUsage::TexCoord, 4 },
{ DeclUsage::TexCoord, 5 },
{ DeclUsage::TexCoord, 6 },
{ DeclUsage::TexCoord, 7 },
{ DeclUsage::TexCoord, 8 },
{ DeclUsage::TexCoord, 9 },
{ DeclUsage::TexCoord, 10 },
{ DeclUsage::TexCoord, 11 },
{ DeclUsage::TexCoord, 12 },
{ DeclUsage::TexCoord, 13 },
{ DeclUsage::TexCoord, 14 },
{ DeclUsage::TexCoord, 15 },
{ DeclUsage::Color, 0 },
{ DeclUsage::Color, 1 }
};
static constexpr std::string_view TEXTURE_DIMENSIONS[] = static constexpr std::string_view TEXTURE_DIMENSIONS[] =
{ {
"2D", "2D",
@ -179,35 +109,8 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a
auto findResult = vertexElements.find(address); auto findResult = vertexElements.find(address);
assert(findResult != vertexElements.end()); assert(findResult != vertexElements.end());
switch (findResult->second.usage)
{
case DeclUsage::Normal:
case DeclUsage::Tangent:
case DeclUsage::Binormal:
specConstantsMask |= SPEC_CONSTANT_R11G11B10_NORMAL;
print("tfetchR11G11B10(");
break;
case DeclUsage::TexCoord:
print("tfetchTexcoord(g_SwappedTexcoords, ");
break;
}
print("i{}{}", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex)); print("i{}{}", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex));
switch (findResult->second.usage)
{
case DeclUsage::Normal:
case DeclUsage::Tangent:
case DeclUsage::Binormal:
out += ')';
break;
case DeclUsage::TexCoord:
print(", {})", uint32_t(findResult->second.usageIndex));
break;
}
out += '.'; out += '.';
printDstSwizzle(instr.dstSwizzle, true); printDstSwizzle(instr.dstSwizzle, true);
@ -225,7 +128,7 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a
void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicubic) void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicubic)
{ {
if (instr.opcode != FetchOpcode::TextureFetch && instr.opcode != FetchOpcode::GetTextureWeights) if (instr.opcode != FetchOpcode::TextureFetch)
return; return;
if (instr.isPredicated) if (instr.isPredicated)
@ -248,18 +151,11 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu
std::string constName; std::string constName;
const char* constNamePtr = nullptr; const char* constNamePtr = nullptr;
#ifdef UNLEASHED_RECOMP
bool subtractFromOne = false;
#endif
auto findResult = samplers.find(instr.constIndex); auto findResult = samplers.find(instr.constIndex);
if (findResult != samplers.end()) if (findResult != samplers.end())
{ {
constNamePtr = findResult->second; constNamePtr = findResult->second;
#ifdef UNLEASHED_RECOMP
subtractFromOne = hasMtxPrevInvViewProjection && strcmp(constNamePtr, "sampZBuffer") == 0;
#endif
} }
else else
{ {
@ -267,36 +163,20 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu
constNamePtr = constName.c_str(); constNamePtr = constName.c_str();
} }
#ifdef UNLEASHED_RECOMP
if (instr.constIndex == 0 && instr.dimension == TextureDimension::Texture2D)
{
indent();
print("pixelCoord = getPixelCoord({}_Texture2DDescriptorIndex, ", constNamePtr);
printSrcRegister(2);
out += ");\n";
}
#endif
indent(); indent();
print("r{}.", instr.dstRegister); print("r{}.", instr.dstRegister);
printDstSwizzle(instr.dstSwizzle, false); printDstSwizzle(instr.dstSwizzle, false);
out += " = "; out += " = ";
if (strcmp(constNamePtr, "g_DepthSampler") == 0 || strcmp(constNamePtr, "sampZBuffer") == 0)
out += "1.0 - ";
switch (instr.opcode) switch (instr.opcode)
{ {
case FetchOpcode::TextureFetch: case FetchOpcode::TextureFetch:
{ {
#ifdef UNLEASHED_RECOMP out += "tex";
if (subtractFromOne)
out += "1.0 - ";
#endif
out += "tfetch";
break;
}
case FetchOpcode::GetTextureWeights:
{
out += "getWeights";
break; break;
} }
} }
@ -317,28 +197,35 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu
case TextureDimension::Texture3D: case TextureDimension::Texture3D:
dimension = "3D"; dimension = "3D";
componentCount = 3; componentCount = 3;
{
auto search = fmt::format("sampler2D {} : register", constNamePtr);
size_t index = out.find(search);
if (index != std::string::npos)
out[index + 7] = '3';
}
break; break;
case TextureDimension::TextureCube: case TextureDimension::TextureCube:
dimension = "Cube"; dimension = "CUBE";
componentCount = 3; componentCount = 3;
{
auto search = fmt::format("sampler2D {} : register", constNamePtr);
size_t index = out.find(search);
if (index != std::string::npos)
{
out.erase(index + 7, 2);
out.insert(index + 7, "CUBE");
}
}
break; break;
} }
out += dimension; out += dimension;
#ifdef UNLEASHED_RECOMP print("({}, ", constNamePtr);
if (bicubic)
out += "Bicubic";
#endif
print("({0}_Texture{1}DescriptorIndex, {0}_SamplerDescriptorIndex, ", constNamePtr, dimension);
printSrcRegister(componentCount); printSrcRegister(componentCount);
switch (instr.dimension) switch (instr.dimension)
{ {
case TextureDimension::Texture2D:
print(", float2({}, {})", instr.offsetX * 0.5f, instr.offsetY * 0.5f);
break;
case TextureDimension::TextureCube: case TextureDimension::TextureCube:
out += ", cubeMapData"; out += ", cubeMapData";
break; break;
@ -461,18 +348,8 @@ void ShaderRecompiler::recompile(const AluInstruction& instr)
const char* constantName = reinterpret_cast<const char*>(constantTableData + findResult->second->name); const char* constantName = reinterpret_cast<const char*>(constantTableData + findResult->second->name);
if (findResult->second->registerCount > 1) if (findResult->second->registerCount > 1)
{ {
#ifdef UNLEASHED_RECOMP regFormatted = fmt::format("{}[{}{}]", constantName,
if (hasMtxProjection && strcmp(constantName, "g_MtxProjection") == 0) reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : "");
{
regFormatted = fmt::format("(iterationIndex == 0 ? mtxProjectionReverseZ[{0}] : mtxProjection[{0}])",
reg - findResult->second->registerIndex);
}
else
#endif
{
regFormatted = fmt::format("{}({}{})", constantName,
reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : "");
}
} }
else else
{ {
@ -607,20 +484,6 @@ void ShaderRecompiler::recompile(const AluInstruction& instr)
{ {
case ExportRegister::VSPosition: case ExportRegister::VSPosition:
exportRegister = "oPos"; exportRegister = "oPos";
#ifdef UNLEASHED_RECOMP
if (hasMtxProjection)
{
indent();
out += "if ((g_SpecConstants() & SPEC_CONSTANT_REVERSE_Z) == 0 || iterationIndex == 0)\n";
indent();
out += "{\n";
++indentation;
closeIfBracket = true;
}
#endif
break; break;
default: default:
@ -743,15 +606,15 @@ void ShaderRecompiler::recompile(const AluInstruction& instr)
break; break;
case AluVectorOpcode::CndEq: case AluVectorOpcode::CndEq:
print("select({} == 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); print("{} == 0.0 ? {} : {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2));
break; break;
case AluVectorOpcode::CndGe: case AluVectorOpcode::CndGe:
print("select({} >= 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); print("{} >= 0.0 ? {} : {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2));
break; break;
case AluVectorOpcode::CndGt: case AluVectorOpcode::CndGt:
print("select({} > 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); print("{} > 0.0 ? {} : {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2));
break; break;
case AluVectorOpcode::Dp4: case AluVectorOpcode::Dp4:
@ -918,19 +781,19 @@ void ShaderRecompiler::recompile(const AluInstruction& instr)
case AluScalarOpcode::Logc: case AluScalarOpcode::Logc:
case AluScalarOpcode::Log: case AluScalarOpcode::Log:
print("clamp(log2({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); print("log2({})", op(SCALAR_0));
break; break;
case AluScalarOpcode::Rcpc: case AluScalarOpcode::Rcpc:
case AluScalarOpcode::Rcpf: case AluScalarOpcode::Rcpf:
case AluScalarOpcode::Rcp: case AluScalarOpcode::Rcp:
print("clamp(rcp({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); print("rcp({})", op(SCALAR_0));
break; break;
case AluScalarOpcode::Rsqc: case AluScalarOpcode::Rsqc:
case AluScalarOpcode::Rsqf: case AluScalarOpcode::Rsqf:
case AluScalarOpcode::Rsq: case AluScalarOpcode::Rsq:
print("clamp(rsqrt({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); print("rsqrt({})", op(SCALAR_0));
break; break;
case AluScalarOpcode::Subs: case AluScalarOpcode::Subs:
@ -1113,13 +976,6 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
const auto constantTableContainer = reinterpret_cast<const ConstantTableContainer*>(shaderData + shaderContainer->constantTableOffset); const auto constantTableContainer = reinterpret_cast<const ConstantTableContainer*>(shaderData + shaderContainer->constantTableOffset);
constantTableData = reinterpret_cast<const uint8_t*>(&constantTableContainer->constantTable); constantTableData = reinterpret_cast<const uint8_t*>(&constantTableContainer->constantTable);
out += "#ifdef __spirv__\n\n";
#ifdef UNLEASHED_RECOMP
bool isMetaInstancer = false;
bool hasIndexCount = false;
#endif
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
{ {
const auto constantInfo = reinterpret_cast<const ConstantInfo*>( const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
@ -1127,41 +983,16 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name); const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
#ifdef UNLEASHED_RECOMP
if (!isPixelShader)
{
if (strcmp(constantName, "g_MtxProjection") == 0)
hasMtxProjection = true;
else if (strcmp(constantName, "g_InstanceTypes") == 0)
isMetaInstancer = true;
else if (strcmp(constantName, "g_IndexCount") == 0)
hasIndexCount = true;
}
else
{
if (strcmp(constantName, "g_MtxPrevInvViewProjection") == 0)
hasMtxPrevInvViewProjection = true;
}
#endif
switch (constantInfo->registerSet) switch (constantInfo->registerSet)
{ {
case RegisterSet::Float4: case RegisterSet::Float4:
{ {
const char* shaderName = isPixelShader ? "Pixel" : "Vertex"; print("float4 {}", constantName);
if (constantInfo->registerCount > 1) if (constantInfo->registerCount > 1)
{ print("[{}]", constantInfo->registerCount.get());
uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex;
println("#define {}(INDEX) select((INDEX) < {}, vk::RawBufferLoad<float4>(g_PushConstants.{}ShaderConstants + ({} + min(INDEX, {})) * 16, 0x10), 0.0)", println(" : register(c{});", constantInfo->registerIndex.get());
constantName, tailCount, shaderName, constantInfo->registerIndex.get(), tailCount - 1);
}
else
{
println("#define {} vk::RawBufferLoad<float4>(g_PushConstants.{}ShaderConstants + {}, 0x10)",
constantName, shaderName, constantInfo->registerIndex * 16);
}
for (uint16_t j = 0; j < constantInfo->registerCount; j++) for (uint16_t j = 0; j < constantInfo->registerCount; j++)
float4Constants.emplace(constantInfo->registerIndex + j, constantInfo); float4Constants.emplace(constantInfo->registerIndex + j, constantInfo);
@ -1171,91 +1002,20 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
case RegisterSet::Sampler: case RegisterSet::Sampler:
{ {
for (size_t j = 0; j < std::size(TEXTURE_DIMENSIONS); j++) println("sampler2D {} : register(s{});", constantName, constantInfo->registerIndex.get());
{
println("#define {}_Texture{}DescriptorIndex vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + {})",
constantName, TEXTURE_DIMENSIONS[j], j * 64 + constantInfo->registerIndex * 4);
}
println("#define {}_SamplerDescriptorIndex vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + {})",
constantName, std::size(TEXTURE_DIMENSIONS) * 64 + constantInfo->registerIndex * 4);
samplers.emplace(constantInfo->registerIndex, constantName); samplers.emplace(constantInfo->registerIndex, constantName);
break; break;
} }
} case RegisterSet::Bool:
}
out += "\n#else\n\n";
println("cbuffer {}ShaderConstants : register(b{}, space4)", isPixelShader ? "Pixel" : "Vertex", isPixelShader ? 1 : 0);
out += "{\n";
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
{
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
if (constantInfo->registerSet == RegisterSet::Float4)
{ {
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name); println("bool {} : register(b{});", constantName, constantInfo->registerIndex.get());
print("\tfloat4 {}", constantName);
if (constantInfo->registerCount > 1)
print("[{}]", constantInfo->registerCount.get());
println(" : packoffset(c{});", constantInfo->registerIndex.get());
if (constantInfo->registerCount > 1)
{
uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex;
println("#define {0}(INDEX) select((INDEX) < {1}, {0}[min(INDEX, {2})], 0.0)", constantName, tailCount, tailCount - 1);
}
}
}
out += "};\n\n";
out += "cbuffer SharedConstants : register(b2, space4)\n";
out += "{\n";
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
{
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
if (constantInfo->registerSet == RegisterSet::Sampler)
{
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
for (size_t j = 0; j < std::size(TEXTURE_DIMENSIONS); j++)
{
println("\tuint {}_Texture{}DescriptorIndex : packoffset(c{}.{});",
constantName, TEXTURE_DIMENSIONS[j], j * 4 + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]);
}
println("\tuint {}_SamplerDescriptorIndex : packoffset(c{}.{});",
constantName, 4 * std::size(TEXTURE_DIMENSIONS) + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]);
}
}
out += "\tDEFINE_SHARED_CONSTANTS();\n";
out += "};\n\n";
out += "#endif\n";
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
{
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
if (constantInfo->registerSet == RegisterSet::Bool)
{
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
println("\t#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0));
boolConstants.emplace(constantInfo->registerIndex, constantName); boolConstants.emplace(constantInfo->registerIndex, constantName);
break;
}
} }
} }
@ -1263,41 +1023,37 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
const auto shader = reinterpret_cast<const Shader*>(shaderData + shaderContainer->shaderOffset); const auto shader = reinterpret_cast<const Shader*>(shaderData + shaderContainer->shaderOffset);
out += "#ifndef __spirv__\n";
if (isPixelShader)
out += "[shader(\"pixel\")]\n";
else
out += "[shader(\"vertex\")]\n";
out += "#endif\n";
out += "void main(\n"; out += "void main(\n";
if (isPixelShader) if (isPixelShader)
{ {
out += "\tin float4 iPos : SV_Position,\n"; out += "\tin float4 iPos : VPOS";
for (auto& [usage, usageIndex] : INTERPOLATORS) uint32_t interpolatorCount = (shader->interpolatorInfo >> 5) & 0x1F;
println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]);
out += "#ifdef __spirv__\n"; for (uint32_t i = 0; i < interpolatorCount; i++)
out += "\tin bool iFace : SV_IsFrontFace\n"; {
out += "#else\n"; union
out += "\tin uint iFace : SV_IsFrontFace\n"; {
out += "#endif\n"; Interpolator interpolator;
uint32_t value;
};
value = reinterpret_cast<const PixelShader*>(shader)->interpolators[i];
print(",\n\tin float4 i{0}{1} : {2}{1}", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex), USAGE_SEMANTICS[uint32_t(interpolator.usage)]);
}
auto pixelShader = reinterpret_cast<const PixelShader*>(shader); auto pixelShader = reinterpret_cast<const PixelShader*>(shader);
if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0)
out += ",\n\tout float4 oC0 : SV_Target0"; out += ",\n\tout float4 oC0 : COLOR0";
if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1)
out += ",\n\tout float4 oC1 : SV_Target1"; out += ",\n\tout float4 oC1 : COLOR1";
if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2) if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2)
out += ",\n\tout float4 oC2 : SV_Target2"; out += ",\n\tout float4 oC2 : COLOR2";
if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3)
out += ",\n\tout float4 oC3 : SV_Target3"; out += ",\n\tout float4 oC3 : COLOR3";
if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH)
out += ",\n\tout float oDepth : SV_Depth"; out += ",\n\tout float oDepth : DEPTH";
} }
else else
{ {
@ -1312,65 +1068,33 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + i]; value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + i];
const char* usageType = USAGE_TYPES[uint32_t(vertexElement.usage)]; println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(vertexElement.usage)],
#ifdef UNLEASHED_RECOMP
if ((vertexElement.usage == DeclUsage::TexCoord && vertexElement.usageIndex == 2 && isMetaInstancer) ||
(vertexElement.usage == DeclUsage::Position && vertexElement.usageIndex == 1))
{
usageType = "uint4";
}
#endif
out += '\t';
for (auto& usageLocation : USAGE_LOCATIONS)
{
if (usageLocation.usage == vertexElement.usage && usageLocation.usageIndex == vertexElement.usageIndex)
{
print("[[vk::location({})]] ", usageLocation.location);
break;
}
}
println("in {0} i{1}{2} : {3}{2},", usageType, USAGE_VARIABLES[uint32_t(vertexElement.usage)],
uint32_t(vertexElement.usageIndex), USAGE_SEMANTICS[uint32_t(vertexElement.usage)]); uint32_t(vertexElement.usageIndex), USAGE_SEMANTICS[uint32_t(vertexElement.usage)]);
vertexElements.emplace(uint32_t(vertexElement.address), vertexElement); vertexElements.emplace(uint32_t(vertexElement.address), vertexElement);
} }
#ifdef UNLEASHED_RECOMP uint32_t interpolatorCount = (shader->interpolatorInfo >> 5) & 0x1F;
if (hasIndexCount)
for (uint32_t i = 0; i < interpolatorCount; i++)
{ {
out += "\tin uint iVertexId : SV_VertexID,\n"; union
out += "\tin uint iInstanceId : SV_InstanceID,\n"; {
Interpolator interpolator;
uint32_t value;
};
auto vertexShader = reinterpret_cast<const VertexShader*>(shader);
value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + vertexShader->vertexElementCount + i];
println("\tout float4 o{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex), USAGE_SEMANTICS[uint32_t(interpolator.usage)]);
} }
#endif
out += "\tout float4 oPos : SV_Position"; out += "\tout float4 oPos : POSITION";
for (auto& [usage, usageIndex] : INTERPOLATORS)
print(",\n\tout float4 o{0}{1} : {2}{1}", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]);
} }
out += ")\n"; out += ")\n";
out += "{\n"; out += "{\n";
#ifdef UNLEASHED_RECOMP
if (hasMtxProjection)
{
specConstantsMask |= SPEC_CONSTANT_REVERSE_Z;
out += "\toPos = 0.0;\n";
out += "\tfloat4x4 mtxProjection = float4x4(g_MtxProjection(0), g_MtxProjection(1), g_MtxProjection(2), g_MtxProjection(3));\n";
out += "\tfloat4x4 mtxProjectionReverseZ = mul(mtxProjection, float4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0, 1, 1));\n";
out += "\t[unroll] for (int iterationIndex = 0; iterationIndex < 2; iterationIndex++)\n";
out += "\t{\n";
}
#endif
if (shaderContainer->definitionTableOffset != NULL) if (shaderContainer->definitionTableOffset != NULL)
{ {
auto definitionTable = reinterpret_cast<const DefinitionTable*>(shaderData + shaderContainer->definitionTableOffset); auto definitionTable = reinterpret_cast<const DefinitionTable*>(shaderData + shaderContainer->definitionTableOffset);
@ -1381,8 +1105,8 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
auto value = reinterpret_cast<const be<uint32_t>*>(shaderData + shaderContainer->virtualSize + definition->physicalOffset); auto value = reinterpret_cast<const be<uint32_t>*>(shaderData + shaderContainer->virtualSize + definition->physicalOffset);
for (uint16_t i = 0; i < (definition->count + 3) / 4; i++) for (uint16_t i = 0; i < (definition->count + 3) / 4; i++)
{ {
println("\tfloat4 c{} = asfloat(uint4(0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}));", println("\tfloat4 c{} = float4({}, {}, {}, {});",
definition->registerIndex + i - (isPixelShader ? 256 : 0), value[0].get(), value[1].get(), value[2].get(), value[3].get()); definition->registerIndex + i - (isPixelShader ? 256 : 0), std::_Bit_cast<float>(value[0].get()), std::_Bit_cast<float>(value[1].get()), std::_Bit_cast<float>(value[2].get()), std::_Bit_cast<float>(value[3].get()));
value += 4; value += 4;
} }
@ -1444,19 +1168,6 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
} }
} }
if (!isPixelShader)
{
#ifdef UNLEASHED_RECOMP
if (!hasMtxProjection)
out += "\toPos = 0.0;\n";
#endif
for (auto& [usage, usageIndex] : INTERPOLATORS)
println("\to{}{} = 0.0;", USAGE_VARIABLES[uint32_t(usage)], usageIndex);
out += "\n";
}
for (size_t i = 0; i < 32; i++) for (size_t i = 0; i < 32; i++)
{ {
if (!printedRegisters[i]) if (!printedRegisters[i])
@ -1464,14 +1175,8 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
print("\tfloat4 r{} = ", i); print("\tfloat4 r{} = ", i);
if (isPixelShader && i == ((shader->fieldC >> 8) & 0xFF)) if (isPixelShader && i == ((shader->fieldC >> 8) & 0xFF))
{ {
out += "float4((iPos.xy - 0.5) * float2(iFace ? 1.0 : -1.0, 1.0), 0.0, 0.0);\n"; out += "float4(iPos.xy, 0.0, 0.0);\n";
} }
#ifdef UNLEASHED_RECOMP
else if (!isPixelShader && hasIndexCount && i == 0)
{
out += "float4(iVertexId + g_IndexCount.x * iInstanceId, 0.0, 0.0, 0.0);\n";
}
#endif
else else
{ {
out += "0.0;\n"; out += "0.0;\n";
@ -1484,12 +1189,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
out += "\tbool p0 = false;\n"; out += "\tbool p0 = false;\n";
out += "\tfloat ps = 0.0;\n"; out += "\tfloat ps = 0.0;\n";
if (isPixelShader) if (isPixelShader)
{
#ifdef UNLEASHED_RECOMP
out += "\tfloat2 pixelCoord = 0.0;\n";
#endif
out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n"; out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n";
}
const be<uint32_t>* code = reinterpret_cast<const be<uint32_t>*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset); const be<uint32_t>* code = reinterpret_cast<const be<uint32_t>*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset);
@ -1645,9 +1345,6 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
if (simpleControlFlow) if (simpleControlFlow)
{ {
indent(); indent();
#ifdef UNLEASHED_RECOMP
print("[unroll] ");
#endif
println("for (aL = 0; aL < i{}.x; aL++)", uint32_t(cfInstr.loopStart.loopId)); println("for (aL = 0; aL < i{}.x; aL++)", uint32_t(cfInstr.loopStart.loopId));
indent(); indent();
out += "{\n"; out += "{\n";
@ -1696,7 +1393,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
{ {
auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress); auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress);
if (findResult != boolConstants.end()) if (findResult != boolConstants.end())
println("if ((g_Booleans & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "="); println("if ({} {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
else else
println("if (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "="); println("if (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
} }
@ -1748,39 +1445,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
} }
else else
{ {
#ifdef UNLEASHED_RECOMP recompile(textureFetch, false);
if (textureFetch.constIndex == 10) // g_GISampler
{
specConstantsMask |= SPEC_CONSTANT_BICUBIC_GI_FILTER;
indent();
out += "if (g_SpecConstants() & SPEC_CONSTANT_BICUBIC_GI_FILTER)";
indent();
out += '{';
++indentation;
recompile(textureFetch, true);
--indentation;
indent();
out += "}";
indent();
out += "else";
indent();
out += '{';
++indentation;
recompile(textureFetch, false);
--indentation;
indent();
out += '}';
}
else
#endif
{
recompile(textureFetch, false);
}
} }
} }
else else
@ -1794,56 +1459,10 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
if (shouldReturn) if (shouldReturn)
{ {
if (isPixelShader)
{
specConstantsMask |= SPEC_CONSTANT_ALPHA_TEST;
indent();
out += "[branch] if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TEST)";
indent();
out += '{';
indent();
out += "\tclip(oC0.w - g_AlphaThreshold);\n";
indent();
out += "}";
#ifdef UNLEASHED_RECOMP
specConstantsMask |= SPEC_CONSTANT_ALPHA_TO_COVERAGE;
indent();
out += "else if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TO_COVERAGE)";
indent();
out += '{';
indent();
out += "\toC0.w *= 1.0 + computeMipLevel(pixelCoord) * 0.25;\n";
indent();
out += "\toC0.w = 0.5 + (oC0.w - g_AlphaThreshold) / max(fwidth(oC0.w), 1e-6);\n";
indent();
out += '}';
#endif
}
else
{
out += "\toPos.xy += g_HalfPixelOffset * oPos.w;\n";
}
if (simpleControlFlow) if (simpleControlFlow)
{ {
indent(); indent();
#ifdef UNLEASHED_RECOMP out += "return;\n";
if (hasMtxProjection)
{
out += "continue;\n";
}
else
#endif
{
out += "return;\n";
}
} }
else else
{ {
@ -1871,10 +1490,5 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_vi
out += "\t}\n"; out += "\t}\n";
} }
#ifdef UNLEASHED_RECOMP
if (hasMtxProjection)
out += "\t}\n";
#endif
out += "}"; out += "}";
} }

View file

@ -32,7 +32,6 @@ struct ShaderRecompiler : StringBuffer
std::unordered_map<uint32_t, const char*> boolConstants; std::unordered_map<uint32_t, const char*> boolConstants;
std::unordered_map<uint32_t, const char*> samplers; std::unordered_map<uint32_t, const char*> samplers;
std::unordered_map<uint32_t, uint32_t> ifEndLabels; std::unordered_map<uint32_t, uint32_t> ifEndLabels;
uint32_t specConstantsMask = 0;
#ifdef UNLEASHED_RECOMP #ifdef UNLEASHED_RECOMP
bool hasMtxProjection = false; bool hasMtxProjection = false;