mirror of
https://github.com/hedge-dev/XenosRecomp.git
synced 2025-12-18 14:02:37 +00:00
Refactor SPIR-V constant usage to allow for better driver optimization.
This commit is contained in:
parent
85d1948655
commit
25b4f604af
2 changed files with 118 additions and 79 deletions
|
|
@ -1,6 +1,8 @@
|
|||
#define FLT_MIN asfloat(0xff7fffff)
|
||||
#define FLT_MAX asfloat(0x7f7fffff)
|
||||
|
||||
#define INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL (1 << 0)
|
||||
|
||||
#ifdef __spirv__
|
||||
|
||||
struct PushConstants
|
||||
|
|
@ -12,32 +14,25 @@ struct PushConstants
|
|||
|
||||
[[vk::push_constant]] ConstantBuffer<PushConstants> g_PushConstants;
|
||||
|
||||
#define CONSTANT_BUFFER(NAME, REGISTER) struct NAME
|
||||
#define PACK_OFFSET(REGISTER)
|
||||
|
||||
#define GET_CONSTANT(NAME) constants.NAME
|
||||
#define GET_SHARED_CONSTANT(NAME) sharedConstants.NAME
|
||||
#define g_AlphaTestMode vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + 128)
|
||||
#define g_AlphaThreshold vk::RawBufferLoad<float>(g_PushConstants.SharedConstants + 132)
|
||||
#define g_Booleans vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + 136)
|
||||
#define g_SwappedTexcoords vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + 140)
|
||||
#define g_InputLayoutFlags vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + 144)
|
||||
#define g_EnableGIBicubicFiltering vk::RawBufferLoad<bool>(g_PushConstants.SharedConstants + 148)
|
||||
|
||||
#else
|
||||
|
||||
#define CONSTANT_BUFFER(NAME, REGISTER) cbuffer NAME : register(REGISTER, space4)
|
||||
#define PACK_OFFSET(REGISTER) : packoffset(REGISTER)
|
||||
|
||||
#define GET_CONSTANT(NAME) NAME
|
||||
#define GET_SHARED_CONSTANT(NAME) NAME
|
||||
#define DEFINE_SHARED_CONSTANTS() \
|
||||
uint g_AlphaTestMode : packoffset(c8.x); \
|
||||
float g_AlphaThreshold : packoffset(c8.y); \
|
||||
uint g_Booleans : packoffset(c8.z); \
|
||||
uint g_SwappedTexcoords : packoffset(c8.w); \
|
||||
uint g_InputLayoutFlags : packoffset(c9.x); \
|
||||
bool g_EnableGIBicubicFiltering : packoffset(c9.y)
|
||||
|
||||
#endif
|
||||
|
||||
#define INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL (1 << 0)
|
||||
|
||||
#define SHARED_CONSTANTS \
|
||||
[[vk::offset(128)]] uint g_AlphaTestMode PACK_OFFSET(c8.x); \
|
||||
[[vk::offset(132)]] float g_AlphaThreshold PACK_OFFSET(c8.y); \
|
||||
[[vk::offset(136)]] uint g_Booleans PACK_OFFSET(c8.z); \
|
||||
[[vk::offset(140)]] uint g_SwappedTexcoords PACK_OFFSET(c8.w); \
|
||||
[[vk::offset(144)]] uint g_InputLayoutFlags PACK_OFFSET(c9.x); \
|
||||
[[vk::offset(148)]] bool g_EnableGIBicubicFiltering PACK_OFFSET(c9.y)
|
||||
|
||||
Texture2D<float4> g_Texture2DDescriptorHeap[] : register(t0, space0);
|
||||
Texture3D<float4> g_Texture3DDescriptorHeap[] : register(t0, space1);
|
||||
TextureCube<float4> g_TextureCubeDescriptorHeap[] : register(t0, space2);
|
||||
|
|
|
|||
|
|
@ -176,11 +176,11 @@ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t a
|
|||
case DeclUsage::Normal:
|
||||
case DeclUsage::Tangent:
|
||||
case DeclUsage::Binormal:
|
||||
print("tfetchR11G11B10(GET_SHARED_CONSTANT(g_InputLayoutFlags), ");
|
||||
print("tfetchR11G11B10(g_InputLayoutFlags, ");
|
||||
break;
|
||||
|
||||
case DeclUsage::TexCoord:
|
||||
print("tfetchTexcoord(GET_SHARED_CONSTANT(g_SwappedTexcoords), ");
|
||||
print("tfetchTexcoord(g_SwappedTexcoords, ");
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -254,7 +254,7 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu
|
|||
if (instr.constIndex == 0 && instr.dimension == TextureDimension::Texture2D)
|
||||
{
|
||||
indent();
|
||||
print("pixelCoord = getPixelCoord(GET_SHARED_CONSTANT({}_ResourceDescriptorIndex), ", constNamePtr);
|
||||
print("pixelCoord = getPixelCoord({}_ResourceDescriptorIndex, ", constNamePtr);
|
||||
printSrcRegister(2);
|
||||
out += ");\n";
|
||||
}
|
||||
|
|
@ -298,7 +298,7 @@ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicu
|
|||
if (bicubic)
|
||||
out += "Bicubic";
|
||||
|
||||
print("(GET_SHARED_CONSTANT({0}_ResourceDescriptorIndex), GET_SHARED_CONSTANT({0}_SamplerDescriptorIndex), ", constNamePtr);
|
||||
print("({0}_ResourceDescriptorIndex, {0}_SamplerDescriptorIndex, ", constNamePtr);
|
||||
printSrcRegister(componentCount);
|
||||
|
||||
switch (instr.dimension)
|
||||
|
|
@ -428,13 +428,13 @@ void ShaderRecompiler::recompile(const AluInstruction& instr)
|
|||
const char* constantName = reinterpret_cast<const char*>(constantTableData + findResult->second->name);
|
||||
if (findResult->second->registerCount > 1)
|
||||
{
|
||||
regFormatted = std::format("GET_CONSTANT({})[{}{}]", constantName,
|
||||
regFormatted = std::format("{}({}{})", constantName,
|
||||
reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : "");
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(!instr.const0Relative && !instr.const1Relative);
|
||||
regFormatted = std::format("GET_CONSTANT({})", constantName);
|
||||
regFormatted = constantName;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -1045,8 +1045,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
const auto constantTableContainer = reinterpret_cast<const ConstantTableContainer*>(shaderData + shaderContainer->constantTableOffset);
|
||||
constantTableData = reinterpret_cast<const uint8_t*>(&constantTableContainer->constantTable);
|
||||
|
||||
println("CONSTANT_BUFFER(Constants, b{})", isPixelShader ? 1 : 0);
|
||||
out += "{\n";
|
||||
out += "#ifdef __spirv__\n\n";
|
||||
|
||||
bool isMetaInstancer = false;
|
||||
bool hasIndexCount = false;
|
||||
|
|
@ -1056,35 +1055,57 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
|
||||
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
|
||||
|
||||
assert(constantInfo->registerSet != RegisterSet::Int4);
|
||||
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
|
||||
|
||||
if (constantInfo->registerSet == RegisterSet::Float4)
|
||||
if (!isPixelShader)
|
||||
{
|
||||
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
|
||||
if (strcmp(constantName, "g_InstanceTypes") == 0)
|
||||
isMetaInstancer = true;
|
||||
else if (strcmp(constantName, "g_IndexCount") == 0)
|
||||
hasIndexCount = true;
|
||||
}
|
||||
|
||||
if (!isPixelShader)
|
||||
{
|
||||
if (strcmp(constantName, "g_InstanceTypes") == 0)
|
||||
isMetaInstancer = true;
|
||||
else if (strcmp(constantName, "g_IndexCount") == 0)
|
||||
hasIndexCount = true;
|
||||
}
|
||||
|
||||
print("\t[[vk::offset({})]] float4 {}", constantInfo->registerIndex * 16, constantName);
|
||||
switch (constantInfo->registerSet)
|
||||
{
|
||||
case RegisterSet::Float4:
|
||||
{
|
||||
const char* shaderName = isPixelShader ? "Pixel" : "Vertex";
|
||||
|
||||
if (constantInfo->registerCount > 1)
|
||||
print("[{}]", constantInfo->registerCount.get());
|
||||
|
||||
println(" PACK_OFFSET(c{});", constantInfo->registerIndex.get());
|
||||
|
||||
{
|
||||
println("#define {}(INDEX) vk::RawBufferLoad<float4>(g_PushConstants.{}ShaderConstants + ({} + INDEX) * 16, 0x10)",
|
||||
constantName, shaderName, constantInfo->registerIndex.get());
|
||||
}
|
||||
else
|
||||
{
|
||||
println("#define {} vk::RawBufferLoad<float4>(g_PushConstants.{}ShaderConstants + {}, 0x10)",
|
||||
constantName, shaderName, constantInfo->registerIndex * 16);
|
||||
}
|
||||
|
||||
for (uint16_t j = 0; j < constantInfo->registerCount; j++)
|
||||
float4Constants.emplace(constantInfo->registerIndex + j, constantInfo);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case RegisterSet::Sampler:
|
||||
{
|
||||
println("#define {}_ResourceDescriptorIndex vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + {})",
|
||||
constantName, constantInfo->registerIndex * 4);
|
||||
|
||||
println("#define {}_SamplerDescriptorIndex vk::RawBufferLoad<uint>(g_PushConstants.SharedConstants + {})",
|
||||
constantName, 64 + constantInfo->registerIndex * 4);
|
||||
|
||||
samplers.emplace(constantInfo->registerIndex, constantName);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
out += "};\n\n";
|
||||
out += "\n#else\n\n";
|
||||
|
||||
out += "CONSTANT_BUFFER(SharedConstants, b2)\n";
|
||||
println("cbuffer {}ShaderConstants : register(b{}, space4)", isPixelShader ? "Pixel" : "Vertex", isPixelShader ? 1 : 0);
|
||||
out += "{\n";
|
||||
|
||||
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
|
||||
|
|
@ -1092,36 +1113,64 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
|
||||
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
|
||||
|
||||
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
|
||||
|
||||
assert(constantInfo->registerSet != RegisterSet::Int4);
|
||||
|
||||
switch (constantInfo->registerSet)
|
||||
if (constantInfo->registerSet == RegisterSet::Float4)
|
||||
{
|
||||
case RegisterSet::Bool:
|
||||
{
|
||||
println("#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0));
|
||||
boolConstants.emplace(constantInfo->registerIndex, constantName);
|
||||
break;
|
||||
}
|
||||
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
|
||||
|
||||
case RegisterSet::Sampler:
|
||||
{
|
||||
println("\t[[vk::offset({})]] uint {}_ResourceDescriptorIndex PACK_OFFSET(c{}.{});",
|
||||
constantInfo->registerIndex * 4, constantName, constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]);
|
||||
print("\tfloat4 {}", constantName);
|
||||
|
||||
println("\t[[vk::offset({})]] uint {}_SamplerDescriptorIndex PACK_OFFSET(c{}.{});",
|
||||
64 + constantInfo->registerIndex * 4, constantName, 4 + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]);
|
||||
if (constantInfo->registerCount > 1)
|
||||
print("[{}]", constantInfo->registerCount.get());
|
||||
|
||||
samplers.emplace(constantInfo->registerIndex, constantName);
|
||||
break;
|
||||
}
|
||||
println(" : packoffset(c{});", constantInfo->registerIndex.get());
|
||||
|
||||
if (constantInfo->registerCount > 1)
|
||||
println("#define {0}(INDEX) {0}[INDEX]", constantName);
|
||||
}
|
||||
}
|
||||
|
||||
out += "\tSHARED_CONSTANTS;\n";
|
||||
out += "};\n\n";
|
||||
|
||||
out += "cbuffer SharedConstants : register(b2, space4)\n";
|
||||
out += "{\n";
|
||||
|
||||
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
|
||||
{
|
||||
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
|
||||
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
|
||||
|
||||
if (constantInfo->registerSet == RegisterSet::Sampler)
|
||||
{
|
||||
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
|
||||
|
||||
println("\tuint {}_ResourceDescriptorIndex : packoffset(c{}.{});",
|
||||
constantName, constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]);
|
||||
|
||||
println("\tuint {}_SamplerDescriptorIndex : packoffset(c{}.{});",
|
||||
constantName, 4 + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]);
|
||||
}
|
||||
}
|
||||
|
||||
out += "\tDEFINE_SHARED_CONSTANTS();\n";
|
||||
out += "};\n\n";
|
||||
|
||||
out += "#endif\n";
|
||||
|
||||
for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++)
|
||||
{
|
||||
const auto constantInfo = reinterpret_cast<const ConstantInfo*>(
|
||||
constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo));
|
||||
|
||||
if (constantInfo->registerSet == RegisterSet::Bool)
|
||||
{
|
||||
const char* constantName = reinterpret_cast<const char*>(constantTableData + constantInfo->name);
|
||||
println("\t#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0));
|
||||
boolConstants.emplace(constantInfo->registerIndex, constantName);
|
||||
}
|
||||
}
|
||||
|
||||
out += '\n';
|
||||
|
||||
const auto shader = reinterpret_cast<const Shader*>(shaderData + shaderContainer->shaderOffset);
|
||||
|
||||
out += "void main(\n";
|
||||
|
|
@ -1195,11 +1244,6 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
out += ")\n";
|
||||
out += "{\n";
|
||||
|
||||
out += "#ifdef __spirv__\n";
|
||||
println("\tConstants constants = vk::RawBufferLoad<Constants>(g_PushConstants.{}ShaderConstants, 0x100);", isPixelShader ? "Pixel" : "Vertex");
|
||||
out += "\tSharedConstants sharedConstants = vk::RawBufferLoad<SharedConstants>(g_PushConstants.SharedConstants, 0x100);\n";
|
||||
out += "#endif\n\n";
|
||||
|
||||
if (shaderContainer->definitionTableOffset != NULL)
|
||||
{
|
||||
auto definitionTable = reinterpret_cast<const DefinitionTable*>(shaderData + shaderContainer->definitionTableOffset);
|
||||
|
|
@ -1293,7 +1337,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
}
|
||||
else if (!isPixelShader && hasIndexCount && i == 0)
|
||||
{
|
||||
out += "float4(iVertexId + GET_CONSTANT(g_IndexCount).x * iInstanceId, 0.0, 0.0, 0.0);\n";
|
||||
out += "float4(iVertexId + g_IndexCount.x * iInstanceId, 0.0, 0.0, 0.0);\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1514,7 +1558,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
{
|
||||
auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress);
|
||||
if (findResult != boolConstants.end())
|
||||
println("if ((GET_SHARED_CONSTANT(g_Booleans) & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
|
||||
println("if ((g_Booleans & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
|
||||
else
|
||||
println("if (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
|
||||
}
|
||||
|
|
@ -1569,7 +1613,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
if (textureFetch.constIndex == 10) // g_GISampler
|
||||
{
|
||||
indent();
|
||||
out += "[branch] if (GET_SHARED_CONSTANT(g_EnableGIBicubicFiltering))";
|
||||
out += "[branch] if (g_EnableGIBicubicFiltering)";
|
||||
indent();
|
||||
out += '{';
|
||||
|
||||
|
|
@ -1611,24 +1655,24 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
|
|||
if (isPixelShader)
|
||||
{
|
||||
indent();
|
||||
out += "[branch] if (GET_SHARED_CONSTANT(g_AlphaTestMode) == 1)";
|
||||
out += "[branch] if (g_AlphaTestMode == 1)";
|
||||
indent();
|
||||
out += '{';
|
||||
|
||||
indent();
|
||||
out += "\tclip(oC0.w - GET_SHARED_CONSTANT(g_AlphaThreshold));\n";
|
||||
out += "\tclip(oC0.w - g_AlphaThreshold);\n";
|
||||
|
||||
indent();
|
||||
out += "}";
|
||||
indent();
|
||||
out += "else if (GET_SHARED_CONSTANT(g_AlphaTestMode) == 2)";
|
||||
out += "else if (g_AlphaTestMode == 2)";
|
||||
indent();
|
||||
out += '{';
|
||||
|
||||
indent();
|
||||
out += "\toC0.w *= 1.0 + computeMipLevel(pixelCoord) * 0.25;\n";
|
||||
indent();
|
||||
out += "\toC0.w = 0.5 + (oC0.w - GET_SHARED_CONSTANT(g_AlphaThreshold)) / max(fwidth(oC0.w), 1e-6);\n";
|
||||
out += "\toC0.w = 0.5 + (oC0.w - g_AlphaThreshold) / max(fwidth(oC0.w), 1e-6);\n";
|
||||
|
||||
indent();
|
||||
out += '}';
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue