#include "shader_recompiler.h" #include "shader_common.hlsli.h" static constexpr char SWIZZLES[] = { 'x', 'y', 'z', 'w', '0', '1', '_', '_' }; static constexpr const char* USAGE_TYPES[] = { "float4", // POSITION "float4", // BLENDWEIGHT "uint4", // BLENDINDICES "uint4", // NORMAL "float4", // PSIZE "float4", // TEXCOORD "uint4", // TANGENT "uint4", // BINORMAL "float4", // TESSFACTOR "float4", // POSITIONT "float4", // COLOR "float4", // FOG "float4", // DEPTH "float4", // SAMPLE }; static constexpr const char* USAGE_VARIABLES[] = { "Position", "BlendWeight", "BlendIndices", "Normal", "PointSize", "TexCoord", "Tangent", "Binormal", "TessFactor", "PositionT", "Color", "Fog", "Depth", "Sample" }; static constexpr const char* USAGE_SEMANTICS[] = { "POSITION", "BLENDWEIGHT", "BLENDINDICES", "NORMAL", "PSIZE", "TEXCOORD", "TANGENT", "BINORMAL", "TESSFACTOR", "POSITIONT", "COLOR", "FOG", "DEPTH", "SAMPLE" }; struct DeclUsageLocation { DeclUsage usage; uint32_t usageIndex; uint32_t location; }; static constexpr DeclUsageLocation USAGE_LOCATIONS[] = { { DeclUsage::Position, 0, 0 }, { DeclUsage::Normal, 0, 1 }, { DeclUsage::Tangent, 0, 2 }, { DeclUsage::Binormal, 0, 3 }, { DeclUsage::TexCoord, 0, 4 }, { DeclUsage::TexCoord, 1, 5 }, { DeclUsage::TexCoord, 2, 6 }, { DeclUsage::TexCoord, 3, 7 }, { DeclUsage::Color, 0, 8 }, { DeclUsage::BlendIndices, 0, 9 }, { DeclUsage::BlendWeight, 0, 10 }, { DeclUsage::Color, 1, 11 }, { DeclUsage::TexCoord, 4, 12 }, { DeclUsage::TexCoord, 5, 13 }, { DeclUsage::TexCoord, 6, 14 }, { DeclUsage::TexCoord, 7, 15 }, { DeclUsage::Position, 1, 15 }, }; static constexpr std::pair INTERPOLATORS[] = { { DeclUsage::TexCoord, 0 }, { DeclUsage::TexCoord, 1 }, { DeclUsage::TexCoord, 2 }, { DeclUsage::TexCoord, 3 }, { DeclUsage::TexCoord, 4 }, { DeclUsage::TexCoord, 5 }, { DeclUsage::TexCoord, 6 }, { DeclUsage::TexCoord, 7 }, { DeclUsage::TexCoord, 8 }, { DeclUsage::TexCoord, 9 }, { DeclUsage::TexCoord, 10 }, { DeclUsage::TexCoord, 11 }, { DeclUsage::TexCoord, 12 }, { DeclUsage::TexCoord, 13 }, { DeclUsage::TexCoord, 14 }, { DeclUsage::TexCoord, 15 }, { DeclUsage::Color, 0 }, { DeclUsage::Color, 1 } }; static FetchDestinationSwizzle getDestSwizzle(uint32_t dstSwizzle, uint32_t index) { return FetchDestinationSwizzle((dstSwizzle >> (index * 3)) & 0x7); } void ShaderRecompiler::printDstSwizzle(uint32_t dstSwizzle, bool operand) { for (size_t i = 0; i < 4; i++) { const auto swizzle = getDestSwizzle(dstSwizzle, i); if (swizzle >= FetchDestinationSwizzle::X && swizzle <= FetchDestinationSwizzle::W) out += SWIZZLES[operand ? uint32_t(swizzle) : i]; } } void ShaderRecompiler::printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle) { for (size_t i = 0; i < 4; i++) { const auto swizzle = getDestSwizzle(dstSwizzle, i); if (swizzle == FetchDestinationSwizzle::Zero) { indent(); println("r{}.{} = 0.0;", dstRegister, SWIZZLES[i]); } else if (swizzle == FetchDestinationSwizzle::One) { indent(); println("r{}.{} = 1.0;", dstRegister, SWIZZLES[i]); } } } void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t address) { if (instr.isPredicated) { indent(); println("if ({}p0)", instr.predicateCondition ? "" : "!"); indent(); out += "{\n"; ++indentation; } indent(); print("r{}.", instr.dstRegister); printDstSwizzle(instr.dstSwizzle, false); out += " = "; auto findResult = vertexElements.find(address); assert(findResult != vertexElements.end()); switch (findResult->second.usage) { case DeclUsage::Normal: case DeclUsage::Tangent: case DeclUsage::Binormal: print("tfetchR11G11B10(GET_SHARED_CONSTANT(g_InputLayoutFlags), "); break; case DeclUsage::TexCoord: print("tfetchTexcoord(GET_SHARED_CONSTANT(g_SwappedTexcoords), "); break; } print("i{}{}", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex)); switch (findResult->second.usage) { case DeclUsage::Normal: case DeclUsage::Tangent: case DeclUsage::Binormal: out += ')'; break; case DeclUsage::TexCoord: print(", {})", uint32_t(findResult->second.usageIndex)); break; } out += '.'; printDstSwizzle(instr.dstSwizzle, true); out += ";\n"; printDstSwizzle01(instr.dstRegister, instr.dstSwizzle); if (instr.isPredicated) { --indentation; indent(); out += "}\n"; } } void ShaderRecompiler::recompile(const TextureFetchInstruction& instr) { if (instr.opcode != FetchOpcode::TextureFetch) return; if (instr.isPredicated) { indent(); println("if ({}p0)", instr.predCondition ? "" : "!"); indent(); out += "{\n"; ++indentation; } indent(); print("r{}.", instr.dstRegister); printDstSwizzle(instr.dstSwizzle, false); out += " = tfetch"; uint32_t componentCount = 0; switch (instr.dimension) { case TextureDimension::Texture1D: out += "1D("; componentCount = 1; break; case TextureDimension::Texture2D: out += "2D("; componentCount = 2; break; case TextureDimension::Texture3D: out += "3D("; componentCount = 3; break; case TextureDimension::TextureCube: out += "Cube("; componentCount = 3; break; } auto findResult = samplers.find(instr.constIndex); if (findResult != samplers.end()) print("GET_SHARED_CONSTANT({}_ResourceDescriptorIndex), GET_SHARED_CONSTANT({}_SamplerDescriptorIndex)", findResult->second, findResult->second); else print("GET_SHARED_CONSTANT(s{}_ResourceDescriptorIndex), GET_SHARED_CONSTANT(s{}_SamplerDescriptorIndex)", instr.constIndex, instr.constIndex); print(", r{}.", instr.srcRegister); for (size_t i = 0; i < componentCount; i++) out += SWIZZLES[((instr.srcSwizzle >> (i * 2))) & 0x3]; if (instr.dimension == TextureDimension::TextureCube) out += ", cubeMapData"; out += ")."; printDstSwizzle(instr.dstSwizzle, true); out += ";\n"; printDstSwizzle01(instr.dstRegister, instr.dstSwizzle); if (instr.isPredicated) { --indentation; indent(); out += "}\n"; } } void ShaderRecompiler::recompile(const AluInstruction& instr) { if (instr.isPredicated) { indent(); println("if ({}p0)", instr.predicateCondition ? "" : "!"); indent(); out += "{\n"; ++indentation; } enum { VECTOR_0, VECTOR_1, VECTOR_2, SCALAR_0, SCALAR_1, SCALAR_CONSTANT_0, SCALAR_CONSTANT_1 }; auto op = [&](size_t operand) { size_t reg = 0; size_t swizzle = 0; bool select = true; bool negate = false; bool abs = false; switch (operand) { case SCALAR_CONSTANT_0: reg = instr.src3Register; swizzle = instr.src3Swizzle; select = false; negate = instr.src3Negate; abs = instr.absConstants; break; case SCALAR_CONSTANT_1: reg = (uint32_t(instr.scalarOpcode) & 1) | (instr.src3Select << 1) | (instr.src3Swizzle & 0x3C); swizzle = instr.src3Swizzle; select = true; negate = instr.src3Negate; abs = instr.absConstants; break; default: switch (operand) { case VECTOR_0: reg = instr.src1Register; swizzle = instr.src1Swizzle; select = instr.src1Select; negate = instr.src1Negate; break; case VECTOR_1: reg = instr.src2Register; swizzle = instr.src2Swizzle; select = instr.src2Select; negate = instr.src2Negate; break; case VECTOR_2: case SCALAR_0: case SCALAR_1: reg = instr.src3Register; swizzle = instr.src3Swizzle; select = instr.src3Select; negate = instr.src3Negate; break; } if (select) { abs = (reg & 0x80) != 0; reg &= 0x3F; } else { abs = instr.absConstants; } break; } std::string regFormatted; if (select) { regFormatted = std::format("r{}", reg); } else { auto findResult = float4Constants.find(reg); if (findResult != float4Constants.end()) { const char* constantName = reinterpret_cast(constantTableData + findResult->second->name); if (findResult->second->registerCount > 1) { regFormatted = std::format("GET_CONSTANT({})[{}{}]", constantName, reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : ""); } else { assert(!instr.const0Relative && !instr.const1Relative); regFormatted = std::format("GET_CONSTANT({})", constantName); } } else { assert(!instr.const0Relative && !instr.const1Relative); regFormatted = std::format("c{}", reg); } } std::string result; if (negate) result += '-'; if (abs) result += "abs("; result += regFormatted; result += '.'; switch (operand) { case VECTOR_0: case VECTOR_1: case VECTOR_2: { uint32_t mask; switch (instr.vectorOpcode) { case AluVectorOpcode::Dp2Add: mask = (operand == VECTOR_2) ? 0b1 : 0b11; break; case AluVectorOpcode::Dp3: mask = 0b111; break; case AluVectorOpcode::Dp4: case AluVectorOpcode::Max4: mask = 0b1111; break; default: mask = instr.vectorWriteMask != 0 ? instr.vectorWriteMask : 0b1; break; } for (size_t i = 0; i < 4; i++) { if ((mask >> i) & 0x1) result += SWIZZLES[((swizzle >> (i * 2)) + i) & 0x3]; } break; } case SCALAR_0: case SCALAR_CONSTANT_0: result += SWIZZLES[((swizzle >> 6) + 3) & 0x3]; break; case SCALAR_1: case SCALAR_CONSTANT_1: result += SWIZZLES[swizzle & 0x3]; break; } if (abs) result += ")"; return result; }; switch (instr.vectorOpcode) { case AluVectorOpcode::KillEq: indent(); println("clip(any({} == {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::KillGt: indent(); println("clip(any({} > {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::KillGe: indent(); println("clip(any({} >= {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::KillNe: indent(); println("clip(any({} != {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); break; } std::string_view exportRegister; if (instr.exportData) { if (isPixelShader) { switch (ExportRegister(instr.vectorDest)) { case ExportRegister::PSColor0: exportRegister = "oC0"; break; case ExportRegister::PSColor1: exportRegister = "oC1"; break; case ExportRegister::PSColor2: exportRegister = "oC2"; break; case ExportRegister::PSColor3: exportRegister = "oC3"; break; case ExportRegister::PSDepth: exportRegister = "oDepth"; break; } } else { switch (ExportRegister(instr.vectorDest)) { case ExportRegister::VSPosition: exportRegister = "oPos"; break; default: { auto findResult = interpolators.find(instr.vectorDest); assert(findResult != interpolators.end()); exportRegister = findResult->second; break; } } } } if (instr.vectorOpcode >= AluVectorOpcode::SetpEqPush && instr.vectorOpcode <= AluVectorOpcode::SetpGePush) { indent(); print("p0 = {} == 0.0 && {} ", op(VECTOR_0), op(VECTOR_1)); switch (instr.vectorOpcode) { case AluVectorOpcode::SetpEqPush: out += "=="; break; case AluVectorOpcode::SetpNePush: out += "!="; break; case AluVectorOpcode::SetpGtPush: out += ">"; break; case AluVectorOpcode::SetpGePush: out += ">="; break; } out += " 0.0;\n"; } else if (instr.vectorOpcode >= AluVectorOpcode::MaxA) { indent(); println("a0 = (int)clamp(floor(({}).w + 0.5), -256.0, 255.0);", op(VECTOR_0)); } uint32_t vectorWriteMask = instr.vectorWriteMask; if (instr.exportData) vectorWriteMask &= ~instr.scalarWriteMask; if (vectorWriteMask != 0) { indent(); if (!exportRegister.empty()) { out += exportRegister; out += '.'; } else { print("r{}.", instr.vectorDest); } for (size_t i = 0; i < 4; i++) { if ((vectorWriteMask >> i) & 0x1) out += SWIZZLES[i]; } out += " = "; if (instr.vectorSaturate) out += "saturate("; switch (instr.vectorOpcode) { case AluVectorOpcode::Add: print("{} + {}", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Mul: print("{} * {}", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Max: case AluVectorOpcode::MaxA: print("max({}, {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Min: print("min({}, {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Seq: print("{} == {}", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Sgt: print("{} > {}", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Sge: print("{} >= {}", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Sne: print("{} != {}", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Frc: print("frac({})", op(VECTOR_0)); break; case AluVectorOpcode::Trunc: print("trunc({})", op(VECTOR_0)); break; case AluVectorOpcode::Floor: print("floor({})", op(VECTOR_0)); break; case AluVectorOpcode::Mad: print("{} * {} + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); break; case AluVectorOpcode::CndEq: print("select({} == 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); break; case AluVectorOpcode::CndGe: print("select({} >= 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); break; case AluVectorOpcode::CndGt: print("select({} > 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); break; case AluVectorOpcode::Dp4: case AluVectorOpcode::Dp3: print("dot({}, {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Dp2Add: print("dot({}, {}) + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); break; case AluVectorOpcode::Cube: print("cube(r{}, cubeMapData)", instr.src1Register); break; case AluVectorOpcode::Max4: print("max4({})", op(VECTOR_0)); break; case AluVectorOpcode::SetpEqPush: case AluVectorOpcode::SetpNePush: case AluVectorOpcode::SetpGtPush: case AluVectorOpcode::SetpGePush: print("p0 ? 0.0 : {} + 1.0", op(VECTOR_0)); break; case AluVectorOpcode::KillEq: print("any({} == {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::KillGt: print("any({} > {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::KillGe: print("any({} >= {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::KillNe: print("any({} != {})", op(VECTOR_0), op(VECTOR_1)); break; case AluVectorOpcode::Dst: print("dst({}, {})", op(VECTOR_0), op(VECTOR_1)); break; } if (instr.vectorSaturate) out += ')'; out += ";\n"; } if (instr.scalarOpcode != AluScalarOpcode::RetainPrev) { if (instr.scalarOpcode >= AluScalarOpcode::SetpEq && instr.scalarOpcode <= AluScalarOpcode::SetpRstr) { indent(); out += "p0 = "; switch (instr.scalarOpcode) { case AluScalarOpcode::SetpEq: print("{} == 0.0", op(SCALAR_0)); break; case AluScalarOpcode::SetpNe: print("{} != 0.0", op(SCALAR_0)); break; case AluScalarOpcode::SetpGt: print("{} > 0.0", op(SCALAR_0)); break; case AluScalarOpcode::SetpGe: print("{} >= 0.0", op(SCALAR_0)); break; case AluScalarOpcode::SetpInv: print("{} == 1.0", op(SCALAR_0)); break; case AluScalarOpcode::SetpPop: print("{} - 1.0 <= 0.0", op(SCALAR_0)); break; case AluScalarOpcode::SetpClr: out += "false"; break; case AluScalarOpcode::SetpRstr: print("{} == 0.0", op(SCALAR_0)); break; } out += ";\n"; } indent(); out += "ps = "; if (instr.scalarSaturate) out += "saturate("; switch (instr.scalarOpcode) { case AluScalarOpcode::Adds: print("{} + {}", op(SCALAR_0), op(SCALAR_1)); break; case AluScalarOpcode::AddsPrev: print("{} + ps", op(SCALAR_0)); break; case AluScalarOpcode::Muls: print("{} * {}", op(SCALAR_0), op(SCALAR_1)); break; case AluScalarOpcode::MulsPrev: case AluScalarOpcode::MulsPrev2: print("{} * ps", op(SCALAR_0)); break; case AluScalarOpcode::Maxs: case AluScalarOpcode::MaxAs: case AluScalarOpcode::MaxAsf: print("max({}, {})", op(SCALAR_0), op(SCALAR_1)); break; case AluScalarOpcode::Mins: print("min({}, {})", op(SCALAR_0), op(SCALAR_1)); break; case AluScalarOpcode::Seqs: print("{} == 0.0", op(SCALAR_0)); break; case AluScalarOpcode::Sgts: print("{} > 0.0", op(SCALAR_0)); break; case AluScalarOpcode::Sges: print("{} >= 0.0", op(SCALAR_0)); break; case AluScalarOpcode::Snes: print("{} != 0.0", op(SCALAR_0)); break; case AluScalarOpcode::Frcs: print("frac({})", op(SCALAR_0)); break; case AluScalarOpcode::Truncs: print("trunc({})", op(SCALAR_0)); break; case AluScalarOpcode::Floors: print("floor({})", op(SCALAR_0)); break; case AluScalarOpcode::Exp: print("exp2({})", op(SCALAR_0)); break; case AluScalarOpcode::Logc: case AluScalarOpcode::Log: print("clamp(log2({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); break; case AluScalarOpcode::Rcpc: case AluScalarOpcode::Rcpf: case AluScalarOpcode::Rcp: print("clamp(rcp({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); break; case AluScalarOpcode::Rsqc: case AluScalarOpcode::Rsqf: case AluScalarOpcode::Rsq: print("clamp(rsqrt({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); break; case AluScalarOpcode::Subs: print("{} - {}", op(SCALAR_0), op(SCALAR_1)); break; case AluScalarOpcode::SubsPrev: print("{} - ps", op(SCALAR_0)); break; case AluScalarOpcode::SetpEq: case AluScalarOpcode::SetpNe: case AluScalarOpcode::SetpGt: case AluScalarOpcode::SetpGe: out += "p0 ? 0.0 : 1.0"; break; case AluScalarOpcode::SetpInv: print("{0} == 0.0 ? 1.0 : {0}", op(SCALAR_0)); break; case AluScalarOpcode::SetpPop: print("p0 ? 0.0 : ({} - 1.0)", op(SCALAR_0)); break; case AluScalarOpcode::SetpClr: out += "FLT_MAX"; break; case AluScalarOpcode::SetpRstr: print("p0 ? 0.0 : {}", op(SCALAR_0)); break; case AluScalarOpcode::KillsEq: print("{} == 0.0", op(SCALAR_0)); break; case AluScalarOpcode::KillsGt: print("{} > 0.0", op(SCALAR_0)); break; case AluScalarOpcode::KillsGe: print("{} >= 0.0", op(SCALAR_0)); break; case AluScalarOpcode::KillsNe: print("{} != 0.0", op(SCALAR_0)); break; case AluScalarOpcode::KillsOne: print("{} == 1.0", op(SCALAR_0)); break; case AluScalarOpcode::Sqrt: print("sqrt({})", op(SCALAR_0)); break; case AluScalarOpcode::Mulsc0: case AluScalarOpcode::Mulsc1: print("{} * {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); break; case AluScalarOpcode::Addsc0: case AluScalarOpcode::Addsc1: print("{} + {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); break; case AluScalarOpcode::Subsc0: case AluScalarOpcode::Subsc1: print("{} - {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); break; case AluScalarOpcode::Sin: print("sin({})", op(SCALAR_0)); break; case AluScalarOpcode::Cos: print("cos({})", op(SCALAR_0)); break; } if (instr.scalarSaturate) out += ')'; out += ";\n"; switch (instr.scalarOpcode) { case AluScalarOpcode::MaxAs: indent(); println("a0 = (int)clamp(floor({} + 0.5), -256.0, 255.0);", op(SCALAR_0)); break; case AluScalarOpcode::MaxAsf: indent(); println("a0 = (int)clamp(floor({}), -256.0, 255.0);", op(SCALAR_0)); break; } } uint32_t scalarWriteMask = instr.scalarWriteMask; if (instr.exportData) scalarWriteMask &= ~instr.vectorWriteMask; if (scalarWriteMask != 0) { indent(); if (!exportRegister.empty()) { out += exportRegister; out += '.'; } else { print("r{}.", instr.scalarDest); } for (size_t i = 0; i < 4; i++) { if ((scalarWriteMask >> i) & 0x1) out += SWIZZLES[i]; } out += " = ps;\n"; } if (instr.exportData) { uint32_t zeroMask = instr.scalarDestRelative ? (0b1111 & ~(instr.vectorWriteMask | instr.scalarWriteMask)) : 0; uint32_t oneMask = instr.vectorWriteMask & instr.scalarWriteMask; for (size_t i = 0; i < 4; i++) { uint32_t mask = 1 << i; if (zeroMask & mask) { indent(); println("{}.{} = 0.0;", exportRegister, SWIZZLES[i]); } else if (oneMask & mask) { indent(); println("{}.{} = 1.0;", exportRegister, SWIZZLES[i]); } } } if (instr.scalarOpcode >= AluScalarOpcode::KillsEq && instr.scalarOpcode <= AluScalarOpcode::KillsOne) { indent(); out += "clip(ps != 0.0 ? 1 : -1);\n"; } if (instr.isPredicated) { --indentation; indent(); out += "}\n"; } } void ShaderRecompiler::recompile(const uint8_t* shaderData) { const auto shaderContainer = reinterpret_cast(shaderData); assert((shaderContainer->flags & 0xFFFFFF00) == 0x102A1100); assert(shaderContainer->constantTableOffset != NULL); out += std::string_view(SHADER_COMMON_HLSLI, SHADER_COMMON_HLSLI_SIZE); isPixelShader = (shaderContainer->flags & 0x1) == 0; const auto constantTableContainer = reinterpret_cast(shaderData + shaderContainer->constantTableOffset); constantTableData = reinterpret_cast(&constantTableContainer->constantTable); println("CONSTANT_BUFFER(Constants, b{})", isPixelShader ? 1 : 0); out += "{\n"; bool isMetaInstancer = false; bool hasIndexCount = false; bool isCsdShader = false; for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) { const auto constantInfo = reinterpret_cast( constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); assert(constantInfo->registerSet != RegisterSet::Int4); if (constantInfo->registerSet == RegisterSet::Float4) { const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); if (!isPixelShader) { if (strcmp(constantName, "g_InstanceTypes") == 0) isMetaInstancer = true; else if (strcmp(constantName, "g_IndexCount") == 0) hasIndexCount = true; else if (strcmp(constantName, "g_Z") == 0) isCsdShader = true; } print("\t[[vk::offset({})]] float4 {}", constantInfo->registerIndex * 16, constantName); if (constantInfo->registerCount > 1) print("[{}]", constantInfo->registerCount.get()); println(" PACK_OFFSET(c{});", constantInfo->registerIndex.get()); for (uint16_t j = 0; j < constantInfo->registerCount; j++) float4Constants.emplace(constantInfo->registerIndex + j, constantInfo); } } out += "};\n\n"; out += "CONSTANT_BUFFER(SharedConstants, b2)\n"; out += "{\n"; for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) { const auto constantInfo = reinterpret_cast( constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); assert(constantInfo->registerSet != RegisterSet::Int4); switch (constantInfo->registerSet) { case RegisterSet::Bool: { println("#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0)); boolConstants.emplace(constantInfo->registerIndex, constantName); break; } case RegisterSet::Sampler: { println("\t[[vk::offset({})]] uint {}_ResourceDescriptorIndex PACK_OFFSET(c{}.{});", constantInfo->registerIndex * 4, constantName, constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]); println("\t[[vk::offset({})]] uint {}_SamplerDescriptorIndex PACK_OFFSET(c{}.{});", 64 + constantInfo->registerIndex * 4, constantName, 4 + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]); samplers.emplace(constantInfo->registerIndex, constantName); break; } } } out += "\tSHARED_CONSTANTS;\n"; out += "};\n\n"; const auto shader = reinterpret_cast(shaderData + shaderContainer->shaderOffset); out += "void main(\n"; if (isPixelShader) { out += "\tin float4 iPos : SV_Position,\n"; for (auto& [usage, usageIndex] : INTERPOLATORS) println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); out += "\tin bool iFace : SV_IsFrontFace"; auto pixelShader = reinterpret_cast(shader); if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) out += ",\n\tout float4 oC0 : SV_Target0"; if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) out += ",\n\tout float4 oC1 : SV_Target1"; if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2) out += ",\n\tout float4 oC2 : SV_Target2"; if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) out += ",\n\tout float4 oC3 : SV_Target3"; if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) out += ",\n\tout float oDepth : SV_Depth"; } else { auto vertexShader = reinterpret_cast(shader); for (uint32_t i = 0; i < vertexShader->vertexElementCount; i++) { union { VertexElement vertexElement; uint32_t value; }; value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + i]; const char* usageType = USAGE_TYPES[uint32_t(vertexElement.usage)]; if (isMetaInstancer && vertexElement.usage == DeclUsage::TexCoord && vertexElement.usageIndex == 2) usageType = "uint4"; out += '\t'; for (auto& usageLocation : USAGE_LOCATIONS) { if (usageLocation.usage == vertexElement.usage && usageLocation.usageIndex == vertexElement.usageIndex) { print("[[vk::location({})]] ", usageLocation.location); break; } } println("in {0} i{1}{2} : {3}{2},", usageType, USAGE_VARIABLES[uint32_t(vertexElement.usage)], uint32_t(vertexElement.usageIndex), USAGE_SEMANTICS[uint32_t(vertexElement.usage)]); vertexElements.emplace(uint32_t(vertexElement.address), vertexElement); } if (hasIndexCount) { out += "\tin uint iVertexId : SV_VertexID,\n"; out += "\tin uint iInstanceId : SV_InstanceID,\n"; } out += "\tout float4 oPos : SV_Position"; for (auto& [usage, usageIndex] : INTERPOLATORS) print(",\n\tout float4 o{0}{1} : {2}{1}", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); } out += ")\n"; out += "{\n"; out += "#ifdef __spirv__\n"; println("\tConstants constants = vk::RawBufferLoad(g_PushConstants.{}ShaderConstants, 0x100);", isPixelShader ? "Pixel" : "Vertex"); out += "\tSharedConstants sharedConstants = vk::RawBufferLoad(g_PushConstants.SharedConstants, 0x100);\n"; out += "#endif\n\n"; if (shaderContainer->definitionTableOffset != NULL) { auto definitionTable = reinterpret_cast(shaderData + shaderContainer->definitionTableOffset); auto definitions = definitionTable->definitions; while (*definitions != 0) { auto definition = reinterpret_cast(definitions); auto value = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + definition->physicalOffset); for (uint16_t i = 0; i < (definition->count + 3) / 4; i++) { println("\tfloat4 c{} = asfloat(uint4(0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}));", definition->registerIndex + i - (isPixelShader ? 256 : 0), value[0].get(), value[1].get(), value[2].get(), value[3].get()); value += 4; } definitions += 2; } ++definitions; while (*definitions != 0) { auto definition = reinterpret_cast(definitions); for (uint16_t i = 0; i < definition->count; i++) { union { uint32_t value; struct { int8_t x; int8_t y; int8_t z; int8_t w; }; }; value = definition->values[i].get(); println("\tint4 i{} = int4({}, {}, {}, {});", (definition->registerIndex - 8992) / 4 + i, x, y, z, w); } definitions += 2; definitions += definition->count; } out += "\n"; } bool printedRegisters[32]{}; uint32_t interpolatorCount = (shader->interpolatorInfo >> 5) & 0x1F; for (uint32_t i = 0; i < interpolatorCount; i++) { union { Interpolator interpolator; uint32_t value; }; if (isPixelShader) { value = reinterpret_cast(shader)->interpolators[i]; println("\tfloat4 r{} = i{}{};", uint32_t(interpolator.reg), USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex)); printedRegisters[interpolator.reg] = true; } else { auto vertexShader = reinterpret_cast(shader); value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + vertexShader->vertexElementCount + i]; interpolators.emplace(i, std::format("o{}{}", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex))); } } if (!isPixelShader) { out += "\toPos = 0.0;\n"; for (auto& [usage, usageIndex] : INTERPOLATORS) println("\to{}{} = 0.0;", USAGE_VARIABLES[uint32_t(usage)], usageIndex); out += "\n"; } for (size_t i = 0; i < 32; i++) { if (!printedRegisters[i]) { print("\tfloat4 r{} = ", i); if (isPixelShader && i == ((shader->fieldC >> 8) & 0xFF)) { out += "float4((iPos.xy - 0.5) * float2(iFace ? 1.0 : -1.0, 1.0), 0.0, 0.0);\n"; } else if (!isPixelShader && hasIndexCount && i == 0) { out += "float4(iVertexId + GET_CONSTANT(g_IndexCount).x * iInstanceId, 0.0, 0.0, 0.0);\n"; } else { out += "0.0;\n"; } } } out += "\tint a0 = 0;\n"; out += "\tint aL = 0;\n"; out += "\tbool p0 = false;\n"; out += "\tfloat ps = 0.0;\n"; if (isPixelShader) out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n"; const be* code = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset); union { ControlFlowInstruction controlFlow[2]; struct { uint32_t code0; uint32_t code1; uint32_t code2; uint32_t code3; }; }; auto controlFlowCode = code; uint32_t instrAddress = 0; uint32_t instrSize = shader->size; bool simpleControlFlow = true; while (instrAddress < instrSize) { code0 = controlFlowCode[0]; code1 = controlFlowCode[1] & 0xFFFF; code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16); code3 = controlFlowCode[2] >> 16; for (auto& cfInstr : controlFlow) { uint32_t address = 0; switch (cfInstr.opcode) { case ControlFlowOpcode::Exec: case ControlFlowOpcode::ExecEnd: address = cfInstr.exec.address; break; case ControlFlowOpcode::CondExec: case ControlFlowOpcode::CondExecEnd: case ControlFlowOpcode::CondExecPredClean: case ControlFlowOpcode::CondExecPredCleanEnd: address = cfInstr.condExec.address; break; case ControlFlowOpcode::CondExecPred: case ControlFlowOpcode::CondExecPredEnd: address = cfInstr.condExecPred.address; break; case ControlFlowOpcode::CondJmp: { if (cfInstr.condJmp.isUnconditional || cfInstr.condJmp.direction) simpleControlFlow = false; else ++ifEndLabels[cfInstr.condJmp.address]; break; } } if (address != 0) instrSize = std::min(instrSize, address * 12); } controlFlowCode += 3; instrAddress += 12; } if (simpleControlFlow) { out += '\n'; indentation = 1; } else { out += "\n\tuint pc = 0;\n"; out += "\twhile (true)\n"; out += "\t{\n"; out += "\t\tswitch (pc)\n"; out += "\t\t{\n"; } controlFlowCode = code; instrAddress = 0; uint32_t pc = 0; while (instrAddress < instrSize) { code0 = controlFlowCode[0]; code1 = controlFlowCode[1] & 0xFFFF; code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16); code3 = controlFlowCode[2] >> 16; for (auto& cfInstr : controlFlow) { if (!simpleControlFlow) { indentation = 3; println("\t\tcase {}:", pc); } else { auto findResult = ifEndLabels.find(pc); if (findResult != ifEndLabels.end()) { for (uint32_t i = 0; i < findResult->second; i++) { --indentation; indent(); out += "}\n"; } } } ++pc; uint32_t address = 0; uint32_t count = 0; uint32_t sequence = 0; bool shouldReturn = false; bool shouldCloseCurlyBracket = false; switch (cfInstr.opcode) { case ControlFlowOpcode::Exec: case ControlFlowOpcode::ExecEnd: address = cfInstr.exec.address; count = cfInstr.exec.count; sequence = cfInstr.exec.sequence; shouldReturn = (cfInstr.opcode == ControlFlowOpcode::ExecEnd); break; case ControlFlowOpcode::CondExec: case ControlFlowOpcode::CondExecEnd: case ControlFlowOpcode::CondExecPredClean: case ControlFlowOpcode::CondExecPredCleanEnd: address = cfInstr.condExec.address; count = cfInstr.condExec.count; sequence = cfInstr.condExec.sequence; shouldReturn = (cfInstr.opcode == ControlFlowOpcode::CondExecEnd || cfInstr.opcode == ControlFlowOpcode::CondExecEnd); break; case ControlFlowOpcode::CondExecPred: case ControlFlowOpcode::CondExecPredEnd: address = cfInstr.condExecPred.address; count = cfInstr.condExecPred.count; sequence = cfInstr.condExecPred.sequence; shouldReturn = (cfInstr.opcode == ControlFlowOpcode::CondExecPredEnd); break; case ControlFlowOpcode::LoopStart: if (simpleControlFlow) { indent(); println("for (aL = 0; aL < i{}.x; aL++)", uint32_t(cfInstr.loopStart.loopId)); indent(); out += "{\n"; ++indentation; } else { out += "\t\t\taL = 0;\n"; } break; case ControlFlowOpcode::LoopEnd: if (simpleControlFlow) { --indentation; indent(); out += "}\n"; } else { out += "\t\t\t++aL;\n"; println("\t\t\tif (aL < i{}.x)", uint32_t(cfInstr.loopEnd.loopId)); out += "\t\t\t{\n"; println("\t\t\t\tpc = {};", uint32_t(cfInstr.loopEnd.address)); out += "\t\t\t\tcontinue;\n"; out += "\t\t\t}\n"; } break; case ControlFlowOpcode::CondJmp: { if (cfInstr.condJmp.isUnconditional) { assert(!simpleControlFlow); println("\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); out += "\t\t\tcontinue;\n"; } else { indent(); if (cfInstr.condJmp.isPredicated) { println("if ({}p0)", cfInstr.condJmp.condition ^ simpleControlFlow ? "" : "!"); } else { auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress); if (findResult != boolConstants.end()) println("if ((GET_SHARED_CONSTANT(g_Booleans) & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "="); else println("if (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "="); } if (simpleControlFlow) { indent(); out += "{\n"; ++indentation; } else { out += "\t\t\t{\n"; println("\t\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); out += "\t\t\t\tcontinue;\n"; out += "\t\t\t}\n"; } } break; } } auto instructionCode = code + address * 3; for (uint32_t i = 0; i < count; i++) { union { VertexFetchInstruction vertexFetch; TextureFetchInstruction textureFetch; AluInstruction alu; struct { uint32_t code0; uint32_t code1; uint32_t code2; }; }; code0 = instructionCode[0]; code1 = instructionCode[1]; code2 = instructionCode[2]; if ((sequence & 0x1) != 0) { if (vertexFetch.opcode == FetchOpcode::VertexFetch) recompile(vertexFetch, address + i); else recompile(textureFetch); } else { recompile(alu); } sequence >>= 2; instructionCode += 3; } if (shouldReturn) { if (isPixelShader) { indent(); out += "if (GET_SHARED_CONSTANT(g_AlphaTestMode) != 0) clip(oC0.w - GET_SHARED_CONSTANT(g_AlphaThreshold));\n"; } else if (isCsdShader) { indent(); out += "oPos.xy += float2(GET_CONSTANT(g_ViewportSize.z), -GET_CONSTANT(g_ViewportSize.w));\n"; } if (simpleControlFlow) { indent(); out += "return;\n"; } else { out += "\t\t\tbreak;\n"; } } if (shouldCloseCurlyBracket) { --indentation; indent(); out += "}\n"; } } controlFlowCode += 3; instrAddress += 12; } if (!simpleControlFlow) { out += "\t\t\tbreak;\n"; out += "\t\t}\n"; out += "\t\tbreak;\n"; out += "\t}\n"; } out += "}"; }