Add simple control flow detection.

This commit is contained in:
Skyth 2024-10-17 14:59:20 +03:00
parent 8743874ffe
commit 9ebccbae90
2 changed files with 216 additions and 103 deletions

View file

@ -1232,32 +1232,27 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
if (isPixelShader) if (isPixelShader)
out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n"; out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n";
out += "\n\tuint pc = 0;\n";
out += "\twhile (true)\n";
out += "\t{\n";
out += "\t\tswitch (pc)\n";
out += "\t\t{\n";
const be<uint32_t>* code = reinterpret_cast<const be<uint32_t>*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset); const be<uint32_t>* code = reinterpret_cast<const be<uint32_t>*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset);
auto controlFlowCode = code; union
uint32_t pc = 0;
uint32_t minInstrAddress = shader->size;
while (pc * 6 < minInstrAddress)
{ {
union ControlFlowInstruction controlFlow[2];
struct
{ {
ControlFlowInstruction controlFlow[2]; uint32_t code0;
struct uint32_t code1;
{ uint32_t code2;
uint32_t code0; uint32_t code3;
uint32_t code1;
uint32_t code2;
uint32_t code3;
};
}; };
};
auto controlFlowCode = code;
uint32_t instrAddress = 0;
uint32_t instrSize = shader->size;
bool simpleControlFlow = true;
while (instrAddress < instrSize)
{
code0 = controlFlowCode[0]; code0 = controlFlowCode[0];
code1 = controlFlowCode[1] & 0xFFFF; code1 = controlFlowCode[1] & 0xFFFF;
code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16); code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16);
@ -1265,27 +1260,108 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
for (auto& cfInstr : controlFlow) for (auto& cfInstr : controlFlow)
{ {
indentation = 3; uint32_t address = 0;
println("\t\tcase {}:", pc);
switch (cfInstr.opcode)
{
case ControlFlowOpcode::Exec:
case ControlFlowOpcode::ExecEnd:
address = cfInstr.exec.address;
break;
case ControlFlowOpcode::CondExec:
case ControlFlowOpcode::CondExecEnd:
case ControlFlowOpcode::CondExecPredClean:
case ControlFlowOpcode::CondExecPredCleanEnd:
address = cfInstr.condExec.address;
break;
case ControlFlowOpcode::CondExecPred:
case ControlFlowOpcode::CondExecPredEnd:
address = cfInstr.condExecPred.address;
break;
case ControlFlowOpcode::CondJmp:
{
if (cfInstr.condJmp.isUnconditional || cfInstr.condJmp.direction)
simpleControlFlow = false;
else
++ifEndLabels[cfInstr.condJmp.address];
break;
}
}
if (address != 0)
instrSize = std::min<uint32_t>(instrSize, address * 12);
}
controlFlowCode += 3;
instrAddress += 12;
}
if (simpleControlFlow)
{
out += '\n';
indentation = 1;
}
else
{
out += "\n\tuint pc = 0;\n";
out += "\twhile (true)\n";
out += "\t{\n";
out += "\t\tswitch (pc)\n";
out += "\t\t{\n";
}
controlFlowCode = code;
instrAddress = 0;
uint32_t pc = 0;
while (instrAddress < instrSize)
{
code0 = controlFlowCode[0];
code1 = controlFlowCode[1] & 0xFFFF;
code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16);
code3 = controlFlowCode[2] >> 16;
for (auto& cfInstr : controlFlow)
{
if (!simpleControlFlow)
{
indentation = 3;
println("\t\tcase {}:", pc);
}
else
{
auto findResult = ifEndLabels.find(pc);
if (findResult != ifEndLabels.end())
{
for (uint32_t i = 0; i < findResult->second; i++)
{
--indentation;
indent();
out += "}\n";
}
}
}
++pc; ++pc;
uint32_t address = 0; uint32_t address = 0;
uint32_t count = 0; uint32_t count = 0;
uint32_t sequence = 0; uint32_t sequence = 0;
bool shouldBreak = false; bool shouldReturn = false;
bool shouldCloseCurlyBracket = false; bool shouldCloseCurlyBracket = false;
switch (cfInstr.opcode) switch (cfInstr.opcode)
{ {
case ControlFlowOpcode::Nop:
break;
case ControlFlowOpcode::Exec: case ControlFlowOpcode::Exec:
case ControlFlowOpcode::ExecEnd: case ControlFlowOpcode::ExecEnd:
address = cfInstr.exec.address; address = cfInstr.exec.address;
count = cfInstr.exec.count; count = cfInstr.exec.count;
sequence = cfInstr.exec.sequence; sequence = cfInstr.exec.sequence;
shouldBreak = (cfInstr.opcode == ControlFlowOpcode::ExecEnd); shouldReturn = (cfInstr.opcode == ControlFlowOpcode::ExecEnd);
break; break;
case ControlFlowOpcode::CondExec: case ControlFlowOpcode::CondExec:
@ -1295,7 +1371,7 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
address = cfInstr.condExec.address; address = cfInstr.condExec.address;
count = cfInstr.condExec.count; count = cfInstr.condExec.count;
sequence = cfInstr.condExec.sequence; sequence = cfInstr.condExec.sequence;
shouldBreak = (cfInstr.opcode == ControlFlowOpcode::CondExecEnd || cfInstr.opcode == ControlFlowOpcode::CondExecEnd); shouldReturn = (cfInstr.opcode == ControlFlowOpcode::CondExecEnd || cfInstr.opcode == ControlFlowOpcode::CondExecEnd);
break; break;
case ControlFlowOpcode::CondExecPred: case ControlFlowOpcode::CondExecPred:
@ -1303,127 +1379,163 @@ void ShaderRecompiler::recompile(const uint8_t* shaderData)
address = cfInstr.condExecPred.address; address = cfInstr.condExecPred.address;
count = cfInstr.condExecPred.count; count = cfInstr.condExecPred.count;
sequence = cfInstr.condExecPred.sequence; sequence = cfInstr.condExecPred.sequence;
shouldBreak = (cfInstr.opcode == ControlFlowOpcode::CondExecPredEnd); shouldReturn = (cfInstr.opcode == ControlFlowOpcode::CondExecPredEnd);
break; break;
case ControlFlowOpcode::LoopStart: case ControlFlowOpcode::LoopStart:
out += "\t\t\taL = 0;\n"; if (simpleControlFlow)
{
indent();
println("for (aL = 0; aL < i{}.x; aL++)", uint32_t(cfInstr.loopStart.loopId));
indent();
out += "{\n";
++indentation;
}
else
{
out += "\t\t\taL = 0;\n";
}
break; break;
case ControlFlowOpcode::LoopEnd: case ControlFlowOpcode::LoopEnd:
out += "\t\t\t++aL;\n"; if (simpleControlFlow)
println("\t\t\tif (aL < i{}.x)", uint32_t(cfInstr.loopEnd.loopId)); {
out += "\t\t\t{\n"; --indentation;
println("\t\t\t\tpc = {};", uint32_t(cfInstr.loopEnd.address)); indent();
out += "\t\t\t\tcontinue;\n"; out += "}\n";
out += "\t\t\t}\n"; }
break; else
{
case ControlFlowOpcode::CondCall: out += "\t\t\t++aL;\n";
case ControlFlowOpcode::Return: println("\t\t\tif (aL < i{}.x)", uint32_t(cfInstr.loopEnd.loopId));
out += "\t\t\t{\n";
println("\t\t\t\tpc = {};", uint32_t(cfInstr.loopEnd.address));
out += "\t\t\t\tcontinue;\n";
out += "\t\t\t}\n";
}
break; break;
case ControlFlowOpcode::CondJmp: case ControlFlowOpcode::CondJmp:
{ {
if (cfInstr.condJmp.isUnconditional) if (cfInstr.condJmp.isUnconditional)
{ {
assert(!simpleControlFlow);
println("\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); println("\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address));
out += "\t\t\tcontinue;\n"; out += "\t\t\tcontinue;\n";
} }
else else
{ {
indent();
if (cfInstr.condJmp.isPredicated) if (cfInstr.condJmp.isPredicated)
{ {
println("\t\t\tif ({}p0)", cfInstr.condJmp.condition ? "" : "!"); println("if ({}p0)", cfInstr.condJmp.condition ^ simpleControlFlow ? "" : "!");
} }
else else
{ {
auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress); auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress);
if (findResult != boolConstants.end()) if (findResult != boolConstants.end())
println("\t\t\tif ((GET_SHARED_CONSTANT(g_Booleans) & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ? "!" : "="); println("if ((GET_SHARED_CONSTANT(g_Booleans) & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
else else
println("\t\t\tif (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ? "!" : "="); println("if (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "=");
} }
out += "\t\t\t{\n"; if (simpleControlFlow)
println("\t\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address));
out += "\t\t\t\tcontinue;\n";
out += "\t\t\t}\n";
}
break;
}
case ControlFlowOpcode::Alloc:
case ControlFlowOpcode::MarkVsFetchDone:
break;
}
if (count != 0)
{
minInstrAddress = std::min<uint32_t>(minInstrAddress, address * 12);
auto instructionCode = code + address * 3;
for (uint32_t i = 0; i < count; i++)
{
union
{ {
VertexFetchInstruction vertexFetch; indent();
TextureFetchInstruction textureFetch; out += "{\n";
AluInstruction alu; ++indentation;
struct
{
uint32_t code0;
uint32_t code1;
uint32_t code2;
};
};
code0 = instructionCode[0];
code1 = instructionCode[1];
code2 = instructionCode[2];
if ((sequence & 0x1) != 0)
{
if (vertexFetch.opcode == FetchOpcode::VertexFetch)
recompile(vertexFetch, address + i);
else
recompile(textureFetch);
} }
else else
{ {
recompile(alu); out += "\t\t\t{\n";
println("\t\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address));
out += "\t\t\t\tcontinue;\n";
out += "\t\t\t}\n";
} }
sequence >>= 2;
instructionCode += 3;
} }
break;
}
} }
if (shouldBreak) auto instructionCode = code + address * 3;
out += "\t\t\tbreak;\n";
for (uint32_t i = 0; i < count; i++)
{
union
{
VertexFetchInstruction vertexFetch;
TextureFetchInstruction textureFetch;
AluInstruction alu;
struct
{
uint32_t code0;
uint32_t code1;
uint32_t code2;
};
};
code0 = instructionCode[0];
code1 = instructionCode[1];
code2 = instructionCode[2];
if ((sequence & 0x1) != 0)
{
if (vertexFetch.opcode == FetchOpcode::VertexFetch)
recompile(vertexFetch, address + i);
else
recompile(textureFetch);
}
else
{
recompile(alu);
}
sequence >>= 2;
instructionCode += 3;
}
if (shouldReturn)
{
if (isPixelShader)
{
indent();
out += "if (GET_SHARED_CONSTANT(g_AlphaTestMode) != 0) clip(oC0.w - GET_SHARED_CONSTANT(g_AlphaThreshold));\n";
}
else if (isCsdShader)
{
indent();
out += "oPos.xy += float2(GET_CONSTANT(g_ViewportSize.z), -GET_CONSTANT(g_ViewportSize.w));\n";
}
if (simpleControlFlow)
{
indent();
out += "return;\n";
}
else
{
out += "\t\t\tbreak;\n";
}
}
if (shouldCloseCurlyBracket) if (shouldCloseCurlyBracket)
{ {
--indentation; --indentation;
out += "\t\t\t}\n"; indent();
out += "}\n";
} }
} }
controlFlowCode += 3; controlFlowCode += 3;
instrAddress += 12;
} }
out += "\t\t\tbreak;\n"; if (!simpleControlFlow)
out += "\t\t}\n";
out += "\t\tbreak;\n";
out += "\t}\n";
if (isPixelShader)
{ {
out += "\tif (GET_SHARED_CONSTANT(g_AlphaTestMode) != 0) clip(oC0.w - GET_SHARED_CONSTANT(g_AlphaThreshold));\n"; out += "\t\t\tbreak;\n";
} out += "\t\t}\n";
else if (isCsdShader) out += "\t\tbreak;\n";
{ out += "\t}\n";
out += "\toPos.xy += float2(GET_CONSTANT(g_ViewportSize.z), -GET_CONSTANT(g_ViewportSize.w));\n";
} }
out += "}"; out += "}";

View file

@ -14,6 +14,7 @@ struct ShaderRecompiler
std::unordered_map<uint32_t, const ConstantInfo*> float4Constants; std::unordered_map<uint32_t, const ConstantInfo*> float4Constants;
std::unordered_map<uint32_t, const char*> boolConstants; std::unordered_map<uint32_t, const char*> boolConstants;
std::unordered_map<uint32_t, const char*> samplers; std::unordered_map<uint32_t, const char*> samplers;
std::unordered_map<uint32_t, uint32_t> ifEndLabels;
void indent() void indent()
{ {