commit a3e36719e719e77893672a6b653bd24f06425629 Author: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Wed Oct 16 16:40:31 2024 +0300 Initial commit. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..47d4afc --- /dev/null +++ b/.gitignore @@ -0,0 +1,399 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ +[Oo]ut/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual 
Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml diff --git a/.gitmodules b/.gitmodules 
# Build script for the ShaderRecomp command-line tool.
project(ShaderRecomp)

# Embed shader_common.hlsli into a generated header (byte array + size constant)
# that shader_recompiler.cpp includes.
include(bin2h.cmake)
bin2h(SOURCE_FILE shader_common.hlsli HEADER_FILE shader_common.hlsli.h VARIABLE_NAME SHADER_COMMON_HLSLI)

# bin2h() only runs while CMake configures. Registering the shader as a configure
# dependency makes the build re-run CMake (and regenerate the header) whenever the
# shader source is edited, instead of silently embedding a stale copy.
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS shader_common.hlsli)

add_executable(ShaderRecomp
    constant_table.h
    dxc_compiler.cpp
    dxc_compiler.h
    main.cpp
    pch.h
    shader.h
    shader_code.h
    shader_common.hlsli.h
    shader_recompiler.cpp
    shader_recompiler.h)

find_package(directx-dxc CONFIG REQUIRED)
target_link_libraries(ShaderRecomp PRIVATE Microsoft::DirectXShaderCompiler)

target_precompile_headers(ShaderRecomp PRIVATE pch.h)

# add_compile_definitions() is directory-scoped and takes no target argument, so
# the previous call also defined a preprocessor macro literally named
# "ShaderRecomp". Scope the definition to the target instead.
target_compile_definitions(ShaderRecomp PRIVATE _CRT_SECURE_NO_WARNINGS)
# Embeds the contents of a file as a byte array in a C/C++ header file (.h). The
# header will contain the byte array and a size_t constant holding its size.
# Parameters:
#   SOURCE_FILE    - Path of the file whose contents will be embedded.
#   VARIABLE_NAME  - Name of the byte-array variable. "_SIZE" is appended to this
#                    name to form the name of the size constant.
#   HEADER_FILE    - Path of the header file to produce.
#   APPEND         - If specified, appends to the header file instead of
#                    overwriting it.
#   NULL_TERMINATE - If specified, a null byte (zero) is appended to the byte
#                    array. Useful when the source is a text file that will be
#                    used as a C string. The size constant does NOT count this
#                    extra byte.
# Usage:
#   bin2h(SOURCE_FILE "Logo.png" HEADER_FILE "Logo.h" VARIABLE_NAME "LOGO_PNG")
function(BIN2H)
    set(options APPEND NULL_TERMINATE)
    set(oneValueArgs SOURCE_FILE VARIABLE_NAME HEADER_FILE)
    cmake_parse_arguments(BIN2H "${options}" "${oneValueArgs}" "" ${ARGN})

    # Read the source file as one long lowercase hex string (two digits per byte).
    # Paths and values are quoted so that file names containing spaces or
    # semicolons, and empty values, are passed as exactly one argument.
    file(READ "${BIN2H_SOURCE_FILE}" hexString HEX)
    string(LENGTH "${hexString}" hexStringLength)

    # Append the null byte if asked. The length was captured above on purpose so
    # that the terminator is not counted in the size constant.
    if(BIN2H_NULL_TERMINATE)
        set(hexString "${hexString}00")
    endif()

    # Wrap the hex string at column 32 (i.e. 16 bytes per line).
    wrap_string(VARIABLE hexString AT_COLUMN 32)
    math(EXPR arraySize "${hexStringLength} / 2")

    # Add a '0x' prefix and a ', ' suffix around every byte.
    string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1, " arrayValues "${hexString}")
    # Remove the trailing comma.
    string(REGEX REPLACE ", $" "" arrayValues "${arrayValues}")

    # Convert the variable name into a valid C identifier.
    string(MAKE_C_IDENTIFIER "${BIN2H_VARIABLE_NAME}" BIN2H_VARIABLE_NAME)

    # Declare the byte array and its size constant.
    set(arrayDefinition "const char ${BIN2H_VARIABLE_NAME}[] = { ${arrayValues} };")
    set(arraySizeDefinition "const size_t ${BIN2H_VARIABLE_NAME}_SIZE = ${arraySize};")

    set(declarations "${arrayDefinition}\n\n${arraySizeDefinition}\n\n")
    if(BIN2H_APPEND)
        file(APPEND "${BIN2H_HEADER_FILE}" "${declarations}")
    else()
        file(WRITE "${BIN2H_HEADER_FILE}" "${declarations}")
    endif()
endfunction()
D3DXSHADER_CONSTANTINFO +{ + be name; + be registerSet; + be registerIndex; + be registerCount; + be reserved; + be typeInfo; + be defaultValue; +}; + +struct ConstantTable // D3DXSHADER_CONSTANTTABLE +{ + be size; + be creator; + be version; + be constants; + be constantInfo; + be flags; + be target; +}; + +struct ConstantTableContainer +{ + be size; + ConstantTable constantTable; +}; \ No newline at end of file diff --git a/ShaderRecomp/dxc_compiler.cpp b/ShaderRecomp/dxc_compiler.cpp new file mode 100644 index 0000000..4259f8e --- /dev/null +++ b/ShaderRecomp/dxc_compiler.cpp @@ -0,0 +1,79 @@ +#include "dxc_compiler.h" + +DxcCompiler::DxcCompiler() +{ + HRESULT hr = DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler)); + assert(SUCCEEDED(hr)); +} + +DxcCompiler::~DxcCompiler() +{ + dxcCompiler->Release(); +} + +IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool isPixelShader, bool compileSpirv) +{ + DxcBuffer source{}; + source.Ptr = shaderSource.c_str(); + source.Size = shaderSource.size(); + + const wchar_t* args[16]{}; + uint32_t argCount = 0; + + args[argCount++] = isPixelShader ? 
L"-T ps_6_0" : L"-T vs_6_0"; + args[argCount++] = L"-HV 2021"; + args[argCount++] = L"-all-resources-bound"; + + if (compileSpirv) + { + args[argCount++] = L"-spirv"; + args[argCount++] = L"-fvk-use-dx-layout"; + + if (!isPixelShader) + args[argCount++] = L"-fvk-invert-y"; + } + else + { + args[argCount++] = L"-Wno-ignored-attributes"; + } + + IDxcResult* result = nullptr; + HRESULT hr = dxcCompiler->Compile(&source, args, argCount, nullptr, IID_PPV_ARGS(&result)); + + IDxcBlob* object = nullptr; + if (SUCCEEDED(hr)) + { + assert(result != nullptr); + + HRESULT status; + hr = result->GetStatus(&status); + assert(SUCCEEDED(hr)); + + if (FAILED(status)) + { + if (result->HasOutput(DXC_OUT_ERRORS)) + { + IDxcBlobUtf8* errors = nullptr; + hr = result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); + assert(SUCCEEDED(hr) && errors != nullptr); + + fputs(errors->GetStringPointer(), stderr); + + errors->Release(); + } + } + else + { + hr = result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&object), nullptr); + assert(SUCCEEDED(hr) && object != nullptr); + } + + result->Release(); + } + else + { + assert(result == nullptr); + } + + return object; +} diff --git a/ShaderRecomp/dxc_compiler.h b/ShaderRecomp/dxc_compiler.h new file mode 100644 index 0000000..49a579f --- /dev/null +++ b/ShaderRecomp/dxc_compiler.h @@ -0,0 +1,11 @@ +#pragma once + +struct DxcCompiler +{ + IDxcCompiler3* dxcCompiler = nullptr; + + DxcCompiler(); + ~DxcCompiler(); + + IDxcBlob* compile(const std::string& shaderSource, bool isPixelShader, bool compileSpirv); +}; \ No newline at end of file diff --git a/ShaderRecomp/main.cpp b/ShaderRecomp/main.cpp new file mode 100644 index 0000000..70cf668 --- /dev/null +++ b/ShaderRecomp/main.cpp @@ -0,0 +1,83 @@ +#include "shader.h" +#include "shader_recompiler.h" +#include "dxc_compiler.h" + +static std::unique_ptr readAllBytes(const char* filePath) +{ + FILE* file = fopen(filePath, "rb"); + fseek(file, 0, SEEK_END); + long fileSize = ftell(file); + 
fseek(file, 0, SEEK_SET); + auto data = std::make_unique(fileSize); + fread(data.get(), 1, fileSize, file); + fclose(file); + return data; +} + +static void writeAllBytes(const char* filePath, const void* data, size_t dataSize) +{ + FILE* file = fopen(filePath, "wb"); + fwrite(data, 1, dataSize, file); + fclose(file); +} + +int main(int argc, char** argv) +{ + if (std::filesystem::is_directory(argv[1])) + { + std::vector filePaths; + + for (auto& file : std::filesystem::directory_iterator(argv[1])) + { + auto extension = file.path().extension(); + if (extension == ".xpu" || extension == ".xvu") + filePaths.push_back(file.path().string()); + } + + std::for_each(std::execution::par_unseq, filePaths.begin(), filePaths.end(), [&](auto& filePath) + { + printf("%s\n", filePath.c_str()); + + thread_local ShaderRecompiler recompiler; + recompiler = {}; + recompiler.recompile(readAllBytes(filePath.c_str()).get()); + writeAllBytes((filePath + ".hlsl").c_str(), recompiler.out.data(), recompiler.out.size()); + + thread_local DxcCompiler dxcCompiler; + + auto dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false); + auto spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, true); + + assert(dxil != nullptr && spirv != nullptr); + assert(*(reinterpret_cast(dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); + + std::string outFilePath = argv[2]; + outFilePath += '/'; + outFilePath += filePath.substr(filePath.find_last_of("\\/") + 1); + + FILE* file = fopen(outFilePath.c_str(), "wb"); + + struct + { + uint32_t version; + uint32_t dxilSize; + uint32_t spirvSize; + } header; + + header.version = 0; + header.dxilSize = uint32_t(dxil->GetBufferSize()); + header.spirvSize = uint32_t(spirv->GetBufferSize()); + + fwrite(&header, sizeof(header), 1, file); + fwrite(dxil->GetBufferPointer(), 1, dxil->GetBufferSize(), file); + fwrite(spirv->GetBufferPointer(), 1, spirv->GetBufferSize(), file); + + fclose(file); + }); + } + else + { 
+ } + + return 0; +} \ No newline at end of file diff --git a/ShaderRecomp/pch.h b/ShaderRecomp/pch.h new file mode 100644 index 0000000..92457ed --- /dev/null +++ b/ShaderRecomp/pch.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +template +struct be +{ + T value; + + T get() const + { + if constexpr (std::is_enum_v) + return T(std::byteswap(std::underlying_type_t(value))); + else + return std::byteswap(value); + } + + operator T() const + { + return get(); + } +}; \ No newline at end of file diff --git a/ShaderRecomp/shader.h b/ShaderRecomp/shader.h new file mode 100644 index 0000000..b2d7873 --- /dev/null +++ b/ShaderRecomp/shader.h @@ -0,0 +1,107 @@ +#pragma once + +#include "constant_table.h" + +struct Float4Definition +{ + be registerIndex; + be count; + be physicalOffset; +}; + +struct Int4Definition +{ + be registerIndex; + be count; + be values[]; +}; + +struct DefinitionTable +{ + be field0; + be field4; + be field8; + be fieldC; + be size; + be definitions[]; // float4, int4 and bool separated by null terminators +}; + +struct Shader +{ + be physicalOffset; + be size; + be field8; + be fieldC; + be field10; + be interpolatorInfo; // interpolator count: (interpolatorInfo >> 5) & 0x1F +}; + +enum class DeclUsage : uint32_t +{ + Position = 0, + BlendWeight = 1, + BlendIndices = 2, + Normal = 3, + PointSize = 4, + TexCoord = 5, + Tangent = 6, + Binormal = 7, + TessFactor = 8, + PositionT = 9, + Color = 10, + Fog = 11, + Depth = 12, + Sample = 13 +}; + +struct VertexElement +{ + uint32_t address : 12; + DeclUsage usage : 4; + uint32_t usageIndex : 4; +}; + +struct Interpolator +{ + uint32_t usageIndex : 4; + DeclUsage usage : 4; + uint32_t reg : 4; + uint32_t : 20; +}; + +struct VertexShader : Shader +{ + be field18; + be vertexElementCount; + be field20; + be vertexElementsAndInterpolators[]; // field18 + vertex elements + interpolators +}; + +enum PixelShaderOutputs : 
uint32_t +{ + PIXEL_SHADER_OUTPUT_COLOR0 = 0x1, + PIXEL_SHADER_OUTPUT_COLOR1 = 0x2, + PIXEL_SHADER_OUTPUT_COLOR2 = 0x4, + PIXEL_SHADER_OUTPUT_COLOR3 = 0x8, + PIXEL_SHADER_OUTPUT_DEPTH = 0x10 +}; + +struct PixelShader : Shader +{ + be field18; + be outputs; + be interpolators[]; +}; + +struct ShaderContainer +{ + be flags; + be virtualSize; + be physicalSize; + be fieldC; + be constantTableOffset; + be definitionTableOffset; + be shaderOffset; + be field1C; + be field20; +}; \ No newline at end of file diff --git a/ShaderRecomp/shader_code.h b/ShaderRecomp/shader_code.h new file mode 100644 index 0000000..60c7da7 --- /dev/null +++ b/ShaderRecomp/shader_code.h @@ -0,0 +1,458 @@ +#pragma once + +enum class ControlFlowOpcode : uint32_t +{ + Nop = 0, + Exec = 1, + ExecEnd = 2, + CondExec = 3, + CondExecEnd = 4, + CondExecPred = 5, + CondExecPredEnd = 6, + LoopStart = 7, + LoopEnd = 8, + CondCall = 9, + Return = 10, + CondJmp = 11, + Alloc = 12, + CondExecPredClean = 13, + CondExecPredCleanEnd = 14, + MarkVsFetchDone = 15, +}; + +struct ControlFlowExecInstruction +{ + uint32_t address : 12; + uint32_t count : 3; + uint32_t isYield : 1; + uint32_t sequence : 12; + uint32_t vertexCacheHigh : 4; + uint32_t vertexCacheLow : 2; + uint32_t : 7; + uint32_t isPredicateClean : 1; + uint32_t : 1; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowExecPredInstruction +{ + uint32_t address : 12; + uint32_t count : 3; + uint32_t isYield : 1; + uint32_t sequence : 12; + uint32_t vertexCacheHigh : 4; + uint32_t vertexCacheLow : 2; + uint32_t : 7; + uint32_t isPredicateClean : 1; + uint32_t condition : 1; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowCondExecInstruction +{ + uint32_t address : 12; + uint32_t count : 3; + uint32_t isYield : 1; + uint32_t sequence : 12; + uint32_t vertexCacheHigh : 4; + uint32_t vertexCacheLow : 2; + uint32_t boolAddress : 8; + uint32_t condition : 1; + uint32_t 
absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowCondExecPredInstruction +{ + uint32_t address : 12; + uint32_t count : 3; + uint32_t isYield : 1; + uint32_t sequence : 12; + uint32_t vertexCacheHigh : 4; + uint32_t vertexCacheLow : 2; + uint32_t : 7; + uint32_t isPredicateClean : 1; + uint32_t condition : 1; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowLoopStartInstruction +{ + uint32_t address : 13; + uint32_t isRepeat : 1; + uint32_t : 2; + uint32_t loopId : 5; + uint32_t : 11; + uint32_t : 11; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowLoopEndInstruction +{ + uint32_t address : 13; + uint32_t : 3; + uint32_t loopId : 5; + uint32_t isPredicatedBreak : 1; + uint32_t : 10; + uint32_t : 10; + uint32_t condition : 1; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowCondCallInstruction +{ + uint32_t address : 13; + uint32_t isUnconditional : 1; + uint32_t isPredicated : 1; + uint32_t : 17; + uint32_t : 2; + uint32_t boolAddress : 8; + uint32_t condition : 1; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowReturnInstruction +{ + uint32_t : 32; + uint32_t : 11; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowCondJmpInstruction +{ + uint32_t address : 13; + uint32_t isUnconditional : 1; + uint32_t isPredicated : 1; + uint32_t : 17; + uint32_t : 1; + uint32_t direction : 1; + uint32_t boolAddress : 8; + uint32_t condition : 1; + uint32_t absoluteAddressing : 1; + ControlFlowOpcode opcode : 4; +}; + +struct ControlFlowAllocInstruction +{ + uint32_t size : 3; + uint32_t : 29; + uint32_t : 8; + uint32_t isUnserialized : 1; + uint32_t allocType : 2; + uint32_t : 1; + ControlFlowOpcode opcode : 4; +}; + +union ControlFlowInstruction +{ + ControlFlowExecInstruction exec; + ControlFlowCondExecInstruction condExec; + 
ControlFlowCondExecPredInstruction condExecPred; + ControlFlowLoopStartInstruction loopStart; + ControlFlowLoopEndInstruction loopEnd; + ControlFlowCondCallInstruction condCall; + ControlFlowReturnInstruction ret; + ControlFlowCondJmpInstruction condJmp; + ControlFlowAllocInstruction alloc; + + struct + { + uint32_t : 32; + uint32_t : 12; + ControlFlowOpcode opcode : 4; + }; +}; + +enum class FetchOpcode : uint32_t +{ + VertexFetch = 0, + TextureFetch = 1, + GetTextureBorderColorFrac = 16, + GetTextureComputedLod = 17, + GetTextureGradients = 18, + GetTextureWeights = 19, + SetTextureLod = 24, + SetTextureGradientsHorz = 25, + SetTextureGradientsVert = 26 +}; + +enum class FetchDestinationSwizzle : uint32_t +{ + X = 0, + Y = 1, + Z = 2, + W = 3, + Zero = 4, + One = 5, + Keep = 7 +}; + +struct VertexFetchInstruction +{ + struct + { + FetchOpcode opcode : 5; + uint32_t srcRegister : 6; + uint32_t srcRegisterAm : 1; + uint32_t dstRegister : 6; + uint32_t dstRegisterAam : 1; + uint32_t mustBeOne : 1; + uint32_t constIndex : 5; + uint32_t constIndexSelect : 2; + uint32_t prefetchCount : 3; + uint32_t srcSwizzle : 2; + }; + struct + { + uint32_t dstSwizzle : 12; + uint32_t formatCompAll : 1; + uint32_t numFormatAll : 1; + uint32_t signedRfModeAll : 1; + uint32_t isIndexRounded : 1; + uint32_t format : 6; + uint32_t reserved2 : 2; + int32_t expAdjust : 6; + uint32_t isMiniFetch : 1; + uint32_t isPredicated : 1; + }; + struct + { + uint32_t stride : 8; + int32_t offset : 23; + uint32_t predicateCondition : 1; + }; +}; + +enum class TextureDimension : uint32_t +{ + Texture1D, + Texture2D, + Texture3D, + TextureCube +}; + +struct TextureFetchInstruction +{ + struct + { + FetchOpcode opcode : 5; + uint32_t srcRegister : 6; + uint32_t srcRegisterAm : 1; + uint32_t dstRegister : 6; + uint32_t dstRegisterAm : 1; + uint32_t fetchValidOnly : 1; + uint32_t constIndex : 5; + uint32_t texCoordDenorm : 1; + uint32_t srcSwizzle : 6; + }; + struct + { + uint32_t dstSwizzle : 12; + 
uint32_t magFilter : 2; + uint32_t minFilter : 2; + uint32_t mipFilter : 2; + uint32_t anisoFilter : 3; + uint32_t arbitraryFilter : 3; + uint32_t volMagFilter : 2; + uint32_t volMinFilter : 2; + uint32_t useCompLod : 1; + uint32_t useRegLod : 1; + uint32_t : 1; + uint32_t isPredicated : 1; + }; + struct + { + uint32_t useRegGradients : 1; + uint32_t sampleLocation : 1; + int32_t lodBias : 7; + uint32_t : 5; + TextureDimension dimension : 2; + int32_t offsetX : 5; + int32_t offsetY : 5; + int32_t offsetZ : 5; + uint32_t predCondition : 1; + }; +}; + +union FetchInstruction +{ + VertexFetchInstruction vertexFetch; + TextureFetchInstruction textureFetch; + + struct + { + FetchOpcode opcode : 5; + uint32_t : 27; + uint32_t : 32; + }; +}; + +enum class AluScalarOpcode : uint32_t +{ + Adds = 0, + AddsPrev = 1, + Muls = 2, + MulsPrev = 3, + MulsPrev2 = 4, + Maxs = 5, + Mins = 6, + Seqs = 7, + Sgts = 8, + Sges = 9, + Snes = 10, + Frcs = 11, + Truncs = 12, + Floors = 13, + Exp = 14, + Logc = 15, + Log = 16, + Rcpc = 17, + Rcpf = 18, + Rcp = 19, + Rsqc = 20, + Rsqf = 21, + Rsq = 22, + MaxAs = 23, + MaxAsf = 24, + Subs = 25, + SubsPrev = 26, + SetpEq = 27, + SetpNe = 28, + SetpGt = 29, + SetpGe = 30, + SetpInv = 31, + SetpPop = 32, + SetpClr = 33, + SetpRstr = 34, + KillsEq = 35, + KillsGt = 36, + KillsGe = 37, + KillsNe = 38, + KillsOne = 39, + Sqrt = 40, + Mulsc0 = 42, + Mulsc1 = 43, + Addsc0 = 44, + Addsc1 = 45, + Subsc0 = 46, + Subsc1 = 47, + Sin = 48, + Cos = 49, + RetainPrev = 50 +}; + +enum class AluVectorOpcode : uint32_t +{ + Add = 0, + Mul = 1, + Max = 2, + Min = 3, + Seq = 4, + Sgt = 5, + Sge = 6, + Sne = 7, + Frc = 8, + Trunc = 9, + Floor = 10, + Mad = 11, + CndEq = 12, + CndGe = 13, + CndGt = 14, + Dp4 = 15, + Dp3 = 16, + Dp2Add = 17, + Cube = 18, + Max4 = 19, + SetpEqPush = 20, + SetpNePush = 21, + SetpGtPush = 22, + SetpGePush = 23, + KillEq = 24, + KillGt = 25, + KillGe = 26, + KillNe = 27, + Dst = 28, + MaxA = 29 +}; + +enum class ExportRegister : uint32_t 
+{ + VSInterpolator0 = 0, + VSInterpolator1, + VSInterpolator2, + VSInterpolator3, + VSInterpolator4, + VSInterpolator5, + VSInterpolator6, + VSInterpolator7, + VSInterpolator8, + VSInterpolator9, + VSInterpolator10, + VSInterpolator11, + VSInterpolator12, + VSInterpolator13, + VSInterpolator14, + VSInterpolator15, + VSPosition = 62, + VSPointSizeEdgeFlagKillVertex = 63, + PSColor0 = 0, + PSColor1, + PSColor2, + PSColor3, + PSDepth = 61, + ExportAddress = 32, + ExportData0 = 33, + ExportData1, + ExportData2, + ExportData3, + ExportData4, +}; + +struct AluInstruction +{ + struct + { + uint32_t vectorDest : 6; + uint32_t vectorDestRelative : 1; + uint32_t absConstants : 1; + uint32_t scalarDest : 6; + uint32_t scalarDestRelative : 1; + uint32_t exportData : 1; + uint32_t vectorWriteMask : 4; + uint32_t scalarWriteMask : 4; + uint32_t vectorSaturate : 1; + uint32_t scalarSaturate : 1; + AluScalarOpcode scalarOpcode : 6; + }; + struct + { + uint32_t src3Swizzle : 8; + uint32_t src2Swizzle : 8; + uint32_t src1Swizzle : 8; + uint32_t src3Negate : 1; + uint32_t src2Negate : 1; + uint32_t src1Negate : 1; + uint32_t predicateCondition : 1; + uint32_t isPredicated : 1; + uint32_t constAddressRegisterRelative : 1; + uint32_t const1Relative : 1; + uint32_t const0Relative : 1; + }; + struct + { + uint32_t src3Register : 8; + uint32_t src2Register : 8; + uint32_t src1Register : 8; + AluVectorOpcode vectorOpcode : 5; + uint32_t src3Select : 1; + uint32_t src2Select : 1; + uint32_t src1Select : 1; + }; +}; \ No newline at end of file diff --git a/ShaderRecomp/shader_common.hlsli b/ShaderRecomp/shader_common.hlsli new file mode 100644 index 0000000..03f81b5 --- /dev/null +++ b/ShaderRecomp/shader_common.hlsli @@ -0,0 +1,80 @@ +#define FLT_MIN asfloat(0xff7fffff) +#define FLT_MAX asfloat(0x7f7fffff) + +#ifdef __spirv__ + +struct PushConstants +{ + uint64_t VertexShaderConstants; + uint64_t PixelShaderConstants; + uint64_t SharedConstants; +}; + +[[vk::push_constant]] ConstantBuffer 
g_PushConstants; + +#define CONSTANT_BUFFER(NAME, REGISTER) struct NAME +#define PACK_OFFSET(REGISTER) + +#define GET_CONSTANT(NAME) constants.NAME +#define GET_SHARED_CONSTANT(NAME) sharedConstants.NAME + +#else + +#define CONSTANT_BUFFER(NAME, REGISTER) cbuffer NAME : register(REGISTER, space4) +#define PACK_OFFSET(REGISTER) : packoffset(REGISTER) + +#define GET_CONSTANT(NAME) NAME +#define GET_SHARED_CONSTANT(NAME) NAME + +#endif + +#define INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL (1 << 0) + +#define SHARED_CONSTANTS \ + [[vk::offset(128)]] uint g_AlphaTestMode PACK_OFFSET(c8.x); \ + [[vk::offset(132)]] float g_AlphaThreshold PACK_OFFSET(c8.y); \ + [[vk::offset(136)]] uint g_Booleans PACK_OFFSET(c8.z); \ + [[vk::offset(140)]] uint g_SwappedTexcoords PACK_OFFSET(c8.w); \ + [[vk::offset(144)]] uint g_InputLayoutFlags PACK_OFFSET(c9.x) + +Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); +Texture3D g_Texture3DDescriptorHeap[] : register(t0, space1); +TextureCube g_TextureCubeDescriptorHeap[] : register(t0, space2); +SamplerState g_SamplerDescriptorHeap[] : register(s0, space3); + +float4 tfetch2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord) +{ + return g_Texture2DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); +} + +float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord) +{ + return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); +} + +float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float4 texCoord) +{ + return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord.xyz); +} + +float4 tfetchR11G11B10(uint inputLayoutFlags, uint4 value) +{ + if (inputLayoutFlags & INPUT_LAYOUT_FLAG_HAS_R11G11B10_NORMAL) + { + return float4( + (value.x & 0x00000400 ? 
-1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), + (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0), + (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0), + 0.0); + } + else + { + return asfloat(value); + } +} + +float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex) +{ + return (swappedTexcoords & (1 << semanticIndex)) != 0 ? value.yxwz : value; +} + diff --git a/ShaderRecomp/shader_recompiler.cpp b/ShaderRecomp/shader_recompiler.cpp new file mode 100644 index 0000000..a460fc5 --- /dev/null +++ b/ShaderRecomp/shader_recompiler.cpp @@ -0,0 +1,1296 @@ +#include "shader_recompiler.h" +#include "shader_common.hlsli.h" + +static constexpr char SWIZZLES[] = +{ + 'x', + 'y', + 'z', + 'w', + '0', + '1', + '_', + '_' +}; + +static constexpr const char* USAGE_TYPES[] = +{ + "float4", // POSITION + "float4", // BLENDWEIGHT + "uint4", // BLENDINDICES + "uint4", // NORMAL + "float4", // PSIZE + "float4", // TEXCOORD + "uint4", // TANGENT + "uint4", // BINORMAL + "float4", // TESSFACTOR + "float4", // POSITIONT + "float4", // COLOR + "float4", // FOG + "float4", // DEPTH + "float4", // SAMPLE +}; + +static constexpr const char* USAGE_VARIABLES[] = +{ + "Position", + "BlendWeight", + "BlendIndices", + "Normal", + "PointSize", + "TexCoord", + "Tangent", + "Binormal", + "TessFactor", + "PositionT", + "Color", + "Fog", + "Depth", + "Sample" +}; + +static constexpr const char* USAGE_SEMANTICS[] = +{ + "POSITION", + "BLENDWEIGHT", + "BLENDINDICES", + "NORMAL", + "PSIZE", + "TEXCOORD", + "TANGENT", + "BINORMAL", + "TESSFACTOR", + "POSITIONT", + "COLOR", + "FOG", + "DEPTH", + "SAMPLE" +}; + +static constexpr std::pair INTERPOLATORS[] = +{ + { DeclUsage::TexCoord, 0 }, + { DeclUsage::TexCoord, 1 }, + { DeclUsage::TexCoord, 2 }, + { DeclUsage::TexCoord, 3 }, + { DeclUsage::TexCoord, 4 }, + { DeclUsage::TexCoord, 5 }, + { DeclUsage::TexCoord, 6 }, + { DeclUsage::TexCoord, 7 }, + { DeclUsage::TexCoord, 8 }, + { 
DeclUsage::TexCoord, 9 }, + { DeclUsage::TexCoord, 10 }, + { DeclUsage::TexCoord, 11 }, + { DeclUsage::TexCoord, 12 }, + { DeclUsage::TexCoord, 13 }, + { DeclUsage::TexCoord, 14 }, + { DeclUsage::TexCoord, 15 }, + { DeclUsage::Color, 0 }, + { DeclUsage::Color, 1 } +}; + +static FetchDestinationSwizzle getDestSwizzle(uint32_t dstSwizzle, uint32_t index) +{ + return FetchDestinationSwizzle((dstSwizzle >> (index * 3)) & 0x7); +} + +void ShaderRecompiler::printDstSwizzle(uint32_t dstSwizzle, bool operand) +{ + for (size_t i = 0; i < 4; i++) + { + const auto swizzle = getDestSwizzle(dstSwizzle, i); + if (swizzle >= FetchDestinationSwizzle::X && swizzle <= FetchDestinationSwizzle::W) + out += SWIZZLES[operand ? uint32_t(swizzle) : i]; + } +} + +void ShaderRecompiler::printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle) +{ + for (size_t i = 0; i < 4; i++) + { + const auto swizzle = getDestSwizzle(dstSwizzle, i); + if (swizzle == FetchDestinationSwizzle::Zero) + { + indent(); + println("r{}.{} = 0.0;", dstRegister, SWIZZLES[i]); + } + else if (swizzle == FetchDestinationSwizzle::One) + { + indent(); + println("r{}.{} = 1.0;", dstRegister, SWIZZLES[i]); + } + } +} + +void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t address) +{ + indent(); + print("r{}.", instr.dstRegister); + printDstSwizzle(instr.dstSwizzle, false); + + out += " = "; + + auto findResult = vertexElements.find(address); + assert(findResult != vertexElements.end()); + + switch (findResult->second.usage) + { + case DeclUsage::Normal: + case DeclUsage::Tangent: + case DeclUsage::Binormal: + print("tfetchR11G11B10(GET_SHARED_CONSTANT(g_InputLayoutFlags), "); + break; + + case DeclUsage::TexCoord: + print("tfetchTexcoord(GET_SHARED_CONSTANT(g_SwappedTexcoords), "); + break; + } + + print("i{}{}", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex)); + + switch (findResult->second.usage) + { + case DeclUsage::Normal: + case 
DeclUsage::Tangent: + case DeclUsage::Binormal: + out += ')'; + break; + + case DeclUsage::TexCoord: + print(", {})", uint32_t(findResult->second.usageIndex)); + break; + } + + out += '.'; + printDstSwizzle(instr.dstSwizzle, true); + + out += ";\n"; + + printDstSwizzle01(instr.dstRegister, instr.dstSwizzle); +} + +void ShaderRecompiler::recompile(const TextureFetchInstruction& instr) +{ + if (instr.opcode != FetchOpcode::TextureFetch) + return; + + indent(); + print("r{}.", instr.dstRegister); + printDstSwizzle(instr.dstSwizzle, false); + + out += " = tfetch"; + + uint32_t componentCount = 0; + switch (instr.dimension) + { + case TextureDimension::Texture1D: + out += "1D("; + componentCount = 1; + break; + case TextureDimension::Texture2D: + out += "2D("; + componentCount = 2; + break; + case TextureDimension::Texture3D: + out += "3D("; + componentCount = 3; + break; + case TextureDimension::TextureCube: + out += "Cube("; + componentCount = 4; + break; + } + + auto findResult = samplers.find(instr.constIndex); + if (findResult != samplers.end()) + print("GET_SHARED_CONSTANT({}_ResourceDescriptorIndex), GET_SHARED_CONSTANT({}_SamplerDescriptorIndex)", findResult->second, findResult->second); + else + print("GET_SHARED_CONSTANT(s{}_ResourceDescriptorIndex), GET_SHARED_CONSTANT(s{}_SamplerDescriptorIndex)", instr.constIndex, instr.constIndex); + + print(", r{}.", instr.srcRegister); + + for (size_t i = 0; i < componentCount; i++) + out += SWIZZLES[((instr.srcSwizzle >> (i * 2))) & 0x3]; + + out += ")."; + + printDstSwizzle(instr.dstSwizzle, true); + + out += ";\n"; + + printDstSwizzle01(instr.dstRegister, instr.dstSwizzle); +} + +void ShaderRecompiler::recompile(const AluInstruction& instr) +{ + if (instr.isPredicated) + { + indent(); + println("if ({}p0)", instr.predicateCondition ? "!" 
: ""); + + indent(); + out += "{\n"; + ++indentation; + } + + enum + { + VECTOR_0, + VECTOR_1, + VECTOR_2, + SCALAR_0, + SCALAR_1, + SCALAR_CONSTANT_0, + SCALAR_CONSTANT_1 + }; + + auto op = [&](size_t operand) + { + size_t reg = 0; + size_t swizzle = 0; + bool select = true; + bool negate = false; + bool abs = false; + + switch (operand) + { + case SCALAR_CONSTANT_0: + reg = instr.src3Register; + swizzle = instr.src3Swizzle; + select = false; + negate = instr.src3Negate; + abs = instr.absConstants; + break; + + case SCALAR_CONSTANT_1: + reg = (uint32_t(instr.scalarOpcode) & 1) | (instr.src3Select << 1) | (instr.src3Swizzle & 0x3C); + swizzle = instr.src3Swizzle; + select = true; + negate = instr.src3Negate; + abs = instr.absConstants; + break; + + default: + switch (operand) + { + case VECTOR_0: + reg = instr.src1Register; + swizzle = instr.src1Swizzle; + select = instr.src1Select; + negate = instr.src1Negate; + break; + case VECTOR_1: + reg = instr.src2Register; + swizzle = instr.src2Swizzle; + select = instr.src2Select; + negate = instr.src2Negate; + break; + case VECTOR_2: + case SCALAR_0: + case SCALAR_1: + reg = instr.src3Register; + swizzle = instr.src3Swizzle; + select = instr.src3Select; + negate = instr.src3Negate; + break; + } + + if (select) + { + abs = (reg & 0x80) != 0; + reg &= 0x3F; + } + else + { + abs = instr.absConstants; + } + + break; + } + + std::string regFormatted; + + if (select) + { + regFormatted = std::format("r{}", reg); + } + else + { + auto findResult = float4Constants.find(reg); + if (findResult != float4Constants.end()) + { + const char* constantName = reinterpret_cast(constantTableData + findResult->second->name); + if (findResult->second->registerCount > 1) + { + regFormatted = std::format("GET_CONSTANT({})[{}{}]", constantName, + reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? 
" + a0" : " + aL") : ""); + } + else + { + assert(!instr.const0Relative && !instr.const1Relative); + regFormatted = std::format("GET_CONSTANT({})", constantName); + } + } + else + { + assert(!instr.const0Relative && !instr.const1Relative); + regFormatted = std::format("c{}", reg); + } + } + + std::string result; + + if (negate) + result += '-'; + + if (abs) + result += "abs("; + + result += regFormatted; + result += '.'; + + switch (operand) + { + case VECTOR_0: + case VECTOR_1: + case VECTOR_2: + { + uint32_t mask; + + switch (instr.vectorOpcode) + { + case AluVectorOpcode::Dp2Add: + mask = (operand == VECTOR_2) ? 0b1 : 0b11; + break; + + case AluVectorOpcode::Dp3: + mask = 0b111; + break; + + case AluVectorOpcode::Dp4: + mask = 0b1111; + break; + + default: + mask = instr.vectorWriteMask != 0 ? instr.vectorWriteMask : 0b1; + break; + } + + for (size_t i = 0; i < 4; i++) + { + if ((mask >> i) & 0x1) + result += SWIZZLES[((swizzle >> (i * 2)) + i) & 0x3]; + } + + break; + } + + case SCALAR_0: + case SCALAR_CONSTANT_0: + result += SWIZZLES[((swizzle >> 6) + 3) & 0x3]; + break; + + case SCALAR_1: + case SCALAR_CONSTANT_1: + result += SWIZZLES[swizzle & 0x3]; + break; + } + + if (abs) + result += ")"; + + return result; + }; + + switch (instr.vectorOpcode) + { + case AluVectorOpcode::KillEq: + indent(); + println("clip(any({} == {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::KillGt: + indent(); + println("clip(any({} > {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::KillGe: + indent(); + println("clip(any({} >= {}) ? 1 : -1);", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::KillNe: + indent(); + println("clip(any({} != {}) ? 
1 : -1);", op(VECTOR_0), op(VECTOR_1)); + break; + } + + std::string_view exportRegister; + if (instr.exportData) + { + if (isPixelShader) + { + switch (ExportRegister(instr.vectorDest)) + { + case ExportRegister::PSColor0: + exportRegister = "oC0"; + break; + case ExportRegister::PSColor1: + exportRegister = "oC1"; + break; + case ExportRegister::PSColor2: + exportRegister = "oC2"; + break; + case ExportRegister::PSColor3: + exportRegister = "oC3"; + break; + case ExportRegister::PSDepth: + exportRegister = "oDepth"; + break; + } + } + else + { + switch (ExportRegister(instr.vectorDest)) + { + case ExportRegister::VSPosition: + exportRegister = "oPos"; + break; + + default: + { + auto findResult = interpolators.find(instr.vectorDest); + assert(findResult != interpolators.end()); + exportRegister = findResult->second; + break; + } + } + } + } + + uint32_t vectorWriteMask = instr.vectorWriteMask; + if (instr.exportData) + vectorWriteMask &= ~instr.scalarWriteMask; + + if (vectorWriteMask != 0) + { + indent(); + if (!exportRegister.empty()) + { + out += exportRegister; + out += '.'; + } + else + { + print("r{}.", instr.vectorDest); + } + + for (size_t i = 0; i < 4; i++) + { + if ((vectorWriteMask >> i) & 0x1) + out += SWIZZLES[i]; + } + + out += " = "; + + if (instr.vectorSaturate) + out += "saturate("; + + switch (instr.vectorOpcode) + { + case AluVectorOpcode::Add: + print("{} + {}", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Mul: + print("{} * {}", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Max: + print("max({}, {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Min: + print("min({}, {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Seq: + print("{} == {}", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Sgt: + print("{} > {}", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Sge: + print("{} >= {}", op(VECTOR_0), op(VECTOR_1)); + break; + + case 
AluVectorOpcode::Sne: + print("{} != {}", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Frc: + print("frac({})", op(VECTOR_0)); + break; + + case AluVectorOpcode::Trunc: + print("trunc({})", op(VECTOR_0)); + break; + + case AluVectorOpcode::Floor: + print("floor({})", op(VECTOR_0)); + break; + + case AluVectorOpcode::Mad: + print("{} * {} + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); + break; + + case AluVectorOpcode::CndEq: + print("select({} == 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); + break; + + case AluVectorOpcode::CndGe: + print("select({} >= 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); + break; + + case AluVectorOpcode::CndGt: + print("select({} > 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); + break; + + case AluVectorOpcode::Dp4: + case AluVectorOpcode::Dp3: + print("dot({}, {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Dp2Add: + print("dot({}, {}) + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); + break; + + case AluVectorOpcode::Cube: + out += "0.0"; + break; + + case AluVectorOpcode::Max4: + assert(false); + break; + + case AluVectorOpcode::SetpEqPush: + case AluVectorOpcode::SetpNePush: + case AluVectorOpcode::SetpGtPush: + case AluVectorOpcode::SetpGePush: + print("p0 ? 
0.0 : {} + 1.0", op(VECTOR_0)); + break; + + case AluVectorOpcode::KillEq: + print("any({} == {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::KillGt: + print("any({} > {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::KillGe: + print("any({} >= {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::KillNe: + print("any({} != {})", op(VECTOR_0), op(VECTOR_1)); + break; + + case AluVectorOpcode::Dst: + case AluVectorOpcode::MaxA: + assert(false); + break; + } + + if (instr.vectorSaturate) + out += ')'; + + out += ";\n"; + } + + if (instr.scalarOpcode != AluScalarOpcode::RetainPrev) + { + if (instr.scalarOpcode >= AluScalarOpcode::SetpEq && instr.scalarOpcode <= AluScalarOpcode::SetpRstr) + { + indent(); + out += "p0 = "; + + switch (instr.scalarOpcode) + { + case AluScalarOpcode::SetpEq: + print("{} == 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpNe: + print("{} != 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpGt: + print("{} > 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpGe: + print("{} >= 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpInv: + print("{} == 1.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpPop: + print("{} - 1.0 <= 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpClr: + case AluScalarOpcode::SetpRstr: + assert(false); + break; + } + + out += ";\n"; + } + + indent(); + out += "ps = "; + if (instr.scalarSaturate) + out += "saturate("; + + switch (instr.scalarOpcode) + { + case AluScalarOpcode::Adds: + print("{} + {}", op(SCALAR_0), op(SCALAR_1)); + break; + + case AluScalarOpcode::AddsPrev: + print("{} + ps", op(SCALAR_0)); + break; + + case AluScalarOpcode::Muls: + print("{} * {}", op(SCALAR_0), op(SCALAR_1)); + break; + + case AluScalarOpcode::MulsPrev: + case AluScalarOpcode::MulsPrev2: + print("{} * ps", op(SCALAR_0)); + break; + + case AluScalarOpcode::Maxs: + case AluScalarOpcode::MaxAs: + case 
AluScalarOpcode::MaxAsf: + print("max({}, {})", op(SCALAR_0), op(SCALAR_1)); + break; + + case AluScalarOpcode::Mins: + print("min({}, {})", op(SCALAR_0), op(SCALAR_1)); + break; + + case AluScalarOpcode::Seqs: + print("{} == 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::Sgts: + print("{} > 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::Sges: + print("{} >= 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::Snes: + print("{} != 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::Frcs: + print("frac({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Truncs: + print("trunc({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Floors: + print("floor({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Exp: + print("exp2({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Logc: + case AluScalarOpcode::Log: + print("log2({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Rcpc: + case AluScalarOpcode::Rcpf: + case AluScalarOpcode::Rcp: + print("rcp({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Rsqc: + case AluScalarOpcode::Rsqf: + case AluScalarOpcode::Rsq: + print("rsqrt({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Subs: + print("{} - {}", op(SCALAR_0), op(SCALAR_1)); + break; + + case AluScalarOpcode::SubsPrev: + print("{} - ps", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpEq: + case AluScalarOpcode::SetpNe: + case AluScalarOpcode::SetpGt: + case AluScalarOpcode::SetpGe: + out += "p0 ? 0.0 : 1.0"; + break; + + case AluScalarOpcode::SetpInv: + print("{} == 0.0 ? 1.0 : {}", op(SCALAR_0), op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpPop: + print("p0 ? 
0.0 : ({} - 1.0)", op(SCALAR_0)); + break; + + case AluScalarOpcode::SetpClr: + case AluScalarOpcode::SetpRstr: + assert(false); + break; + + case AluScalarOpcode::KillsEq: + print("{} == 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::KillsGt: + print("{} > 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::KillsGe: + print("{} >= 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::KillsNe: + print("{} != 0.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::KillsOne: + print("{} == 1.0", op(SCALAR_0)); + break; + + case AluScalarOpcode::Sqrt: + print("sqrt({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Mulsc0: + case AluScalarOpcode::Mulsc1: + print("{} * {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); + break; + + case AluScalarOpcode::Addsc0: + case AluScalarOpcode::Addsc1: + print("{} + {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); + break; + + case AluScalarOpcode::Subsc0: + case AluScalarOpcode::Subsc1: + print("{} - {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); + break; + + case AluScalarOpcode::Sin: + print("sin({})", op(SCALAR_0)); + break; + + case AluScalarOpcode::Cos: + print("cos({})", op(SCALAR_0)); + break; + } + + if (instr.scalarSaturate) + out += ')'; + + out += ";\n"; + } + + uint32_t scalarWriteMask = instr.scalarWriteMask; + if (instr.exportData) + scalarWriteMask &= ~instr.vectorWriteMask; + + if (scalarWriteMask != 0) + { + indent(); + if (!exportRegister.empty()) + { + out += exportRegister; + out += '.'; + } + else + { + print("r{}.", instr.scalarDest); + } + + for (size_t i = 0; i < 4; i++) + { + if ((scalarWriteMask >> i) & 0x1) + out += SWIZZLES[i]; + } + + out += " = ps;\n"; + } + + if (instr.exportData) + { + uint32_t zeroMask = instr.scalarDestRelative ? 
(0b1111 & ~(instr.vectorWriteMask | instr.scalarWriteMask)) : 0; + uint32_t oneMask = instr.vectorWriteMask & instr.scalarWriteMask; + + for (size_t i = 0; i < 4; i++) + { + uint32_t mask = 1 << i; + if (zeroMask & mask) + { + indent(); + println("{}.{} = 0.0;", exportRegister, SWIZZLES[i]); + } + else if (oneMask & mask) + { + indent(); + println("{}.{} = 1.0;", exportRegister, SWIZZLES[i]); + } + } + } + + if (instr.scalarOpcode >= AluScalarOpcode::KillsEq && instr.scalarOpcode <= AluScalarOpcode::KillsOne) + { + indent(); + out += "clip(ps != 0.0 ? 1 : -1);\n"; + } + + if (instr.isPredicated) + { + --indentation; + indent(); + out += "}\n"; + } +} + +void ShaderRecompiler::recompile(const uint8_t* shaderData) +{ + const auto shaderContainer = reinterpret_cast(shaderData); + + assert((shaderContainer->flags & 0xFFFFFF00) == 0x102A1100); + assert(shaderContainer->constantTableOffset != NULL); + + out += std::string_view(SHADER_COMMON_HLSLI, SHADER_COMMON_HLSLI_SIZE); + isPixelShader = (shaderContainer->flags & 0x1) == 0; + + const auto constantTableContainer = reinterpret_cast(shaderData + shaderContainer->constantTableOffset); + constantTableData = reinterpret_cast(&constantTableContainer->constantTable); + + println("CONSTANT_BUFFER(Constants, b{})", isPixelShader ? 
1 : 0); + out += "{\n"; + + for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) + { + const auto constantInfo = reinterpret_cast( + constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); + + assert(constantInfo->registerSet != RegisterSet::Int4); + + if (constantInfo->registerSet == RegisterSet::Float4) + { + print("\t[[vk::offset({})]] float4 {}", constantInfo->registerIndex * 16, + reinterpret_cast(constantTableData + constantInfo->name)); + + if (constantInfo->registerCount > 1) + print("[{}]", constantInfo->registerCount.get()); + + println(" PACK_OFFSET(c{});", constantInfo->registerIndex.get()); + + for (uint16_t j = 0; j < constantInfo->registerCount; j++) + float4Constants.emplace(constantInfo->registerIndex + j, constantInfo); + } + } + + out += "};\n\n"; + + out += "CONSTANT_BUFFER(SharedConstants, b2)\n"; + out += "{\n"; + + for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) + { + const auto constantInfo = reinterpret_cast( + constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); + + const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); + + assert(constantInfo->registerSet != RegisterSet::Int4); + + switch (constantInfo->registerSet) + { + case RegisterSet::Bool: + { + println("#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 
16 : 0)); + boolConstants.emplace(constantInfo->registerIndex, constantName); + break; + } + + case RegisterSet::Sampler: + { + println("\t[[vk::offset({})]] uint {}_ResourceDescriptorIndex PACK_OFFSET(c{}.{});", + constantInfo->registerIndex * 4, constantName, constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]); + + println("\t[[vk::offset({})]] uint {}_SamplerDescriptorIndex PACK_OFFSET(c{}.{});", + 64 + constantInfo->registerIndex * 4, constantName, 4 + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]); + + samplers.emplace(constantInfo->registerIndex, constantName); + break; + } + } + } + + out += "\tSHARED_CONSTANTS;\n"; + out += "};\n\n"; + + const auto shader = reinterpret_cast(shaderData + shaderContainer->shaderOffset); + + out += "void main(\n"; + + if (isPixelShader) + { + out += "\tin float4 iPos : SV_Position,\n"; + + for (auto& [usage, usageIndex] : INTERPOLATORS) + println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); + + out += "\tin bool iFace : SV_IsFrontFace"; + + auto pixelShader = reinterpret_cast(shader); + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) + out += ",\n\tout float4 oC0 : SV_Target0"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) + out += ",\n\tout float4 oC1 : SV_Target1"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR2) + out += ",\n\tout float4 oC2 : SV_Target2"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) + out += ",\n\tout float4 oC3 : SV_Target3"; + if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) + out += ",\n\tout float oDepth : SV_Depth"; + } + else + { + auto vertexShader = reinterpret_cast(shader); + for (uint32_t i = 0; i < vertexShader->vertexElementCount; i++) + { + union + { + VertexElement vertexElement; + uint32_t value; + }; + + value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + i]; + + println("\tin {0} i{1}{2} : {3}{2},", 
USAGE_TYPES[uint32_t(vertexElement.usage)], USAGE_VARIABLES[uint32_t(vertexElement.usage)], + uint32_t(vertexElement.usageIndex), USAGE_SEMANTICS[uint32_t(vertexElement.usage)]); + + vertexElements.emplace(uint32_t(vertexElement.address), vertexElement); + } + + out += "\tout float4 oPos : SV_Position"; + + for (auto& [usage, usageIndex] : INTERPOLATORS) + print(",\n\tout float4 o{0}{1} : {2}{1}", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); + } + + out += ")\n"; + out += "{\n"; + + out += "#ifdef __spirv__\n"; + println("\tConstants constants = vk::RawBufferLoad(g_PushConstants.{}ShaderConstants, 0x100);", isPixelShader ? "Pixel" : "Vertex"); + out += "\tSharedConstants sharedConstants = vk::RawBufferLoad(g_PushConstants.SharedConstants, 0x100);\n"; + out += "#endif\n\n"; + + if (shaderContainer->definitionTableOffset != NULL) + { + auto definitionTable = reinterpret_cast(shaderData + shaderContainer->definitionTableOffset); + auto definitions = definitionTable->definitions; + while (*definitions != 0) + { + auto definition = reinterpret_cast(definitions); + auto value = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + definition->physicalOffset); + for (uint16_t i = 0; i < (definition->count + 3) / 4; i++) + { + println("\tfloat4 c{} = asfloat(uint4(0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}));", + definition->registerIndex + i - (isPixelShader ? 
256 : 0), value[0].get(), value[1].get(), value[2].get(), value[3].get()); + + value += 4; + } + definitions += 2; + } + ++definitions; + while (*definitions != 0) + { + auto definition = reinterpret_cast(definitions); + for (uint16_t i = 0; i < definition->count; i++) + { + union + { + uint32_t value; + struct + { + int8_t x; + int8_t y; + int8_t z; + int8_t w; + }; + }; + + value = definition->values[i].get(); + + println("\tint4 i{} = int4({}, {}, {}, {});", + (definition->registerIndex - 8992) / 4 + i, x, y, z, w); + } + definitions += 2; + definitions += definition->count; + } + + out += "\n"; + } + + bool printedRegisters[32]{}; + + uint32_t interpolatorCount = (shader->interpolatorInfo >> 5) & 0x1F; + + for (uint32_t i = 0; i < interpolatorCount; i++) + { + union + { + Interpolator interpolator; + uint32_t value; + }; + + if (isPixelShader) + { + value = reinterpret_cast(shader)->interpolators[i]; + println("\tfloat4 r{} = i{}{};", uint32_t(interpolator.reg), USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex)); + printedRegisters[interpolator.reg] = true; + } + else + { + auto vertexShader = reinterpret_cast(shader); + value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + vertexShader->vertexElementCount + i]; + interpolators.emplace(i, std::format("o{}{}", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex))); + } + } + + if (!isPixelShader) + { + out += "\toPos = 0.0;\n"; + for (auto& [usage, usageIndex] : INTERPOLATORS) + println("\to{}{} = 0.0;", USAGE_VARIABLES[uint32_t(usage)], usageIndex); + + out += "\n"; + } + + for (size_t i = 0; i < 32; i++) + { + if (!printedRegisters[i]) + println("\tfloat4 r{} = 0.0;", i); + } + + out += "\tint a0 = 0;\n"; + out += "\tint aL = 0;\n"; + out += "\tbool p0 = false;\n"; + out += "\tfloat ps = 0.0;\n"; + + out += "\n\tuint pc = 0;\n"; + out += "\twhile (true)\n"; + out += "\t{\n"; + out += "\t\tswitch (pc)\n"; + out += "\t\t{\n"; + + 
const be* code = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset); + + auto controlFlowCode = code; + uint32_t pc = 0; + uint32_t minInstrAddress = shader->size; + + while (pc * 6 < minInstrAddress) + { + union + { + ControlFlowInstruction controlFlow[2]; + struct + { + uint32_t code0; + uint32_t code1; + uint32_t code2; + uint32_t code3; + }; + }; + + code0 = controlFlowCode[0]; + code1 = controlFlowCode[1] & 0xFFFF; + code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16); + code3 = controlFlowCode[2] >> 16; + + for (auto& cfInstr : controlFlow) + { + indentation = 3; + println("\t\tcase {}:", pc); + ++pc; + + uint32_t address = 0; + uint32_t count = 0; + uint32_t sequence = 0; + bool shouldBreak = false; + bool shouldCloseCurlyBracket = false; + + switch (cfInstr.opcode) + { + case ControlFlowOpcode::Nop: + break; + + case ControlFlowOpcode::Exec: + case ControlFlowOpcode::ExecEnd: + address = cfInstr.exec.address; + count = cfInstr.exec.count; + sequence = cfInstr.exec.sequence; + shouldBreak = (cfInstr.opcode == ControlFlowOpcode::ExecEnd); + break; + + case ControlFlowOpcode::CondExec: + case ControlFlowOpcode::CondExecEnd: + case ControlFlowOpcode::CondExecPredClean: + case ControlFlowOpcode::CondExecPredCleanEnd: + address = cfInstr.condExec.address; + count = cfInstr.condExec.count; + sequence = cfInstr.condExec.sequence; + shouldBreak = (cfInstr.opcode == ControlFlowOpcode::CondExecEnd || cfInstr.opcode == ControlFlowOpcode::CondExecEnd); + break; + + case ControlFlowOpcode::CondExecPred: + case ControlFlowOpcode::CondExecPredEnd: + address = cfInstr.condExecPred.address; + count = cfInstr.condExecPred.count; + sequence = cfInstr.condExecPred.sequence; + shouldBreak = (cfInstr.opcode == ControlFlowOpcode::CondExecPredEnd); + break; + + case ControlFlowOpcode::LoopStart: + out += "\t\t\taL = 0;\n"; + break; + + case ControlFlowOpcode::LoopEnd: + out += "\t\t\t++aL;\n"; + println("\t\t\tif (aL < i{}.x)", 
uint32_t(cfInstr.loopEnd.loopId)); + out += "\t\t\t{\n"; + println("\t\t\t\tpc = {};", uint32_t(cfInstr.loopEnd.address)); + out += "\t\t\t\tcontinue;\n"; + out += "\t\t\t}\n"; + break; + + case ControlFlowOpcode::CondCall: + case ControlFlowOpcode::Return: + break; + + case ControlFlowOpcode::CondJmp: + { + if (cfInstr.condJmp.isUnconditional) + { + println("\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); + out += "\t\t\tcontinue;\n"; + } + else + { + if (cfInstr.condJmp.isPredicated) + { + println("\t\t\tif ({}p0)", cfInstr.condJmp.condition ? "!" : ""); + } + else + { + auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress); + if (findResult != boolConstants.end()) + println("\t\t\tif ((GET_SHARED_CONSTANT(g_Booleans) & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ? "!" : "="); + else + println("\t\t\tif (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ? "!" : "="); + } + + out += "\t\t\t{\n"; + println("\t\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); + out += "\t\t\t\tcontinue;\n"; + out += "\t\t\t}\n"; + } + break; + } + + case ControlFlowOpcode::Alloc: + case ControlFlowOpcode::MarkVsFetchDone: + break; + } + + if (count != 0) + { + minInstrAddress = std::min(minInstrAddress, address * 12); + auto instructionCode = code + address * 3; + + for (uint32_t i = 0; i < count; i++) + { + union + { + VertexFetchInstruction vertexFetch; + TextureFetchInstruction textureFetch; + AluInstruction alu; + struct + { + uint32_t code0; + uint32_t code1; + uint32_t code2; + }; + }; + + code0 = instructionCode[0]; + code1 = instructionCode[1]; + code2 = instructionCode[2]; + + if ((sequence & 0x1) != 0) + { + if (vertexFetch.opcode == FetchOpcode::VertexFetch) + recompile(vertexFetch, address + i); + else + recompile(textureFetch); + } + else + { + recompile(alu); + } + + sequence >>= 2; + instructionCode += 3; + } + } + + if (shouldBreak) + out += "\t\t\tbreak;\n"; + + if (shouldCloseCurlyBracket) + { + 
--indentation; + out += "\t\t\t}\n"; + } + } + + controlFlowCode += 3; + } + + out += "\t\t\tbreak;\n"; + out += "\t\t}\n"; + out += "\t\tbreak;\n"; + out += "\t}\n"; + + out += "}"; +} diff --git a/ShaderRecomp/shader_recompiler.h b/ShaderRecomp/shader_recompiler.h new file mode 100644 index 0000000..f1e5f3d --- /dev/null +++ b/ShaderRecomp/shader_recompiler.h @@ -0,0 +1,45 @@ +#pragma once + +#include "shader.h" +#include "shader_code.h" + +struct ShaderRecompiler +{ + std::string out; + uint32_t indentation = 0; + bool isPixelShader = false; + const uint8_t* constantTableData = nullptr; + std::unordered_map vertexElements; + std::unordered_map interpolators; + std::unordered_map float4Constants; + std::unordered_map boolConstants; + std::unordered_map samplers; + + void indent() + { + for (uint32_t i = 0; i < indentation; i++) + out += '\t'; + } + + template + void print(std::format_string fmt, Args&&... args) + { + std::vformat_to(std::back_inserter(out), fmt.get(), std::make_format_args(args...)); + } + + template + void println(std::format_string fmt, Args&&... args) + { + std::vformat_to(std::back_inserter(out), fmt.get(), std::make_format_args(args...)); + out += '\n'; + } + + void printDstSwizzle(uint32_t dstSwizzle, bool operand); + void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle); + + void recompile(const VertexFetchInstruction& instr, uint32_t address); + void recompile(const TextureFetchInstruction& instr); + void recompile(const AluInstruction& instr); + + void recompile(const uint8_t* shaderData); +}; \ No newline at end of file diff --git a/vcpkg b/vcpkg new file mode 160000 index 0000000..e63bd09 --- /dev/null +++ b/vcpkg @@ -0,0 +1 @@ +Subproject commit e63bd09dc0b7204467705c1c7c71d0e2a3f8860b diff --git a/vcpkg.json b/vcpkg.json new file mode 100644 index 0000000..ba39103 --- /dev/null +++ b/vcpkg.json @@ -0,0 +1,6 @@ +{ + "builtin-baseline": "e63bd09dc0b7204467705c1c7c71d0e2a3f8860b", + "dependencies": [ + "directx-dxc" + ] +}