feat(fp16): translate shaders to fp16

this will not work with the steam version, nor is it deployable
This commit is contained in:
PancakeTAS 2025-07-30 18:59:22 +02:00 committed by Pancake
parent 3fcde7c126
commit cb234bde74
5 changed files with 135 additions and 82 deletions

View file

@ -45,7 +45,7 @@ set_target_properties(lsfg-vk PROPERTIES
target_include_directories(lsfg-vk
PUBLIC include)
target_link_libraries(lsfg-vk PUBLIC
pe-parse dxbc toml11
pe-parse dxbc toml11 SPIRV-Tools-opt
lsfg-vk-framegen)
get_target_property(TOML11_INCLUDE_DIRS toml11 INTERFACE_INCLUDE_DIRECTORIES)

View file

@ -17,10 +17,11 @@ namespace Extract {
/// Get a shader by name.
///
/// @param name The name of the shader to get.
/// @param fp16 If true, use the FP16 variant of shaders.
/// @return The shader bytecode.
///
/// @throws std::runtime_error if the shader is not found.
///
std::vector<uint8_t> getShader(const std::string& name);
std::vector<uint8_t> getShader(const std::string& name, bool fp16);
}

View file

@ -2,7 +2,6 @@
#include "config/config.hpp"
#include "common/exception.hpp"
#include "extract/extract.hpp"
#include "extract/trans.hpp"
#include "utils/utils.hpp"
#include "hooks.hpp"
#include "layer.hpp"
@ -99,11 +98,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
lsfgInitialize(
Utils::getDeviceUUID(info.physicalDevice),
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
[](const std::string& name) {
auto dxbc = Extract::getShader(name);
auto spirv = Extract::translateShader(dxbc);
return spirv;
}
Extract::getShader
);
this->lsfgCtxId = std::shared_ptr<int32_t>(

View file

@ -1,86 +1,68 @@
#include "extract/extract.hpp"
#include "config/config.hpp"
#include <spirv-tools/optimizer.hpp>
#include <spirv-tools/libspirv.h>
#include <pe-parse/parse.h>
#include <cstdlib>
#include <unordered_map>
#include <filesystem>
#include <algorithm>
#include <cstdint>
#include <stdexcept>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <array>
using namespace Extract;
const uint32_t NO = 49; // native offset
const std::unordered_map<std::string, uint32_t> nameIdxTable = {{
{ "mipmaps", 255 },
{ "alpha[0]", 267 },
{ "alpha[1]", 268 },
{ "alpha[2]", 269 },
{ "alpha[3]", 270 },
{ "beta[0]", 275 },
{ "beta[1]", 276 },
{ "beta[2]", 277 },
{ "beta[3]", 278 },
{ "beta[4]", 279 },
{ "gamma[0]", 257 },
{ "gamma[1]", 259 },
{ "gamma[2]", 260 },
{ "gamma[3]", 261 },
{ "gamma[4]", 262 },
{ "delta[0]", 257 },
{ "delta[1]", 263 },
{ "delta[2]", 264 },
{ "delta[3]", 265 },
{ "delta[4]", 266 },
{ "delta[5]", 258 },
{ "delta[6]", 271 },
{ "delta[7]", 272 },
{ "delta[8]", 273 },
{ "delta[9]", 274 },
{ "generate", 256 },
{ "p_mipmaps", 255 },
{ "p_alpha[0]", 290 },
{ "p_alpha[1]", 291 },
{ "p_alpha[2]", 292 },
{ "p_alpha[3]", 293 },
{ "p_beta[0]", 298 },
{ "p_beta[1]", 299 },
{ "p_beta[2]", 300 },
{ "p_beta[3]", 301 },
{ "p_beta[4]", 302 },
{ "p_gamma[0]", 280 },
{ "p_gamma[1]", 282 },
{ "p_gamma[2]", 283 },
{ "p_gamma[3]", 284 },
{ "p_gamma[4]", 285 },
{ "p_delta[0]", 280 },
{ "p_delta[1]", 286 },
{ "p_delta[2]", 287 },
{ "p_delta[3]", 288 },
{ "p_delta[4]", 289 },
{ "p_delta[5]", 281 },
{ "p_delta[6]", 294 },
{ "p_delta[7]", 295 },
{ "p_delta[8]", 296 },
{ "p_delta[9]", 297 },
{ "p_generate", 256 },
{ "mipmaps", 255 + NO },
{ "alpha[0]", 267 + NO },
{ "alpha[1]", 268 + NO },
{ "alpha[2]", 269 + NO },
{ "alpha[3]", 270 + NO },
{ "beta[0]", 275 + NO },
{ "beta[1]", 276 + NO },
{ "beta[2]", 277 + NO },
{ "beta[3]", 278 + NO },
{ "beta[4]", 279 + NO },
{ "gamma[0]", 257 + NO },
{ "gamma[1]", 259 + NO },
{ "gamma[2]", 260 + NO },
{ "gamma[3]", 261 + NO },
{ "gamma[4]", 262 + NO },
{ "delta[0]", 257 + NO },
{ "delta[1]", 263 + NO },
{ "delta[2]", 264 + NO },
{ "delta[3]", 265 + NO },
{ "delta[4]", 266 + NO },
{ "delta[5]", 258 + NO },
{ "delta[6]", 271 + NO },
{ "delta[7]", 272 + NO },
{ "delta[8]", 273 + NO },
{ "delta[9]", 274 + NO },
{ "generate", 256 + NO }
}};
namespace {
auto& shaders() {
static std::unordered_map<uint32_t, std::vector<uint8_t>> shaderData;
auto& pshaders() {
static std::unordered_map<uint32_t, std::array<std::vector<uint8_t>, 2>> shaderData;
return shaderData;
}
int on_resource(void*, const peparse::resource& res) {
int on_resource(void* ptr, const peparse::resource& res) {
if (res.type != peparse::RT_RCDATA || res.buf == nullptr || res.buf->bufLen <= 0)
return 0;
std::vector<uint8_t> resource_data(res.buf->bufLen);
std::copy_n(res.buf->buf, res.buf->bufLen, resource_data.data());
shaders()[res.name] = resource_data;
auto* shaders = reinterpret_cast<std::unordered_map<uint32_t, std::vector<uint8_t>>*>(ptr);
shaders->emplace(res.name, std::move(resource_data));
return 0;
}
@ -113,36 +95,116 @@ namespace {
// final fallback
return "Lossless.dll";
}
std::array<std::vector<uint8_t>, 2> fixShaders(const std::vector<uint8_t>& spirv) {
std::vector<uint32_t> shader(spirv.size() / 4);
std::copy_n(spirv.data(), spirv.size(), reinterpret_cast<uint8_t*>(shader.data()));
// patch bindings
std::vector<size_t> samplerOffsets{};
std::vector<size_t> sampledImageOffsets{};
std::vector<size_t> storageImageOffsets{};
std::vector<size_t> uniformBufferOffsets{};
uint32_t prevIdx{ 0 };
uint32_t type{ 0 };
size_t i{ 5 };
while (i < shader.size()) {
const uint32_t word = shader[i];
const uint16_t op = word & 0xFFFF;
const uint16_t len = word >> 16;
if (op == 71 /*spv::OpDecorate*/) {
const uint32_t decoration = shader[i + 2];
if (decoration == 33 /*spv::DecorationBinding*/) {
const uint32_t idx = shader[i + 3];
if (idx <= prevIdx)
type++;
prevIdx = idx;
switch (type) {
case 1:
samplerOffsets.emplace_back(i + 3);
break;
case 2:
sampledImageOffsets.emplace_back(i + 3);
break;
case 3:
storageImageOffsets.emplace_back(i + 3);
break;
case 4:
uniformBufferOffsets.emplace_back(i + 3);
break;
default:
break;
}
}
}
if (op == 54 /*spv::OpFunction*/)
break;
i += len ? len : 1;
}
uint32_t binding{ 0 };
for (const auto& idx : uniformBufferOffsets)
shader[idx] = binding++;
for (const auto& idx : samplerOffsets)
shader[idx] = binding++;
for (const auto& idx : sampledImageOffsets)
shader[idx] = binding++;
for (const auto& idx : storageImageOffsets)
shader[idx] = binding++;
std::vector<uint8_t> result_fp32(shader.size() * sizeof(uint32_t));
std::copy_n(reinterpret_cast<uint8_t*>(shader.data()),
result_fp32.size(), result_fp32.data());
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3);
optimizer.RegisterPass(spvtools::CreateConvertRelaxedToHalfPass());
optimizer.Run(shader.data(), shader.size() * sizeof(uint32_t), &shader);
std::vector<uint8_t> result_fp16(shader.size() * sizeof(uint32_t));
std::copy_n(reinterpret_cast<uint8_t*>(shader.data()),
result_fp16.size(), result_fp16.data());
return { std::move(result_fp32), std::move(result_fp16) };
}
}
void Extract::extractShaders() {
if (!shaders().empty())
if (!pshaders().empty())
return;
std::unordered_map<uint32_t, std::vector<uint8_t>> shaders{};
// parse the dll
peparse::parsed_pe* dll = peparse::ParsePEFromFile(getDllPath().c_str());
if (!dll)
throw std::runtime_error("Unable to read Lossless.dll, is it installed?");
peparse::IterRsrc(dll, on_resource, nullptr);
peparse::IterRsrc(dll, on_resource, reinterpret_cast<void*>(&shaders));
peparse::DestructParsedPE(dll);
// ensure all shaders are present
for (const auto& [name, idx] : nameIdxTable)
if (shaders().find(idx) == shaders().end())
if (shaders.find(idx) == shaders.end())
throw std::runtime_error("Shader not found: " + name + ".\n- Is Lossless Scaling up to date?");
// fix shader bytecode
for (auto& [idx, data] : shaders)
pshaders()[idx] = fixShaders(data);
}
std::vector<uint8_t> Extract::getShader(const std::string& name) {
if (shaders().empty())
std::vector<uint8_t> Extract::getShader(const std::string& name, bool fp16) {
if (pshaders().empty())
throw std::runtime_error("Shaders are not loaded.");
auto hit = nameIdxTable.find(name);
if (hit == nameIdxTable.end())
throw std::runtime_error("Shader hash not found: " + name);
auto sit = shaders().find(hit->second);
if (sit == shaders().end())
auto sit = pshaders().find(hit->second);
if (sit == pshaders().end())
throw std::runtime_error("Shader not found: " + name);
return sit->second;
return fp16 ? sit->second.at(1) : sit->second.at(0);
}

View file

@ -1,7 +1,6 @@
#include "utils/benchmark.hpp"
#include "config/config.hpp"
#include "extract/extract.hpp"
#include "extract/trans.hpp"
#include <vulkan/vulkan_core.h>
#include <lsfg_3_1.hpp>
@ -42,11 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) {
lsfgInitialize(
deviceUUID, // some magic number if not given
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
[](const std::string& name) -> std::vector<uint8_t> {
auto dxbc = Extract::getShader(name);
auto spirv = Extract::translateShader(dxbc);
return spirv;
}
Extract::getShader
);
const int32_t ctx = lsfgCreateContext(-1, -1, {},
{ .width = width, .height = height },