mirror of
https://github.com/PancakeTAS/lsfg-vk.git
synced 2025-10-30 07:01:10 +00:00
feat(fp16): translate shaders to fp16
this will not work with the steam version, nor is it deployable
This commit is contained in:
parent
3fcde7c126
commit
cb234bde74
5 changed files with 135 additions and 82 deletions
|
|
@ -45,7 +45,7 @@ set_target_properties(lsfg-vk PROPERTIES
|
|||
target_include_directories(lsfg-vk
|
||||
PUBLIC include)
|
||||
target_link_libraries(lsfg-vk PUBLIC
|
||||
pe-parse dxbc toml11
|
||||
pe-parse dxbc toml11 SPIRV-Tools-opt
|
||||
lsfg-vk-framegen)
|
||||
|
||||
get_target_property(TOML11_INCLUDE_DIRS toml11 INTERFACE_INCLUDE_DIRECTORIES)
|
||||
|
|
|
|||
|
|
@ -17,10 +17,11 @@ namespace Extract {
|
|||
/// Get a shader by name.
|
||||
///
|
||||
/// @param name The name of the shader to get.
|
||||
/// @param fp16 If true, use the FP16 variant of shaders.
|
||||
/// @return The shader bytecode.
|
||||
///
|
||||
/// @throws std::runtime_error if the shader is not found.
|
||||
///
|
||||
std::vector<uint8_t> getShader(const std::string& name);
|
||||
std::vector<uint8_t> getShader(const std::string& name, bool fp16);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include "config/config.hpp"
|
||||
#include "common/exception.hpp"
|
||||
#include "extract/extract.hpp"
|
||||
#include "extract/trans.hpp"
|
||||
#include "utils/utils.hpp"
|
||||
#include "hooks.hpp"
|
||||
#include "layer.hpp"
|
||||
|
|
@ -99,11 +98,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
|
|||
lsfgInitialize(
|
||||
Utils::getDeviceUUID(info.physicalDevice),
|
||||
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
|
||||
[](const std::string& name) {
|
||||
auto dxbc = Extract::getShader(name);
|
||||
auto spirv = Extract::translateShader(dxbc);
|
||||
return spirv;
|
||||
}
|
||||
Extract::getShader
|
||||
);
|
||||
|
||||
this->lsfgCtxId = std::shared_ptr<int32_t>(
|
||||
|
|
|
|||
|
|
@ -1,86 +1,68 @@
|
|||
#include "extract/extract.hpp"
|
||||
#include "config/config.hpp"
|
||||
|
||||
#include <spirv-tools/optimizer.hpp>
|
||||
#include <spirv-tools/libspirv.h>
|
||||
#include <pe-parse/parse.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <unordered_map>
|
||||
#include <filesystem>
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
|
||||
using namespace Extract;
|
||||
|
||||
const uint32_t NO = 49; // native offset
|
||||
const std::unordered_map<std::string, uint32_t> nameIdxTable = {{
|
||||
{ "mipmaps", 255 },
|
||||
{ "alpha[0]", 267 },
|
||||
{ "alpha[1]", 268 },
|
||||
{ "alpha[2]", 269 },
|
||||
{ "alpha[3]", 270 },
|
||||
{ "beta[0]", 275 },
|
||||
{ "beta[1]", 276 },
|
||||
{ "beta[2]", 277 },
|
||||
{ "beta[3]", 278 },
|
||||
{ "beta[4]", 279 },
|
||||
{ "gamma[0]", 257 },
|
||||
{ "gamma[1]", 259 },
|
||||
{ "gamma[2]", 260 },
|
||||
{ "gamma[3]", 261 },
|
||||
{ "gamma[4]", 262 },
|
||||
{ "delta[0]", 257 },
|
||||
{ "delta[1]", 263 },
|
||||
{ "delta[2]", 264 },
|
||||
{ "delta[3]", 265 },
|
||||
{ "delta[4]", 266 },
|
||||
{ "delta[5]", 258 },
|
||||
{ "delta[6]", 271 },
|
||||
{ "delta[7]", 272 },
|
||||
{ "delta[8]", 273 },
|
||||
{ "delta[9]", 274 },
|
||||
{ "generate", 256 },
|
||||
{ "p_mipmaps", 255 },
|
||||
{ "p_alpha[0]", 290 },
|
||||
{ "p_alpha[1]", 291 },
|
||||
{ "p_alpha[2]", 292 },
|
||||
{ "p_alpha[3]", 293 },
|
||||
{ "p_beta[0]", 298 },
|
||||
{ "p_beta[1]", 299 },
|
||||
{ "p_beta[2]", 300 },
|
||||
{ "p_beta[3]", 301 },
|
||||
{ "p_beta[4]", 302 },
|
||||
{ "p_gamma[0]", 280 },
|
||||
{ "p_gamma[1]", 282 },
|
||||
{ "p_gamma[2]", 283 },
|
||||
{ "p_gamma[3]", 284 },
|
||||
{ "p_gamma[4]", 285 },
|
||||
{ "p_delta[0]", 280 },
|
||||
{ "p_delta[1]", 286 },
|
||||
{ "p_delta[2]", 287 },
|
||||
{ "p_delta[3]", 288 },
|
||||
{ "p_delta[4]", 289 },
|
||||
{ "p_delta[5]", 281 },
|
||||
{ "p_delta[6]", 294 },
|
||||
{ "p_delta[7]", 295 },
|
||||
{ "p_delta[8]", 296 },
|
||||
{ "p_delta[9]", 297 },
|
||||
{ "p_generate", 256 },
|
||||
{ "mipmaps", 255 + NO },
|
||||
{ "alpha[0]", 267 + NO },
|
||||
{ "alpha[1]", 268 + NO },
|
||||
{ "alpha[2]", 269 + NO },
|
||||
{ "alpha[3]", 270 + NO },
|
||||
{ "beta[0]", 275 + NO },
|
||||
{ "beta[1]", 276 + NO },
|
||||
{ "beta[2]", 277 + NO },
|
||||
{ "beta[3]", 278 + NO },
|
||||
{ "beta[4]", 279 + NO },
|
||||
{ "gamma[0]", 257 + NO },
|
||||
{ "gamma[1]", 259 + NO },
|
||||
{ "gamma[2]", 260 + NO },
|
||||
{ "gamma[3]", 261 + NO },
|
||||
{ "gamma[4]", 262 + NO },
|
||||
{ "delta[0]", 257 + NO },
|
||||
{ "delta[1]", 263 + NO },
|
||||
{ "delta[2]", 264 + NO },
|
||||
{ "delta[3]", 265 + NO },
|
||||
{ "delta[4]", 266 + NO },
|
||||
{ "delta[5]", 258 + NO },
|
||||
{ "delta[6]", 271 + NO },
|
||||
{ "delta[7]", 272 + NO },
|
||||
{ "delta[8]", 273 + NO },
|
||||
{ "delta[9]", 274 + NO },
|
||||
{ "generate", 256 + NO }
|
||||
}};
|
||||
|
||||
namespace {
|
||||
auto& shaders() {
|
||||
static std::unordered_map<uint32_t, std::vector<uint8_t>> shaderData;
|
||||
auto& pshaders() {
|
||||
static std::unordered_map<uint32_t, std::array<std::vector<uint8_t>, 2>> shaderData;
|
||||
return shaderData;
|
||||
}
|
||||
|
||||
int on_resource(void*, const peparse::resource& res) {
|
||||
int on_resource(void* ptr, const peparse::resource& res) {
|
||||
if (res.type != peparse::RT_RCDATA || res.buf == nullptr || res.buf->bufLen <= 0)
|
||||
return 0;
|
||||
std::vector<uint8_t> resource_data(res.buf->bufLen);
|
||||
std::copy_n(res.buf->buf, res.buf->bufLen, resource_data.data());
|
||||
shaders()[res.name] = resource_data;
|
||||
|
||||
auto* shaders = reinterpret_cast<std::unordered_map<uint32_t, std::vector<uint8_t>>*>(ptr);
|
||||
shaders->emplace(res.name, std::move(resource_data));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -113,36 +95,116 @@ namespace {
|
|||
// final fallback
|
||||
return "Lossless.dll";
|
||||
}
|
||||
|
||||
std::array<std::vector<uint8_t>, 2> fixShaders(const std::vector<uint8_t>& spirv) {
|
||||
std::vector<uint32_t> shader(spirv.size() / 4);
|
||||
std::copy_n(spirv.data(), spirv.size(), reinterpret_cast<uint8_t*>(shader.data()));
|
||||
|
||||
// patch bindings
|
||||
std::vector<size_t> samplerOffsets{};
|
||||
std::vector<size_t> sampledImageOffsets{};
|
||||
std::vector<size_t> storageImageOffsets{};
|
||||
std::vector<size_t> uniformBufferOffsets{};
|
||||
|
||||
uint32_t prevIdx{ 0 };
|
||||
uint32_t type{ 0 };
|
||||
|
||||
size_t i{ 5 };
|
||||
while (i < shader.size()) {
|
||||
const uint32_t word = shader[i];
|
||||
const uint16_t op = word & 0xFFFF;
|
||||
const uint16_t len = word >> 16;
|
||||
if (op == 71 /*spv::OpDecorate*/) {
|
||||
const uint32_t decoration = shader[i + 2];
|
||||
if (decoration == 33 /*spv::DecorationBinding*/) {
|
||||
const uint32_t idx = shader[i + 3];
|
||||
if (idx <= prevIdx)
|
||||
type++;
|
||||
prevIdx = idx;
|
||||
|
||||
switch (type) {
|
||||
case 1:
|
||||
samplerOffsets.emplace_back(i + 3);
|
||||
break;
|
||||
case 2:
|
||||
sampledImageOffsets.emplace_back(i + 3);
|
||||
break;
|
||||
case 3:
|
||||
storageImageOffsets.emplace_back(i + 3);
|
||||
break;
|
||||
case 4:
|
||||
uniformBufferOffsets.emplace_back(i + 3);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (op == 54 /*spv::OpFunction*/)
|
||||
break;
|
||||
|
||||
i += len ? len : 1;
|
||||
}
|
||||
|
||||
uint32_t binding{ 0 };
|
||||
for (const auto& idx : uniformBufferOffsets)
|
||||
shader[idx] = binding++;
|
||||
for (const auto& idx : samplerOffsets)
|
||||
shader[idx] = binding++;
|
||||
for (const auto& idx : sampledImageOffsets)
|
||||
shader[idx] = binding++;
|
||||
for (const auto& idx : storageImageOffsets)
|
||||
shader[idx] = binding++;
|
||||
|
||||
std::vector<uint8_t> result_fp32(shader.size() * sizeof(uint32_t));
|
||||
std::copy_n(reinterpret_cast<uint8_t*>(shader.data()),
|
||||
result_fp32.size(), result_fp32.data());
|
||||
|
||||
spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3);
|
||||
optimizer.RegisterPass(spvtools::CreateConvertRelaxedToHalfPass());
|
||||
optimizer.Run(shader.data(), shader.size() * sizeof(uint32_t), &shader);
|
||||
|
||||
std::vector<uint8_t> result_fp16(shader.size() * sizeof(uint32_t));
|
||||
std::copy_n(reinterpret_cast<uint8_t*>(shader.data()),
|
||||
result_fp16.size(), result_fp16.data());
|
||||
return { std::move(result_fp32), std::move(result_fp16) };
|
||||
}
|
||||
}
|
||||
|
||||
void Extract::extractShaders() {
|
||||
if (!shaders().empty())
|
||||
if (!pshaders().empty())
|
||||
return;
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint8_t>> shaders{};
|
||||
|
||||
// parse the dll
|
||||
peparse::parsed_pe* dll = peparse::ParsePEFromFile(getDllPath().c_str());
|
||||
if (!dll)
|
||||
throw std::runtime_error("Unable to read Lossless.dll, is it installed?");
|
||||
peparse::IterRsrc(dll, on_resource, nullptr);
|
||||
peparse::IterRsrc(dll, on_resource, reinterpret_cast<void*>(&shaders));
|
||||
peparse::DestructParsedPE(dll);
|
||||
|
||||
// ensure all shaders are present
|
||||
for (const auto& [name, idx] : nameIdxTable)
|
||||
if (shaders().find(idx) == shaders().end())
|
||||
if (shaders.find(idx) == shaders.end())
|
||||
throw std::runtime_error("Shader not found: " + name + ".\n- Is Lossless Scaling up to date?");
|
||||
|
||||
// fix shader bytecode
|
||||
for (auto& [idx, data] : shaders)
|
||||
pshaders()[idx] = fixShaders(data);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> Extract::getShader(const std::string& name) {
|
||||
if (shaders().empty())
|
||||
std::vector<uint8_t> Extract::getShader(const std::string& name, bool fp16) {
|
||||
if (pshaders().empty())
|
||||
throw std::runtime_error("Shaders are not loaded.");
|
||||
|
||||
auto hit = nameIdxTable.find(name);
|
||||
if (hit == nameIdxTable.end())
|
||||
throw std::runtime_error("Shader hash not found: " + name);
|
||||
|
||||
auto sit = shaders().find(hit->second);
|
||||
if (sit == shaders().end())
|
||||
auto sit = pshaders().find(hit->second);
|
||||
if (sit == pshaders().end())
|
||||
throw std::runtime_error("Shader not found: " + name);
|
||||
|
||||
return sit->second;
|
||||
return fp16 ? sit->second.at(1) : sit->second.at(0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#include "utils/benchmark.hpp"
|
||||
#include "config/config.hpp"
|
||||
#include "extract/extract.hpp"
|
||||
#include "extract/trans.hpp"
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
#include <lsfg_3_1.hpp>
|
||||
|
|
@ -42,11 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) {
|
|||
lsfgInitialize(
|
||||
deviceUUID, // some magic number if not given
|
||||
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
|
||||
[](const std::string& name) -> std::vector<uint8_t> {
|
||||
auto dxbc = Extract::getShader(name);
|
||||
auto spirv = Extract::translateShader(dxbc);
|
||||
return spirv;
|
||||
}
|
||||
Extract::getShader
|
||||
);
|
||||
const int32_t ctx = lsfgCreateContext(-1, -1, {},
|
||||
{ .width = width, .height = height },
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue