mirror of
https://github.com/PancakeTAS/lsfg-vk.git
synced 2026-04-25 20:02:13 +00:00
parent
f306c48e6d
commit
7fe59a9459
6 changed files with 111 additions and 8 deletions
|
|
@ -71,7 +71,7 @@ void Context::present(Vulkan& vk,
|
|||
data.shouldWait = true;
|
||||
|
||||
// 1. create mipmaps and process input image
|
||||
data.inSemaphore = Core::Semaphore(vk.device, inSem);
|
||||
if (inSem >= 0) data.inSemaphore = Core::Semaphore(vk.device, inSem);
|
||||
for (size_t i = 0; i < vk.generationCount; i++)
|
||||
data.internalSemaphores.at(i) = Core::Semaphore(vk.device);
|
||||
|
||||
|
|
@ -84,15 +84,17 @@ void Context::present(Vulkan& vk,
|
|||
this->beta.Dispatch(data.cmdBuffer1, this->frameIdx);
|
||||
|
||||
data.cmdBuffer1.end();
|
||||
std::vector<Core::Semaphore> waits = { data.inSemaphore };
|
||||
if (inSem < 0) waits.clear();
|
||||
data.cmdBuffer1.submit(vk.device.getComputeQueue(), std::nullopt,
|
||||
{ data.inSemaphore }, std::nullopt,
|
||||
waits, std::nullopt,
|
||||
data.internalSemaphores, std::nullopt);
|
||||
|
||||
// 2. generate intermediary frames
|
||||
for (size_t pass = 0; pass < vk.generationCount; pass++) {
|
||||
auto& internalSemaphore = data.internalSemaphores.at(pass);
|
||||
auto& outSemaphore = data.outSemaphores.at(pass);
|
||||
outSemaphore = Core::Semaphore(vk.device, outSem.at(pass));
|
||||
if (inSem >= 0) outSemaphore = Core::Semaphore(vk.device, outSem.empty() ? -1 : outSem.at(pass));
|
||||
auto& completionFence = data.completionFences.at(pass);
|
||||
completionFence = Core::Fence(vk.device);
|
||||
|
||||
|
|
@ -108,9 +110,11 @@ void Context::present(Vulkan& vk,
|
|||
this->generate.Dispatch(buf2, this->frameIdx, pass);
|
||||
|
||||
buf2.end();
|
||||
std::vector<Core::Semaphore> signals = { outSemaphore };
|
||||
if (inSem < 0) signals.clear();
|
||||
buf2.submit(vk.device.getComputeQueue(), completionFence,
|
||||
{ internalSemaphore }, std::nullopt,
|
||||
{ outSemaphore }, std::nullopt);
|
||||
signals, std::nullopt);
|
||||
}
|
||||
|
||||
this->frameIdx++;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) {
|
|||
if (res != VK_SUCCESS)
|
||||
throw LSFG::vulkan_error(res, "Failed to get physical devices");
|
||||
|
||||
// get device by uuid
|
||||
// get device by uuid
|
||||
std::optional<VkPhysicalDevice> physicalDevice;
|
||||
for (const auto& device : devices) {
|
||||
VkPhysicalDeviceProperties properties;
|
||||
|
|
@ -37,7 +37,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) {
|
|||
|
||||
const uint64_t uuid =
|
||||
static_cast<uint64_t>(properties.vendorID) << 32 | properties.deviceID;
|
||||
if (deviceUUID == uuid) {
|
||||
if (deviceUUID == uuid || deviceUUID == 0x1463ABAC) {
|
||||
physicalDevice = device;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ Image::Image(const Core::Device& device, VkExtent2D extent, VkFormat format,
|
|||
};
|
||||
const VkMemoryAllocateInfo allocInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = &importInfo,
|
||||
.pNext = fd == -1 ? nullptr : &importInfo,
|
||||
.allocationSize = memReqs.size,
|
||||
.memoryTypeIndex = memType.value()
|
||||
};
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ void LSFG::initialize(uint64_t deviceUUID,
|
|||
.flowScale = flowScale,
|
||||
.isHdr = isHdr
|
||||
});
|
||||
contexts = std::unordered_map<int32_t, Context>();
|
||||
|
||||
device->commandPool = Core::CommandPool(device->device);
|
||||
device->descriptorPool = Core::DescriptorPool(device->device);
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ Generate::Generate(Vulkan& vk,
|
|||
for (size_t i = 0; i < vk.generationCount; i++)
|
||||
this->outImgs.emplace_back(vk.device, extent, format,
|
||||
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT, fds.at(i));
|
||||
VK_IMAGE_ASPECT_COLOR_BIT, fds.empty() ? -1 : fds.at(i));
|
||||
|
||||
// hook up shaders
|
||||
for (size_t i = 0; i < vk.generationCount; i++) {
|
||||
|
|
|
|||
98
src/utils/benchmark.cpp
Normal file
98
src/utils/benchmark.cpp
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
#include "extract/extract.hpp"
#include "extract/trans.hpp"
#include "utils/log.hpp"

#include <vulkan/vulkan_core.h>
#include <lsfg.hpp>

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <string>
#include <vector>
|
||||
|
||||
namespace {
|
||||
void __attribute__((constructor)) init() {
|
||||
// continue if preloaded
|
||||
const char* preload = std::getenv("LD_PRELOAD");
|
||||
if (!preload || *preload == '\0')
|
||||
return;
|
||||
const std::string preload_str(preload);
|
||||
if (preload_str.find("liblsfg-vk.so") == std::string::npos)
|
||||
return;
|
||||
// continue if benchmark requested
|
||||
const char* benchmark = std::getenv("LSFG_BENCHMARK");
|
||||
if (!benchmark || *benchmark == '\0')
|
||||
return;
|
||||
const std::string benchmark_str(benchmark);
|
||||
if (benchmark_str != "1")
|
||||
return;
|
||||
|
||||
// fetch benchmark parameters
|
||||
const char* lsfgFlowScale = std::getenv("LSFG_FLOW_SCALE");
|
||||
const char* lsfgHdr = std::getenv("LSFG_HDR");
|
||||
const char* lsfgMultiplier = std::getenv("LSFG_MULTIPLIER");
|
||||
const char* lsfgExtentWidth = std::getenv("LSFG_EXTENT_WIDTH");
|
||||
const char* lsfgExtentHeight = std::getenv("LSFG_EXTENT_HEIGHT");
|
||||
|
||||
const float flowScale = lsfgFlowScale
|
||||
? std::stof(lsfgFlowScale) : 1.0F;
|
||||
const bool isHdr = lsfgHdr
|
||||
? *lsfgHdr == '1' : false;
|
||||
const uint64_t multiplier = lsfgMultiplier
|
||||
? std::stoull(std::string(lsfgMultiplier)) : 2;
|
||||
const uint32_t width = lsfgExtentWidth
|
||||
? static_cast<uint32_t>(std::stoul(lsfgExtentWidth)) : 1920;
|
||||
const uint32_t height = lsfgExtentHeight
|
||||
? static_cast<uint32_t>(std::stoul(lsfgExtentHeight)) : 1080;
|
||||
|
||||
Log::info("bench", "Running {}x benchmark with {}x{} extent and flow scale of {} {} HDR",
|
||||
multiplier, width, height, flowScale, isHdr ? "with" : "without");
|
||||
|
||||
// create the benchmark context
|
||||
const char* lsfgDeviceUUID = std::getenv("LSFG_DEVICE_UUID");
|
||||
const uint64_t deviceUUID = lsfgDeviceUUID
|
||||
? std::stoull(std::string(lsfgDeviceUUID), nullptr, 16) : 0x1463ABAC;
|
||||
|
||||
Extract::extractShaders();
|
||||
LSFG::initialize(
|
||||
deviceUUID, // some magic number if not given
|
||||
isHdr, 1.0F / flowScale, multiplier - 1,
|
||||
[](const std::string& name) -> std::vector<uint8_t> {
|
||||
auto dxbc = Extract::getShader(name);
|
||||
auto spirv = Extract::translateShader(dxbc);
|
||||
return spirv;
|
||||
}
|
||||
);
|
||||
const int32_t ctx = LSFG::createContext(-1, -1, {},
|
||||
{ .width = width, .height = height },
|
||||
isHdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM
|
||||
);
|
||||
|
||||
Log::info("bench", "Benchmark context created, ready to run");
|
||||
|
||||
// run the benchmark (run 8*n + 1 so the fences are waited on)
|
||||
const auto now = std::chrono::high_resolution_clock::now();
|
||||
const uint64_t iterations = (8 * 500) + 1;
|
||||
for (uint64_t count = 0; count < iterations; count++)
|
||||
LSFG::presentContext(ctx, -1, {});
|
||||
const auto then = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// print results
|
||||
const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(then - now).count();
|
||||
|
||||
const auto perIteration = static_cast<float>(ms) / static_cast<float>(iterations);
|
||||
|
||||
const uint64_t totalGen = (multiplier - 1) * iterations;
|
||||
const auto genFps = static_cast<float>(totalGen) / (static_cast<float>(ms) / 1000.0F);
|
||||
|
||||
const uint64_t totalFrames = iterations * multiplier;
|
||||
const auto totalFps = static_cast<float>(totalFrames) / (static_cast<float>(ms) / 1000.0F);
|
||||
|
||||
Log::info("bench", "Benchmark completed in {} ms", ms);
|
||||
Log::info("bench", "Time per iteration: {:.2f} ms", perIteration);
|
||||
Log::info("bench", "Generation FPS: {:.2f}", genFps);
|
||||
Log::info("bench", "Final FPS: {:.2f}", totalFps);
|
||||
Log::info("bench", "Benchmark finished, exiting");
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue