implement simple benchmark

fixes #44
This commit is contained in:
PancakeTAS 2025-07-10 18:32:47 +02:00
parent f306c48e6d
commit 7fe59a9459
No known key found for this signature in database
6 changed files with 111 additions and 8 deletions

View file

@ -71,7 +71,7 @@ void Context::present(Vulkan& vk,
data.shouldWait = true;
// 1. create mipmaps and process input image
data.inSemaphore = Core::Semaphore(vk.device, inSem);
if (inSem >= 0) data.inSemaphore = Core::Semaphore(vk.device, inSem);
for (size_t i = 0; i < vk.generationCount; i++)
data.internalSemaphores.at(i) = Core::Semaphore(vk.device);
@ -84,15 +84,17 @@ void Context::present(Vulkan& vk,
this->beta.Dispatch(data.cmdBuffer1, this->frameIdx);
data.cmdBuffer1.end();
std::vector<Core::Semaphore> waits = { data.inSemaphore };
if (inSem < 0) waits.clear();
data.cmdBuffer1.submit(vk.device.getComputeQueue(), std::nullopt,
{ data.inSemaphore }, std::nullopt,
waits, std::nullopt,
data.internalSemaphores, std::nullopt);
// 2. generate intermediary frames
for (size_t pass = 0; pass < vk.generationCount; pass++) {
auto& internalSemaphore = data.internalSemaphores.at(pass);
auto& outSemaphore = data.outSemaphores.at(pass);
outSemaphore = Core::Semaphore(vk.device, outSem.at(pass));
if (inSem >= 0) outSemaphore = Core::Semaphore(vk.device, outSem.empty() ? -1 : outSem.at(pass));
auto& completionFence = data.completionFences.at(pass);
completionFence = Core::Fence(vk.device);
@ -108,9 +110,11 @@ void Context::present(Vulkan& vk,
this->generate.Dispatch(buf2, this->frameIdx, pass);
buf2.end();
std::vector<Core::Semaphore> signals = { outSemaphore };
if (inSem < 0) signals.clear();
buf2.submit(vk.device.getComputeQueue(), completionFence,
{ internalSemaphore }, std::nullopt,
{ outSemaphore }, std::nullopt);
signals, std::nullopt);
}
this->frameIdx++;

View file

@ -29,7 +29,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) {
if (res != VK_SUCCESS)
throw LSFG::vulkan_error(res, "Failed to get physical devices");
// get device by uuid
// get device by uuid
std::optional<VkPhysicalDevice> physicalDevice;
for (const auto& device : devices) {
VkPhysicalDeviceProperties properties;
@ -37,7 +37,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) {
const uint64_t uuid =
static_cast<uint64_t>(properties.vendorID) << 32 | properties.deviceID;
if (deviceUUID == uuid) {
if (deviceUUID == uuid || deviceUUID == 0x1463ABAC) {
physicalDevice = device;
break;
}

View file

@ -181,7 +181,7 @@ Image::Image(const Core::Device& device, VkExtent2D extent, VkFormat format,
};
const VkMemoryAllocateInfo allocInfo{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &importInfo,
.pNext = fd == -1 ? nullptr : &importInfo,
.allocationSize = memReqs.size,
.memoryTypeIndex = memType.value()
};

View file

@ -40,6 +40,7 @@ void LSFG::initialize(uint64_t deviceUUID,
.flowScale = flowScale,
.isHdr = isHdr
});
contexts = std::unordered_map<int32_t, Context>();
device->commandPool = Core::CommandPool(device->device);
device->descriptorPool = Core::DescriptorPool(device->device);

View file

@ -35,7 +35,7 @@ Generate::Generate(Vulkan& vk,
for (size_t i = 0; i < vk.generationCount; i++)
this->outImgs.emplace_back(vk.device, extent, format,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT, fds.at(i));
VK_IMAGE_ASPECT_COLOR_BIT, fds.empty() ? -1 : fds.at(i));
// hook up shaders
for (size_t i = 0; i < vk.generationCount; i++) {

98
src/utils/benchmark.cpp Normal file
View file

@ -0,0 +1,98 @@
#include "extract/extract.hpp"
#include "extract/trans.hpp"
#include "utils/log.hpp"
#include <vulkan/vulkan_core.h>
#include <lsfg.hpp>
#include <cstdint>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <exception>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
namespace {
    /// Device UUID passed to LSFG::initialize when LSFG_DEVICE_UUID is unset.
    /// NOTE(review): the device selection code appears to treat this value as
    /// "accept the first physical device" - confirm against Device::Device.
    constexpr uint64_t FALLBACK_DEVICE_UUID = 0x1463ABAC;

    /// Number of timed presentContext calls (8*n + 1 so the fences are waited on).
    constexpr uint64_t BENCHMARK_ITERATIONS = (8 * 500) + 1;

    /// Parameters of a single benchmark run, as read from the environment.
    struct BenchmarkSettings {
        float flowScale{1.0F};                     ///< LSFG_FLOW_SCALE
        bool isHdr{false};                         ///< LSFG_HDR ('1' enables)
        uint64_t multiplier{2};                    ///< LSFG_MULTIPLIER
        uint32_t width{1920};                      ///< LSFG_EXTENT_WIDTH
        uint32_t height{1080};                     ///< LSFG_EXTENT_HEIGHT
        uint64_t deviceUUID{FALLBACK_DEVICE_UUID}; ///< LSFG_DEVICE_UUID (hex)
    };

    /// Check whether the benchmark should run at all.
    ///
    /// @return true only if LD_PRELOAD mentions "liblsfg-vk.so" and
    ///         LSFG_BENCHMARK is exactly "1".
    bool benchmarkRequested() {
        // continue if preloaded
        const char* preload = std::getenv("LD_PRELOAD");
        if (!preload || *preload == '\0')
            return false;
        if (std::string(preload).find("liblsfg-vk.so") == std::string::npos)
            return false;

        // continue if benchmark requested
        const char* benchmark = std::getenv("LSFG_BENCHMARK");
        if (!benchmark || *benchmark == '\0')
            return false;
        return std::string(benchmark) == "1";
    }

    /// Parse one extent dimension from an environment value.
    ///
    /// @param text Raw environment value, or nullptr if the variable is unset.
    /// @param fallback Value returned when text is nullptr.
    /// @param name Variable name used in the error message.
    /// @return The parsed dimension.
    /// @throws std::invalid_argument if text is not a number.
    /// @throws std::out_of_range if the value is zero or does not fit uint32_t.
    uint32_t parseExtent(const char* text, uint32_t fallback, const char* name) {
        if (!text)
            return fallback;
        const unsigned long value = std::stoul(text);
        // reject instead of silently truncating in the narrowing cast
        if (value == 0 || value > std::numeric_limits<uint32_t>::max())
            throw std::out_of_range(std::string(name) + " must be a non-zero 32-bit value");
        return static_cast<uint32_t>(value);
    }

    /// Read and validate all benchmark parameters from the environment.
    ///
    /// @return The settings, with defaults for every unset variable.
    /// @throws std::invalid_argument if a value cannot be parsed or is not usable.
    /// @throws std::out_of_range if a value does not fit its type.
    BenchmarkSettings readSettings() {
        BenchmarkSettings settings;

        if (const char* flowScale = std::getenv("LSFG_FLOW_SCALE"))
            settings.flowScale = std::stof(flowScale);
        // the reciprocal of the flow scale is taken later, so it must be a
        // finite, strictly positive number (this also rejects NaN)
        if (!std::isfinite(settings.flowScale) || !(settings.flowScale > 0.0F))
            throw std::invalid_argument("LSFG_FLOW_SCALE must be a finite value greater than 0");

        if (const char* hdr = std::getenv("LSFG_HDR"))
            settings.isHdr = *hdr == '1';

        if (const char* multiplier = std::getenv("LSFG_MULTIPLIER"))
            settings.multiplier = std::stoull(std::string(multiplier));
        // multiplier - 1 frames are generated; anything below 2 would
        // generate nothing or wrap around in the unsigned subtraction
        if (settings.multiplier < 2)
            throw std::invalid_argument("LSFG_MULTIPLIER must be at least 2");

        settings.width = parseExtent(std::getenv("LSFG_EXTENT_WIDTH"),
            settings.width, "LSFG_EXTENT_WIDTH");
        settings.height = parseExtent(std::getenv("LSFG_EXTENT_HEIGHT"),
            settings.height, "LSFG_EXTENT_HEIGHT");

        if (const char* deviceUUID = std::getenv("LSFG_DEVICE_UUID"))
            settings.deviceUUID = std::stoull(std::string(deviceUUID), nullptr, 16);

        return settings;
    }

    /// Set up LSFG, time BENCHMARK_ITERATIONS presents and log the results.
    ///
    /// Exceptions thrown by the extraction or LSFG calls are not caught here.
    ///
    /// @param settings Validated benchmark parameters.
    void runBenchmark(const BenchmarkSettings& settings) {
        Log::info("bench", "Running {}x benchmark with {}x{} extent and flow scale of {} {} HDR",
            settings.multiplier, settings.width, settings.height, settings.flowScale,
            settings.isHdr ? "with" : "without");

        // create the benchmark context
        Extract::extractShaders();
        LSFG::initialize(
            settings.deviceUUID, // some magic number if not given
            settings.isHdr, 1.0F / settings.flowScale, settings.multiplier - 1,
            [](const std::string& name) -> std::vector<uint8_t> {
                auto dxbc = Extract::getShader(name);
                auto spirv = Extract::translateShader(dxbc);
                return spirv;
            }
        );
        const int32_t ctx = LSFG::createContext(-1, -1, {},
            { .width = settings.width, .height = settings.height },
            settings.isHdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM
        );

        Log::info("bench", "Benchmark context created, ready to run");

        // run the benchmark; steady_clock is monotonic, so the measured
        // interval cannot be distorted by wall-clock adjustments
        const auto start = std::chrono::steady_clock::now();
        for (uint64_t count = 0; count < BENCHMARK_ITERATIONS; count++)
            LSFG::presentContext(ctx, -1, {});
        const auto elapsed = std::chrono::steady_clock::now() - start;

        // derive the rates from the untruncated duration so that a very
        // short run does not divide by a zero millisecond count
        const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
        const float exactMs = std::chrono::duration<float, std::milli>(elapsed).count();
        const float seconds = std::chrono::duration<float>(elapsed).count();

        const auto perIteration = exactMs / static_cast<float>(BENCHMARK_ITERATIONS);
        const uint64_t totalGen = (settings.multiplier - 1) * BENCHMARK_ITERATIONS;
        const uint64_t totalFrames = BENCHMARK_ITERATIONS * settings.multiplier;
        const float genFps = seconds > 0.0F
            ? static_cast<float>(totalGen) / seconds : 0.0F;
        const float totalFps = seconds > 0.0F
            ? static_cast<float>(totalFrames) / seconds : 0.0F;

        // print results
        Log::info("bench", "Benchmark completed in {} ms", ms);
        Log::info("bench", "Time per iteration: {:.2f} ms", perIteration);
        Log::info("bench", "Generation FPS: {:.2f}", genFps);
        Log::info("bench", "Final FPS: {:.2f}", totalFps);

        // NOTE(review): the message announces an exit, but this function only
        // returns and the process keeps running - confirm whether the process
        // is meant to be terminated here.
        Log::info("bench", "Benchmark finished, exiting");
    }

    /// Library constructor: runs the benchmark at load time when requested.
    ///
    /// Does nothing unless the library is preloaded and LSFG_BENCHMARK=1.
    /// Malformed or unusable parameters are logged and the benchmark is
    /// skipped, because an exception leaving a constructor function would
    /// terminate the host process.
    void __attribute__((constructor)) init() {
        if (!benchmarkRequested())
            return;

        // fetch benchmark parameters
        BenchmarkSettings settings;
        try {
            settings = readSettings();
        } catch (const std::exception& e) {
            Log::info("bench", "Invalid benchmark parameters, skipping benchmark: {}", e.what());
            return;
        }

        runBenchmark(settings);
    }
}