diff --git a/lsfg-vk-v3.1/src/context.cpp b/lsfg-vk-v3.1/src/context.cpp index fb8b62b..5bd9d51 100644 --- a/lsfg-vk-v3.1/src/context.cpp +++ b/lsfg-vk-v3.1/src/context.cpp @@ -71,7 +71,7 @@ void Context::present(Vulkan& vk, data.shouldWait = true; // 1. create mipmaps and process input image - data.inSemaphore = Core::Semaphore(vk.device, inSem); + if (inSem >= 0) data.inSemaphore = Core::Semaphore(vk.device, inSem); for (size_t i = 0; i < vk.generationCount; i++) data.internalSemaphores.at(i) = Core::Semaphore(vk.device); @@ -84,15 +84,17 @@ void Context::present(Vulkan& vk, this->beta.Dispatch(data.cmdBuffer1, this->frameIdx); data.cmdBuffer1.end(); + std::vector waits = { data.inSemaphore }; + if (inSem < 0) waits.clear(); data.cmdBuffer1.submit(vk.device.getComputeQueue(), std::nullopt, - { data.inSemaphore }, std::nullopt, + waits, std::nullopt, data.internalSemaphores, std::nullopt); // 2. generate intermediary frames for (size_t pass = 0; pass < vk.generationCount; pass++) { auto& internalSemaphore = data.internalSemaphores.at(pass); auto& outSemaphore = data.outSemaphores.at(pass); - outSemaphore = Core::Semaphore(vk.device, outSem.at(pass)); + if (inSem >= 0) outSemaphore = Core::Semaphore(vk.device, outSem.empty() ? -1 : outSem.at(pass)); auto& completionFence = data.completionFences.at(pass); completionFence = Core::Fence(vk.device); @@ -108,9 +110,11 @@ void Context::present(Vulkan& vk, this->generate.Dispatch(buf2, this->frameIdx, pass); buf2.end(); + std::vector signals = { outSemaphore }; + if (inSem < 0) signals.clear(); buf2.submit(vk.device.getComputeQueue(), completionFence, { internalSemaphore }, std::nullopt, - { outSemaphore }, std::nullopt); + signals, std::nullopt); } this->frameIdx++; diff --git a/lsfg-vk-v3.1/src/core/device.cpp b/lsfg-vk-v3.1/src/core/device.cpp index b257517..81ec914 100644 --- a/lsfg-vk-v3.1/src/core/device.cpp +++ b/lsfg-vk-v3.1/src/core/device.cpp @@ -29,7 +29,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) { if (res != VK_SUCCESS) throw LSFG::vulkan_error(res, "Failed to get physical devices"); - // get device by uuid + // get device by uuid std::optional physicalDevice; for (const auto& device : devices) { VkPhysicalDeviceProperties properties; @@ -37,7 +37,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) { const uint64_t uuid = static_cast(properties.vendorID) << 32 | properties.deviceID; - if (deviceUUID == uuid) { + if (deviceUUID == uuid || deviceUUID == 0x1463ABAC) { physicalDevice = device; break; } diff --git a/lsfg-vk-v3.1/src/core/image.cpp b/lsfg-vk-v3.1/src/core/image.cpp index c038373..9eb333d 100644 --- a/lsfg-vk-v3.1/src/core/image.cpp +++ b/lsfg-vk-v3.1/src/core/image.cpp @@ -181,7 +181,7 @@ Image::Image(const Core::Device& device, VkExtent2D extent, VkFormat format, }; const VkMemoryAllocateInfo allocInfo{ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &importInfo, + .pNext = fd == -1 ? nullptr : &importInfo, .allocationSize = memReqs.size, .memoryTypeIndex = memType.value() }; diff --git a/lsfg-vk-v3.1/src/lsfg.cpp b/lsfg-vk-v3.1/src/lsfg.cpp index 4a80619..49fc018 100644 --- a/lsfg-vk-v3.1/src/lsfg.cpp +++ b/lsfg-vk-v3.1/src/lsfg.cpp @@ -40,6 +40,7 @@ void LSFG::initialize(uint64_t deviceUUID, .flowScale = flowScale, .isHdr = isHdr }); + contexts = std::unordered_map(); device->commandPool = Core::CommandPool(device->device); device->descriptorPool = Core::DescriptorPool(device->device); diff --git a/lsfg-vk-v3.1/src/shaders/generate.cpp b/lsfg-vk-v3.1/src/shaders/generate.cpp index 20a1098..95c2f74 100644 --- a/lsfg-vk-v3.1/src/shaders/generate.cpp +++ b/lsfg-vk-v3.1/src/shaders/generate.cpp @@ -35,7 +35,7 @@ Generate::Generate(Vulkan& vk, for (size_t i = 0; i < vk.generationCount; i++) this->outImgs.emplace_back(vk.device, extent, format, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - VK_IMAGE_ASPECT_COLOR_BIT, fds.at(i)); + VK_IMAGE_ASPECT_COLOR_BIT, fds.empty() ? -1 : fds.at(i)); // hook up shaders for (size_t i = 0; i < vk.generationCount; i++) { diff --git a/src/utils/benchmark.cpp b/src/utils/benchmark.cpp new file mode 100644 index 0000000..91db75e --- /dev/null +++ b/src/utils/benchmark.cpp @@ -0,0 +1,98 @@ +#include "extract/extract.hpp" +#include "extract/trans.hpp" +#include "utils/log.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +namespace { + void __attribute__((constructor)) init() { + // continue if preloaded + const char* preload = std::getenv("LD_PRELOAD"); + if (!preload || *preload == '\0') + return; + const std::string preload_str(preload); + if (preload_str.find("liblsfg-vk.so") == std::string::npos) + return; + // continue if benchmark requested + const char* benchmark = std::getenv("LSFG_BENCHMARK"); + if (!benchmark || *benchmark == '\0') + return; + const std::string benchmark_str(benchmark); + if (benchmark_str != "1") + return; + + // fetch benchmark parameters + const char* lsfgFlowScale = std::getenv("LSFG_FLOW_SCALE"); + const char* lsfgHdr = std::getenv("LSFG_HDR"); + const char* lsfgMultiplier = std::getenv("LSFG_MULTIPLIER"); + const char* lsfgExtentWidth = std::getenv("LSFG_EXTENT_WIDTH"); + const char* lsfgExtentHeight = std::getenv("LSFG_EXTENT_HEIGHT"); + + const float flowScale = lsfgFlowScale + ? std::stof(lsfgFlowScale) : 1.0F; + const bool isHdr = lsfgHdr + ? *lsfgHdr == '1' : false; + const uint64_t multiplier = lsfgMultiplier + ? std::stoull(std::string(lsfgMultiplier)) : 2; + const uint32_t width = lsfgExtentWidth + ? static_cast(std::stoul(lsfgExtentWidth)) : 1920; + const uint32_t height = lsfgExtentHeight + ? static_cast(std::stoul(lsfgExtentHeight)) : 1080; + + Log::info("bench", "Running {}x benchmark with {}x{} extent and flow scale of {} {} HDR", + multiplier, width, height, flowScale, isHdr ? "with" : "without"); + + // create the benchmark context + const char* lsfgDeviceUUID = std::getenv("LSFG_DEVICE_UUID"); + const uint64_t deviceUUID = lsfgDeviceUUID + ? std::stoull(std::string(lsfgDeviceUUID), nullptr, 16) : 0x1463ABAC; + + Extract::extractShaders(); + LSFG::initialize( + deviceUUID, // some magic number if not given + isHdr, 1.0F / flowScale, multiplier - 1, + [](const std::string& name) -> std::vector { + auto dxbc = Extract::getShader(name); + auto spirv = Extract::translateShader(dxbc); + return spirv; + } + ); + const int32_t ctx = LSFG::createContext(-1, -1, {}, + { .width = width, .height = height }, + isHdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM + ); + + Log::info("bench", "Benchmark context created, ready to run"); + + // run the benchmark (run 8*n + 1 so the fences are waited on) + const auto now = std::chrono::high_resolution_clock::now(); + const uint64_t iterations = (8 * 500) + 1; + for (uint64_t count = 0; count < iterations; count++) + LSFG::presentContext(ctx, -1, {}); + const auto then = std::chrono::high_resolution_clock::now(); + + // print results + const auto ms = std::chrono::duration_cast(then - now).count(); + + const auto perIteration = static_cast(ms) / static_cast(iterations); + + const uint64_t totalGen = (multiplier - 1) * iterations; + const auto genFps = static_cast(totalGen) / (static_cast(ms) / 1000.0F); + + const uint64_t totalFrames = iterations * multiplier; + const auto totalFps = static_cast(totalFrames) / (static_cast(ms) / 1000.0F); + + Log::info("bench", "Benchmark completed in {} ms", ms); + Log::info("bench", "Time per iteration: {:.2f} ms", perIteration); + Log::info("bench", "Generation FPS: {:.2f}", genFps); + Log::info("bench", "Final FPS: {:.2f}", totalFps); + Log::info("bench", "Benchmark finished, exiting"); + } +}