diff --git a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp
index 40510f2..9e7fff4 100644
--- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp
+++ b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp
@@ -15,6 +15,38 @@
 #include <vulkan/vulkan_core.h>
 #include <vulkan/vk_layer.h>
 
+// Compatibility shim for VK_KHR_present_id2 / VK_KHR_present_wait2 when the installed Vulkan headers predate them
+#ifndef VK_KHR_present_wait2
+#define VK_KHR_present_wait2 1
+#define VK_STRUCTURE_TYPE_PRESENT_WAIT_2_INFO_KHR ((VkStructureType)1000480002) // VK_KHR_present_wait2: extension 481, offset 2
+#define VK_STRUCTURE_TYPE_PRESENT_ID_2_KHR ((VkStructureType)1000479001) // VK_KHR_present_id2: extension 480, offset 1
+
+typedef struct VkPresentWait2InfoKHR {
+    VkStructureType    sType;
+    const void*        pNext;
+    uint64_t           presentId;
+    uint64_t           timeout;
+} VkPresentWait2InfoKHR;
+
+typedef struct VkPresentId2KHR {
+    VkStructureType    sType;
+    const void*        pNext;
+    uint32_t           swapchainCount;
+    const uint64_t*    pPresentIds;
+} VkPresentId2KHR;
+
+typedef VkResult (VKAPI_PTR *PFN_vkWaitForPresent2KHR)(VkDevice device, VkSwapchainKHR swapchain, const VkPresentWait2InfoKHR* pPresentWait2Info);
+#endif
+
+// Compatibility shim for VK_KHR_swapchain_maintenance1: map the KHR names onto the EXT ones on headers that only ship the EXT extension
+#if !defined(VK_KHR_swapchain_maintenance1) && defined(VK_EXT_swapchain_maintenance1) // test extension macros; enumerator names are invisible to the preprocessor
+#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT
+#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT
+typedef VkSwapchainPresentModeInfoEXT VkSwapchainPresentModeInfoKHR;
+typedef VkSwapchainPresentFenceInfoEXT VkSwapchainPresentFenceInfoKHR;
+typedef VkReleaseSwapchainImagesInfoEXT VkReleaseSwapchainImagesInfoKHR;
+#endif
+
 namespace vk {
 
     /// vulkan instance function pointers
diff --git a/lsfg-vk-common/src/vulkan/command_buffer.cpp b/lsfg-vk-common/src/vulkan/command_buffer.cpp
index a3baee5..af8c0ec 100644
--- a/lsfg-vk-common/src/vulkan/command_buffer.cpp
+++ b/lsfg-vk-common/src/vulkan/command_buffer.cpp
@@ -210,13 +210,15 @@ void CommandBuffer::submit(const vk::Vulkan& vk,
         waitSemaphores.push_back(waitTimelineSemaphore);
 
     std::vector<uint64_t> waitValues(waitSemaphores.size(), 0);
-    waitValues.back() = waitValue;
+    if (!waitValues.empty()) // back() on an empty vector is undefined; happens when no wait semaphore was passed at all
+        waitValues.back() = waitValue;
 
     if (signalTimelineSemaphore)
         signalSemaphores.push_back(signalTimelineSemaphore);
 
     std::vector<uint64_t> signalValues(signalSemaphores.size(), 0);
-    signalValues.back() = signalValue;
+    if (!signalValues.empty()) // back() on an empty vector is undefined; happens when no signal semaphore was passed at all
+        signalValues.back() = signalValue;
 
     // create submit info
     const VkTimelineSemaphoreSubmitInfo timelineInfo{
diff --git a/lsfg-vk-layer/src/entrypoint.cpp b/lsfg-vk-layer/src/entrypoint.cpp
index feb856d..b5ecef4 100644
--- a/lsfg-vk-layer/src/entrypoint.cpp
+++ b/lsfg-vk-layer/src/entrypoint.cpp
@@ -176,6 +176,13 @@ namespace {
         }
 
         try {
+            // Query this physical device's name and hand it to the layer, which uses it for backend GPU selection
+            VkPhysicalDeviceProperties2 props{
+                .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
+            };
+            myvk_instance.funcs().GetPhysicalDeviceProperties2(physdev, &props);
+            myvk_layer.setActiveGpu(props.properties.deviceName);
+
             auto myvk_device = std::make_unique<MyVkDevice>(myvk_layer, myvk_instance,
                 physdev, *info,
                 layer_info->GetDeviceProcAddr, setLoaderData,
diff --git a/lsfg-vk-layer/src/hooks/device.cpp b/lsfg-vk-layer/src/hooks/device.cpp
index 0b6ccb6..600ce07 100644
--- a/lsfg-vk-layer/src/hooks/device.cpp
+++ b/lsfg-vk-layer/src/hooks/device.cpp
@@ -64,6 +64,7 @@ MyVkDevice::MyVkDevice(MyVkLayer& layer, MyVkInstance& instance,
         info.ppEnabledExtensionNames,
         info.enabledExtensionCount,
         {
+            "VK_KHR_swapchain",
             "VK_KHR_external_memory",
             "VK_KHR_external_memory_fd",
             "VK_KHR_external_semaphore",
diff --git a/lsfg-vk-layer/src/hooks/layer.cpp b/lsfg-vk-layer/src/hooks/layer.cpp
index 17c028d..d6613b5 100644
--- a/lsfg-vk-layer/src/hooks/layer.cpp
+++ b/lsfg-vk-layer/src/hooks/layer.cpp
@@ -72,18 +72,23 @@ backend::Instance& MyVkLayer::backend() {
         else
             dll = ls::findShaderDll();
 
+        // An explicit profile.gpu always wins; otherwise fall back to the recorded active GPU name (if any)
+        std::optional<std::string> gpuFilter = profile.gpu;
+        if (!gpuFilter.has_value() && !this->active_gpu.empty())
+            gpuFilter = this->active_gpu;
+
         this->backend_instance.emplace(
-            [gpu = profile.gpu](
+            [gpuFilter](
                 const std::string& deviceName,
                 std::pair<const std::string&, const std::string&> ids,
                 const std::optional<std::string>& pci
             ) {
-                if (!gpu)
+                if (!gpuFilter)
                     return true;
 
-                return (deviceName == *gpu)
-                    || (ids.first + ":" + ids.second == *gpu)
-                    || (pci && *pci == *gpu);
+                return (deviceName == *gpuFilter)
+                    || (ids.first + ":" + ids.second == *gpuFilter)
+                    || (pci && *pci == *gpuFilter);
             },
             dll, global.allow_fp16
         );
diff --git a/lsfg-vk-layer/src/hooks/layer.hpp b/lsfg-vk-layer/src/hooks/layer.hpp
index 4495e8b..3abb03a 100644
--- a/lsfg-vk-layer/src/hooks/layer.hpp
+++ b/lsfg-vk-layer/src/hooks/layer.hpp
@@ -6,6 +6,8 @@
 #include "lsfg-vk-common/configuration/config.hpp"
 #include "lsfg-vk-common/helpers/pointers.hpp"
 
+#include <string>
+
 #include <vulkan/vk_layer.h>
 #include <vulkan/vulkan_core.h>
 
@@ -38,6 +40,10 @@ namespace lsfgvk::layer {
         /// @throws ls::error if an error occured during backend creation
         [[nodiscard]] backend::Instance& backend();
 
+        /// record the name of the GPU that backend device selection should match against
+        /// @param name the GPU device name
+        void setActiveGpu(const std::string& name) { this->active_gpu.assign(name); }
+
         // non-moveable, non-copyable
         MyVkLayer(const MyVkLayer&) = delete;
         MyVkLayer& operator=(const MyVkLayer&) = delete;
@@ -48,6 +54,7 @@ namespace lsfgvk::layer {
         ls::WatchedConfig config;
         std::optional<ls::GameConf> current_profile;
 
+        std::string active_gpu;
         ls::lazy<backend::Instance> backend_instance;
     };
 
diff --git a/lsfg-vk-layer/src/hooks/swapchain.cpp b/lsfg-vk-layer/src/hooks/swapchain.cpp
index 406fccd..c6243e2 100644
--- a/lsfg-vk-layer/src/hooks/swapchain.cpp
+++ b/lsfg-vk-layer/src/hooks/swapchain.cpp
@@ -72,6 +72,10 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic
     this->handle = createFunc(&info);
     this->swapchainImages = getSwapchainImages(vk, this->handle);
 
+    // store for reinitialize
+    this->extent = info.imageExtent;
+    this->format = info.imageFormat;
+
     // create virtual swapchain images
     this->images.reserve(this->swapchainImages.size());
     this->availableImages = std::vector<bool>(this->swapchainImages.size(), true);
@@ -86,16 +90,21 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic
         );
     }
 
+    // create frame generator
+    this->generator = std::make_unique<Generator>(layer, device, this->extent, this->format);
+
     // create thread
     this->doneSemaphore.emplace(vk, 0);
     this->thread = std::thread(&MyVkSwapchain::thread_main, this);
-
-    // this->reinitialize();
 }
 
-// void MyVkSwapchain::reinitialize() {
-//     // ...
-// }
+void MyVkSwapchain::reinitialize() {
+    // rebuild the generator (e.g. after profile changes); NOTE(review): thread_main dereferences this->generator and caches count() without locking - confirm the worker is not running
+    this->generator = std::make_unique<Generator>(
+        this->layer.get(), this->device.get(),
+        this->extent, this->format // values stored by the constructor from the swapchain create info
+    );
+}
 
 MyVkSwapchain::~MyVkSwapchain() noexcept {
     this->running.store(false);
@@ -130,9 +139,12 @@ void MyVkSwapchain::thread_main() noexcept {
         vk::Semaphore presentSemaphore;
     };
 
+    // allocate enough passes for generated frames + original frame
+    const size_t generatedCount = this->generator->count();
+    const size_t passCount = (this->swapchainImages.size() + 1) * (generatedCount + 1);
     std::vector<Pass> passes;
-    passes.reserve(this->swapchainImages.size() + 1);
-    for (size_t i = 0; i < this->swapchainImages.size() + 1; i++) {
+    passes.reserve(passCount);
+    for (size_t i = 0; i < passCount; i++) {
         passes.emplace_back(Pass {
             .acquireSemaphore = vk::Semaphore(vk),
             .commandBuffer = vk::CommandBuffer(vk),
@@ -141,24 +153,82 @@ void MyVkSwapchain::thread_main() noexcept {
         });
     }
 
-    try { // FIXME: indentation and stuff
-
+    try {
+    size_t passIdx{0};
     uint64_t counter{1};
     while (this->running.load()) {
         // wait for present signal and fetch the image index
-        const auto ppi = this->virtual_FetchUPresent(100'1000, counter);
+        const auto ppi = this->virtual_FetchUPresent(100'000, counter);
         if (!ppi.has_value())
             continue; // timeout after 100us
 
-        // acquire a real swapchain image
-        const auto& pass = passes[counter % passes.size()];
+        auto& virtualImage = this->images.at(ppi->idx);
+
+        // 1. PREPARE: Copy virtual image to backend source for frame generation (skipped when no frames are generated)
+        if (generatedCount > 0) {
+            auto& preparePass = passes[passIdx++ % passes.size()];
+            auto& prepareCmdbuf = preparePass.commandBuffer; // non-const: handed to the generator for recording
+
+            prepareCmdbuf.begin(vk);
+            const auto [prepareSem, prepareVal] = this->generator->prepare(
+                prepareCmdbuf, virtualImage.handle());
+            prepareCmdbuf.end(vk);
+
+            {
+                const std::scoped_lock<std::mutex> lock(offload.mutex);
+                prepareCmdbuf.submit(vk,
+                    {}, VK_NULL_HANDLE, 0,
+                    {}, prepareSem, prepareVal,
+                    preparePass.copyFence.handle(), offload.queue
+                );
+            }
+
+            // 2. SCHEDULE: Trigger backend frame generation
+            this->generator->schedule();
+
+            // wait for the prepare submission to finish, then recycle its fence
+            if (!preparePass.copyFence.wait(vk, UINT64_MAX))
+                throw ls::error("prepare fence wait timed out");
+            preparePass.copyFence.reset(vk);
+
+            // 3. GENERATED FRAMES: Acquire, fill and present one real swapchain image per generated frame
+            for (size_t frame = 0; frame < generatedCount; frame++) {
+                auto& genPass = passes[passIdx++ % passes.size()];
+                const uint32_t gen_idx = this->virtual_AcquireNext(genPass.acquireSemaphore);
+
+                auto& genCmdbuf = genPass.commandBuffer; // non-const: handed to the generator for recording
+                genCmdbuf.begin(vk);
+                const auto [obtainSem, obtainVal] = this->generator->obtain(
+                    genCmdbuf,
+                    this->swapchainImages.at(gen_idx));
+                genCmdbuf.end(vk);
+
+                {
+                    const std::scoped_lock<std::mutex> lock(offload.mutex);
+                    genCmdbuf.submit(vk,
+                        { genPass.acquireSemaphore.handle() }, obtainSem, obtainVal,
+                        { genPass.presentSemaphore.handle() }, VK_NULL_HANDLE, 0,
+                        genPass.copyFence.handle(), offload.queue
+                    );
+                }
+
+                // present the generated frame
+                this->virtual_PresentGenerated(genPass.presentSemaphore, gen_idx);
+
+                // wait for copy completion, then recycle the fence
+                if (!genPass.copyFence.wait(vk, UINT64_MAX))
+                    throw ls::error("generated frame copy fence wait timed out");
+                genPass.copyFence.reset(vk);
+            }
+        }
+
+        // 4. ORIGINAL FRAME: Acquire real swapchain image and copy virtual -> real
+        const auto& pass = passes[passIdx++ % passes.size()];
         const uint32_t real_idx = this->virtual_AcquireNext(pass.acquireSemaphore);
 
-        // copy virtual image into real swapchain image
         const auto& cmdbuf = pass.commandBuffer;
         cmdbuf.begin(vk);
 
-        auto& virtualImage = this->images.at(ppi->idx);
         auto& swapchainImage = this->swapchainImages.at(real_idx);
 
         cmdbuf.blitImage(vk,
@@ -205,7 +275,7 @@ void MyVkSwapchain::thread_main() noexcept {
             );
         }
 
-        // present the real swapchain image
+        // present the original frame (linked to app's present call)
         this->virtual_PresentLinked(*ppi, pass.presentSemaphore, real_idx);
 
         // wait for the copy to finish
@@ -213,7 +283,7 @@ void MyVkSwapchain::thread_main() noexcept {
             throw ls::error("virtual swapchain copy fence wait timed out");
         pass.copyFence.reset(vk);
 
-        // mark image as available again
+        // mark virtual image as available again
         this->virtual_CompleteUPresent(*ppi);
     }
 
@@ -320,6 +390,42 @@ void MyVkSwapchain::virtual_PresentLinked(const MyVkPresentInfo& original_info,
         this->doneSemaphore->signal(vk, presentId + 1);
 }
 
+void MyVkSwapchain::virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx) {
+    const auto& vk = this->device.get().vkd();
+
+    // generated frames are always queued in FIFO mode for frame pacing
+    const VkPresentModeKHR fifoMode = VK_PRESENT_MODE_FIFO_KHR;
+    const VkSwapchainPresentModeInfoKHR modeOverride{
+        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR,
+        .swapchainCount = 1,
+        .pPresentModes = &fifoMode
+    };
+
+    const VkPresentInfoKHR generatedPresent{
+        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+        .pNext = &modeOverride,
+        .waitSemaphoreCount = 1,
+        .pWaitSemaphores = &semaphore.handle(),
+        .swapchainCount = 1,
+        .pSwapchains = &this->handle,
+        .pImageIndices = &idx,
+    };
+    {
+        auto& offloadQueue = this->device.get().offload();
+
+        const std::scoped_lock<std::mutex> queueGuard(offloadQueue.mutex);
+        const std::scoped_lock<std::mutex> swapchainGuard(this->swapchainMutex);
+
+        const auto result = vk.df().QueuePresentKHR(offloadQueue.queue, &generatedPresent);
+        if (result == VK_SUCCESS)
+            return;
+        // any non-success result is recorded; only a suboptimal swapchain is tolerated
+        this->status.store(result);
+        if (result != VK_SUBOPTIMAL_KHR)
+            throw ls::error("vkQueuePresentKHR() failed for generated frame");
+    }
+}
+
 void MyVkSwapchain::virtual_CompleteUPresent(const MyVkPresentInfo& info) {
     const auto& vk = this->device.get().vkd();
 
diff --git a/lsfg-vk-layer/src/hooks/swapchain.hpp b/lsfg-vk-layer/src/hooks/swapchain.hpp
index 9f70f68..c68839b 100644
--- a/lsfg-vk-layer/src/hooks/swapchain.hpp
+++ b/lsfg-vk-layer/src/hooks/swapchain.hpp
@@ -4,6 +4,7 @@
 
 #include "device.hpp"
 #include "instance.hpp"
+#include "../generator.hpp"
 #include "lsfg-vk-common/helpers/pointers.hpp"
 #include "lsfg-vk-common/vulkan/image.hpp"
 #include "lsfg-vk-common/vulkan/semaphore.hpp"
@@ -12,6 +13,7 @@
 #include <atomic>
 #include <cstdint>
 #include <functional>
+#include <memory>
 #include <mutex>
 #include <optional>
 #include <queue>
@@ -105,11 +107,20 @@ namespace lsfgvk::layer {
         /// mark a present from the underlying swapchain as complete
         /// @param info present information
         void virtual_CompleteUPresent(const MyVkPresentInfo& info);
+
+        /// present a generated frame to the real swapchain in FIFO mode; throws ls::error if presenting fails
+        /// @param semaphore semaphore to wait on before presenting
+        /// @param idx index of the real swapchain image to present
+        void virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx);
     private:
         ls::R<MyVkLayer> layer;
         ls::R<MyVkInstance> instance;
         ls::R<MyVkDevice> device;
 
+        std::unique_ptr<Generator> generator;
+        VkExtent2D extent;
+        VkFormat format;
+
         vk::TimelineSemaphore presentSemaphore;
         uint64_t presentIndex;