From fd009bcadae48f5d299d16aa9cdbcdcd9e3849f0 Mon Sep 17 00:00:00 2001 From: "Skyth (Asilkan)" <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Wed, 25 Dec 2024 16:29:15 +0300 Subject: [PATCH] Sync render thread present with main thread. (#62) * Experimenting with syncing the render thread. * Separate wait functions for swap chains. * Sync render thread present to the main thread. * Move present to main thread and frame limit after presenting. * g_next -> s_next * Fix Vulkan validation errors. * Make max frame latency configurable. * Fix loading thread breaking waitable swap chain order. --- UnleashedRecomp/app.cpp | 3 + UnleashedRecomp/gpu/imgui/imgui_snapshot.cpp | 53 --- UnleashedRecomp/gpu/imgui/imgui_snapshot.h | 31 -- UnleashedRecomp/gpu/rhi/plume_d3d12.cpp | 19 +- UnleashedRecomp/gpu/rhi/plume_d3d12.h | 6 +- .../gpu/rhi/plume_render_interface.h | 3 +- UnleashedRecomp/gpu/rhi/plume_vulkan.cpp | 20 +- UnleashedRecomp/gpu/rhi/plume_vulkan.h | 6 +- UnleashedRecomp/gpu/video.cpp | 337 +++++++++++------- UnleashedRecomp/gpu/video.h | 3 +- UnleashedRecomp/patches/fps_patches.cpp | 24 -- UnleashedRecomp/ui/installer_wizard.cpp | 3 +- UnleashedRecomp/user/config.h | 3 +- UnleashedRecompLib/config/SWA.toml | 6 +- 14 files changed, 242 insertions(+), 275 deletions(-) diff --git a/UnleashedRecomp/app.cpp b/UnleashedRecomp/app.cpp index 306b7f9..f209836 100644 --- a/UnleashedRecomp/app.cpp +++ b/UnleashedRecomp/app.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -38,6 +39,8 @@ PPC_FUNC(sub_824EB490) PPC_FUNC_IMPL(__imp__sub_822C1130); PPC_FUNC(sub_822C1130) { + Video::WaitOnSwapChain(); + // Correct small delta time errors. if (Config::FPS >= FPS_MIN && Config::FPS < FPS_MAX) { diff --git a/UnleashedRecomp/gpu/imgui/imgui_snapshot.cpp b/UnleashedRecomp/gpu/imgui/imgui_snapshot.cpp index 5d5deff..b6454cd 100644 --- a/UnleashedRecomp/gpu/imgui/imgui_snapshot.cpp +++ b/UnleashedRecomp/gpu/imgui/imgui_snapshot.cpp @@ -7,59 +7,6 @@ #include #include -void ImDrawDataSnapshot::Clear() -{ - for (int n = 0; n < Cache.GetMapSize(); n++) - if (ImDrawDataSnapshotEntry* entry = Cache.TryGetMapData(n)) - IM_DELETE(entry->OurCopy); - Cache.Clear(); - DrawData.Clear(); -} - -void ImDrawDataSnapshot::SnapUsingSwap(ImDrawData* src, double current_time) -{ - ImDrawData* dst = &DrawData; - IM_ASSERT(src != dst && src->Valid); - - // Copy all fields except CmdLists[] - ImVector backup_draw_list; - backup_draw_list.swap(src->CmdLists); - IM_ASSERT(src->CmdLists.Data == NULL); - *dst = *src; - backup_draw_list.swap(src->CmdLists); - - // Swap and mark as used - for (ImDrawList* src_list : src->CmdLists) - { - ImDrawDataSnapshotEntry* entry = GetOrAddEntry(src_list); - if (entry->OurCopy == NULL) - { - entry->SrcCopy = src_list; - entry->OurCopy = IM_NEW(ImDrawList)(src_list->_Data); - } - IM_ASSERT(entry->SrcCopy == src_list); - entry->SrcCopy->CmdBuffer.swap(entry->OurCopy->CmdBuffer); // Cheap swap - entry->SrcCopy->IdxBuffer.swap(entry->OurCopy->IdxBuffer); - entry->SrcCopy->VtxBuffer.swap(entry->OurCopy->VtxBuffer); - entry->SrcCopy->CmdBuffer.reserve(entry->OurCopy->CmdBuffer.Capacity); // Preserve bigger size to avoid reallocs for two consecutive frames - entry->SrcCopy->IdxBuffer.reserve(entry->OurCopy->IdxBuffer.Capacity); - entry->SrcCopy->VtxBuffer.reserve(entry->OurCopy->VtxBuffer.Capacity); - entry->LastUsedTime = current_time; - dst->CmdLists.push_back(entry->OurCopy); - } - - // Cleanup unused data - const double gc_threshold = current_time - MemoryCompactTimer; - for (int n = 0; n < Cache.GetMapSize(); n++) - if (ImDrawDataSnapshotEntry* entry = Cache.TryGetMapData(n)) - { - if (entry->LastUsedTime > gc_threshold) - continue; - IM_DELETE(entry->OurCopy); - Cache.Remove(GetDrawListID(entry->SrcCopy), entry); - } -}; - template void ImFontAtlasSnapshot::SnapPointer(size_t offset, const T1& value, const T2& ptr, size_t count) { diff --git a/UnleashedRecomp/gpu/imgui/imgui_snapshot.h b/UnleashedRecomp/gpu/imgui/imgui_snapshot.h index cda49ad..a777451 100644 --- a/UnleashedRecomp/gpu/imgui/imgui_snapshot.h +++ b/UnleashedRecomp/gpu/imgui/imgui_snapshot.h @@ -1,36 +1,5 @@ #pragma once -// https://github.com/ocornut/imgui/issues/1860#issuecomment-1927630727 - -// Usage: -// static ImDrawDataSnapshot snapshot; // Important: make persistent accross frames to reuse buffers. -// snapshot.SnapUsingSwap(ImGui::GetDrawData(), ImGui::GetTime()); -// [...] -// ImGui_ImplDX11_RenderDrawData(&snapshot.DrawData); - -struct ImDrawDataSnapshotEntry -{ - ImDrawList* SrcCopy = NULL; - ImDrawList* OurCopy = NULL; - double LastUsedTime = 0.0; -}; - -struct ImDrawDataSnapshot -{ - // Members - ImDrawData DrawData; - ImPool Cache; - float MemoryCompactTimer = 20.0f; // Discard unused data after 20 seconds - - ~ImDrawDataSnapshot() { Clear(); } - void Clear(); - void SnapUsingSwap(ImDrawData* src, double current_time); // Efficient snapshot by swapping data, meaning "src_list" is unusable. - - // Internals - ImGuiID GetDrawListID(ImDrawList* src_list) { return ImHashData(&src_list, sizeof(src_list)); } // Hash pointer - ImDrawDataSnapshotEntry* GetOrAddEntry(ImDrawList* src_list) { return Cache.GetOrAddByKey(GetDrawListID(src_list)); } -}; - // Undefine this to generate a font atlas file in working directory. // You also need to do this if you are testing localization, as only // characters in the locale get added to the atlas. diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp index 6c28cdf..336ef6b 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp @@ -1191,7 +1191,7 @@ namespace plume { // D3D12SwapChain - D3D12SwapChain::D3D12SwapChain(D3D12CommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) { + D3D12SwapChain::D3D12SwapChain(D3D12CommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format, uint32_t maxFrameLatency) { assert(commandQueue != nullptr); assert(renderWindow != 0); @@ -1199,6 +1199,7 @@ namespace plume { this->renderWindow = renderWindow; this->textureCount = textureCount; this->format = format; + this->maxFrameLatency = maxFrameLatency; // Store the native format representation. nativeFormat = toDXGI(format); @@ -1230,7 +1231,7 @@ namespace plume { } d3d = static_cast(swapChain1); - d3d->SetMaximumFrameLatency(1); + d3d->SetMaximumFrameLatency(maxFrameLatency); waitableObject = d3d->GetFrameLatencyWaitableObject(); textures.resize(textureCount); @@ -1264,16 +1265,18 @@ namespace plume { } bool D3D12SwapChain::present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) { - if (waitableObject != NULL) { - WaitForSingleObject(waitableObject, INFINITE); - } - UINT syncInterval = vsyncEnabled ? 1 : 0; UINT flags = !vsyncEnabled ? DXGI_PRESENT_ALLOW_TEARING : 0; HRESULT res = d3d->Present(syncInterval, flags); return SUCCEEDED(res); } + void D3D12SwapChain::wait() { + if (waitableObject != NULL) { + WaitForSingleObject(waitableObject, INFINITE); + } + } + bool D3D12SwapChain::resize() { getWindowSize(width, height); @@ -2197,8 +2200,8 @@ namespace plume { } } - std::unique_ptr D3D12CommandQueue::createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format) { - return std::make_unique(this, renderWindow, bufferCount, format); + std::unique_ptr D3D12CommandQueue::createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format, uint32_t maxFrameLatency) { + return std::make_unique(this, renderWindow, bufferCount, format, maxFrameLatency); } void D3D12CommandQueue::executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount, RenderCommandSemaphore **signalSemaphores, uint32_t signalSemaphoreCount, RenderCommandFence *signalFence) { diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.h b/UnleashedRecomp/gpu/rhi/plume_d3d12.h index 464a139..bc1b4ea 100644 --- a/UnleashedRecomp/gpu/rhi/plume_d3d12.h +++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.h @@ -107,10 +107,12 @@ namespace plume { uint32_t height = 0; uint32_t refreshRate = 0; bool vsyncEnabled = true; + uint32_t maxFrameLatency = 0; - D3D12SwapChain(D3D12CommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format); + D3D12SwapChain(D3D12CommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format, uint32_t maxFrameLatency); ~D3D12SwapChain() override; bool present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) override; + void wait() override; bool resize() override; bool needsResize() const override; void setVsyncEnabled(bool vsyncEnabled) override; @@ -230,7 +232,7 @@ namespace plume { D3D12CommandQueue(D3D12Device *device, RenderCommandListType type); ~D3D12CommandQueue() override; - std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) override; + std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t textureCount, RenderFormat format, uint32_t newFrameLatency) override; void executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount, RenderCommandSemaphore **signalSemaphores, uint32_t signalSemaphoreCount, RenderCommandFence *signalFence) override; void waitForCommandFence(RenderCommandFence *fence) override; }; diff --git a/UnleashedRecomp/gpu/rhi/plume_render_interface.h b/UnleashedRecomp/gpu/rhi/plume_render_interface.h index 1360e17..45b382a 100644 --- a/UnleashedRecomp/gpu/rhi/plume_render_interface.h +++ b/UnleashedRecomp/gpu/rhi/plume_render_interface.h @@ -87,6 +87,7 @@ namespace plume { struct RenderSwapChain { virtual ~RenderSwapChain() { } virtual bool present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) = 0; + virtual void wait() = 0; virtual bool resize() = 0; virtual bool needsResize() const = 0; virtual void setVsyncEnabled(bool vsyncEnabled) = 0; @@ -190,7 +191,7 @@ namespace plume { struct RenderCommandQueue { virtual ~RenderCommandQueue() { } - virtual std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) = 0; + virtual std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t textureCount, RenderFormat format, uint32_t maxFrameLatency) = 0; virtual void executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores = nullptr, uint32_t waitSemaphoreCount = 0, RenderCommandSemaphore **signalSemaphores = nullptr, uint32_t signalSemaphoreCount = 0, RenderCommandFence *signalFence = nullptr) = 0; virtual void waitForCommandFence(RenderCommandFence *fence) = 0; diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp index 89334ce..b6b18f2 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp @@ -1966,13 +1966,14 @@ namespace plume { // VulkanSwapChain - VulkanSwapChain::VulkanSwapChain(VulkanCommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format) { + VulkanSwapChain::VulkanSwapChain(VulkanCommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format, uint32_t maxFrameLatency) { assert(commandQueue != nullptr); assert(textureCount > 0); this->commandQueue = commandQueue; this->renderWindow = renderWindow; this->format = format; + this->maxFrameLatency = maxFrameLatency; VkResult res; @@ -2131,12 +2132,6 @@ namespace plume { } bool VulkanSwapChain::present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) { - constexpr uint64_t MaxFrameDelay = 1; - if (commandQueue->device->capabilities.presentWait && (currentPresentId > MaxFrameDelay)) { - constexpr uint64_t waitTimeout = 100000000; - vkWaitForPresentKHR(commandQueue->device->vk, vk, currentPresentId - MaxFrameDelay, waitTimeout); - } - thread_local std::vector waitSemaphoresVector; waitSemaphoresVector.clear(); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { @@ -2175,6 +2170,13 @@ namespace plume { return true; } + void VulkanSwapChain::wait() { + if (commandQueue->device->capabilities.presentWait && (currentPresentId >= maxFrameLatency)) { + constexpr uint64_t waitTimeout = 100000000; + vkWaitForPresentKHR(commandQueue->device->vk, vk, currentPresentId - (maxFrameLatency - 1), waitTimeout); + } + } + bool VulkanSwapChain::resize() { getWindowSize(width, height); @@ -3274,8 +3276,8 @@ namespace plume { device->queueFamilies[familyIndex].remove(this); } - std::unique_ptr VulkanCommandQueue::createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format) { - return std::make_unique(this, renderWindow, bufferCount, format); + std::unique_ptr VulkanCommandQueue::createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format, uint32_t maxFrameLatency) { + return std::make_unique(this, renderWindow, bufferCount, format, maxFrameLatency); } void VulkanCommandQueue::executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount, RenderCommandSemaphore **signalSemaphores, uint32_t signalSemaphoreCount, RenderCommandFence *signalFence) { diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.h b/UnleashedRecomp/gpu/rhi/plume_vulkan.h index 6ffe5aa..c4184a4 100644 --- a/UnleashedRecomp/gpu/rhi/plume_vulkan.h +++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.h @@ -231,10 +231,12 @@ namespace plume { std::vector textures; uint64_t currentPresentId = 0; bool immediatePresentModeSupported = false; + uint32_t maxFrameLatency = 0; - VulkanSwapChain(VulkanCommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format); + VulkanSwapChain(VulkanCommandQueue *commandQueue, RenderWindow renderWindow, uint32_t textureCount, RenderFormat format, uint32_t maxFrameLatency); ~VulkanSwapChain() override; bool present(uint32_t textureIndex, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount) override; + void wait() override; bool resize() override; bool needsResize() const override; void setVsyncEnabled(bool vsyncEnabled) override; @@ -346,7 +348,7 @@ namespace plume { VulkanCommandQueue(VulkanDevice *device, RenderCommandListType commandListType); ~VulkanCommandQueue() override; - std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format) override; + std::unique_ptr createSwapChain(RenderWindow renderWindow, uint32_t bufferCount, RenderFormat format, uint32_t maxFrameLatency) override; void executeCommandLists(const RenderCommandList **commandLists, uint32_t commandListCount, RenderCommandSemaphore **waitSemaphores, uint32_t waitSemaphoreCount, RenderCommandSemaphore **signalSemaphores, uint32_t signalSemaphoreCount, RenderCommandFence *signalFence) override; void waitForCommandFence(RenderCommandFence *fence) override; }; diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index e8a39e9..65d362d 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -629,7 +629,8 @@ enum class RenderCommandType UnlockBuffer16, UnlockBuffer32, DrawImGui, - Present, + ExecuteCommandList, + BeginCommandList, StretchRect, SetRenderTarget, SetDepthStencilSurface, @@ -1114,7 +1115,6 @@ static constexpr size_t SAMPLER_DESCRIPTOR_SIZE = 1024; static std::unique_ptr g_imFontTexture; static std::unique_ptr g_imPipelineLayout; static std::unique_ptr g_imPipeline; -static ImDrawDataSnapshot g_imSnapshot; template static void ExecuteCopyCommandList(const T& function) @@ -1309,7 +1309,91 @@ static void CreateImGuiBackend() #endif } -static void BeginCommandList(); +static void CheckSwapChain() +{ + g_swapChain->setVsyncEnabled(Config::VSync); + g_swapChainValid &= !g_swapChain->needsResize(); + + if (!g_swapChainValid) + { + Video::WaitForGPU(); + g_backBuffer->framebuffers.clear(); + g_swapChainValid = g_swapChain->resize(); + g_needsResize = g_swapChainValid; + } + + if (g_swapChainValid) + g_swapChainValid = g_swapChain->acquireTexture(g_acquireSemaphores[g_frame].get(), &g_backBufferIndex); +} + +static void BeginCommandList() +{ + g_renderTarget = g_backBuffer; + g_depthStencil = nullptr; + g_framebuffer = nullptr; + + g_pipelineState.renderTargetFormat = BACKBUFFER_FORMAT; + g_pipelineState.depthStencilFormat = RenderFormat::UNKNOWN; + + if (g_swapChainValid) + { + bool applyingGammaCorrection = Config::XboxColorCorrection || abs(Config::Brightness - 0.5f) > 0.001f; + + if (applyingGammaCorrection) + { + uint32_t width = g_swapChain->getWidth(); + uint32_t height = g_swapChain->getHeight(); + + if (g_intermediaryBackBufferTextureWidth != width || + g_intermediaryBackBufferTextureHeight != height) + { + if (g_intermediaryBackBufferTextureDescriptorIndex == NULL) + g_intermediaryBackBufferTextureDescriptorIndex = g_textureDescriptorAllocator.allocate(); + + Video::WaitForGPU(); // Fine to wait for GPU, this'll only happen during resize. + + g_intermediaryBackBufferTexture = g_device->createTexture(RenderTextureDesc::Texture2D(width, height, 1, BACKBUFFER_FORMAT, RenderTextureFlag::RENDER_TARGET)); + g_textureDescriptorSet->setTexture(g_intermediaryBackBufferTextureDescriptorIndex, g_intermediaryBackBufferTexture.get(), RenderTextureLayout::SHADER_READ); + + g_intermediaryBackBufferTextureWidth = width; + g_intermediaryBackBufferTextureHeight = height; + } + + g_backBuffer->texture = g_intermediaryBackBufferTexture.get(); + } + else + { + g_backBuffer->texture = g_swapChain->getTexture(g_backBufferIndex); + } + } + else + { + g_backBuffer->texture = g_backBuffer->textureHolder.get(); + } + + g_backBuffer->layout = RenderTextureLayout::UNKNOWN; + + for (size_t i = 0; i < 16; i++) + { + g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D; + g_sharedConstants.texture3DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D; + g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE; + } + + if (Config::GITextureFiltering == EGITextureFiltering::Bicubic) + g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER; + else + g_pipelineState.specConstants &= ~SPEC_CONSTANT_BICUBIC_GI_FILTER; + + auto& commandList = g_commandLists[g_frame]; + + commandList->begin(); + commandList->setGraphicsPipelineLayout(g_pipelineLayout.get()); + commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0); + commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1); + commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 2); + commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 3); +} void Video::CreateHostDevice(const char *sdlVideoDriver) { @@ -1380,7 +1464,7 @@ void Video::CreateHostDevice(const char *sdlVideoDriver) break; } - g_swapChain = g_queue->createSwapChain(GameWindow::s_renderWindow, bufferCount, BACKBUFFER_FORMAT); + g_swapChain = g_queue->createSwapChain(GameWindow::s_renderWindow, bufferCount, BACKBUFFER_FORMAT, Config::MaxFrameLatency); g_swapChain->setVsyncEnabled(Config::VSync); g_swapChainValid = !g_swapChain->needsResize(); @@ -1544,6 +1628,7 @@ void Video::CreateHostDevice(const char *sdlVideoDriver) g_backBuffer->format = BACKBUFFER_FORMAT; g_backBuffer->textureHolder = g_device->createTexture(RenderTextureDesc::Texture2D(1, 1, 1, BACKBUFFER_FORMAT, RenderTextureFlag::RENDER_TARGET)); + CheckSwapChain(); BeginCommandList(); RenderTextureBarrier blankTextureBarriers[TEXTURE_DESCRIPTOR_NULL_COUNT]; @@ -1574,96 +1659,6 @@ void Video::WaitForGPU() } } -static std::atomic g_pendingRenderThread; - -static void WaitForRenderThread() -{ - g_pendingRenderThread.wait(true); -} - -static void BeginCommandList() -{ - g_renderTarget = g_backBuffer; - g_depthStencil = nullptr; - g_framebuffer = nullptr; - - g_pipelineState.renderTargetFormat = BACKBUFFER_FORMAT; - g_pipelineState.depthStencilFormat = RenderFormat::UNKNOWN; - - g_swapChain->setVsyncEnabled(Config::VSync); - g_swapChainValid &= !g_swapChain->needsResize(); - - if (!g_swapChainValid) - { - Video::WaitForGPU(); - g_backBuffer->framebuffers.clear(); - g_swapChainValid = g_swapChain->resize(); - g_needsResize = g_swapChainValid; - } - - if (g_swapChainValid) - g_swapChainValid = g_swapChain->acquireTexture(g_acquireSemaphores[g_frame].get(), &g_backBufferIndex); - - if (g_swapChainValid) - { - bool applyingGammaCorrection = Config::XboxColorCorrection || abs(Config::Brightness - 0.5f) > 0.001f; - - if (applyingGammaCorrection) - { - uint32_t width = g_swapChain->getWidth(); - uint32_t height = g_swapChain->getHeight(); - - if (g_intermediaryBackBufferTextureWidth != width || - g_intermediaryBackBufferTextureHeight != height) - { - if (g_intermediaryBackBufferTextureDescriptorIndex == NULL) - g_intermediaryBackBufferTextureDescriptorIndex = g_textureDescriptorAllocator.allocate(); - - Video::WaitForGPU(); // Fine to wait for GPU, this'll only happen during resize. - - g_intermediaryBackBufferTexture = g_device->createTexture(RenderTextureDesc::Texture2D(width, height, 1, BACKBUFFER_FORMAT, RenderTextureFlag::RENDER_TARGET)); - g_textureDescriptorSet->setTexture(g_intermediaryBackBufferTextureDescriptorIndex, g_intermediaryBackBufferTexture.get(), RenderTextureLayout::SHADER_READ); - - g_intermediaryBackBufferTextureWidth = width; - g_intermediaryBackBufferTextureHeight = height; - } - - g_backBuffer->texture = g_intermediaryBackBufferTexture.get(); - } - else - { - g_backBuffer->texture = g_swapChain->getTexture(g_backBufferIndex); - } - } - else - { - g_backBuffer->texture = g_backBuffer->textureHolder.get(); - } - - g_backBuffer->layout = RenderTextureLayout::UNKNOWN; - - for (size_t i = 0; i < 16; i++) - { - g_sharedConstants.texture2DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D; - g_sharedConstants.texture3DIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_3D; - g_sharedConstants.textureCubeIndices[i] = TEXTURE_DESCRIPTOR_NULL_TEXTURE_CUBE; - } - - if (Config::GITextureFiltering == EGITextureFiltering::Bicubic) - g_pipelineState.specConstants |= SPEC_CONSTANT_BICUBIC_GI_FILTER; - else - g_pipelineState.specConstants &= ~SPEC_CONSTANT_BICUBIC_GI_FILTER; - - auto& commandList = g_commandLists[g_frame]; - - commandList->begin(); - commandList->setGraphicsPipelineLayout(g_pipelineLayout.get()); - commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0); - commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1); - commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 2); - commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 3); -} - static uint32_t CreateDevice(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5, be* a6) { g_xdbfTextureCache = std::unordered_map(); @@ -1906,6 +1901,12 @@ struct Profiler value = std::chrono::duration(std::chrono::steady_clock::now() - start).count(); } + void Reset() + { + End(); + Begin(); + } + double UpdateAndReturnAverage() { values[g_profilerValueIndex] = value; @@ -1914,6 +1915,7 @@ struct Profiler }; static double g_applicationValues[PROFILER_VALUE_COUNT]; +static Profiler g_presentProfiler; static Profiler g_renderDirectorProfiler; static bool g_profilerVisible; @@ -1939,6 +1941,9 @@ static void DrawProfiler() if (ImGui::Begin("Profiler", &g_profilerVisible)) { g_applicationValues[g_profilerValueIndex] = App::s_deltaTime * 1000.0; + + const double applicationAvg = std::accumulate(g_applicationValues, g_applicationValues + PROFILER_VALUE_COUNT, 0.0) / PROFILER_VALUE_COUNT; + double presentAvg = g_presentProfiler.UpdateAndReturnAverage(); double renderDirectorAvg = g_renderDirectorProfiler.UpdateAndReturnAverage(); if (ImPlot::BeginPlot("Frame Time")) @@ -1946,18 +1951,23 @@ static void DrawProfiler() ImPlot::SetupAxisLimits(ImAxis_Y1, 0.0, 20.0); ImPlot::SetupAxis(ImAxis_Y1, "ms", ImPlotAxisFlags_None); ImPlot::PlotLine("Application", g_applicationValues, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex); + ImPlot::PlotLine("Present", g_presentProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex); ImPlot::PlotLine("Render Director", g_renderDirectorProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex); - ImPlot::EndPlot(); } g_profilerValueIndex = (g_profilerValueIndex + 1) % PROFILER_VALUE_COUNT; - const double applicationAvg = std::accumulate(g_applicationValues, g_applicationValues + PROFILER_VALUE_COUNT, 0.0) / PROFILER_VALUE_COUNT; + ImGui::Text("Current Application: %g ms (%g FPS)", App::s_deltaTime * 1000.0, 1.0 / App::s_deltaTime); + ImGui::Text("Current Present: %g ms (%g FPS)", g_presentProfiler.value.load(), 1000.0 / g_presentProfiler.value.load()); + ImGui::Text("Current Render Director: %g ms (%g FPS)", g_renderDirectorProfiler.value.load(), 1000.0 / g_renderDirectorProfiler.value.load()); + ImGui::NewLine(); ImGui::Text("Average Application: %g ms (%g FPS)", applicationAvg, 1000.0 / applicationAvg); + ImGui::Text("Average Present: %g ms (%g FPS)", presentAvg, 1000.0 / presentAvg); ImGui::Text("Average Render Director: %g ms (%g FPS)", renderDirectorAvg, 1000.0 / renderDirectorAvg); - + ImGui::NewLine(); + O1HeapDiagnostics diagnostics, physicalDiagnostics; { std::lock_guard lock(g_userHeap.mutex); @@ -1970,10 +1980,12 @@ static void DrawProfiler() ImGui::Text("Heap Allocated: %d MB", int32_t(diagnostics.allocated / (1024 * 1024))); ImGui::Text("Physical Heap Allocated: %d MB", int32_t(physicalDiagnostics.allocated / (1024 * 1024))); + ImGui::NewLine(); auto capabilities = g_device->getCapabilities(); ImGui::Text("Present Wait: %s", capabilities.presentWait ? "Supported" : "Unsupported"); ImGui::Text("Triangle Fan: %s", capabilities.triangleFan ? "Supported" : "Unsupported"); + ImGui::NewLine(); const char* sdlVideoDriver = SDL_GetCurrentVideoDriver(); if (sdlVideoDriver != nullptr) @@ -2023,8 +2035,6 @@ static void DrawImGui() auto drawData = ImGui::GetDrawData(); if (drawData->CmdListsCount != 0) { - g_imSnapshot.SnapUsingSwap(drawData, ImGui::GetTime()); - RenderCommand cmd; cmd.type = RenderCommandType::DrawImGui; g_renderQueue.enqueue(cmd); @@ -2047,7 +2057,7 @@ static void ProcDrawImGui(const RenderCommand& cmd) commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0); commandList->setGraphicsDescriptorSet(g_samplerDescriptorSet.get(), 1); - auto& drawData = g_imSnapshot.DrawData; + auto& drawData = *ImGui::GetDrawData(); commandList->setViewports(RenderViewport(drawData.DisplayPos.x, drawData.DisplayPos.y, drawData.DisplaySize.x, drawData.DisplaySize.y)); ImGuiPushConstants pushConstants{}; @@ -2163,17 +2173,34 @@ static void ProcDrawImGui(const RenderCommand& cmd) } } -static bool g_shouldPrecompilePipelines = false; +// We have to check for this to properly handle the following situation: +// 1. Wait on swap chain. +// 2. Create loading thread. +// 3. Loading thread also waits on swap chain. +// 4. Loading thread presents and quits. +// 5. After the loading thread quits, application also presents. +static bool g_pendingWaitOnSwapChain = true; -void Video::HostPresent() +void Video::WaitOnSwapChain() +{ + if (g_pendingWaitOnSwapChain) + { + if (g_swapChainValid) + g_swapChain->wait(); + + g_pendingWaitOnSwapChain = false; + } +} + +static bool g_shouldPrecompilePipelines; +static std::atomic g_executedCommandList; + +void Video::Present() { - WaitForRenderThread(); DrawImGui(); - g_pendingRenderThread.store(true); - RenderCommand cmd; - cmd.type = RenderCommandType::Present; + cmd.type = RenderCommandType::ExecuteCommandList; g_renderQueue.enqueue(cmd); // All the shaders are available at this point. We can precompile embedded PSOs then. @@ -2187,6 +2214,64 @@ void Video::HostPresent() g_shouldPrecompilePipelines = false; } + + g_executedCommandList.wait(false); + g_executedCommandList = false; + + if (g_swapChainValid) + { + if (g_pendingWaitOnSwapChain) + g_swapChain->wait(); // Never gonna happen outside loading threads as explained above. + + RenderCommandSemaphore* signalSemaphores[] = { g_renderSemaphores[g_frame].get() }; + g_swapChainValid = g_swapChain->present(g_backBufferIndex, signalSemaphores, std::size(signalSemaphores)); + } + + g_pendingWaitOnSwapChain = true; + + g_frame = g_nextFrame; + g_nextFrame = (g_frame + 1) % NUM_FRAMES; + + if (g_commandListStates[g_frame]) + { + g_queue->waitForCommandFence(g_commandFences[g_frame].get()); + g_commandListStates[g_frame] = false; + } + + g_dirtyStates = DirtyStates(true); + g_uploadAllocators[g_frame].reset(); + g_triangleFanIndexData.reset(); + g_quadIndexData.reset(); + + CheckSwapChain(); + + cmd.type = RenderCommandType::BeginCommandList; + g_renderQueue.enqueue(cmd); + + if (Config::FPS >= FPS_MIN && Config::FPS < FPS_MAX) + { + using namespace std::chrono_literals; + + static std::chrono::steady_clock::time_point s_next; + + auto now = std::chrono::steady_clock::now(); + + if (now < s_next) + { + std::this_thread::sleep_for(std::chrono::floor(s_next - now - 2ms)); + + while ((now = std::chrono::steady_clock::now()) < s_next) + std::this_thread::yield(); + } + else + { + s_next = now; + } + + s_next += 1000000000ns / Config::FPS; + } + + g_presentProfiler.Reset(); } void Video::StartPipelinePrecompilation() @@ -2194,11 +2279,6 @@ void Video::StartPipelinePrecompilation() g_shouldPrecompilePipelines = true; } -static void GuestPresent() -{ - Video::HostPresent(); -} - static void SetRootDescriptor(const UploadAllocation& allocation, size_t index) { auto& commandList = g_commandLists[g_frame]; @@ -2209,7 +2289,7 @@ static void SetRootDescriptor(const UploadAllocation& allocation, size_t index) commandList->setGraphicsRootDescriptor(allocation.buffer->at(allocation.offset), index); } -static void ProcPresent(const RenderCommand& cmd) +static void ProcExecuteCommandList(const RenderCommand& cmd) { if (g_swapChainValid) { @@ -2292,8 +2372,6 @@ static void ProcPresent(const RenderCommand& cmd) waitSemaphores, std::size(waitSemaphores), signalSemaphores, std::size(signalSemaphores), g_commandFences[g_frame].get()); - - g_swapChainValid = g_swapChain->present(g_backBufferIndex, signalSemaphores, std::size(signalSemaphores)); } else { @@ -2302,25 +2380,14 @@ static void ProcPresent(const RenderCommand& cmd) g_commandListStates[g_frame] = true; - g_frame = g_nextFrame; - g_nextFrame = (g_frame + 1) % NUM_FRAMES; + g_executedCommandList = true; + g_executedCommandList.notify_one(); +} - if (g_commandListStates[g_frame]) - { - g_queue->waitForCommandFence(g_commandFences[g_frame].get()); - g_commandListStates[g_frame] = false; - } - - g_dirtyStates = DirtyStates(true); - g_uploadAllocators[g_frame].reset(); +static void ProcBeginCommandList(const RenderCommand& cmd) +{ DestructTempResources(); - g_triangleFanIndexData.reset(); - g_quadIndexData.reset(); - BeginCommandList(); - - g_pendingRenderThread.store(false); - g_pendingRenderThread.notify_all(); } static GuestSurface* GetBackBuffer() @@ -3387,8 +3454,6 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman if (g_dirtyStates.vertexShaderConstants || device->dirtyFlags[0] != 0) { - WaitForRenderThread(); - auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::SetVertexShaderConstants; cmd.setVertexShaderConstants.allocation = g_uploadAllocators[g_frame].allocate(device->vertexShaderFloatConstants, 0x1000, 0x100); @@ -3398,8 +3463,6 @@ static void FlushRenderStateForMainThread(GuestDevice* device, LocalRenderComman if (g_dirtyStates.pixelShaderConstants || device->dirtyFlags[1] != 0) { - WaitForRenderThread(); - auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::SetPixelShaderConstants; cmd.setPixelShaderConstants.allocation = g_uploadAllocators[g_frame].allocate(device->pixelShaderFloatConstants, 0xE00, 0x100); @@ -3640,7 +3703,6 @@ static void DrawPrimitiveUP(GuestDevice* device, uint32_t primitiveType, uint32_ { LocalRenderCommandQueue queue; FlushRenderStateForMainThread(device, queue); - WaitForRenderThread(); auto& cmd = queue.enqueue(); cmd.type = RenderCommandType::DrawPrimitiveUP; @@ -4191,7 +4253,8 @@ static std::thread g_renderThread([] case RenderCommandType::UnlockBuffer16: ProcUnlockBuffer16(cmd); break; case RenderCommandType::UnlockBuffer32: ProcUnlockBuffer32(cmd); break; case RenderCommandType::DrawImGui: ProcDrawImGui(cmd); break; - case RenderCommandType::Present: ProcPresent(cmd); break; + case RenderCommandType::ExecuteCommandList: ProcExecuteCommandList(cmd); break; + case RenderCommandType::BeginCommandList: ProcBeginCommandList(cmd); break; case RenderCommandType::StretchRect: ProcStretchRect(cmd); break; case RenderCommandType::SetRenderTarget: ProcSetRenderTarget(cmd); break; case RenderCommandType::SetDepthStencilSurface: ProcSetDepthStencilSurface(cmd); break; @@ -6025,7 +6088,7 @@ GUEST_FUNCTION_HOOK(sub_82BE96F0, GetSurfaceDesc); GUEST_FUNCTION_HOOK(sub_82BE04B0, GetVertexDeclaration); GUEST_FUNCTION_HOOK(sub_82BE0530, HashVertexDeclaration); -GUEST_FUNCTION_HOOK(sub_82BDA8C0, GuestPresent); +GUEST_FUNCTION_HOOK(sub_82BDA8C0, Video::Present); GUEST_FUNCTION_HOOK(sub_82BDD330, GetBackBuffer); GUEST_FUNCTION_HOOK(sub_82BE9498, CreateTexture); diff --git a/UnleashedRecomp/gpu/video.h b/UnleashedRecomp/gpu/video.h index 8718a09..87d67ed 100644 --- a/UnleashedRecomp/gpu/video.h +++ b/UnleashedRecomp/gpu/video.h @@ -15,7 +15,8 @@ using namespace plume; struct Video { static void CreateHostDevice(const char *sdlVideoDriver); - static void HostPresent(); + static void WaitOnSwapChain(); + static void Present(); static void StartPipelinePrecompilation(); static void WaitForGPU(); }; diff --git a/UnleashedRecomp/patches/fps_patches.cpp b/UnleashedRecomp/patches/fps_patches.cpp index 1e46648..70886c2 100644 --- a/UnleashedRecomp/patches/fps_patches.cpp +++ b/UnleashedRecomp/patches/fps_patches.cpp @@ -79,30 +79,6 @@ void Camera2DSlopeLerpFixMidAsmHook(PPCRegister& t, PPCRegister& deltaTime) using namespace std::chrono_literals; -static std::chrono::steady_clock::time_point g_next; - -void ApplicationUpdateMidAsmHook() -{ - if (Config::FPS >= FPS_MIN && Config::FPS < FPS_MAX) - { - auto now = std::chrono::steady_clock::now(); - - if (now < g_next) - { - std::this_thread::sleep_for(std::chrono::floor(g_next - now - 2ms)); - - while ((now = std::chrono::steady_clock::now()) < g_next) - std::this_thread::yield(); - } - else - { - g_next = now; - } - - g_next += 1000000000ns / Config::FPS; - } -} - static std::chrono::steady_clock::time_point g_prev; bool LoadingUpdateMidAsmHook(PPCRegister& r31) diff --git a/UnleashedRecomp/ui/installer_wizard.cpp b/UnleashedRecomp/ui/installer_wizard.cpp index 58328d5..40a0d0a 100644 --- a/UnleashedRecomp/ui/installer_wizard.cpp +++ b/UnleashedRecomp/ui/installer_wizard.cpp @@ -1474,10 +1474,11 @@ bool InstallerWizard::Run(std::filesystem::path installPath, bool skipGame) while (s_isVisible) { + Video::WaitOnSwapChain(); SDL_PumpEvents(); SDL_FlushEvents(SDL_FIRSTEVENT, SDL_LASTEVENT); GameWindow::Update(); - Video::HostPresent(); + Video::Present(); } GameWindow::SetFullscreenCursorVisibility(false); diff --git a/UnleashedRecomp/user/config.h b/UnleashedRecomp/user/config.h index f2c173e..30a251d 100644 --- a/UnleashedRecomp/user/config.h +++ b/UnleashedRecomp/user/config.h @@ -647,10 +647,11 @@ public: CONFIG_DEFINE_LOCALISED("Video", bool, VSync, true); CONFIG_DEFINE_ENUM("Video", ETripleBuffering, TripleBuffering, ETripleBuffering::Auto); CONFIG_DEFINE_LOCALISED("Video", int32_t, FPS, 60); + CONFIG_DEFINE("Video", uint32_t, MaxFrameLatency, 2); CONFIG_DEFINE_LOCALISED("Video", float, Brightness, 0.5f); CONFIG_DEFINE_ENUM_LOCALISED("Video", EAntiAliasing, AntiAliasing, EAntiAliasing::MSAA4x); CONFIG_DEFINE_LOCALISED("Video", bool, TransparencyAntiAliasing, true); - CONFIG_DEFINE("Video", size_t, AnisotropicFiltering, 16); + CONFIG_DEFINE("Video", uint32_t, AnisotropicFiltering, 16); CONFIG_DEFINE_ENUM_LOCALISED("Video", EShadowResolution, ShadowResolution, EShadowResolution::x4096); CONFIG_DEFINE_ENUM_LOCALISED("Video", EGITextureFiltering, GITextureFiltering, EGITextureFiltering::Bicubic); CONFIG_DEFINE_ENUM("Video", EDepthOfFieldQuality, DepthOfFieldQuality, EDepthOfFieldQuality::Auto); diff --git a/UnleashedRecompLib/config/SWA.toml b/UnleashedRecompLib/config/SWA.toml index 6436a26..91084cc 100644 --- a/UnleashedRecompLib/config/SWA.toml +++ b/UnleashedRecompLib/config/SWA.toml @@ -584,13 +584,9 @@ name = "PostureDPadSupportMidAsmHook" address = 0x823CDA2C registers = ["r3"] -[[midasm_hook]] -name = "ApplicationUpdateMidAsmHook" -address = 0x822C0EC8 - [[midasm_hook]] name = "LoadingUpdateMidAsmHook" address = 0x825360C8 registers = ["r31"] jump_address_on_true = 0x825360C8 -jump_address_on_false = 0x82536140 \ No newline at end of file +jump_address_on_false = 0x82536140