From c1ce16c7370075df1037eaae72a860d184fcb817 Mon Sep 17 00:00:00 2001
From: Dario
Date: Mon, 10 Feb 2025 23:34:42 -0300
Subject: [PATCH] Add timestamps to D3D12.

---
Notes (this section is ignored by `git am`):

Summary of the change
  * plume_d3d12.{h,cpp}: adds D3D12QueryPool, a RenderQueryPool backed by a
    D3D12_QUERY_HEAP_TYPE_TIMESTAMP heap plus a readback buffer holding one
    uint64_t per query. writeTimestamp() records EndQuery and resolves that
    single query straight into the readback buffer at
    queryIndex * sizeof(uint64_t). queryResults() copies the buffer into
    `results` and rescales every entry by 1000000000.0 / timestampFrequency.
  * D3D12Device: creates a temporary DIRECT command queue only to read
    GetTimestampFrequency(); timestampFrequency stays at its default of 1 when
    the call fails. Also adds createQueryPool().
  * resetQueryPool() is a deliberate no-op on D3D12.
  * plume_vulkan.cpp: loop variable type changed from size_t to uint64_t.
  * video.cpp: drops the FPS figure from the Frame Fence, Present Wait and
    Swap Chain Acquire profiler lines.

Review notes
  * NOTE(review): when CreateQueryHeap fails the constructor returns before
    readbackBuffer and results are set up, so a later queryResults() would
    call map() through a null readbackBuffer and writeTimestamp() would use a
    null query heap. Consider guarding both paths.
  * NOTE(review): queryResults() goes through double, so tick values above
    2^53 lose precision - confirm this is acceptable for profiling.
  * TODO: the author e-mail address is missing from the From: header and must
    be restored before this file is fed to `git am`.
  * Leading whitespace of the hunks was reconstructed (4 spaces per level);
    use `git apply --ignore-whitespace` if the target tree differs.

 UnleashedRecomp/gpu/rhi/plume_d3d12.cpp  | 70 ++++++++++++++++++++++++
 UnleashedRecomp/gpu/rhi/plume_d3d12.h    | 17 ++++++
 UnleashedRecomp/gpu/rhi/plume_vulkan.cpp |  2 +-
 UnleashedRecomp/gpu/video.cpp            | 12 ++--
 4 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp
index 85974e16..6b9eeea5 100644
--- a/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp
+++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.cpp
@@ -1437,6 +1437,52 @@ namespace plume {
         return height;
     }
 
+    // D3D12QueryPool
+
+    D3D12QueryPool::D3D12QueryPool(D3D12Device *device, uint32_t queryCount) {
+        assert(device != nullptr);
+        assert(queryCount > 0);
+
+        this->device = device;
+
+        D3D12_QUERY_HEAP_DESC queryHeapDesc = {};
+        queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
+        queryHeapDesc.Count = queryCount;
+
+        HRESULT res = device->d3d->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&d3d));
+        if (FAILED(res)) {
+            fprintf(stderr, "CreateQueryHeap failed with error code 0x%lX.\n", res);
+            return;
+        }
+
+        readbackBuffer = device->createBuffer(RenderBufferDesc::ReadbackBuffer(sizeof(uint64_t) * queryCount));
+        results.resize(queryCount);
+    }
+
+    D3D12QueryPool::~D3D12QueryPool() {
+        if (d3d != nullptr) {
+            d3d->Release();
+        }
+    }
+
+    void D3D12QueryPool::queryResults() {
+        void *readbackData = readbackBuffer->map();
+        memcpy(results.data(), readbackData, sizeof(uint64_t) * results.size());
+        readbackBuffer->unmap();
+
+        for (uint64_t &result : results) {
+            result = result / double(device->timestampFrequency) * 1000000000.0;
+        }
+    }
+
+    const uint64_t *D3D12QueryPool::getResults() const {
+        return results.data();
+    }
+
+    uint32_t D3D12QueryPool::getCount() const {
+        return uint32_t(results.size());
+    }
+
     // D3D12CommandList
 
     D3D12CommandList::D3D12CommandList(D3D12Device *device, RenderCommandListType type) {
@@ -2004,6 +2050,19 @@ namespace plume {
         d3d->DiscardResource(interfaceTexture->d3d, nullptr);
     }
 
+    void D3D12CommandList::resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) {
+        // Do nothing.
+    }
+
+    void D3D12CommandList::writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) {
+        assert(queryPool != nullptr);
+
+        const D3D12QueryPool *interfaceQueryPool = static_cast<const D3D12QueryPool *>(queryPool);
+        const D3D12Buffer *readbackBuffer = static_cast<const D3D12Buffer *>(interfaceQueryPool->readbackBuffer.get());
+        d3d->EndQuery(interfaceQueryPool->d3d, D3D12_QUERY_TYPE_TIMESTAMP, queryIndex);
+        d3d->ResolveQueryData(interfaceQueryPool->d3d, D3D12_QUERY_TYPE_TIMESTAMP, queryIndex, 1, readbackBuffer->d3d, queryIndex * sizeof(uint64_t));
+    }
+
     void D3D12CommandList::checkDescriptorHeaps() {
         if (!descriptorHeapsSet) {
             ID3D12DescriptorHeap *descriptorHeaps[] = { device->viewHeapAllocator->heap, device->samplerHeapAllocator->heap };
@@ -3461,6 +3520,13 @@ namespace plume {
         samplerHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, SamplerDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
         colorTargetHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
         depthTargetHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
+
+        // Create a command queue only for retrieving the timestamp frequency. Delete it immediately afterwards.
+        std::unique_ptr<D3D12CommandQueue> timestampCommandQueue = std::make_unique<D3D12CommandQueue>(this, RenderCommandListType::DIRECT);
+        res = timestampCommandQueue->d3d->GetTimestampFrequency(&timestampFrequency);
+        if (FAILED(res)) {
+            fprintf(stderr, "GetTimestampFrequency failed with error code 0x%lX. Timestamps will be inaccurate.\n", res);
+        }
     }
 
     D3D12Device::~D3D12Device() {
@@ -3535,6 +3601,10 @@ namespace plume {
         return std::make_unique<D3D12Framebuffer>(this, desc);
     }
 
+    std::unique_ptr<RenderQueryPool> D3D12Device::createQueryPool(uint32_t queryCount) {
+        return std::make_unique<D3D12QueryPool>(this, queryCount);
+    }
+
     void D3D12Device::setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) {
         assert(meshes != nullptr);
         assert(meshCount > 0);
diff --git a/UnleashedRecomp/gpu/rhi/plume_d3d12.h b/UnleashedRecomp/gpu/rhi/plume_d3d12.h
index 6b00ed74..d4987fbc 100644
--- a/UnleashedRecomp/gpu/rhi/plume_d3d12.h
+++ b/UnleashedRecomp/gpu/rhi/plume_d3d12.h
@@ -144,6 +144,19 @@ namespace plume {
         uint32_t getHeight() const override;
     };
 
+    struct D3D12QueryPool : RenderQueryPool {
+        D3D12Device *device = nullptr;
+        ID3D12QueryHeap *d3d = nullptr;
+        std::vector<uint64_t> results;
+        std::unique_ptr<RenderBuffer> readbackBuffer;
+
+        D3D12QueryPool(D3D12Device *device, uint32_t queryCount);
+        virtual ~D3D12QueryPool() override;
+        virtual void queryResults() override;
+        virtual const uint64_t *getResults() const override;
+        virtual uint32_t getCount() const override;
+    };
+
     struct D3D12CommandList : RenderCommandList {
         ID3D12GraphicsCommandList9 *d3d = nullptr;
         ID3D12CommandAllocator *commandAllocator = nullptr;
@@ -196,6 +209,8 @@ namespace plume {
         void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
         void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
         void discardTexture(const RenderTexture* texture) override;
+        void resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) override;
+        void writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) override;
         void checkDescriptorHeaps();
         void notifyDescriptorHeapWasChangedExternally();
         void checkTopology();
@@ -417,6 +432,7 @@ namespace plume {
         std::unique_ptr<D3D12DescriptorHeapAllocator> depthTargetHeapAllocator;
         RenderDeviceCapabilities capabilities;
         RenderDeviceDescription description;
+        uint64_t timestampFrequency = 1;
 
         D3D12Device(D3D12Interface *renderInterface, const std::string &preferredDeviceName);
         ~D3D12Device() override;
@@ -436,6 +452,7 @@ namespace plume {
         std::unique_ptr<RenderCommandFence> createCommandFence() override;
         std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() override;
         std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) override;
+        std::unique_ptr<RenderQueryPool> createQueryPool(uint32_t queryCount) override;
         void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) override;
         void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild, bool preferFastTrace) override;
         void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) override;
diff --git a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp
index 4684d338..a95df555 100644
--- a/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp
+++ b/UnleashedRecomp/gpu/rhi/plume_vulkan.cpp
@@ -2580,7 +2580,7 @@ namespace plume {
         constexpr uint64_t shift_bits = 16;
         double timestampPeriod = double(device->physicalDeviceProperties.limits.timestampPeriod);
         uint64_t h = 0, l = 0;
-        for (size_t &result : results) {
+        for (uint64_t &result : results) {
             mult64to128(result, uint64_t(timestampPeriod * double(1 << shift_bits)), h, l);
             result = l;
             result >>= shift_bits;
diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp
index e2562fbc..be5a3710 100644
--- a/UnleashedRecomp/gpu/video.cpp
+++ b/UnleashedRecomp/gpu/video.cpp
@@ -2254,9 +2254,9 @@ static void DrawProfiler()
         ImGui::Text("Current GPU Frame: %g ms (%g FPS)", g_gpuFrameProfiler.value.load(), 1000.0 / g_gpuFrameProfiler.value.load());
         ImGui::Text("Current Present: %g ms (%g FPS)", g_presentProfiler.value.load(), 1000.0 / g_presentProfiler.value.load());
         ImGui::Text("Current Render Director: %g ms (%g FPS)", g_renderDirectorProfiler.value.load(), 1000.0 / g_renderDirectorProfiler.value.load());
-        ImGui::Text("Current Frame Fence: %g ms (%g FPS)", g_frameFenceProfiler.value.load(), 1000.0 / g_frameFenceProfiler.value.load());
-        ImGui::Text("Current Present Wait: %g ms (%g FPS)", g_presentWaitProfiler.value.load(), 1000.0 / g_presentWaitProfiler.value.load());
-        ImGui::Text("Current Swap Chain Acquire: %g ms (%g FPS)", g_swapChainAcquireProfiler.value.load(), 1000.0 / g_swapChainAcquireProfiler.value.load());
+        ImGui::Text("Current Frame Fence: %g ms", g_frameFenceProfiler.value.load());
+        ImGui::Text("Current Present Wait: %g ms", g_presentWaitProfiler.value.load());
+        ImGui::Text("Current Swap Chain Acquire: %g ms", g_swapChainAcquireProfiler.value.load());
 
         ImGui::NewLine();
 
@@ -2264,9 +2264,9 @@ static void DrawProfiler()
         ImGui::Text("Average GPU Frame: %g ms (%g FPS)", gpuFrameAvg, 1000.0 / gpuFrameAvg);
         ImGui::Text("Average Present: %g ms (%g FPS)", presentAvg, 1000.0 / presentAvg);
         ImGui::Text("Average Render Director: %g ms (%g FPS)", renderDirectorAvg, 1000.0 / renderDirectorAvg);
-        ImGui::Text("Average Frame Fence: %g ms (%g FPS)", frameFenceAvg, 1000.0 / frameFenceAvg);
-        ImGui::Text("Average Present Wait: %g ms (%g FPS)", presentWaitAvg, 1000.0 / presentWaitAvg);
-        ImGui::Text("Average Swap Chain Acquire: %g ms (%g FPS)", swapChainAcquireAvg, 1000.0 / swapChainAcquireAvg);
+        ImGui::Text("Average Frame Fence: %g ms", frameFenceAvg);
+        ImGui::Text("Average Present Wait: %g ms", presentWaitAvg);
+        ImGui::Text("Average Swap Chain Acquire: %g ms", swapChainAcquireAvg);
 
         ImGui::NewLine();
 