mirror of
https://github.com/hedge-dev/UnleashedRecomp.git
synced 2026-04-27 12:51:42 +00:00
Added profiling timestamps to Vulkan. Added more profilers in general.
This commit is contained in:
parent
fb55ac1087
commit
320c81fa4f
5 changed files with 216 additions and 39 deletions
|
|
@ -147,6 +147,8 @@ namespace plume {
|
||||||
virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0;
|
virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0;
|
||||||
virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0;
|
virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0;
|
||||||
virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only.
|
virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only.
|
||||||
|
virtual void resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) = 0;
|
||||||
|
virtual void writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) = 0;
|
||||||
|
|
||||||
// Concrete implementation shortcuts.
|
// Concrete implementation shortcuts.
|
||||||
inline void barriers(RenderBarrierStages stages, const RenderBufferBarrier &barrier) {
|
inline void barriers(RenderBarrierStages stages, const RenderBufferBarrier &barrier) {
|
||||||
|
|
@ -208,6 +210,13 @@ namespace plume {
|
||||||
virtual std::unique_ptr<RenderTexture> createTexture(const RenderTextureDesc &desc) = 0;
|
virtual std::unique_ptr<RenderTexture> createTexture(const RenderTextureDesc &desc) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Backend-agnostic pool of GPU queries. Instances are created through
// RenderDevice::createQueryPool and are passed to the command list entry points
// resetQueryPool / writeTimestamp.
struct RenderQueryPool {
    virtual ~RenderQueryPool() = default;

    // Reads the pool's query values back so that getResults() reflects them.
    virtual void queryResults() = 0;

    // Pointer to the stored results, one 64-bit value per query.
    virtual const uint64_t *getResults() const = 0;

    // Number of values reachable through getResults().
    virtual uint32_t getCount() const = 0;
};
|
||||||
|
|
||||||
struct RenderDevice {
|
struct RenderDevice {
|
||||||
virtual ~RenderDevice() { }
|
virtual ~RenderDevice() { }
|
||||||
virtual std::unique_ptr<RenderCommandList> createCommandList(RenderCommandListType type) = 0;
|
virtual std::unique_ptr<RenderCommandList> createCommandList(RenderCommandListType type) = 0;
|
||||||
|
|
@ -226,6 +235,7 @@ namespace plume {
|
||||||
virtual std::unique_ptr<RenderCommandFence> createCommandFence() = 0;
|
virtual std::unique_ptr<RenderCommandFence> createCommandFence() = 0;
|
||||||
virtual std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() = 0;
|
virtual std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() = 0;
|
||||||
virtual std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) = 0;
|
virtual std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) = 0;
|
||||||
|
virtual std::unique_ptr<RenderQueryPool> createQueryPool(uint32_t queryCount) = 0;
|
||||||
virtual void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0;
|
virtual void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0;
|
||||||
virtual void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0;
|
virtual void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0;
|
||||||
virtual void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) = 0;
|
virtual void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) = 0;
|
||||||
|
|
|
||||||
|
|
@ -69,6 +69,7 @@ namespace plume {
|
||||||
struct RenderSampler;
|
struct RenderSampler;
|
||||||
struct RenderShader;
|
struct RenderShader;
|
||||||
struct RenderTexture;
|
struct RenderTexture;
|
||||||
|
struct RenderQueryPool;
|
||||||
|
|
||||||
// Enums.
|
// Enums.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2522,6 +2522,80 @@ namespace plume {
|
||||||
return (depthAttachment == attachment);
|
return (depthAttachment == attachment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VulkanQueryPool
|
||||||
|
|
||||||
|
// Creates a timestamp query pool holding queryCount queries on the given device.
// When vkCreateQueryPool fails the error is logged to stderr, the handle stays
// VK_NULL_HANDLE and no result storage is allocated.
VulkanQueryPool::VulkanQueryPool(VulkanDevice *device, uint32_t queryCount) {
    assert(device != nullptr);
    assert(queryCount > 0);

    this->device = device;

    VkQueryPoolCreateInfo poolInfo = {};
    poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
    poolInfo.queryCount = queryCount;

    const VkResult createResult = vkCreateQueryPool(device->vk, &poolInfo, nullptr, &vk);
    if (createResult == VK_SUCCESS) {
        // One 64-bit slot per query, filled in by queryResults().
        results.resize(queryCount);
    }
    else {
        fprintf(stderr, "vkCreateQueryPool failed with error code 0x%X.\n", createResult);
    }
}
|
||||||
|
|
||||||
|
// Destroys the Vulkan query pool handle held by this object.
VulkanQueryPool::~VulkanQueryPool() {
    VkDevice ownerDevice = device->vk;
    vkDestroyQueryPool(ownerDevice, vk, nullptr);
}
|
||||||
|
|
||||||
|
void VulkanQueryPool::queryResults() {
|
||||||
|
VkResult res = vkGetQueryPoolResults(device->vk, vk, 0, uint32_t(results.size()), sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
|
||||||
|
if (res != VK_SUCCESS) {
|
||||||
|
fprintf(stderr, "vkGetQueryPoolResults failed with error code 0x%X.\n", res);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Conversion sourced from Godot Engine's Vulkan Rendering Driver.
|
||||||
|
auto mult64to128 = [](uint64_t u, uint64_t v, uint64_t &h, uint64_t &l) {
|
||||||
|
uint64_t u1 = (u & 0xffffffff);
|
||||||
|
uint64_t v1 = (v & 0xffffffff);
|
||||||
|
uint64_t t = (u1 * v1);
|
||||||
|
uint64_t w3 = (t & 0xffffffff);
|
||||||
|
uint64_t k = (t >> 32);
|
||||||
|
|
||||||
|
u >>= 32;
|
||||||
|
t = (u * v1) + k;
|
||||||
|
k = (t & 0xffffffff);
|
||||||
|
uint64_t w1 = (t >> 32);
|
||||||
|
|
||||||
|
v >>= 32;
|
||||||
|
t = (u1 * v) + k;
|
||||||
|
k = (t >> 32);
|
||||||
|
|
||||||
|
h = (u * v) + w1 + k;
|
||||||
|
l = (t << 32) + w3;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Convert results to timestamps.
|
||||||
|
constexpr uint64_t shift_bits = 16;
|
||||||
|
double timestampPeriod = double(device->physicalDeviceProperties.limits.timestampPeriod);
|
||||||
|
uint64_t h = 0, l = 0;
|
||||||
|
for (size_t &result : results) {
|
||||||
|
mult64to128(result, uint64_t(timestampPeriod * double(1 << shift_bits)), h, l);
|
||||||
|
result = l;
|
||||||
|
result >>= shift_bits;
|
||||||
|
result |= h << (64 - shift_bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint64_t *VulkanQueryPool::getResults() const {
|
||||||
|
return results.data();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t VulkanQueryPool::getCount() const {
|
||||||
|
return uint32_t(results.size());
|
||||||
|
}
|
||||||
|
|
||||||
// VulkanCommandList
|
// VulkanCommandList
|
||||||
|
|
||||||
VulkanCommandList::VulkanCommandList(VulkanDevice *device, RenderCommandListType type) {
|
VulkanCommandList::VulkanCommandList(VulkanDevice *device, RenderCommandListType type) {
|
||||||
|
|
@ -3210,6 +3284,20 @@ namespace plume {
|
||||||
// Not required in Vulkan.
|
// Not required in Vulkan.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VulkanCommandList::resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) {
|
||||||
|
assert(queryPool != nullptr);
|
||||||
|
|
||||||
|
const VulkanQueryPool *interfaceQueryPool = static_cast<const VulkanQueryPool *>(queryPool);
|
||||||
|
vkCmdResetQueryPool(vk, interfaceQueryPool->vk, queryFirstIndex, queryCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VulkanCommandList::writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) {
|
||||||
|
assert(queryPool != nullptr);
|
||||||
|
|
||||||
|
const VulkanQueryPool *interfaceQueryPool = static_cast<const VulkanQueryPool *>(queryPool);
|
||||||
|
vkCmdWriteTimestamp(vk, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, interfaceQueryPool->vk, queryIndex);
|
||||||
|
}
|
||||||
|
|
||||||
void VulkanCommandList::checkActiveRenderPass() {
|
void VulkanCommandList::checkActiveRenderPass() {
|
||||||
assert(targetFramebuffer != nullptr);
|
assert(targetFramebuffer != nullptr);
|
||||||
|
|
||||||
|
|
@ -3891,6 +3979,10 @@ namespace plume {
|
||||||
return std::make_unique<VulkanFramebuffer>(this, desc);
|
return std::make_unique<VulkanFramebuffer>(this, desc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<RenderQueryPool> VulkanDevice::createQueryPool(uint32_t queryCount) {
|
||||||
|
return std::make_unique<VulkanQueryPool>(this, queryCount);
|
||||||
|
}
|
||||||
|
|
||||||
void VulkanDevice::setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) {
|
void VulkanDevice::setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) {
|
||||||
assert(meshes != nullptr);
|
assert(meshes != nullptr);
|
||||||
assert(meshCount > 0);
|
assert(meshCount > 0);
|
||||||
|
|
|
||||||
|
|
@ -271,6 +271,18 @@ namespace plume {
|
||||||
bool contains(const VulkanTexture *attachment) const;
|
bool contains(const VulkanTexture *attachment) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct VulkanQueryPool : RenderQueryPool {
|
||||||
|
VulkanDevice *device = nullptr;
|
||||||
|
std::vector<uint64_t> results;
|
||||||
|
VkQueryPool vk = VK_NULL_HANDLE;
|
||||||
|
|
||||||
|
VulkanQueryPool(VulkanDevice *device, uint32_t queryCount);
|
||||||
|
virtual ~VulkanQueryPool() override;
|
||||||
|
virtual void queryResults() override;
|
||||||
|
virtual const uint64_t *getResults() const override;
|
||||||
|
virtual uint32_t getCount() const override;
|
||||||
|
};
|
||||||
|
|
||||||
struct VulkanCommandList : RenderCommandList {
|
struct VulkanCommandList : RenderCommandList {
|
||||||
VkCommandBuffer vk = VK_NULL_HANDLE;
|
VkCommandBuffer vk = VK_NULL_HANDLE;
|
||||||
VkCommandPool commandPool = VK_NULL_HANDLE;
|
VkCommandPool commandPool = VK_NULL_HANDLE;
|
||||||
|
|
@ -319,6 +331,8 @@ namespace plume {
|
||||||
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
|
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
|
||||||
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
|
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
|
||||||
void discardTexture(const RenderTexture* texture) override;
|
void discardTexture(const RenderTexture* texture) override;
|
||||||
|
void resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) override;
|
||||||
|
void writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) override;
|
||||||
void checkActiveRenderPass();
|
void checkActiveRenderPass();
|
||||||
void endActiveRenderPass();
|
void endActiveRenderPass();
|
||||||
void setDescriptorSet(VkPipelineBindPoint bindPoint, const VulkanPipelineLayout *pipelineLayout, const RenderDescriptorSet *descriptorSet, uint32_t setIndex);
|
void setDescriptorSet(VkPipelineBindPoint bindPoint, const VulkanPipelineLayout *pipelineLayout, const RenderDescriptorSet *descriptorSet, uint32_t setIndex);
|
||||||
|
|
@ -409,6 +423,7 @@ namespace plume {
|
||||||
std::unique_ptr<RenderCommandFence> createCommandFence() override;
|
std::unique_ptr<RenderCommandFence> createCommandFence() override;
|
||||||
std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() override;
|
std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() override;
|
||||||
std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) override;
|
std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) override;
|
||||||
|
std::unique_ptr<RenderQueryPool> createQueryPool(uint32_t queryCount) override;
|
||||||
void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) override;
|
void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) override;
|
||||||
void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild, bool preferFastTrace) override;
|
void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild, bool preferFastTrace) override;
|
||||||
void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) override;
|
void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) override;
|
||||||
|
|
|
||||||
|
|
@ -230,6 +230,54 @@ static void SetDirtyValue(bool& dirtyState, T& dest, const T& src)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Number of samples kept in each rolling history.
static constexpr size_t PROFILER_VALUE_COUNT = 256;

// History slot written by the current update; shared by every Profiler so that all
// histories stay aligned. DrawProfiler advances it once per call.
static size_t g_profilerValueIndex = 0;

// Small timer/metric holder: `value` is the latest measurement in milliseconds and
// `values` is a rolling history of PROFILER_VALUE_COUNT measurements.
// `value` is atomic; `values` and `start` are plain members.
struct Profiler
{
    // Members are explicitly initialized so that instances without static storage
    // duration also start from a zeroed state instead of indeterminate values.
    std::atomic<double> value{ 0.0 };
    double values[PROFILER_VALUE_COUNT] = {};
    std::chrono::steady_clock::time_point start{};

    // Marks the start of a measured interval.
    void Begin()
    {
        start = std::chrono::steady_clock::now();
    }

    // Stores the time elapsed since the last Begin()/Reset(), in milliseconds.
    void End()
    {
        value = std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - start).count();
    }

    // Stores an externally measured value (expected in milliseconds, like End()).
    void Set(double v)
    {
        value = v;
    }

    // Ends the current interval and immediately starts the next one.
    // A single clock sample is used for both, so back-to-back intervals tile the
    // timeline exactly and no time is lost between the end and the new start.
    void Reset()
    {
        const auto now = std::chrono::steady_clock::now();
        value = std::chrono::duration<double, std::milli>(now - start).count();
        start = now;
    }

    // Writes the latest value into the current history slot and returns the mean of
    // the whole history. Slots that were never written count as zero.
    double UpdateAndReturnAverage()
    {
        values[g_profilerValueIndex] = value.load();
        return std::accumulate(values, values + PROFILER_VALUE_COUNT, 0.0) / PROFILER_VALUE_COUNT;
    }
};
|
||||||
|
|
||||||
|
static double g_applicationValues[PROFILER_VALUE_COUNT];
|
||||||
|
static Profiler g_gpuFrameProfiler;
|
||||||
|
static Profiler g_presentProfiler;
|
||||||
|
static Profiler g_renderDirectorProfiler;
|
||||||
|
static Profiler g_frameFenceProfiler;
|
||||||
|
static Profiler g_presentWaitProfiler;
|
||||||
|
static Profiler g_swapChainAcquireProfiler;
|
||||||
|
|
||||||
|
static bool g_profilerVisible;
|
||||||
|
static bool g_profilerWasToggled;
|
||||||
|
|
||||||
#ifdef UNLEASHED_RECOMP_D3D12
|
#ifdef UNLEASHED_RECOMP_D3D12
|
||||||
static bool g_vulkan = false;
|
static bool g_vulkan = false;
|
||||||
#else
|
#else
|
||||||
|
|
@ -245,6 +293,7 @@ static std::unique_ptr<RenderDevice> g_device;
|
||||||
static RenderDeviceCapabilities g_capabilities;
|
static RenderDeviceCapabilities g_capabilities;
|
||||||
|
|
||||||
static constexpr size_t NUM_FRAMES = 2;
|
static constexpr size_t NUM_FRAMES = 2;
|
||||||
|
static constexpr size_t NUM_QUERIES = 2;
|
||||||
|
|
||||||
static uint32_t g_frame = 0;
|
static uint32_t g_frame = 0;
|
||||||
static uint32_t g_nextFrame = 1;
|
static uint32_t g_nextFrame = 1;
|
||||||
|
|
@ -252,6 +301,7 @@ static uint32_t g_nextFrame = 1;
|
||||||
static std::unique_ptr<RenderCommandQueue> g_queue;
|
static std::unique_ptr<RenderCommandQueue> g_queue;
|
||||||
static std::unique_ptr<RenderCommandList> g_commandLists[NUM_FRAMES];
|
static std::unique_ptr<RenderCommandList> g_commandLists[NUM_FRAMES];
|
||||||
static std::unique_ptr<RenderCommandFence> g_commandFences[NUM_FRAMES];
|
static std::unique_ptr<RenderCommandFence> g_commandFences[NUM_FRAMES];
|
||||||
|
static std::unique_ptr<RenderQueryPool> g_queryPools[NUM_FRAMES];
|
||||||
static bool g_commandListStates[NUM_FRAMES];
|
static bool g_commandListStates[NUM_FRAMES];
|
||||||
|
|
||||||
static Mutex g_copyMutex;
|
static Mutex g_copyMutex;
|
||||||
|
|
@ -1476,7 +1526,11 @@ static void CheckSwapChain()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_swapChainValid)
|
if (g_swapChainValid)
|
||||||
|
{
|
||||||
|
g_swapChainAcquireProfiler.Begin();
|
||||||
g_swapChainValid = g_swapChain->acquireTexture(g_acquireSemaphores[g_frame].get(), &g_backBufferIndex);
|
g_swapChainValid = g_swapChain->acquireTexture(g_acquireSemaphores[g_frame].get(), &g_backBufferIndex);
|
||||||
|
g_swapChainAcquireProfiler.End();
|
||||||
|
}
|
||||||
|
|
||||||
if (g_needsResize)
|
if (g_needsResize)
|
||||||
Video::ComputeViewportDimensions();
|
Video::ComputeViewportDimensions();
|
||||||
|
|
@ -1552,6 +1606,8 @@ static void BeginCommandList()
|
||||||
auto& commandList = g_commandLists[g_frame];
|
auto& commandList = g_commandLists[g_frame];
|
||||||
|
|
||||||
commandList->begin();
|
commandList->begin();
|
||||||
|
commandList->resetQueryPool(g_queryPools[g_frame].get(), 0, NUM_QUERIES);
|
||||||
|
commandList->writeTimestamp(g_queryPools[g_frame].get(), 0);
|
||||||
commandList->setGraphicsPipelineLayout(g_pipelineLayout.get());
|
commandList->setGraphicsPipelineLayout(g_pipelineLayout.get());
|
||||||
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0);
|
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 0);
|
||||||
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1);
|
commandList->setGraphicsDescriptorSet(g_textureDescriptorSet.get(), 1);
|
||||||
|
|
@ -1655,6 +1711,9 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver)
|
||||||
for (auto& commandFence : g_commandFences)
|
for (auto& commandFence : g_commandFences)
|
||||||
commandFence = g_device->createCommandFence();
|
commandFence = g_device->createCommandFence();
|
||||||
|
|
||||||
|
for (auto& queryPool : g_queryPools)
|
||||||
|
queryPool = g_device->createQueryPool(NUM_QUERIES);
|
||||||
|
|
||||||
g_copyQueue = g_device->createCommandQueue(RenderCommandListType::COPY);
|
g_copyQueue = g_device->createCommandQueue(RenderCommandListType::COPY);
|
||||||
g_copyCommandList = g_device->createCommandList(RenderCommandListType::COPY);
|
g_copyCommandList = g_device->createCommandList(RenderCommandListType::COPY);
|
||||||
g_copyCommandFence = g_device->createCommandFence();
|
g_copyCommandFence = g_device->createCommandFence();
|
||||||
|
|
@ -1875,8 +1934,12 @@ bool Video::CreateHostDevice(const char *sdlVideoDriver)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t g_waitForGPUCount = 0;
|
||||||
|
|
||||||
void Video::WaitForGPU()
|
void Video::WaitForGPU()
|
||||||
{
|
{
|
||||||
|
g_waitForGPUCount++;
|
||||||
|
|
||||||
if (g_vulkan)
|
if (g_vulkan)
|
||||||
{
|
{
|
||||||
g_device->waitIdle();
|
g_device->waitIdle();
|
||||||
|
|
@ -2125,45 +2188,6 @@ static uint32_t HashVertexDeclaration(uint32_t vertexDeclaration)
|
||||||
return vertexDeclaration;
|
return vertexDeclaration;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr size_t PROFILER_VALUE_COUNT = 256;
|
|
||||||
static size_t g_profilerValueIndex;
|
|
||||||
|
|
||||||
struct Profiler
|
|
||||||
{
|
|
||||||
std::atomic<double> value;
|
|
||||||
double values[PROFILER_VALUE_COUNT];
|
|
||||||
std::chrono::steady_clock::time_point start;
|
|
||||||
|
|
||||||
void Begin()
|
|
||||||
{
|
|
||||||
start = std::chrono::steady_clock::now();
|
|
||||||
}
|
|
||||||
|
|
||||||
void End()
|
|
||||||
{
|
|
||||||
value = std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - start).count();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Reset()
|
|
||||||
{
|
|
||||||
End();
|
|
||||||
Begin();
|
|
||||||
}
|
|
||||||
|
|
||||||
double UpdateAndReturnAverage()
|
|
||||||
{
|
|
||||||
values[g_profilerValueIndex] = value;
|
|
||||||
return std::accumulate(values, values + PROFILER_VALUE_COUNT, 0.0) / PROFILER_VALUE_COUNT;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static double g_applicationValues[PROFILER_VALUE_COUNT];
|
|
||||||
static Profiler g_presentProfiler;
|
|
||||||
static Profiler g_renderDirectorProfiler;
|
|
||||||
|
|
||||||
static bool g_profilerVisible;
|
|
||||||
static bool g_profilerWasToggled;
|
|
||||||
|
|
||||||
static const char *DeviceTypeName(RenderDeviceType type)
|
static const char *DeviceTypeName(RenderDeviceType type)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
|
|
@ -2203,29 +2227,47 @@ static void DrawProfiler()
|
||||||
g_applicationValues[g_profilerValueIndex] = App::s_deltaTime * 1000.0;
|
g_applicationValues[g_profilerValueIndex] = App::s_deltaTime * 1000.0;
|
||||||
|
|
||||||
const double applicationAvg = std::accumulate(g_applicationValues, g_applicationValues + PROFILER_VALUE_COUNT, 0.0) / PROFILER_VALUE_COUNT;
|
const double applicationAvg = std::accumulate(g_applicationValues, g_applicationValues + PROFILER_VALUE_COUNT, 0.0) / PROFILER_VALUE_COUNT;
|
||||||
|
double gpuFrameAvg = g_gpuFrameProfiler.UpdateAndReturnAverage();
|
||||||
double presentAvg = g_presentProfiler.UpdateAndReturnAverage();
|
double presentAvg = g_presentProfiler.UpdateAndReturnAverage();
|
||||||
double renderDirectorAvg = g_renderDirectorProfiler.UpdateAndReturnAverage();
|
double renderDirectorAvg = g_renderDirectorProfiler.UpdateAndReturnAverage();
|
||||||
|
double frameFenceAvg = g_frameFenceProfiler.UpdateAndReturnAverage();
|
||||||
|
double presentWaitAvg = g_presentWaitProfiler.UpdateAndReturnAverage();
|
||||||
|
double swapChainAcquireAvg = g_swapChainAcquireProfiler.UpdateAndReturnAverage();
|
||||||
|
|
||||||
if (ImPlot::BeginPlot("Frame Time"))
|
if (ImPlot::BeginPlot("Frame Time"))
|
||||||
{
|
{
|
||||||
ImPlot::SetupAxisLimits(ImAxis_Y1, 0.0, 20.0);
|
ImPlot::SetupAxisLimits(ImAxis_Y1, 0.0, 20.0);
|
||||||
ImPlot::SetupAxis(ImAxis_Y1, "ms", ImPlotAxisFlags_None);
|
ImPlot::SetupAxis(ImAxis_Y1, "ms", ImPlotAxisFlags_None);
|
||||||
ImPlot::PlotLine<double>("Application", g_applicationValues, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
ImPlot::PlotLine<double>("Application", g_applicationValues, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
|
ImPlot::PlotLine<double>("GPU Frame", g_gpuFrameProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
ImPlot::PlotLine<double>("Present", g_presentProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
ImPlot::PlotLine<double>("Present", g_presentProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
ImPlot::PlotLine<double>("Render Director", g_renderDirectorProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
ImPlot::PlotLine<double>("Render Director", g_renderDirectorProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
|
ImPlot::PlotLine<double>("Frame Fence", g_frameFenceProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
|
ImPlot::PlotLine<double>("Present Wait", g_presentWaitProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
|
ImPlot::PlotLine<double>("Swap Chain Acquire", g_swapChainAcquireProfiler.values, PROFILER_VALUE_COUNT, 1.0, 0.0, ImPlotLineFlags_None, g_profilerValueIndex);
|
||||||
ImPlot::EndPlot();
|
ImPlot::EndPlot();
|
||||||
}
|
}
|
||||||
|
|
||||||
g_profilerValueIndex = (g_profilerValueIndex + 1) % PROFILER_VALUE_COUNT;
|
g_profilerValueIndex = (g_profilerValueIndex + 1) % PROFILER_VALUE_COUNT;
|
||||||
|
|
||||||
ImGui::Text("Current Application: %g ms (%g FPS)", App::s_deltaTime * 1000.0, 1.0 / App::s_deltaTime);
|
ImGui::Text("Current Application: %g ms (%g FPS)", App::s_deltaTime * 1000.0, 1.0 / App::s_deltaTime);
|
||||||
|
ImGui::Text("Current GPU Frame: %g ms (%g FPS)", g_gpuFrameProfiler.value.load(), 1000.0 / g_gpuFrameProfiler.value.load());
|
||||||
ImGui::Text("Current Present: %g ms (%g FPS)", g_presentProfiler.value.load(), 1000.0 / g_presentProfiler.value.load());
|
ImGui::Text("Current Present: %g ms (%g FPS)", g_presentProfiler.value.load(), 1000.0 / g_presentProfiler.value.load());
|
||||||
ImGui::Text("Current Render Director: %g ms (%g FPS)", g_renderDirectorProfiler.value.load(), 1000.0 / g_renderDirectorProfiler.value.load());
|
ImGui::Text("Current Render Director: %g ms (%g FPS)", g_renderDirectorProfiler.value.load(), 1000.0 / g_renderDirectorProfiler.value.load());
|
||||||
|
ImGui::Text("Current Frame Fence: %g ms (%g FPS)", g_frameFenceProfiler.value.load(), 1000.0 / g_frameFenceProfiler.value.load());
|
||||||
|
ImGui::Text("Current Present Wait: %g ms (%g FPS)", g_presentWaitProfiler.value.load(), 1000.0 / g_presentWaitProfiler.value.load());
|
||||||
|
ImGui::Text("Current Swap Chain Acquire: %g ms (%g FPS)", g_swapChainAcquireProfiler.value.load(), 1000.0 / g_swapChainAcquireProfiler.value.load());
|
||||||
|
|
||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
|
|
||||||
ImGui::Text("Average Application: %g ms (%g FPS)", applicationAvg, 1000.0 / applicationAvg);
|
ImGui::Text("Average Application: %g ms (%g FPS)", applicationAvg, 1000.0 / applicationAvg);
|
||||||
|
ImGui::Text("Average GPU Frame: %g ms (%g FPS)", gpuFrameAvg, 1000.0 / gpuFrameAvg);
|
||||||
ImGui::Text("Average Present: %g ms (%g FPS)", presentAvg, 1000.0 / presentAvg);
|
ImGui::Text("Average Present: %g ms (%g FPS)", presentAvg, 1000.0 / presentAvg);
|
||||||
ImGui::Text("Average Render Director: %g ms (%g FPS)", renderDirectorAvg, 1000.0 / renderDirectorAvg);
|
ImGui::Text("Average Render Director: %g ms (%g FPS)", renderDirectorAvg, 1000.0 / renderDirectorAvg);
|
||||||
|
ImGui::Text("Average Frame Fence: %g ms (%g FPS)", frameFenceAvg, 1000.0 / frameFenceAvg);
|
||||||
|
ImGui::Text("Average Present Wait: %g ms (%g FPS)", presentWaitAvg, 1000.0 / presentWaitAvg);
|
||||||
|
ImGui::Text("Average Swap Chain Acquire: %g ms (%g FPS)", swapChainAcquireAvg, 1000.0 / swapChainAcquireAvg);
|
||||||
|
|
||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
|
|
||||||
O1HeapDiagnostics diagnostics, physicalDiagnostics;
|
O1HeapDiagnostics diagnostics, physicalDiagnostics;
|
||||||
|
|
@ -2240,6 +2282,7 @@ static void DrawProfiler()
|
||||||
|
|
||||||
ImGui::Text("Heap Allocated: %d MB", int32_t(diagnostics.allocated / (1024 * 1024)));
|
ImGui::Text("Heap Allocated: %d MB", int32_t(diagnostics.allocated / (1024 * 1024)));
|
||||||
ImGui::Text("Physical Heap Allocated: %d MB", int32_t(physicalDiagnostics.allocated / (1024 * 1024)));
|
ImGui::Text("Physical Heap Allocated: %d MB", int32_t(physicalDiagnostics.allocated / (1024 * 1024)));
|
||||||
|
ImGui::Text("GPU Waits: %d", int32_t(g_waitForGPUCount));
|
||||||
ImGui::NewLine();
|
ImGui::NewLine();
|
||||||
|
|
||||||
ImGui::Text("Present Wait: %s", g_capabilities.presentWait ? "Supported" : "Unsupported");
|
ImGui::Text("Present Wait: %s", g_capabilities.presentWait ? "Supported" : "Unsupported");
|
||||||
|
|
@ -2509,7 +2552,11 @@ void Video::WaitOnSwapChain()
|
||||||
if (g_pendingWaitOnSwapChain)
|
if (g_pendingWaitOnSwapChain)
|
||||||
{
|
{
|
||||||
if (g_swapChainValid)
|
if (g_swapChainValid)
|
||||||
|
{
|
||||||
|
g_presentWaitProfiler.Begin();
|
||||||
g_swapChain->wait();
|
g_swapChain->wait();
|
||||||
|
g_presentWaitProfiler.End();
|
||||||
|
}
|
||||||
|
|
||||||
g_pendingWaitOnSwapChain = false;
|
g_pendingWaitOnSwapChain = false;
|
||||||
}
|
}
|
||||||
|
|
@ -2542,7 +2589,11 @@ void Video::Present()
|
||||||
if (g_swapChainValid)
|
if (g_swapChainValid)
|
||||||
{
|
{
|
||||||
if (g_pendingWaitOnSwapChain)
|
if (g_pendingWaitOnSwapChain)
|
||||||
|
{
|
||||||
|
g_presentWaitProfiler.Begin();
|
||||||
g_swapChain->wait(); // Never gonna happen outside loading threads as explained above.
|
g_swapChain->wait(); // Never gonna happen outside loading threads as explained above.
|
||||||
|
g_presentWaitProfiler.End();
|
||||||
|
}
|
||||||
|
|
||||||
RenderCommandSemaphore* signalSemaphores[] = { g_renderSemaphores[g_frame].get() };
|
RenderCommandSemaphore* signalSemaphores[] = { g_renderSemaphores[g_frame].get() };
|
||||||
g_swapChainValid = g_swapChain->present(g_backBufferIndex, signalSemaphores, std::size(signalSemaphores));
|
g_swapChainValid = g_swapChain->present(g_backBufferIndex, signalSemaphores, std::size(signalSemaphores));
|
||||||
|
|
@ -2555,8 +2606,15 @@ void Video::Present()
|
||||||
|
|
||||||
if (g_commandListStates[g_frame])
|
if (g_commandListStates[g_frame])
|
||||||
{
|
{
|
||||||
|
g_frameFenceProfiler.Begin();
|
||||||
g_queue->waitForCommandFence(g_commandFences[g_frame].get());
|
g_queue->waitForCommandFence(g_commandFences[g_frame].get());
|
||||||
|
g_frameFenceProfiler.End();
|
||||||
g_commandListStates[g_frame] = false;
|
g_commandListStates[g_frame] = false;
|
||||||
|
|
||||||
|
// Update the GPU profiler with the results from the timestamps of the frame.
|
||||||
|
g_queryPools[g_frame]->queryResults();
|
||||||
|
const uint64_t *frameTimestamps = g_queryPools[g_frame]->getResults();
|
||||||
|
g_gpuFrameProfiler.Set(double(frameTimestamps[1] - frameTimestamps[0]) / 1000000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
g_dirtyStates = DirtyStates(true);
|
g_dirtyStates = DirtyStates(true);
|
||||||
|
|
@ -2691,6 +2749,7 @@ static void ProcExecuteCommandList(const RenderCommand& cmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &commandList = g_commandLists[g_frame];
|
auto &commandList = g_commandLists[g_frame];
|
||||||
|
commandList->writeTimestamp(g_queryPools[g_frame].get(), 1);
|
||||||
commandList->end();
|
commandList->end();
|
||||||
|
|
||||||
if (g_swapChainValid)
|
if (g_swapChainValid)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue