mirror of
https://github.com/KartKrewDev/RingRacers.git
synced 2025-10-30 08:01:28 +00:00
Merge branch 'update-tracy' into 'master'
Update tracy client to latest version See merge request KartKrew/Kart!1877
This commit is contained in:
commit
b254ce51c0
26 changed files with 1316 additions and 817 deletions
2
thirdparty/tracy/CMakeLists.txt
vendored
2
thirdparty/tracy/CMakeLists.txt
vendored
|
|
@ -1,4 +1,4 @@
|
|||
# Tracy Profiler Client 0.9.1
|
||||
# Tracy Profiler Client 0.10.0
|
||||
# BSD 3-clause
|
||||
# Copyright (c) 2017-2023, Bartosz Taudul <wolf@nereid.pl>
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
#include "common/tracy_lz4.cpp"
|
||||
#include "client/TracyProfiler.cpp"
|
||||
#include "client/TracyCallstack.cpp"
|
||||
#include "client/TracySysPower.cpp"
|
||||
#include "client/TracySysTime.cpp"
|
||||
#include "client/TracySysTrace.cpp"
|
||||
#include "common/TracySocket.cpp"
|
||||
|
|
|
|||
|
|
@ -686,7 +686,9 @@ void InitCallstackCritical()
|
|||
void InitCallstack()
|
||||
{
|
||||
cb_bts = backtrace_create_state( nullptr, 0, nullptr, nullptr );
|
||||
#ifndef TRACY_DEMANGLE
|
||||
___tracy_init_demangle_buffer();
|
||||
#endif
|
||||
|
||||
#ifdef __linux
|
||||
InitKernelSymbols();
|
||||
|
|
@ -761,7 +763,9 @@ debuginfod_client* GetDebuginfodClient()
|
|||
|
||||
void EndCallstack()
|
||||
{
|
||||
#ifndef TRACY_DEMANGLE
|
||||
___tracy_free_demangle_buffer();
|
||||
#endif
|
||||
#ifdef TRACY_DEBUGINFOD
|
||||
ClearDebugInfoVector( s_di_known );
|
||||
debuginfod_end( s_debuginfod );
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ public:
|
|||
, m_active( false )
|
||||
#endif
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
assert( m_id != (std::numeric_limits<uint32_t>::max)() );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
|
|
@ -154,7 +154,7 @@ public:
|
|||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, name, size );
|
||||
auto item = Profiler::QueueSerial();
|
||||
|
|
@ -235,7 +235,7 @@ public:
|
|||
, m_active( false )
|
||||
#endif
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
assert( m_id != (std::numeric_limits<uint32_t>::max)() );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
|
|
@ -450,7 +450,7 @@ public:
|
|||
|
||||
tracy_force_inline void CustomName( const char* name, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, name, size );
|
||||
auto item = Profiler::QueueSerial();
|
||||
|
|
|
|||
|
|
@ -83,7 +83,9 @@
|
|||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
# define TRACY_DELAYED_INIT
|
||||
# ifndef TRACY_DELAYED_INIT
|
||||
# define TRACY_DELAYED_INIT
|
||||
# endif
|
||||
#else
|
||||
# ifdef __GNUC__
|
||||
# define init_order( val ) __attribute__ ((init_priority(val)))
|
||||
|
|
@ -1072,7 +1074,9 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
|
|||
}
|
||||
closedir( dp );
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
if( selfTid == s_symbolTid ) s_symbolThreadGone.store( true, std::memory_order_release );
|
||||
#endif
|
||||
|
||||
TracyLfqPrepare( QueueType::Crash );
|
||||
TracyLfqCommit;
|
||||
|
|
@ -1353,6 +1357,7 @@ Profiler::Profiler()
|
|||
, m_queryImage( nullptr )
|
||||
, m_queryData( nullptr )
|
||||
, m_crashHandlerInstalled( false )
|
||||
, m_programName( nullptr )
|
||||
{
|
||||
assert( !s_instance );
|
||||
s_instance = this;
|
||||
|
|
@ -1711,6 +1716,9 @@ void Profiler::Worker()
|
|||
if( m_sock ) break;
|
||||
#ifndef TRACY_ON_DEMAND
|
||||
ProcessSysTime();
|
||||
# ifdef TRACY_HAS_SYSPOWER
|
||||
m_sysPower.Tick();
|
||||
# endif
|
||||
#endif
|
||||
|
||||
if( m_broadcast )
|
||||
|
|
@ -1718,6 +1726,14 @@ void Profiler::Worker()
|
|||
const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
|
||||
if( t - lastBroadcast > 3000000000 ) // 3s
|
||||
{
|
||||
m_programNameLock.lock();
|
||||
if( m_programName )
|
||||
{
|
||||
broadcastMsg = GetBroadcastMessage( m_programName, strlen( m_programName ), broadcastLen, dataPort );
|
||||
m_programName = nullptr;
|
||||
}
|
||||
m_programNameLock.unlock();
|
||||
|
||||
lastBroadcast = t;
|
||||
const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count();
|
||||
broadcastMsg.activeTime = int32_t( ts - m_epoch );
|
||||
|
|
@ -1828,6 +1844,9 @@ void Profiler::Worker()
|
|||
for(;;)
|
||||
{
|
||||
ProcessSysTime();
|
||||
#ifdef TRACY_HAS_SYSPOWER
|
||||
m_sysPower.Tick();
|
||||
#endif
|
||||
const auto status = Dequeue( token );
|
||||
const auto serialStatus = DequeueSerial();
|
||||
if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
|
||||
|
|
@ -4149,6 +4168,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
|
|||
TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); }
|
||||
TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); }
|
||||
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); }
|
||||
TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color ); }
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); }
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); }
|
||||
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
|
||||
|
|
@ -4167,7 +4187,7 @@ TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begi
|
|||
{
|
||||
TracyLfqPrepareC( tracy::QueueType::GpuZoneBegin );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
|
||||
tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
|
||||
|
|
@ -4190,7 +4210,7 @@ TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zon
|
|||
{
|
||||
TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLoc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
|
||||
tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
|
||||
|
|
@ -4202,7 +4222,7 @@ TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tra
|
|||
tracy::GetProfiler().SendCallstack( data.depth );
|
||||
TracyLfqPrepareC( tracy::QueueType::GpuZoneBeginAllocSrcLocCallstack );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
|
||||
tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
|
||||
|
|
@ -4292,7 +4312,7 @@ TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_
|
|||
auto item = tracy::Profiler::QueueSerial();
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
|
||||
tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
|
||||
|
|
@ -4304,7 +4324,7 @@ TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct
|
|||
auto item = tracy::Profiler::QueueSerialCallstack( tracy::Callstack( data.depth ) );
|
||||
tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.cpuTime, tracy::Profiler::GetTime() );
|
||||
tracy::MemWrite( &item->gpuNewContext.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.srcloc, data.srcloc );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.queryId, data.queryId );
|
||||
tracy::MemWrite( &item->gpuZoneBegin.context, data.context );
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include "tracy_concurrentqueue.h"
|
||||
#include "tracy_SPSCQueue.h"
|
||||
#include "TracyCallstack.hpp"
|
||||
#include "TracySysPower.hpp"
|
||||
#include "TracySysTime.hpp"
|
||||
#include "TracyFastVector.hpp"
|
||||
#include "../common/TracyQueue.hpp"
|
||||
|
|
@ -208,7 +209,22 @@ public:
|
|||
if( HardwareSupportsInvariantTSC() )
|
||||
{
|
||||
uint64_t rax, rdx;
|
||||
#ifdef TRACY_PATCHABLE_NOPSLEDS
|
||||
// Some external tooling (such as rr) wants to patch our rdtsc and replace it by a
|
||||
// branch to control the external input seen by a program. This kind of patching is
|
||||
// not generally possible depending on the surrounding code and can lead to significant
|
||||
// slowdowns if the compiler generated unlucky code and rr and tracy are used together.
|
||||
// To avoid this, use the rr-safe `nopl 0(%rax, %rax, 1); rdtsc` instruction sequence,
|
||||
// which rr promises will be patchable independent of the surrounding code.
|
||||
asm volatile (
|
||||
// This is nopl 0(%rax, %rax, 1), but assemblers are inconsistent about whether
|
||||
// they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use
|
||||
// the 5 byte one.
|
||||
".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
|
||||
"rdtsc" : "=a" (rax), "=d" (rdx) );
|
||||
#else
|
||||
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
|
||||
#endif
|
||||
return (int64_t)(( rdx << 32 ) + rax);
|
||||
}
|
||||
# else
|
||||
|
|
@ -288,7 +304,7 @@ public:
|
|||
{
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
auto& profiler = GetProfiler();
|
||||
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
|
||||
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < (std::numeric_limits<uint32_t>::max)() );
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
|
|
@ -305,6 +321,12 @@ public:
|
|||
fi->flip = flip;
|
||||
profiler.m_fiQueue.commit_next();
|
||||
profiler.m_fiLock.unlock();
|
||||
#else
|
||||
static_cast<void>(image); // unused
|
||||
static_cast<void>(w); // unused
|
||||
static_cast<void>(h); // unused
|
||||
static_cast<void>(offset); // unused
|
||||
static_cast<void>(flip); // unused
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -362,7 +384,7 @@ public:
|
|||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
|
|
@ -399,7 +421,7 @@ public:
|
|||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
|
|
@ -442,7 +464,7 @@ public:
|
|||
|
||||
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyLfqPrepare( QueueType::MessageAppInfo );
|
||||
|
|
@ -676,6 +698,13 @@ public:
|
|||
return m_isConnected.load( std::memory_order_acquire );
|
||||
}
|
||||
|
||||
// Requests that the program name advertised in the discovery broadcast be
// replaced with `name`.
//
// Only the pointer is stored (under m_programNameLock); the string is not
// copied here. Profiler::Worker() later takes the same lock, and if the pointer
// is non-null it rebuilds the broadcast message from it (using strlen) and
// resets the pointer to nullptr. The caller therefore has to keep the
// NUL-terminated string alive and unchanged until the worker has consumed it.
// NOTE(review): nothing visible here signals when that has happened - confirm
// the intended lifetime contract with callers (a string literal or other
// static storage is the safe choice).
tracy_force_inline void SetProgramName( const char* name )
|
||||
{
|
||||
m_programNameLock.lock();
|
||||
m_programName = name;
|
||||
m_programNameLock.unlock();
|
||||
}
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
tracy_force_inline uint64_t ConnectionId() const
|
||||
{
|
||||
|
|
@ -730,7 +759,7 @@ public:
|
|||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
|
||||
{
|
||||
const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz );
|
||||
assert( sz32 <= std::numeric_limits<uint16_t>::max() );
|
||||
assert( sz32 <= (std::numeric_limits<uint16_t>::max)() );
|
||||
const auto sz = uint16_t( sz32 );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 2 );
|
||||
|
|
@ -941,6 +970,10 @@ private:
|
|||
void ProcessSysTime() {}
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSPOWER
|
||||
SysPower m_sysPower;
|
||||
#endif
|
||||
|
||||
ParameterCallback m_paramCallback;
|
||||
void* m_paramCallbackData;
|
||||
SourceContentsCallback m_sourceCallback;
|
||||
|
|
@ -959,6 +992,9 @@ private:
|
|||
} m_prevSignal;
|
||||
#endif
|
||||
bool m_crashHandlerInstalled;
|
||||
|
||||
const char* m_programName;
|
||||
TracyMutex m_programNameLock;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public:
|
|||
|
||||
tracy_force_inline void Text( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
|
|
@ -123,7 +123,7 @@ public:
|
|||
|
||||
tracy_force_inline void Name( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
|
|
|
|||
164
thirdparty/tracy/include/tracy/client/TracySysPower.cpp
vendored
Normal file
164
thirdparty/tracy/include/tracy/client/TracySysPower.cpp
vendored
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
#include "TracySysPower.hpp"
|
||||
|
||||
#ifdef TRACY_HAS_SYSPOWER
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <chrono>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "TracyDebug.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Builds the list of power domains by walking the Intel RAPL powercap tree in
// sysfs. The domain vector is constructed with the argument 4 (presumably its
// initial capacity - confirm against FastVector), and the last-poll timestamp
// starts at zero so that the first Tick() call is never rate-limited away.
SysPower::SysPower()
    : m_domains( 4 )
    , m_lastTime( 0 )
{
    // A negative parent index tells ScanDirectory() that domains found at this
    // level have no enclosing domain whose name should be used as a prefix.
    const int noParent = -1;
    const char* const raplRoot = "/sys/devices/virtual/powercap/intel-rapl";
    ScanDirectory( raplRoot, noParent );
}
|
||||
|
||||
// Closes the energy counter file of every registered power domain.
SysPower::~SysPower()
{
    const auto domainCount = m_domains.size();
    for( size_t idx = 0; idx < domainCount; idx++ )
    {
        // Only the file handle is released. The name string is deliberately
        // left allocated, as it may be still needed: Tick() writes the raw
        // name pointer into queued SysPowerReport items.
        fclose( m_domains[idx].handle );
    }
}
|
||||
|
||||
void SysPower::Tick()
|
||||
{
|
||||
auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
|
||||
if( t - m_lastTime > 10000000 ) // 10 ms
|
||||
{
|
||||
m_lastTime = t;
|
||||
for( auto& v : m_domains )
|
||||
{
|
||||
char tmp[32];
|
||||
if( fread( tmp, 1, 32, v.handle ) > 0 )
|
||||
{
|
||||
rewind( v.handle );
|
||||
auto p = (uint64_t)atoll( tmp );
|
||||
uint64_t delta;
|
||||
if( p >= v.value )
|
||||
{
|
||||
delta = p - v.value;
|
||||
}
|
||||
else
|
||||
{
|
||||
delta = v.overflow - v.value + p;
|
||||
}
|
||||
v.value = p;
|
||||
|
||||
TracyLfqPrepare( QueueType::SysPowerReport );
|
||||
MemWrite( &item->sysPower.time, Profiler::GetTime() );
|
||||
MemWrite( &item->sysPower.delta, delta );
|
||||
MemWrite( &item->sysPower.name, (uint64_t)v.name );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SysPower::ScanDirectory( const char* path, int parent )
|
||||
{
|
||||
DIR* dir = opendir( path );
|
||||
if( !dir ) return;
|
||||
struct dirent* ent;
|
||||
uint64_t maxRange = 0;
|
||||
char* name = nullptr;
|
||||
FILE* handle = nullptr;
|
||||
while( ( ent = readdir( dir ) ) )
|
||||
{
|
||||
if( ent->d_type == DT_REG )
|
||||
{
|
||||
if( strcmp( ent->d_name, "max_energy_range_uj" ) == 0 )
|
||||
{
|
||||
char tmp[PATH_MAX];
|
||||
snprintf( tmp, PATH_MAX, "%s/max_energy_range_uj", path );
|
||||
FILE* f = fopen( tmp, "r" );
|
||||
if( f )
|
||||
{
|
||||
fscanf( f, "%" PRIu64, &maxRange );
|
||||
fclose( f );
|
||||
}
|
||||
}
|
||||
else if( strcmp( ent->d_name, "name" ) == 0 )
|
||||
{
|
||||
char tmp[PATH_MAX];
|
||||
snprintf( tmp, PATH_MAX, "%s/name", path );
|
||||
FILE* f = fopen( tmp, "r" );
|
||||
if( f )
|
||||
{
|
||||
char ntmp[128];
|
||||
if( fgets( ntmp, 128, f ) )
|
||||
{
|
||||
// Last character is newline, skip it
|
||||
const auto sz = strlen( ntmp ) - 1;
|
||||
if( parent < 0 )
|
||||
{
|
||||
name = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( name, ntmp, sz );
|
||||
name[sz] = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto p = m_domains[parent];
|
||||
const auto psz = strlen( p.name );
|
||||
name = (char*)tracy_malloc( psz + sz + 2 );
|
||||
memcpy( name, p.name, psz );
|
||||
name[psz] = ':';
|
||||
memcpy( name+psz+1, ntmp, sz );
|
||||
name[psz+sz+1] = '\0';
|
||||
}
|
||||
}
|
||||
fclose( f );
|
||||
}
|
||||
}
|
||||
else if( strcmp( ent->d_name, "energy_uj" ) == 0 )
|
||||
{
|
||||
char tmp[PATH_MAX];
|
||||
snprintf( tmp, PATH_MAX, "%s/energy_uj", path );
|
||||
handle = fopen( tmp, "r" );
|
||||
}
|
||||
}
|
||||
if( name && handle && maxRange > 0 ) break;
|
||||
}
|
||||
if( name && handle && maxRange > 0 )
|
||||
{
|
||||
parent = (int)m_domains.size();
|
||||
Domain* domain = m_domains.push_next();
|
||||
domain->value = 0;
|
||||
domain->overflow = maxRange;
|
||||
domain->handle = handle;
|
||||
domain->name = name;
|
||||
TracyDebug( "Power domain id %i, %s found at %s\n", parent, name, path );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( name ) tracy_free( name );
|
||||
if( handle ) fclose( handle );
|
||||
}
|
||||
|
||||
rewinddir( dir );
|
||||
while( ( ent = readdir( dir ) ) )
|
||||
{
|
||||
if( ent->d_type == DT_DIR && strncmp( ent->d_name, "intel-rapl:", 11 ) == 0 )
|
||||
{
|
||||
char tmp[PATH_MAX];
|
||||
snprintf( tmp, PATH_MAX, "%s/%s", path, ent->d_name );
|
||||
ScanDirectory( tmp, parent );
|
||||
}
|
||||
}
|
||||
closedir( dir );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
44
thirdparty/tracy/include/tracy/client/TracySysPower.hpp
vendored
Normal file
44
thirdparty/tracy/include/tracy/client/TracySysPower.hpp
vendored
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
#ifndef __TRACYSYSPOWER_HPP__
|
||||
#define __TRACYSYSPOWER_HPP__
|
||||
|
||||
#if defined __linux__
|
||||
# define TRACY_HAS_SYSPOWER
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSPOWER
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "TracyFastVector.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class SysPower
|
||||
{
|
||||
struct Domain
|
||||
{
|
||||
uint64_t value;
|
||||
uint64_t overflow;
|
||||
FILE* handle;
|
||||
const char* name;
|
||||
};
|
||||
|
||||
public:
|
||||
SysPower();
|
||||
~SysPower();
|
||||
|
||||
void Tick();
|
||||
|
||||
private:
|
||||
void ScanDirectory( const char* path, int parent );
|
||||
|
||||
FastVector<Domain> m_domains;
|
||||
uint64_t m_lastTime;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -770,6 +770,13 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
|||
TracyDebug( "sched_wakeup id: %i\n", wakeupId );
|
||||
TracyDebug( "drm_vblank_event id: %i\n", vsyncId );
|
||||
|
||||
#ifdef TRACY_NO_SAMPLING
|
||||
const bool noSoftwareSampling = true;
|
||||
#else
|
||||
const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" );
|
||||
const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1';
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_NO_SAMPLE_RETIREMENT
|
||||
const bool noRetirement = true;
|
||||
#else
|
||||
|
|
@ -839,28 +846,31 @@ bool SysTraceStart( int64_t& samplingPeriod )
|
|||
pe.clockid = CLOCK_MONOTONIC_RAW;
|
||||
#endif
|
||||
|
||||
TracyDebug( "Setup software sampling\n" );
|
||||
ProbePreciseIp( pe, currentPid );
|
||||
for( int i=0; i<s_numCpus; i++ )
|
||||
if( !noSoftwareSampling )
|
||||
{
|
||||
int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||
if( fd == -1 )
|
||||
TracyDebug( "Setup software sampling\n" );
|
||||
ProbePreciseIp( pe, currentPid );
|
||||
for( int i=0; i<s_numCpus; i++ )
|
||||
{
|
||||
pe.exclude_kernel = 1;
|
||||
ProbePreciseIp( pe, currentPid );
|
||||
fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||
int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||
if( fd == -1 )
|
||||
{
|
||||
TracyDebug( " Failed to setup!\n");
|
||||
break;
|
||||
pe.exclude_kernel = 1;
|
||||
ProbePreciseIp( pe, currentPid );
|
||||
fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
|
||||
if( fd == -1 )
|
||||
{
|
||||
TracyDebug( " Failed to setup!\n");
|
||||
break;
|
||||
}
|
||||
TracyDebug( " No access to kernel samples\n" );
|
||||
}
|
||||
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
|
||||
if( s_ring[s_numBuffers].IsValid() )
|
||||
{
|
||||
s_numBuffers++;
|
||||
TracyDebug( " Core %i ok\n", i );
|
||||
}
|
||||
TracyDebug( " No access to kernel samples\n" );
|
||||
}
|
||||
new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
|
||||
if( s_ring[s_numBuffers].IsValid() )
|
||||
{
|
||||
s_numBuffers++;
|
||||
TracyDebug( " Core %i ok\n", i );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@
|
|||
# if defined(__APPLE__)
|
||||
# include <TargetConditionals.h>
|
||||
# if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
|
||||
# include <mach/mach_vm.h>
|
||||
# include <mach/mach.h>
|
||||
# include <mach/vm_statistics.h>
|
||||
# endif
|
||||
# include <pthread.h>
|
||||
|
|
|
|||
|
|
@ -9,14 +9,14 @@ namespace tracy
|
|||
|
||||
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 63 };
|
||||
enum : uint32_t { ProtocolVersion = 64 };
|
||||
enum : uint16_t { BroadcastVersion = 3 };
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
|
||||
enum { TargetFrameSize = 256 * 1024 };
|
||||
enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) };
|
||||
static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
|
||||
static_assert( LZ4Size <= (std::numeric_limits<lz4sz_t>::max)(), "LZ4Size greater than lz4sz_t" );
|
||||
static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
|
||||
|
||||
enum { HandshakeShibbolethSize = 8 };
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ enum class QueueType : uint8_t
|
|||
GpuNewContext,
|
||||
CallstackFrame,
|
||||
SysTimeReport,
|
||||
SysPowerReport,
|
||||
TidToPid,
|
||||
HwSampleCpuCycle,
|
||||
HwSampleInstructionRetired,
|
||||
|
|
@ -563,6 +564,13 @@ struct QueueSysTime
|
|||
float sysTime;
|
||||
};
|
||||
|
||||
struct QueueSysPower
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t delta;
|
||||
uint64_t name; // ptr
|
||||
};
|
||||
|
||||
struct QueueContextSwitch
|
||||
{
|
||||
int64_t time;
|
||||
|
|
@ -729,6 +737,7 @@ struct QueueItem
|
|||
QueueCrashReport crashReport;
|
||||
QueueCrashReportThread crashReportThread;
|
||||
QueueSysTime sysTime;
|
||||
QueueSysPower sysPower;
|
||||
QueueContextSwitch contextSwitch;
|
||||
QueueThreadWakeup threadWakeup;
|
||||
QueueTidToPid tidToPid;
|
||||
|
|
@ -832,6 +841,7 @@ static constexpr size_t QueueDataSize[] = {
|
|||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSysTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSysPower ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
|
||||
|
|
|
|||
|
|
@ -353,7 +353,7 @@ int Socket::Recv( void* _buf, int len, int timeout )
|
|||
}
|
||||
}
|
||||
|
||||
int Socket::ReadUpTo( void* _buf, int len, int timeout )
|
||||
int Socket::ReadUpTo( void* _buf, int len )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (char*)_buf;
|
||||
|
|
@ -678,10 +678,10 @@ bool UdpListen::Listen( uint16_t port )
|
|||
#endif
|
||||
#if defined _WIN32
|
||||
unsigned long reuse = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
|
||||
setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
|
||||
#else
|
||||
int reuse = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
|
||||
setsockopt( sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
unsigned long broadcast = 1;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public:
|
|||
int Send( const void* buf, int len );
|
||||
int GetSendBufSize();
|
||||
|
||||
int ReadUpTo( void* buf, int len, int timeout );
|
||||
int ReadUpTo( void* buf, int len );
|
||||
bool Read( void* buf, int len, int timeout );
|
||||
|
||||
template<typename ShouldExit>
|
||||
|
|
|
|||
|
|
@ -213,21 +213,24 @@ TRACY_API const char* GetThreadName( uint32_t id )
|
|||
# else
|
||||
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
|
||||
# endif
|
||||
if( _GetThreadDescription )
|
||||
{
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
{
|
||||
PWSTR tmp;
|
||||
_GetThreadDescription( hnd, &tmp );
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
if( ret != 0 )
|
||||
{
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
if( _GetThreadDescription )
|
||||
{
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
{
|
||||
PWSTR tmp;
|
||||
if( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
|
||||
{
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
LocalFree( tmp );
|
||||
if( ret != static_cast<size_t>( -1 ) )
|
||||
{
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined __linux__
|
||||
int cs, fd;
|
||||
char path[32];
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ namespace tracy
|
|||
namespace Version
|
||||
{
|
||||
enum { Major = 0 };
|
||||
enum { Minor = 9 };
|
||||
enum { Patch = 1 };
|
||||
enum { Minor = 10 };
|
||||
enum { Patch = 0 };
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,8 @@
|
|||
#include <limits.h>
|
||||
#if defined(__linux__) && !defined(__GLIBC__) && !defined(__WORDSIZE)
|
||||
// include __WORDSIZE headers for musl
|
||||
# include <bits/reg.h>
|
||||
#endif
|
||||
#if __WORDSIZE == 64
|
||||
# define BACKTRACE_ELF_SIZE 64
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -473,7 +473,7 @@ enum attr_val_encoding
|
|||
/* An address. */
|
||||
ATTR_VAL_ADDRESS,
|
||||
/* An index into the .debug_addr section, whose value is relative to
|
||||
* the DW_AT_addr_base attribute of the compilation unit. */
|
||||
the DW_AT_addr_base attribute of the compilation unit. */
|
||||
ATTR_VAL_ADDRESS_INDEX,
|
||||
/* An unsigned integer. */
|
||||
ATTR_VAL_UINT,
|
||||
|
|
@ -611,8 +611,8 @@ struct function
|
|||
struct function_addrs
|
||||
{
|
||||
/* Range is LOW <= PC < HIGH. */
|
||||
uint64_t low;
|
||||
uint64_t high;
|
||||
uintptr_t low;
|
||||
uintptr_t high;
|
||||
/* Function for this address range. */
|
||||
struct function *function;
|
||||
};
|
||||
|
|
@ -693,8 +693,8 @@ struct unit
|
|||
struct unit_addrs
|
||||
{
|
||||
/* Range is LOW <= PC < HIGH. */
|
||||
uint64_t low;
|
||||
uint64_t high;
|
||||
uintptr_t low;
|
||||
uintptr_t high;
|
||||
/* Compilation unit for this address range. */
|
||||
struct unit *u;
|
||||
};
|
||||
|
|
@ -1431,7 +1431,7 @@ resolve_addr_index (const struct dwarf_sections *dwarf_sections,
|
|||
uint64_t addr_base, int addrsize, int is_bigendian,
|
||||
uint64_t addr_index,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
uint64_t *address)
|
||||
uintptr_t *address)
|
||||
{
|
||||
uint64_t offset;
|
||||
struct dwarf_buf addr_buf;
|
||||
|
|
@ -1452,7 +1452,7 @@ resolve_addr_index (const struct dwarf_sections *dwarf_sections,
|
|||
addr_buf.data = data;
|
||||
addr_buf.reported_underflow = 0;
|
||||
|
||||
*address = read_address (&addr_buf, addrsize);
|
||||
*address = (uintptr_t) read_address (&addr_buf, addrsize);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -1531,7 +1531,7 @@ function_addrs_search (const void *vkey, const void *ventry)
|
|||
|
||||
static int
|
||||
add_unit_addr (struct backtrace_state *state, void *rdata,
|
||||
uint64_t lowpc, uint64_t highpc,
|
||||
uintptr_t lowpc, uintptr_t highpc,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
void *pvec)
|
||||
{
|
||||
|
|
@ -1867,10 +1867,10 @@ lookup_abbrev (struct abbrevs *abbrevs, uint64_t code,
|
|||
lowpc/highpc is set or ranges is set. */
|
||||
|
||||
struct pcrange {
|
||||
uint64_t lowpc; /* The low PC value. */
|
||||
uintptr_t lowpc; /* The low PC value. */
|
||||
int have_lowpc; /* Whether a low PC value was found. */
|
||||
int lowpc_is_addr_index; /* Whether lowpc is in .debug_addr. */
|
||||
uint64_t highpc; /* The high PC value. */
|
||||
uintptr_t highpc; /* The high PC value. */
|
||||
int have_highpc; /* Whether a high PC value was found. */
|
||||
int highpc_is_relative; /* Whether highpc is relative to lowpc. */
|
||||
int highpc_is_addr_index; /* Whether highpc is in .debug_addr. */
|
||||
|
|
@ -1890,12 +1890,12 @@ update_pcrange (const struct attr* attr, const struct attr_val* val,
|
|||
case DW_AT_low_pc:
|
||||
if (val->encoding == ATTR_VAL_ADDRESS)
|
||||
{
|
||||
pcrange->lowpc = val->u.uint;
|
||||
pcrange->lowpc = (uintptr_t) val->u.uint;
|
||||
pcrange->have_lowpc = 1;
|
||||
}
|
||||
else if (val->encoding == ATTR_VAL_ADDRESS_INDEX)
|
||||
{
|
||||
pcrange->lowpc = val->u.uint;
|
||||
pcrange->lowpc = (uintptr_t) val->u.uint;
|
||||
pcrange->have_lowpc = 1;
|
||||
pcrange->lowpc_is_addr_index = 1;
|
||||
}
|
||||
|
|
@ -1904,18 +1904,18 @@ update_pcrange (const struct attr* attr, const struct attr_val* val,
|
|||
case DW_AT_high_pc:
|
||||
if (val->encoding == ATTR_VAL_ADDRESS)
|
||||
{
|
||||
pcrange->highpc = val->u.uint;
|
||||
pcrange->highpc = (uintptr_t) val->u.uint;
|
||||
pcrange->have_highpc = 1;
|
||||
}
|
||||
else if (val->encoding == ATTR_VAL_UINT)
|
||||
{
|
||||
pcrange->highpc = val->u.uint;
|
||||
pcrange->highpc = (uintptr_t) val->u.uint;
|
||||
pcrange->have_highpc = 1;
|
||||
pcrange->highpc_is_relative = 1;
|
||||
}
|
||||
else if (val->encoding == ATTR_VAL_ADDRESS_INDEX)
|
||||
{
|
||||
pcrange->highpc = val->u.uint;
|
||||
pcrange->highpc = (uintptr_t) val->u.uint;
|
||||
pcrange->have_highpc = 1;
|
||||
pcrange->highpc_is_addr_index = 1;
|
||||
}
|
||||
|
|
@ -1950,16 +1950,16 @@ add_low_high_range (struct backtrace_state *state,
|
|||
uintptr_t base_address, int is_bigendian,
|
||||
struct unit *u, const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state,
|
||||
void *rdata, uint64_t lowpc,
|
||||
uint64_t highpc,
|
||||
void *rdata, uintptr_t lowpc,
|
||||
uintptr_t highpc,
|
||||
backtrace_error_callback error_callback,
|
||||
void *data, void *vec),
|
||||
void *rdata,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
void *vec)
|
||||
{
|
||||
uint64_t lowpc;
|
||||
uint64_t highpc;
|
||||
uintptr_t lowpc;
|
||||
uintptr_t highpc;
|
||||
|
||||
lowpc = pcrange->lowpc;
|
||||
if (pcrange->lowpc_is_addr_index)
|
||||
|
|
@ -1997,10 +1997,10 @@ add_ranges_from_ranges (
|
|||
struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct unit *u, uint64_t base,
|
||||
struct unit *u, uintptr_t base,
|
||||
const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state, void *rdata,
|
||||
uint64_t lowpc, uint64_t highpc,
|
||||
uintptr_t lowpc, uintptr_t highpc,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
void *vec),
|
||||
void *rdata,
|
||||
|
|
@ -2039,12 +2039,12 @@ add_ranges_from_ranges (
|
|||
break;
|
||||
|
||||
if (is_highest_address (low, u->addrsize))
|
||||
base = high;
|
||||
base = (uintptr_t) high;
|
||||
else
|
||||
{
|
||||
if (!add_range (state, rdata,
|
||||
low + base + base_address,
|
||||
high + base + base_address,
|
||||
(uintptr_t) low + base + base_address,
|
||||
(uintptr_t) high + base + base_address,
|
||||
error_callback, data, vec))
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -2064,10 +2064,10 @@ add_ranges_from_rnglists (
|
|||
struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct unit *u, uint64_t base,
|
||||
struct unit *u, uintptr_t base,
|
||||
const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state, void *rdata,
|
||||
uint64_t lowpc, uint64_t highpc,
|
||||
uintptr_t lowpc, uintptr_t highpc,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
void *vec),
|
||||
void *rdata,
|
||||
|
|
@ -2133,8 +2133,8 @@ add_ranges_from_rnglists (
|
|||
case DW_RLE_startx_endx:
|
||||
{
|
||||
uint64_t index;
|
||||
uint64_t low;
|
||||
uint64_t high;
|
||||
uintptr_t low;
|
||||
uintptr_t high;
|
||||
|
||||
index = read_uleb128 (&rnglists_buf);
|
||||
if (!resolve_addr_index (dwarf_sections, u->addr_base,
|
||||
|
|
@ -2156,8 +2156,8 @@ add_ranges_from_rnglists (
|
|||
case DW_RLE_startx_length:
|
||||
{
|
||||
uint64_t index;
|
||||
uint64_t low;
|
||||
uint64_t length;
|
||||
uintptr_t low;
|
||||
uintptr_t length;
|
||||
|
||||
index = read_uleb128 (&rnglists_buf);
|
||||
if (!resolve_addr_index (dwarf_sections, u->addr_base,
|
||||
|
|
@ -2187,16 +2187,16 @@ add_ranges_from_rnglists (
|
|||
break;
|
||||
|
||||
case DW_RLE_base_address:
|
||||
base = read_address (&rnglists_buf, u->addrsize);
|
||||
base = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
break;
|
||||
|
||||
case DW_RLE_start_end:
|
||||
{
|
||||
uint64_t low;
|
||||
uint64_t high;
|
||||
uintptr_t low;
|
||||
uintptr_t high;
|
||||
|
||||
low = read_address (&rnglists_buf, u->addrsize);
|
||||
high = read_address (&rnglists_buf, u->addrsize);
|
||||
low = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
high = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
if (!add_range (state, rdata, low + base_address,
|
||||
high + base_address, error_callback, data,
|
||||
vec))
|
||||
|
|
@ -2206,11 +2206,11 @@ add_ranges_from_rnglists (
|
|||
|
||||
case DW_RLE_start_length:
|
||||
{
|
||||
uint64_t low;
|
||||
uint64_t length;
|
||||
uintptr_t low;
|
||||
uintptr_t length;
|
||||
|
||||
low = read_address (&rnglists_buf, u->addrsize);
|
||||
length = read_uleb128 (&rnglists_buf);
|
||||
low = (uintptr_t) read_address (&rnglists_buf, u->addrsize);
|
||||
length = (uintptr_t) read_uleb128 (&rnglists_buf);
|
||||
low += base_address;
|
||||
if (!add_range (state, rdata, low, low + length,
|
||||
error_callback, data, vec))
|
||||
|
|
@ -2240,9 +2240,9 @@ static int
|
|||
add_ranges (struct backtrace_state *state,
|
||||
const struct dwarf_sections *dwarf_sections,
|
||||
uintptr_t base_address, int is_bigendian,
|
||||
struct unit *u, uint64_t base, const struct pcrange *pcrange,
|
||||
struct unit *u, uintptr_t base, const struct pcrange *pcrange,
|
||||
int (*add_range) (struct backtrace_state *state, void *rdata,
|
||||
uint64_t lowpc, uint64_t highpc,
|
||||
uintptr_t lowpc, uintptr_t highpc,
|
||||
backtrace_error_callback error_callback,
|
||||
void *data, void *vec),
|
||||
void *rdata,
|
||||
|
|
@ -3520,7 +3520,7 @@ read_referenced_name (struct dwarf_data *ddata, struct unit *u,
|
|||
|
||||
static int
|
||||
add_function_range (struct backtrace_state *state, void *rdata,
|
||||
uint64_t lowpc, uint64_t highpc,
|
||||
uintptr_t lowpc, uintptr_t highpc,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
void *pvec)
|
||||
{
|
||||
|
|
@ -3560,7 +3560,7 @@ add_function_range (struct backtrace_state *state, void *rdata,
|
|||
|
||||
static int
|
||||
read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata,
|
||||
struct unit *u, uint64_t base, struct dwarf_buf *unit_buf,
|
||||
struct unit *u, uintptr_t base, struct dwarf_buf *unit_buf,
|
||||
const struct line_header *lhdr,
|
||||
backtrace_error_callback error_callback, void *data,
|
||||
struct function_vector *vec_function,
|
||||
|
|
@ -3624,7 +3624,7 @@ read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata,
|
|||
&& abbrev->attrs[i].name == DW_AT_low_pc)
|
||||
{
|
||||
if (val.encoding == ATTR_VAL_ADDRESS)
|
||||
base = val.u.uint;
|
||||
base = (uintptr_t) val.u.uint;
|
||||
else if (val.encoding == ATTR_VAL_ADDRESS_INDEX)
|
||||
{
|
||||
if (!resolve_addr_index (&ddata->dwarf_sections,
|
||||
|
|
|
|||
|
|
@ -2823,18 +2823,18 @@ elf_zstd_read_fse (const unsigned char **ppin, const unsigned char *pinend,
|
|||
while ((val & 0xfff) == 0xfff)
|
||||
{
|
||||
zidx += 3 * 6;
|
||||
if (!elf_fetch_bits (&pin, pinend, &val, &bits))
|
||||
return 0;
|
||||
val >>= 12;
|
||||
bits -= 12;
|
||||
if (!elf_fetch_bits (&pin, pinend, &val, &bits))
|
||||
return 0;
|
||||
}
|
||||
while ((val & 3) == 3)
|
||||
{
|
||||
zidx += 3;
|
||||
if (!elf_fetch_bits (&pin, pinend, &val, &bits))
|
||||
return 0;
|
||||
val >>= 2;
|
||||
bits -= 2;
|
||||
if (!elf_fetch_bits (&pin, pinend, &val, &bits))
|
||||
return 0;
|
||||
}
|
||||
/* We have at least 13 bits here, don't need to fetch. */
|
||||
zidx += val & 3;
|
||||
|
|
@ -2964,7 +2964,7 @@ elf_zstd_build_fse (const int16_t *norm, int idx, uint16_t *next,
|
|||
pos = (pos + step) & mask;
|
||||
}
|
||||
}
|
||||
if (pos != 0)
|
||||
if (unlikely (pos != 0))
|
||||
{
|
||||
elf_uncompress_failed ();
|
||||
return 0;
|
||||
|
|
@ -3440,17 +3440,17 @@ static const struct elf_zstd_fse_baseline_entry elf_zstd_match_table[64] =
|
|||
|
||||
static const struct elf_zstd_fse_baseline_entry elf_zstd_offset_table[32] =
|
||||
{
|
||||
{ 1, 0, 5, 0 }, { 64, 6, 4, 0 }, { 512, 9, 5, 0 },
|
||||
{ 32768, 15, 5, 0 }, { 2097152, 21, 5, 0 }, { 8, 3, 5, 0 },
|
||||
{ 128, 7, 4, 0 }, { 4096, 12, 5, 0 }, { 262144, 18, 5, 0 },
|
||||
{ 8388608, 23, 5, 0 }, { 32, 5, 5, 0 }, { 256, 8, 4, 0 },
|
||||
{ 16384, 14, 5, 0 }, { 1048576, 20, 5, 0 }, { 4, 2, 5, 0 },
|
||||
{ 128, 7, 4, 16 }, { 2048, 11, 5, 0 }, { 131072, 17, 5, 0 },
|
||||
{ 4194304, 22, 5, 0 }, { 16, 4, 5, 0 }, { 256, 8, 4, 16 },
|
||||
{ 8192, 13, 5, 0 }, { 524288, 19, 5, 0 }, { 2, 1, 5, 0 },
|
||||
{ 64, 6, 4, 16 }, { 1024, 10, 5, 0 }, { 65536, 16, 5, 0 },
|
||||
{ 268435456, 28, 5, 0 }, { 134217728, 27, 5, 0 }, { 67108864, 26, 5, 0 },
|
||||
{ 33554432, 25, 5, 0 }, { 16777216, 24, 5, 0 },
|
||||
{ 1, 0, 5, 0 }, { 61, 6, 4, 0 }, { 509, 9, 5, 0 },
|
||||
{ 32765, 15, 5, 0 }, { 2097149, 21, 5, 0 }, { 5, 3, 5, 0 },
|
||||
{ 125, 7, 4, 0 }, { 4093, 12, 5, 0 }, { 262141, 18, 5, 0 },
|
||||
{ 8388605, 23, 5, 0 }, { 29, 5, 5, 0 }, { 253, 8, 4, 0 },
|
||||
{ 16381, 14, 5, 0 }, { 1048573, 20, 5, 0 }, { 1, 2, 5, 0 },
|
||||
{ 125, 7, 4, 16 }, { 2045, 11, 5, 0 }, { 131069, 17, 5, 0 },
|
||||
{ 4194301, 22, 5, 0 }, { 13, 4, 5, 0 }, { 253, 8, 4, 16 },
|
||||
{ 8189, 13, 5, 0 }, { 524285, 19, 5, 0 }, { 2, 1, 5, 0 },
|
||||
{ 61, 6, 4, 16 }, { 1021, 10, 5, 0 }, { 65533, 16, 5, 0 },
|
||||
{ 268435453, 28, 5, 0 }, { 134217725, 27, 5, 0 }, { 67108861, 26, 5, 0 },
|
||||
{ 33554429, 25, 5, 0 }, { 16777213, 24, 5, 0 },
|
||||
};
|
||||
|
||||
/* Read a zstd Huffman table and build the decoding table in *TABLE, reading
|
||||
|
|
@ -3635,7 +3635,7 @@ elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend,
|
|||
}
|
||||
|
||||
weight_mark = (uint32_t *) (weights + 256);
|
||||
memset (weight_mark, 0, 12 * sizeof (uint32_t));
|
||||
memset (weight_mark, 0, 13 * sizeof (uint32_t));
|
||||
weight_mask = 0;
|
||||
for (i = 0; i < count; ++i)
|
||||
{
|
||||
|
|
@ -3702,7 +3702,7 @@ elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend,
|
|||
|
||||
/* Change WEIGHT_MARK from a count of weights to the index of the first
|
||||
symbol for that weight. We shift the indexes to also store how many we
|
||||
hae seen so far, below. */
|
||||
have seen so far, below. */
|
||||
{
|
||||
uint32_t next;
|
||||
|
||||
|
|
@ -3783,7 +3783,7 @@ elf_zstd_read_literals (const unsigned char **ppin,
|
|||
{
|
||||
int raw;
|
||||
|
||||
/* Raw_literals_Block or RLE_Literals_Block */
|
||||
/* Raw_Literals_Block or RLE_Literals_Block */
|
||||
|
||||
raw = (hdr & 3) == 0;
|
||||
|
||||
|
|
@ -3965,7 +3965,7 @@ elf_zstd_read_literals (const unsigned char **ppin,
|
|||
unsigned int bits;
|
||||
uint32_t i;
|
||||
|
||||
pback = pin + compressed_size - 1;
|
||||
pback = pin + total_streams_size - 1;
|
||||
pbackend = pin;
|
||||
if (!elf_fetch_backward_init (&pback, pbackend, &val, &bits))
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -109,6 +109,7 @@
|
|||
#define TracyParameterRegister(x,y)
|
||||
#define TracyParameterSetup(x,y,z,w)
|
||||
#define TracyIsConnected false
|
||||
#define TracySetProgramName(x)
|
||||
|
||||
#define TracyFiberEnter(x)
|
||||
#define TracyFiberLeave
|
||||
|
|
@ -270,6 +271,7 @@
|
|||
#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data )
|
||||
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val )
|
||||
#define TracyIsConnected tracy::GetProfiler().IsConnected()
|
||||
#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber )
|
||||
|
|
|
|||
12
thirdparty/tracy/include/tracy/tracy/TracyC.h
vendored
12
thirdparty/tracy/include/tracy/tracy/TracyC.h
vendored
|
|
@ -11,6 +11,14 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum TracyPlotFormatEnum
|
||||
{
|
||||
TracyPlotFormatNumber,
|
||||
TracyPlotFormatMemory,
|
||||
TracyPlotFormatPercentage,
|
||||
TracyPlotFormatWatt
|
||||
};
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name );
|
||||
|
||||
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
|
||||
|
|
@ -60,6 +68,8 @@ typedef const void* TracyCZoneCtx;
|
|||
#define TracyCPlot(x,y)
|
||||
#define TracyCPlotF(x,y)
|
||||
#define TracyCPlotI(x,y)
|
||||
#define TracyCPlotConfig(x,y,z,w,a)
|
||||
|
||||
#define TracyCMessage(x,y)
|
||||
#define TracyCMessageL(x)
|
||||
#define TracyCMessageC(x,y,z)
|
||||
|
|
@ -289,11 +299,13 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
|
|||
TRACY_API void ___tracy_emit_plot( const char* name, double val );
|
||||
TRACY_API void ___tracy_emit_plot_float( const char* name, float val );
|
||||
TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val );
|
||||
TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
|
||||
#define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val );
|
||||
#define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val );
|
||||
#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color );
|
||||
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
|
||||
|
||||
|
||||
|
|
|
|||
412
thirdparty/tracy/include/tracy/tracy/TracyD3D11.hpp
vendored
412
thirdparty/tracy/include/tracy/tracy/TracyD3D11.hpp
vendored
|
|
@ -13,13 +13,13 @@
|
|||
#define TracyD3D11ZoneC(ctx, name, color)
|
||||
#define TracyD3D11NamedZone(ctx, varname, name, active)
|
||||
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
|
||||
#define TracyD3D12ZoneTransient(ctx, varname, name, active)
|
||||
#define TracyD3D11ZoneTransient(ctx, varname, name, active)
|
||||
|
||||
#define TracyD3D11ZoneS(ctx, name, depth)
|
||||
#define TracyD3D11ZoneCS(ctx, name, color, depth)
|
||||
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
|
||||
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
|
||||
#define TracyD3D12ZoneTransientS(ctx, varname, name, depth, active)
|
||||
#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active)
|
||||
|
||||
#define TracyD3D11Collect(ctx)
|
||||
|
||||
|
|
@ -39,11 +39,12 @@ using TracyD3D11Ctx = void*;
|
|||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyYield.hpp"
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
#define TracyD3D11Panic(msg, ...) do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
|
|
@ -51,71 +52,83 @@ class D3D11Ctx
|
|||
{
|
||||
friend class D3D11ZoneScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024;
|
||||
|
||||
enum CollectMode { POLL, BLOCK };
|
||||
|
||||
public:
|
||||
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
: m_device( device )
|
||||
, m_devicectx( devicectx )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
// TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument
|
||||
m_device = device;
|
||||
device->AddRef();
|
||||
m_immediateDevCtx = devicectx;
|
||||
devicectx->AddRef();
|
||||
|
||||
for (int i = 0; i < QueryCount; i++)
|
||||
{
|
||||
HRESULT hr = S_OK;
|
||||
D3D11_QUERY_DESC desc;
|
||||
desc.MiscFlags = 0;
|
||||
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
hr |= device->CreateQuery(&desc, &m_queries[i]);
|
||||
|
||||
D3D11_QUERY_DESC desc = { };
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
|
||||
hr |= device->CreateQuery(&desc, &m_disjoints[i]);
|
||||
|
||||
m_disjointMap[i] = nullptr;
|
||||
|
||||
assert(SUCCEEDED(hr));
|
||||
if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery)))
|
||||
{
|
||||
TracyD3D11Panic("unable to create disjoint timestamp query.", return);
|
||||
}
|
||||
}
|
||||
|
||||
// Force query the initial GPU timestamp (pipeline stall)
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
||||
UINT64 timestamp;
|
||||
for (ID3D11Query*& query : m_queries)
|
||||
{
|
||||
D3D11_QUERY_DESC desc = { };
|
||||
desc.Query = D3D11_QUERY_TIMESTAMP;
|
||||
if (FAILED(m_device->CreateQuery(&desc, &query)))
|
||||
{
|
||||
TracyD3D11Panic("unable to create timestamp query.", return);
|
||||
}
|
||||
}
|
||||
|
||||
// Calibrate CPU and GPU timestamps
|
||||
int64_t tcpu = 0;
|
||||
int64_t tgpu = 0;
|
||||
for (int attempts = 0; attempts < 50; attempts++)
|
||||
{
|
||||
devicectx->Begin(m_disjoints[0]);
|
||||
devicectx->End(m_queries[0]);
|
||||
devicectx->End(m_disjoints[0]);
|
||||
devicectx->Flush();
|
||||
m_immediateDevCtx->Begin(m_disjointQuery);
|
||||
m_immediateDevCtx->End(m_queries[0]);
|
||||
m_immediateDevCtx->End(m_disjointQuery);
|
||||
|
||||
while (devicectx->GetData(m_disjoints[0], &disjoint, sizeof(disjoint), 0) == S_FALSE)
|
||||
/* Nothing */;
|
||||
int64_t tcpu0 = Profiler::GetTime();
|
||||
WaitForQuery(m_disjointQuery);
|
||||
int64_t tcpu1 = Profiler::GetTime();
|
||||
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
|
||||
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK)
|
||||
{
|
||||
TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (disjoint.Disjoint)
|
||||
continue;
|
||||
|
||||
while (devicectx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) == S_FALSE)
|
||||
/* Nothing */;
|
||||
UINT64 timestamp = 0;
|
||||
if (m_immediateDevCtx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) != S_OK)
|
||||
continue; // this should never happen, since the enclosing disjoint query succeeded
|
||||
|
||||
tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
|
||||
tgpu = timestamp * (1000000000 / disjoint.Frequency);
|
||||
break;
|
||||
}
|
||||
|
||||
int64_t tgpu = timestamp * (1000000000ull / disjoint.Frequency);
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
// ready to roll
|
||||
m_contextId = GetGpuCtxCounter().fetch_add(1);
|
||||
m_immediateDevCtx->Begin(m_disjointQuery);
|
||||
m_previousCheckpoint = m_nextCheckpoint = 0;
|
||||
|
||||
uint8_t flags = 0;
|
||||
|
||||
const float period = 1.f;
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()?
|
||||
MemWrite( &item->gpuNewContext.period, 1.0f );
|
||||
MemWrite( &item->gpuNewContext.context, m_contextId);
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t(0) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
|
|
@ -127,12 +140,20 @@ public:
|
|||
|
||||
~D3D11Ctx()
|
||||
{
|
||||
for (int i = 0; i < QueryCount; i++)
|
||||
// collect all pending timestamps before destroying everything
|
||||
do
|
||||
{
|
||||
m_queries[i]->Release();
|
||||
m_disjoints[i]->Release();
|
||||
m_disjointMap[i] = nullptr;
|
||||
Collect(BLOCK);
|
||||
} while (m_previousCheckpoint != m_queryCounter);
|
||||
|
||||
for (ID3D11Query* query : m_queries)
|
||||
{
|
||||
query->Release();
|
||||
}
|
||||
m_immediateDevCtx->End(m_disjointQuery);
|
||||
m_disjointQuery->Release();
|
||||
m_immediateDevCtx->Release();
|
||||
m_device->Release();
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
|
|
@ -142,7 +163,7 @@ public:
|
|||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
|
|
@ -151,217 +172,170 @@ public:
|
|||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
void Collect(CollectMode mode = POLL)
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
m_previousCheckpoint = m_nextCheckpoint = m_queryCounter;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto start = m_tail;
|
||||
auto end = m_head + QueryCount;
|
||||
auto cnt = (end - start) % QueryCount;
|
||||
while (cnt > 1)
|
||||
if (m_previousCheckpoint == m_nextCheckpoint)
|
||||
{
|
||||
auto mid = start + cnt / 2;
|
||||
|
||||
bool available =
|
||||
m_devicectx->GetData(m_disjointMap[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK &&
|
||||
m_devicectx->GetData(m_queries[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
|
||||
if (available)
|
||||
uintptr_t nextCheckpoint = m_queryCounter;
|
||||
if (nextCheckpoint == m_nextCheckpoint)
|
||||
{
|
||||
start = mid;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
end = mid;
|
||||
}
|
||||
cnt = (end - start) % QueryCount;
|
||||
m_nextCheckpoint = nextCheckpoint;
|
||||
m_immediateDevCtx->End(m_disjointQuery);
|
||||
}
|
||||
|
||||
start %= QueryCount;
|
||||
|
||||
while (m_tail != start)
|
||||
if (mode == CollectMode::BLOCK)
|
||||
{
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
||||
UINT64 time;
|
||||
WaitForQuery(m_disjointQuery);
|
||||
}
|
||||
|
||||
m_devicectx->GetData(m_disjointMap[m_tail], &disjoint, sizeof(disjoint), 0);
|
||||
m_devicectx->GetData(m_queries[m_tail], &time, sizeof(time), 0);
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
|
||||
if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
time *= (1000000000ull / disjoint.Frequency);
|
||||
if (disjoint.Disjoint == TRUE)
|
||||
{
|
||||
m_previousCheckpoint = m_nextCheckpoint;
|
||||
TracyD3D11Panic("disjoint timestamps detected; dropping.");
|
||||
return;
|
||||
}
|
||||
|
||||
auto begin = m_previousCheckpoint;
|
||||
auto end = m_nextCheckpoint;
|
||||
for (auto i = begin; i != end; ++i)
|
||||
{
|
||||
uint32_t k = RingIndex(i);
|
||||
UINT64 timestamp = 0;
|
||||
if (m_immediateDevCtx->GetData(m_queries[k], &timestamp, sizeof(timestamp), 0) != S_OK)
|
||||
{
|
||||
TracyD3D11Panic("timestamp expected to be ready, but it was not!");
|
||||
break;
|
||||
}
|
||||
timestamp *= (1000000000ull / disjoint.Frequency);
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, (int64_t)time);
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(timestamp));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_tail = (m_tail + 1) % QueryCount;
|
||||
}
|
||||
|
||||
// disjoint timestamp queries should only be invoked once per frame or less
|
||||
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query
|
||||
m_immediateDevCtx->Begin(m_disjointQuery);
|
||||
m_previousCheckpoint = m_nextCheckpoint;
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
tracy_force_inline uint32_t RingIndex(uintptr_t index)
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = ( m_head + 1 ) % QueryCount;
|
||||
assert( m_head != m_tail );
|
||||
return id;
|
||||
index %= MaxQueries;
|
||||
return static_cast<uint32_t>(index);
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* TranslateQueryId( unsigned int id )
|
||||
tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
|
||||
{
|
||||
// wrap-around safe: all unsigned
|
||||
uintptr_t count = end - begin;
|
||||
return static_cast<uint32_t>(count);
|
||||
}
|
||||
|
||||
tracy_force_inline uint32_t NextQueryId()
|
||||
{
|
||||
auto id = m_queryCounter++;
|
||||
if (RingCount(m_previousCheckpoint, id) >= MaxQueries)
|
||||
{
|
||||
TracyD3D11Panic("too many pending timestamp queries.");
|
||||
// #TODO: return some sentinel value; ideally a "hidden" query index
|
||||
}
|
||||
return RingIndex(id);
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id)
|
||||
{
|
||||
return m_queries[id];
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* MapDisjointQueryId( unsigned int id, unsigned int disjointId )
|
||||
tracy_force_inline void WaitForQuery(ID3D11Query* query)
|
||||
{
|
||||
m_disjointMap[id] = m_disjoints[disjointId];
|
||||
return m_disjoints[disjointId];
|
||||
m_immediateDevCtx->Flush();
|
||||
while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK)
|
||||
YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends...
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
tracy_force_inline uint8_t GetContextId() const
|
||||
{
|
||||
return m_context;
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
ID3D11Device* m_device;
|
||||
ID3D11DeviceContext* m_devicectx;
|
||||
ID3D11Device* m_device = nullptr;
|
||||
ID3D11DeviceContext* m_immediateDevCtx = nullptr;
|
||||
|
||||
ID3D11Query* m_queries[QueryCount];
|
||||
ID3D11Query* m_disjoints[QueryCount];
|
||||
ID3D11Query* m_disjointMap[QueryCount]; // Multiple time queries can have one disjoint query
|
||||
uint8_t m_context;
|
||||
ID3D11Query* m_queries[MaxQueries];
|
||||
ID3D11Query* m_disjointQuery = nullptr;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere?
|
||||
|
||||
uintptr_t m_queryCounter = 0;
|
||||
|
||||
uintptr_t m_previousCheckpoint = 0;
|
||||
uintptr_t m_nextCheckpoint = 0;
|
||||
};
|
||||
|
||||
class D3D11ZoneScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active )
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active )
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
GetProfiler().SendCallstack( depth );
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcloc));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
: D3D11ZoneScope(ctx, active)
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D11ZoneScope()
|
||||
|
|
@ -369,24 +343,46 @@ public:
|
|||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
m_ctx->m_devicectx->End(m_ctx->TranslateQueryId(queryId));
|
||||
m_ctx->m_devicectx->End(m_ctx->MapDisjointQueryId(queryId, m_disjointId));
|
||||
m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId));
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
|
||||
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
// Common delegation target for the public constructors: decides whether the
// zone is live and, if it is, remembers the context it belongs to.
//   ctx    - D3D11 profiling context this zone is recorded against.
//   active - caller's request to record the zone.
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active )
#ifdef TRACY_ON_DEMAND
    // On-demand builds additionally require a connected profiler.
    : m_active( active && GetProfiler().IsConnected() )
#else
    : m_active( active )
#endif
{
    // m_ctx is only assigned for live zones.
    if( !m_active ) return;
    m_ctx = ctx;
}
|
||||
|
||||
// Fills in and submits a GPU zone-begin queue item for this scope.
// Takes the next query id from the context, calls End() for the matching
// query object on the context's m_immediateDevCtx, writes the gpuZoneBegin
// fields and finally calls Profiler::QueueSerialFinish().
//   item           - queue item to fill in.
//   queueItemType  - value stored in the item's header type.
//   sourceLocation - value stored in gpuZoneBegin.srcloc.
void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation)
{
    const auto beginQueryId = m_ctx->NextQueryId();
    m_ctx->m_immediateDevCtx->End( m_ctx->GetQueryObjectFromId( beginQueryId ) );

    MemWrite( &item->hdr.type, queueItemType );
    MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
    MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation );
    MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
    // The queue item stores the query id as 16 bits.
    MemWrite( &item->gpuZoneBegin.queryId, static_cast<uint16_t>( beginQueryId ) );
    MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() );

    Profiler::QueueSerialFinish();
}
|
||||
|
||||
const bool m_active;
|
||||
|
||||
D3D11Ctx* m_ctx;
|
||||
unsigned int m_disjointId;
|
||||
};
|
||||
|
||||
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
|
||||
|
|
@ -403,38 +399,44 @@ static inline void DestroyD3D11Context( D3D11Ctx* ctx )
|
|||
}
|
||||
}
|
||||
|
||||
#undef TracyD3D11Panic
|
||||
|
||||
using TracyD3D11Ctx = tracy::D3D11Ctx*;
|
||||
|
||||
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
|
||||
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
|
||||
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#define TracyD3D11UnnamedZone ___tracy_gpu_d3d11_zone
|
||||
#define TracyD3D11SrcLocSymbol TracyConcat(__tracy_gpu_d3d11_source_location,TracyLine)
|
||||
#define TracyD3D11SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D11SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active );
|
||||
// Forwards with TRACY_CALLSTACK as the depth; TracyD3D11ZoneTransientS takes (ctx, varname, name, depth, active) and no command list.
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, ___tracy_gpu_zone, name, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, ___tracy_gpu_zone, name, color, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active );
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, TracyD3D11UnnamedZone, name, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, TracyD3D11UnnamedZone, name, color, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active );
|
||||
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, depth, true )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active );
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active );
|
||||
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, depth, true )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, depth, true )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active );
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active );
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), depth, active };
|
||||
#else
|
||||
# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name )
|
||||
// Callstack-less fallback: depth and active are ignored; ctx must be forwarded because TracyD3D11ZoneC takes (ctx, name, color).
# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( ctx, name, color )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, name, active)
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D11ZoneTransient(ctx, varname, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D11Collect( ctx ) ctx->Collect();
|
||||
|
|
|
|||
738
thirdparty/tracy/include/tracy/tracy/TracyD3D12.hpp
vendored
738
thirdparty/tracy/include/tracy/tracy/TracyD3D12.hpp
vendored
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D12ZoneScope {};
|
||||
class D3D12ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = void*;
|
||||
|
|
@ -40,429 +40,419 @@ using TracyD3D12Ctx = void*;
|
|||
#include <cassert>
|
||||
#include <d3d12.h>
|
||||
#include <dxgi.h>
|
||||
#include <wrl/client.h>
|
||||
#include <queue>
|
||||
|
||||
#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct D3D12QueryPayload
|
||||
{
|
||||
uint32_t m_queryIdStart = 0;
|
||||
uint32_t m_queryCount = 0;
|
||||
};
|
||||
struct D3D12QueryPayload
|
||||
{
|
||||
uint32_t m_queryIdStart = 0;
|
||||
uint32_t m_queryCount = 0;
|
||||
};
|
||||
|
||||
// Command queue context.
|
||||
class D3D12QueueCtx
|
||||
{
|
||||
friend class D3D12ZoneScope;
|
||||
// Command queue context.
|
||||
class D3D12QueueCtx
|
||||
{
|
||||
friend class D3D12ZoneScope;
|
||||
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even!
|
||||
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_queue = nullptr;
|
||||
uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere?
|
||||
ID3D12QueryHeap* m_queryHeap = nullptr;
|
||||
ID3D12Resource* m_readbackBuffer = nullptr;
|
||||
|
||||
bool m_initialized = false;
|
||||
// In-progress payload.
|
||||
uint32_t m_queryLimit = 0;
|
||||
std::atomic<uint32_t> m_queryCounter = 0;
|
||||
uint32_t m_previousQueryCounter = 0;
|
||||
|
||||
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_queue = nullptr;
|
||||
uint8_t m_context;
|
||||
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
||||
uint32_t m_activePayload = 0;
|
||||
ID3D12Fence* m_payloadFence = nullptr;
|
||||
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||
|
||||
// In-progress payload.
|
||||
uint32_t m_queryLimit = MaxQueries;
|
||||
std::atomic<uint32_t> m_queryCounter = 0;
|
||||
uint32_t m_previousQueryCounter = 0;
|
||||
UINT64 m_prevCalibrationTicksCPU = 0;
|
||||
|
||||
uint32_t m_activePayload = 0;
|
||||
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
||||
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||
void RecalibrateClocks()
|
||||
{
|
||||
UINT64 cpuTimestamp;
|
||||
UINT64 gpuTimestamp;
|
||||
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
TracyD3D12Panic("failed to obtain queue clock calibration counters.", return);
|
||||
}
|
||||
|
||||
int64_t m_prevCalibration = 0;
|
||||
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
|
||||
int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU;
|
||||
if (cpuDeltaTicks > 0)
|
||||
{
|
||||
static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc();
|
||||
int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick;
|
||||
// Save the device cpu timestamp, not the Tracy profiler timestamp:
|
||||
m_prevCalibrationTicksCPU = cpuTimestamp;
|
||||
|
||||
public:
|
||||
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
: m_device(device)
|
||||
, m_queue(queue)
|
||||
, m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||
{
|
||||
// Verify we support timestamp queries on this queue.
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
|
||||
{
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS);
|
||||
MemWrite(&item->gpuCalibration.context, GetId());
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
}
|
||||
|
||||
bool Success = SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)));
|
||||
assert(Success && featureData.CopyQueueTimestampQueriesSupported && "Platform does not support profiling of copy queues.");
|
||||
}
|
||||
// Completes a serial-queue write for an already filled-in item.
// In TRACY_ON_DEMAND builds the item is first passed to the profiler's
// DeferItem(); in every build Profiler::QueueSerialFinish() is then called.
//   item - queue item to submit.
tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item)
{
#if defined(TRACY_ON_DEMAND)
    GetProfiler().DeferItem( *item );
#endif
    Profiler::QueueSerialFinish();
}
|
||||
|
||||
uint64_t timestampFrequency;
|
||||
public:
|
||||
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
: m_device(device)
|
||||
, m_queue(queue)
|
||||
{
|
||||
// Verify we support timestamp queries on this queue.
|
||||
|
||||
if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
|
||||
{
|
||||
assert(false && "Failed to get timestamp frequency.");
|
||||
}
|
||||
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
|
||||
{
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData));
|
||||
if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE))
|
||||
{
|
||||
TracyD3D12Panic("Platform does not support profiling of copy queues.", return);
|
||||
}
|
||||
}
|
||||
|
||||
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
assert(false && "Failed to get queue clock calibration.");
|
||||
}
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs
|
||||
m_queryLimit = MaxQueries;
|
||||
|
||||
// Save the device cpu timestamp, not the profiler's timestamp.
|
||||
m_prevCalibration = cpuTimestamp * m_qpcToNs;
|
||||
D3D12_QUERY_HEAP_DESC heapDesc{};
|
||||
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
|
||||
{
|
||||
m_queryLimit /= 2;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
}
|
||||
|
||||
D3D12_QUERY_HEAP_DESC heapDesc{};
|
||||
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
|
||||
// Create a readback buffer, which will be used as a destination for the query data.
|
||||
|
||||
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
|
||||
{
|
||||
m_queryLimit /= 2;
|
||||
heapDesc.Count = m_queryLimit;
|
||||
}
|
||||
D3D12_RESOURCE_DESC readbackBufferDesc{};
|
||||
readbackBufferDesc.Alignment = 0;
|
||||
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
|
||||
readbackBufferDesc.Height = 1;
|
||||
readbackBufferDesc.DepthOrArraySize = 1;
|
||||
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
|
||||
readbackBufferDesc.MipLevels = 1;
|
||||
readbackBufferDesc.SampleDesc.Count = 1;
|
||||
readbackBufferDesc.SampleDesc.Quality = 0;
|
||||
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||
|
||||
// Create a readback buffer, which will be used as a destination for the query data.
|
||||
D3D12_HEAP_PROPERTIES readbackHeapProps{};
|
||||
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
||||
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
readbackHeapProps.CreationNodeMask = 0;
|
||||
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
|
||||
|
||||
D3D12_RESOURCE_DESC readbackBufferDesc{};
|
||||
readbackBufferDesc.Alignment = 0;
|
||||
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
|
||||
readbackBufferDesc.Height = 1;
|
||||
readbackBufferDesc.DepthOrArraySize = 1;
|
||||
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
|
||||
readbackBufferDesc.MipLevels = 1;
|
||||
readbackBufferDesc.SampleDesc.Count = 1;
|
||||
readbackBufferDesc.SampleDesc.Quality = 0;
|
||||
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
|
||||
{
|
||||
TracyD3D12Panic("Failed to create query readback buffer.", return);
|
||||
}
|
||||
|
||||
D3D12_HEAP_PROPERTIES readbackHeapProps{};
|
||||
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
|
||||
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
readbackHeapProps.CreationNodeMask = 0;
|
||||
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
|
||||
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
|
||||
{
|
||||
TracyD3D12Panic("Failed to create payload fence.", return);
|
||||
}
|
||||
|
||||
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
|
||||
{
|
||||
assert(false && "Failed to create query readback buffer.");
|
||||
}
|
||||
float period = [queue]()
|
||||
{
|
||||
uint64_t timestampFrequency;
|
||||
if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
return static_cast<float>( 1E+09 / static_cast<double>(timestampFrequency) );
|
||||
}();
|
||||
|
||||
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
|
||||
{
|
||||
assert(false && "Failed to create payload fence.");
|
||||
}
|
||||
if (period == 0.0f)
|
||||
{
|
||||
TracyD3D12Panic("Failed to get timestamp frequency.", return);
|
||||
}
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
|
||||
MemWrite(&item->gpuNewContext.context, m_context);
|
||||
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
TracyD3D12Panic("Failed to get queue clock calibration.", return);
|
||||
}
|
||||
|
||||
// Save the device cpu timestamp, not the profiler's timestamp.
|
||||
m_prevCalibrationTicksCPU = cpuTimestamp;
|
||||
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
// all checked: ready to roll
|
||||
m_contextId = GetGpuCtxCounter().fetch_add(1);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()?
|
||||
MemWrite(&item->gpuNewContext.period, period);
|
||||
MemWrite(&item->gpuNewContext.context, GetId());
|
||||
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
// Tears the context down: waits for the GPU to reach the last payload value
// signalled on the fence, collects the remaining timestamps, then releases the
// fence, the readback buffer and the query heap.
~D3D12QueueCtx()
{
    ZoneScopedC(Color::Red4);

    // The constructor can leave early through TracyD3D12Panic(..., return)
    // before every resource has been created, so any of these pointers may
    // still be null and must be checked before use.
    if (m_payloadFence != nullptr)
    {
        // Busy-wait until the fence reaches the last value signalled by NewFrame().
        // '<' instead of '!=' also ends the wait when the fence reports a value
        // above m_activePayload (GetCompletedValue() yields UINT64_MAX once the
        // device has been removed) rather than spinning forever.
        UINT64 completed = m_payloadFence->GetCompletedValue();
        while (completed < m_activePayload)
        {
            completed = m_payloadFence->GetCompletedValue();
        }

        // Read the timestamps back only when the fence holds exactly the value
        // we signalled and there is a buffer to map.
        if (completed == m_activePayload && m_readbackBuffer != nullptr)
        {
            Collect();
        }

        m_payloadFence->Release();
    }

    if (m_readbackBuffer != nullptr)
    {
        m_readbackBuffer->Release();
    }
    if (m_queryHeap != nullptr)
    {
        m_queryHeap->Release();
    }
}
|
||||
|
||||
|
||||
void NewFrame()
|
||||
{
|
||||
uint32_t queryCounter = m_queryCounter.exchange(0);
|
||||
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter });
|
||||
m_previousQueryCounter += queryCounter;
|
||||
|
||||
if (m_previousQueryCounter >= m_queryLimit)
|
||||
{
|
||||
m_previousQueryCounter -= m_queryLimit;
|
||||
}
|
||||
|
||||
m_queue->Signal(m_payloadFence, ++m_activePayload);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, GetId());
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_queryCounter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
// Find out what payloads are available.
|
||||
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||
|
||||
m_initialized = true;
|
||||
}
|
||||
if (!payloadCount)
|
||||
{
|
||||
return; // No payloads are available yet, exit out.
|
||||
}
|
||||
|
||||
void NewFrame()
|
||||
{
|
||||
uint32_t queryCounter = m_queryCounter.exchange(0);
|
||||
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter });
|
||||
m_previousQueryCounter += queryCounter;
|
||||
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||
|
||||
if (m_previousQueryCounter >= m_queryLimit)
|
||||
{
|
||||
m_previousQueryCounter -= m_queryLimit;
|
||||
}
|
||||
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||
void* readbackBufferMapping = nullptr;
|
||||
|
||||
m_queue->Signal(m_payloadFence.Get(), ++m_activePayload);
|
||||
}
|
||||
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||
{
|
||||
TracyD3D12Panic("Failed to map readback buffer.", return);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||
{
|
||||
const auto& payload = m_payloadQueue.front();
|
||||
|
||||
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||
{
|
||||
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||
const auto timestamp = timestampData[counter];
|
||||
const auto queryId = counter;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
m_payloadQueue.pop();
|
||||
}
|
||||
|
||||
m_readbackBuffer->Unmap(0, nullptr);
|
||||
|
||||
// Recalibrate to account for drift.
|
||||
RecalibrateClocks();
|
||||
}
|
||||
|
||||
private:
|
||||
// Reserves two consecutive query slots and returns the id of the first one.
// The id is the running start index plus the per-frame offset, reduced modulo
// m_queryLimit. Exceeding the limit is reported through TracyD3D12Panic, but
// an id is still returned.
tracy_force_inline uint32_t NextQueryId()
{
    const uint32_t slotOffset = m_queryCounter.fetch_add(2);
    if (slotOffset >= m_queryLimit)
    {
        TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries.");
        // #TODO: consider returning an invalid id or sentinel value here
    }

    return (m_previousQueryCounter + slotOffset) % m_queryLimit;
}
|
||||
|
||||
// Returns m_contextId, the value this context writes into the `context` field
// of the queue items it emits.
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
};
|
||||
|
||||
class D3D12ZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
D3D12QueueCtx* m_ctx = nullptr;
|
||||
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||
|
||||
// Fills in a GPU zone-begin queue item for this scope and finishes the serial
// queue write. Uses the scope's own m_queryId and the id of m_ctx.
//   item        - queue item to fill in.
//   type        - value stored in the item's header type.
//   srcLocation - value stored in gpuZoneBegin.srcloc.
tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation)
{
    MemWrite( &item->hdr.type, type );
    MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
    MemWrite( &item->gpuZoneBegin.srcloc, srcLocation );
    MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );

    // The queue item stores the query id as 16 bits.
    const auto narrowQueryId = static_cast<uint16_t>( m_queryId );
    MemWrite( &item->gpuZoneBegin.queryId, narrowQueryId );
    MemWrite( &item->gpuZoneBegin.context, m_ctx->GetId() );

    Profiler::QueueSerialFinish();
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_queryCounter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find out what payloads are available.
|
||||
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||
|
||||
if (!payloadCount)
|
||||
{
|
||||
return; // No payloads are available yet, exit out.
|
||||
}
|
||||
|
||||
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||
|
||||
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||
void* readbackBufferMapping = nullptr;
|
||||
|
||||
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||
{
|
||||
assert(false && "Failed to map readback buffer.");
|
||||
}
|
||||
|
||||
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||
|
||||
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||
{
|
||||
const auto& payload = m_payloadQueue.front();
|
||||
|
||||
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||
{
|
||||
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||
const auto timestamp = timestampData[counter];
|
||||
const auto queryId = counter;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
m_payloadQueue.pop();
|
||||
}
|
||||
|
||||
m_readbackBuffer->Unmap(0, nullptr);
|
||||
|
||||
// Recalibrate to account for drift.
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
|
||||
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
assert(false && "Failed to get queue clock calibration.");
|
||||
}
|
||||
|
||||
cpuTimestamp *= m_qpcToNs;
|
||||
|
||||
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
|
||||
if (cpuDelta > 0)
|
||||
{
|
||||
m_prevCalibration = cpuTimestamp;
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
|
||||
MemWrite(&item->gpuCalibration.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline uint32_t NextQueryId()
|
||||
{
|
||||
uint32_t queryCounter = m_queryCounter.fetch_add(2);
|
||||
assert(queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries.");
|
||||
|
||||
const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit;
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
};
|
||||
|
||||
class D3D12ZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
D3D12QueueCtx* m_ctx = nullptr;
|
||||
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||
|
||||
public:
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
m_queryId = m_ctx->NextQueryId();
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
}
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
public:
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast<uint64_t>(srcLocation));
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast<uint64_t>(srcLocation));
|
||||
}
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
auto* item = Profiler::QueueSerial();
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation);
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
|
||||
: D3D12ZoneScope(ctx, cmdList, active)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation);
|
||||
}
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
tracy_force_inline ~D3D12ZoneScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t));
|
||||
}
|
||||
};
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
{
|
||||
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
|
||||
new (ctx) D3D12QueueCtx{ device, queue };
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D12ZoneScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
|
||||
m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t));
|
||||
}
|
||||
};
|
||||
|
||||
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
|
||||
{
|
||||
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
|
||||
new (ctx) D3D12QueueCtx{ device, queue };
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
|
||||
{
|
||||
ctx->~D3D12QueueCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
|
||||
{
|
||||
ctx->~D3D12QueueCtx();
|
||||
tracy_free(ctx);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#undef TracyD3D12Panic
|
||||
|
||||
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
||||
|
||||
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
|
||||
|
|
@ -471,25 +461,29 @@ using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
|||
|
||||
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
|
||||
|
||||
#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone
|
||||
#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine)
|
||||
#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), active };
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, depth, true)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, depth, true)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), depth, active };
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), depth, active };
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active };
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active };
|
||||
#else
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
|
||||
|
|
|
|||
|
|
@ -173,10 +173,10 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
|||
{
|
||||
const uint32_t line = dbg[i].currentline;
|
||||
memcpy( dst, &line, 4 ); dst += 4;
|
||||
assert( fsz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
assert( fsz[i] <= (std::numeric_limits<uint16_t>::max)() );
|
||||
memcpy( dst, fsz+i, 2 ); dst += 2;
|
||||
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
|
||||
assert( ssz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
assert( ssz[i] <= (std::numeric_limits<uint16_t>::max)() );
|
||||
memcpy( dst, ssz+i, 2 ); dst += 2;
|
||||
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
|
||||
}
|
||||
|
|
@ -333,7 +333,7 @@ static inline int LuaZoneText( lua_State* L )
|
|||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
|
@ -358,7 +358,7 @@ static inline int LuaZoneName( lua_State* L )
|
|||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
|
@ -378,7 +378,7 @@ static inline int LuaMessage( lua_State* L )
|
|||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
assert( size < (std::numeric_limits<uint16_t>::max)() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
memcpy( ptr, txt, size );
|
||||
|
|
|
|||
417
thirdparty/tracy/include/tracy/tracy/TracyVulkan.hpp
vendored
417
thirdparty/tracy/include/tracy/tracy/TracyVulkan.hpp
vendored
|
|
@ -5,6 +5,9 @@
|
|||
|
||||
#define TracyVkContext(x,y,z,w) nullptr
|
||||
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
|
||||
#if defined VK_EXT_host_query_reset
|
||||
#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr
|
||||
#endif
|
||||
#define TracyVkDestroy(x)
|
||||
#define TracyVkContextName(c,x,y)
|
||||
#define TracyVkNamedZone(c,x,y,z,w)
|
||||
|
|
@ -39,9 +42,47 @@ using TracyVkCtx = void*;
|
|||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define LoadVkDeviceCoreSymbols(Operation) \
|
||||
Operation(vkBeginCommandBuffer) \
|
||||
Operation(vkCmdResetQueryPool) \
|
||||
Operation(vkCmdWriteTimestamp) \
|
||||
Operation(vkCreateQueryPool) \
|
||||
Operation(vkDestroyQueryPool) \
|
||||
Operation(vkEndCommandBuffer) \
|
||||
Operation(vkGetQueryPoolResults) \
|
||||
Operation(vkQueueSubmit) \
|
||||
Operation(vkQueueWaitIdle) \
|
||||
Operation(vkResetQueryPool)
|
||||
|
||||
#define LoadVkDeviceExtensionSymbols(Operation) \
|
||||
Operation(vkGetCalibratedTimestampsEXT) \
|
||||
Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
|
||||
|
||||
#define LoadVkInstanceCoreSymbols(Operation) \
|
||||
Operation(vkGetPhysicalDeviceProperties)
|
||||
|
||||
struct VkSymbolTable
|
||||
{
|
||||
#define MAKE_PFN(name) PFN_##name name;
|
||||
LoadVkDeviceCoreSymbols(MAKE_PFN)
|
||||
LoadVkDeviceExtensionSymbols(MAKE_PFN)
|
||||
LoadVkInstanceCoreSymbols(MAKE_PFN)
|
||||
#undef MAKE_PFN
|
||||
};
|
||||
|
||||
#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature
|
||||
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature
|
||||
#else
|
||||
#define VK_FUNCTION_WRAPPER(callSignature) callSignature
|
||||
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature
|
||||
#endif
|
||||
|
||||
class VkCtx
|
||||
{
|
||||
friend class VkCtxScope;
|
||||
|
|
@ -49,7 +90,11 @@ class VkCtx
|
|||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated )
|
||||
#else
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT)
|
||||
#endif
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
|
|
@ -57,47 +102,28 @@ public:
|
|||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
|
||||
#if !defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
|
||||
#endif
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
|
||||
if ( calibrated )
|
||||
{
|
||||
uint32_t num;
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
|
||||
if( num > 4 ) num = 4;
|
||||
VkTimeDomainEXT data[4];
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
|
||||
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
|
||||
#if defined _WIN32
|
||||
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
|
||||
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
|
||||
}
|
||||
|
||||
#endif
|
||||
for( uint32_t i=0; i<num; i++ )
|
||||
{
|
||||
if( data[i] == supportedDomain )
|
||||
{
|
||||
m_timeDomain = data[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties prop;
|
||||
vkGetPhysicalDeviceProperties( physdev, &prop );
|
||||
const float period = prop.limits.timestampPeriod;
|
||||
|
||||
VkQueryPoolCreateInfo poolInfo = {};
|
||||
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
|
||||
poolInfo.queryCount = m_queryCount;
|
||||
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
|
||||
while( vkCreateQueryPool( device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS )
|
||||
if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
m_queryCount /= 2;
|
||||
poolInfo.queryCount = m_queryCount;
|
||||
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
|
||||
}
|
||||
|
||||
CreateQueryPool();
|
||||
|
||||
VkCommandBufferBeginInfo beginInfo = {};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
|
@ -107,87 +133,96 @@ public:
|
|||
submitInfo.commandBufferCount = 1;
|
||||
submitInfo.pCommandBuffers = &cmdbuf;
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
|
||||
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
|
||||
|
||||
int64_t tcpu, tgpu;
|
||||
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
|
||||
VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) );
|
||||
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) );
|
||||
VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
|
||||
VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
enum { NumProbes = 32 };
|
||||
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation[NumProbes];
|
||||
for( int i=0; i<NumProbes; i++ )
|
||||
{
|
||||
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
|
||||
}
|
||||
uint64_t minDeviation = deviation[0];
|
||||
for( int i=1; i<NumProbes; i++ )
|
||||
{
|
||||
if( minDeviation > deviation[i] )
|
||||
{
|
||||
minDeviation = deviation[i];
|
||||
}
|
||||
}
|
||||
m_deviation = minDeviation * 3 / 2;
|
||||
|
||||
#if defined _WIN32
|
||||
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||
#endif
|
||||
|
||||
FindCalibratedTimestampDeviation();
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
tcpu = Profiler::GetTime();
|
||||
}
|
||||
|
||||
uint8_t flags = 0;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
||||
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
WriteInitialItem( physdev, tcpu, tgpu );
|
||||
|
||||
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
|
||||
}
|
||||
|
||||
#if defined VK_EXT_host_query_reset
|
||||
/**
|
||||
* This alternative constructor does not use command buffers and instead uses functionality from
|
||||
* VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires
|
||||
* the physical device to have another time domain apart from DEVICE to be calibrateable.
|
||||
*/
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
|
||||
#else
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT )
|
||||
#endif
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
#if !defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
|
||||
#endif
|
||||
{
|
||||
assert( m_context != 255);
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
|
||||
m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
|
||||
#endif
|
||||
|
||||
assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr );
|
||||
assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr );
|
||||
assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr );
|
||||
|
||||
FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) );
|
||||
|
||||
// We require a host time domain to be available to properly calibrate.
|
||||
FindCalibratedTimestampDeviation();
|
||||
int64_t tgpu;
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
CreateQueryPool();
|
||||
VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) );
|
||||
|
||||
WriteInitialItem( physdev, tcpu, tgpu );
|
||||
|
||||
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
|
||||
}
|
||||
#endif
|
||||
|
||||
~VkCtx()
|
||||
{
|
||||
tracy_free( m_res );
|
||||
vkDestroyQueryPool( m_device, m_query, nullptr );
|
||||
VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) );
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
|
|
@ -210,18 +245,23 @@ public:
|
|||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
const uint64_t head = m_head.load(std::memory_order_relaxed);
|
||||
if( m_tail == head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
m_head = m_tail = m_oldCnt = 0;
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
|
||||
m_tail = head;
|
||||
m_oldCnt = 0;
|
||||
int64_t tgpu;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
assert( head > m_tail );
|
||||
|
||||
const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount );
|
||||
|
||||
unsigned int cnt;
|
||||
if( m_oldCnt != 0 )
|
||||
|
|
@ -231,10 +271,16 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
cnt = m_head < m_tail ? m_queryCount - m_tail : m_head - m_tail;
|
||||
cnt = (unsigned int)( head - m_tail );
|
||||
assert( cnt <= m_queryCount );
|
||||
if( wrappedTail + cnt > m_queryCount )
|
||||
{
|
||||
cnt = m_queryCount - wrappedTail;
|
||||
}
|
||||
}
|
||||
|
||||
if( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY )
|
||||
|
||||
if( VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) )
|
||||
{
|
||||
m_oldCnt = cnt;
|
||||
return;
|
||||
|
|
@ -245,7 +291,7 @@ public:
|
|||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuTime );
|
||||
MemWrite( &item->gpuTime.gpuTime, m_res[idx] );
|
||||
MemWrite( &item->gpuTime.queryId, uint16_t( m_tail + idx ) );
|
||||
MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) );
|
||||
MemWrite( &item->gpuTime.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
|
@ -269,19 +315,16 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
||||
VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) );
|
||||
|
||||
m_tail += cnt;
|
||||
if( m_tail == m_queryCount ) m_tail = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
// Reserves the next timestamp query slot and returns its index inside the pool.
//
// m_head is an atomic counter that is only ever incremented here; the returned
// value is that counter wrapped into [0, m_queryCount), the same wrapping
// Collect() applies to m_tail. The superseded variant that stored an already
// wrapped head back into m_head is dropped, because it cannot coexist with the
// atomic counter (it redeclared `id` and left this code unreachable).
tracy_force_inline unsigned int NextQueryId()
{
    const uint64_t id = m_head.fetch_add( 1, std::memory_order_relaxed );
    // Explicit narrowing: the remainder is always below m_queryCount, which is
    // itself an unsigned int.
    return (unsigned int)( id % m_queryCount );
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
|
|
@ -315,16 +358,126 @@ private:
|
|||
#endif
|
||||
}
|
||||
|
||||
// Creates the timestamp query pool stored in m_query.
//
// Starts with the currently configured m_queryCount and, every time creation
// fails, halves m_queryCount and tries again, so on return m_queryCount holds
// the size that was actually accepted.
//
// NOTE(review): nothing stops the retry loop once m_queryCount reaches 0, so a
// persistent failure keeps retrying with a zero-sized pool — confirm callers
// can never get here in that state.
tracy_force_inline void CreateQueryPool()
{
    VkQueryPoolCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
    createInfo.queryCount = m_queryCount;

    for(;;)
    {
        if( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &createInfo, nullptr, &m_query ) == VK_SUCCESS ) ) break;

        // Creation failed: shrink the request and retry.
        m_queryCount /= 2;
        createInfo.queryCount = m_queryCount;
    }
}
|
||||
|
||||
// Selects the calibrateable time domain to use for CPU/GPU timestamp correlation.
//
// Queries up to four time domains from the physical device and, if one of them
// matches the host clock used on this platform (QueryPerformanceCounter on
// Windows, CLOCK_MONOTONIC_RAW on Linux), stores it in m_timeDomain. When no
// match is found, or the query fails, m_timeDomain is left untouched.
//
// physicalDevice: device whose time domains are enumerated.
// _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT: entry point used for the query.
tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT )
{
    constexpr uint32_t MaxDomains = 4;

    // Zero-initialised so a failing count query cannot leave garbage behind
    // that would later be used as a loop bound.
    uint32_t num = 0;
    _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr );
    if( num == 0 ) return;
    if( num > MaxDomains ) num = MaxDomains;

    VkTimeDomainEXT data[MaxDomains];
    // VK_INCOMPLETE is expected whenever the device exposes more domains than
    // fit in the buffer; any other non-success result means data[] must not be read.
    const VkResult res = _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data );
    if( res != VK_SUCCESS && res != VK_INCOMPLETE ) return;

    // Sentinel that matches no real domain on platforms without a supported host clock.
    VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
#if defined _WIN32
    supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
    supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
#endif

    for( uint32_t i=0; i<num; i++ )
    {
        if( data[i] == supportedDomain )
        {
            m_timeDomain = data[i];
            break;
        }
    }
}
|
||||
|
||||
// Measures the deviation to expect from calibrated timestamp queries.
//
// Samples the device time domain together with m_timeDomain NumProbes times,
// keeps the smallest deviation value reported by those calls and stores 1.5x
// that value in m_deviation. On Windows it additionally fills m_qpcToNs from
// the QPC frequency.
//
// Must only be called when a calibrated (non-device) time domain was selected.
tracy_force_inline void FindCalibratedTimestampDeviation()
{
    assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
    constexpr size_t NumProbes = 32;
    VkCalibratedTimestampInfoEXT spec[2] = {
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
    };
    // The timestamps themselves are not needed here, only the reported deviation.
    uint64_t ts[2];

    // The minimum is tracked while probing; the index shares NumProbes' type
    // (size_t) so the loop condition no longer mixes signed and unsigned.
    uint64_t minDeviation = 0;
    for( size_t i=0; i<NumProbes; i++ )
    {
        uint64_t deviation;
        m_vkGetCalibratedTimestampsEXT( m_device, 2, spec, ts, &deviation );
        if( i == 0 || deviation < minDeviation ) minDeviation = deviation;
    }
    m_deviation = minDeviation * 3 / 2;

#if defined _WIN32
    m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
#endif
}
|
||||
|
||||
// Queues the GpuNewContext item that introduces this Vulkan context to the profiler.
//
// physdev: physical device whose timestampPeriod limit is reported.
// tcpu, tgpu: CPU and GPU timestamps written into the item as the initial pair.
//
// The item carries the calibration flag whenever m_timeDomain is not the
// device time domain. With TRACY_ON_DEMAND the item is also handed to
// DeferItem() before the serial queue slot is finished.
tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu )
{
    const uint8_t flags = ( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) ? uint8_t( GpuContextCalibration ) : uint8_t( 0 );

    // Queried before the queue slot is requested, same order as before.
    VkPhysicalDeviceProperties deviceProps;
    VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &deviceProps ) );
    const float period = deviceProps.limits.timestampPeriod;

    auto msg = Profiler::QueueSerial();
    MemWrite( &msg->hdr.type, QueueType::GpuNewContext );
    MemWrite( &msg->gpuNewContext.cpuTime, tcpu );
    MemWrite( &msg->gpuNewContext.gpuTime, tgpu );
    // The thread field is zero-filled rather than set to a thread id.
    memset( &msg->gpuNewContext.thread, 0, sizeof( msg->gpuNewContext.thread ) );
    MemWrite( &msg->gpuNewContext.period, period );
    MemWrite( &msg->gpuNewContext.context, m_context );
    MemWrite( &msg->gpuNewContext.flags, flags );
    MemWrite( &msg->gpuNewContext.type, GpuContextType::Vulkan );

#ifdef TRACY_ON_DEMAND
    GetProfiler().DeferItem( *msg );
#endif
    Profiler::QueueSerialFinish();
}
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
// Fills m_symbols with the Vulkan entry points this context calls.
//
// instance: instance handed to instanceProcAddr for instance-level lookups.
// instanceProcAddr / deviceProcAddr: loaders used to resolve each symbol by
// name; device-level symbols are resolved against m_device.
//
// The LoadVk*Symbols lists are defined outside this view; presumably each one
// expands to one invocation of the macro passed in per entry point — verify
// against their definition.
void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
{
// The GET macros are plain parenthesised expressions; only the LOAD macros
// terminate a statement, so an expansion no longer ends in a stray ';;'.
#define VK_GET_DEVICE_SYMBOL( name ) \
    ( (PFN_##name)deviceProcAddr( m_device, #name ) )
#define VK_LOAD_DEVICE_SYMBOL( name ) \
    m_symbols.name = VK_GET_DEVICE_SYMBOL( name );
#define VK_GET_INSTANCE_SYMBOL( name ) \
    ( (PFN_##name)instanceProcAddr( instance, #name ) )
#define VK_LOAD_INSTANCE_SYMBOL( name ) \
    m_symbols.name = VK_GET_INSTANCE_SYMBOL( name );

    LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL )
    LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL )
    LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL )

// The helper macros are local to this function.
#undef VK_GET_DEVICE_SYMBOL
#undef VK_LOAD_DEVICE_SYMBOL
#undef VK_GET_INSTANCE_SYMBOL
#undef VK_LOAD_INSTANCE_SYMBOL
}
#endif
|
||||
|
||||
VkDevice m_device;
|
||||
VkQueryPool m_query;
|
||||
VkTimeDomainEXT m_timeDomain;
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
VkSymbolTable m_symbols;
|
||||
#endif
|
||||
uint64_t m_deviation;
|
||||
int64_t m_qpcToNs;
|
||||
int64_t m_prevCalibration;
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
std::atomic<uint64_t> m_head;
|
||||
uint64_t m_tail;
|
||||
unsigned int m_oldCnt;
|
||||
unsigned int m_queryCount;
|
||||
|
||||
|
|
@ -348,7 +501,7 @@ public:
|
|||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
|
|
@ -372,7 +525,7 @@ public:
|
|||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
|
|
@ -396,7 +549,7 @@ public:
|
|||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerial();
|
||||
|
|
@ -421,7 +574,7 @@ public:
|
|||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
|
|
@ -439,7 +592,7 @@ public:
|
|||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId );
|
||||
CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
|
|
@ -457,13 +610,38 @@ private:
|
|||
VkCtx* m_ctx;
|
||||
};
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false )
|
||||
#else
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
#endif
|
||||
{
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated );
|
||||
#else
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#endif
|
||||
return ctx;
|
||||
}
|
||||
|
||||
#if defined VK_EXT_host_query_reset
// Overload available when VK_EXT_host_query_reset is defined: allocates storage
// with tracy_malloc and constructs a VkCtx in it without a queue or command
// buffer. The returned pointer is the constructed object.
//
// With TRACY_VK_USE_SYMBOL_TABLE the entry points are resolved through the
// supplied proc-addr loaders; otherwise the query-reset and calibration entry
// points are passed in directly.
#if defined TRACY_VK_USE_SYMBOL_TABLE
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr )
{
    void* storage = tracy_malloc( sizeof( VkCtx ) );
    return new( storage ) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr );
}
#else
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
{
    void* storage = tracy_malloc( sizeof( VkCtx ) );
    return new( storage ) VkCtx( physdev, device, qpreset, gpdctd, gct );
}
#endif
#endif
|
||||
|
||||
static inline void DestroyVkContext( VkCtx* ctx )
|
||||
{
|
||||
ctx->~VkCtx();
|
||||
|
|
@ -474,8 +652,23 @@ static inline void DestroyVkContext( VkCtx* ctx )
|
|||
|
||||
using TracyVkCtx = tracy::VkCtx*;
|
||||
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr );
|
||||
#else
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
|
||||
#endif
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true );
|
||||
#else
|
||||
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#endif
|
||||
#if defined VK_EXT_host_query_reset
|
||||
#if defined TRACY_VK_USE_SYMBOL_TABLE
|
||||
#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr );
|
||||
#else
|
||||
#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct );
|
||||
#endif
|
||||
#endif
|
||||
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
|
||||
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue