From c75f35be57cf21dccc710065ad25895008c1b312 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Sun, 10 Aug 2025 02:08:20 -0400 Subject: [PATCH] Initial tracy profiler support --- librecomp/CMakeLists.txt | 7 + librecomp/src/ai.cpp | 5 + librecomp/src/pi.cpp | 4 + librecomp/src/sp.cpp | 2 + ultramodern/CMakeLists.txt | 7 + .../include/ultramodern/ultramodern_tracy.hpp | 218 ++++++++++++++++++ ultramodern/src/events.cpp | 9 +- ultramodern/src/input.cpp | 4 + ultramodern/src/rsp.cpp | 2 + ultramodern/src/threads.cpp | 22 +- 10 files changed, 276 insertions(+), 4 deletions(-) create mode 100644 ultramodern/include/ultramodern/ultramodern_tracy.hpp diff --git a/librecomp/CMakeLists.txt b/librecomp/CMakeLists.txt index 9c4bf53..d78d1bc 100644 --- a/librecomp/CMakeLists.txt +++ b/librecomp/CMakeLists.txt @@ -5,6 +5,8 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_CXX_EXTENSIONS OFF) +set(LIBRECOMP_TRACY_PATH "" CACHE STRING "Include path for Tracy library, if used.") + # Define the library add_library(librecomp STATIC "${CMAKE_CURRENT_SOURCE_DIR}/src/ai.cpp" @@ -52,6 +54,11 @@ target_compile_options(librecomp PRIVATE -Wno-unused-parameter ) +if(NOT LIBRECOMP_TRACY_PATH STREQUAL "") + target_compile_definitions(librecomp PRIVATE "TRACY_ENABLED") + target_include_directories(librecomp PRIVATE ${LIBRECOMP_TRACY_PATH}) +endif() + if (WIN32) add_compile_definitions(NOMINMAX) endif() diff --git a/librecomp/src/ai.cpp b/librecomp/src/ai.cpp index 0195f2b..3ab2430 100644 --- a/librecomp/src/ai.cpp +++ b/librecomp/src/ai.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #define VI_NTSC_CLOCK 48681812 @@ -13,9 +14,13 @@ extern "C" void osAiSetFrequency_recomp(uint8_t* rdram, recomp_context* ctx) { //freq = VI_NTSC_CLOCK / dacRate; ctx->r2 = freq; ultramodern::set_audio_frequency(freq); + + std::string tracy_message = "Set AI Freq: " + std::to_string(freq); + TracyMessage(tracy_message.c_str(), tracy_message.size()); /* size is the text length; the NUL terminator is not part of the message */ } extern "C" void
osAiSetNextBuffer_recomp(uint8_t* rdram, recomp_context* ctx) { + ZoneScoped; ultramodern::queue_audio_buffer(rdram, ctx->r4, ctx->r5); ctx->r2 = 0; } diff --git a/librecomp/src/pi.cpp b/librecomp/src/pi.cpp index d7ebd53..52c334a 100644 --- a/librecomp/src/pi.cpp +++ b/librecomp/src/pi.cpp @@ -10,6 +10,7 @@ #include "librecomp/files.hpp" #include #include +#include static std::vector rom; @@ -310,6 +311,7 @@ void do_dma(RDRAM_ARG PTR(OSMesgQueue) mq, gpr rdram_address, uint32_t physical_ } extern "C" void osPiStartDma_recomp(RDRAM_ARG recomp_context* ctx) { + ZoneScoped; uint32_t mb = ctx->r4; uint32_t pri = ctx->r5; uint32_t direction = ctx->r6; @@ -327,6 +329,7 @@ extern "C" void osPiStartDma_recomp(RDRAM_ARG recomp_context* ctx) { } extern "C" void osEPiStartDma_recomp(RDRAM_ARG recomp_context* ctx) { + ZoneScoped; OSPiHandle* handle = TO_PTR(OSPiHandle, ctx->r4); OSIoMesg* mb = TO_PTR(OSIoMesg, ctx->r5); uint32_t direction = ctx->r6; @@ -344,6 +347,7 @@ extern "C" void osEPiStartDma_recomp(RDRAM_ARG recomp_context* ctx) { } extern "C" void osEPiReadIo_recomp(RDRAM_ARG recomp_context * ctx) { + ZoneScoped; OSPiHandle* handle = TO_PTR(OSPiHandle, ctx->r4); uint32_t devAddr = handle->baseAddress | ctx->r5; gpr dramAddr = ctx->r6; diff --git a/librecomp/src/sp.cpp b/librecomp/src/sp.cpp index ae0f44f..21b71f1 100644 --- a/librecomp/src/sp.cpp +++ b/librecomp/src/sp.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "recomp.h" extern "C" void osSpTaskLoad_recomp(uint8_t* rdram, recomp_context* ctx) { @@ -10,6 +11,7 @@ extern "C" void osSpTaskLoad_recomp(uint8_t* rdram, recomp_context* ctx) { bool dump_frame = false; extern "C" void osSpTaskStartGo_recomp(uint8_t* rdram, recomp_context* ctx) { + ZoneScoped; //printf("[sp] osSpTaskStartGo(0x%08X)\n", (uint32_t)ctx->r4); OSTask* task = TO_PTR(OSTask, ctx->r4); if (task->t.type == M_GFXTASK) { diff --git a/ultramodern/CMakeLists.txt b/ultramodern/CMakeLists.txt index 90ac35a..2a6e437 100644 --- 
a/ultramodern/CMakeLists.txt +++ b/ultramodern/CMakeLists.txt @@ -5,6 +5,8 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_CXX_EXTENSIONS OFF) +set(ULTRAMODERN_TRACY_PATH "" CACHE STRING "Include path for Tracy library, if used.") + add_library(ultramodern STATIC "${CMAKE_CURRENT_SOURCE_DIR}/src/audio.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/error_handling.cpp" @@ -35,6 +37,11 @@ target_compile_options(ultramodern PRIVATE -Wno-unused-parameter ) +if(NOT ULTRAMODERN_TRACY_PATH STREQUAL "") + target_compile_definitions(ultramodern PRIVATE "TRACY_ENABLED") + target_include_directories(ultramodern PRIVATE ${ULTRAMODERN_TRACY_PATH}) +endif() + if (WIN32) add_compile_definitions(NOMINMAX) endif() diff --git a/ultramodern/include/ultramodern/ultramodern_tracy.hpp b/ultramodern/include/ultramodern/ultramodern_tracy.hpp new file mode 100644 index 0000000..7647186 --- /dev/null +++ b/ultramodern/include/ultramodern/ultramodern_tracy.hpp @@ -0,0 +1,218 @@ +#ifndef __ULTRAMODERN_TRACY_H__ +#define __ULTRAMODERN_TRACY_H__ + +#ifdef TRACY_ENABLED +# define TRACY_ENABLE +# include +# include + +namespace tracy { + void SetThreadName( const char* name ); + void SetThreadNameWithHint( const char* name, int32_t groupHint ); + const char* GetThreadName( uint32_t id ); + + const char* GetEnvVar( const char* name ); +} + +#define TracySetThreadName(name) tracy::SetThreadName(name) +#define TracySetThreadNameWithHint(name, groupHint) tracy::SetThreadNameWithHint(name, groupHint) + +#else + +// C++ API: empty stand-ins for the Tracy macros, used when TRACY_ENABLED is not defined. + +# define TracyNoop + +# define ZoneNamed(x,y) +# define ZoneNamedN(x,y,z) +# define ZoneNamedC(x,y,z) +# define ZoneNamedNC(x,y,z,w) + +# define ZoneTransient(x,y) +# define ZoneTransientN(x,y,z) + +# define ZoneScoped +# define ZoneScopedN(x) +# define ZoneScopedC(x) +# define ZoneScopedNC(x,y) + +# define ZoneText(x,y) +# define ZoneTextV(x,y,z) +# define ZoneTextF(x,...) +# define ZoneTextVF(x,y,...)
+# define ZoneName(x,y) +# define ZoneNameV(x,y,z) +# define ZoneNameF(x,...) +# define ZoneNameVF(x,y,...) +# define ZoneColor(x) +# define ZoneColorV(x,y) +# define ZoneValue(x) +# define ZoneValueV(x,y) +# define ZoneIsActive false +# define ZoneIsActiveV(x) false + +# define FrameMark +# define FrameMarkNamed(x) +# define FrameMarkStart(x) +# define FrameMarkEnd(x) + +# define FrameImage(x,y,z,w,a) + +# define TracyLockable( type, varname ) type varname +# define TracyLockableN( type, varname, desc ) type varname +# define TracySharedLockable( type, varname ) type varname +# define TracySharedLockableN( type, varname, desc ) type varname +# define LockableBase( type ) type +# define SharedLockableBase( type ) type +# define LockMark(x) (void)x +# define LockableName(x,y,z) + +# define TracyPlot(x,y) +# define TracyPlotConfig(x,y,z,w,a) + +# define TracyMessage(x,y) +# define TracyMessageL(x) +# define TracyMessageC(x,y,z) +# define TracyMessageLC(x,y) +# define TracyAppInfo(x,y) + +# define TracyAlloc(x,y) +# define TracyFree(x) +# define TracySecureAlloc(x,y) +# define TracySecureFree(x) + +# define TracyAllocN(x,y,z) +# define TracyFreeN(x,y) +# define TracySecureAllocN(x,y,z) +# define TracySecureFreeN(x,y) + +# define ZoneNamedS(x,y,z) +# define ZoneNamedNS(x,y,z,w) +# define ZoneNamedCS(x,y,z,w) +# define ZoneNamedNCS(x,y,z,w,a) + +# define ZoneTransientS(x,y,z) +# define ZoneTransientNS(x,y,z,w) + +# define ZoneScopedS(x) +# define ZoneScopedNS(x,y) +# define ZoneScopedCS(x,y) +# define ZoneScopedNCS(x,y,z) + +# define TracyAllocS(x,y,z) +# define TracyFreeS(x,y) +# define TracySecureAllocS(x,y,z) +# define TracySecureFreeS(x,y) + +# define TracyAllocNS(x,y,z,w) +# define TracyFreeNS(x,y,z) +# define TracySecureAllocNS(x,y,z,w) +# define TracySecureFreeNS(x,y,z) + +# define TracyMessageS(x,y,z) +# define TracyMessageLS(x,y) +# define TracyMessageCS(x,y,z,w) +# define TracyMessageLCS(x,y,z) + +# define TracySourceCallbackRegister(x,y) +# define 
TracyParameterRegister(x,y) +# define TracyParameterSetup(x,y,z,w) +# define TracyIsConnected false +# define TracyIsStarted false +# define TracySetProgramName(x) + +# define TracyFiberEnter(x) +# define TracyFiberEnterHint(x,y) +# define TracyFiberLeave + +// C API + +typedef const void* TracyCZoneCtx; + +typedef const void* TracyCLockCtx; + +# define TracyCZone(c,x) +# define TracyCZoneN(c,x,y) +# define TracyCZoneC(c,x,y) +# define TracyCZoneNC(c,x,y,z) +# define TracyCZoneEnd(c) +# define TracyCZoneText(c,x,y) +# define TracyCZoneName(c,x,y) +# define TracyCZoneColor(c,x) +# define TracyCZoneValue(c,x) + +# define TracyCAlloc(x,y) +# define TracyCFree(x) +# define TracyCMemoryDiscard(x) +# define TracyCSecureAlloc(x,y) +# define TracyCSecureFree(x) +# define TracyCSecureMemoryDiscard(x) + +# define TracyCAllocN(x,y,z) +# define TracyCFreeN(x,y) +# define TracyCSecureAllocN(x,y,z) +# define TracyCSecureFreeN(x,y) + +# define TracyCFrameMark +# define TracyCFrameMarkNamed(x) +# define TracyCFrameMarkStart(x) +# define TracyCFrameMarkEnd(x) +# define TracyCFrameImage(x,y,z,w,a) + +# define TracyCPlot(x,y) +# define TracyCPlotF(x,y) +# define TracyCPlotI(x,y) +# define TracyCPlotConfig(x,y,z,w,a) + +# define TracyCMessage(x,y) +# define TracyCMessageL(x) +# define TracyCMessageC(x,y,z) +# define TracyCMessageLC(x,y) +# define TracyCAppInfo(x,y) + +# define TracyCZoneS(x,y,z) +# define TracyCZoneNS(x,y,z,w) +# define TracyCZoneCS(x,y,z,w) +# define TracyCZoneNCS(x,y,z,w,a) + +# define TracyCAllocS(x,y,z) +# define TracyCFreeS(x,y) +# define TracyCMemoryDiscardS(x,y) +# define TracyCSecureAllocS(x,y,z) +# define TracyCSecureFreeS(x,y) +# define TracyCSecureMemoryDiscardS(x,y) + +# define TracyCAllocNS(x,y,z,w) +# define TracyCFreeNS(x,y,z) +# define TracyCSecureAllocNS(x,y,z,w) +# define TracyCSecureFreeNS(x,y,z) + +# define TracyCMessageS(x,y,z) +# define TracyCMessageLS(x,y) +# define TracyCMessageCS(x,y,z,w) +# define TracyCMessageLCS(x,y,z) + +# define 
TracyCLockCtx(l) +# define TracyCLockAnnounce(l) +# define TracyCLockTerminate(l) +# define TracyCLockBeforeLock(l) +# define TracyCLockAfterLock(l) +# define TracyCLockAfterUnlock(l) +# define TracyCLockAfterTryLock(l,x) +# define TracyCLockMark(l) +# define TracyCLockCustomName(l,x,y) + +# define TracyCIsConnected 0 +# define TracyCIsStarted 0 + +# define TracyCFiberEnter(fiber) +# define TracyCFiberLeave + +// Other tracy functions + +#define TracySetThreadName(name) +#define TracySetThreadNameWithHint(name, groupHint) + +#endif + +#endif \ No newline at end of file diff --git a/ultramodern/src/events.cpp b/ultramodern/src/events.cpp index 46f63f1..6525c22 100644 --- a/ultramodern/src/events.cpp +++ b/ultramodern/src/events.cpp @@ -13,6 +13,7 @@ #include "ultramodern/ultra64.h" #include "ultramodern/ultramodern.hpp" +#include "ultramodern/ultramodern_tracy.hpp" #include "ultramodern/rsp.hpp" #include "ultramodern/renderer_context.hpp" @@ -231,6 +232,7 @@ void vi_thread_func() { ViState* cur_state = events_context.vi.get_cur_state(); if (remaining_retraces == 0) { if (cur_state->mq != NULLPTR) { + TracyMessageL("VI Event"); if (osSendMesg(PASS_RDRAM cur_state->mq, cur_state->msg, OS_MESG_NOBLOCK) == -1) { //printf("Game skipped a VI frame!\n"); } @@ -238,6 +240,7 @@ void vi_thread_func() { remaining_retraces = cur_state->retrace_count; } if (events_context.ai.mq != NULLPTR) { + TracyMessageL("AI Event"); if (osSendMesg(PASS_RDRAM events_context.ai.mq, events_context.ai.msg, OS_MESG_NOBLOCK) == -1) { //printf("Game skipped a AI frame!\n"); } @@ -367,7 +370,10 @@ void gfx_thread_func(uint8_t* rdram, moodycamel::LightweightSemaphore* thread_re ultramodern::measure_input_latency(); [[maybe_unused]] auto renderer_start = std::chrono::high_resolution_clock::now(); - renderer_context->send_dl(&task_action->task); + { + ZoneScopedN("Displaylist"); + renderer_context->send_dl(&task_action->task); + } [[maybe_unused]] auto renderer_end = 
std::chrono::high_resolution_clock::now(); dp_complete(); // printf("Renderer ProcessDList time: %d us\n", static_cast(std::chrono::duration_cast(renderer_end - renderer_start).count())); @@ -444,6 +450,7 @@ void set_dummy_vi(bool odd) { } extern "C" void osViSwapBuffer(RDRAM_ARG PTR(void) frameBufPtr) { + ZoneScoped; std::lock_guard lock{ events_context.message_mutex }; events_context.vi.get_next_state()->framebuffer = frameBufPtr; } diff --git a/ultramodern/src/input.cpp b/ultramodern/src/input.cpp index 03a7c6b..ccf1136 100644 --- a/ultramodern/src/input.cpp +++ b/ultramodern/src/input.cpp @@ -3,6 +3,7 @@ #include "ultramodern/input.hpp" #include "ultramodern/ultra64.h" #include "ultramodern/ultramodern.hpp" +#include "ultramodern/ultramodern_tracy.hpp" static ultramodern::input::callbacks_t input_callbacks {}; @@ -104,6 +105,7 @@ extern "C" s32 osContStartQuery(RDRAM_ARG PTR(OSMesgQueue) mq) { } extern "C" s32 osContStartReadData(RDRAM_ARG PTR(OSMesgQueue) mq) { + ZoneScoped; if (input_callbacks.poll_input != nullptr) { input_callbacks.poll_input(); } @@ -128,6 +130,7 @@ extern "C" void osContGetQuery(RDRAM_ARG PTR(OSContStatus) data_) { } extern "C" void osContGetReadData(OSContPad *data) { + ZoneScoped; for (int controller = 0; controller < max_controllers; controller++) { uint16_t buttons = 0; float x = 0.0f; @@ -168,6 +171,7 @@ s32 osMotorStart(RDRAM_ARG PTR(OSPfs) pfs) { } s32 __osMotorAccess(RDRAM_ARG PTR(OSPfs) pfs_, s32 flag) { + ZoneScoped; OSPfs *pfs = TO_PTR(OSPfs, pfs_); if (input_callbacks.set_rumble != nullptr) { diff --git a/ultramodern/src/rsp.cpp b/ultramodern/src/rsp.cpp index 1594304..9205377 100644 --- a/ultramodern/src/rsp.cpp +++ b/ultramodern/src/rsp.cpp @@ -2,6 +2,7 @@ #include #include "ultramodern/rsp.hpp" +#include "ultramodern/ultramodern_tracy.hpp" static ultramodern::rsp::callbacks_t rsp_callbacks {}; @@ -16,6 +17,7 @@ void ultramodern::rsp::init() { } bool ultramodern::rsp::run_task(RDRAM_ARG const OSTask* task) { + ZoneScoped; 
assert(rsp_callbacks.run_task != nullptr); return rsp_callbacks.run_task(PASS_RDRAM task); diff --git a/ultramodern/src/threads.cpp b/ultramodern/src/threads.cpp index 812c556..394bebf 100644 --- a/ultramodern/src/threads.cpp +++ b/ultramodern/src/threads.cpp @@ -5,6 +5,7 @@ #include "ultramodern/ultra64.h" #include "ultramodern/ultramodern.hpp" +#include "ultramodern/ultramodern_tracy.hpp" #include "blockingconcurrentqueue.h" #include "ultramodern/threads.hpp" @@ -146,8 +147,14 @@ void ultramodern::set_native_thread_name(const std::string& name) { void ultramodern::set_native_thread_priority(ThreadPriority pri) {} #endif -void wait_for_resumed(RDRAM_ARG UltraThreadContext* thread_context) { +void wait_for_resumed(RDRAM_ARG UltraThreadContext* thread_context, bool first_start = false) { + if (!first_start) { + // TracyMessageL("Pause"); + } thread_context->running.wait(); + if (!first_start) { + // TracyMessageL("Resume"); + } // If this thread's context was replaced by another thread or deleted, destroy it again from its own context. // This will trigger thread cleanup instead. if (TO_PTR(OSThread, ultramodern::this_thread())->context != thread_context) { @@ -189,9 +196,15 @@ static void _thread_func(RDRAM_ARG PTR(OSThread) self_, PTR(thread_func_t) entry is_game_thread = true; // Set the thread name - ultramodern::set_native_thread_name(ultramodern::threads::get_game_thread_name(self)); + std::string thread_name = ultramodern::threads::get_game_thread_name(self); + + // Name the native thread; the same string is passed to Tracy below. + ultramodern::set_native_thread_name(thread_name); ultramodern::set_native_thread_priority(ultramodern::ThreadPriority::High); + // Set the thread name in tracy and the thread group to 0x1064 to indicate a game thread. + TracySetThreadNameWithHint(thread_name.c_str(), 0x1064); + // Signal the initialized semaphore to indicate that this thread can be started.
thread_context->initialized.signal(); @@ -199,10 +212,13 @@ static void _thread_func(RDRAM_ARG PTR(OSThread) self_, PTR(thread_func_t) entry // Wait until the thread is marked as running. try { - wait_for_resumed(PASS_RDRAM thread_context); + ZoneScopedN("Wait for Start"); + wait_for_resumed(PASS_RDRAM thread_context, true); } catch (ultramodern::thread_terminated& terminated) { } + TracyMessageL("Start"); + // Make sure the thread wasn't replaced or destroyed before it was started. if (self->context == thread_context) { debug_printf("[Thread] Thread started: %d\n", self->id);