From 1e4ede799b2bdd935e52257858a524b91881266c Mon Sep 17 00:00:00 2001 From: Isaac0-dev <62234577+Isaac0-dev@users.noreply.github.com> Date: Fri, 8 May 2026 20:57:06 +1000 Subject: [PATCH] massive performance improvements for add_scroll_target (#1219) Loading the scroll targets was probably one of the slowest parts of loading a romhack. The reason for this is that many romhacks can have thousands of calls to `add_scroll_target`. So, for 4,107 calls to `add_scroll_target`, the time went from ~4.5759 seconds to ~0.0173 seconds in total. Changes made: - Previously, simply finding the material data to scroll was rather slow due to using a linear substring search across all vertices in all levels. To speed this up, I added a cache. The cache bypasses checking every level by storing the exact string (rather than the substring) in a hashmap, so lookups become a simple case of a string lookup as a key in the map. It falls back to the full lookup if the cache doesn't hit. - Changed the vertex buffer management in `scroll_targets.c` to behave closer to a modern dynamic array, where the buffer capacity is doubled whenever it is too small to hold the newly added vertices, to reduce the number of allocations performed. --- data/dynos_misc.cpp | 62 +++++++++++++++++++++++++++++++----- src/game/scroll_targets.c | 27 ++++++++-------- src/game/scroll_targets.h | 1 + src/pc/lua/smlua_functions.c | 5 ++- 4 files changed, 73 insertions(+), 22 deletions(-) diff --git a/data/dynos_misc.cpp b/data/dynos_misc.cpp index c13964351..e6fd48c95 100644 --- a/data/dynos_misc.cpp +++ b/data/dynos_misc.cpp @@ -1,3 +1,6 @@ +#include +#include +#include #include "dynos.cpp.h" extern "C" { #include "game/scroll_targets.h" @@ -7,16 +10,59 @@ extern "C" { // Scroll Targets // +static void DynOS_Add_Scroll_Target_Match(u32 index, const char* name, u32 offset, u32 size, DataNode* node) { + if (offset >= node->mSize) { return; } + u32 finalSize = (size > 0 && size <= (node->mSize - offset)) ? 
size : (node->mSize - offset); + add_vtx_scroll_target( + index, + &node->mData[offset], + finalSize, + offset > 0 + ); +} + void DynOS_Add_Scroll_Target(u32 index, const char* name, u32 offset, u32 size) { - for (auto& lvlPair : DynOS_Lvl_GetArray()) { - for (auto& node : lvlPair.second->mVertices) { + static std::unordered_multimap*> sVertexNodesExactMap; + static std::vector sLvlGfxDataCache; // cache existing level pointers to know when to rebuild + + auto& lvlArray = DynOS_Lvl_GetArray(); + + // Check if cache needs rebuilding + bool rebuild = (lvlArray.size() != sLvlGfxDataCache.size()); + if (!rebuild) { + for (size_t i = 0; i < lvlArray.size(); ++i) { + if (lvlArray[i].second != sLvlGfxDataCache[i]) { + rebuild = true; + break; + } + } + } + + if (rebuild) { + sVertexNodesExactMap.clear(); + sLvlGfxDataCache.clear(); + for (const auto& lvlPair : lvlArray) { + sLvlGfxDataCache.push_back(lvlPair.second); + for (const auto& node : lvlPair.second->mVertices) { + sVertexNodesExactMap.emplace(std::string_view(node->mName.begin(), node->mName.Length()), node); + } + } + } + + // Check exact match + auto range = sVertexNodesExactMap.equal_range(name); + if (range.first != range.second) { + for (auto it = range.first; it != range.second; ++it) { + DynOS_Add_Scroll_Target_Match(index, name, offset, size, it->second); + } + return; + } + + // Fallback to substring search + for (const auto& lvlPair : lvlArray) { + for (const auto& node : lvlPair.second->mVertices) { if (node->mName.Find(name) >= 0) { - add_vtx_scroll_target( - index, - &node->mData[offset], - (size > 0 && size < node->mSize) ? 
size : node->mSize, - offset > 0 - ); + DynOS_Add_Scroll_Target_Match(index, name, offset, size, node); } } } diff --git a/src/game/scroll_targets.c b/src/game/scroll_targets.c index a2af8bc19..f59bc9e7f 100644 --- a/src/game/scroll_targets.c +++ b/src/game/scroll_targets.c @@ -22,6 +22,7 @@ struct ScrollTarget *get_scroll_targets(u32 id, u16 size, u16 offset) { free(scroll->vertices); scroll->vertices = newVtx; scroll->size = size; + scroll->capacity = size; scroll->hasOffset = true; } @@ -51,6 +52,7 @@ struct ScrollTarget* find_or_create_scroll_targets(u32 id, bool hasOffset) { scroll = malloc(sizeof(struct ScrollTarget)); scroll->id = id; scroll->size = 0; + scroll->capacity = 0; scroll->vertices = NULL; scroll->hasOffset = hasOffset; scroll->hasInterpInit = false; @@ -75,25 +77,24 @@ struct ScrollTarget* find_or_create_scroll_targets(u32 id, bool hasOffset) { void add_vtx_scroll_target(u32 id, Vtx *vtx, u32 size, bool hasOffset) { struct ScrollTarget *scroll = find_or_create_scroll_targets(id, hasOffset); if (!scroll) { return; } - u32 oldSize = sizeof(Vtx*) * scroll->size; - u32 newSize = oldSize + (sizeof(Vtx*) * size); - Vtx* *newArray = realloc(scroll->vertices, newSize); - - if (!newArray) { - newArray = malloc(newSize); - if (!newArray) { return; } - if (scroll->vertices && oldSize > 0) { - memcpy(newArray, scroll->vertices, oldSize); + u32 neededSize = scroll->size + size; + if (neededSize > scroll->capacity) { + u32 newCapacity = scroll->capacity == 0 ? 
16 : scroll->capacity; + while (newCapacity < neededSize) { + newCapacity *= 2; } - free(scroll->vertices); + Vtx* *newArray = realloc(scroll->vertices, sizeof(Vtx*) * newCapacity); + if (!newArray) { return; } + scroll->vertices = newArray; + scroll->capacity = newCapacity; } - scroll->vertices = newArray; - + Vtx** dest = &scroll->vertices[scroll->size]; for (u32 i = 0; i < size; ++i) { - scroll->vertices[scroll->size++] = &vtx[i]; + dest[i] = &vtx[i]; } + scroll->size += size; } /* diff --git a/src/game/scroll_targets.h b/src/game/scroll_targets.h index 901a9c839..825e13255 100644 --- a/src/game/scroll_targets.h +++ b/src/game/scroll_targets.h @@ -30,6 +30,7 @@ struct ScrollTarget { u32 id; u32 size; + u32 capacity; Vtx* *vertices; bool hasOffset; diff --git a/src/pc/lua/smlua_functions.c b/src/pc/lua/smlua_functions.c index 4d332774f..7f239ecd9 100644 --- a/src/pc/lua/smlua_functions.c +++ b/src/pc/lua/smlua_functions.c @@ -821,8 +821,11 @@ int smlua_func_add_scroll_target(lua_State* L) { } // add_scroll_target used to require offset and size of the vertex buffer to be used - if (!smlua_functions_valid_param_range(L, 2, 4)) { return 0; } int paramCount = lua_gettop(L); + if (paramCount < 2 || paramCount > 4) { + LOG_LUA_LINE("Improper param count: Expected (2 - 4), Received %u", paramCount); + return 0; + } u32 index = smlua_to_integer(L, 1); if (!gSmLuaConvertSuccess) { LOG_LUA("add_scroll_target: Failed to convert parameter 1 for function"); return 0; }