massive performance improvements for add_scroll_target (#1219)
Some checks are pending
Build coop / build-linux (push) Waiting to run
Build coop / build-steamos (push) Waiting to run
Build coop / build-windows-opengl (push) Waiting to run
Build coop / build-windows-directx (push) Waiting to run
Build coop / build-macos-arm (push) Waiting to run
Build coop / build-macos-intel (push) Waiting to run

Loading the scroll targets was probably the slowest part of loading a romhack.
The reason for this is that many romhacks can have thousands of calls to `add_scroll_target`.
So, for 4,107 calls to `add_scroll_target`, the time went from ~4.5759 seconds to ~0.0173 seconds in total. 
Changes made:
- Previously, simply finding the material data to scroll was rather slow due to using a linear substring search across all vertices in all levels. To speed this up, I added a cache.
The cache bypasses checking every level by storing the exact string (rather than the substring) in a hashmap, so lookups become a simple case of a string lookup as a key in the map. It falls back to the full lookup if the cache doesn't hit.
- Changed the vertex buffer management in `scroll_targets.c` to behave closer to a modern dynamic array, where the buffer capacity is doubled whenever it runs out of room, to reduce the number of allocations performed.
This commit is contained in:
Isaac0-dev 2026-05-08 20:57:06 +10:00 committed by GitHub
parent 149cb10153
commit 1e4ede799b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 73 additions and 22 deletions

View file

@ -1,3 +1,6 @@
#include <unordered_map>
#include <string_view>
#include <string>
#include "dynos.cpp.h"
extern "C" {
#include "game/scroll_targets.h"
@ -7,16 +10,59 @@ extern "C" {
// Scroll Targets
//
// Registers the vertices of a single matched vertex node as a scroll target.
//   index  - scroll target id, forwarded to add_vtx_scroll_target
//   name   - name that was matched; not read by this helper
//   offset - index of the first vertex of the node to register
//   size   - requested vertex count; 0 or a value larger than what remains
//            after `offset` means "everything from `offset` to the end of the node"
//   node   - vertex node whose data is registered
static void DynOS_Add_Scroll_Target_Match(u32 index, const char* name, u32 offset, u32 size, DataNode<Vtx>* node) {
// Offset at or past the end of the node: there is nothing to register.
if (offset >= node->mSize) { return; }
// Use the requested size only when it is non-zero and fits in the remaining
// (node->mSize - offset) vertices; otherwise use all remaining vertices.
u32 finalSize = (size > 0 && size <= (node->mSize - offset)) ? size : (node->mSize - offset);
add_vtx_scroll_target(
index,
&node->mData[offset],
finalSize,
offset > 0 // passed as the hasOffset argument
);
}
void DynOS_Add_Scroll_Target(u32 index, const char* name, u32 offset, u32 size) {
for (auto& lvlPair : DynOS_Lvl_GetArray()) {
for (auto& node : lvlPair.second->mVertices) {
static std::unordered_multimap<std::string_view, DataNode<Vtx>*> sVertexNodesExactMap;
static std::vector<GfxData*> sLvlGfxDataCache; // cache existing level pointers to know when to rebuild
auto& lvlArray = DynOS_Lvl_GetArray();
// Check if cache needs rebuilding
bool rebuild = (lvlArray.size() != sLvlGfxDataCache.size());
if (!rebuild) {
for (size_t i = 0; i < lvlArray.size(); ++i) {
if (lvlArray[i].second != sLvlGfxDataCache[i]) {
rebuild = true;
break;
}
}
}
if (rebuild) {
sVertexNodesExactMap.clear();
sLvlGfxDataCache.clear();
for (const auto& lvlPair : lvlArray) {
sLvlGfxDataCache.push_back(lvlPair.second);
for (const auto& node : lvlPair.second->mVertices) {
sVertexNodesExactMap.emplace(std::string_view(node->mName.begin(), node->mName.Length()), node);
}
}
}
// Check exact match
auto range = sVertexNodesExactMap.equal_range(name);
if (range.first != range.second) {
for (auto it = range.first; it != range.second; ++it) {
DynOS_Add_Scroll_Target_Match(index, name, offset, size, it->second);
}
return;
}
// Fallback to substring search
for (const auto& lvlPair : lvlArray) {
for (const auto& node : lvlPair.second->mVertices) {
if (node->mName.Find(name) >= 0) {
add_vtx_scroll_target(
index,
&node->mData[offset],
(size > 0 && size < node->mSize) ? size : node->mSize,
offset > 0
);
DynOS_Add_Scroll_Target_Match(index, name, offset, size, node);
}
}
}

View file

@ -22,6 +22,7 @@ struct ScrollTarget *get_scroll_targets(u32 id, u16 size, u16 offset) {
free(scroll->vertices);
scroll->vertices = newVtx;
scroll->size = size;
scroll->capacity = size;
scroll->hasOffset = true;
}
@ -51,6 +52,7 @@ struct ScrollTarget* find_or_create_scroll_targets(u32 id, bool hasOffset) {
scroll = malloc(sizeof(struct ScrollTarget));
scroll->id = id;
scroll->size = 0;
scroll->capacity = 0;
scroll->vertices = NULL;
scroll->hasOffset = hasOffset;
scroll->hasInterpInit = false;
@ -75,25 +77,24 @@ struct ScrollTarget* find_or_create_scroll_targets(u32 id, bool hasOffset) {
void add_vtx_scroll_target(u32 id, Vtx *vtx, u32 size, bool hasOffset) {
struct ScrollTarget *scroll = find_or_create_scroll_targets(id, hasOffset);
if (!scroll) { return; }
u32 oldSize = sizeof(Vtx*) * scroll->size;
u32 newSize = oldSize + (sizeof(Vtx*) * size);
Vtx* *newArray = realloc(scroll->vertices, newSize);
if (!newArray) {
newArray = malloc(newSize);
if (!newArray) { return; }
if (scroll->vertices && oldSize > 0) {
memcpy(newArray, scroll->vertices, oldSize);
u32 neededSize = scroll->size + size;
if (neededSize > scroll->capacity) {
u32 newCapacity = scroll->capacity == 0 ? 16 : scroll->capacity;
while (newCapacity < neededSize) {
newCapacity *= 2;
}
free(scroll->vertices);
Vtx* *newArray = realloc(scroll->vertices, sizeof(Vtx*) * newCapacity);
if (!newArray) { return; }
scroll->vertices = newArray;
scroll->capacity = newCapacity;
}
scroll->vertices = newArray;
Vtx** dest = &scroll->vertices[scroll->size];
for (u32 i = 0; i < size; ++i) {
scroll->vertices[scroll->size++] = &vtx[i];
dest[i] = &vtx[i];
}
scroll->size += size;
}
/*

View file

@ -30,6 +30,7 @@
struct ScrollTarget {
u32 id;
u32 size;
u32 capacity;
Vtx* *vertices;
bool hasOffset;

View file

@ -821,8 +821,11 @@ int smlua_func_add_scroll_target(lua_State* L) {
}
// add_scroll_target used to require offset and size of the vertex buffer to be used
if (!smlua_functions_valid_param_range(L, 2, 4)) { return 0; }
int paramCount = lua_gettop(L);
if (paramCount < 2 || paramCount > 4) {
LOG_LUA_LINE("Improper param count: Expected (2 - 4), Received %u", paramCount);
return 0;
}
u32 index = smlua_to_integer(L, 1);
if (!gSmLuaConvertSuccess) { LOG_LUA("add_scroll_target: Failed to convert parameter 1 for function"); return 0; }