mirror of
https://github.com/coop-deluxe/sm64coopdx.git
synced 2026-05-10 19:01:46 +00:00
massive performance improvements for add_scroll_target (#1219)
Some checks are pending
Build coop / build-linux (push) Waiting to run
Build coop / build-steamos (push) Waiting to run
Build coop / build-windows-opengl (push) Waiting to run
Build coop / build-windows-directx (push) Waiting to run
Build coop / build-macos-arm (push) Waiting to run
Build coop / build-macos-intel (push) Waiting to run
Some checks are pending
Build coop / build-linux (push) Waiting to run
Build coop / build-steamos (push) Waiting to run
Build coop / build-windows-opengl (push) Waiting to run
Build coop / build-windows-directx (push) Waiting to run
Build coop / build-macos-arm (push) Waiting to run
Build coop / build-macos-intel (push) Waiting to run
Loading the scroll targets was probably one of the slowest parts of loading a romhack, because many romhacks make thousands of calls to `add_scroll_target`. For 4,107 calls to `add_scroll_target`, the total time went from ~4.5759 seconds to ~0.0173 seconds.
Changes made:
- Previously, simply finding the material data to scroll was rather slow because it used a linear substring search across all vertices in all levels. To speed this up, I added a cache. The cache avoids checking every level by storing each exact name (rather than a substring) as a key in a hash map, so a lookup becomes a single keyed string lookup. If the cache does not hit, it falls back to the full substring search.
- Changed the vertex buffer management in `scroll_targets.c` to behave more like a modern dynamic array: the buffer capacity is doubled whenever it is too small to hold the vertices being added, rather than being reallocated on every call, to reduce the number of allocations performed.
This commit is contained in:
parent
149cb10153
commit
1e4ede799b
4 changed files with 73 additions and 22 deletions
|
|
@ -1,3 +1,6 @@
|
|||
#include <unordered_map>
|
||||
#include <string_view>
|
||||
#include <string>
|
||||
#include "dynos.cpp.h"
|
||||
extern "C" {
|
||||
#include "game/scroll_targets.h"
|
||||
|
|
@ -7,16 +10,59 @@ extern "C" {
|
|||
// Scroll Targets
|
||||
//
|
||||
|
||||
// Registers the vertices of a matched vertex node as scroll targets.
//
// Forwards a slice of `node->mData`, starting at `offset`, to
// add_vtx_scroll_target() for the scroll target identified by `index`.
//
// index  - scroll target id passed through to add_vtx_scroll_target().
// name   - name that was matched; not read by this function.
// offset - first vertex of the slice; the call is a no-op when it is not
//          strictly inside the node (offset >= node->mSize).
// size   - requested vertex count; 0, or any value larger than what remains
//          after `offset`, selects everything from `offset` to the end.
// node   - vertex data node whose vertices are registered.
static void DynOS_Add_Scroll_Target_Match(u32 index, const char* name, u32 offset, u32 size, DataNode<Vtx>* node) {
|
||||
// Reject offsets past the end so &node->mData[offset] is never out of range.
if (offset >= node->mSize) { return; }
|
||||
// Clamp the requested count to the vertices remaining after `offset`.
u32 finalSize = (size > 0 && size <= (node->mSize - offset)) ? size : (node->mSize - offset);
|
||||
add_vtx_scroll_target(
|
||||
index,
|
||||
&node->mData[offset],
|
||||
finalSize,
|
||||
// Passed as the `hasOffset` argument: true for any non-zero offset.
offset > 0
|
||||
);
|
||||
}
|
||||
|
||||
void DynOS_Add_Scroll_Target(u32 index, const char* name, u32 offset, u32 size) {
|
||||
for (auto& lvlPair : DynOS_Lvl_GetArray()) {
|
||||
for (auto& node : lvlPair.second->mVertices) {
|
||||
static std::unordered_multimap<std::string_view, DataNode<Vtx>*> sVertexNodesExactMap;
|
||||
static std::vector<GfxData*> sLvlGfxDataCache; // cache existing level pointers to know when to rebuild
|
||||
|
||||
auto& lvlArray = DynOS_Lvl_GetArray();
|
||||
|
||||
// Check if cache needs rebuilding
|
||||
bool rebuild = (lvlArray.size() != sLvlGfxDataCache.size());
|
||||
if (!rebuild) {
|
||||
for (size_t i = 0; i < lvlArray.size(); ++i) {
|
||||
if (lvlArray[i].second != sLvlGfxDataCache[i]) {
|
||||
rebuild = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rebuild) {
|
||||
sVertexNodesExactMap.clear();
|
||||
sLvlGfxDataCache.clear();
|
||||
for (const auto& lvlPair : lvlArray) {
|
||||
sLvlGfxDataCache.push_back(lvlPair.second);
|
||||
for (const auto& node : lvlPair.second->mVertices) {
|
||||
sVertexNodesExactMap.emplace(std::string_view(node->mName.begin(), node->mName.Length()), node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check exact match
|
||||
auto range = sVertexNodesExactMap.equal_range(name);
|
||||
if (range.first != range.second) {
|
||||
for (auto it = range.first; it != range.second; ++it) {
|
||||
DynOS_Add_Scroll_Target_Match(index, name, offset, size, it->second);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback to substring search
|
||||
for (const auto& lvlPair : lvlArray) {
|
||||
for (const auto& node : lvlPair.second->mVertices) {
|
||||
if (node->mName.Find(name) >= 0) {
|
||||
add_vtx_scroll_target(
|
||||
index,
|
||||
&node->mData[offset],
|
||||
(size > 0 && size < node->mSize) ? size : node->mSize,
|
||||
offset > 0
|
||||
);
|
||||
DynOS_Add_Scroll_Target_Match(index, name, offset, size, node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ struct ScrollTarget *get_scroll_targets(u32 id, u16 size, u16 offset) {
|
|||
free(scroll->vertices);
|
||||
scroll->vertices = newVtx;
|
||||
scroll->size = size;
|
||||
scroll->capacity = size;
|
||||
scroll->hasOffset = true;
|
||||
}
|
||||
|
||||
|
|
@ -51,6 +52,7 @@ struct ScrollTarget* find_or_create_scroll_targets(u32 id, bool hasOffset) {
|
|||
scroll = malloc(sizeof(struct ScrollTarget));
|
||||
scroll->id = id;
|
||||
scroll->size = 0;
|
||||
scroll->capacity = 0;
|
||||
scroll->vertices = NULL;
|
||||
scroll->hasOffset = hasOffset;
|
||||
scroll->hasInterpInit = false;
|
||||
|
|
@ -75,25 +77,24 @@ struct ScrollTarget* find_or_create_scroll_targets(u32 id, bool hasOffset) {
|
|||
void add_vtx_scroll_target(u32 id, Vtx *vtx, u32 size, bool hasOffset) {
|
||||
struct ScrollTarget *scroll = find_or_create_scroll_targets(id, hasOffset);
|
||||
if (!scroll) { return; }
|
||||
u32 oldSize = sizeof(Vtx*) * scroll->size;
|
||||
u32 newSize = oldSize + (sizeof(Vtx*) * size);
|
||||
|
||||
Vtx* *newArray = realloc(scroll->vertices, newSize);
|
||||
|
||||
if (!newArray) {
|
||||
newArray = malloc(newSize);
|
||||
if (!newArray) { return; }
|
||||
if (scroll->vertices && oldSize > 0) {
|
||||
memcpy(newArray, scroll->vertices, oldSize);
|
||||
u32 neededSize = scroll->size + size;
|
||||
if (neededSize > scroll->capacity) {
|
||||
u32 newCapacity = scroll->capacity == 0 ? 16 : scroll->capacity;
|
||||
while (newCapacity < neededSize) {
|
||||
newCapacity *= 2;
|
||||
}
|
||||
free(scroll->vertices);
|
||||
Vtx* *newArray = realloc(scroll->vertices, sizeof(Vtx*) * newCapacity);
|
||||
if (!newArray) { return; }
|
||||
scroll->vertices = newArray;
|
||||
scroll->capacity = newCapacity;
|
||||
}
|
||||
|
||||
scroll->vertices = newArray;
|
||||
|
||||
Vtx** dest = &scroll->vertices[scroll->size];
|
||||
for (u32 i = 0; i < size; ++i) {
|
||||
scroll->vertices[scroll->size++] = &vtx[i];
|
||||
dest[i] = &vtx[i];
|
||||
}
|
||||
scroll->size += size;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
struct ScrollTarget {
|
||||
u32 id;
|
||||
u32 size;
|
||||
u32 capacity;
|
||||
Vtx* *vertices;
|
||||
|
||||
bool hasOffset;
|
||||
|
|
|
|||
|
|
@ -821,8 +821,11 @@ int smlua_func_add_scroll_target(lua_State* L) {
|
|||
}
|
||||
|
||||
// add_scroll_target used to require offset and size of the vertex buffer to be used
|
||||
if (!smlua_functions_valid_param_range(L, 2, 4)) { return 0; }
|
||||
int paramCount = lua_gettop(L);
|
||||
if (paramCount < 2 || paramCount > 4) {
|
||||
LOG_LUA_LINE("Improper param count: Expected (2 - 4), Received %u", paramCount);
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 index = smlua_to_integer(L, 1);
|
||||
if (!gSmLuaConvertSuccess) { LOG_LUA("add_scroll_target: Failed to convert parameter 1 for function"); return 0; }
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue