mirror of
https://github.com/N64Recomp/N64Recomp.git
synced 2026-06-11 02:21:17 +00:00
RSPRecomp: Path A — persistent RSP GPRs across run_task calls
Real RSP hardware retains GPRs across task switches. rspboot only
writes $1/$2/$3/$4/$7 before jumping to the loaded ucode; everything
else is whatever the previous task left. The previous emit zero-init'd
all 32 GPRs at every entry, breaking any ucode that depends on
inherited state — e.g. Pokemon Stadium's libultra aspMain reads $29
on its first dispatch iteration expecting it from a prior run.
RSPRecomp/src/rsp_recomp.cpp:
* All GPRs (and dma_*, jump_target, rsp) are now emitted as C++
references into *ctx — writes auto-persist through to the backing
RspContext, no manual store-back at exit points.
* No-overlay case (Stadium aspMain shape) emits an `_impl(rdram,
ctx)` function plus a legacy-ABI wrapper `(rdram, ucode_addr)`
that owns a `static thread_local RspContext`. The wrapper preserves
the runtime ABI (no librecomp change needed) while the static
thread_local delivers cross-run_task GPR retention.
* Overlay-swap function's stack-local `RspContext ctx{}` promoted to
`static thread_local` for the same reason.
* write_overlay_swap_return reduced from 9 lines of manual store-back
to just `return RspExitReason::SwapOverlay` — references handle it.
(NOTE: original local commit 5c6c654 also bundled per-register cop0
dispatch + cop0_regs[32] storage. That portion is split out for a
separate follow-up PR — it depends on a runtime API not yet in
upstream N64ModernRuntime.)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
81213c1831
commit
b288b6319e
1 changed files with 92 additions and 58 deletions
|
|
@ -593,16 +593,11 @@ void write_indirect_jumps(std::ofstream& output_file, const BranchTargets& branc
|
|||
}
|
||||
|
||||
void write_overlay_swap_return(std::ofstream& output_file) {
|
||||
// GPRs / dma_*/jump_target / rsp are emitted as C++ references into
|
||||
// *ctx (see create_function), so writes through the local names
|
||||
// already land in ctx — no explicit store-back needed here.
|
||||
fmt::print(output_file,
|
||||
"do_overlay_swap:\n"
|
||||
" ctx->r1 = r1; ctx->r2 = r2; ctx->r3 = r3; ctx->r4 = r4; ctx->r5 = r5; ctx->r6 = r6; ctx->r7 = r7;\n"
|
||||
" ctx->r8 = r8; ctx->r9 = r9; ctx->r10 = r10; ctx->r11 = r11; ctx->r12 = r12; ctx->r13 = r13; ctx->r14 = r14; ctx->r15 = r15;\n"
|
||||
" ctx->r16 = r16; ctx->r17 = r17; ctx->r18 = r18; ctx->r19 = r19; ctx->r20 = r20; ctx->r21 = r21; ctx->r22 = r22; ctx->r23 = r23;\n"
|
||||
" ctx->r24 = r24; ctx->r25 = r25; ctx->r26 = r26; ctx->r27 = r27; ctx->r28 = r28; ctx->r29 = r29; ctx->r30 = r30; ctx->r31 = r31;\n"
|
||||
" ctx->dma_mem_address = dma_mem_address;\n"
|
||||
" ctx->dma_dram_address = dma_dram_address;\n"
|
||||
" ctx->jump_target = jump_target;\n"
|
||||
" ctx->rsp = rsp;\n"
|
||||
" return RspExitReason::SwapOverlay;\n");
|
||||
}
|
||||
|
||||
|
|
@ -950,10 +945,14 @@ void create_overlay_swap_function(const std::string& function_name, std::ofstrea
|
|||
}
|
||||
fmt::print(output_file, "}};\n\n");
|
||||
|
||||
// Main function
|
||||
// Main function — owns a static thread_local RspContext so GPRs
|
||||
// persist across run_task calls (matches real-hardware semantics:
|
||||
// rspboot only resets $1/$2/$3/$4/$7; everything else inherits
|
||||
// from the previous task). See create_function() for the same
|
||||
// pattern in the no-overlay case.
|
||||
fmt::print(output_file,
|
||||
"RspExitReason {}(uint8_t* rdram, uint32_t ucode_addr) {{\n"
|
||||
" RspContext ctx{{}};\n",
|
||||
" static thread_local RspContext ctx{{}};\n",
|
||||
config.output_function_name);
|
||||
|
||||
std::string slots_init_str = "";
|
||||
|
|
@ -1011,59 +1010,76 @@ void create_function(const std::string& function_name, std::ofstream& output_fil
|
|||
}
|
||||
|
||||
// Write function
|
||||
if (is_permutation) {
|
||||
//
|
||||
// Two emit shapes — both take RspContext* now:
|
||||
//
|
||||
// is_permutation = true : called by the overlay-swap wrapper
|
||||
// with a persistent ctx; resume targets
|
||||
// may be jumped to.
|
||||
//
|
||||
// is_permutation = false : the no-overlay case (e.g. Stadium's
|
||||
// aspMain). We emit an _impl function
|
||||
// taking ctx, plus a legacy-ABI wrapper
|
||||
// (rdram, ucode_addr) that owns a
|
||||
// static thread_local RspContext so
|
||||
// GPRs persist across run_task calls
|
||||
// (matches real-hardware semantics:
|
||||
// rspboot only writes $1/$2/$3/$4/$7;
|
||||
// everything else is whatever the prior
|
||||
// task left).
|
||||
//
|
||||
// GPRs / dma_* / jump_target / rsp are emitted as C++ references
|
||||
// into *ctx, so writes through the local names auto-persist with
|
||||
// no manual store-back. Exit paths (return / SwapOverlay /
|
||||
// UnhandledJumpTarget / etc.) all benefit.
|
||||
std::string impl_function_name = is_permutation
|
||||
? function_name // permutation/initial: name as given
|
||||
: (function_name + "_impl"); // no-overlay: wrap with _impl
|
||||
|
||||
fmt::print(output_file,
|
||||
"RspExitReason {}(uint8_t* rdram, RspContext* ctx) {{\n"
|
||||
" uint32_t& r1 = ctx->r1; uint32_t& r2 = ctx->r2; uint32_t& r3 = ctx->r3; uint32_t& r4 = ctx->r4; uint32_t& r5 = ctx->r5; uint32_t& r6 = ctx->r6; uint32_t& r7 = ctx->r7;\n"
|
||||
" uint32_t& r8 = ctx->r8; uint32_t& r9 = ctx->r9; uint32_t& r10 = ctx->r10; uint32_t& r11 = ctx->r11; uint32_t& r12 = ctx->r12; uint32_t& r13 = ctx->r13; uint32_t& r14 = ctx->r14; uint32_t& r15 = ctx->r15;\n"
|
||||
" uint32_t& r16 = ctx->r16; uint32_t& r17 = ctx->r17; uint32_t& r18 = ctx->r18; uint32_t& r19 = ctx->r19; uint32_t& r20 = ctx->r20; uint32_t& r21 = ctx->r21; uint32_t& r22 = ctx->r22; uint32_t& r23 = ctx->r23;\n"
|
||||
" uint32_t& r24 = ctx->r24; uint32_t& r25 = ctx->r25; uint32_t& r26 = ctx->r26; uint32_t& r27 = ctx->r27; uint32_t& r28 = ctx->r28; uint32_t& r29 = ctx->r29; uint32_t& r30 = ctx->r30; uint32_t& r31 = ctx->r31;\n"
|
||||
" uint32_t& dma_mem_address = ctx->dma_mem_address; uint32_t& dma_dram_address = ctx->dma_dram_address; uint32_t& jump_target = ctx->jump_target;\n"
|
||||
" const char * debug_file = NULL; int debug_line = 0;\n"
|
||||
" RSP& rsp = ctx->rsp;\n", impl_function_name);
|
||||
|
||||
// Permutation-only: handle resume targets when re-entering after a
|
||||
// SwapOverlay round-trip. is_initial=true means "first call into
|
||||
// the ucode for this task" so there's no prior resume to honor.
|
||||
if (is_permutation && !is_initial) {
|
||||
fmt::print(output_file,
|
||||
"RspExitReason {}(uint8_t* rdram, RspContext* ctx) {{\n"
|
||||
" uint32_t r1 = ctx->r1, r2 = ctx->r2, r3 = ctx->r3, r4 = ctx->r4, r5 = ctx->r5, r6 = ctx->r6, r7 = ctx->r7;\n"
|
||||
" uint32_t r8 = ctx->r8, r9 = ctx->r9, r10 = ctx->r10, r11 = ctx->r11, r12 = ctx->r12, r13 = ctx->r13, r14 = ctx->r14, r15 = ctx->r15;\n"
|
||||
" uint32_t r16 = ctx->r16, r17 = ctx->r17, r18 = ctx->r18, r19 = ctx->r19, r20 = ctx->r20, r21 = ctx->r21, r22 = ctx->r22, r23 = ctx->r23;\n"
|
||||
" uint32_t r24 = ctx->r24, r25 = ctx->r25, r26 = ctx->r26, r27 = ctx->r27, r28 = ctx->r28, r29 = ctx->r29, r30 = ctx->r30, r31 = ctx->r31;\n"
|
||||
" uint32_t dma_mem_address = ctx->dma_mem_address, dma_dram_address = ctx->dma_dram_address, jump_target = ctx->jump_target;\n"
|
||||
" const char * debug_file = NULL; int debug_line = 0;\n"
|
||||
" RSP rsp = ctx->rsp;\n", function_name);
|
||||
" if (ctx->resume_delay) {{\n"
|
||||
" switch (ctx->resume_address) {{\n");
|
||||
|
||||
// Write jumps to resume targets
|
||||
if (!is_initial) {
|
||||
fmt::print(output_file,
|
||||
" if (ctx->resume_delay) {{\n"
|
||||
" switch (ctx->resume_address) {{\n");
|
||||
|
||||
for (uint32_t address : resume_targets.delay_targets) {
|
||||
fmt::print(output_file, " case 0x{0:04X}: goto R_{0:04X}_delay;\n",
|
||||
address);
|
||||
}
|
||||
|
||||
fmt::print(output_file,
|
||||
" }}\n"
|
||||
" }} else {{\n"
|
||||
" switch (ctx->resume_address) {{\n");
|
||||
|
||||
for (uint32_t address : resume_targets.non_delay_targets) {
|
||||
fmt::print(output_file, " case 0x{0:04X}: goto R_{0:04X};\n",
|
||||
address);
|
||||
}
|
||||
|
||||
fmt::print(output_file,
|
||||
" }}\n"
|
||||
" }}\n"
|
||||
" printf(\"Unhandled resume target 0x%04X (delay slot: %d) in microcode {}\\n\", ctx->resume_address, ctx->resume_delay);\n"
|
||||
" return RspExitReason::UnhandledResumeTarget;\n",
|
||||
config.output_function_name);
|
||||
for (uint32_t address : resume_targets.delay_targets) {
|
||||
fmt::print(output_file, " case 0x{0:04X}: goto R_{0:04X}_delay;\n",
|
||||
address);
|
||||
}
|
||||
|
||||
fmt::print(output_file, " r1 = 0xFC0;\n");
|
||||
} else {
|
||||
fmt::print(output_file,
|
||||
"RspExitReason {}(uint8_t* rdram, [[maybe_unused]] uint32_t ucode_addr) {{\n"
|
||||
" uint32_t r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0, r6 = 0, r7 = 0;\n"
|
||||
" uint32_t r8 = 0, r9 = 0, r10 = 0, r11 = 0, r12 = 0, r13 = 0, r14 = 0, r15 = 0;\n"
|
||||
" uint32_t r16 = 0, r17 = 0, r18 = 0, r19 = 0, r20 = 0, r21 = 0, r22 = 0, r23 = 0;\n"
|
||||
" uint32_t r24 = 0, r25 = 0, r26 = 0, r27 = 0, r28 = 0, r29 = 0, r30 = 0, r31 = 0;\n"
|
||||
" uint32_t dma_mem_address = 0, dma_dram_address = 0, jump_target = 0;\n"
|
||||
" const char * debug_file = NULL; int debug_line = 0;\n"
|
||||
" RSP rsp{{}};\n"
|
||||
" r1 = 0xFC0;\n", function_name);
|
||||
" }}\n"
|
||||
" }} else {{\n"
|
||||
" switch (ctx->resume_address) {{\n");
|
||||
|
||||
for (uint32_t address : resume_targets.non_delay_targets) {
|
||||
fmt::print(output_file, " case 0x{0:04X}: goto R_{0:04X};\n",
|
||||
address);
|
||||
}
|
||||
|
||||
fmt::print(output_file,
|
||||
" }}\n"
|
||||
" }}\n"
|
||||
" printf(\"Unhandled resume target 0x%04X (delay slot: %d) in microcode {}\\n\", ctx->resume_address, ctx->resume_delay);\n"
|
||||
" return RspExitReason::UnhandledResumeTarget;\n",
|
||||
config.output_function_name);
|
||||
}
|
||||
|
||||
// rspboot semantics: $1 is reset to 0xFC0 at every entry. All other
|
||||
// GPRs persist from the previous task (already in *ctx via refs).
|
||||
fmt::print(output_file, " r1 = 0xFC0;\n");
|
||||
// Write each instruction
|
||||
for (size_t instr_index = 0; instr_index < instrs.size(); instr_index++) {
|
||||
process_instruction(instr_index, instrs, output_file, branch_targets, config.unsupported_instructions, resume_targets, is_permutation, false, false);
|
||||
|
|
@ -1080,8 +1096,26 @@ void create_function(const std::string& function_name, std::ofstream& output_fil
|
|||
write_overlay_swap_return(output_file);
|
||||
}
|
||||
|
||||
// End the file
|
||||
// End the impl function
|
||||
fmt::print(output_file, "}}\n");
|
||||
|
||||
// For the no-overlay case, also emit a legacy-ABI wrapper so the
|
||||
// runtime (which calls via the RspUcodeFunc typedef in rsp.hpp,
|
||||
// signature `(rdram, ucode_addr)`) can keep calling unchanged.
|
||||
// The wrapper owns a static thread_local RspContext so GPRs
|
||||
// persist across run_task calls. Per-ucode storage is correct for
|
||||
// the common pattern (same ucode invoked repeatedly); games that
|
||||
// depend on cross-ucode GPR leak would need a runtime-managed
|
||||
// shared context — separate librecomp change, not this engine fix.
|
||||
if (!is_permutation) {
|
||||
fmt::print(output_file,
|
||||
"\n"
|
||||
"RspExitReason {0}(uint8_t* rdram, [[maybe_unused]] uint32_t ucode_addr) {{\n"
|
||||
" static thread_local RspContext persistent_ctx{{}};\n"
|
||||
" return {0}_impl(rdram, &persistent_ctx);\n"
|
||||
"}}\n",
|
||||
function_name);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue