N64Recomp/src/decompressed.cpp
Matthew Stanley 6f9649c7e7 decompressed: per-variant synthetic link identities for pattern fragments
Path 2 of the pattern-fragment dispatch architecture: each variant of
a [[input.decompressed_section_pattern]] now gets a unique link-time
ram_addr from a synthetic vram pool (0xC0000000+, KSEG2/KSEG3 — unused
by N64 software so it can't collide with engine-resident sections like
RSP at 0xA4000000+).

Why: when multiple variants share a single canonical link bucket
(e.g. all stadium_models pattern variants at 0x8FF00000), runtime
fragment-vaddr resolution via gFragments[id] is single-pointer and
ambiguous when more than one variant is host-resident at the same
time. Per-variant synthetic ram_addrs make each variant's RELOC_HI16
/ RELOC_LO16 emit produce a unique 0xCXXXXXXX literal at runtime,
giving variant-internal references unambiguous identity without
depending on caller PC, host stack walks, or data-context tracking.

Implementation:

- add_decompressed_section accepts an override_link_ram_addr param.
  The bytes-encoded `vram` (= canonical link bucket) is passed to
  parse_fragment_relocs and discover_function_bounds (so jump tables
  resolve correctly against the body's encoded references), while
  section.ram_addr is set to the override. The two roles of vram are
  cleanly separated.

- New original_pattern_id field on Section. Populated for synthetic-
  link variants with the original game-side fragment id derived from
  the pattern's canonical bucket (e.g. 0xEF for stadium_models).
  Lets the runtime candidate filter know which game id should
  include this synthetic section as a candidate, eliminating cross-
  pattern hash-collision misregistration.

- main.cpp emit: section_load_table now writes original_pattern_id
  into the SectionTableEntry initializer.

- decompressed.cpp pattern loop: every unique variant now gets
  synthetic ram_addr = 0xC0000000 + variant_idx * 0x100000 (1 MB
  stride, ~286 KB largest observed variant). For Stadium's 279
  unique variants the pool occupies 0xC0000000..0xCDB00000, well
  within the runtime-side 512-bucket capacity.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 21:47:45 -07:00

921 lines
38 KiB
C++

#include "decompressed.h"
#include <cstdio>
#include <cstring>
#include <fstream>
#include <unordered_map>
#include <vector>
#include "compression/pers_szp.h"
#include "compression/yay0.h"
#include "fmt/format.h"
#include "rabbitizer.hpp"
#include <set>
#include "analysis.h"
namespace N64Recomp {
namespace {
// Decodes the four bytes starting at `p` as a single big-endian 32-bit
// value: p[0] supplies the most significant byte, p[3] the least.
// The caller must guarantee that at least four bytes are readable.
uint32_t read_be_u32(const uint8_t* p) {
    uint32_t value = 0;
    for (int i = 0; i < 4; i++) {
        value = (value << 8) | uint32_t(p[i]);
    }
    return value;
}
// 64-bit FNV-1a hash of the `len` bytes starting at `data`.
//
// Serves two purposes in this file: deduplicating wrappers whose
// decompressed bytes are byte-for-byte identical (Stadium's 0x8FF00000
// slot has ~11 such pairs out of 279), and acting as the runtime dispatch
// key when multiple wrappers share a link vram.
//
// An empty input yields the FNV offset basis unchanged.
uint64_t fnv1a_64(const uint8_t* data, size_t len) {
    constexpr uint64_t kOffsetBasis = 0xCBF29CE484222325ull;
    constexpr uint64_t kPrime = 0x100000001B3ull;

    uint64_t hash = kOffsetBasis;
    const uint8_t* const end = data + len;
    for (const uint8_t* cur = data; cur != end; ++cur) {
        // FNV-1a order: fold the byte in first, then multiply.
        hash = (hash ^ uint64_t(*cur)) * kPrime;
    }
    return hash;
}
// Loads the whole file at `path` into a byte vector.
//
// An empty vector is returned for every failure mode: the file cannot be
// opened, its size cannot be determined or is zero, or the read does not
// complete. Callers therefore cannot distinguish "empty file" from "error".
std::vector<uint8_t> read_rom_file(const std::filesystem::path& path) {
    std::vector<uint8_t> contents;

    // Open positioned at the end so the initial tell gives the file size.
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (in.good()) {
        const auto end_pos = in.tellg();
        if (!(end_pos <= 0)) {
            contents.resize(static_cast<size_t>(end_pos));
            in.seekg(0, std::ios::beg);
            in.read(reinterpret_cast<char*>(contents.data()), end_pos);
            if (!in.good()) {
                // Short or failed read: report it as "no data".
                contents.clear();
            }
        }
    }
    return contents;
}
// Stadium's runtime reloc-table format (see disasm/src/memmap.c
// Memmap_RelocateFragment) stores one uint32 per reloc, preceded by a
// uint32 count:
//   bits 31:24  type   (2 = R_MIPS_32, 4 = R_MIPS_26,
//                       5 = R_MIPS_HI16, 6 = R_MIPS_LO16)
//   bits 23:0   offset into the fragment
//
// Stadium's type codes are not the ELF type codes, so this maps a Stadium
// code to the recompiler's RelocType. Any code other than 2/4/5/6 maps to
// R_MIPS_NONE, which callers treat as "unknown".
RelocType translate_stadium_reloc_type(uint8_t stadium_type) {
    if (stadium_type == 2) {
        return RelocType::R_MIPS_32;
    }
    if (stadium_type == 4) {
        return RelocType::R_MIPS_26;
    }
    if (stadium_type == 5) {
        return RelocType::R_MIPS_HI16;
    }
    if (stadium_type == 6) {
        return RelocType::R_MIPS_LO16;
    }
    return RelocType::R_MIPS_NONE;
}
// Parses the FRAGMENT-format header at the start of the decompressed
// blob and the trailing reloc table. Populates `section.relocs`. The
// per-reloc target_section is filled in by the caller after all
// decompressed sections are added (so cross-fragment R_MIPS_32 targets
// can be resolved against the full section list).
//
// Per-type computation of `target_section_offset` (the field the
// recompiler reads at codegen time):
// - R_MIPS_32: word value is an absolute pointer; offset =
// word - section_vram (then refined cross-section by caller).
// - R_MIPS_26: J/JAL target = (word & 0x03FFFFFF) << 2 OR'd with
// PC[31:28]; offset = target - section_vram.
// - R_MIPS_HI16/LO16: paired. Combined immediate =
// (HI << 16) + (int16_t)LO. Offset = combined - section_vram.
// The recompiler emits both RELOC_HI16(idx, off) and
// RELOC_LO16(idx, off) using each reloc's target_section_offset,
// so both members of the pair carry the SAME computed offset.
//
// Stadium's reloc table orders HI16 immediately followed by its paired
// LO16 (matches the body's instruction order). We pair by adjacency.
//
// Returns false if the header is malformed.
bool parse_fragment_relocs(const std::vector<uint8_t>& bytes,
uint32_t section_vram,
uint16_t section_index,
Section& section_out) {
if (bytes.size() < 0x20) {
std::fprintf(stderr,
"decompressed: blob smaller than FRAGMENT header (size=0x%zX)\n",
bytes.size());
return false;
}
if (std::memcmp(bytes.data() + 0x08, "FRAGMENT", 8) != 0) {
std::fprintf(stderr,
"decompressed: missing FRAGMENT magic at +0x08\n");
return false;
}
const uint32_t reloc_offset = read_be_u32(bytes.data() + 0x14);
const uint32_t size_in_ram = read_be_u32(bytes.data() + 0x1C);
if (reloc_offset > bytes.size() || size_in_ram > bytes.size()) {
std::fprintf(stderr,
"decompressed: relocOffset 0x%X / sizeInRam 0x%X exceed blob "
"size 0x%zX\n",
reloc_offset, size_in_ram, bytes.size());
return false;
}
if (reloc_offset + 4 > bytes.size()) {
std::fprintf(stderr,
"decompressed: no room for reloc count at offset 0x%X\n",
reloc_offset);
return false;
}
const uint32_t n_relocs = read_be_u32(bytes.data() + reloc_offset);
const size_t reloc_table_end = reloc_offset + 4 + 4 * size_t(n_relocs);
if (reloc_table_end > bytes.size()) {
std::fprintf(stderr,
"decompressed: reloc table (count=%u) overruns blob\n", n_relocs);
return false;
}
// First pass: parse raw entries.
struct RawReloc {
RelocType type;
uint32_t section_offset;
uint32_t word; // instruction word at section_offset
};
std::vector<RawReloc> raw;
raw.reserve(n_relocs);
for (uint32_t i = 0; i < n_relocs; i++) {
const uint32_t entry = read_be_u32(
bytes.data() + reloc_offset + 4 + 4 * i);
const uint8_t stadium_type = uint8_t((entry >> 24) & 0x7F);
const uint32_t section_offset = entry & 0x00FFFFFFu;
const RelocType type = translate_stadium_reloc_type(stadium_type);
if (type == RelocType::R_MIPS_NONE) {
std::fprintf(stderr,
"decompressed: unknown Stadium reloc type 0x%X at "
"offset 0x%X — skipped\n", stadium_type, section_offset);
continue;
}
if (section_offset + 4 > size_in_ram) {
std::fprintf(stderr,
"decompressed: reloc[%u] offset 0x%X out of body\n",
i, section_offset);
continue;
}
const uint32_t word = read_be_u32(bytes.data() + section_offset);
raw.push_back({type, section_offset, word});
}
// Second pass: emit Reloc entries. HI16 pairs with the next LO16
// in the list (Stadium's table orders them this way).
section_out.relocs.reserve(raw.size());
for (size_t i = 0; i < raw.size(); i++) {
const RawReloc& rr = raw[i];
Reloc r{};
r.address = section_vram + rr.section_offset;
r.target_section_offset = 0;
r.target_section = section_index; // default; cross-section pass refines
r.symbol_index = uint32_t(-1);
r.type = rr.type;
r.reference_symbol = false;
switch (rr.type) {
case RelocType::R_MIPS_32: {
r.target_section_offset = rr.word - section_vram;
break;
}
case RelocType::R_MIPS_26: {
const uint32_t pc_high = section_vram & 0xF0000000u;
const uint32_t target = pc_high |
((rr.word & 0x03FFFFFFu) << 2);
r.target_section_offset = target - section_vram;
break;
}
case RelocType::R_MIPS_HI16: {
// Pair with next LO16 in raw list.
size_t j = i + 1;
while (j < raw.size() && raw[j].type != RelocType::R_MIPS_LO16) {
j++;
}
if (j >= raw.size()) {
std::fprintf(stderr,
"decompressed: HI16 at offset 0x%X has no paired "
"LO16 in reloc table\n", rr.section_offset);
break;
}
const uint16_t hi_imm = uint16_t(rr.word & 0xFFFFu);
const int16_t lo_imm = int16_t(raw[j].word & 0xFFFFu);
const uint32_t combined =
(uint32_t(hi_imm) << 16) + uint32_t(int32_t(lo_imm));
r.target_section_offset = combined - section_vram;
break;
}
case RelocType::R_MIPS_LO16: {
// Find preceding HI16. We scan backward for the most
// recent HI16 (matches Stadium's adjacency convention).
size_t j = i;
bool found = false;
while (j > 0) {
j--;
if (raw[j].type == RelocType::R_MIPS_HI16) {
const uint16_t hi_imm = uint16_t(raw[j].word & 0xFFFFu);
const int16_t lo_imm = int16_t(rr.word & 0xFFFFu);
const uint32_t combined =
(uint32_t(hi_imm) << 16) +
uint32_t(int32_t(lo_imm));
r.target_section_offset = combined - section_vram;
found = true;
break;
}
}
if (!found) {
std::fprintf(stderr,
"decompressed: LO16 at offset 0x%X has no paired "
"HI16 in reloc table\n", rr.section_offset);
}
break;
}
default:
break;
}
section_out.relocs.emplace_back(r);
if (rr.type == RelocType::R_MIPS_32) {
section_out.has_mips32_relocs = true;
}
}
return true;
}
// Run once every decompressed section has been added. Visits the
// R_MIPS_32 relocs of every section from `first_added_index` onward and
// re-targets those whose absolute target address lies inside a different
// section.
//
// A target that lies inside the reloc's own section is left untouched, so
// self-targeting relocs stay self-targeting even when another section
// occupies the same ram_addr range. (A plain first-match scan would hand
// such a reloc to whichever overlapping section comes first in the list.)
//
// Relocs whose target falls in no section keep their parser-assigned
// defaults.
//
// NOTE(review): the target address is rebuilt as
// section.ram_addr + target_section_offset, while parse_fragment_relocs
// computes that offset against the bytes-encoded vram. For a section whose
// ram_addr was overridden with a different link address, a pointer into
// ANOTHER section therefore will not be reconstructed correctly here.
// Fixing that needs the encoded vram, which this function does not have —
// confirm whether such cross-section pointers occur in practice.
void resolve_cross_section_targets(Context& context,
                                   uint16_t first_added_index) {
    // Half-open range test [ram_addr, ram_addr + size), phrased as a
    // subtraction so a section ending at the top of the 32-bit address
    // space cannot wrap the upper bound. Zero-sized sections contain nothing.
    const auto contains = [](const Section& s, uint32_t addr) {
        return s.size != 0 &&
               addr >= s.ram_addr &&
               (addr - s.ram_addr) < s.size;
    };

    for (size_t si = first_added_index; si < context.sections.size(); si++) {
        Section& section = context.sections[si];
        for (Reloc& r : section.relocs) {
            if (r.type != RelocType::R_MIPS_32) continue;

            // Absolute address this pointer refers to.
            const uint32_t target_addr = section.ram_addr +
                                         r.target_section_offset;

            // Prefer the owning section: nothing to retarget.
            if (contains(section, target_addr)) continue;

            // Otherwise hand the reloc to the first section that contains
            // the address, rebasing the offset onto that section.
            for (size_t ti = 0; ti < context.sections.size(); ti++) {
                const Section& candidate = context.sections[ti];
                if (!contains(candidate, target_addr)) continue;
                r.target_section = uint16_t(ti);
                r.target_section_offset = target_addr - candidate.ram_addr;
                break;
            }
        }
    }
}
} // namespace
// Builds one synthesized Section — with its entry trampoline, its first
// implementation function and its reloc table — for every explicitly
// configured decompressed fragment in `configs`.
//
// For each entry the wrapper at cfg.rom_wrapper is decompressed with the
// configured format ("pers_szp_yay0" or "yay0"), the body (everything
// before the reloc table) is copied into context.rom at a synthetic ROM
// address, and the section, functions and relocs are registered on
// `context`. After all entries are processed, cross-section R_MIPS_32
// relocs are retargeted.
//
// Returns true when `configs` is empty or every entry succeeds. Returns
// false (after logging to stderr) on the first failure: unreadable ROM,
// wrapper offset past EOF, unknown format, decompression failure,
// truncated/invalid FRAGMENT header, or reloc parsing failure. Sections
// added before the failing entry remain in `context`.
bool synthesize_decompressed_sections(
    Context& context,
    const std::filesystem::path& rom_path,
    const std::vector<DecompressedSection>& configs)
{
    if (configs.empty()) return true;
    const std::vector<uint8_t> rom = read_rom_file(rom_path);
    if (rom.empty()) {
        std::fprintf(stderr,
            "decompressed: failed to read ROM file: %s\n",
            rom_path.string().c_str());
        return false;
    }
    const uint16_t first_added_index = uint16_t(context.sections.size());
    for (const DecompressedSection& cfg : configs) {
        // Bounds-check the wrapper offset.
        if (cfg.rom_wrapper >= rom.size()) {
            std::fprintf(stderr,
                "decompressed: section %s rom_wrapper 0x%X is past EOF\n",
                cfg.name.c_str(), cfg.rom_wrapper);
            return false;
        }
        // Decompress per format.
        std::vector<uint8_t> blob;
        bool ok = false;
        if (cfg.wrapper_format == "pers_szp_yay0") {
            ok = compression::pers_szp_decompress(
                rom.data() + cfg.rom_wrapper,
                rom.size() - cfg.rom_wrapper,
                blob);
        } else if (cfg.wrapper_format == "yay0") {
            ok = compression::yay0_decompress(
                rom.data() + cfg.rom_wrapper,
                rom.size() - cfg.rom_wrapper,
                blob);
        } else {
            std::fprintf(stderr,
                "decompressed: section %s unknown wrapper_format '%s'\n",
                cfg.name.c_str(), cfg.wrapper_format.c_str());
            return false;
        }
        if (!ok) {
            std::fprintf(stderr,
                "decompressed: section %s failed to decompress wrapper "
                "at ROM 0x%X (format=%s)\n",
                cfg.name.c_str(), cfg.rom_wrapper,
                cfg.wrapper_format.c_str());
            return false;
        }
        // Validate the header BEFORE touching it. The relocOffset field at
        // +0x14 is read and then used as a memcpy length below, so a blob
        // shorter than the 0x20-byte header, or a relocOffset larger than
        // the blob, would otherwise read out of bounds.
        if (blob.size() < 0x20) {
            std::fprintf(stderr,
                "decompressed: section %s blob smaller than FRAGMENT "
                "header (size=0x%zX)\n",
                cfg.name.c_str(), blob.size());
            return false;
        }
        // Section size = relocOffset (body + bss before relocs).
        const uint32_t reloc_offset = read_be_u32(blob.data() + 0x14);
        if (reloc_offset > blob.size()) {
            std::fprintf(stderr,
                "decompressed: section %s relocOffset 0x%X exceeds blob "
                "0x%zX\n",
                cfg.name.c_str(), reloc_offset, blob.size());
            return false;
        }
        // Stash decompressed bytes in context.rom so the existing pipeline
        // (which addresses sections via rom_addr) finds them. The
        // synthesized rom_addr encodes the wrapper offset for traceability:
        //     synthetic_rom = 0xFE000000 | wrapper_offset
        // The 0xFE prefix is reserved for synthesized sections so it never
        // collides with real ROM offsets (ROM is at most 64MB).
        //
        // NOTE(review): two wrappers whose ROM offsets are closer together
        // than the first one's decompressed size get overlapping synthetic
        // ranges under this formula, and the later copy overwrites the
        // earlier body. Confirm the configured wrappers are spaced far
        // enough apart.
        const uint32_t synthetic_rom = 0xFE000000u | cfg.rom_wrapper;
        // Only the body is copied (reloc_offset bytes), NOT the trailing
        // reloc table — that's metadata, not section content.
        const size_t needed_rom_size =
            size_t(synthetic_rom) + reloc_offset;
        if (context.rom.size() < needed_rom_size) {
            context.rom.resize(needed_rom_size, 0);
        }
        std::memcpy(context.rom.data() + synthetic_rom,
                    blob.data(), reloc_offset);
        // Build the Section struct.
        const uint16_t section_index =
            uint16_t(context.sections.size());
        Section section{};
        section.rom_addr = synthetic_rom;
        section.ram_addr = cfg.vram;
        section.size = reloc_offset;
        section.bss_size = 0; // BSS is part of body in this format.
        section.name = cfg.name;
        section.executable = true;
        section.relocatable = cfg.relocatable;
        if (!parse_fragment_relocs(blob, cfg.vram, section_index, section)) {
            std::fprintf(stderr,
                "decompressed: section %s reloc parsing failed\n",
                cfg.name.c_str());
            return false;
        }
        // Add the section to the context. section_functions must grow in
        // lockstep with sections.
        context.sections.emplace_back(std::move(section));
        context.section_functions.emplace_back();
        // Synthesize functions for the FRAGMENT layout:
        //
        //   1. fragment_entry at +0x00 — 8 bytes (J insn + nop) that
        //      jumps to the real implementation at +0x20.
        //   2. The implementation function at +0x20 — runs from +0x20
        //      to the first `jr $ra` (0x03E00008) we encounter in the
        //      body, plus its delay slot.
        //
        // Without (2), the recompiler's emit for (1) sees "branch to
        // 0x...0020 (no symbol)" and falls back to recomp_unhandled_
        // branch, which is a runtime abort. Once (2) exists in
        // functions_by_vram, the J becomes a tail call and dispatch
        // works the same way it does for ELF-symtab-listed functions.
        //
        // Function::words holds raw ROM bytes (big-endian instructions
        // stored in host-endian uint32 — numerically byteswapped from
        // the actual instruction value). The recompilation pass calls
        // byteswap(word) to recover the BE numeric form.
        auto add_function = [&](uint32_t vram, uint32_t rom,
                                std::vector<uint32_t> words,
                                std::string name) {
            const size_t fi = context.functions.size();
            context.functions.emplace_back(
                vram, rom, std::move(words), name,
                section_index, false, false, false);
            context.section_functions[section_index].push_back(fi);
            context.sections[section_index].function_addrs.push_back(vram);
            context.functions_by_vram[vram].push_back(fi);
            context.functions_by_name[name] = fi;
        };
        // (1) Entry trampoline: 8 bytes at vram+0.
        std::vector<uint32_t> entry_words(2);
        std::memcpy(entry_words.data(), blob.data() + 0x00, 8);
        add_function(cfg.vram, synthetic_rom,
                     std::move(entry_words),
                     cfg.name + "_entry");
        // (2) Implementation function at vram+0x20. Scan forward from
        // +0x20 for the first `jr $ra`. The blob holds instructions in
        // big-endian byte order, so 0x03E00008 appears as the bytes
        // 03 E0 00 08. The delay slot is included in the function size.
        constexpr uint32_t IMPL_OFFSET = 0x20;
        const uint8_t jr_ra_be[4] = { 0x03, 0xE0, 0x00, 0x08 };
        size_t impl_end = 0;
        for (size_t off = IMPL_OFFSET; off + 4 <= reloc_offset; off += 4) {
            if (std::memcmp(blob.data() + off, jr_ra_be, 4) == 0) {
                // Include this jr ra and its delay slot, clamped to the
                // body in case the jr is the very last word.
                impl_end = off + 8;
                if (impl_end > reloc_offset) impl_end = reloc_offset;
                break;
            }
        }
        if (impl_end > IMPL_OFFSET) {
            const size_t impl_size = impl_end - IMPL_OFFSET;
            std::vector<uint32_t> impl_words(impl_size / 4);
            std::memcpy(impl_words.data(),
                        blob.data() + IMPL_OFFSET, impl_size);
            // Use the convention func_<vram> so the name matches what
            // the recompiler would have generated from an ELF symbol.
            const std::string impl_name = fmt::format(
                "func_{:08X}", cfg.vram + IMPL_OFFSET);
            add_function(cfg.vram + IMPL_OFFSET,
                         synthetic_rom + IMPL_OFFSET,
                         std::move(impl_words),
                         impl_name);
        } else {
            std::fprintf(stderr,
                "decompressed: section %s — could not locate jr $ra "
                "in body at +0x20; only fragment_entry will be "
                "recompiled (jal targets through the entry will become "
                "runtime aborts)\n", cfg.name.c_str());
        }
        std::fprintf(stderr,
            "decompressed: synthesized section %s @ vram 0x%08X "
            "size 0x%X relocs=%zu (wrapper rom 0x%X format=%s)\n",
            cfg.name.c_str(), cfg.vram, reloc_offset,
            context.sections[section_index].relocs.size(),
            cfg.rom_wrapper, cfg.wrapper_format.c_str());
    }
    // Cross-section R_MIPS_32 retargeting now that all decompressed
    // sections are in context.sections.
    resolve_cross_section_targets(context, first_added_index);
    return true;
}
namespace {
// Adds one synthesized section, its functions and its reloc table to the
// context. `blob` is the decompressed body + reloc table and must start
// with the FRAGMENT header.
//
// Parameters:
//   rom_wrapper            ROM offset of the source wrapper. Not read by
//                          this function.
//   vram                   The BYTES-ENCODED vram — what the body's
//                          R_MIPS_HI16/LO16 / R_MIPS_32 / J/JAL targets are
//                          encoded relative to. Passed to the reloc parser
//                          and to discover_function_bounds, and used as the
//                          vram of the registered functions.
//   override_link_ram_addr If non-zero, stored in section.ram_addr instead
//                          of `vram`: the section's LINK IDENTITY. Pattern
//                          variants use this to get unique link identities
//                          while sharing one bytes-encoded vram.
//   content_hash,
//   original_pattern_id    Stored verbatim on the Section.
//
// Returns the new section's index on success. On failure prints to stderr
// and returns size_t(-1); when the failure is detected after the section
// was appended (function-bounds problems), the section and its entry
// trampoline remain in `context`.
size_t add_decompressed_section(Context& context,
                                const std::vector<uint8_t>& blob,
                                uint32_t rom_wrapper,
                                uint32_t vram,
                                const std::string& section_name,
                                bool relocatable,
                                uint64_t content_hash,
                                uint32_t override_link_ram_addr = 0,
                                uint32_t original_pattern_id = 0xFFFFFFFFu)
{
    const uint32_t link_ram_addr =
        (override_link_ram_addr != 0) ? override_link_ram_addr : vram;
    if (blob.size() < 0x20) {
        std::fprintf(stderr,
            "decompressed: section %s blob smaller than FRAGMENT header\n",
            section_name.c_str());
        return size_t(-1);
    }
    if (std::memcmp(blob.data() + 0x08, "FRAGMENT", 8) != 0) {
        std::fprintf(stderr,
            "decompressed: section %s missing FRAGMENT magic\n",
            section_name.c_str());
        return size_t(-1);
    }
    const uint32_t reloc_offset = read_be_u32(blob.data() + 0x14);
    if (reloc_offset > blob.size()) {
        std::fprintf(stderr,
            "decompressed: section %s relocOffset 0x%X exceeds blob 0x%zX\n",
            section_name.c_str(), reloc_offset, blob.size());
        return size_t(-1);
    }
    // Stash the body in context.rom at a synthetic ROM address that cannot
    // overlap any other section added through this function. A cumulative
    // allocator is used: a static counter grows by each body's size
    // (rounded up to 4 bytes so MIPS instruction reads stay aligned).
    //
    // A formula based on the wrapper's ROM offset (0xFE000000 | wrap_off)
    // does not work for densely packed wrappers: their offsets are closer
    // together than their decompressed sizes, so one section's copy would
    // clobber another's body, including its jump-table entries.
    //
    // The 0xFE000000 base keeps synthetic ranges above any real ROM offset
    // (at most ~64 MB) and leaves 0xFE000000..0xFFFFFFFF = 32 MB of room.
    static uint64_t next_synthetic_rom = 0xFE000000ull;
    const uint64_t advanced_synthetic_rom = next_synthetic_rom +
        ((uint64_t(reloc_offset) + 3u) & ~uint64_t(3u));
    if (advanced_synthetic_rom > 0xFFFFFFFFull) {
        // The counter is only committed on success, so a rejected section
        // does not consume address space.
        std::fprintf(stderr,
            "decompressed: section %s — synthetic_rom counter overflowed "
            "32 bits (next=0x%llX). Engine assumes < 32 MB of "
            "synthesized-section payload total.\n",
            section_name.c_str(),
            (unsigned long long)advanced_synthetic_rom);
        return size_t(-1);
    }
    const uint32_t synthetic_rom = uint32_t(next_synthetic_rom);
    next_synthetic_rom = advanced_synthetic_rom;

    const size_t needed_rom_size = size_t(synthetic_rom) + reloc_offset;
    if (context.rom.size() < needed_rom_size) {
        context.rom.resize(needed_rom_size, 0);
    }
    std::memcpy(context.rom.data() + synthetic_rom,
                blob.data(), reloc_offset);
    const uint16_t section_index = uint16_t(context.sections.size());
    Section section{};
    section.rom_addr = synthetic_rom;
    // Link identity may differ from the bytes-encoded vram; the reloc
    // parser below still receives `vram` so target_section_offset values
    // are correct intra-section byte distances.
    section.ram_addr = link_ram_addr;
    section.size = reloc_offset;
    section.bss_size = 0;
    section.name = section_name;
    section.executable = true;
    section.relocatable = relocatable;
    section.content_hash = content_hash;
    section.original_pattern_id = original_pattern_id;
    if (!parse_fragment_relocs(blob, vram, section_index, section)) {
        return size_t(-1);
    }
    // section_functions must grow in lockstep with sections.
    context.sections.emplace_back(std::move(section));
    context.section_functions.emplace_back();
    // Registers one function in every lookup structure on the context.
    auto add_function = [&](uint32_t f_vram, uint32_t f_rom,
                            std::vector<uint32_t> words,
                            std::string name) {
        const size_t fi = context.functions.size();
        context.functions.emplace_back(
            f_vram, f_rom, std::move(words), name,
            section_index, false, false, false);
        context.section_functions[section_index].push_back(fi);
        context.sections[section_index].function_addrs.push_back(f_vram);
        context.functions_by_vram[f_vram].push_back(fi);
        context.functions_by_name[name] = fi;
    };
    // Stadium has two FRAGMENT shapes that share the same +0x00..0x20
    // header (J trampoline + magic + sizes):
    //
    //   Code fragment:  +0x20 is a real MIPS function ending in jr $ra
    //                   (and possibly more functions interspersed with
    //                   data). Stadium calls the J at +0x00 to dispatch
    //                   into the function.
    //
    //   Data fragment:  +0x20 onwards is pure data (tables of
    //                   (tag, pointer) records, etc.). The J at +0x00
    //                   is a dormant placeholder that Stadium NEVER
    //                   actually calls — Stadium reads the data
    //                   directly. No MIPS function exists.
    //
    // We distinguish by scanning the first 0x100 instructions of the
    // body for ANY jr $ra (0x03E00008). If absent, the fragment is
    // data-only: the section + relocs are registered but NO functions.
    // If something ever does call the entry-trampoline J, the runtime
    // LOOKUP_FUNC reports the miss loudly, which is the correct
    // surface — NOT a stub.
    constexpr uint32_t IMPL_OFFSET = 0x20;
    bool has_jr_ra = false;
    {
        const size_t scan_end = std::min<size_t>(
            reloc_offset, IMPL_OFFSET + 0x400); // first 256 insns
        for (size_t off = IMPL_OFFSET; off + 4 <= scan_end; off += 4) {
            if (read_be_u32(blob.data() + off) == 0x03E00008u) {
                has_jr_ra = true;
                break;
            }
        }
    }
    if (!has_jr_ra) {
        // Data-only fragment: section + relocs only, no functions.
        std::fprintf(stderr,
            "decompressed: section %s — data-only fragment (no jr $ra "
            "in first 0x400 bytes); registered as section + relocs "
            "with no FuncEntry rows. Stadium never dispatches the +0x00 "
            "J trampoline for these (would surface as a runtime "
            "lookup miss if it did, which is the correct diagnostic).\n",
            section_name.c_str());
        return section_index;
    }
    // Code fragment path: synthesize entry trampoline + impl function.
    // (1) Entry trampoline at vram+0 (8 bytes).
    std::vector<uint32_t> entry_words(2);
    std::memcpy(entry_words.data(), blob.data() + 0x00, 8);
    add_function(vram, synthetic_rom,
                 std::move(entry_words),
                 section_name + "_entry");
    // (2) Implementation function at vram+0x20. Its extent comes from
    // discover_function_bounds (analysis.h). On failure that helper
    // reports a reason string, which is propagated as a build error —
    // no graceful skip, no stub.
    if (reloc_offset <= IMPL_OFFSET + 4) {
        std::fprintf(stderr,
            "decompressed: section %s — body too small to contain a "
            "function at +0x20 (reloc_offset=0x%X)\n",
            section_name.c_str(), reloc_offset);
        return size_t(-1);
    }
    size_t impl_size = 0;
    std::string discover_err;
    bool ok = discover_function_bounds(
        blob.data(), reloc_offset,
        vram, IMPL_OFFSET,
        impl_size, discover_err);
    if (!ok) {
        std::fprintf(stderr,
            "decompressed: section %s — function-bounds discovery "
            "failed: %s\n"
            "  Build aborted. Resolutions, in order of preference:\n"
            "    1. If this is a recompiler analysis gap, fix the\n"
            "       analyzer in src/analysis.cpp.\n"
            "    2. If the fragment legitimately has a shape the\n"
            "       analyzer can't handle, declare it via the\n"
            "       single-block [[input.decompressed_section]] form\n"
            "       (manual analysis path).\n"
            "    3. If the wrapper is unused / unreachable in this\n"
            "       game's runtime path, exclude it via a future\n"
            "       pattern.exclude config field.\n"
            "  No graceful skip, no stub. Build refuses to ship.\n",
            section_name.c_str(), discover_err.c_str());
        return size_t(-1);
    }
    // impl_size is used below both as a byte count for memcpy and (divided
    // by 4) as the destination's word count. A size that is not a multiple
    // of 4 would write past impl_words; a size beyond the body would read
    // past the bytes the analyzer was given. Reject both instead of
    // corrupting memory.
    const size_t impl_capacity = size_t(reloc_offset) - IMPL_OFFSET;
    if (impl_size % 4 != 0 || impl_size > impl_capacity) {
        std::fprintf(stderr,
            "decompressed: section %s — function-bounds discovery "
            "returned an unusable size 0x%zX (must be a multiple of 4 "
            "and at most 0x%zX bytes, the body after +0x20)\n",
            section_name.c_str(), impl_size, impl_capacity);
        return size_t(-1);
    }
    std::vector<uint32_t> impl_words(impl_size / 4);
    std::memcpy(impl_words.data(),
                blob.data() + IMPL_OFFSET, impl_size);
    const std::string impl_name = fmt::format(
        "func_{:08X}", vram + IMPL_OFFSET);
    add_function(vram + IMPL_OFFSET,
                 synthetic_rom + IMPL_OFFSET,
                 std::move(impl_words),
                 impl_name);
    return section_index;
}
// Decompresses the wrapper that starts at `rom_wrapper` inside `rom`,
// using the decompressor selected by `wrapper_format` ("pers_szp_yay0" or
// "yay0"). The decompressor receives every byte from the wrapper to the
// end of the ROM and writes its output to `blob_out`.
//
// Returns the decompressor's result, or false without calling one when the
// offset is at/after the end of the ROM or the format name is unrecognized.
bool decompress_wrapper_at(const std::vector<uint8_t>& rom,
                           uint32_t rom_wrapper,
                           const std::string& wrapper_format,
                           std::vector<uint8_t>& blob_out)
{
    const bool offset_in_rom = rom_wrapper < rom.size();
    if (!offset_in_rom) {
        return false;
    }

    const bool is_pers_szp = (wrapper_format == "pers_szp_yay0");
    const bool is_plain_yay0 = !is_pers_szp && (wrapper_format == "yay0");

    if (is_pers_szp) {
        return compression::pers_szp_decompress(
            rom.data() + rom_wrapper,
            rom.size() - rom_wrapper, blob_out);
    }
    if (is_plain_yay0) {
        return compression::yay0_decompress(
            rom.data() + rom_wrapper,
            rom.size() - rom_wrapper, blob_out);
    }
    return false;
}
} // namespace
// Auto-discovers every fragment in the ROM that matches one of `patterns`
// and synthesizes a section for each unique one.
//
// For each pattern the ROM is scanned for "Yay0" magics. A candidate is
// accepted when its decompressed output begins with `J <p.vram + 0x20>;
// nop` followed by the "FRAGMENT" magic and, for the "pers_szp_yay0"
// format, a "PERS-SZP" header sits 0x18 bytes before the Yay0 magic.
// Accepted wrappers are deduplicated by an FNV-1a hash of their first
// 0x100 decompressed bytes and each survivor is handed to
// add_decompressed_section with its own synthetic link address.
//
// Synthetic link addresses are drawn from ONE pool shared by all patterns
// in this call, so no two variants — including variants of different
// patterns — receive the same link identity.
//
// Returns true when `patterns` is empty or everything succeeds. Returns
// false (after logging to stderr) if the ROM cannot be read, a matching
// wrapper belongs to a pattern with an unknown wrapper_format, the
// synthetic pool is exhausted, or any section fails to synthesize.
bool synthesize_decompressed_patterns(
    Context& context,
    const std::filesystem::path& rom_path,
    const std::vector<DecompressedSectionPattern>& patterns)
{
    if (patterns.empty()) return true;
    const std::vector<uint8_t> rom = read_rom_file(rom_path);
    if (rom.empty()) {
        std::fprintf(stderr,
            "decompressed: failed to read ROM file: %s\n",
            rom_path.string().c_str());
        return false;
    }
    const uint16_t first_added_index = uint16_t(context.sections.size());

    // Every unique pattern variant gets its own synthetic per-variant
    // ram_addr in the 0xC0000000+ sentinel pool. The bytes-encoded vram
    // (parsing/CFG) stays at p.vram for ALL variants — only
    // section.ram_addr (the link identity) changes, so each variant's
    // RELOC_HI16/LO16 macros emit a unique 0xCXXXXXXX literal at runtime
    // and the synthetic resolver can translate that literal back to the
    // variant's runtime RDRAM buffer.
    //
    // Pool placement: 0xC0000000 is KSEG2, unused by N64 software, so the
    // sentinel is "obviously invalid as an N64 vaddr" and can only be
    // handled by the recomp synthetic resolver. KSEG1 (0xA0000000) was
    // tried first but collides with the engine's RSP code section at
    // 0xA4000040.
    //
    // Stride 0x00100000 = 1 MB per variant, so N variants occupy
    // 0xC0000000 .. 0xC0000000 + N * 0x100000 (219 variants end at
    // 0xCDB00000). The pool itself ends at the top of the 32-bit address
    // space, which gives 0x400 slots; that limit is enforced below.
    // NOTE(review): the runtime side has its own, smaller bucket capacity
    // (kSyntheticBucketCount) that is not visible or enforced here —
    // confirm it covers the variant count.
    const uint32_t kSyntheticPoolBase = 0xC0000000u;
    const uint32_t kSyntheticPoolStride = 0x00100000u;
    const size_t kSyntheticPoolSlots =
        size_t((0x100000000ull - kSyntheticPoolBase) / kSyntheticPoolStride);
    // Lives OUTSIDE the pattern loop: restarting it per pattern would give
    // variant 0 of every pattern the same 0xC0000000 link address.
    size_t next_variant_idx = 0;

    for (const DecompressedSectionPattern& p : patterns) {
        // Compute the J-trampoline encoding we expect at +0x00 of any
        // matching fragment: J <vram + 0x20> + nop. MIPS J insn:
        //   opcode 0x02 << 26 | (target >> 2) & 0x03FFFFFF
        const uint32_t j_target = p.vram + 0x20u;
        const uint32_t j_insn = 0x08000000u |
            ((j_target >> 2) & 0x03FFFFFFu);
        // Big-endian byte pattern for the first 8 bytes (J + nop).
        uint8_t expected_first8[8];
        expected_first8[0] = uint8_t(j_insn >> 24);
        expected_first8[1] = uint8_t(j_insn >> 16);
        expected_first8[2] = uint8_t(j_insn >> 8);
        expected_first8[3] = uint8_t(j_insn);
        expected_first8[4] = 0;
        expected_first8[5] = 0;
        expected_first8[6] = 0;
        expected_first8[7] = 0;
        const uint8_t fragment_magic[8] = {
            'F', 'R', 'A', 'G', 'M', 'E', 'N', 'T'
        };
        // Resolve the base_name (default: "frag_<vram>").
        std::string base_name = p.base_name;
        if (base_name.empty()) {
            base_name = fmt::format("frag_{:08X}", p.vram);
        }
        // Scan the ROM for Yay0 magic. Each candidate is decompressed and
        // its first 16 output bytes are compared against the expected
        // J-insn + FRAGMENT magic.
        std::vector<std::pair<uint32_t, std::vector<uint8_t>>> hits;
        size_t scan_pos = 0;
        while (scan_pos + 16 < rom.size()) {
            // Find next "Yay0" magic.
            size_t y0 = std::string::npos;
            for (size_t i = scan_pos; i + 4 <= rom.size(); i++) {
                if (rom[i] == 'Y' && rom[i+1] == 'a' &&
                    rom[i+2] == 'y' && rom[i+3] == '0') {
                    y0 = i;
                    break;
                }
            }
            if (y0 == std::string::npos) break;
            // Resume after this magic whether or not it matches.
            scan_pos = y0 + 4;
            // Probe decompress to test the FRAGMENT shape. Only the first
            // 16 bytes of the output are inspected.
            std::vector<uint8_t> prefix;
            if (!compression::yay0_decompress(
                    rom.data() + y0, rom.size() - y0, prefix)) {
                continue;
            }
            if (prefix.size() < 0x10) continue;
            if (std::memcmp(prefix.data(), expected_first8, 8) != 0) continue;
            if (std::memcmp(prefix.data() + 8, fragment_magic, 8) != 0) continue;
            // Match — figure out the wrapper offset (PERS-SZP wraps Yay0
            // at -0x18 if the format is pers_szp_yay0; otherwise the
            // wrapper offset IS the Yay0 offset).
            uint32_t wrap_off = uint32_t(y0);
            if (p.wrapper_format == "pers_szp_yay0") {
                if (y0 < 0x18) continue;
                if (std::memcmp(rom.data() + (y0 - 0x18),
                                "PERS-SZP", 8) != 0) {
                    continue;
                }
                wrap_off = uint32_t(y0 - 0x18);
            } else if (p.wrapper_format != "yay0") {
                std::fprintf(stderr,
                    "decompressed: pattern %s unknown wrapper_format '%s'\n",
                    base_name.c_str(), p.wrapper_format.c_str());
                return false;
            }
            // Full decompress through the wrapper.
            std::vector<uint8_t> body;
            if (!decompress_wrapper_at(rom, wrap_off, p.wrapper_format, body)) {
                continue;
            }
            hits.emplace_back(wrap_off, std::move(body));
        }
        std::fprintf(stderr,
            "decompressed pattern %s @ vram 0x%08X format=%s: "
            "found %zu wrappers in ROM\n",
            base_name.c_str(), p.vram, p.wrapper_format.c_str(),
            hits.size());
        if (hits.empty()) continue;
        // Deduplicate by content hash. Hash window is the first 0x100
        // bytes — measured at 95% uniqueness for Stadium's 0x8FF00000
        // slot. The runtime side uses the SAME window over the bytes
        // Stadium decompressed into RDRAM, so build-time and runtime
        // hashes match. (Smaller fragments hash their full body.)
        // Dedup scope is per pattern: seen_hashes restarts for each one.
        constexpr size_t HASH_WINDOW = 0x100;
        std::unordered_map<uint64_t, size_t> seen_hashes;
        size_t added = 0;
        size_t deduped = 0;
        for (auto& [wrap_off, body] : hits) {
            const size_t window = std::min(HASH_WINDOW, body.size());
            const uint64_t content_hash =
                fnv1a_64(body.data(), window);
            auto it = seen_hashes.find(content_hash);
            if (it != seen_hashes.end()) {
                deduped++;
                continue;
            }
            seen_hashes.emplace(content_hash, wrap_off);
            // Original game-side fragment id derived from the pattern's
            // canonical bucket. All variants of this pattern share the
            // same original id (e.g. 0xEF for stadium_models). Stored
            // on each section so the runtime can filter synthetic
            // candidates to the matching id and avoid cross-pattern
            // hash-collision misregistration.
            const uint32_t orig_id =
                ((p.vram & 0x0FF00000u) >> 0x14) - 0x10u;
            // Refuse to hand out an address past the top of the 32-bit
            // space: the multiplication below would wrap and alias slot 0.
            if (next_variant_idx >= kSyntheticPoolSlots) {
                std::fprintf(stderr,
                    "decompressed: pattern %s — synthetic link pool "
                    "exhausted at variant %zu (pool holds %zu slots of "
                    "0x%X bytes starting at 0x%08X)\n",
                    base_name.c_str(), next_variant_idx,
                    kSyntheticPoolSlots, kSyntheticPoolStride,
                    kSyntheticPoolBase);
                return false;
            }
            // Per-variant synthetic link identity. The bytes-encoded
            // vram (used for parsing/CFG) stays at p.vram — only this
            // link identity changes per variant.
            const uint32_t variant_link_addr =
                kSyntheticPoolBase +
                uint32_t(next_variant_idx) * kSyntheticPoolStride;
            next_variant_idx++;
            const std::string section_name = fmt::format(
                "{}__rom_{:X}", base_name, wrap_off);
            size_t si = add_decompressed_section(
                context, body, wrap_off, p.vram,
                section_name, p.relocatable, content_hash,
                variant_link_addr, orig_id);
            if (si == size_t(-1)) {
                // Hard failure: the section's bytes can't be bounded
                // by our CFG walk (or some other unrecoverable parse
                // error). NOT a soft-skip; the user has to decide.
                std::fprintf(stderr,
                    "decompressed: pattern %s aborted — section for "
                    "ROM 0x%X failed to synthesize. See message above.\n",
                    base_name.c_str(), wrap_off);
                return false;
            }
            added++;
        }
        std::fprintf(stderr,
            "decompressed pattern %s: %zu sections added "
            "(%zu deduped as content-identical)\n",
            base_name.c_str(), added, deduped);
    }
    // Cross-section R_MIPS_32 retargeting once everything is in.
    resolve_cross_section_targets(context, first_added_index);
    return true;
}
} // namespace N64Recomp