N64Recomp/src/decompressed.cpp
Matthew Stanley bd5f42fb22 analysis: discover_function_bounds — real CFG walk with jump-table support
Adds a public N64Recomp::discover_function_bounds() in src/analysis.h
that performs a BFS-based control-flow walk of a function's body,
following:
  - Conditional branches (target + fall-through)
  - Unconditional j/jal targets when intra-body
  - jr $ra returns (block ends after delay slot)
  - jr-via-jump-table dispatches: the existing register-state
    simulator from analyze_function detects the lui+addiu+addu+lw+jr
    pattern and records the jtbl base; we then read entries out of
    the body bytes and feed targets back into the BFS until
    convergence.

Returns the function's byte size (max-reachable + 4 to cover the
delay slot of the last instruction). On failure, populates a specific
error message with the offending offset and reason — caller treats
this as a build error, NOT a graceful skip (per the project's
no-stubs principle).

Wires into decompressed.cpp's pattern path, replacing the prior
inline BFS that had a TODO for jump-table handling. The pattern
caller now propagates failures via `synthesize_decompressed_patterns`
returning false, which surfaces in main.cpp's exit_failure path.

Concrete behavior change: activating a pattern that includes a
fragment with computed jumps now produces a build error pointing at
the specific section name + offset + the analyzer's failure reason,
instead of silently producing a partial binary. Tested on Stadium's
0x8FF00000 slot — first failing wrapper is at ROM 0x8CC400 with an
indirect jr at offset 0x827C the simulator doesn't pattern-match.
The static [[input.decompressed_section]] path for fragment78 is
unaffected (still recompiles cleanly, no regression on boot logo +
PIKA jingle).

Future work surfaced by this change: the simulator's lui+addiu
+addu+lw+jr pattern doesn't cover every jump-table shape Stadium
uses. Each gap surfaces as a specific build-error offset; resolution
is to extend analyze_instruction to recognize the additional pattern
(or, when it's a true tail-call rather than a jtbl, distinguish
those at the jr site).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 21:47:44 -07:00

786 lines
31 KiB
C++

#include "decompressed.h"
#include <cstdio>
#include <cstring>
#include <fstream>
#include <unordered_map>
#include <vector>
#include "compression/pers_szp.h"
#include "compression/yay0.h"
#include "fmt/format.h"
#include "rabbitizer.hpp"
#include <set>
#include "analysis.h"
namespace N64Recomp {
namespace {
uint32_t read_be_u32(const uint8_t* p) {
return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
(uint32_t(p[2]) << 8) | uint32_t(p[3]);
}
// FNV-1a 64-bit content hash. Used to deduplicate wrappers whose
// decompressed bytes are byte-for-byte identical (Stadium's 0x8FF00000
// slot has ~11 such pairs out of 279), and as the runtime dispatch key
// when multiple wrappers share a link vram.
uint64_t fnv1a_64(const uint8_t* data, size_t len) {
uint64_t h = 0xCBF29CE484222325ull;
for (size_t i = 0; i < len; i++) {
h ^= uint64_t(data[i]);
h *= 0x100000001B3ull;
}
return h;
}
// Reads an entire file into memory. Returns empty vector on error.
std::vector<uint8_t> read_rom_file(const std::filesystem::path& path) {
std::ifstream f(path, std::ios::binary | std::ios::ate);
if (!f.good()) return {};
auto size = f.tellg();
if (size <= 0) return {};
std::vector<uint8_t> buf(static_cast<size_t>(size));
f.seekg(0, std::ios::beg);
f.read(reinterpret_cast<char*>(buf.data()), size);
if (!f.good()) return {};
return buf;
}
// Stadium's runtime reloc-table format (see disasm/src/memmap.c
// Memmap_RelocateFragment). One uint32 per reloc:
// bits 31:24 type (2 = R_MIPS_32, 4 = R_MIPS_26,
// 5 = R_MIPS_HI16, 6 = R_MIPS_LO16)
// bits 23:0 offset into the fragment
// The table is preceded by a uint32 count.
//
// Stadium's type codes don't match ELF type codes — translate.
RelocType translate_stadium_reloc_type(uint8_t stadium_type) {
switch (stadium_type) {
case 2: return RelocType::R_MIPS_32;
case 4: return RelocType::R_MIPS_26;
case 5: return RelocType::R_MIPS_HI16;
case 6: return RelocType::R_MIPS_LO16;
default: return RelocType::R_MIPS_NONE;
}
}
// Parses the FRAGMENT-format header at the start of the decompressed
// blob and the trailing reloc table. Populates `section.relocs`. The
// per-reloc target_section is filled in by the caller after all
// decompressed sections are added (so cross-fragment R_MIPS_32 targets
// can be resolved against the full section list).
//
// Per-type computation of `target_section_offset` (the field the
// recompiler reads at codegen time):
// - R_MIPS_32: word value is an absolute pointer; offset =
// word - section_vram (then refined cross-section by caller).
// - R_MIPS_26: J/JAL target = (word & 0x03FFFFFF) << 2 OR'd with
// PC[31:28]; offset = target - section_vram.
// - R_MIPS_HI16/LO16: paired. Combined immediate =
// (HI << 16) + (int16_t)LO. Offset = combined - section_vram.
// The recompiler emits both RELOC_HI16(idx, off) and
// RELOC_LO16(idx, off) using each reloc's target_section_offset,
// so both members of the pair carry the SAME computed offset.
//
// Stadium's reloc table orders HI16 immediately followed by its paired
// LO16 (matches the body's instruction order). We pair by adjacency.
//
// Returns false if the header is malformed.
bool parse_fragment_relocs(const std::vector<uint8_t>& bytes,
uint32_t section_vram,
uint16_t section_index,
Section& section_out) {
if (bytes.size() < 0x20) {
std::fprintf(stderr,
"decompressed: blob smaller than FRAGMENT header (size=0x%zX)\n",
bytes.size());
return false;
}
if (std::memcmp(bytes.data() + 0x08, "FRAGMENT", 8) != 0) {
std::fprintf(stderr,
"decompressed: missing FRAGMENT magic at +0x08\n");
return false;
}
const uint32_t reloc_offset = read_be_u32(bytes.data() + 0x14);
const uint32_t size_in_ram = read_be_u32(bytes.data() + 0x1C);
if (reloc_offset > bytes.size() || size_in_ram > bytes.size()) {
std::fprintf(stderr,
"decompressed: relocOffset 0x%X / sizeInRam 0x%X exceed blob "
"size 0x%zX\n",
reloc_offset, size_in_ram, bytes.size());
return false;
}
if (reloc_offset + 4 > bytes.size()) {
std::fprintf(stderr,
"decompressed: no room for reloc count at offset 0x%X\n",
reloc_offset);
return false;
}
const uint32_t n_relocs = read_be_u32(bytes.data() + reloc_offset);
const size_t reloc_table_end = reloc_offset + 4 + 4 * size_t(n_relocs);
if (reloc_table_end > bytes.size()) {
std::fprintf(stderr,
"decompressed: reloc table (count=%u) overruns blob\n", n_relocs);
return false;
}
// First pass: parse raw entries.
struct RawReloc {
RelocType type;
uint32_t section_offset;
uint32_t word; // instruction word at section_offset
};
std::vector<RawReloc> raw;
raw.reserve(n_relocs);
for (uint32_t i = 0; i < n_relocs; i++) {
const uint32_t entry = read_be_u32(
bytes.data() + reloc_offset + 4 + 4 * i);
const uint8_t stadium_type = uint8_t((entry >> 24) & 0x7F);
const uint32_t section_offset = entry & 0x00FFFFFFu;
const RelocType type = translate_stadium_reloc_type(stadium_type);
if (type == RelocType::R_MIPS_NONE) {
std::fprintf(stderr,
"decompressed: unknown Stadium reloc type 0x%X at "
"offset 0x%X — skipped\n", stadium_type, section_offset);
continue;
}
if (section_offset + 4 > size_in_ram) {
std::fprintf(stderr,
"decompressed: reloc[%u] offset 0x%X out of body\n",
i, section_offset);
continue;
}
const uint32_t word = read_be_u32(bytes.data() + section_offset);
raw.push_back({type, section_offset, word});
}
// Second pass: emit Reloc entries. HI16 pairs with the next LO16
// in the list (Stadium's table orders them this way).
section_out.relocs.reserve(raw.size());
for (size_t i = 0; i < raw.size(); i++) {
const RawReloc& rr = raw[i];
Reloc r{};
r.address = section_vram + rr.section_offset;
r.target_section_offset = 0;
r.target_section = section_index; // default; cross-section pass refines
r.symbol_index = uint32_t(-1);
r.type = rr.type;
r.reference_symbol = false;
switch (rr.type) {
case RelocType::R_MIPS_32: {
r.target_section_offset = rr.word - section_vram;
break;
}
case RelocType::R_MIPS_26: {
const uint32_t pc_high = section_vram & 0xF0000000u;
const uint32_t target = pc_high |
((rr.word & 0x03FFFFFFu) << 2);
r.target_section_offset = target - section_vram;
break;
}
case RelocType::R_MIPS_HI16: {
// Pair with next LO16 in raw list.
size_t j = i + 1;
while (j < raw.size() && raw[j].type != RelocType::R_MIPS_LO16) {
j++;
}
if (j >= raw.size()) {
std::fprintf(stderr,
"decompressed: HI16 at offset 0x%X has no paired "
"LO16 in reloc table\n", rr.section_offset);
break;
}
const uint16_t hi_imm = uint16_t(rr.word & 0xFFFFu);
const int16_t lo_imm = int16_t(raw[j].word & 0xFFFFu);
const uint32_t combined =
(uint32_t(hi_imm) << 16) + uint32_t(int32_t(lo_imm));
r.target_section_offset = combined - section_vram;
break;
}
case RelocType::R_MIPS_LO16: {
// Find preceding HI16. We scan backward for the most
// recent HI16 (matches Stadium's adjacency convention).
size_t j = i;
bool found = false;
while (j > 0) {
j--;
if (raw[j].type == RelocType::R_MIPS_HI16) {
const uint16_t hi_imm = uint16_t(raw[j].word & 0xFFFFu);
const int16_t lo_imm = int16_t(rr.word & 0xFFFFu);
const uint32_t combined =
(uint32_t(hi_imm) << 16) +
uint32_t(int32_t(lo_imm));
r.target_section_offset = combined - section_vram;
found = true;
break;
}
}
if (!found) {
std::fprintf(stderr,
"decompressed: LO16 at offset 0x%X has no paired "
"HI16 in reloc table\n", rr.section_offset);
}
break;
}
default:
break;
}
section_out.relocs.emplace_back(r);
if (rr.type == RelocType::R_MIPS_32) {
section_out.has_mips32_relocs = true;
}
}
return true;
}
// Once every decompressed section is added, walk every reloc with
// target_section == its own index (the default we set above) and
// re-target R_MIPS_32 entries that actually point into a different
// section. Self-targeting relocs stay self-targeting.
void resolve_cross_section_targets(Context& context,
uint16_t first_added_index) {
for (size_t si = first_added_index; si < context.sections.size(); si++) {
Section& section = context.sections[si];
for (Reloc& r : section.relocs) {
if (r.type != RelocType::R_MIPS_32) continue;
// Compute the target absolute address.
const uint32_t target_addr = section.ram_addr +
r.target_section_offset;
// Find the section that contains target_addr.
for (size_t ti = 0; ti < context.sections.size(); ti++) {
const Section& candidate = context.sections[ti];
if (candidate.size == 0) continue;
if (target_addr >= candidate.ram_addr &&
target_addr < candidate.ram_addr + candidate.size) {
r.target_section = uint16_t(ti);
r.target_section_offset = target_addr -
candidate.ram_addr;
break;
}
}
}
}
}
} // namespace
bool synthesize_decompressed_sections(
Context& context,
const std::filesystem::path& rom_path,
const std::vector<DecompressedSection>& configs)
{
if (configs.empty()) return true;
const std::vector<uint8_t> rom = read_rom_file(rom_path);
if (rom.empty()) {
std::fprintf(stderr,
"decompressed: failed to read ROM file: %s\n",
rom_path.string().c_str());
return false;
}
const uint16_t first_added_index = uint16_t(context.sections.size());
for (const DecompressedSection& cfg : configs) {
// Bounds-check the wrapper offset.
if (cfg.rom_wrapper >= rom.size()) {
std::fprintf(stderr,
"decompressed: section %s rom_wrapper 0x%X is past EOF\n",
cfg.name.c_str(), cfg.rom_wrapper);
return false;
}
// Decompress per format.
std::vector<uint8_t> blob;
bool ok = false;
if (cfg.wrapper_format == "pers_szp_yay0") {
ok = compression::pers_szp_decompress(
rom.data() + cfg.rom_wrapper,
rom.size() - cfg.rom_wrapper,
blob);
} else if (cfg.wrapper_format == "yay0") {
ok = compression::yay0_decompress(
rom.data() + cfg.rom_wrapper,
rom.size() - cfg.rom_wrapper,
blob);
} else {
std::fprintf(stderr,
"decompressed: section %s unknown wrapper_format '%s'\n",
cfg.name.c_str(), cfg.wrapper_format.c_str());
return false;
}
if (!ok) {
std::fprintf(stderr,
"decompressed: section %s failed to decompress wrapper "
"at ROM 0x%X (format=%s)\n",
cfg.name.c_str(), cfg.rom_wrapper,
cfg.wrapper_format.c_str());
return false;
}
// Stash decompressed bytes at the end of context.rom so the
// existing pipeline (which addresses sections via rom_addr)
// finds them. The synthesized rom_addr deliberately encodes
// the wrapper offset in the upper bits for traceability:
// synthetic_rom = 0xFE000000 | wrapper_offset
// The 0xFE prefix is reserved for synthesized sections so it
// never collides with real ROM offsets (ROM is at most 64MB).
const uint32_t synthetic_rom = 0xFE000000u | cfg.rom_wrapper;
// Section size = relocOffset (body + bss before relocs).
const uint32_t reloc_offset = read_be_u32(blob.data() + 0x14);
// Append decompressed bytes to context.rom at synthetic_rom.
// Size we copy is reloc_offset (only the body, NOT the trailing
// reloc table — that's metadata, not section content).
const size_t needed_rom_size =
size_t(synthetic_rom) + reloc_offset;
if (context.rom.size() < needed_rom_size) {
context.rom.resize(needed_rom_size, 0);
}
std::memcpy(context.rom.data() + synthetic_rom,
blob.data(), reloc_offset);
// Build the Section struct.
const uint16_t section_index =
uint16_t(context.sections.size());
Section section{};
section.rom_addr = synthetic_rom;
section.ram_addr = cfg.vram;
section.size = reloc_offset;
section.bss_size = 0; // BSS is part of body in this format.
section.name = cfg.name;
section.executable = true;
section.relocatable = cfg.relocatable;
if (!parse_fragment_relocs(blob, cfg.vram, section_index, section)) {
std::fprintf(stderr,
"decompressed: section %s reloc parsing failed\n",
cfg.name.c_str());
return false;
}
// Add the section to the context. We need to grow
// section_functions in lockstep.
context.sections.emplace_back(std::move(section));
context.section_functions.emplace_back();
// Synthesize functions for the FRAGMENT layout:
//
// 1. fragment_entry at +0x00 — 8 bytes (J insn + nop) that
// jumps to the real implementation at +0x20.
// 2. The implementation function at +0x20 — runs from +0x20
// to the first `jr $ra` (0x03E00008) we encounter in the
// body, plus its delay slot.
//
// Without (2), the recompiler's emit for (1) sees "branch to
// 0x...0020 (no symbol)" and falls back to recomp_unhandled_
// branch, which is a runtime abort. Once (2) exists in
// functions_by_vram, the J becomes a tail call and dispatch
// works the same way it does for ELF-symtab-listed functions.
//
// Function::words holds raw ROM bytes (big-endian instructions
// stored in host-endian uint32 — numerically byteswapped from
// the actual instruction value). The recompilation pass calls
// byteswap(word) to recover the BE numeric form.
auto add_function = [&](uint32_t vram, uint32_t rom,
std::vector<uint32_t> words,
std::string name) {
const size_t fi = context.functions.size();
context.functions.emplace_back(
vram, rom, std::move(words), name,
section_index, false, false, false);
context.section_functions[section_index].push_back(fi);
context.sections[section_index].function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(fi);
context.functions_by_name[name] = fi;
};
// (1) Entry trampoline: 8 bytes at vram+0.
std::vector<uint32_t> entry_words(2);
std::memcpy(entry_words.data(), blob.data() + 0x00, 8);
add_function(cfg.vram, synthetic_rom,
std::move(entry_words),
cfg.name + "_entry");
// (2) Implementation function at vram+0x20. Scan forward from
// +0x20 for the first `jr $ra` (BE numeric 0x03E00008, stored
// little-endian in our blob bytes as 08 00 E0 03). Include the
// delay slot in the function size.
constexpr uint32_t IMPL_OFFSET = 0x20;
const uint8_t jr_ra_be[4] = { 0x03, 0xE0, 0x00, 0x08 };
size_t impl_end = 0;
for (size_t off = IMPL_OFFSET; off + 4 <= reloc_offset; off += 4) {
if (std::memcmp(blob.data() + off, jr_ra_be, 4) == 0) {
// Include this jr ra and its delay slot.
impl_end = off + 8;
if (impl_end > reloc_offset) impl_end = reloc_offset;
break;
}
}
if (impl_end > IMPL_OFFSET) {
const size_t impl_size = impl_end - IMPL_OFFSET;
std::vector<uint32_t> impl_words(impl_size / 4);
std::memcpy(impl_words.data(),
blob.data() + IMPL_OFFSET, impl_size);
// Use the convention func_<vram> so the name matches what
// the recompiler would have generated from an ELF symbol.
const std::string impl_name = fmt::format(
"func_{:08X}", cfg.vram + IMPL_OFFSET);
add_function(cfg.vram + IMPL_OFFSET,
synthetic_rom + IMPL_OFFSET,
std::move(impl_words),
impl_name);
} else {
std::fprintf(stderr,
"decompressed: section %s — could not locate jr $ra "
"in body at +0x20; only fragment_entry will be "
"recompiled (jal targets through the entry will become "
"runtime aborts)\n", cfg.name.c_str());
}
std::fprintf(stderr,
"decompressed: synthesized section %s @ vram 0x%08X "
"size 0x%X relocs=%zu (wrapper rom 0x%X format=%s)\n",
cfg.name.c_str(), cfg.vram, reloc_offset,
context.sections[section_index].relocs.size(),
cfg.rom_wrapper, cfg.wrapper_format.c_str());
}
// Cross-section R_MIPS_32 retargeting now that all decompressed
// sections are in context.sections.
resolve_cross_section_targets(context, first_added_index);
return true;
}
namespace {
// Adds one synthesized section + its functions + reloc table to the
// context. Used by both the explicit per-fragment path and the pattern
// auto-discovery path. `blob` is the decompressed body+relocs (must
// start with the FRAGMENT header). On success, returns the section
// index. On failure, returns size_t(-1) and prints to stderr.
size_t add_decompressed_section(Context& context,
const std::vector<uint8_t>& blob,
uint32_t rom_wrapper,
uint32_t vram,
const std::string& section_name,
bool relocatable,
uint64_t content_hash)
{
if (blob.size() < 0x20) {
std::fprintf(stderr,
"decompressed: section %s blob smaller than FRAGMENT header\n",
section_name.c_str());
return size_t(-1);
}
if (std::memcmp(blob.data() + 0x08, "FRAGMENT", 8) != 0) {
std::fprintf(stderr,
"decompressed: section %s missing FRAGMENT magic\n",
section_name.c_str());
return size_t(-1);
}
// Stash decompressed bytes at synthetic_rom = 0xFE000000 | wrapper_off
// so the existing pipeline (which addresses sections via rom_addr)
// finds them. The 0xFE prefix is reserved for synthesized sections.
const uint32_t synthetic_rom = 0xFE000000u | rom_wrapper;
const uint32_t reloc_offset = read_be_u32(blob.data() + 0x14);
if (reloc_offset > blob.size()) {
std::fprintf(stderr,
"decompressed: section %s relocOffset 0x%X exceeds blob 0x%zX\n",
section_name.c_str(), reloc_offset, blob.size());
return size_t(-1);
}
const size_t needed_rom_size = size_t(synthetic_rom) + reloc_offset;
if (context.rom.size() < needed_rom_size) {
context.rom.resize(needed_rom_size, 0);
}
std::memcpy(context.rom.data() + synthetic_rom,
blob.data(), reloc_offset);
const uint16_t section_index = uint16_t(context.sections.size());
Section section{};
section.rom_addr = synthetic_rom;
section.ram_addr = vram;
section.size = reloc_offset;
section.bss_size = 0;
section.name = section_name;
section.executable = true;
section.relocatable = relocatable;
section.content_hash = content_hash;
if (!parse_fragment_relocs(blob, vram, section_index, section)) {
return size_t(-1);
}
context.sections.emplace_back(std::move(section));
context.section_functions.emplace_back();
auto add_function = [&](uint32_t f_vram, uint32_t f_rom,
std::vector<uint32_t> words,
std::string name) {
const size_t fi = context.functions.size();
context.functions.emplace_back(
f_vram, f_rom, std::move(words), name,
section_index, false, false, false);
context.section_functions[section_index].push_back(fi);
context.sections[section_index].function_addrs.push_back(f_vram);
context.functions_by_vram[f_vram].push_back(fi);
context.functions_by_name[name] = fi;
};
// (1) Entry trampoline at vram+0 (8 bytes).
std::vector<uint32_t> entry_words(2);
std::memcpy(entry_words.data(), blob.data() + 0x00, 8);
add_function(vram, synthetic_rom,
std::move(entry_words),
section_name + "_entry");
// (2) Implementation function at vram+0x20. The engine's
// analysis.cpp::discover_function_bounds runs a real BFS-based
// control-flow walk that follows conditional branches, j/jal
// targets, and jr-via-jump-table dispatches (resolved using the
// existing register-state simulator). On failure it reports a
// specific offset and reason; we propagate that as a build error
// — no graceful skip, no stub.
constexpr uint32_t IMPL_OFFSET = 0x20;
if (reloc_offset <= IMPL_OFFSET + 4) {
std::fprintf(stderr,
"decompressed: section %s — body too small to contain a "
"function at +0x20 (reloc_offset=0x%X)\n",
section_name.c_str(), reloc_offset);
return size_t(-1);
}
size_t impl_size = 0;
std::string discover_err;
bool ok = discover_function_bounds(
blob.data(), reloc_offset,
vram, IMPL_OFFSET,
impl_size, discover_err);
if (!ok) {
std::fprintf(stderr,
"decompressed: section %s — function-bounds discovery "
"failed: %s\n"
" Build aborted. Resolutions, in order of preference:\n"
" 1. If this is a recompiler analysis gap, fix the\n"
" analyzer in src/analysis.cpp.\n"
" 2. If the fragment legitimately has a shape the\n"
" analyzer can't handle, declare it via the\n"
" single-block [[input.decompressed_section]] form\n"
" (manual analysis path).\n"
" 3. If the wrapper is unused / unreachable in this\n"
" game's runtime path, exclude it via a future\n"
" pattern.exclude config field.\n"
" No graceful skip, no stub. Build refuses to ship.\n",
section_name.c_str(), discover_err.c_str());
return size_t(-1);
}
std::vector<uint32_t> impl_words(impl_size / 4);
std::memcpy(impl_words.data(),
blob.data() + IMPL_OFFSET, impl_size);
const std::string impl_name = fmt::format(
"func_{:08X}", vram + IMPL_OFFSET);
add_function(vram + IMPL_OFFSET,
synthetic_rom + IMPL_OFFSET,
std::move(impl_words),
impl_name);
return section_index;
}
// Decompress a wrapper at the given ROM offset using the named format.
// Returns true + populates blob on success.
bool decompress_wrapper_at(const std::vector<uint8_t>& rom,
uint32_t rom_wrapper,
const std::string& wrapper_format,
std::vector<uint8_t>& blob_out)
{
if (rom_wrapper >= rom.size()) return false;
if (wrapper_format == "pers_szp_yay0") {
return compression::pers_szp_decompress(
rom.data() + rom_wrapper,
rom.size() - rom_wrapper, blob_out);
} else if (wrapper_format == "yay0") {
return compression::yay0_decompress(
rom.data() + rom_wrapper,
rom.size() - rom_wrapper, blob_out);
}
return false;
}
} // namespace
bool synthesize_decompressed_patterns(
Context& context,
const std::filesystem::path& rom_path,
const std::vector<DecompressedSectionPattern>& patterns)
{
if (patterns.empty()) return true;
const std::vector<uint8_t> rom = read_rom_file(rom_path);
if (rom.empty()) {
std::fprintf(stderr,
"decompressed: failed to read ROM file: %s\n",
rom_path.string().c_str());
return false;
}
const uint16_t first_added_index = uint16_t(context.sections.size());
for (const DecompressedSectionPattern& p : patterns) {
// Compute the J-trampoline encoding we expect at +0x00 of any
// matching fragment: J <vram + 0x20> + nop. MIPS J insn:
// opcode 0x02 << 26 | (target >> 2) & 0x03FFFFFF
const uint32_t j_target = p.vram + 0x20u;
const uint32_t j_insn = 0x08000000u |
((j_target >> 2) & 0x03FFFFFFu);
// Big-endian byte pattern for the first 8 bytes (J + nop).
uint8_t expected_first8[8];
expected_first8[0] = uint8_t(j_insn >> 24);
expected_first8[1] = uint8_t(j_insn >> 16);
expected_first8[2] = uint8_t(j_insn >> 8);
expected_first8[3] = uint8_t(j_insn);
expected_first8[4] = 0;
expected_first8[5] = 0;
expected_first8[6] = 0;
expected_first8[7] = 0;
const uint8_t fragment_magic[8] = {
'F', 'R', 'A', 'G', 'M', 'E', 'N', 'T'
};
// Resolve the base_name (default: "frag_<vram>").
std::string base_name = p.base_name;
if (base_name.empty()) {
base_name = fmt::format("frag_{:08X}", p.vram);
}
// Scan the ROM for Yay0 magic. For each, decompress 0x40 bytes,
// check the J-insn + FRAGMENT-magic match, and accept.
std::vector<std::pair<uint32_t, std::vector<uint8_t>>> hits;
size_t scan_pos = 0;
while (scan_pos + 16 < rom.size()) {
// Find next "Yay0" magic.
size_t y0 = std::string::npos;
for (size_t i = scan_pos; i + 4 <= rom.size(); i++) {
if (rom[i] == 'Y' && rom[i+1] == 'a' &&
rom[i+2] == 'y' && rom[i+3] == '0') {
y0 = i;
break;
}
}
if (y0 == std::string::npos) break;
scan_pos = y0 + 4;
// Quick prefix decompress to test the FRAGMENT shape.
std::vector<uint8_t> prefix;
if (!compression::yay0_decompress(
rom.data() + y0, rom.size() - y0, prefix)) {
continue;
}
if (prefix.size() < 0x10) continue;
if (std::memcmp(prefix.data(), expected_first8, 8) != 0) continue;
if (std::memcmp(prefix.data() + 8, fragment_magic, 8) != 0) continue;
// Match — figure out the wrapper offset (PERS-SZP wraps Yay0
// at -0x18 if the format is pers_szp_yay0; otherwise the
// wrapper offset IS the Yay0 offset).
uint32_t wrap_off = uint32_t(y0);
if (p.wrapper_format == "pers_szp_yay0") {
if (y0 < 0x18) continue;
if (std::memcmp(rom.data() + (y0 - 0x18),
"PERS-SZP", 8) != 0) {
continue;
}
wrap_off = uint32_t(y0 - 0x18);
} else if (p.wrapper_format != "yay0") {
std::fprintf(stderr,
"decompressed: pattern %s unknown wrapper_format '%s'\n",
base_name.c_str(), p.wrapper_format.c_str());
return false;
}
// Full decompress.
std::vector<uint8_t> body;
if (!decompress_wrapper_at(rom, wrap_off, p.wrapper_format, body)) {
continue;
}
hits.emplace_back(wrap_off, std::move(body));
}
std::fprintf(stderr,
"decompressed pattern %s @ vram 0x%08X format=%s: "
"found %zu wrappers in ROM\n",
base_name.c_str(), p.vram, p.wrapper_format.c_str(),
hits.size());
if (hits.empty()) continue;
// Deduplicate by content hash. Hash window is the first 0x100
// bytes — measured at 95% uniqueness for Stadium's 0x8FF00000
// slot. The runtime side uses the SAME window over the bytes
// Stadium decompressed into RDRAM, so build-time and runtime
// hashes match. (Smaller fragments hash their full body.)
constexpr size_t HASH_WINDOW = 0x100;
std::unordered_map<uint64_t, size_t> seen_hashes;
size_t added = 0;
size_t deduped = 0;
for (auto& [wrap_off, body] : hits) {
const size_t window = std::min(HASH_WINDOW, body.size());
const uint64_t content_hash =
fnv1a_64(body.data(), window);
auto it = seen_hashes.find(content_hash);
if (it != seen_hashes.end()) {
deduped++;
continue;
}
seen_hashes.emplace(content_hash, wrap_off);
const std::string section_name = fmt::format(
"{}__rom_{:X}", base_name, wrap_off);
size_t si = add_decompressed_section(
context, body, wrap_off, p.vram,
section_name, p.relocatable, content_hash);
if (si == size_t(-1)) {
// Hard failure: the section's bytes can't be bounded
// by our CFG walk (or some other unrecoverable parse
// error). NOT a soft-skip; the user has to decide.
std::fprintf(stderr,
"decompressed: pattern %s aborted — section for "
"ROM 0x%X failed to synthesize. See message above.\n",
base_name.c_str(), wrap_off);
return false;
}
added++;
}
std::fprintf(stderr,
"decompressed pattern %s: %zu sections added "
"(%zu deduped as content-identical)\n",
base_name.c_str(), added, deduped);
}
// Cross-section R_MIPS_32 retargeting once everything is in.
resolve_cross_section_targets(context, first_added_index);
return true;
}
} // namespace N64Recomp