Implement mod symbol parsing

This commit is contained in:
Mr-Wiseguy 2024-07-09 16:40:17 -04:00
parent 07f2569647
commit a88bc6e16a
6 changed files with 253 additions and 14 deletions

View file

@ -71,6 +71,7 @@ target_sources(N64Recomp PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/operations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/cgenerator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/recompilation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/mod_symbols.cpp
)
target_include_directories(N64Recomp PUBLIC

View file

@ -9,7 +9,6 @@
#include <unordered_map>
#include <unordered_set>
#include <filesystem>
#include "rabbitizer.hpp"
#ifdef _MSC_VER
inline uint32_t byteswap(uint32_t val) {
@ -51,9 +50,9 @@ namespace N64Recomp {
struct Reloc {
uint32_t address;
uint32_t section_offset;
uint32_t symbol_index;
uint32_t target_section;
uint32_t target_section_offset;
uint32_t symbol_index; // Only used for reference symbols
uint16_t target_section;
RelocType type;
bool reference_symbol;
};
@ -127,6 +126,36 @@ namespace N64Recomp {
};
bool recompile_function(const Context& context, const Function& func, const std::string& recomp_include, std::ofstream& output_file, std::span<std::vector<uint32_t>> static_funcs, bool write_header);
// Option bits attached to a function replacement entry, stored in a uint32_t.
enum class ReplacementFlags : uint32_t {
// NOTE(review): presumably forces the replacement to be applied even where it would otherwise be skipped;
// the exact semantics are not established by the code visible here — confirm against the consumer.
Force = 1 << 0,
};
// Describes one function replacement entry read from a mod symbol file.
struct FunctionReplacement {
// Index of the replacing function, copied verbatim from the symbol file.
// NOTE(review): whether this is relative to the owning section or to the whole function list is not established here — confirm.
uint32_t func_index;
// Copied verbatim from the symbol file; presumably the vram address of the original function being replaced — TODO confirm.
uint32_t original_vram;
// Option bits for this replacement (see ReplacementFlags).
ReplacementFlags flags;
};
// Mod-specific data kept alongside a parsed section.
struct ModSectionInfo {
// Filled from the section header's original_vrom field when a mod symbol file is parsed.
uint32_t original_rom_addr;
// Function replacements declared for this section.
std::vector<FunctionReplacement> replacements;
};
// Mod-level state produced while parsing a mod symbol file.
struct ModContext {
// NOTE(review): presumably the context of the base program the mod targets; it is not populated by the
// mod symbol parser itself — confirm where this gets filled in.
Context base_context;
// One entry per parsed section, in the same order as the sections of the parsed context.
std::vector<ModSectionInfo> section_info;
};
// Result codes returned by parse_mod_symbols.
enum class ModSymbolsError {
// Parsing succeeded and every function's words were filled in.
Good,
// The data is too short to hold the file header, or the magic bytes do not match.
NotASymbolFile,
// The header's version field is not one the parser knows how to read.
UnknownSymbolFileVersion,
// The version-specific body failed to parse (truncated, inconsistent, or with trailing bytes).
CorruptSymbolFile,
// A function's byte range extends past the end of the supplied binary.
FunctionOutOfBounds,
};
// Parses a mod symbol file and fills in the instruction words of every function it describes.
//   data             - raw bytes of the mod symbol file.
//   binary           - raw bytes of the mod binary that the function words are read from.
//   sections_by_vrom - maps a section vrom to a section index; used to resolve reloc target sections.
//   context_out      - receives the parsed sections, functions and relocs; reset to empty on failure.
//   mod_context_out  - receives the per-section mod data; reset to empty on failure.
// Returns ModSymbolsError::Good on success, otherwise the reason parsing was rejected.
ModSymbolsError parse_mod_symbols(std::span<const char> data, std::span<const uint8_t> binary, const std::unordered_map<uint32_t, uint16_t>& sections_by_vrom, Context& context_out, ModContext& mod_context_out);
}
#endif

View file

@ -462,7 +462,7 @@ bool N64Recomp::Context::from_symbol_file(const std::filesystem::path& symbol_fi
throw toml::parse_error("Section entry missing required field(s)", el.source());
}
size_t section_index = ret.sections.size();
uint16_t section_index = (uint16_t)ret.sections.size();
Section& section = ret.sections.emplace_back(Section{});
section.rom_addr = rom_addr.value();
@ -561,7 +561,7 @@ bool N64Recomp::Context::from_symbol_file(const std::filesystem::path& symbol_fi
Reloc cur_reloc{};
cur_reloc.address = vram.value();
cur_reloc.section_offset = target_vram.value() - section.ram_addr;
cur_reloc.target_section_offset = target_vram.value() - section.ram_addr;
cur_reloc.symbol_index = (uint32_t)-1;
cur_reloc.target_section = section_index;
cur_reloc.type = reloc_type;

View file

@ -1170,7 +1170,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
if (reloc_out.type == N64Recomp::RelocType::R_MIPS_LO16) {
uint32_t rel_immediate = instr.getProcessedImmediate();
uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate;
reloc_out.section_offset = full_immediate + rel_symbol_offset - rel_section_vram;
reloc_out.target_section_offset = full_immediate + rel_symbol_offset - rel_section_vram;
if (prev_hi) {
if (prev_hi_symbol != rel_symbol) {
fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n"
@ -1180,7 +1180,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
}
// Set the previous HI16 relocs' relocated address.
section_out.relocs[i - 1].section_offset = reloc_out.section_offset;
section_out.relocs[i - 1].target_section_offset = reloc_out.target_section_offset;
}
else {
// Orphaned LO16 reloc warnings.
@ -1227,7 +1227,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32) {
// The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset.
// Incorporating the addend will be handled at load-time.
reloc_out.section_offset = rel_symbol_offset;
reloc_out.target_section_offset = rel_symbol_offset;
// TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping).
if (reloc_out.reference_symbol) {
@ -1236,14 +1236,14 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr;
}
// Patch the word in the ROM to incorporate the symbol's value.
uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.section_offset;
uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.target_section_offset;
*reinterpret_cast<uint32_t*>(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word);
}
}
if (reloc_out.type == N64Recomp::RelocType::R_MIPS_26) {
uint32_t rel_immediate = instr.getProcessedImmediate();
reloc_out.section_offset = rel_immediate + rel_symbol_offset;
reloc_out.target_section_offset = rel_immediate + rel_symbol_offset;
}
}
}
@ -1394,7 +1394,7 @@ void dump_context(const N64Recomp::Context& context, const std::unordered_map<ui
// TODO allow emitting MIPS32 relocs for specific sections via a toml option for TLB mapping support.
if (reloc.type == N64Recomp::RelocType::R_MIPS_HI16 || reloc.type == N64Recomp::RelocType::R_MIPS_LO16) {
fmt::print(func_context_file, " {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n",
reloc_names[static_cast<int>(reloc.type)], reloc.address, reloc.section_offset + section.ram_addr);
reloc_names[static_cast<int>(reloc.type)], reloc.address, reloc.target_section_offset + section.ram_addr);
}
}
}

209
src/mod_symbols.cpp Normal file
View file

@ -0,0 +1,209 @@
#include <cstdio>
#include <cstring>

#include "n64recomp.h"
// Header found at the very start of a mod symbol file, common to every format version.
struct FileHeader {
// Signature bytes; must be exactly the 8 characters below with no null terminator.
char magic[8]; // N64RSYMS
// Format version; selects which version-specific parser reads the rest of the file.
uint32_t version;
};
// Version 1 sub-header; immediately follows FileHeader in the file.
struct FileSubHeaderV1 {
// Number of section records that follow this sub-header.
uint32_t num_sections;
};
// Version 1 per-section header. In the file it is followed, in order, by num_funcs FuncV1 records,
// num_relocs RelocV1 records and num_replacements ReplacementV1 records.
struct SectionHeaderV1 {
// Stored as the parsed section's rom address; function offsets are added to it.
uint32_t file_offset;
// Stored as the parsed section's ram address; reloc offsets are added to it.
uint32_t vram;
uint32_t original_vrom; // 0 if this is a new section
// Stored as the parsed section's size.
uint32_t rom_size;
// Stored as the parsed section's bss size.
uint32_t bss_size;
// Number of FuncV1 records following this header.
uint32_t num_funcs;
// Number of RelocV1 records following the function records.
uint32_t num_relocs;
// Number of ReplacementV1 records following the reloc records.
uint32_t num_replacements;
};
// Version 1 function record.
struct FuncV1 {
// Offset of the function from its section's file_offset; the resulting address must be a multiple of 4.
uint32_t section_offset;
// Function size in bytes; must be a multiple of 4 (one 32-bit word per instruction).
uint32_t size;
};
// Version 1 relocation record.
struct RelocV1 {
// Offset of the relocated location from its section's vram.
uint32_t section_offset;
// Relocation kind; converted directly to N64Recomp::RelocType without validation.
uint32_t type;
// Offset of the relocation target within the target section.
uint32_t target_section_offset;
// NOTE(review): the trailing comment says 0 selects the current section, but parse_v1 compares this field
// against 0xFFFFFFFF to select the current section and looks every other value (including 0) up by vrom.
// Reconcile the comment with the parser.
uint32_t target_section_vrom; // 0 means current section
};
// Version 1 function replacement record; each field is copied into a N64Recomp::FunctionReplacement.
struct ReplacementV1 {
// Index of the replacing function (stored as-is, not range checked by the parser).
uint32_t func_index;
// presumably the vram of the original function being replaced — TODO confirm
uint32_t original_vram;
// Converted directly to N64Recomp::ReplacementFlags.
uint32_t flags; // force
};
// Returns a typed view of `count` consecutive `T` records located `offset` bytes into `data`, and
// advances `offset` past them. Returns nullptr and leaves `offset` untouched if the records would not
// fit inside `data`.
//
// The bounds check is phrased as a division on the remaining byte count rather than as
// `offset + sizeof(T) * count`, so that a huge `count` taken from an untrusted file cannot wrap
// around size_t and slip past the check.
//
// NOTE(review): the returned pointer aliases the raw file bytes, so callers read fields in host byte
// order and rely on `data` being suitably aligned for T — confirm both hold for every input source.
template <typename T>
const T* reinterpret_data(std::span<const char> data, size_t& offset, size_t count = 1) {
    if (offset > data.size()) {
        return nullptr;
    }

    // Equivalent to `sizeof(T) * count > remaining_bytes`, but immune to multiplication overflow.
    const size_t remaining_bytes = data.size() - offset;
    if (count > remaining_bytes / sizeof(T)) {
        return nullptr;
    }

    const size_t original_offset = offset;
    offset += sizeof(T) * count;
    return reinterpret_cast<const T*>(data.data() + original_offset);
}
// Reports whether `header` starts with the mod symbol file signature "N64RSYMS".
bool check_magic(const FileHeader* header) {
    // Spelled out per character so the array carries no null terminator and matches the field size exactly.
    static const char expected_magic[] = {'N','6','4','R','S','Y','M','S'};
    static_assert(sizeof(expected_magic) == sizeof(FileHeader::magic));

    for (size_t byte_index = 0; byte_index < sizeof(expected_magic); byte_index++) {
        if (header->magic[byte_index] != expected_magic[byte_index]) {
            return false;
        }
    }
    return true;
}
// Parses the version 1 body of a mod symbol file into `ret` and `mod_context`.
//
// Layout read, starting right after FileHeader: one FileSubHeaderV1, then for each section a
// SectionHeaderV1 followed by its FuncV1, RelocV1 and ReplacementV1 arrays (in that order).
//
//   data             - the whole symbol file, including the already-validated FileHeader.
//   sections_by_vrom - maps a section vrom to a section index; used to resolve reloc target sections.
//   ret              - receives the sections, their relocs, and the functions (words sized but not filled).
//   mod_context      - receives one ModSectionInfo per section, parallel to ret.sections.
//
// Returns true only if every record was read, every function is 4-byte aligned and sized, every reloc
// target section could be resolved, and the records end exactly at the end of `data`. On failure the
// outputs may be partially filled; the caller is expected to discard them.
bool parse_v1(std::span<const char> data, const std::unordered_map<uint32_t, uint16_t>& sections_by_vrom, N64Recomp::Context& ret, N64Recomp::ModContext& mod_context) {
    size_t offset = sizeof(FileHeader);
    const FileSubHeaderV1* subheader = reinterpret_data<FileSubHeaderV1>(data, offset);
    if (subheader == nullptr) {
        return false;
    }

    const size_t num_sections = subheader->num_sections;

    // Every section needs at least its own header, so a count that cannot fit in the remaining bytes
    // is guaranteed to fail below. Reject it up front so a corrupt count cannot trigger a huge allocation.
    if (num_sections > (data.size() - offset) / sizeof(SectionHeaderV1)) {
        return false;
    }

    ret.sections.resize(num_sections);
    mod_context.section_info.resize(num_sections);

    for (size_t section_index = 0; section_index < num_sections; section_index++) {
        const SectionHeaderV1* section_header = reinterpret_data<SectionHeaderV1>(data, offset);
        if (section_header == nullptr) {
            return false;
        }

        N64Recomp::Section& cur_section = ret.sections[section_index];
        N64Recomp::ModSectionInfo& cur_mod_section = mod_context.section_info[section_index];

        cur_section.rom_addr = section_header->file_offset;
        cur_section.ram_addr = section_header->vram;
        cur_section.size = section_header->rom_size;
        cur_section.bss_size = section_header->bss_size;
        cur_section.name = "mod_section_" + std::to_string(section_index);
        cur_mod_section.original_rom_addr = section_header->original_vrom;

        const uint32_t num_funcs = section_header->num_funcs;
        const uint32_t num_relocs = section_header->num_relocs;
        const uint32_t num_replacements = section_header->num_replacements;

        // The three record arrays follow the section header back to back.
        const FuncV1* funcs = reinterpret_data<FuncV1>(data, offset, num_funcs);
        if (funcs == nullptr) {
            printf("Failed to read funcs (count: %u)\n", num_funcs);
            return false;
        }

        const RelocV1* relocs = reinterpret_data<RelocV1>(data, offset, num_relocs);
        if (relocs == nullptr) {
            printf("Failed to read relocs (count: %u)\n", num_relocs);
            return false;
        }

        const ReplacementV1* replacements = reinterpret_data<ReplacementV1>(data, offset, num_replacements);
        if (replacements == nullptr) {
            printf("Failed to read replacements (count: %u)\n", num_replacements);
            return false;
        }

        // Functions from all sections share one list, so this section's functions start at the current end.
        const size_t start_func_index = ret.functions.size();
        ret.functions.resize(ret.functions.size() + num_funcs);
        cur_section.relocs.resize(num_relocs);
        cur_mod_section.replacements.resize(num_replacements);

        for (size_t func_index = 0; func_index < num_funcs; func_index++) {
            const uint32_t func_rom_addr = cur_section.rom_addr + funcs[func_index].section_offset;

            if ((func_rom_addr & 0b11) != 0) {
                printf("Function %zu in section %zu file offset is not a multiple of 4\n", func_index, section_index);
                return false;
            }

            if ((funcs[func_index].size & 0b11) != 0) {
                printf("Function %zu in section %zu size is not a multiple of 4\n", func_index, section_index);
                return false;
            }

            N64Recomp::Function& cur_func = ret.functions[start_func_index + func_index];
            cur_func.rom = func_rom_addr;
            cur_func.words.resize(funcs[func_index].size / sizeof(uint32_t)); // Filled in later
            cur_func.name = "mod_func_" + std::to_string(start_func_index + func_index);
            cur_func.section_index = section_index;
        }

        for (size_t reloc_index = 0; reloc_index < num_relocs; reloc_index++) {
            N64Recomp::Reloc& cur_reloc = cur_section.relocs[reloc_index];
            cur_reloc.address = cur_section.ram_addr + relocs[reloc_index].section_offset;
            cur_reloc.type = static_cast<N64Recomp::RelocType>(relocs[reloc_index].type);
            cur_reloc.target_section_offset = relocs[reloc_index].target_section_offset;

            const uint32_t target_section_vrom = relocs[reloc_index].target_section_vrom;
            // An all-ones vrom marks a reloc that targets the section it lives in.
            if (target_section_vrom == (uint32_t)-1) {
                cur_reloc.target_section = N64Recomp::SectionSelf;
            }
            else {
                // TODO lookup by section index by original vrom
                auto find_section_it = sections_by_vrom.find(target_section_vrom);
                if (find_section_it == sections_by_vrom.end()) {
                    printf("Reloc %zu in section %zu has a target section vrom (%08X) that doesn't match any original section\n",
                        reloc_index, section_index, target_section_vrom);
                    return false;
                }
                cur_reloc.target_section = find_section_it->second;
            }
        }

        for (size_t replacement_index = 0; replacement_index < num_replacements; replacement_index++) {
            N64Recomp::FunctionReplacement& cur_replacement = cur_mod_section.replacements[replacement_index];

            cur_replacement.func_index = replacements[replacement_index].func_index;
            cur_replacement.original_vram = replacements[replacement_index].original_vram;
            cur_replacement.flags = static_cast<N64Recomp::ReplacementFlags>(replacements[replacement_index].flags);
        }
    }

    // Trailing bytes after the last record mean the file does not match the layout it declares.
    return offset == data.size();
}
// Parses a mod symbol file (`data`) and then fills in each parsed function's instruction words from
// the mod binary (`binary`).
//
//   data             - raw bytes of the mod symbol file.
//   binary           - raw bytes of the mod binary; function rom addresses index into it.
//   sections_by_vrom - maps a section vrom to a section index; used to resolve reloc target sections.
//   context_out      - cleared on entry; receives the parsed sections/functions on success.
//   mod_context_out  - cleared on entry; receives the per-section mod data on success.
//
// Returns ModSymbolsError::Good on success. On any failure both outputs are left empty and the
// returned code says why the input was rejected.
N64Recomp::ModSymbolsError N64Recomp::parse_mod_symbols(std::span<const char> data, std::span<const uint8_t> binary, const std::unordered_map<uint32_t, uint16_t>& sections_by_vrom, Context& context_out, ModContext& mod_context_out) {
    size_t offset = 0;
    context_out = {};
    mod_context_out = {};

    // A file too short for the header and a file with the wrong signature are reported the same way.
    const FileHeader* header = reinterpret_data<FileHeader>(data, offset);
    if (header == nullptr || !check_magic(header)) {
        return ModSymbolsError::NotASymbolFile;
    }

    bool valid = false;
    switch (header->version) {
        case 1:
            valid = parse_v1(data, sections_by_vrom, context_out, mod_context_out);
            break;
        default:
            return ModSymbolsError::UnknownSymbolFileVersion;
    }

    if (!valid) {
        // The version parser may have partially filled the outputs before failing.
        context_out = {};
        mod_context_out = {};
        return ModSymbolsError::CorruptSymbolFile;
    }

    // Fill in the words for each function.
    for (auto& cur_func : context_out.functions) {
        const size_t func_rom = cur_func.rom;
        const size_t func_bytes = cur_func.words.size() * sizeof(cur_func.words[0]);

        // Same condition as `func_rom + func_bytes > binary.size()`, written so it cannot overflow.
        if (func_rom > binary.size() || func_bytes > binary.size() - func_rom) {
            context_out = {};
            mod_context_out = {};
            return ModSymbolsError::FunctionOutOfBounds;
        }

        // Copy the raw bytes rather than reading through a casted uint32_t pointer: the byte buffer
        // carries no alignment guarantee. The words keep the byte order they have in the binary.
        if (func_bytes != 0) {
            std::memcpy(cur_func.words.data(), binary.data() + func_rom, func_bytes);
        }
    }

    return ModSymbolsError::Good;
}

View file

@ -170,7 +170,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
if (reloc_section == func.section_index || reloc_section == section.bss_section_index || reloc.reference_symbol) {
// Record the reloc's data.
reloc_type = reloc.type;
reloc_target_section_offset = reloc.section_offset;
reloc_target_section_offset = reloc.target_section_offset;
// Ignore all relocs that aren't HI16 or LO16.
if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16 || reloc_type == N64Recomp::RelocType::R_MIPS_26) {
if (reloc.reference_symbol) {
@ -184,7 +184,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun
const auto& reloc_reference_section = reloc.target_section == N64Recomp::SectionAbsolute ? dummy_section : context.reference_sections[reloc.target_section];
// Resolve HI16 and LO16 reference symbol relocs to non-relocatable sections by patching the instruction immediate.
if (!reloc_reference_section.relocatable && (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16)) {
uint32_t full_immediate = reloc.section_offset + reloc_reference_section.ram_addr;
uint32_t full_immediate = reloc.target_section_offset + reloc_reference_section.ram_addr;
if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16) {
imm = (full_immediate >> 16) + ((full_immediate >> 15) & 1);