From a88bc6e16a6bc185ef5fa36f61001b933bdc97f4 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Tue, 9 Jul 2024 16:40:17 -0400 Subject: [PATCH] Implement mod symbol parsing --- CMakeLists.txt | 1 + include/n64recomp.h | 37 +++++++- src/config.cpp | 4 +- src/main.cpp | 12 +-- src/mod_symbols.cpp | 209 ++++++++++++++++++++++++++++++++++++++++++ src/recompilation.cpp | 4 +- 6 files changed, 253 insertions(+), 14 deletions(-) create mode 100644 src/mod_symbols.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d2e9753..484b548 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ target_sources(N64Recomp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/operations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/cgenerator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/recompilation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/mod_symbols.cpp ) target_include_directories(N64Recomp PUBLIC diff --git a/include/n64recomp.h b/include/n64recomp.h index e92e415..f9d8467 100644 --- a/include/n64recomp.h +++ b/include/n64recomp.h @@ -9,7 +9,6 @@ #include #include #include -#include "rabbitizer.hpp" #ifdef _MSC_VER inline uint32_t byteswap(uint32_t val) { @@ -51,9 +50,9 @@ namespace N64Recomp { struct Reloc { uint32_t address; - uint32_t section_offset; - uint32_t symbol_index; - uint32_t target_section; + uint32_t target_section_offset; + uint32_t symbol_index; // Only used for reference symbols + uint16_t target_section; RelocType type; bool reference_symbol; }; @@ -127,6 +126,36 @@ namespace N64Recomp { }; bool recompile_function(const Context& context, const Function& func, const std::string& recomp_include, std::ofstream& output_file, std::span> static_funcs, bool write_header); + + enum class ReplacementFlags : uint32_t { + Force = 1 << 0, + }; + + struct FunctionReplacement { + uint32_t func_index; + uint32_t original_vram; + ReplacementFlags flags; + }; + + struct ModSectionInfo { + uint32_t original_rom_addr; + std::vector replacements; + + }; + + struct ModContext { + Context base_context; + std::vector section_info; + }; + enum class ModSymbolsError { + Good, + NotASymbolFile, + UnknownSymbolFileVersion, + CorruptSymbolFile, + FunctionOutOfBounds, + }; + + ModSymbolsError parse_mod_symbols(std::span data, std::span binary, const std::unordered_map& sections_by_vrom, Context& context_out, ModContext& mod_context_out); } #endif diff --git a/src/config.cpp b/src/config.cpp index 2fc0b58..9648840 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -462,7 +462,7 @@ bool N64Recomp::Context::from_symbol_file(const std::filesystem::path& symbol_fi throw toml::parse_error("Section entry missing required field(s)", el.source()); } - size_t section_index = ret.sections.size(); + uint16_t section_index = (uint16_t)ret.sections.size(); Section& section = ret.sections.emplace_back(Section{}); section.rom_addr = rom_addr.value(); @@ -561,7 +561,7 @@ bool N64Recomp::Context::from_symbol_file(const std::filesystem::path& symbol_fi Reloc cur_reloc{}; cur_reloc.address = vram.value(); - cur_reloc.section_offset = target_vram.value() - section.ram_addr; + cur_reloc.target_section_offset = target_vram.value() - section.ram_addr; cur_reloc.symbol_index = (uint32_t)-1; cur_reloc.target_section = section_index; cur_reloc.type = reloc_type; diff --git a/src/main.cpp b/src/main.cpp index f0a2fd9..48fe5ac 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1170,7 +1170,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf if (reloc_out.type == N64Recomp::RelocType::R_MIPS_LO16) { uint32_t rel_immediate = instr.getProcessedImmediate(); uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate; - reloc_out.section_offset = full_immediate + rel_symbol_offset - rel_section_vram; + reloc_out.target_section_offset = full_immediate + rel_symbol_offset - rel_section_vram; if (prev_hi) { if (prev_hi_symbol != rel_symbol) { fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n" @@ -1180,7 +1180,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf } // Set the previous HI16 relocs' relocated address. - section_out.relocs[i - 1].section_offset = reloc_out.section_offset; + section_out.relocs[i - 1].target_section_offset = reloc_out.target_section_offset; } else { // Orphaned LO16 reloc warnings. @@ -1227,7 +1227,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32) { // The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset. // Incorporating the addend will be handled at load-time. - reloc_out.section_offset = rel_symbol_offset; + reloc_out.target_section_offset = rel_symbol_offset; // TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping). if (reloc_out.reference_symbol) { @@ -1236,14 +1236,14 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr; } // Patch the word in the ROM to incorporate the symbol's value. - uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.section_offset; + uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.target_section_offset; *reinterpret_cast(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word); } } if (reloc_out.type == N64Recomp::RelocType::R_MIPS_26) { uint32_t rel_immediate = instr.getProcessedImmediate(); - reloc_out.section_offset = rel_immediate + rel_symbol_offset; + reloc_out.target_section_offset = rel_immediate + rel_symbol_offset; } } } @@ -1394,7 +1394,7 @@ void dump_context(const N64Recomp::Context& context, const std::unordered_map(reloc.type)], reloc.address, reloc.section_offset + section.ram_addr); + reloc_names[static_cast(reloc.type)], reloc.address, reloc.target_section_offset + section.ram_addr); } } } diff --git a/src/mod_symbols.cpp b/src/mod_symbols.cpp new file mode 100644 index 0000000..8cfa3a0 --- /dev/null +++ b/src/mod_symbols.cpp @@ -0,0 +1,209 @@ +#include "n64recomp.h" + +struct FileHeader { + char magic[8]; // N64RSYMS + uint32_t version; +}; + +struct FileSubHeaderV1 { + uint32_t num_sections; +}; + +struct SectionHeaderV1 { + uint32_t file_offset; + uint32_t vram; + uint32_t original_vrom; // 0 if this is a new section + uint32_t rom_size; + uint32_t bss_size; + uint32_t num_funcs; + uint32_t num_relocs; + uint32_t num_replacements; +}; + +struct FuncV1 { + uint32_t section_offset; + uint32_t size; +}; + +struct RelocV1 { + uint32_t section_offset; + uint32_t type; + uint32_t target_section_offset; + uint32_t target_section_vrom; // 0 means current section +}; + +struct ReplacementV1 { + uint32_t func_index; + uint32_t original_vram; + uint32_t flags; // force +}; + +template +const T* reinterpret_data(std::span data, size_t& offset, size_t count = 1) { + if (offset + (sizeof(T) * count) > data.size()) { + return nullptr; + } + + size_t original_offset = offset; + offset += sizeof(T) * count; + return reinterpret_cast(data.data() + original_offset); +} + +bool check_magic(const FileHeader* header) { + static const char good_magic[] = {'N','6','4','R','S','Y','M','S'}; + static_assert(sizeof(good_magic) == sizeof(FileHeader::magic)); + + return memcmp(header->magic, good_magic, sizeof(good_magic)) == 0; +} + +bool parse_v1(std::span data, const std::unordered_map& sections_by_vrom, N64Recomp::Context& ret, N64Recomp::ModContext& mod_context) { + size_t offset = sizeof(FileHeader); + const FileSubHeaderV1* subheader = reinterpret_data(data, offset); + if (subheader == nullptr) { + return false; + } + + size_t num_sections = subheader->num_sections; + + ret.sections.resize(num_sections); + mod_context.section_info.resize(num_sections); + for (size_t section_index = 0; section_index < num_sections; section_index++) { + const SectionHeaderV1* section_header = reinterpret_data(data, offset); + if (section_header == nullptr) { + return false; + } + + N64Recomp::Section& cur_section = ret.sections[section_index]; + N64Recomp::ModSectionInfo& cur_mod_section = mod_context.section_info[section_index]; + + cur_section.rom_addr = section_header->file_offset; + cur_section.ram_addr = section_header->vram; + cur_section.size = section_header->rom_size; + cur_section.bss_size = section_header->bss_size; + cur_section.name = "mod_section_" + std::to_string(section_index); + cur_mod_section.original_rom_addr = section_header->original_vrom; + uint32_t num_funcs = section_header->num_funcs; + uint32_t num_relocs = section_header->num_relocs; + uint32_t num_replacements = section_header->num_replacements; + + + const FuncV1* funcs = reinterpret_data(data, offset, num_funcs); + if (funcs == nullptr) { + printf("Failed to read funcs (count: %d)\n", num_funcs); + return false; + } + + const RelocV1* relocs = reinterpret_data(data, offset, num_relocs); + if (relocs == nullptr) { + printf("Failed to read relocs (count: %d)\n", num_relocs); + return false; + } + + const ReplacementV1* replacements = reinterpret_data(data, offset, num_replacements); + if (replacements == nullptr) { + printf("Failed to read replacements (count: %d)\n", num_replacements); + return false; + } + + size_t start_func_index = ret.functions.size(); + ret.functions.resize(ret.functions.size() + num_funcs); + cur_section.relocs.resize(num_relocs); + cur_mod_section.replacements.resize(num_replacements); + + for (size_t func_index = 0; func_index < num_funcs; func_index++) { + uint32_t func_rom_addr = cur_section.rom_addr + funcs[func_index].section_offset; + if ((func_rom_addr & 0b11) != 0) { + printf("Function %zu in section %zu file offset is not a multiple of 4\n", func_index, section_index); + return false; + } + + if ((funcs[func_index].size & 0b11) != 0) { + printf("Function %zu in section %zu size is not a multiple of 4\n", func_index, section_index); + return false; + } + + N64Recomp::Function& cur_func = ret.functions[start_func_index + func_index]; + cur_func.rom = cur_section.rom_addr + funcs[func_index].section_offset; + cur_func.words.resize(funcs[func_index].size / sizeof(uint32_t)); // Filled in later + cur_func.name = "mod_func_" + std::to_string(start_func_index + func_index); + cur_func.section_index = section_index; + } + + for (size_t reloc_index = 0; reloc_index < num_relocs; reloc_index++) { + N64Recomp::Reloc& cur_reloc = cur_section.relocs[reloc_index]; + cur_reloc.address = cur_section.ram_addr + relocs[reloc_index].section_offset; + cur_reloc.type = static_cast(relocs[reloc_index].type); + cur_reloc.target_section_offset = relocs[reloc_index].target_section_offset; + uint32_t target_section_vrom = relocs[reloc_index].target_section_vrom; + if (target_section_vrom == (uint32_t)-1) { + cur_reloc.target_section = N64Recomp::SectionSelf; + } + else { + // TODO lookup by section index by original vrom + auto find_section_it = sections_by_vrom.find(target_section_vrom); + if (find_section_it == sections_by_vrom.end()) { + printf("Reloc %zu in section %zu size has a target section vrom (%08X) that doesn't match any original section\n", + reloc_index, section_index, target_section_vrom); + return false; + } + cur_reloc.target_section = find_section_it->second; + } + } + + for (size_t replacement_index = 0; replacement_index < num_replacements; replacement_index++) { + N64Recomp::FunctionReplacement& cur_replacement = cur_mod_section.replacements[replacement_index]; + + cur_replacement.func_index = replacements[replacement_index].func_index; + cur_replacement.original_vram = replacements[replacement_index].original_vram; + cur_replacement.flags = static_cast(replacements[replacement_index].flags); + } + } + + return offset == data.size(); +} + +N64Recomp::ModSymbolsError N64Recomp::parse_mod_symbols(std::span data, std::span binary, const std::unordered_map& sections_by_vrom, Context& context_out, ModContext& mod_context_out) { + size_t offset = 0; + context_out = {}; + mod_context_out = {}; + const FileHeader* header = reinterpret_data(data, offset); + + if (header == nullptr) { + return ModSymbolsError::NotASymbolFile; + } + + if (!check_magic(header)) { + return ModSymbolsError::NotASymbolFile; + } + + bool valid = false; + + switch (header->version) { + case 1: + valid = parse_v1(data, sections_by_vrom, context_out, mod_context_out); + break; + default: + return ModSymbolsError::UnknownSymbolFileVersion; + } + + if (!valid) { + context_out = {}; + mod_context_out = {}; + return ModSymbolsError::CorruptSymbolFile; + } + + // Fill in the words for each function. + for (auto& cur_func : context_out.functions) { + if (cur_func.rom + cur_func.words.size() * sizeof(cur_func.words[0]) > binary.size()) { + context_out = {}; + mod_context_out = {}; + return ModSymbolsError::FunctionOutOfBounds; + } + const uint32_t* func_rom = reinterpret_cast(binary.data() + cur_func.rom); + for (size_t word_index = 0; word_index < cur_func.words.size(); word_index++) { + cur_func.words[word_index] = func_rom[word_index]; + } + } + + return ModSymbolsError::Good; +} diff --git a/src/recompilation.cpp b/src/recompilation.cpp index 64dac35..6a61207 100644 --- a/src/recompilation.cpp +++ b/src/recompilation.cpp @@ -170,7 +170,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun if (reloc_section == func.section_index || reloc_section == section.bss_section_index || reloc.reference_symbol) { // Record the reloc's data. reloc_type = reloc.type; - reloc_target_section_offset = reloc.section_offset; + reloc_target_section_offset = reloc.target_section_offset; // Ignore all relocs that aren't HI16 or LO16. if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16 || reloc_type == N64Recomp::RelocType::R_MIPS_26) { if (reloc.reference_symbol) { @@ -184,7 +184,7 @@ bool process_instruction(const N64Recomp::Context& context, const N64Recomp::Fun const auto& reloc_reference_section = reloc.target_section == N64Recomp::SectionAbsolute ? dummy_section : context.reference_sections[reloc.target_section]; // Resolve HI16 and LO16 reference symbol relocs to non-relocatable sections by patching the instruction immediate. if (!reloc_reference_section.relocatable && (reloc_type == N64Recomp::RelocType::R_MIPS_HI16 || reloc_type == N64Recomp::RelocType::R_MIPS_LO16)) { - uint32_t full_immediate = reloc.section_offset + reloc_reference_section.ram_addr; + uint32_t full_immediate = reloc.target_section_offset + reloc_reference_section.ram_addr; if (reloc_type == N64Recomp::RelocType::R_MIPS_HI16) { imm = (full_immediate >> 16) + ((full_immediate >> 15) & 1);