From d8dcb43d5a6540b80e54e3a448d496f6bdb323e8 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Tue, 16 Jul 2024 22:24:25 -0400 Subject: [PATCH] Move elf parsing into a separate library --- CMakeLists.txt | 42 +- include/n64recomp.h | 4 + src/elf.cpp | 570 +++++++++++++++++++ src/main.cpp | 1245 +----------------------------------------- src/symbol_lists.cpp | 660 ++++++++++++++++++++++ 5 files changed, 1276 insertions(+), 1245 deletions(-) create mode 100644 src/elf.cpp create mode 100644 src/symbol_lists.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 484b548..0be4a0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,18 @@ add_subdirectory(lib/fmt) set(TOML_ENABLE_FORMATTERS OFF) add_subdirectory(lib/tomlplusplus) +# Hardcoded symbol lists (separate library to not force a dependency on N64Recomp) +project(SymbolLists) +add_library(SymbolLists) + +target_sources(SymbolLists PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src/symbol_lists.cpp +) + +target_include_directories(SymbolLists PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) + # N64 recompiler core library project(N64Recomp) add_library(N64Recomp) @@ -75,9 +87,29 @@ target_sources(N64Recomp PRIVATE ) target_include_directories(N64Recomp PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/include") + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) -target_link_libraries(N64Recomp fmt rabbitizer tomlplusplus::tomlplusplus) +target_link_libraries(N64Recomp SymbolLists fmt rabbitizer tomlplusplus::tomlplusplus) + +# N64 recompiler elf parsing +project(N64RecompElf) +add_library(N64RecompElf) + +target_sources(N64RecompElf PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src/elf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/symbol_lists.cpp +) + +target_include_directories(N64RecompElf PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) + +target_include_directories(N64RecompElf PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/lib/ELFIO" +) + +target_link_libraries(N64RecompElf fmt) # N64 recompiler executable project(N64RecompCLI) @@ -89,10 +121,10 @@ 
target_sources(N64RecompCLI PRIVATE ) target_include_directories(N64RecompCLI PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/lib/ELFIO" - "${CMAKE_CURRENT_SOURCE_DIR}/include") + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) -target_link_libraries(N64RecompCLI fmt rabbitizer tomlplusplus::tomlplusplus N64Recomp) +target_link_libraries(N64RecompCLI fmt rabbitizer tomlplusplus::tomlplusplus N64Recomp N64RecompElf) set_target_properties(N64RecompCLI PROPERTIES OUTPUT_NAME N64Recomp) # RSP recompiler diff --git a/include/n64recomp.h b/include/n64recomp.h index c3a6f02..dbe597b 100644 --- a/include/n64recomp.h +++ b/include/n64recomp.h @@ -107,6 +107,10 @@ namespace N64Recomp { using DataSymbolMap = std::unordered_map>; + extern const std::unordered_set reimplemented_funcs; + extern const std::unordered_set ignored_funcs; + extern const std::unordered_set renamed_funcs; + struct Context { std::vector
sections; std::vector functions; diff --git a/src/elf.cpp b/src/elf.cpp new file mode 100644 index 0000000..337945d --- /dev/null +++ b/src/elf.cpp @@ -0,0 +1,570 @@ +#include "fmt/format.h" +// #include "fmt/ostream.h" + +#include "n64recomp.h" +#include "elfio/elfio.hpp" + +bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map>& data_syms) { + bool found_entrypoint_func = false; + ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; + fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); + + std::unordered_map bss_section_to_target_section{}; + + // Create a mapping of bss section to the corresponding non-bss section. This is only used when dumping context in order + // for patches and mods to correctly relocate symbols in bss. This mapping only matters for relocatable sections. + if (dumping_context) { + // Process bss and reloc sections + for (size_t cur_section_index = 0; cur_section_index < context.sections.size(); cur_section_index++) { + const N64Recomp::Section& cur_section = context.sections[cur_section_index]; + // Check if a bss section was found that corresponds with this section. 
+ if (cur_section.bss_section_index != (uint16_t)-1) { + bss_section_to_target_section[cur_section.bss_section_index] = cur_section_index; + } + } + } + + for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) { + std::string name; + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + bool ignored = false; + bool reimplemented = false; + bool recorded_symbol = false; + + // Read symbol properties + symbols.get_symbol(sym_index, name, value, size, bind, type, + section_index, other); + + if (section_index == ELFIO::SHN_ABS && elf_config.use_absolute_symbols) { + uint32_t vram = static_cast(value); + context.functions_by_vram[vram].push_back(context.functions.size()); + + context.functions.emplace_back( + vram, + 0, + std::vector{}, + std::move(name), + 0, + true, + reimplemented, + false + ); + continue; + } + + if (section_index < context.sections.size()) { + // Check if this symbol is the entrypoint + if (elf_config.has_entrypoint && value == elf_config.entrypoint_address && type == ELFIO::STT_FUNC) { + if (found_entrypoint_func) { + fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); + return false; + } + found_entrypoint_func = true; + fmt::print("Found entrypoint, original name: {}\n", name); + size = 0x50; // dummy size for entrypoints, should cover them all + name = "recomp_entrypoint"; + } + + // Check if this symbol has a size override + auto size_find = elf_config.manually_sized_funcs.find(name); + if (size_find != elf_config.manually_sized_funcs.end()) { + size = size_find->second; + type = ELFIO::STT_FUNC; + } + + if (!dumping_context) { + if (N64Recomp::reimplemented_funcs.contains(name)) { + reimplemented = true; + name = name + "_recomp"; + ignored = true; + } else if (N64Recomp::ignored_funcs.contains(name)) { + name = name + "_recomp"; + ignored = true; + } + } + + auto& section = context.sections[section_index]; + + // Check 
if this symbol is a function or has no type (like a regular glabel would) + // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls + if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) { + if (!dumping_context) { + if (N64Recomp::renamed_funcs.contains(name)) { + name = name + "_recomp"; + ignored = false; + } + } + + if (section_index < context.sections.size()) { + auto section_offset = value - elf_file.sections[section_index]->get_address(); + const uint32_t* words = reinterpret_cast(elf_file.sections[section_index]->get_data() + section_offset); + uint32_t vram = static_cast(value); + uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; + uint32_t rom_address = static_cast(section_offset + section.rom_addr); + + section.function_addrs.push_back(vram); + context.functions_by_vram[vram].push_back(context.functions.size()); + + // Find the entrypoint by rom address in case it doesn't have vram as its value + if (elf_config.has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { + vram = elf_config.entrypoint_address; + found_entrypoint_func = true; + name = "recomp_entrypoint"; + if (size == 0) { + num_instructions = 0x50 / 4; + } + } + + // Suffix local symbols to prevent name conflicts. + if (bind == ELFIO::STB_LOCAL) { + name = fmt::format("{}_{:08X}", name, rom_address); + } + + if (num_instructions > 0) { + context.section_functions[section_index].push_back(context.functions.size()); + recorded_symbol = true; + } + context.functions_by_name[name] = context.functions.size(); + + std::vector insn_words(num_instructions); + insn_words.assign(words, words + num_instructions); + + context.functions.emplace_back( + vram, + rom_address, + std::move(insn_words), + name, + section_index, + ignored, + reimplemented + ); + } else { + // TODO is this case needed anymore? 
+ uint32_t vram = static_cast(value); + section.function_addrs.push_back(vram); + context.functions_by_vram[vram].push_back(context.functions.size()); + context.functions.emplace_back( + vram, + 0, + std::vector{}, + name, + section_index, + ignored, + reimplemented + ); + } + } + } + + // The symbol wasn't detected as a function, so add it to the data symbols if the context is being dumped. + if (!recorded_symbol && dumping_context && !name.empty()) { + uint32_t vram = static_cast(value); + + // Place this symbol in the absolute symbol list if it's in the absolute section. + uint16_t target_section_index = section_index; + if (section_index == ELFIO::SHN_ABS) { + target_section_index = N64Recomp::SectionAbsolute; + } + else if (section_index >= context.sections.size()) { + fmt::print("Symbol \"{}\" not in a valid section ({})\n", name, section_index); + } + + // Move this symbol into the corresponding non-bss section if it's in a bss section. + auto find_bss_it = bss_section_to_target_section.find(target_section_index); + if (find_bss_it != bss_section_to_target_section.end()) { + target_section_index = find_bss_it->second; + } + + data_syms[target_section_index].emplace_back( + vram, + std::move(name) + ); + } + } + + return found_entrypoint_func; +} + +struct SegmentEntry { + ELFIO::Elf64_Off data_offset; + ELFIO::Elf64_Addr physical_address; + ELFIO::Elf_Xword memory_size; +}; + +std::optional get_segment(const std::vector& segments, ELFIO::Elf_Xword section_size, ELFIO::Elf64_Off section_offset) { + // A linear search is safest even if the segment list is sorted, as there may be overlapping segments + for (size_t i = 0; i < segments.size(); i++) { + const auto& segment = segments[i]; + + // Check that the section's data in the elf file is within bounds of the segment's data + if (section_offset >= segment.data_offset && section_offset + section_size <= segment.data_offset + segment.memory_size) { + return i; + } + } + + return std::nullopt; +} + 
+ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfParsingConfig& elf_config, const ELFIO::elfio& elf_file) { + ELFIO::section* symtab_section = nullptr; + std::vector segments{}; + segments.resize(elf_file.segments.size()); + + // Copy the data for each segment into the segment entry list + for (size_t segment_index = 0; segment_index < elf_file.segments.size(); segment_index++) { + const auto& segment = *elf_file.segments[segment_index]; + segments[segment_index].data_offset = segment.get_offset(); + segments[segment_index].physical_address = segment.get_physical_address(); + segments[segment_index].memory_size = segment.get_file_size(); + } + + //// Sort the segments by physical address + //std::sort(segments.begin(), segments.end(), + // [](const SegmentEntry& lhs, const SegmentEntry& rhs) { + // return lhs.data_offset < rhs.data_offset; + // } + //); + + std::unordered_map reloc_sections_by_name; + std::unordered_map bss_sections_by_name; + + // Iterate over every section to record rom addresses and find the symbol table + fmt::print("Sections\n"); + for (const auto& section : elf_file.sections) { + auto& section_out = context.sections[section->get_index()]; + //fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size()); + // Set the rom address of this section to the current accumulated ROM size + section_out.ram_addr = section->get_address(); + section_out.size = section->get_size(); + ELFIO::Elf_Word type = section->get_type(); + std::string section_name = section->get_name(); + + // Check if this section is the symbol table and record it if so + if (type == ELFIO::SHT_SYMTAB) { + symtab_section = section.get(); + } + + if (elf_config.relocatable_sections.contains(section_name)) { + section_out.relocatable = true; + } + + // Check if this section is a reloc section + if (type == ELFIO::SHT_REL) { + // If it is, determine the name of the section it relocates 
+ if (!section_name.starts_with(".rel")) { + fmt::print(stderr, "Could not determine corresponding section for reloc section {}\n", section_name.c_str()); + return nullptr; + } + + std::string reloc_target_section = section_name.substr(strlen(".rel")); + + // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup. + // Alternatively, if this recompilation uses reference symbols then record all reloc sections. + if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) { + reloc_sections_by_name[reloc_target_section] = section.get(); + } + } + + // If the section is bss (SHT_NOBITS) and ends with the bss suffix, add it to the bss section map + if (type == ELFIO::SHT_NOBITS && section_name.ends_with(elf_config.bss_section_suffix)) { + std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size()); + + // If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup + if (elf_config.relocatable_sections.contains(bss_target_section)) { + bss_sections_by_name[bss_target_section] = section.get(); + } + } + + // If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC), + // find this section's rom address and copy it into the rom + if (type != ELFIO::SHT_NOBITS && section->get_flags() & ELFIO::SHF_ALLOC && section->get_size() != 0) { + //// Find the segment this section is in to determine the physical (rom) address of the section + //auto segment_it = std::upper_bound(segments.begin(), segments.end(), section->get_offset(), + // [](ELFIO::Elf64_Off section_offset, const SegmentEntry& segment) { + // return section_offset < segment.data_offset; + // } + //); + //if (segment_it == segments.begin()) { + // fmt::print(stderr, "Could not find segment that section {} belongs to!\n", section_name.c_str()); + // return nullptr; + //} + //// Upper bound 
returns the iterator after the element we're looking for, so rewind by one + //// This is safe because we checked if segment_it was segments.begin() already, which is the minimum value it could be + //const SegmentEntry& segment = *(segment_it - 1); + //// Check to be sure that the section is actually in this segment + //if (section->get_offset() >= segment.data_offset + segment.memory_size) { + // fmt::print(stderr, "Section {} out of range of segment at offset 0x{:08X}\n", section_name.c_str(), segment.data_offset); + // return nullptr; + //} + std::optional segment_index = get_segment(segments, section_out.size, section->get_offset()); + if (!segment_index.has_value()) { + fmt::print(stderr, "Could not find segment that section {} belongs to!\n", section_name.c_str()); + return nullptr; + } + const SegmentEntry& segment = segments[segment_index.value()]; + // Calculate the rom address based on this section's offset into the segment and the segment's rom address + section_out.rom_addr = segment.physical_address + (section->get_offset() - segment.data_offset); + // Resize the output rom if needed to fit this section + size_t required_rom_size = section_out.rom_addr + section_out.size; + if (required_rom_size > context.rom.size()) { + context.rom.resize(required_rom_size); + } + // Copy this section's data into the rom + std::copy(section->get_data(), section->get_data() + section->get_size(), &context.rom[section_out.rom_addr]); + } else { + // Otherwise mark this section as having an invalid rom address + section_out.rom_addr = (uint32_t)-1; + } + // Check if this section is marked as executable, which means it has code in it + if (section->get_flags() & ELFIO::SHF_EXECINSTR) { + section_out.executable = true; + } + section_out.name = section_name; + } + + if (symtab_section == nullptr) { + fmt::print(stderr, "No symtab section found\n"); + return nullptr; + } + + ELFIO::symbol_section_accessor symbol_accessor{ elf_file, symtab_section }; + auto num_syms = 
symbol_accessor.get_symbols_num(); + + // TODO make sure that a reloc section was found for every section marked as relocatable + + // Process bss and reloc sections + for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { + N64Recomp::Section& section_out = context.sections[section_index]; + // Check if a bss section was found that corresponds with this section + auto bss_find = bss_sections_by_name.find(section_out.name); + if (bss_find != bss_sections_by_name.end()) { + section_out.bss_section_index = bss_find->second->get_index(); + section_out.bss_size = bss_find->second->get_size(); + } + + if (!context.reference_symbols.empty() || section_out.relocatable) { + // Check if a reloc section was found that corresponds with this section + auto reloc_find = reloc_sections_by_name.find(section_out.name); + if (reloc_find != reloc_sections_by_name.end()) { + // Create an accessor for the reloc section + ELFIO::relocation_section_accessor rel_accessor{ elf_file, reloc_find->second }; + // Allocate space for the relocs in this section + section_out.relocs.resize(rel_accessor.get_entries_num()); + // Track whether the previous reloc was a HI16 and its previous full_immediate + bool prev_hi = false; + // Track whether the previous reloc was a LO16 + bool prev_lo = false; + uint32_t prev_hi_immediate = 0; + uint32_t prev_hi_symbol = std::numeric_limits::max(); + + for (size_t i = 0; i < section_out.relocs.size(); i++) { + // Get the current reloc + ELFIO::Elf64_Addr rel_offset; + ELFIO::Elf_Word rel_symbol; + unsigned int rel_type; + ELFIO::Elf_Sxword bad_rel_addend; // Addends aren't encoded in the reloc, so ignore this one + rel_accessor.get_entry(i, rel_offset, rel_symbol, rel_type, bad_rel_addend); + + N64Recomp::Reloc& reloc_out = section_out.relocs[i]; + + // Get the real full_immediate by extracting the immediate from the instruction + uint32_t reloc_rom_addr = section_out.rom_addr + rel_offset - section_out.ram_addr; + uint32_t 
reloc_rom_word = byteswap(*reinterpret_cast(context.rom.data() + reloc_rom_addr)); + //context.rom section_out.rom_addr; + + reloc_out.address = rel_offset; + reloc_out.symbol_index = rel_symbol; + reloc_out.type = static_cast(rel_type); + + std::string rel_symbol_name; + ELFIO::Elf64_Addr rel_symbol_value; + ELFIO::Elf_Xword rel_symbol_size; + unsigned char rel_symbol_bind; + unsigned char rel_symbol_type; + ELFIO::Elf_Half rel_symbol_section_index; + unsigned char rel_symbol_other; + + bool found_rel_symbol = symbol_accessor.get_symbol( + rel_symbol, rel_symbol_name, rel_symbol_value, rel_symbol_size, rel_symbol_bind, rel_symbol_type, rel_symbol_section_index, rel_symbol_other); + + uint32_t rel_section_vram = section_out.ram_addr; + uint32_t rel_symbol_offset = 0; + + // Check if the symbol is undefined and to know whether to look for it in the reference symbols. + if (rel_symbol_section_index == ELFIO::SHN_UNDEF) { + // Undefined sym, check the reference symbols. + auto sym_find_it = context.reference_symbols_by_name.find(rel_symbol_name); + if (sym_find_it == context.reference_symbols_by_name.end()) { + fmt::print(stderr, "Undefined symbol: {}, not found in input or reference symbols!\n", + rel_symbol_name); + return nullptr; + } + + reloc_out.reference_symbol = true; + // Replace the reloc's symbol index with the index into the reference symbol array. 
+ reloc_out.symbol_index = sym_find_it->second; + rel_section_vram = 0; + rel_symbol_offset = context.reference_symbols[reloc_out.symbol_index].section_offset; + reloc_out.target_section = context.reference_symbols[reloc_out.symbol_index].section_index; + + bool target_section_relocatable = false; + + if (reloc_out.target_section != N64Recomp::SectionAbsolute && context.reference_sections[reloc_out.target_section].relocatable) { + target_section_relocatable = true; + } + + if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32 && target_section_relocatable) { + fmt::print(stderr, "Cannot reference {} in a statically initialized variable as it's defined in a relocatable section!\n", + rel_symbol_name); + return nullptr; + } + } + else { + reloc_out.reference_symbol = false; + reloc_out.target_section = rel_symbol_section_index; + } + + // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf) + if (reloc_out.type == N64Recomp::RelocType::R_MIPS_LO16) { + uint32_t rel_immediate = reloc_rom_word & 0xFFFF; + uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate; + reloc_out.target_section_offset = full_immediate + rel_symbol_offset - rel_section_vram; + if (prev_hi) { + if (prev_hi_symbol != rel_symbol) { + fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n" + " LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", + i, section_out.name, reloc_out.symbol_index, reloc_out.address); + return nullptr; + } + + // Set the previous HI16 relocs' relocated address. + section_out.relocs[i - 1].target_section_offset = reloc_out.target_section_offset; + } + else { + // Orphaned LO16 reloc warnings. + if (elf_config.unpaired_lo16_warnings) { + if (prev_lo) { + // Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior. 
+ if (prev_hi_symbol != rel_symbol) { + fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n", + i, section_out.name, reloc_out.symbol_index, reloc_out.address); + } + } + else { + fmt::print(stderr, "[WARN] Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", + i, section_out.name, reloc_out.symbol_index, reloc_out.address); + } + } + // Even though this is an orphaned LO16 reloc, the previous calculation for the addend still follows the MIPS System V ABI documentation: + // "R_MIPS_LO16 entries without an R_MIPS_HI16 entry immediately preceding are orphaned and the previously defined + // R_MIPS_HI16 is used for computing the addend." + // Therefore, nothing needs to be done to the section_offset member. + } + prev_lo = true; + } else { + if (prev_hi) { + // This is an invalid elf as the MIPS System V ABI documentation states: + // "Each relocation type of R_MIPS_HI16 must have an associated R_MIPS_LO16 entry + // immediately following it in the list of relocations." + fmt::print(stderr, "Unpaired HI16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", + i - 1, section_out.name, section_out.relocs[i - 1].symbol_index, section_out.relocs[i - 1].address); + return nullptr; + } + prev_lo = false; + } + + if (reloc_out.type == N64Recomp::RelocType::R_MIPS_HI16) { + uint32_t rel_immediate = reloc_rom_word & 0xFFFF; + prev_hi = true; + prev_hi_immediate = rel_immediate; + prev_hi_symbol = rel_symbol; + } else { + prev_hi = false; + } + + if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32) { + // The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset. + // Incorporating the addend will be handled at load-time. 
+ reloc_out.target_section_offset = rel_symbol_offset; + // TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping). + + if (reloc_out.reference_symbol) { + uint32_t reloc_target_section_addr = 0; + if (reloc_out.target_section != N64Recomp::SectionAbsolute) { + reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr; + } + // Patch the word in the ROM to incorporate the symbol's value. + uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.target_section_offset; + *reinterpret_cast(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word); + } + } + + if (reloc_out.type == N64Recomp::RelocType::R_MIPS_26) { + uint32_t rel_immediate = (reloc_rom_word & 0x3FFFFFF) << 2; + reloc_out.target_section_offset = rel_immediate + rel_symbol_offset; + } + } + } + + // Sort this section's relocs by address, which allows for binary searching and more efficient iteration during recompilation. + // This is safe to do as the entire full_immediate in present in relocs due to the pairing that was done earlier, so the HI16 does not + // need to directly preceed the matching LO16 anymore. 
+ std::sort(section_out.relocs.begin(), section_out.relocs.end(), + [](const N64Recomp::Reloc& a, const N64Recomp::Reloc& b) { + return a.address < b.address; + } + ); + } + } + + return symtab_section; +} + +static void setup_context_for_elf(N64Recomp::Context& context, const ELFIO::elfio& elf_file) { + context.sections.resize(elf_file.sections.size()); + context.section_functions.resize(elf_file.sections.size()); + context.functions.reserve(1024); + context.functions_by_vram.reserve(context.functions.capacity()); + context.functions_by_name.reserve(context.functions.capacity()); + context.rom.reserve(8 * 1024 * 1024); +} + +bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& elf_config, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out) { + ELFIO::elfio elf_file; + + if (!elf_file.load(elf_file_path.string())) { + fmt::print("Elf file not found\n"); + return false; + } + + if (elf_file.get_class() != ELFIO::ELFCLASS32) { + fmt::print("Incorrect elf class\n"); + return false; + } + + if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) { + fmt::print("Incorrect endianness\n"); + return false; + } + + setup_context_for_elf(out, elf_file); + + // Read all of the sections in the elf and look for the symbol table section + ELFIO::section* symtab_section = read_sections(out, elf_config, elf_file); + + // If no symbol table was found then exit + if (symtab_section == nullptr) { + fmt::print("No symbol table section found\n"); + return false; + } + + // Read all of the symbols in the elf and look for the entrypoint function + found_entrypoint_out = read_symbols(out, elf_file, symtab_section, elf_config, for_dumping_context, data_syms_out); + + return true; +} diff --git a/src/main.cpp b/src/main.cpp index ee729a8..3c8b34d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,7 +6,6 @@ #include #include "rabbitizer.hpp" -#include "elfio/elfio.hpp" #include "fmt/format.h" 
#include "fmt/ostream.h" @@ -14,851 +13,6 @@ #include "config.h" #include -std::unordered_set reimplemented_funcs{ - // OS initialize functions - "__osInitialize_common", - "osInitialize", - "osGetMemSize", - // Audio interface functions - "osAiGetLength", - "osAiGetStatus", - "osAiSetFrequency", - "osAiSetNextBuffer", - // Video interface functions - "osViSetXScale", - "osViSetYScale", - "osCreateViManager", - "osViBlack", - "osViSetSpecialFeatures", - "osViGetCurrentFramebuffer", - "osViGetNextFramebuffer", - "osViSwapBuffer", - "osViSetMode", - "osViSetEvent", - // RDP functions - "osDpSetNextBuffer", - // RSP functions - "osSpTaskLoad", - "osSpTaskStartGo", - "osSpTaskYield", - "osSpTaskYielded", - "__osSpSetPc", - // Controller functions - "osContInit", - "osContStartReadData", - "osContGetReadData", - "osContStartQuery", - "osContGetQuery", - "osContSetCh", - // EEPROM functions - "osEepromProbe", - "osEepromWrite", - "osEepromLongWrite", - "osEepromRead", - "osEepromLongRead", - // Rumble functions - "__osMotorAccess", - "osMotorInit", - "osMotorStart", - "osMotorStop", - // PFS functions - "osPfsInitPak", - "osPfsFreeBlocks", - "osPfsAllocateFile", - "osPfsDeleteFile", - "osPfsFileState", - "osPfsFindFile", - "osPfsReadWriteFile", - // Parallel interface (cartridge, DMA, etc.) 
functions - "osCartRomInit", - "osCreatePiManager", - "osPiStartDma", - "osEPiStartDma", - "osPiGetStatus", - "osEPiRawStartDma", - "osEPiReadIo", - // Flash saving functions - "osFlashInit", - "osFlashReadStatus", - "osFlashReadId", - "osFlashClearStatus", - "osFlashAllErase", - "osFlashAllEraseThrough", - "osFlashSectorErase", - "osFlashSectorEraseThrough", - "osFlashCheckEraseEnd", - "osFlashWriteBuffer", - "osFlashWriteArray", - "osFlashReadArray", - "osFlashChange", - // Threading functions - "osCreateThread", - "osStartThread", - "osStopThread", - "osDestroyThread", - "osSetThreadPri", - "osGetThreadPri", - "osGetThreadId", - // Message Queue functions - "osCreateMesgQueue", - "osRecvMesg", - "osSendMesg", - "osJamMesg", - "osSetEventMesg", - // Timer functions - "osGetTime", - "osSetTimer", - "osStopTimer", - // Voice functions - "osVoiceSetWord", - "osVoiceCheckWord", - "osVoiceStopReadData", - "osVoiceInit", - "osVoiceMaskDictionary", - "osVoiceStartReadData", - "osVoiceControlGain", - "osVoiceGetReadData", - "osVoiceClearDictionary", - // interrupt functions - "osSetIntMask", - "__osDisableInt", - "__osRestoreInt", - // TLB functions - "osVirtualToPhysical", - // Coprocessor 0/1 functions - "osGetCount", - "__osSetFpcCsr", - // Cache funcs - "osInvalDCache", - "osInvalICache", - "osWritebackDCache", - "osWritebackDCacheAll", - // Debug functions - "is_proutSyncPrintf", - "__checkHardware_msp", - "__checkHardware_kmc", - "__checkHardware_isv", - "__osInitialize_msp", - "__osInitialize_kmc", - "__osInitialize_isv", - "__osRdbSend", - // ido math routines - "__ull_div", - "__ll_div", - "__ll_mul", - "__ull_rem", - "__ull_to_d", - "__ull_to_f", -}; - -std::unordered_set ignored_funcs { - // OS initialize functions - "__createSpeedParam", - "__osInitialize_common", - "osInitialize", - "osGetMemSize", - // Audio interface functions - "osAiGetLength", - "osAiGetStatus", - "osAiSetFrequency", - "osAiSetNextBuffer", - "__osAiDeviceBusy", - // Video interface 
functions - "osViBlack", - "osViFade", - "osViGetCurrentField", - "osViGetCurrentFramebuffer", - "osViGetCurrentLine", - "osViGetCurrentMode", - "osViGetNextFramebuffer", - "osViGetStatus", - "osViRepeatLine", - "osViSetEvent", - "osViSetMode", - "osViSetSpecialFeatures", - "osViSetXScale", - "osViSetYScale", - "osViSwapBuffer", - "osCreateViManager", - "viMgrMain", - "__osViInit", - "__osViSwapContext", - "__osViGetCurrentContext", - // RDP functions - "osDpGetCounters", - "osDpSetStatus", - "osDpGetStatus", - "osDpSetNextBuffer", - "__osDpDeviceBusy", - // RSP functions - "osSpTaskLoad", - "osSpTaskStartGo", - "osSpTaskYield", - "osSpTaskYielded", - "__osSpDeviceBusy", - "__osSpGetStatus", - "__osSpRawStartDma", - "__osSpRawReadIo", - "__osSpRawWriteIo", - "__osSpSetPc", - "__osSpSetStatus", - // Controller functions - "osContGetQuery", - "osContGetReadData", - "osContInit", - "osContReset", - "osContSetCh", - "osContStartQuery", - "osContStartReadData", - "__osContAddressCrc", - "__osContDataCrc", - "__osContGetInitData", - "__osContRamRead", - "__osContRamWrite", - "__osContChannelReset", - // EEPROM functions - "osEepromLongRead", - "osEepromLongWrite", - "osEepromProbe", - "osEepromRead", - "osEepromWrite", - "__osEepStatus", - // Rumble functions - "osMotorInit", - "osMotorStart", - "osMotorStop", - "__osMotorAccess", - "_MakeMotorData", - // Pack functions - "__osCheckId", - "__osCheckPackId", - "__osGetId", - "__osPfsRWInode", - "__osRepairPackId", - "__osPfsSelectBank", - "__osCheckPackId", - "ramromMain", - // PFS functions - "osPfsAllocateFile", - "osPfsChecker", - "osPfsDeleteFile", - "osPfsFileState", - "osPfsFindFile", - "osPfsFreeBlocks", - "osPfsGetLabel", - "osPfsInit", - "osPfsInitPak", - "osPfsIsPlug", - "osPfsNumFiles", - "osPfsRepairId", - "osPfsReadWriteFile", - "__osPackEepReadData", - "__osPackEepWriteData", - "__osPackRamReadData", - "__osPackRamWriteData", - "__osPackReadData", - "__osPackRequestData", - "__osPfsGetInitData", - 
"__osPfsGetOneChannelData", - "__osPfsGetStatus", - "__osPfsRequestData", - "__osPfsRequestOneChannel", - "__osPfsCreateAccessQueue", - "__osPfsCheckRamArea", - "__osPfsGetNextPage", - // Low level serial interface functions - "__osSiDeviceBusy", - "__osSiGetStatus", - "__osSiRawStartDma", - "__osSiRawReadIo", - "__osSiRawWriteIo", - "__osSiCreateAccessQueue", - "__osSiGetAccess", - "__osSiRelAccess", - // Parallel interface (cartridge, DMA, etc.) functions - "osCartRomInit", - "osLeoDiskInit", - "osCreatePiManager", - "__osDevMgrMain", - "osPiGetCmdQueue", - "osPiGetStatus", - "osPiReadIo", - "osPiStartDma", - "osPiWriteIo", - "osEPiGetDeviceType", - "osEPiStartDma", - "osEPiWriteIo", - "osEPiReadIo", - "osPiRawStartDma", - "osPiRawReadIo", - "osPiRawWriteIo", - "osEPiRawStartDma", - "osEPiRawReadIo", - "osEPiRawWriteIo", - "__osPiRawStartDma", - "__osPiRawReadIo", - "__osPiRawWriteIo", - "__osEPiRawStartDma", - "__osEPiRawReadIo", - "__osEPiRawWriteIo", - "__osPiDeviceBusy", - "__osPiCreateAccessQueue", - "__osPiGetAccess", - "__osPiRelAccess", - "__osLeoAbnormalResume", - "__osLeoInterrupt", - "__osLeoResume", - // Flash saving functions - "osFlashInit", - "osFlashReadStatus", - "osFlashReadId", - "osFlashClearStatus", - "osFlashAllErase", - "osFlashAllEraseThrough", - "osFlashSectorErase", - "osFlashSectorEraseThrough", - "osFlashCheckEraseEnd", - "osFlashWriteBuffer", - "osFlashWriteArray", - "osFlashReadArray", - "osFlashChange", - // Threading functions - "osCreateThread", - "osStartThread", - "osStopThread", - "osDestroyThread", - "osYieldThread", - "osSetThreadPri", - "osGetThreadPri", - "osGetThreadId", - "__osDequeueThread", - // Message Queue functions - "osCreateMesgQueue", - "osSendMesg", - "osJamMesg", - "osRecvMesg", - "osSetEventMesg", - // Timer functions - "osStartTimer", - "osSetTimer", - "osStopTimer", - "osGetTime", - "__osInsertTimer", - "__osTimerInterrupt", - "__osTimerServicesInit", - "__osSetTimerIntr", - // Voice functions - 
"osVoiceSetWord", - "osVoiceCheckWord", - "osVoiceStopReadData", - "osVoiceInit", - "osVoiceMaskDictionary", - "osVoiceStartReadData", - "osVoiceControlGain", - "osVoiceGetReadData", - "osVoiceClearDictionary", - "__osVoiceCheckResult", - "__osVoiceContRead36", - "__osVoiceContWrite20", - "__osVoiceContWrite4", - "__osVoiceContRead2", - "__osVoiceSetADConverter", - "__osVoiceContDataCrc", - "__osVoiceGetStatus", - "corrupted", - "corrupted_init", - // exceptasm functions - "__osExceptionPreamble", - "__osException", - "__ptExceptionPreamble", - "__ptException", - "send_mesg", - "handle_CpU", - "__osEnqueueAndYield", - "__osEnqueueThread", - "__osPopThread", - "__osNop", - "__osDispatchThread", - "__osCleanupThread", - "osGetCurrFaultedThread", - "osGetNextFaultedThread", - // interrupt functions - "osSetIntMask", - "osGetIntMask", - "__osDisableInt", - "__osRestoreInt", - "__osSetGlobalIntMask", - "__osResetGlobalIntMask", - // TLB functions - "osMapTLB", - "osUnmapTLB", - "osUnmapTLBAll", - "osSetTLBASID", - "osMapTLBRdb", - "osVirtualToPhysical", - "__osGetTLBHi", - "__osGetTLBLo0", - "__osGetTLBLo1", - "__osGetTLBPageMask", - "__osGetTLBASID", - "__osProbeTLB", - // Coprocessor 0/1 functions - "__osSetCount", - "osGetCount", - "__osSetSR", - "__osGetSR", - "__osSetCause", - "__osGetCause", - "__osSetCompare", - "__osGetCompare", - "__osSetConfig", - "__osGetConfig", - "__osSetWatchLo", - "__osGetWatchLo", - "__osSetFpcCsr", - // Cache funcs - "osInvalDCache", - "osInvalICache", - "osWritebackDCache", - "osWritebackDCacheAll", - // Microcodes - "rspbootTextStart", - "gspF3DEX2_fifoTextStart", - "gspS2DEX2_fifoTextStart", - "gspL3DEX2_fifoTextStart", - // Debug functions - "msp_proutSyncPrintf", - "__osInitialize_msp", - "__checkHardware_msp", - "kmc_proutSyncPrintf", - "__osInitialize_kmc", - "__checkHardware_kmc", - "isPrintfInit", - "is_proutSyncPrintf", - "__osInitialize_isv", - "__checkHardware_isv", - "__isExpJP", - "__isExp", - "__osRdbSend", - 
"__rmonSendData", - "__rmonWriteMem", - "__rmonReadWordAt", - "__rmonWriteWordTo", - "__rmonWriteMem", - "__rmonSetSRegs", - "__rmonSetVRegs", - "__rmonStopThread", - "__rmonGetThreadStatus", - "__rmonGetVRegs", - "__rmonHitSpBreak", - "__rmonRunThread", - "__rmonClearBreak", - "__rmonGetBranchTarget", - "__rmonGetSRegs", - "__rmonSetBreak", - "__rmonReadMem", - "__rmonRunThread", - "__rmonCopyWords", - "__rmonExecute", - "__rmonGetExceptionStatus", - "__rmonGetExeName", - "__rmonGetFRegisters", - "__rmonGetGRegisters", - "__rmonGetRegionCount", - "__rmonGetRegions", - "__rmonGetRegisterContents", - "__rmonGetTCB", - "__rmonHitBreak", - "__rmonHitCpuFault", - "__rmonIdleRCP", - "__rmonInit", - "__rmonIOflush", - "__rmonIOhandler", - "__rmonIOputw", - "__rmonListBreak", - "__rmonListProcesses", - "__rmonListThreads", - "__rmonLoadProgram", - "__rmonMaskIdleThreadInts", - "__rmonMemcpy", - "__rmonPanic", - "__rmonRCPrunning", - "__rmonRunRCP", - "__rmonSendFault", - "__rmonSendHeader", - "__rmonSendReply", - "__rmonSetComm", - "__rmonSetFault", - "__rmonSetFRegisters", - "__rmonSetGRegisters", - "__rmonSetSingleStep", - "__rmonStepRCP", - "__rmonStopUserThreads", - "__rmonThreadStatus", - "__rmon", - "__rmonRunThread", - "rmonFindFaultedThreads", - "rmonMain", - "rmonPrintf", - "rmonGetRcpRegister", - "kdebugserver", - "send", - - // ido math routines - "__ll_div", - "__ll_lshift", - "__ll_mod", - "__ll_mul", - "__ll_rem", - "__ll_rshift", - "__ull_div", - "__ull_divremi", - "__ull_rem", - "__ull_rshift", - "__d_to_ll", - "__f_to_ll", - "__d_to_ull", - "__f_to_ull", - "__ll_to_d", - "__ll_to_f", - "__ull_to_d", - "__ull_to_f", - // Setjmp/longjmp for mario party - "setjmp", - "longjmp" - // 64-bit functions for banjo - "func_8025C29C", - "func_8025C240", - "func_8025C288", - - // rmonregs - "LoadStoreSU", - "LoadStoreVU", - "SetUpForRCPop", - "CleanupFromRCPop", - "__rmonGetGRegisters", - "__rmonSetGRegisters", - "__rmonGetFRegisters", - "__rmonSetFRegisters", - 
"rmonGetRcpRegister", - "__rmonGetSRegs", - "__rmonSetSRegs", - "__rmonGetVRegs", - "__rmonSetVRegs", - "__rmonGetRegisterContents", - - // rmonbrk - "SetTempBreakpoint", - "ClearTempBreakpoint", - "__rmonSetBreak", - "__rmonListBreak", - "__rmonClearBreak", - "__rmonGetBranchTarget", - "IsJump", - "__rmonSetSingleStep", - "__rmonGetExceptionStatus", - "rmonSendBreakMessage", - "__rmonHitBreak", - "__rmonHitSpBreak", - "__rmonHitCpuFault", - "rmonFindFaultedThreads", - - // kdebugserver - "string_to_u32", - "send_packet", - "clear_IP6", - "send", - "kdebugserver", -}; - -std::unordered_set renamed_funcs{ - // Math - "sincosf", - "sinf", - "cosf", - "__sinf", - "__cosf", - "asinf", - "acosf", - "atanf", - "atan2f", - "tanf", - "sqrt", - "sqrtf", - - // Memory - "memcpy", - "memset", - "memmove", - "memcmp", - "strcmp", - "strcat", - "strcpy", - "strchr", - "strlen", - "strtok", - "sprintf", - "bzero", - "bcopy", - "bcmp", - - // long jumps - "setjmp", - "longjmp", - - // Math 2 - "ldiv", - "lldiv", - "ceil", - "ceilf", - "floor", - "floorf", - "fmodf", - "fmod", - "modf", - "lround", - "lroundf", - "nearbyint", - "nearbyintf", - "round", - "roundf", - "trunc", - "truncf", - - // printf family - "vsprintf", - "gcvt", - "fcvt", - "ecvt", - - "__assert", - - // allocations - "malloc", - "free", - "realloc", - "calloc", - - // rand - "rand", - "srand", - "random", - - // gzip - "huft_build", - "huft_free", - "inflate_codes", - "inflate_stored", - "inflate_fixed", - "inflate_dynamic", - "inflate_block", - "inflate", - "expand_gzip", - "auRomDataRead" - "data_write", - "unzip", - "updcrc", - "clear_bufs", - "fill_inbuf", - "flush_window", - - // libgcc math routines - "__muldi3", - "__divdi3", - "__udivdi3", - "__umoddi3", - "div64_64", - "div64_32", - "__moddi3", - "_matherr", -}; - -bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, 
std::unordered_map>& data_syms) { - bool found_entrypoint_func = false; - ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; - fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); - - std::unordered_map bss_section_to_target_section{}; - - // Create a mapping of bss section to the corresponding non-bss section. This is only used when dumping context in order - // for patches and mods to correctly relocate symbols in bss. This mapping only matters for relocatable sections. - if (dumping_context) { - // Process bss and reloc sections - for (size_t cur_section_index = 0; cur_section_index < context.sections.size(); cur_section_index++) { - const N64Recomp::Section& cur_section = context.sections[cur_section_index]; - // Check if a bss section was found that corresponds with this section. - if (cur_section.bss_section_index != (uint16_t)-1) { - bss_section_to_target_section[cur_section.bss_section_index] = cur_section_index; - } - } - } - - for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) { - std::string name; - ELFIO::Elf64_Addr value; - ELFIO::Elf_Xword size; - unsigned char bind; - unsigned char type; - ELFIO::Elf_Half section_index; - unsigned char other; - bool ignored = false; - bool reimplemented = false; - bool recorded_symbol = false; - - // Read symbol properties - symbols.get_symbol(sym_index, name, value, size, bind, type, - section_index, other); - - if (section_index == ELFIO::SHN_ABS && elf_config.use_absolute_symbols) { - uint32_t vram = static_cast(value); - context.functions_by_vram[vram].push_back(context.functions.size()); - - context.functions.emplace_back( - vram, - 0, - std::vector{}, - std::move(name), - 0, - true, - reimplemented, - false - ); - continue; - } - - if (section_index < context.sections.size()) { - // Check if this symbol is the entrypoint - if (elf_config.has_entrypoint && value == elf_config.entrypoint_address && type == ELFIO::STT_FUNC) { - if (found_entrypoint_func) { - 
fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); - return false; - } - found_entrypoint_func = true; - fmt::print("Found entrypoint, original name: {}\n", name); - size = 0x50; // dummy size for entrypoints, should cover them all - name = "recomp_entrypoint"; - } - - // Check if this symbol has a size override - auto size_find = elf_config.manually_sized_funcs.find(name); - if (size_find != elf_config.manually_sized_funcs.end()) { - size = size_find->second; - type = ELFIO::STT_FUNC; - } - - if (!dumping_context) { - if (reimplemented_funcs.contains(name)) { - reimplemented = true; - name = name + "_recomp"; - ignored = true; - } else if (ignored_funcs.contains(name)) { - name = name + "_recomp"; - ignored = true; - } - } - - auto& section = context.sections[section_index]; - - // Check if this symbol is a function or has no type (like a regular glabel would) - // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls - if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) { - if (!dumping_context) { - if (renamed_funcs.contains(name)) { - name = name + "_recomp"; - ignored = false; - } - } - - if (section_index < context.sections.size()) { - auto section_offset = value - elf_file.sections[section_index]->get_address(); - const uint32_t* words = reinterpret_cast(elf_file.sections[section_index]->get_data() + section_offset); - uint32_t vram = static_cast(value); - uint32_t num_instructions = type == ELFIO::STT_FUNC ? 
size / 4 : 0; - uint32_t rom_address = static_cast(section_offset + section.rom_addr); - - section.function_addrs.push_back(vram); - context.functions_by_vram[vram].push_back(context.functions.size()); - - // Find the entrypoint by rom address in case it doesn't have vram as its value - if (elf_config.has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { - vram = elf_config.entrypoint_address; - found_entrypoint_func = true; - name = "recomp_entrypoint"; - if (size == 0) { - num_instructions = 0x50 / 4; - } - } - - // Suffix local symbols to prevent name conflicts. - if (bind == ELFIO::STB_LOCAL) { - name = fmt::format("{}_{:08X}", name, rom_address); - } - - if (num_instructions > 0) { - context.section_functions[section_index].push_back(context.functions.size()); - recorded_symbol = true; - } - context.functions_by_name[name] = context.functions.size(); - - std::vector insn_words(num_instructions); - insn_words.assign(words, words + num_instructions); - - context.functions.emplace_back( - vram, - rom_address, - std::move(insn_words), - name, - section_index, - ignored, - reimplemented - ); - } else { - // TODO is this case needed anymore? - uint32_t vram = static_cast(value); - section.function_addrs.push_back(vram); - context.functions_by_vram[vram].push_back(context.functions.size()); - context.functions.emplace_back( - vram, - 0, - std::vector{}, - name, - section_index, - ignored, - reimplemented - ); - } - } - } - - // The symbol wasn't detected as a function, so add it to the data symbols if the context is being dumped. - if (!recorded_symbol && dumping_context && !name.empty()) { - uint32_t vram = static_cast(value); - - // Place this symbol in the absolute symbol list if it's in the absolute section. 
- uint16_t target_section_index = section_index; - if (section_index == ELFIO::SHN_ABS) { - target_section_index = N64Recomp::SectionAbsolute; - } - else if (section_index >= context.sections.size()) { - fmt::print("Symbol \"{}\" not in a valid section ({})\n", name, section_index); - } - - // Move this symbol into the corresponding non-bss section if it's in a bss section. - auto find_bss_it = bss_section_to_target_section.find(target_section_index); - if (find_bss_it != bss_section_to_target_section.end()) { - target_section_index = find_bss_it->second; - } - - data_syms[target_section_index].emplace_back( - vram, - std::move(name) - ); - } - } - - return found_entrypoint_func; -} - void add_manual_functions(N64Recomp::Context& context, const std::vector& manual_funcs) { auto exit_failure = [](const std::string& error_str) { fmt::vprint(stderr, error_str, fmt::make_format_args()); @@ -905,7 +59,7 @@ void add_manual_functions(N64Recomp::Context& context, const std::vector get_segment(const std::vector& segments, ELFIO::Elf_Xword section_size, ELFIO::Elf64_Off section_offset) { - // A linear search is safest even if the segment list is sorted, as there may be overlapping segments - for (size_t i = 0; i < segments.size(); i++) { - const auto& segment = segments[i]; - - // Check that the section's data in the elf file is within bounds of the segment's data - if (section_offset >= segment.data_offset && section_offset + section_size <= segment.data_offset + segment.memory_size) { - return i; - } - } - - return std::nullopt; -} - -ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfParsingConfig& elf_config, const ELFIO::elfio& elf_file) { - ELFIO::section* symtab_section = nullptr; - std::vector segments{}; - segments.resize(elf_file.segments.size()); - - // Copy the data for each segment into the segment entry list - for (size_t segment_index = 0; segment_index < elf_file.segments.size(); segment_index++) { - const auto& segment = 
*elf_file.segments[segment_index]; - segments[segment_index].data_offset = segment.get_offset(); - segments[segment_index].physical_address = segment.get_physical_address(); - segments[segment_index].memory_size = segment.get_file_size(); - } - - //// Sort the segments by physical address - //std::sort(segments.begin(), segments.end(), - // [](const SegmentEntry& lhs, const SegmentEntry& rhs) { - // return lhs.data_offset < rhs.data_offset; - // } - //); - - std::unordered_map reloc_sections_by_name; - std::unordered_map bss_sections_by_name; - - // Iterate over every section to record rom addresses and find the symbol table - fmt::print("Sections\n"); - for (const auto& section : elf_file.sections) { - auto& section_out = context.sections[section->get_index()]; - //fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size()); - // Set the rom address of this section to the current accumulated ROM size - section_out.ram_addr = section->get_address(); - section_out.size = section->get_size(); - ELFIO::Elf_Word type = section->get_type(); - std::string section_name = section->get_name(); - - // Check if this section is the symbol table and record it if so - if (type == ELFIO::SHT_SYMTAB) { - symtab_section = section.get(); - } - - if (elf_config.relocatable_sections.contains(section_name)) { - section_out.relocatable = true; - } - - // Check if this section is a reloc section - if (type == ELFIO::SHT_REL) { - // If it is, determine the name of the section it relocates - if (!section_name.starts_with(".rel")) { - fmt::print(stderr, "Could not determine corresponding section for reloc section {}\n", section_name.c_str()); - return nullptr; - } - - std::string reloc_target_section = section_name.substr(strlen(".rel")); - - // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup. 
- // Alternatively, if this recompilation uses reference symbols then record all reloc sections. - if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) { - reloc_sections_by_name[reloc_target_section] = section.get(); - } - } - - // If the section is bss (SHT_NOBITS) and ends with the bss suffix, add it to the bss section map - if (type == ELFIO::SHT_NOBITS && section_name.ends_with(elf_config.bss_section_suffix)) { - std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size()); - - // If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup - if (elf_config.relocatable_sections.contains(bss_target_section)) { - bss_sections_by_name[bss_target_section] = section.get(); - } - } - - // If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC), - // find this section's rom address and copy it into the rom - if (type != ELFIO::SHT_NOBITS && section->get_flags() & ELFIO::SHF_ALLOC && section->get_size() != 0) { - //// Find the segment this section is in to determine the physical (rom) address of the section - //auto segment_it = std::upper_bound(segments.begin(), segments.end(), section->get_offset(), - // [](ELFIO::Elf64_Off section_offset, const SegmentEntry& segment) { - // return section_offset < segment.data_offset; - // } - //); - //if (segment_it == segments.begin()) { - // fmt::print(stderr, "Could not find segment that section {} belongs to!\n", section_name.c_str()); - // return nullptr; - //} - //// Upper bound returns the iterator after the element we're looking for, so rewind by one - //// This is safe because we checked if segment_it was segments.begin() already, which is the minimum value it could be - //const SegmentEntry& segment = *(segment_it - 1); - //// Check to be sure that the section is actually in this segment - //if (section->get_offset() >= segment.data_offset + 
segment.memory_size) { - // fmt::print(stderr, "Section {} out of range of segment at offset 0x{:08X}\n", section_name.c_str(), segment.data_offset); - // return nullptr; - //} - std::optional segment_index = get_segment(segments, section_out.size, section->get_offset()); - if (!segment_index.has_value()) { - fmt::print(stderr, "Could not find segment that section {} belongs to!\n", section_name.c_str()); - return nullptr; - } - const SegmentEntry& segment = segments[segment_index.value()]; - // Calculate the rom address based on this section's offset into the segment and the segment's rom address - section_out.rom_addr = segment.physical_address + (section->get_offset() - segment.data_offset); - // Resize the output rom if needed to fit this section - size_t required_rom_size = section_out.rom_addr + section_out.size; - if (required_rom_size > context.rom.size()) { - context.rom.resize(required_rom_size); - } - // Copy this section's data into the rom - std::copy(section->get_data(), section->get_data() + section->get_size(), &context.rom[section_out.rom_addr]); - } else { - // Otherwise mark this section as having an invalid rom address - section_out.rom_addr = (uint32_t)-1; - } - // Check if this section is marked as executable, which means it has code in it - if (section->get_flags() & ELFIO::SHF_EXECINSTR) { - section_out.executable = true; - } - section_out.name = section_name; - } - - if (symtab_section == nullptr) { - fmt::print(stderr, "No symtab section found\n"); - return nullptr; - } - - ELFIO::symbol_section_accessor symbol_accessor{ elf_file, symtab_section }; - auto num_syms = symbol_accessor.get_symbols_num(); - - // TODO make sure that a reloc section was found for every section marked as relocatable - - // Process bss and reloc sections - for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { - N64Recomp::Section& section_out = context.sections[section_index]; - // Check if a bss section was found that 
corresponds with this section - auto bss_find = bss_sections_by_name.find(section_out.name); - if (bss_find != bss_sections_by_name.end()) { - section_out.bss_section_index = bss_find->second->get_index(); - section_out.bss_size = bss_find->second->get_size(); - } - - if (!context.reference_symbols.empty() || section_out.relocatable) { - // Check if a reloc section was found that corresponds with this section - auto reloc_find = reloc_sections_by_name.find(section_out.name); - if (reloc_find != reloc_sections_by_name.end()) { - // Create an accessor for the reloc section - ELFIO::relocation_section_accessor rel_accessor{ elf_file, reloc_find->second }; - // Allocate space for the relocs in this section - section_out.relocs.resize(rel_accessor.get_entries_num()); - // Track whether the previous reloc was a HI16 and its previous full_immediate - bool prev_hi = false; - // Track whether the previous reloc was a LO16 - bool prev_lo = false; - uint32_t prev_hi_immediate = 0; - uint32_t prev_hi_symbol = std::numeric_limits::max(); - - for (size_t i = 0; i < section_out.relocs.size(); i++) { - // Get the current reloc - ELFIO::Elf64_Addr rel_offset; - ELFIO::Elf_Word rel_symbol; - unsigned int rel_type; - ELFIO::Elf_Sxword bad_rel_addend; // Addends aren't encoded in the reloc, so ignore this one - rel_accessor.get_entry(i, rel_offset, rel_symbol, rel_type, bad_rel_addend); - - N64Recomp::Reloc& reloc_out = section_out.relocs[i]; - - // Get the real full_immediate by extracting the immediate from the instruction - uint32_t reloc_rom_addr = section_out.rom_addr + rel_offset - section_out.ram_addr; - uint32_t reloc_rom_word = byteswap(*reinterpret_cast(context.rom.data() + reloc_rom_addr)); - rabbitizer::InstructionCpu instr{ reloc_rom_word, static_cast(rel_offset) }; - //context.rom section_out.rom_addr; - - reloc_out.address = rel_offset; - reloc_out.symbol_index = rel_symbol; - reloc_out.type = static_cast(rel_type); - - std::string rel_symbol_name; - ELFIO::Elf64_Addr 
rel_symbol_value; - ELFIO::Elf_Xword rel_symbol_size; - unsigned char rel_symbol_bind; - unsigned char rel_symbol_type; - ELFIO::Elf_Half rel_symbol_section_index; - unsigned char rel_symbol_other; - - bool found_rel_symbol = symbol_accessor.get_symbol( - rel_symbol, rel_symbol_name, rel_symbol_value, rel_symbol_size, rel_symbol_bind, rel_symbol_type, rel_symbol_section_index, rel_symbol_other); - - uint32_t rel_section_vram = section_out.ram_addr; - uint32_t rel_symbol_offset = 0; - - // Check if the symbol is undefined and to know whether to look for it in the reference symbols. - if (rel_symbol_section_index == ELFIO::SHN_UNDEF) { - // Undefined sym, check the reference symbols. - auto sym_find_it = context.reference_symbols_by_name.find(rel_symbol_name); - if (sym_find_it == context.reference_symbols_by_name.end()) { - fmt::print(stderr, "Undefined symbol: {}, not found in input or reference symbols!\n", - rel_symbol_name); - return nullptr; - } - - reloc_out.reference_symbol = true; - // Replace the reloc's symbol index with the index into the reference symbol array. 
- reloc_out.symbol_index = sym_find_it->second; - rel_section_vram = 0; - rel_symbol_offset = context.reference_symbols[reloc_out.symbol_index].section_offset; - reloc_out.target_section = context.reference_symbols[reloc_out.symbol_index].section_index; - - bool target_section_relocatable = false; - - if (reloc_out.target_section != N64Recomp::SectionAbsolute && context.reference_sections[reloc_out.target_section].relocatable) { - target_section_relocatable = true; - } - - if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32 && target_section_relocatable) { - fmt::print(stderr, "Cannot reference {} in a statically initialized variable as it's defined in a relocatable section!\n", - rel_symbol_name); - return nullptr; - } - } - else { - reloc_out.reference_symbol = false; - reloc_out.target_section = rel_symbol_section_index; - } - - // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf) - if (reloc_out.type == N64Recomp::RelocType::R_MIPS_LO16) { - uint32_t rel_immediate = instr.getProcessedImmediate(); - uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate; - reloc_out.target_section_offset = full_immediate + rel_symbol_offset - rel_section_vram; - if (prev_hi) { - if (prev_hi_symbol != rel_symbol) { - fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n" - " LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", - i, section_out.name, reloc_out.symbol_index, reloc_out.address); - return nullptr; - } - - // Set the previous HI16 relocs' relocated address. - section_out.relocs[i - 1].target_section_offset = reloc_out.target_section_offset; - } - else { - // Orphaned LO16 reloc warnings. - if (elf_config.unpaired_lo16_warnings) { - if (prev_lo) { - // Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior. 
- if (prev_hi_symbol != rel_symbol) { - fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n", - i, section_out.name, reloc_out.symbol_index, reloc_out.address); - } - } - else { - fmt::print(stderr, "[WARN] Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", - i, section_out.name, reloc_out.symbol_index, reloc_out.address); - } - } - // Even though this is an orphaned LO16 reloc, the previous calculation for the addend still follows the MIPS System V ABI documentation: - // "R_MIPS_LO16 entries without an R_MIPS_HI16 entry immediately preceding are orphaned and the previously defined - // R_MIPS_HI16 is used for computing the addend." - // Therefore, nothing needs to be done to the section_offset member. - } - prev_lo = true; - } else { - if (prev_hi) { - // This is an invalid elf as the MIPS System V ABI documentation states: - // "Each relocation type of R_MIPS_HI16 must have an associated R_MIPS_LO16 entry - // immediately following it in the list of relocations." - fmt::print(stderr, "Unpaired HI16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n", - i - 1, section_out.name, section_out.relocs[i - 1].symbol_index, section_out.relocs[i - 1].address); - return nullptr; - } - prev_lo = false; - } - - if (reloc_out.type == N64Recomp::RelocType::R_MIPS_HI16) { - uint32_t rel_immediate = instr.getProcessedImmediate(); - prev_hi = true; - prev_hi_immediate = rel_immediate; - prev_hi_symbol = rel_symbol; - } else { - prev_hi = false; - } - - if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32) { - // The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset. - // Incorporating the addend will be handled at load-time. 
- reloc_out.target_section_offset = rel_symbol_offset; - // TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping). - - if (reloc_out.reference_symbol) { - uint32_t reloc_target_section_addr = 0; - if (reloc_out.target_section != N64Recomp::SectionAbsolute) { - reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr; - } - // Patch the word in the ROM to incorporate the symbol's value. - uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.target_section_offset; - *reinterpret_cast(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word); - } - } - - if (reloc_out.type == N64Recomp::RelocType::R_MIPS_26) { - uint32_t rel_immediate = instr.getProcessedImmediate(); - reloc_out.target_section_offset = rel_immediate + rel_symbol_offset; - } - } - } - - // Sort this section's relocs by address, which allows for binary searching and more efficient iteration during recompilation. - // This is safe to do as the entire full_immediate in present in relocs due to the pairing that was done earlier, so the HI16 does not - // need to directly preceed the matching LO16 anymore. 
- std::sort(section_out.relocs.begin(), section_out.relocs.end(), - [](const N64Recomp::Reloc& a, const N64Recomp::Reloc& b) { - return a.address < b.address; - } - ); - } - } - - return symtab_section; -} - -template void -for_each_if(Iterator begin, Iterator end, Pred p, Operation op) { - for (; begin != end; begin++) { - if (p(*begin)) { - op(*begin); - } - } -} - bool read_list_file(const std::filesystem::path& filename, std::vector& entries_out) { std::ifstream input_file{ filename }; if (!input_file.good()) { @@ -1451,50 +260,6 @@ static std::vector read_file(const std::filesystem::path& path) { return ret; } -static void setup_context_for_elf(N64Recomp::Context& context, const ELFIO::elfio& elf_file) { - context.sections.resize(elf_file.sections.size()); - context.section_functions.resize(elf_file.sections.size()); - context.functions.reserve(1024); - context.functions_by_vram.reserve(context.functions.capacity()); - context.functions_by_name.reserve(context.functions.capacity()); - context.rom.reserve(8 * 1024 * 1024); -} - -bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& elf_config, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out) { - ELFIO::elfio elf_file; - - if (!elf_file.load(elf_file_path.string())) { - fmt::print("Failed to load provided elf file\n"); - return false; - } - - if (elf_file.get_class() != ELFIO::ELFCLASS32) { - fmt::print("Incorrect elf class\n"); - return false; - } - - if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) { - fmt::print("Incorrect endianness\n"); - return false; - } - - setup_context_for_elf(out, elf_file); - - // Read all of the sections in the elf and look for the symbol table section - ELFIO::section* symtab_section = read_sections(out, elf_config, elf_file); - - // If no symbol table was found then exit - if (symtab_section == nullptr) { - fmt::print("No symbol table section found\n"); - return false; - } - - 
// Read all of the symbols in the elf and look for the entrypoint function - found_entrypoint_out = read_symbols(out, elf_file, symtab_section, elf_config, for_dumping_context, data_syms_out); - - return true; -} - int main(int argc, char** argv) { auto exit_failure = [] (const std::string& error_str) { fmt::vprint(stderr, error_str, fmt::make_format_args()); @@ -1632,14 +397,14 @@ int main(int argc, char** argv) { for (size_t func_index = 0; func_index < context.functions.size(); func_index++) { N64Recomp::Function& func = context.functions[func_index]; - if (reimplemented_funcs.contains(func.name)) { + if (N64Recomp::reimplemented_funcs.contains(func.name)) { rename_function(func_index, func.name + "_recomp"); func.reimplemented = true; func.ignored = true; - } else if (ignored_funcs.contains(func.name)) { + } else if (N64Recomp::ignored_funcs.contains(func.name)) { rename_function(func_index, func.name + "_recomp"); func.ignored = true; - } else if (renamed_funcs.contains(func.name)) { + } else if (N64Recomp::renamed_funcs.contains(func.name)) { rename_function(func_index, func.name + "_recomp"); func.ignored = false; } @@ -1879,7 +644,7 @@ int main(int argc, char** argv) { rom_addr, std::move(insn_words), fmt::format("static_{}_{:08X}", section_index, static_func_addr), - static_cast(section_index), + static_cast(section_index), false }; diff --git a/src/symbol_lists.cpp b/src/symbol_lists.cpp new file mode 100644 index 0000000..182ccde --- /dev/null +++ b/src/symbol_lists.cpp @@ -0,0 +1,660 @@ +#include "n64recomp.h" + +const std::unordered_set N64Recomp::reimplemented_funcs{ + // OS initialize functions + "__osInitialize_common", + "osInitialize", + "osGetMemSize", + // Audio interface functions + "osAiGetLength", + "osAiGetStatus", + "osAiSetFrequency", + "osAiSetNextBuffer", + // Video interface functions + "osViSetXScale", + "osViSetYScale", + "osCreateViManager", + "osViBlack", + "osViSetSpecialFeatures", + "osViGetCurrentFramebuffer", + 
"osViGetNextFramebuffer", + "osViSwapBuffer", + "osViSetMode", + "osViSetEvent", + // RDP functions + "osDpSetNextBuffer", + // RSP functions + "osSpTaskLoad", + "osSpTaskStartGo", + "osSpTaskYield", + "osSpTaskYielded", + "__osSpSetPc", + // Controller functions + "osContInit", + "osContStartReadData", + "osContGetReadData", + "osContStartQuery", + "osContGetQuery", + "osContSetCh", + // EEPROM functions + "osEepromProbe", + "osEepromWrite", + "osEepromLongWrite", + "osEepromRead", + "osEepromLongRead", + // Rumble functions + "__osMotorAccess", + "osMotorInit", + "osMotorStart", + "osMotorStop", + // PFS functions + "osPfsInitPak", + "osPfsFreeBlocks", + "osPfsAllocateFile", + "osPfsDeleteFile", + "osPfsFileState", + "osPfsFindFile", + "osPfsReadWriteFile", + // Parallel interface (cartridge, DMA, etc.) functions + "osCartRomInit", + "osCreatePiManager", + "osPiStartDma", + "osEPiStartDma", + "osPiGetStatus", + "osEPiRawStartDma", + "osEPiReadIo", + // Flash saving functions + "osFlashInit", + "osFlashReadStatus", + "osFlashReadId", + "osFlashClearStatus", + "osFlashAllErase", + "osFlashAllEraseThrough", + "osFlashSectorErase", + "osFlashSectorEraseThrough", + "osFlashCheckEraseEnd", + "osFlashWriteBuffer", + "osFlashWriteArray", + "osFlashReadArray", + "osFlashChange", + // Threading functions + "osCreateThread", + "osStartThread", + "osStopThread", + "osDestroyThread", + "osSetThreadPri", + "osGetThreadPri", + "osGetThreadId", + // Message Queue functions + "osCreateMesgQueue", + "osRecvMesg", + "osSendMesg", + "osJamMesg", + "osSetEventMesg", + // Timer functions + "osGetTime", + "osSetTimer", + "osStopTimer", + // Voice functions + "osVoiceSetWord", + "osVoiceCheckWord", + "osVoiceStopReadData", + "osVoiceInit", + "osVoiceMaskDictionary", + "osVoiceStartReadData", + "osVoiceControlGain", + "osVoiceGetReadData", + "osVoiceClearDictionary", + // interrupt functions + "osSetIntMask", + "__osDisableInt", + "__osRestoreInt", + // TLB functions + 
"osVirtualToPhysical", + // Coprocessor 0/1 functions + "osGetCount", + "__osSetFpcCsr", + // Cache funcs + "osInvalDCache", + "osInvalICache", + "osWritebackDCache", + "osWritebackDCacheAll", + // Debug functions + "is_proutSyncPrintf", + "__checkHardware_msp", + "__checkHardware_kmc", + "__checkHardware_isv", + "__osInitialize_msp", + "__osInitialize_kmc", + "__osInitialize_isv", + "__osRdbSend", + // ido math routines + "__ull_div", + "__ll_div", + "__ll_mul", + "__ull_rem", + "__ull_to_d", + "__ull_to_f", +}; + +const std::unordered_set N64Recomp::ignored_funcs { + // OS initialize functions + "__createSpeedParam", + "__osInitialize_common", + "osInitialize", + "osGetMemSize", + // Audio interface functions + "osAiGetLength", + "osAiGetStatus", + "osAiSetFrequency", + "osAiSetNextBuffer", + "__osAiDeviceBusy", + // Video interface functions + "osViBlack", + "osViFade", + "osViGetCurrentField", + "osViGetCurrentFramebuffer", + "osViGetCurrentLine", + "osViGetCurrentMode", + "osViGetNextFramebuffer", + "osViGetStatus", + "osViRepeatLine", + "osViSetEvent", + "osViSetMode", + "osViSetSpecialFeatures", + "osViSetXScale", + "osViSetYScale", + "osViSwapBuffer", + "osCreateViManager", + "viMgrMain", + "__osViInit", + "__osViSwapContext", + "__osViGetCurrentContext", + // RDP functions + "osDpGetCounters", + "osDpSetStatus", + "osDpGetStatus", + "osDpSetNextBuffer", + "__osDpDeviceBusy", + // RSP functions + "osSpTaskLoad", + "osSpTaskStartGo", + "osSpTaskYield", + "osSpTaskYielded", + "__osSpDeviceBusy", + "__osSpGetStatus", + "__osSpRawStartDma", + "__osSpRawReadIo", + "__osSpRawWriteIo", + "__osSpSetPc", + "__osSpSetStatus", + // Controller functions + "osContGetQuery", + "osContGetReadData", + "osContInit", + "osContReset", + "osContSetCh", + "osContStartQuery", + "osContStartReadData", + "__osContAddressCrc", + "__osContDataCrc", + "__osContGetInitData", + "__osContRamRead", + "__osContRamWrite", + "__osContChannelReset", + // EEPROM functions + 
"osEepromLongRead", + "osEepromLongWrite", + "osEepromProbe", + "osEepromRead", + "osEepromWrite", + "__osEepStatus", + // Rumble functions + "osMotorInit", + "osMotorStart", + "osMotorStop", + "__osMotorAccess", + "_MakeMotorData", + // Pack functions + "__osCheckId", + "__osCheckPackId", + "__osGetId", + "__osPfsRWInode", + "__osRepairPackId", + "__osPfsSelectBank", + "__osCheckPackId", + "ramromMain", + // PFS functions + "osPfsAllocateFile", + "osPfsChecker", + "osPfsDeleteFile", + "osPfsFileState", + "osPfsFindFile", + "osPfsFreeBlocks", + "osPfsGetLabel", + "osPfsInit", + "osPfsInitPak", + "osPfsIsPlug", + "osPfsNumFiles", + "osPfsRepairId", + "osPfsReadWriteFile", + "__osPackEepReadData", + "__osPackEepWriteData", + "__osPackRamReadData", + "__osPackRamWriteData", + "__osPackReadData", + "__osPackRequestData", + "__osPfsGetInitData", + "__osPfsGetOneChannelData", + "__osPfsGetStatus", + "__osPfsRequestData", + "__osPfsRequestOneChannel", + "__osPfsCreateAccessQueue", + "__osPfsCheckRamArea", + "__osPfsGetNextPage", + // Low level serial interface functions + "__osSiDeviceBusy", + "__osSiGetStatus", + "__osSiRawStartDma", + "__osSiRawReadIo", + "__osSiRawWriteIo", + "__osSiCreateAccessQueue", + "__osSiGetAccess", + "__osSiRelAccess", + // Parallel interface (cartridge, DMA, etc.) 
functions + "osCartRomInit", + "osLeoDiskInit", + "osCreatePiManager", + "__osDevMgrMain", + "osPiGetCmdQueue", + "osPiGetStatus", + "osPiReadIo", + "osPiStartDma", + "osPiWriteIo", + "osEPiGetDeviceType", + "osEPiStartDma", + "osEPiWriteIo", + "osEPiReadIo", + "osPiRawStartDma", + "osPiRawReadIo", + "osPiRawWriteIo", + "osEPiRawStartDma", + "osEPiRawReadIo", + "osEPiRawWriteIo", + "__osPiRawStartDma", + "__osPiRawReadIo", + "__osPiRawWriteIo", + "__osEPiRawStartDma", + "__osEPiRawReadIo", + "__osEPiRawWriteIo", + "__osPiDeviceBusy", + "__osPiCreateAccessQueue", + "__osPiGetAccess", + "__osPiRelAccess", + "__osLeoAbnormalResume", + "__osLeoInterrupt", + "__osLeoResume", + // Flash saving functions + "osFlashInit", + "osFlashReadStatus", + "osFlashReadId", + "osFlashClearStatus", + "osFlashAllErase", + "osFlashAllEraseThrough", + "osFlashSectorErase", + "osFlashSectorEraseThrough", + "osFlashCheckEraseEnd", + "osFlashWriteBuffer", + "osFlashWriteArray", + "osFlashReadArray", + "osFlashChange", + // Threading functions + "osCreateThread", + "osStartThread", + "osStopThread", + "osDestroyThread", + "osYieldThread", + "osSetThreadPri", + "osGetThreadPri", + "osGetThreadId", + "__osDequeueThread", + // Message Queue functions + "osCreateMesgQueue", + "osSendMesg", + "osJamMesg", + "osRecvMesg", + "osSetEventMesg", + // Timer functions + "osStartTimer", + "osSetTimer", + "osStopTimer", + "osGetTime", + "__osInsertTimer", + "__osTimerInterrupt", + "__osTimerServicesInit", + "__osSetTimerIntr", + // Voice functions + "osVoiceSetWord", + "osVoiceCheckWord", + "osVoiceStopReadData", + "osVoiceInit", + "osVoiceMaskDictionary", + "osVoiceStartReadData", + "osVoiceControlGain", + "osVoiceGetReadData", + "osVoiceClearDictionary", + "__osVoiceCheckResult", + "__osVoiceContRead36", + "__osVoiceContWrite20", + "__osVoiceContWrite4", + "__osVoiceContRead2", + "__osVoiceSetADConverter", + "__osVoiceContDataCrc", + "__osVoiceGetStatus", + "corrupted", + "corrupted_init", + // 
exceptasm functions + "__osExceptionPreamble", + "__osException", + "__ptExceptionPreamble", + "__ptException", + "send_mesg", + "handle_CpU", + "__osEnqueueAndYield", + "__osEnqueueThread", + "__osPopThread", + "__osNop", + "__osDispatchThread", + "__osCleanupThread", + "osGetCurrFaultedThread", + "osGetNextFaultedThread", + // interrupt functions + "osSetIntMask", + "osGetIntMask", + "__osDisableInt", + "__osRestoreInt", + "__osSetGlobalIntMask", + "__osResetGlobalIntMask", + // TLB functions + "osMapTLB", + "osUnmapTLB", + "osUnmapTLBAll", + "osSetTLBASID", + "osMapTLBRdb", + "osVirtualToPhysical", + "__osGetTLBHi", + "__osGetTLBLo0", + "__osGetTLBLo1", + "__osGetTLBPageMask", + "__osGetTLBASID", + "__osProbeTLB", + // Coprocessor 0/1 functions + "__osSetCount", + "osGetCount", + "__osSetSR", + "__osGetSR", + "__osSetCause", + "__osGetCause", + "__osSetCompare", + "__osGetCompare", + "__osSetConfig", + "__osGetConfig", + "__osSetWatchLo", + "__osGetWatchLo", + "__osSetFpcCsr", + // Cache funcs + "osInvalDCache", + "osInvalICache", + "osWritebackDCache", + "osWritebackDCacheAll", + // Microcodes + "rspbootTextStart", + "gspF3DEX2_fifoTextStart", + "gspS2DEX2_fifoTextStart", + "gspL3DEX2_fifoTextStart", + // Debug functions + "msp_proutSyncPrintf", + "__osInitialize_msp", + "__checkHardware_msp", + "kmc_proutSyncPrintf", + "__osInitialize_kmc", + "__checkHardware_kmc", + "isPrintfInit", + "is_proutSyncPrintf", + "__osInitialize_isv", + "__checkHardware_isv", + "__isExpJP", + "__isExp", + "__osRdbSend", + "__rmonSendData", + "__rmonWriteMem", + "__rmonReadWordAt", + "__rmonWriteWordTo", + "__rmonWriteMem", + "__rmonSetSRegs", + "__rmonSetVRegs", + "__rmonStopThread", + "__rmonGetThreadStatus", + "__rmonGetVRegs", + "__rmonHitSpBreak", + "__rmonRunThread", + "__rmonClearBreak", + "__rmonGetBranchTarget", + "__rmonGetSRegs", + "__rmonSetBreak", + "__rmonReadMem", + "__rmonRunThread", + "__rmonCopyWords", + "__rmonExecute", + "__rmonGetExceptionStatus", + 
"__rmonGetExeName", + "__rmonGetFRegisters", + "__rmonGetGRegisters", + "__rmonGetRegionCount", + "__rmonGetRegions", + "__rmonGetRegisterContents", + "__rmonGetTCB", + "__rmonHitBreak", + "__rmonHitCpuFault", + "__rmonIdleRCP", + "__rmonInit", + "__rmonIOflush", + "__rmonIOhandler", + "__rmonIOputw", + "__rmonListBreak", + "__rmonListProcesses", + "__rmonListThreads", + "__rmonLoadProgram", + "__rmonMaskIdleThreadInts", + "__rmonMemcpy", + "__rmonPanic", + "__rmonRCPrunning", + "__rmonRunRCP", + "__rmonSendFault", + "__rmonSendHeader", + "__rmonSendReply", + "__rmonSetComm", + "__rmonSetFault", + "__rmonSetFRegisters", + "__rmonSetGRegisters", + "__rmonSetSingleStep", + "__rmonStepRCP", + "__rmonStopUserThreads", + "__rmonThreadStatus", + "__rmon", + "__rmonRunThread", + "rmonFindFaultedThreads", + "rmonMain", + "rmonPrintf", + "rmonGetRcpRegister", + "kdebugserver", + "send", + + // ido math routines + "__ll_div", + "__ll_lshift", + "__ll_mod", + "__ll_mul", + "__ll_rem", + "__ll_rshift", + "__ull_div", + "__ull_divremi", + "__ull_rem", + "__ull_rshift", + "__d_to_ll", + "__f_to_ll", + "__d_to_ull", + "__f_to_ull", + "__ll_to_d", + "__ll_to_f", + "__ull_to_d", + "__ull_to_f", + // Setjmp/longjmp for mario party + "setjmp", + "longjmp", + // 64-bit functions for banjo + "func_8025C29C", + "func_8025C240", + "func_8025C288", + + // rmonregs + "LoadStoreSU", + "LoadStoreVU", + "SetUpForRCPop", + "CleanupFromRCPop", + "__rmonGetGRegisters", + "__rmonSetGRegisters", + "__rmonGetFRegisters", + "__rmonSetFRegisters", + "rmonGetRcpRegister", + "__rmonGetSRegs", + "__rmonSetSRegs", + "__rmonGetVRegs", + "__rmonSetVRegs", + "__rmonGetRegisterContents", + + // rmonbrk + "SetTempBreakpoint", + "ClearTempBreakpoint", + "__rmonSetBreak", + "__rmonListBreak", + "__rmonClearBreak", + "__rmonGetBranchTarget", + "IsJump", + "__rmonSetSingleStep", + "__rmonGetExceptionStatus", + "rmonSendBreakMessage", + "__rmonHitBreak", + "__rmonHitSpBreak", + "__rmonHitCpuFault", + 
"rmonFindFaultedThreads", + + // kdebugserver + "string_to_u32", + "send_packet", + "clear_IP6", + "send", + "kdebugserver", +}; + +const std::unordered_set N64Recomp::renamed_funcs{ + // Math + "sincosf", + "sinf", + "cosf", + "__sinf", + "__cosf", + "asinf", + "acosf", + "atanf", + "atan2f", + "tanf", + "sqrt", + "sqrtf", + + // Memory + "memcpy", + "memset", + "memmove", + "memcmp", + "strcmp", + "strcat", + "strcpy", + "strchr", + "strlen", + "strtok", + "sprintf", + "bzero", + "bcopy", + "bcmp", + + // long jumps + "setjmp", + "longjmp", + + // Math 2 + "ldiv", + "lldiv", + "ceil", + "ceilf", + "floor", + "floorf", + "fmodf", + "fmod", + "modf", + "lround", + "lroundf", + "nearbyint", + "nearbyintf", + "round", + "roundf", + "trunc", + "truncf", + + // printf family + "vsprintf", + "gcvt", + "fcvt", + "ecvt", + + "__assert", + + // allocations + "malloc", + "free", + "realloc", + "calloc", + + // rand + "rand", + "srand", + "random", + + // gzip + "huft_build", + "huft_free", + "inflate_codes", + "inflate_stored", + "inflate_fixed", + "inflate_dynamic", + "inflate_block", + "inflate", + "expand_gzip", + "auRomDataRead", + "data_write", + "unzip", + "updcrc", + "clear_bufs", + "fill_inbuf", + "flush_window", + + // libgcc math routines + "__muldi3", + "__divdi3", + "__udivdi3", + "__umoddi3", + "div64_64", + "div64_32", + "__moddi3", + "_matherr", +};