Move elf parsing into a separate library

This commit is contained in:
Mr-Wiseguy 2024-07-16 22:24:25 -04:00
parent 04daa21908
commit d8dcb43d5a
5 changed files with 1276 additions and 1245 deletions

View file

@ -62,6 +62,18 @@ add_subdirectory(lib/fmt)
set(TOML_ENABLE_FORMATTERS OFF)
add_subdirectory(lib/tomlplusplus)
# Hardcoded symbol lists (separate library to not force a dependency on N64Recomp)
project(SymbolLists)
add_library(SymbolLists)
target_sources(SymbolLists PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/symbol_lists.cpp
)
target_include_directories(SymbolLists PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}/include"
)
# N64 recompiler core library
project(N64Recomp)
add_library(N64Recomp)
@ -75,9 +87,29 @@ target_sources(N64Recomp PRIVATE
)
target_include_directories(N64Recomp PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}/include")
"${CMAKE_CURRENT_SOURCE_DIR}/include"
)
target_link_libraries(N64Recomp fmt rabbitizer tomlplusplus::tomlplusplus)
target_link_libraries(N64Recomp SymbolLists fmt rabbitizer tomlplusplus::tomlplusplus)
# N64 recompiler elf parsing
project(N64RecompElf)
add_library(N64RecompElf)
target_sources(N64RecompElf PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/elf.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/symbol_lists.cpp
)
target_include_directories(N64RecompElf PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}/include"
)
target_include_directories(N64RecompElf PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/lib/ELFIO"
)
target_link_libraries(N64RecompElf fmt)
# N64 recompiler executable
project(N64RecompCLI)
@ -89,10 +121,10 @@ target_sources(N64RecompCLI PRIVATE
)
target_include_directories(N64RecompCLI PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/lib/ELFIO"
"${CMAKE_CURRENT_SOURCE_DIR}/include")
"${CMAKE_CURRENT_SOURCE_DIR}/include"
)
target_link_libraries(N64RecompCLI fmt rabbitizer tomlplusplus::tomlplusplus N64Recomp)
target_link_libraries(N64RecompCLI fmt rabbitizer tomlplusplus::tomlplusplus N64Recomp N64RecompElf)
set_target_properties(N64RecompCLI PROPERTIES OUTPUT_NAME N64Recomp)
# RSP recompiler

View file

@ -107,6 +107,10 @@ namespace N64Recomp {
using DataSymbolMap = std::unordered_map<uint16_t, std::vector<DataSymbol>>;
extern const std::unordered_set<std::string> reimplemented_funcs;
extern const std::unordered_set<std::string> ignored_funcs;
extern const std::unordered_set<std::string> renamed_funcs;
struct Context {
std::vector<Section> sections;
std::vector<Function> functions;

570
src/elf.cpp Normal file
View file

@ -0,0 +1,570 @@
#include "fmt/format.h"
// #include "fmt/ostream.h"
#include "n64recomp.h"
#include "elfio/elfio.hpp"
// Walks every entry of the ELF symbol table and records it in `context`.
//
// - Absolute symbols (SHN_ABS) are recorded as ignored, code-less functions when
//   `elf_config.use_absolute_symbols` is set.
// - Symbols that live in a known section and are typed FUNC, NOTYPE or OBJECT (or are in the
//   ignored/reimplemented lists) get a function entry; only FUNC symbols carry instruction words.
// - When `dumping_context` is set, every named symbol that was not recorded as a function with
//   instructions is appended to `data_syms`, keyed by section index (bss sections are folded
//   into the section they belong to).
//
// Returns true if an entrypoint function was found. Returns false if no entrypoint was found
// or if more than one function symbol matches the configured entrypoint address.
bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms) {
    bool found_entrypoint_func = false;
    ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
    const ELFIO::Elf_Xword num_symbols = symbols.get_symbols_num();
    fmt::print("Num symbols: {}\n", num_symbols);

    std::unordered_map<uint16_t, uint16_t> bss_section_to_target_section{};

    // Create a mapping of bss section to the corresponding non-bss section. This is only used when dumping context in order
    // for patches and mods to correctly relocate symbols in bss. This mapping only matters for relocatable sections.
    if (dumping_context) {
        for (size_t cur_section_index = 0; cur_section_index < context.sections.size(); cur_section_index++) {
            const N64Recomp::Section& cur_section = context.sections[cur_section_index];
            // Check if a bss section was found that corresponds with this section.
            if (cur_section.bss_section_index != (uint16_t)-1) {
                bss_section_to_target_section[cur_section.bss_section_index] = cur_section_index;
            }
        }
    }

    // The index type matches the unsigned symbol count to avoid a signed/unsigned comparison.
    for (ELFIO::Elf_Xword sym_index = 0; sym_index < num_symbols; sym_index++) {
        // Value-initialized so that nothing indeterminate is read if get_symbol fails to fill them in.
        std::string name;
        ELFIO::Elf64_Addr value{};
        ELFIO::Elf_Xword size{};
        unsigned char bind{};
        unsigned char type{};
        ELFIO::Elf_Half section_index{};
        unsigned char other{};
        bool ignored = false;
        bool reimplemented = false;
        bool recorded_symbol = false;

        // Read symbol properties
        symbols.get_symbol(sym_index, name, value, size, bind, type,
            section_index, other);

        // Absolute symbols become ignored functions with no instructions and are never treated as data symbols.
        if (section_index == ELFIO::SHN_ABS && elf_config.use_absolute_symbols) {
            uint32_t vram = static_cast<uint32_t>(value);
            context.functions_by_vram[vram].push_back(context.functions.size());

            context.functions.emplace_back(
                vram,
                0,
                std::vector<uint32_t>{},
                std::move(name),
                0,
                true,
                reimplemented,
                false
            );
            continue;
        }

        if (section_index < context.sections.size()) {
            // Check if this symbol is the entrypoint
            if (elf_config.has_entrypoint && value == elf_config.entrypoint_address && type == ELFIO::STT_FUNC) {
                if (found_entrypoint_func) {
                    fmt::print(stderr, "Ambiguous entrypoint: {}\n", name);
                    return false;
                }
                found_entrypoint_func = true;
                fmt::print("Found entrypoint, original name: {}\n", name);
                size = 0x50; // dummy size for entrypoints, should cover them all
                name = "recomp_entrypoint";
            }

            // Check if this symbol has a size override. Manually sized symbols are always treated as functions.
            auto size_find = elf_config.manually_sized_funcs.find(name);
            if (size_find != elf_config.manually_sized_funcs.end()) {
                size = size_find->second;
                type = ELFIO::STT_FUNC;
            }

            if (!dumping_context) {
                if (N64Recomp::reimplemented_funcs.contains(name)) {
                    reimplemented = true;
                    name = name + "_recomp";
                    ignored = true;
                } else if (N64Recomp::ignored_funcs.contains(name)) {
                    name = name + "_recomp";
                    ignored = true;
                }
            }

            auto& section = context.sections[section_index];

            // Check if this symbol is a function or has no type (like a regular glabel would)
            // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
            if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) {
                if (!dumping_context) {
                    if (N64Recomp::renamed_funcs.contains(name)) {
                        name = name + "_recomp";
                        ignored = false;
                    }
                }

                // section_index is already known to be a valid index here (checked by the enclosing condition),
                // so no second bounds check is needed.
                auto section_offset = value - elf_file.sections[section_index]->get_address();
                const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset);
                uint32_t vram = static_cast<uint32_t>(value);
                // Only function symbols carry instructions; other symbol types get a code-less entry.
                uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0;
                uint32_t rom_address = static_cast<uint32_t>(section_offset + section.rom_addr);

                section.function_addrs.push_back(vram);
                context.functions_by_vram[vram].push_back(context.functions.size());

                // Find the entrypoint by rom address in case it doesn't have vram as its value
                if (elf_config.has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) {
                    vram = elf_config.entrypoint_address;
                    found_entrypoint_func = true;
                    name = "recomp_entrypoint";
                    if (size == 0) {
                        num_instructions = 0x50 / 4;
                    }
                }

                // Suffix local symbols to prevent name conflicts.
                if (bind == ELFIO::STB_LOCAL) {
                    name = fmt::format("{}_{:08X}", name, rom_address);
                }

                // Only symbols with instructions count as recorded functions; the rest may still be
                // emitted as data symbols below when dumping context.
                if (num_instructions > 0) {
                    context.section_functions[section_index].push_back(context.functions.size());
                    recorded_symbol = true;
                }
                context.functions_by_name[name] = context.functions.size();

                // Copy the instruction words directly instead of zero-filling the vector first.
                std::vector<uint32_t> insn_words(words, words + num_instructions);

                // `name` is copied rather than moved because it is still needed for the data symbol check below.
                context.functions.emplace_back(
                    vram,
                    rom_address,
                    std::move(insn_words),
                    name,
                    section_index,
                    ignored,
                    reimplemented
                );
            }
        }

        // The symbol wasn't detected as a function, so add it to the data symbols if the context is being dumped.
        if (!recorded_symbol && dumping_context && !name.empty()) {
            uint32_t vram = static_cast<uint32_t>(value);

            // Place this symbol in the absolute symbol list if it's in the absolute section.
            uint16_t target_section_index = section_index;
            if (section_index == ELFIO::SHN_ABS) {
                target_section_index = N64Recomp::SectionAbsolute;
            }
            else if (section_index >= context.sections.size()) {
                fmt::print("Symbol \"{}\" not in a valid section ({})\n", name, section_index);
            }

            // Move this symbol into the corresponding non-bss section if it's in a bss section.
            auto find_bss_it = bss_section_to_target_section.find(target_section_index);
            if (find_bss_it != bss_section_to_target_section.end()) {
                target_section_index = find_bss_it->second;
            }

            data_syms[target_section_index].emplace_back(
                vram,
                std::move(name)
            );
        }
    }

    return found_entrypoint_func;
}
// Snapshot of the fields of one ELF program segment that read_sections needs
// in order to map a section's file offset to a rom address.
struct SegmentEntry {
    // Offset of the segment's data within the elf file.
    ELFIO::Elf64_Off data_offset;
    // Physical address of the segment; read_sections uses it as the base rom address.
    ELFIO::Elf64_Addr physical_address;
    // Size used when checking whether a section lies within the segment.
    // NOTE: despite the name, read_sections fills this from the segment's file size.
    ELFIO::Elf_Xword memory_size;
};
std::optional<size_t> get_segment(const std::vector<SegmentEntry>& segments, ELFIO::Elf_Xword section_size, ELFIO::Elf64_Off section_offset) {
// A linear search is safest even if the segment list is sorted, as there may be overlapping segments
for (size_t i = 0; i < segments.size(); i++) {
const auto& segment = segments[i];
// Check that the section's data in the elf file is within bounds of the segment's data
if (section_offset >= segment.data_offset && section_offset + section_size <= segment.data_offset + segment.memory_size) {
return i;
}
}
return std::nullopt;
}
// Reads every section of the elf into `context.sections`, copies the data of allocated
// sections into `context.rom`, and parses the relocations of relocatable sections (or of
// all sections when reference symbols are in use).
//
// Expects `context.sections` to already hold one entry per elf section.
//
// Returns the symbol table section on success. Returns nullptr (after printing an error to
// stderr) if no symbol table exists, a section cannot be placed in a segment, a reloc section
// cannot be matched to its target, or a relocation is malformed.
ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfParsingConfig& elf_config, const ELFIO::elfio& elf_file) {
    ELFIO::section* symtab_section = nullptr;
    std::vector<SegmentEntry> segments{};
    segments.resize(elf_file.segments.size());

    // Copy the data for each segment into the segment entry list
    for (size_t segment_index = 0; segment_index < elf_file.segments.size(); segment_index++) {
        const auto& segment = *elf_file.segments[segment_index];
        segments[segment_index].data_offset = segment.get_offset();
        segments[segment_index].physical_address = segment.get_physical_address();
        segments[segment_index].memory_size = segment.get_file_size();
    }

    // The segment list is intentionally left unsorted: segments may overlap, so get_segment
    // performs a linear search instead of a binary search.

    std::unordered_map<std::string, ELFIO::section*> reloc_sections_by_name;
    std::unordered_map<std::string, ELFIO::section*> bss_sections_by_name;

    // Iterate over every section to record rom addresses and find the symbol table
    fmt::print("Sections\n");
    for (const auto& section : elf_file.sections) {
        auto& section_out = context.sections[section->get_index()];
        // Record the ram address and size of this section
        section_out.ram_addr = section->get_address();
        section_out.size = section->get_size();
        ELFIO::Elf_Word type = section->get_type();
        std::string section_name = section->get_name();

        // Check if this section is the symbol table and record it if so
        if (type == ELFIO::SHT_SYMTAB) {
            symtab_section = section.get();
        }

        if (elf_config.relocatable_sections.contains(section_name)) {
            section_out.relocatable = true;
        }

        // Check if this section is a reloc section
        if (type == ELFIO::SHT_REL) {
            // If it is, determine the name of the section it relocates
            if (!section_name.starts_with(".rel")) {
                fmt::print(stderr, "Could not determine corresponding section for reloc section {}\n", section_name.c_str());
                return nullptr;
            }

            std::string reloc_target_section = section_name.substr(strlen(".rel"));

            // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup.
            // Alternatively, if this recompilation uses reference symbols then record all reloc sections.
            if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) {
                reloc_sections_by_name[reloc_target_section] = section.get();
            }
        }

        // If the section is bss (SHT_NOBITS) and ends with the bss suffix, add it to the bss section map
        if (type == ELFIO::SHT_NOBITS && section_name.ends_with(elf_config.bss_section_suffix)) {
            std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size());

            // If this bss section is for a section that has been marked as relocatable, record it in the bss section lookup
            if (elf_config.relocatable_sections.contains(bss_target_section)) {
                bss_sections_by_name[bss_target_section] = section.get();
            }
        }

        // If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC),
        // find this section's rom address and copy it into the rom
        if (type != ELFIO::SHT_NOBITS && section->get_flags() & ELFIO::SHF_ALLOC && section->get_size() != 0) {
            // Find the segment this section is in to determine the physical (rom) address of the section
            std::optional<size_t> segment_index = get_segment(segments, section_out.size, section->get_offset());
            if (!segment_index.has_value()) {
                fmt::print(stderr, "Could not find segment that section {} belongs to!\n", section_name.c_str());
                return nullptr;
            }

            const SegmentEntry& segment = segments[segment_index.value()];
            // Calculate the rom address based on this section's offset into the segment and the segment's rom address
            section_out.rom_addr = segment.physical_address + (section->get_offset() - segment.data_offset);

            // Resize the output rom if needed to fit this section
            size_t required_rom_size = section_out.rom_addr + section_out.size;
            if (required_rom_size > context.rom.size()) {
                context.rom.resize(required_rom_size);
            }

            // Copy this section's data into the rom
            std::copy(section->get_data(), section->get_data() + section->get_size(), &context.rom[section_out.rom_addr]);
        } else {
            // Otherwise mark this section as having an invalid rom address
            section_out.rom_addr = (uint32_t)-1;
        }

        // Check if this section is marked as executable, which means it has code in it
        if (section->get_flags() & ELFIO::SHF_EXECINSTR) {
            section_out.executable = true;
        }
        section_out.name = section_name;
    }

    if (symtab_section == nullptr) {
        fmt::print(stderr, "No symtab section found\n");
        return nullptr;
    }

    ELFIO::symbol_section_accessor symbol_accessor{ elf_file, symtab_section };

    // TODO make sure that a reloc section was found for every section marked as relocatable

    // Process bss and reloc sections
    for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
        N64Recomp::Section& section_out = context.sections[section_index];

        // Check if a bss section was found that corresponds with this section
        auto bss_find = bss_sections_by_name.find(section_out.name);
        if (bss_find != bss_sections_by_name.end()) {
            section_out.bss_section_index = bss_find->second->get_index();
            section_out.bss_size = bss_find->second->get_size();
        }

        // NOTE(review): this checks reference_symbols while the reloc section lookup above was
        // populated based on reference_sections; confirm that the two are always empty together.
        if (!context.reference_symbols.empty() || section_out.relocatable) {
            // Check if a reloc section was found that corresponds with this section
            auto reloc_find = reloc_sections_by_name.find(section_out.name);
            if (reloc_find != reloc_sections_by_name.end()) {
                // Create an accessor for the reloc section
                ELFIO::relocation_section_accessor rel_accessor{ elf_file, reloc_find->second };
                // Allocate space for the relocs in this section
                section_out.relocs.resize(rel_accessor.get_entries_num());
                // Track whether the previous reloc was a HI16 and its previous full_immediate
                bool prev_hi = false;
                // Track whether the previous reloc was a LO16
                bool prev_lo = false;
                uint32_t prev_hi_immediate = 0;
                uint32_t prev_hi_symbol = std::numeric_limits<uint32_t>::max();

                for (size_t i = 0; i < section_out.relocs.size(); i++) {
                    // Get the current reloc
                    ELFIO::Elf64_Addr rel_offset;
                    ELFIO::Elf_Word rel_symbol;
                    unsigned int rel_type;
                    ELFIO::Elf_Sxword bad_rel_addend; // Addends aren't encoded in the reloc, so ignore this one
                    rel_accessor.get_entry(i, rel_offset, rel_symbol, rel_type, bad_rel_addend);

                    N64Recomp::Reloc& reloc_out = section_out.relocs[i];

                    // Get the real full_immediate by extracting the immediate from the instruction
                    uint32_t reloc_rom_addr = section_out.rom_addr + rel_offset - section_out.ram_addr;
                    uint32_t reloc_rom_word = byteswap(*reinterpret_cast<const uint32_t*>(context.rom.data() + reloc_rom_addr));

                    reloc_out.address = rel_offset;
                    reloc_out.symbol_index = rel_symbol;
                    reloc_out.type = static_cast<N64Recomp::RelocType>(rel_type);

                    std::string rel_symbol_name;
                    ELFIO::Elf64_Addr rel_symbol_value;
                    ELFIO::Elf_Xword rel_symbol_size;
                    unsigned char rel_symbol_bind;
                    unsigned char rel_symbol_type;
                    ELFIO::Elf_Half rel_symbol_section_index;
                    unsigned char rel_symbol_other;

                    bool found_rel_symbol = symbol_accessor.get_symbol(
                        rel_symbol, rel_symbol_name, rel_symbol_value, rel_symbol_size, rel_symbol_bind, rel_symbol_type, rel_symbol_section_index, rel_symbol_other);

                    // If the lookup failed, none of the outputs above were written, so bail out
                    // instead of reading an indeterminate section index below.
                    if (!found_rel_symbol) {
                        fmt::print(stderr, "Reloc index {} in section {} references invalid symbol index {}\n",
                            i, section_out.name, rel_symbol);
                        return nullptr;
                    }

                    uint32_t rel_section_vram = section_out.ram_addr;
                    uint32_t rel_symbol_offset = 0;

                    // Check if the symbol is undefined to know whether to look for it in the reference symbols.
                    if (rel_symbol_section_index == ELFIO::SHN_UNDEF) {
                        // Undefined sym, check the reference symbols.
                        auto sym_find_it = context.reference_symbols_by_name.find(rel_symbol_name);
                        if (sym_find_it == context.reference_symbols_by_name.end()) {
                            fmt::print(stderr, "Undefined symbol: {}, not found in input or reference symbols!\n",
                                rel_symbol_name);
                            return nullptr;
                        }

                        reloc_out.reference_symbol = true;
                        // Replace the reloc's symbol index with the index into the reference symbol array.
                        reloc_out.symbol_index = sym_find_it->second;
                        rel_section_vram = 0;
                        rel_symbol_offset = context.reference_symbols[reloc_out.symbol_index].section_offset;
                        reloc_out.target_section = context.reference_symbols[reloc_out.symbol_index].section_index;

                        bool target_section_relocatable = false;

                        if (reloc_out.target_section != N64Recomp::SectionAbsolute && context.reference_sections[reloc_out.target_section].relocatable) {
                            target_section_relocatable = true;
                        }

                        if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32 && target_section_relocatable) {
                            fmt::print(stderr, "Cannot reference {} in a statically initialized variable as it's defined in a relocatable section!\n",
                                rel_symbol_name);
                            return nullptr;
                        }
                    }
                    else {
                        reloc_out.reference_symbol = false;
                        reloc_out.target_section = rel_symbol_section_index;
                    }

                    // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf)
                    if (reloc_out.type == N64Recomp::RelocType::R_MIPS_LO16) {
                        uint32_t rel_immediate = reloc_rom_word & 0xFFFF;
                        uint32_t full_immediate = (prev_hi_immediate << 16) + (int16_t)rel_immediate;
                        reloc_out.target_section_offset = full_immediate + rel_symbol_offset - rel_section_vram;
                        if (prev_hi) {
                            if (prev_hi_symbol != rel_symbol) {
                                fmt::print(stderr, "Paired HI16 and LO16 relocations have different symbols\n"
                                                   "  LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n",
                                    i, section_out.name, reloc_out.symbol_index, reloc_out.address);
                                return nullptr;
                            }

                            // Set the previous HI16 relocs' relocated address.
                            section_out.relocs[i - 1].target_section_offset = reloc_out.target_section_offset;
                        }
                        else {
                            // Orphaned LO16 reloc warnings.
                            if (elf_config.unpaired_lo16_warnings) {
                                if (prev_lo) {
                                    // Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior.
                                    if (prev_hi_symbol != rel_symbol) {
                                        fmt::print(stderr, "[WARN] LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X} follows LO16 with different symbol\n",
                                            i, section_out.name, reloc_out.symbol_index, reloc_out.address);
                                    }
                                }
                                else {
                                    fmt::print(stderr, "[WARN] Unpaired LO16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n",
                                        i, section_out.name, reloc_out.symbol_index, reloc_out.address);
                                }
                            }
                            // Even though this is an orphaned LO16 reloc, the previous calculation for the addend still follows the MIPS System V ABI documentation:
                            // "R_MIPS_LO16 entries without an R_MIPS_HI16 entry immediately preceding are orphaned and the previously defined
                            // R_MIPS_HI16 is used for computing the addend."
                            // Therefore, nothing needs to be done to the section_offset member.
                        }
                        prev_lo = true;
                    } else {
                        if (prev_hi) {
                            // This is an invalid elf as the MIPS System V ABI documentation states:
                            // "Each relocation type of R_MIPS_HI16 must have an associated R_MIPS_LO16 entry
                            // immediately following it in the list of relocations."
                            fmt::print(stderr, "Unpaired HI16 reloc index {} in section {} referencing symbol {} with offset 0x{:08X}\n",
                                i - 1, section_out.name, section_out.relocs[i - 1].symbol_index, section_out.relocs[i - 1].address);
                            return nullptr;
                        }
                        prev_lo = false;
                    }

                    if (reloc_out.type == N64Recomp::RelocType::R_MIPS_HI16) {
                        uint32_t rel_immediate = reloc_rom_word & 0xFFFF;
                        prev_hi = true;
                        prev_hi_immediate = rel_immediate;
                        prev_hi_symbol = rel_symbol;
                    } else {
                        prev_hi = false;
                    }

                    if (reloc_out.type == N64Recomp::RelocType::R_MIPS_32) {
                        // The reloc addend is just the existing word before relocation, so the section offset can just be the symbol's section offset.
                        // Incorporating the addend will be handled at load-time.
                        reloc_out.target_section_offset = rel_symbol_offset;
                        // TODO set section_out.has_mips32_relocs to true if this section should emit its mips32 relocs (mainly for TLB mapping).

                        if (reloc_out.reference_symbol) {
                            uint32_t reloc_target_section_addr = 0;
                            if (reloc_out.target_section != N64Recomp::SectionAbsolute) {
                                reloc_target_section_addr = context.reference_sections[reloc_out.target_section].ram_addr;
                            }
                            // Patch the word in the ROM to incorporate the symbol's value.
                            uint32_t updated_reloc_word = reloc_rom_word + reloc_target_section_addr + reloc_out.target_section_offset;
                            *reinterpret_cast<uint32_t*>(context.rom.data() + reloc_rom_addr) = byteswap(updated_reloc_word);
                        }
                    }

                    if (reloc_out.type == N64Recomp::RelocType::R_MIPS_26) {
                        uint32_t rel_immediate = (reloc_rom_word & 0x3FFFFFF) << 2;
                        reloc_out.target_section_offset = rel_immediate + rel_symbol_offset;
                    }
                }
            }

            // Sort this section's relocs by address, which allows for binary searching and more efficient iteration during recompilation.
            // This is safe to do as the entire full_immediate is present in relocs due to the pairing that was done earlier, so the HI16 does not
            // need to directly precede the matching LO16 anymore.
            std::sort(section_out.relocs.begin(), section_out.relocs.end(),
                [](const N64Recomp::Reloc& a, const N64Recomp::Reloc& b) {
                    return a.address < b.address;
                }
            );
        }
    }

    return symtab_section;
}
// Sizes the per-section containers of `context` to match the elf's section count and
// reserves initial capacity for the function tables and the rom buffer.
static void setup_context_for_elf(N64Recomp::Context& context, const ELFIO::elfio& elf_file) {
    // One entry per elf section, so that sections can be indexed by their elf section index.
    const auto section_count = elf_file.sections.size();
    context.sections.resize(section_count);
    context.section_functions.resize(section_count);

    // Reserve the function list first, then size the lookup tables to whatever capacity it actually got.
    context.functions.reserve(1024);
    const auto function_capacity = context.functions.capacity();
    context.functions_by_vram.reserve(function_capacity);
    context.functions_by_name.reserve(function_capacity);

    context.rom.reserve(8 * 1024 * 1024);
}
// Loads the elf at `elf_file_path` and fills `out` with its sections, relocs and symbols.
//
// The elf must be 32-bit (ELFCLASS32) and big-endian (ELFDATA2MSB).
//
// `for_dumping_context` is forwarded to read_symbols; when set, symbols that are not recorded
// as functions are collected into `data_syms_out`.
// `found_entrypoint_out` is only written on success and receives the result of read_symbols.
//
// Returns false (after printing an error to stderr) if the file cannot be loaded, has the wrong
// class or endianness, or its sections cannot be read; returns true otherwise.
bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& elf_config, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out) {
    ELFIO::elfio elf_file;

    // Errors are reported on stderr, matching the rest of the elf parsing code.
    if (!elf_file.load(elf_file_path.string())) {
        fmt::print(stderr, "Elf file not found\n");
        return false;
    }

    if (elf_file.get_class() != ELFIO::ELFCLASS32) {
        fmt::print(stderr, "Incorrect elf class\n");
        return false;
    }

    if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) {
        fmt::print(stderr, "Incorrect endianness\n");
        return false;
    }

    setup_context_for_elf(out, elf_file);

    // Read all of the sections in the elf and look for the symbol table section
    ELFIO::section* symtab_section = read_sections(out, elf_config, elf_file);

    // If no symbol table was found then exit
    if (symtab_section == nullptr) {
        fmt::print(stderr, "No symbol table section found\n");
        return false;
    }

    // Read all of the symbols in the elf and look for the entrypoint function
    found_entrypoint_out = read_symbols(out, elf_file, symtab_section, elf_config, for_dumping_context, data_syms_out);

    return true;
}

File diff suppressed because it is too large Load diff

660
src/symbol_lists.cpp Normal file
View file

@ -0,0 +1,660 @@
#include "n64recomp.h"
// Names of functions that are treated as reimplemented.
// When the elf is parsed without dumping context, a symbol whose name appears in this set is
// flagged as both reimplemented and ignored, and its name is suffixed with "_recomp".
const std::unordered_set<std::string> N64Recomp::reimplemented_funcs{
    // OS initialize functions
    "__osInitialize_common",
    "osInitialize",
    "osGetMemSize",
    // Audio interface functions
    "osAiGetLength",
    "osAiGetStatus",
    "osAiSetFrequency",
    "osAiSetNextBuffer",
    // Video interface functions
    "osViSetXScale",
    "osViSetYScale",
    "osCreateViManager",
    "osViBlack",
    "osViSetSpecialFeatures",
    "osViGetCurrentFramebuffer",
    "osViGetNextFramebuffer",
    "osViSwapBuffer",
    "osViSetMode",
    "osViSetEvent",
    // RDP functions
    "osDpSetNextBuffer",
    // RSP functions
    "osSpTaskLoad",
    "osSpTaskStartGo",
    "osSpTaskYield",
    "osSpTaskYielded",
    "__osSpSetPc",
    // Controller functions
    "osContInit",
    "osContStartReadData",
    "osContGetReadData",
    "osContStartQuery",
    "osContGetQuery",
    "osContSetCh",
    // EEPROM functions
    "osEepromProbe",
    "osEepromWrite",
    "osEepromLongWrite",
    "osEepromRead",
    "osEepromLongRead",
    // Rumble functions
    "__osMotorAccess",
    "osMotorInit",
    "osMotorStart",
    "osMotorStop",
    // PFS functions
    "osPfsInitPak",
    "osPfsFreeBlocks",
    "osPfsAllocateFile",
    "osPfsDeleteFile",
    "osPfsFileState",
    "osPfsFindFile",
    "osPfsReadWriteFile",
    // Parallel interface (cartridge, DMA, etc.) functions
    "osCartRomInit",
    "osCreatePiManager",
    "osPiStartDma",
    "osEPiStartDma",
    "osPiGetStatus",
    "osEPiRawStartDma",
    "osEPiReadIo",
    // Flash saving functions
    "osFlashInit",
    "osFlashReadStatus",
    "osFlashReadId",
    "osFlashClearStatus",
    "osFlashAllErase",
    "osFlashAllEraseThrough",
    "osFlashSectorErase",
    "osFlashSectorEraseThrough",
    "osFlashCheckEraseEnd",
    "osFlashWriteBuffer",
    "osFlashWriteArray",
    "osFlashReadArray",
    "osFlashChange",
    // Threading functions
    "osCreateThread",
    "osStartThread",
    "osStopThread",
    "osDestroyThread",
    "osSetThreadPri",
    "osGetThreadPri",
    "osGetThreadId",
    // Message Queue functions
    "osCreateMesgQueue",
    "osRecvMesg",
    "osSendMesg",
    "osJamMesg",
    "osSetEventMesg",
    // Timer functions
    "osGetTime",
    "osSetTimer",
    "osStopTimer",
    // Voice functions
    "osVoiceSetWord",
    "osVoiceCheckWord",
    "osVoiceStopReadData",
    "osVoiceInit",
    "osVoiceMaskDictionary",
    "osVoiceStartReadData",
    "osVoiceControlGain",
    "osVoiceGetReadData",
    "osVoiceClearDictionary",
    // Interrupt functions
    "osSetIntMask",
    "__osDisableInt",
    "__osRestoreInt",
    // TLB functions
    "osVirtualToPhysical",
    // Coprocessor 0/1 functions
    "osGetCount",
    "__osSetFpcCsr",
    // Cache functions
    "osInvalDCache",
    "osInvalICache",
    "osWritebackDCache",
    "osWritebackDCacheAll",
    // Debug functions
    "is_proutSyncPrintf",
    "__checkHardware_msp",
    "__checkHardware_kmc",
    "__checkHardware_isv",
    "__osInitialize_msp",
    "__osInitialize_kmc",
    "__osInitialize_isv",
    "__osRdbSend",
    // IDO math routines
    "__ull_div",
    "__ll_div",
    "__ll_mul",
    "__ull_rem",
    "__ull_to_d",
    "__ull_to_f",
};
const std::unordered_set<std::string> N64Recomp::ignored_funcs {
// OS initialize functions
"__createSpeedParam",
"__osInitialize_common",
"osInitialize",
"osGetMemSize",
// Audio interface functions
"osAiGetLength",
"osAiGetStatus",
"osAiSetFrequency",
"osAiSetNextBuffer",
"__osAiDeviceBusy",
// Video interface functions
"osViBlack",
"osViFade",
"osViGetCurrentField",
"osViGetCurrentFramebuffer",
"osViGetCurrentLine",
"osViGetCurrentMode",
"osViGetNextFramebuffer",
"osViGetStatus",
"osViRepeatLine",
"osViSetEvent",
"osViSetMode",
"osViSetSpecialFeatures",
"osViSetXScale",
"osViSetYScale",
"osViSwapBuffer",
"osCreateViManager",
"viMgrMain",
"__osViInit",
"__osViSwapContext",
"__osViGetCurrentContext",
// RDP functions
"osDpGetCounters",
"osDpSetStatus",
"osDpGetStatus",
"osDpSetNextBuffer",
"__osDpDeviceBusy",
// RSP functions
"osSpTaskLoad",
"osSpTaskStartGo",
"osSpTaskYield",
"osSpTaskYielded",
"__osSpDeviceBusy",
"__osSpGetStatus",
"__osSpRawStartDma",
"__osSpRawReadIo",
"__osSpRawWriteIo",
"__osSpSetPc",
"__osSpSetStatus",
// Controller functions
"osContGetQuery",
"osContGetReadData",
"osContInit",
"osContReset",
"osContSetCh",
"osContStartQuery",
"osContStartReadData",
"__osContAddressCrc",
"__osContDataCrc",
"__osContGetInitData",
"__osContRamRead",
"__osContRamWrite",
"__osContChannelReset",
// EEPROM functions
"osEepromLongRead",
"osEepromLongWrite",
"osEepromProbe",
"osEepromRead",
"osEepromWrite",
"__osEepStatus",
// Rumble functions
"osMotorInit",
"osMotorStart",
"osMotorStop",
"__osMotorAccess",
"_MakeMotorData",
// Pack functions
"__osCheckId",
"__osCheckPackId",
"__osGetId",
"__osPfsRWInode",
"__osRepairPackId",
"__osPfsSelectBank",
"__osCheckPackId",
"ramromMain",
// PFS functions
"osPfsAllocateFile",
"osPfsChecker",
"osPfsDeleteFile",
"osPfsFileState",
"osPfsFindFile",
"osPfsFreeBlocks",
"osPfsGetLabel",
"osPfsInit",
"osPfsInitPak",
"osPfsIsPlug",
"osPfsNumFiles",
"osPfsRepairId",
"osPfsReadWriteFile",
"__osPackEepReadData",
"__osPackEepWriteData",
"__osPackRamReadData",
"__osPackRamWriteData",
"__osPackReadData",
"__osPackRequestData",
"__osPfsGetInitData",
"__osPfsGetOneChannelData",
"__osPfsGetStatus",
"__osPfsRequestData",
"__osPfsRequestOneChannel",
"__osPfsCreateAccessQueue",
"__osPfsCheckRamArea",
"__osPfsGetNextPage",
// Low level serial interface functions
"__osSiDeviceBusy",
"__osSiGetStatus",
"__osSiRawStartDma",
"__osSiRawReadIo",
"__osSiRawWriteIo",
"__osSiCreateAccessQueue",
"__osSiGetAccess",
"__osSiRelAccess",
// Parallel interface (cartridge, DMA, etc.) functions
"osCartRomInit",
"osLeoDiskInit",
"osCreatePiManager",
"__osDevMgrMain",
"osPiGetCmdQueue",
"osPiGetStatus",
"osPiReadIo",
"osPiStartDma",
"osPiWriteIo",
"osEPiGetDeviceType",
"osEPiStartDma",
"osEPiWriteIo",
"osEPiReadIo",
"osPiRawStartDma",
"osPiRawReadIo",
"osPiRawWriteIo",
"osEPiRawStartDma",
"osEPiRawReadIo",
"osEPiRawWriteIo",
"__osPiRawStartDma",
"__osPiRawReadIo",
"__osPiRawWriteIo",
"__osEPiRawStartDma",
"__osEPiRawReadIo",
"__osEPiRawWriteIo",
"__osPiDeviceBusy",
"__osPiCreateAccessQueue",
"__osPiGetAccess",
"__osPiRelAccess",
"__osLeoAbnormalResume",
"__osLeoInterrupt",
"__osLeoResume",
// Flash saving functions
"osFlashInit",
"osFlashReadStatus",
"osFlashReadId",
"osFlashClearStatus",
"osFlashAllErase",
"osFlashAllEraseThrough",
"osFlashSectorErase",
"osFlashSectorEraseThrough",
"osFlashCheckEraseEnd",
"osFlashWriteBuffer",
"osFlashWriteArray",
"osFlashReadArray",
"osFlashChange",
// Threading functions
"osCreateThread",
"osStartThread",
"osStopThread",
"osDestroyThread",
"osYieldThread",
"osSetThreadPri",
"osGetThreadPri",
"osGetThreadId",
"__osDequeueThread",
// Message Queue functions
"osCreateMesgQueue",
"osSendMesg",
"osJamMesg",
"osRecvMesg",
"osSetEventMesg",
// Timer functions
"osStartTimer",
"osSetTimer",
"osStopTimer",
"osGetTime",
"__osInsertTimer",
"__osTimerInterrupt",
"__osTimerServicesInit",
"__osSetTimerIntr",
// Voice functions
"osVoiceSetWord",
"osVoiceCheckWord",
"osVoiceStopReadData",
"osVoiceInit",
"osVoiceMaskDictionary",
"osVoiceStartReadData",
"osVoiceControlGain",
"osVoiceGetReadData",
"osVoiceClearDictionary",
"__osVoiceCheckResult",
"__osVoiceContRead36",
"__osVoiceContWrite20",
"__osVoiceContWrite4",
"__osVoiceContRead2",
"__osVoiceSetADConverter",
"__osVoiceContDataCrc",
"__osVoiceGetStatus",
"corrupted",
"corrupted_init",
// exceptasm functions
"__osExceptionPreamble",
"__osException",
"__ptExceptionPreamble",
"__ptException",
"send_mesg",
"handle_CpU",
"__osEnqueueAndYield",
"__osEnqueueThread",
"__osPopThread",
"__osNop",
"__osDispatchThread",
"__osCleanupThread",
"osGetCurrFaultedThread",
"osGetNextFaultedThread",
// interrupt functions
"osSetIntMask",
"osGetIntMask",
"__osDisableInt",
"__osRestoreInt",
"__osSetGlobalIntMask",
"__osResetGlobalIntMask",
// TLB functions
"osMapTLB",
"osUnmapTLB",
"osUnmapTLBAll",
"osSetTLBASID",
"osMapTLBRdb",
"osVirtualToPhysical",
"__osGetTLBHi",
"__osGetTLBLo0",
"__osGetTLBLo1",
"__osGetTLBPageMask",
"__osGetTLBASID",
"__osProbeTLB",
// Coprocessor 0/1 functions
"__osSetCount",
"osGetCount",
"__osSetSR",
"__osGetSR",
"__osSetCause",
"__osGetCause",
"__osSetCompare",
"__osGetCompare",
"__osSetConfig",
"__osGetConfig",
"__osSetWatchLo",
"__osGetWatchLo",
"__osSetFpcCsr",
// Cache funcs
"osInvalDCache",
"osInvalICache",
"osWritebackDCache",
"osWritebackDCacheAll",
// Microcodes
"rspbootTextStart",
"gspF3DEX2_fifoTextStart",
"gspS2DEX2_fifoTextStart",
"gspL3DEX2_fifoTextStart",
// Debug functions
"msp_proutSyncPrintf",
"__osInitialize_msp",
"__checkHardware_msp",
"kmc_proutSyncPrintf",
"__osInitialize_kmc",
"__checkHardware_kmc",
"isPrintfInit",
"is_proutSyncPrintf",
"__osInitialize_isv",
"__checkHardware_isv",
"__isExpJP",
"__isExp",
"__osRdbSend",
"__rmonSendData",
"__rmonWriteMem",
"__rmonReadWordAt",
"__rmonWriteWordTo",
"__rmonWriteMem",
"__rmonSetSRegs",
"__rmonSetVRegs",
"__rmonStopThread",
"__rmonGetThreadStatus",
"__rmonGetVRegs",
"__rmonHitSpBreak",
"__rmonRunThread",
"__rmonClearBreak",
"__rmonGetBranchTarget",
"__rmonGetSRegs",
"__rmonSetBreak",
"__rmonReadMem",
"__rmonRunThread",
"__rmonCopyWords",
"__rmonExecute",
"__rmonGetExceptionStatus",
"__rmonGetExeName",
"__rmonGetFRegisters",
"__rmonGetGRegisters",
"__rmonGetRegionCount",
"__rmonGetRegions",
"__rmonGetRegisterContents",
"__rmonGetTCB",
"__rmonHitBreak",
"__rmonHitCpuFault",
"__rmonIdleRCP",
"__rmonInit",
"__rmonIOflush",
"__rmonIOhandler",
"__rmonIOputw",
"__rmonListBreak",
"__rmonListProcesses",
"__rmonListThreads",
"__rmonLoadProgram",
"__rmonMaskIdleThreadInts",
"__rmonMemcpy",
"__rmonPanic",
"__rmonRCPrunning",
"__rmonRunRCP",
"__rmonSendFault",
"__rmonSendHeader",
"__rmonSendReply",
"__rmonSetComm",
"__rmonSetFault",
"__rmonSetFRegisters",
"__rmonSetGRegisters",
"__rmonSetSingleStep",
"__rmonStepRCP",
"__rmonStopUserThreads",
"__rmonThreadStatus",
"__rmon",
"__rmonRunThread",
"rmonFindFaultedThreads",
"rmonMain",
"rmonPrintf",
"rmonGetRcpRegister",
"kdebugserver",
"send",
// ido math routines
"__ll_div",
"__ll_lshift",
"__ll_mod",
"__ll_mul",
"__ll_rem",
"__ll_rshift",
"__ull_div",
"__ull_divremi",
"__ull_rem",
"__ull_rshift",
"__d_to_ll",
"__f_to_ll",
"__d_to_ull",
"__f_to_ull",
"__ll_to_d",
"__ll_to_f",
"__ull_to_d",
"__ull_to_f",
// Setjmp/longjmp for mario party
"setjmp",
"longjmp"
// 64-bit functions for banjo
"func_8025C29C",
"func_8025C240",
"func_8025C288",
// rmonregs
"LoadStoreSU",
"LoadStoreVU",
"SetUpForRCPop",
"CleanupFromRCPop",
"__rmonGetGRegisters",
"__rmonSetGRegisters",
"__rmonGetFRegisters",
"__rmonSetFRegisters",
"rmonGetRcpRegister",
"__rmonGetSRegs",
"__rmonSetSRegs",
"__rmonGetVRegs",
"__rmonSetVRegs",
"__rmonGetRegisterContents",
// rmonbrk
"SetTempBreakpoint",
"ClearTempBreakpoint",
"__rmonSetBreak",
"__rmonListBreak",
"__rmonClearBreak",
"__rmonGetBranchTarget",
"IsJump",
"__rmonSetSingleStep",
"__rmonGetExceptionStatus",
"rmonSendBreakMessage",
"__rmonHitBreak",
"__rmonHitSpBreak",
"__rmonHitCpuFault",
"rmonFindFaultedThreads",
// kdebugserver
"string_to_u32",
"send_packet",
"clear_IP6",
"send",
"kdebugserver",
};
// Hard-coded set of function symbol names, grouped below by the library or
// category they come from.
// NOTE(review): judging by the variable name, these are symbols that get
// renamed by the recompiler (presumably to avoid clashing with host-side
// functions of the same name) -- confirm against the code that consumes this set.
//
// Every entry MUST be followed by a comma. Adjacent string literals are
// concatenated by the compiler, so a missing comma silently merges two names
// into a single bogus entry and drops both real names from the set.
const std::unordered_set<std::string> N64Recomp::renamed_funcs{
    // Math
    "sincosf",
    "sinf",
    "cosf",
    "__sinf",
    "__cosf",
    "asinf",
    "acosf",
    "atanf",
    "atan2f",
    "tanf",
    "sqrt",
    "sqrtf",
    // Memory
    "memcpy",
    "memset",
    "memmove",
    "memcmp",
    "strcmp",
    "strcat",
    "strcpy",
    "strchr",
    "strlen",
    "strtok",
    "sprintf",
    "bzero",
    "bcopy",
    "bcmp",
    // long jumps
    "setjmp",
    "longjmp",
    // Math 2
    "ldiv",
    "lldiv",
    "ceil",
    "ceilf",
    "floor",
    "floorf",
    "fmodf",
    "fmod",
    "modf",
    "lround",
    "lroundf",
    "nearbyint",
    "nearbyintf",
    "round",
    "roundf",
    "trunc",
    "truncf",
    // printf family
    "vsprintf",
    "gcvt",
    "fcvt",
    "ecvt",
    "__assert",
    // allocations
    "malloc",
    "free",
    "realloc",
    "calloc",
    // rand
    "rand",
    "srand",
    "random",
    // gzip
    "huft_build",
    "huft_free",
    "inflate_codes",
    "inflate_stored",
    "inflate_fixed",
    "inflate_dynamic",
    "inflate_block",
    "inflate",
    "expand_gzip",
    // The comma after "auRomDataRead" was previously missing, which fused it
    // with "data_write" into the single entry "auRomDataReaddata_write".
    "auRomDataRead",
    "data_write",
    "unzip",
    "updcrc",
    "clear_bufs",
    "fill_inbuf",
    "flush_window",
    // libgcc math routines
    "__muldi3",
    "__divdi3",
    "__udivdi3",
    "__umoddi3",
    "div64_64",
    "div64_32",
    "__moddi3",
    "_matherr",
};