mirror of
https://github.com/N64Recomp/N64Recomp.git
synced 2026-04-28 04:51:43 +00:00
Refactor elf parsing into static Context method for reusability
This commit is contained in:
parent
ff7aae11df
commit
04daa21908
2 changed files with 96 additions and 72 deletions
|
|
@ -86,8 +86,28 @@ namespace N64Recomp {
|
|||
bool is_function;
|
||||
};
|
||||
|
||||
// Options that control how an ELF file is parsed into a Context.
// Every scalar member has a default so that a default-constructed config
// (or one built with only some designated initializers) never carries
// indeterminate values.
struct ElfParsingConfig {
    // Suffix that marks a SHT_NOBITS section as the bss of another section:
    // a section named "<target><suffix>" is treated as the bss of "<target>".
    std::string bss_section_suffix;
    // Functions with manual size overrides
    std::unordered_map<std::string, size_t> manually_sized_funcs;
    // The section names that were specified as relocatable
    std::unordered_set<std::string> relocatable_sections;
    // Whether entrypoint_address should be used to look for an entrypoint function.
    bool has_entrypoint = false;
    // Address compared against function symbol values to identify the entrypoint.
    // Only consulted when has_entrypoint is true.
    int32_t entrypoint_address = 0;
    // Enables the handling of symbols whose section index is SHN_ABS.
    bool use_absolute_symbols = false;
    // Enables warnings about orphaned (unpaired) LO16 relocations.
    bool unpaired_lo16_warnings = false;
};
|
||||
|
||||
// A named data symbol paired with its vram address.
struct DataSymbol {
    uint32_t vram;
    std::string name;

    // Takes ownership of `name`; the caller's string is moved from.
    DataSymbol(uint32_t vram, std::string&& name)
        : vram(vram),
          name(std::move(name))
    {
    }
};
|
||||
|
||||
// Data symbols grouped by the index of the section they belong to.
// Absolute symbols are looked up under the SectionAbsolute key.
using DataSymbolMap = std::unordered_map<uint16_t, std::vector<DataSymbol>>;
|
||||
|
||||
struct Context {
|
||||
// ROM address of each section
|
||||
std::vector<Section> sections;
|
||||
std::vector<Function> functions;
|
||||
std::unordered_map<uint32_t, std::vector<size_t>> functions_by_vram;
|
||||
|
|
@ -100,10 +120,6 @@ namespace N64Recomp {
|
|||
std::unordered_map<std::string, size_t> functions_by_name;
|
||||
// For each section, the list of functions (by index in `functions`) in that section
|
||||
std::vector<std::vector<size_t>> section_functions;
|
||||
// The section names that were specified as relocatable (only used for elf files)
|
||||
std::unordered_set<std::string> relocatable_sections; //
|
||||
// Functions with manual size overrides (only used for elf files)
|
||||
std::unordered_map<std::string, size_t> manually_sized_funcs;
|
||||
|
||||
//// Reference symbols (used for populating relocations for patches)
|
||||
// A list of the sections that contain the reference symbols.
|
||||
|
|
@ -121,6 +137,7 @@ namespace N64Recomp {
|
|||
bool read_data_reference_syms(const std::filesystem::path& data_syms_file_path);
|
||||
|
||||
static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, Context& out, bool with_relocs);
|
||||
static bool from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& flags, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out);
|
||||
|
||||
Context() = default;
|
||||
};
|
||||
|
|
|
|||
141
src/main.cpp
141
src/main.cpp
|
|
@ -673,14 +673,7 @@ std::unordered_set<std::string> renamed_funcs{
|
|||
"_matherr",
|
||||
};
|
||||
|
||||
struct DataSymbol {
|
||||
uint32_t vram;
|
||||
std::string name;
|
||||
|
||||
DataSymbol(uint32_t vram, std::string&& name) : vram(vram), name(std::move(name)) {}
|
||||
};
|
||||
|
||||
bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols, bool dumping_context, std::unordered_map<uint16_t, std::vector<DataSymbol>>& data_syms) {
|
||||
bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms) {
|
||||
bool found_entrypoint_func = false;
|
||||
ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
|
||||
fmt::print("Num symbols: {}\n", symbols.get_symbols_num());
|
||||
|
|
@ -716,7 +709,7 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
|
|||
symbols.get_symbol(sym_index, name, value, size, bind, type,
|
||||
section_index, other);
|
||||
|
||||
if (section_index == ELFIO::SHN_ABS && use_absolute_symbols) {
|
||||
if (section_index == ELFIO::SHN_ABS && elf_config.use_absolute_symbols) {
|
||||
uint32_t vram = static_cast<uint32_t>(value);
|
||||
context.functions_by_vram[vram].push_back(context.functions.size());
|
||||
|
||||
|
|
@ -735,7 +728,7 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
|
|||
|
||||
if (section_index < context.sections.size()) {
|
||||
// Check if this symbol is the entrypoint
|
||||
if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) {
|
||||
if (elf_config.has_entrypoint && value == elf_config.entrypoint_address && type == ELFIO::STT_FUNC) {
|
||||
if (found_entrypoint_func) {
|
||||
fmt::print(stderr, "Ambiguous entrypoint: {}\n", name);
|
||||
return false;
|
||||
|
|
@ -747,8 +740,8 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
|
|||
}
|
||||
|
||||
// Check if this symbol has a size override
|
||||
auto size_find = context.manually_sized_funcs.find(name);
|
||||
if (size_find != context.manually_sized_funcs.end()) {
|
||||
auto size_find = elf_config.manually_sized_funcs.find(name);
|
||||
if (size_find != elf_config.manually_sized_funcs.end()) {
|
||||
size = size_find->second;
|
||||
type = ELFIO::STT_FUNC;
|
||||
}
|
||||
|
|
@ -787,8 +780,8 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
|
|||
context.functions_by_vram[vram].push_back(context.functions.size());
|
||||
|
||||
// Find the entrypoint by rom address in case it doesn't have vram as its value
|
||||
if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) {
|
||||
vram = entrypoint;
|
||||
if (elf_config.has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) {
|
||||
vram = elf_config.entrypoint_address;
|
||||
found_entrypoint_func = true;
|
||||
name = "recomp_entrypoint";
|
||||
if (size == 0) {
|
||||
|
|
@ -853,7 +846,6 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
|
|||
// Move this symbol into the corresponding non-bss section if it's in a bss section.
|
||||
auto find_bss_it = bss_section_to_target_section.find(target_section_index);
|
||||
if (find_bss_it != bss_section_to_target_section.end()) {
|
||||
fmt::print("mapping {} to {}\n", context.sections[section_index].name, context.sections[find_bss_it->second].name);
|
||||
target_section_index = find_bss_it->second;
|
||||
}
|
||||
|
||||
|
|
@ -867,7 +859,7 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
|
|||
return found_entrypoint_func;
|
||||
}
|
||||
|
||||
void add_manual_functions(N64Recomp::Context& context, const ELFIO::elfio& elf_file, const std::vector<N64Recomp::ManualFunction>& manual_funcs) {
|
||||
void add_manual_functions(N64Recomp::Context& context, const std::vector<N64Recomp::ManualFunction>& manual_funcs) {
|
||||
auto exit_failure = [](const std::string& error_str) {
|
||||
fmt::vprint(stderr, error_str, fmt::make_format_args());
|
||||
std::exit(EXIT_FAILURE);
|
||||
|
|
@ -903,7 +895,7 @@ void add_manual_functions(N64Recomp::Context& context, const ELFIO::elfio& elf_f
|
|||
|
||||
std::vector<uint32_t> words;
|
||||
words.resize(cur_func_def.size / 4);
|
||||
const uint32_t* elf_words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset);
|
||||
const uint32_t* elf_words = reinterpret_cast<const uint32_t*>(context.rom.data() + context.sections[section_index].rom_addr + section_offset);
|
||||
|
||||
words.assign(elf_words, elf_words + words.size());
|
||||
|
||||
|
|
@ -946,7 +938,7 @@ std::optional<size_t> get_segment(const std::vector<SegmentEntry>& segments, ELF
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Config& config, const ELFIO::elfio& elf_file) {
|
||||
ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfParsingConfig& elf_config, const ELFIO::elfio& elf_file) {
|
||||
ELFIO::section* symtab_section = nullptr;
|
||||
std::vector<SegmentEntry> segments{};
|
||||
segments.resize(elf_file.segments.size());
|
||||
|
|
@ -985,7 +977,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
|
|||
symtab_section = section.get();
|
||||
}
|
||||
|
||||
if (context.relocatable_sections.contains(section_name)) {
|
||||
if (elf_config.relocatable_sections.contains(section_name)) {
|
||||
section_out.relocatable = true;
|
||||
}
|
||||
|
||||
|
|
@ -1001,17 +993,17 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
|
|||
|
||||
// If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup.
|
||||
// Alternatively, if this recompilation uses reference symbols then record all reloc sections.
|
||||
if (!context.reference_sections.empty() || context.relocatable_sections.contains(reloc_target_section)) {
|
||||
if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) {
|
||||
reloc_sections_by_name[reloc_target_section] = section.get();
|
||||
}
|
||||
}
|
||||
|
||||
// If the section is bss (SHT_NOBITS) and ends with the bss suffix, add it to the bss section map
|
||||
if (type == ELFIO::SHT_NOBITS && section_name.ends_with(config.bss_section_suffix)) {
|
||||
std::string bss_target_section = section_name.substr(0, section_name.size() - config.bss_section_suffix.size());
|
||||
if (type == ELFIO::SHT_NOBITS && section_name.ends_with(elf_config.bss_section_suffix)) {
|
||||
std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size());
|
||||
|
||||
// If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup
|
||||
if (context.relocatable_sections.contains(bss_target_section)) {
|
||||
if (elf_config.relocatable_sections.contains(bss_target_section)) {
|
||||
bss_sections_by_name[bss_target_section] = section.get();
|
||||
}
|
||||
}
|
||||
|
|
@ -1184,7 +1176,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
|
|||
}
|
||||
else {
|
||||
// Orphaned LO16 reloc warnings.
|
||||
if (config.unpaired_lo16_warnings) {
|
||||
if (elf_config.unpaired_lo16_warnings) {
|
||||
if (prev_lo) {
|
||||
// Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior.
|
||||
if (prev_hi_symbol != rel_symbol) {
|
||||
|
|
@ -1350,15 +1342,15 @@ std::vector<std::string> reloc_names {
|
|||
"R_MIPS_GPREL16",
|
||||
};
|
||||
|
||||
void dump_context(const N64Recomp::Context& context, const std::unordered_map<uint16_t, std::vector<DataSymbol>>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) {
|
||||
void dump_context(const N64Recomp::Context& context, const std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) {
|
||||
std::ofstream func_context_file {func_path};
|
||||
std::ofstream data_context_file {data_path};
|
||||
|
||||
fmt::print(func_context_file, "# Autogenerated from an ELF via N64Recomp\n");
|
||||
fmt::print(data_context_file, "# Autogenerated from an ELF via N64Recomp\n");
|
||||
|
||||
auto print_section = [](std::ofstream& output_file, const std::string& name, uint64_t rom_addr, uint64_t ram_addr, uint64_t size) {
|
||||
if (rom_addr == (uint64_t)-1) {
|
||||
auto print_section = [](std::ofstream& output_file, const std::string& name, uint32_t rom_addr, uint32_t ram_addr, uint32_t size) {
|
||||
if (rom_addr == (uint32_t)-1) {
|
||||
fmt::print(output_file,
|
||||
"[[section]]\n"
|
||||
"name = \"{}\"\n"
|
||||
|
|
@ -1416,15 +1408,12 @@ void dump_context(const N64Recomp::Context& context, const std::unordered_map<ui
|
|||
|
||||
const auto find_syms_it = data_syms.find((uint16_t)section_index);
|
||||
if (find_syms_it != data_syms.end() && !find_syms_it->second.empty()) {
|
||||
if (section.name.ends_with(".bss")) {
|
||||
fmt::print("asdasd {}\n", section.name);
|
||||
}
|
||||
print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size);
|
||||
|
||||
// Dump other symbols into the data context file.
|
||||
fmt::print(data_context_file, "symbols = [\n");
|
||||
|
||||
for (const DataSymbol& cur_sym : find_syms_it->second) {
|
||||
for (const N64Recomp::DataSymbol& cur_sym : find_syms_it->second) {
|
||||
fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram);
|
||||
}
|
||||
|
||||
|
|
@ -1432,13 +1421,13 @@ void dump_context(const N64Recomp::Context& context, const std::unordered_map<ui
|
|||
}
|
||||
}
|
||||
|
||||
const auto find_abs_syms_it = data_syms.find((uint16_t)-1);
|
||||
const auto find_abs_syms_it = data_syms.find(N64Recomp::SectionAbsolute);
|
||||
if (find_abs_syms_it != data_syms.end() && !find_abs_syms_it->second.empty()) {
|
||||
// Dump absolute symbols into the data context file.
|
||||
print_section(data_context_file, "ABSOLUTE_SYMS", (uint64_t)-1, 0, 0);
|
||||
print_section(data_context_file, "ABSOLUTE_SYMS", (uint32_t)-1, 0, 0);
|
||||
fmt::print(data_context_file, "symbols = [\n");
|
||||
|
||||
for (const DataSymbol& cur_sym : find_abs_syms_it->second) {
|
||||
for (const N64Recomp::DataSymbol& cur_sym : find_abs_syms_it->second) {
|
||||
fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram);
|
||||
}
|
||||
|
||||
|
|
@ -1471,6 +1460,41 @@ static void setup_context_for_elf(N64Recomp::Context& context, const ELFIO::elfi
|
|||
context.rom.reserve(8 * 1024 * 1024);
|
||||
}
|
||||
|
||||
bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& elf_config, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out) {
|
||||
ELFIO::elfio elf_file;
|
||||
|
||||
if (!elf_file.load(elf_file_path.string())) {
|
||||
fmt::print("Failed to load provided elf file\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (elf_file.get_class() != ELFIO::ELFCLASS32) {
|
||||
fmt::print("Incorrect elf class\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) {
|
||||
fmt::print("Incorrect endianness\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
setup_context_for_elf(out, elf_file);
|
||||
|
||||
// Read all of the sections in the elf and look for the symbol table section
|
||||
ELFIO::section* symtab_section = read_sections(out, elf_config, elf_file);
|
||||
|
||||
// If no symbol table was found then exit
|
||||
if (symtab_section == nullptr) {
|
||||
fmt::print("No symbol table section found\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read all of the symbols in the elf and look for the entrypoint function
|
||||
found_entrypoint_out = read_symbols(out, elf_file, symtab_section, elf_config, for_dumping_context, data_syms_out);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
auto exit_failure = [] (const std::string& error_str) {
|
||||
fmt::vprint(stderr, error_str, fmt::make_format_args());
|
||||
|
|
@ -1478,7 +1502,7 @@ int main(int argc, char** argv) {
|
|||
};
|
||||
|
||||
// TODO expose a way to dump the context from the command line.
|
||||
bool dumping_context = false;
|
||||
bool dumping_context = true;// false;
|
||||
|
||||
if (argc != 2) {
|
||||
fmt::print("Usage: {} [config file]\n", argv[0]);
|
||||
|
|
@ -1517,22 +1541,8 @@ int main(int argc, char** argv) {
|
|||
|
||||
// Build a context from the provided elf file.
|
||||
if (!config.elf_path.empty()) {
|
||||
ELFIO::elfio elf_file;
|
||||
|
||||
if (!elf_file.load(config.elf_path.string())) {
|
||||
exit_failure("Failed to load provided elf file\n");
|
||||
}
|
||||
|
||||
if (elf_file.get_class() != ELFIO::ELFCLASS32) {
|
||||
exit_failure("Incorrect elf class\n");
|
||||
}
|
||||
|
||||
if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) {
|
||||
exit_failure("Incorrect endianness\n");
|
||||
}
|
||||
|
||||
setup_context_for_elf(context, elf_file);
|
||||
context.relocatable_sections = std::move(relocatable_sections);
|
||||
// Lists of data symbols organized by section, only used if dumping context.
|
||||
std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>> data_syms;
|
||||
|
||||
// Import symbols from any reference symbols files that were provided.
|
||||
if (!config.func_reference_syms_file_path.empty()) {
|
||||
|
|
@ -1555,27 +1565,24 @@ int main(int argc, char** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
// Read all of the sections in the elf and look for the symbol table section
|
||||
ELFIO::section* symtab_section = read_sections(context, config, elf_file);
|
||||
N64Recomp::ElfParsingConfig elf_config {
|
||||
.bss_section_suffix = config.bss_section_suffix,
|
||||
.relocatable_sections = std::move(relocatable_sections),
|
||||
.has_entrypoint = config.has_entrypoint,
|
||||
.entrypoint_address = config.entrypoint,
|
||||
.use_absolute_symbols = config.use_absolute_symbols,
|
||||
.unpaired_lo16_warnings = config.unpaired_lo16_warnings
|
||||
};
|
||||
|
||||
// If no symbol table was found then exit
|
||||
if (symtab_section == nullptr) {
|
||||
exit_failure("No symbol table section found\n");
|
||||
}
|
||||
|
||||
// Manually sized functions
|
||||
for (const auto& func_size : config.manual_func_sizes) {
|
||||
context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes);
|
||||
elf_config.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes);
|
||||
}
|
||||
|
||||
// Lists of data symbols organized by section, only used if dumping context.
|
||||
std::unordered_map<uint16_t, std::vector<DataSymbol>> data_syms;
|
||||
|
||||
// Read all of the symbols in the elf and look for the entrypoint function
|
||||
bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols, dumping_context, data_syms);
|
||||
bool found_entrypoint_func;
|
||||
N64Recomp::Context::from_elf_file(config.elf_path, context, elf_config, dumping_context, data_syms, found_entrypoint_func);
|
||||
|
||||
// Add any manual functions
|
||||
add_manual_functions(context, elf_file, config.manual_functions);
|
||||
add_manual_functions(context, config.manual_functions);
|
||||
|
||||
if (config.has_entrypoint && !found_entrypoint_func) {
|
||||
exit_failure("Could not find entrypoint function\n");
|
||||
|
|
@ -1586,7 +1593,7 @@ int main(int argc, char** argv) {
|
|||
// Sort the data syms by address so the output is nicer.
|
||||
for (auto& [section_index, section_syms] : data_syms) {
|
||||
std::sort(section_syms.begin(), section_syms.end(),
|
||||
[](const DataSymbol& a, const DataSymbol& b) {
|
||||
[](const N64Recomp::DataSymbol& a, const N64Recomp::DataSymbol& b) {
|
||||
return a.vram < b.vram;
|
||||
}
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue