Refactor elf parsing into static Context method for reusability

This commit is contained in:
Mr-Wiseguy 2024-07-16 21:20:32 -04:00
parent ff7aae11df
commit 04daa21908
2 changed files with 96 additions and 72 deletions

View file

@ -86,8 +86,28 @@ namespace N64Recomp {
bool is_function; bool is_function;
}; };
struct ElfParsingConfig {
std::string bss_section_suffix;
// Functions with manual size overrides
std::unordered_map<std::string, size_t> manually_sized_funcs;
// The section names that were specified as relocatable
std::unordered_set<std::string> relocatable_sections;
bool has_entrypoint;
int32_t entrypoint_address;
bool use_absolute_symbols;
bool unpaired_lo16_warnings;
};
struct DataSymbol {
uint32_t vram;
std::string name;
DataSymbol(uint32_t vram, std::string&& name) : vram(vram), name(std::move(name)) {}
};
using DataSymbolMap = std::unordered_map<uint16_t, std::vector<DataSymbol>>;
struct Context { struct Context {
// ROM address of each section
std::vector<Section> sections; std::vector<Section> sections;
std::vector<Function> functions; std::vector<Function> functions;
std::unordered_map<uint32_t, std::vector<size_t>> functions_by_vram; std::unordered_map<uint32_t, std::vector<size_t>> functions_by_vram;
@ -100,10 +120,6 @@ namespace N64Recomp {
std::unordered_map<std::string, size_t> functions_by_name; std::unordered_map<std::string, size_t> functions_by_name;
// A list of the list of each function (by index in `functions`) in a given section // A list of the list of each function (by index in `functions`) in a given section
std::vector<std::vector<size_t>> section_functions; std::vector<std::vector<size_t>> section_functions;
// The section names that were specified as relocatable (only used for elf files)
std::unordered_set<std::string> relocatable_sections; //
// Functions with manual size overrides (only used for elf files)
std::unordered_map<std::string, size_t> manually_sized_funcs;
//// Reference symbols (used for populating relocations for patches) //// Reference symbols (used for populating relocations for patches)
// A list of the sections that contain the reference symbols. // A list of the sections that contain the reference symbols.
@ -121,6 +137,7 @@ namespace N64Recomp {
bool read_data_reference_syms(const std::filesystem::path& data_syms_file_path); bool read_data_reference_syms(const std::filesystem::path& data_syms_file_path);
static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, Context& out, bool with_relocs); static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, Context& out, bool with_relocs);
static bool from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& flags, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out);
Context() = default; Context() = default;
}; };

View file

@ -673,14 +673,7 @@ std::unordered_set<std::string> renamed_funcs{
"_matherr", "_matherr",
}; };
struct DataSymbol { bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms) {
uint32_t vram;
std::string name;
DataSymbol(uint32_t vram, std::string&& name) : vram(vram), name(std::move(name)) {}
};
bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols, bool dumping_context, std::unordered_map<uint16_t, std::vector<DataSymbol>>& data_syms) {
bool found_entrypoint_func = false; bool found_entrypoint_func = false;
ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); fmt::print("Num symbols: {}\n", symbols.get_symbols_num());
@ -716,7 +709,7 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
symbols.get_symbol(sym_index, name, value, size, bind, type, symbols.get_symbol(sym_index, name, value, size, bind, type,
section_index, other); section_index, other);
if (section_index == ELFIO::SHN_ABS && use_absolute_symbols) { if (section_index == ELFIO::SHN_ABS && elf_config.use_absolute_symbols) {
uint32_t vram = static_cast<uint32_t>(value); uint32_t vram = static_cast<uint32_t>(value);
context.functions_by_vram[vram].push_back(context.functions.size()); context.functions_by_vram[vram].push_back(context.functions.size());
@ -735,7 +728,7 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
if (section_index < context.sections.size()) { if (section_index < context.sections.size()) {
// Check if this symbol is the entrypoint // Check if this symbol is the entrypoint
if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) { if (elf_config.has_entrypoint && value == elf_config.entrypoint_address && type == ELFIO::STT_FUNC) {
if (found_entrypoint_func) { if (found_entrypoint_func) {
fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); fmt::print(stderr, "Ambiguous entrypoint: {}\n", name);
return false; return false;
@ -747,8 +740,8 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
} }
// Check if this symbol has a size override // Check if this symbol has a size override
auto size_find = context.manually_sized_funcs.find(name); auto size_find = elf_config.manually_sized_funcs.find(name);
if (size_find != context.manually_sized_funcs.end()) { if (size_find != elf_config.manually_sized_funcs.end()) {
size = size_find->second; size = size_find->second;
type = ELFIO::STT_FUNC; type = ELFIO::STT_FUNC;
} }
@ -787,8 +780,8 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
context.functions_by_vram[vram].push_back(context.functions.size()); context.functions_by_vram[vram].push_back(context.functions.size());
// Find the entrypoint by rom address in case it doesn't have vram as its value // Find the entrypoint by rom address in case it doesn't have vram as its value
if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { if (elf_config.has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) {
vram = entrypoint; vram = elf_config.entrypoint_address;
found_entrypoint_func = true; found_entrypoint_func = true;
name = "recomp_entrypoint"; name = "recomp_entrypoint";
if (size == 0) { if (size == 0) {
@ -853,7 +846,6 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
// Move this symbol into the corresponding non-bss section if it's in a bss section. // Move this symbol into the corresponding non-bss section if it's in a bss section.
auto find_bss_it = bss_section_to_target_section.find(target_section_index); auto find_bss_it = bss_section_to_target_section.find(target_section_index);
if (find_bss_it != bss_section_to_target_section.end()) { if (find_bss_it != bss_section_to_target_section.end()) {
fmt::print("mapping {} to {}\n", context.sections[section_index].name, context.sections[find_bss_it->second].name);
target_section_index = find_bss_it->second; target_section_index = find_bss_it->second;
} }
@ -867,7 +859,7 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELF
return found_entrypoint_func; return found_entrypoint_func;
} }
void add_manual_functions(N64Recomp::Context& context, const ELFIO::elfio& elf_file, const std::vector<N64Recomp::ManualFunction>& manual_funcs) { void add_manual_functions(N64Recomp::Context& context, const std::vector<N64Recomp::ManualFunction>& manual_funcs) {
auto exit_failure = [](const std::string& error_str) { auto exit_failure = [](const std::string& error_str) {
fmt::vprint(stderr, error_str, fmt::make_format_args()); fmt::vprint(stderr, error_str, fmt::make_format_args());
std::exit(EXIT_FAILURE); std::exit(EXIT_FAILURE);
@ -903,7 +895,7 @@ void add_manual_functions(N64Recomp::Context& context, const ELFIO::elfio& elf_f
std::vector<uint32_t> words; std::vector<uint32_t> words;
words.resize(cur_func_def.size / 4); words.resize(cur_func_def.size / 4);
const uint32_t* elf_words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset); const uint32_t* elf_words = reinterpret_cast<const uint32_t*>(context.rom.data() + context.sections[section_index].rom_addr + section_offset);
words.assign(elf_words, elf_words + words.size()); words.assign(elf_words, elf_words + words.size());
@ -946,7 +938,7 @@ std::optional<size_t> get_segment(const std::vector<SegmentEntry>& segments, ELF
return std::nullopt; return std::nullopt;
} }
ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Config& config, const ELFIO::elfio& elf_file) { ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfParsingConfig& elf_config, const ELFIO::elfio& elf_file) {
ELFIO::section* symtab_section = nullptr; ELFIO::section* symtab_section = nullptr;
std::vector<SegmentEntry> segments{}; std::vector<SegmentEntry> segments{};
segments.resize(elf_file.segments.size()); segments.resize(elf_file.segments.size());
@ -985,7 +977,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
symtab_section = section.get(); symtab_section = section.get();
} }
if (context.relocatable_sections.contains(section_name)) { if (elf_config.relocatable_sections.contains(section_name)) {
section_out.relocatable = true; section_out.relocatable = true;
} }
@ -1001,17 +993,17 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
// If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup. // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup.
// Alternatively, if this recompilation uses reference symbols then record all reloc sections. // Alternatively, if this recompilation uses reference symbols then record all reloc sections.
if (!context.reference_sections.empty() || context.relocatable_sections.contains(reloc_target_section)) { if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) {
reloc_sections_by_name[reloc_target_section] = section.get(); reloc_sections_by_name[reloc_target_section] = section.get();
} }
} }
// If the section is bss (SHT_NOBITS) and ends with the bss suffix, add it to the bss section map // If the section is bss (SHT_NOBITS) and ends with the bss suffix, add it to the bss section map
if (type == ELFIO::SHT_NOBITS && section_name.ends_with(config.bss_section_suffix)) { if (type == ELFIO::SHT_NOBITS && section_name.ends_with(elf_config.bss_section_suffix)) {
std::string bss_target_section = section_name.substr(0, section_name.size() - config.bss_section_suffix.size()); std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size());
// If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup // If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup
if (context.relocatable_sections.contains(bss_target_section)) { if (elf_config.relocatable_sections.contains(bss_target_section)) {
bss_sections_by_name[bss_target_section] = section.get(); bss_sections_by_name[bss_target_section] = section.get();
} }
} }
@ -1184,7 +1176,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::Conf
} }
else { else {
// Orphaned LO16 reloc warnings. // Orphaned LO16 reloc warnings.
if (config.unpaired_lo16_warnings) { if (elf_config.unpaired_lo16_warnings) {
if (prev_lo) { if (prev_lo) {
// Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior. // Don't warn if multiple LO16 in a row reference the same symbol, as some linkers will use this behavior.
if (prev_hi_symbol != rel_symbol) { if (prev_hi_symbol != rel_symbol) {
@ -1350,15 +1342,15 @@ std::vector<std::string> reloc_names {
"R_MIPS_GPREL16", "R_MIPS_GPREL16",
}; };
void dump_context(const N64Recomp::Context& context, const std::unordered_map<uint16_t, std::vector<DataSymbol>>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) { void dump_context(const N64Recomp::Context& context, const std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) {
std::ofstream func_context_file {func_path}; std::ofstream func_context_file {func_path};
std::ofstream data_context_file {data_path}; std::ofstream data_context_file {data_path};
fmt::print(func_context_file, "# Autogenerated from an ELF via N64Recomp\n"); fmt::print(func_context_file, "# Autogenerated from an ELF via N64Recomp\n");
fmt::print(data_context_file, "# Autogenerated from an ELF via N64Recomp\n"); fmt::print(data_context_file, "# Autogenerated from an ELF via N64Recomp\n");
auto print_section = [](std::ofstream& output_file, const std::string& name, uint64_t rom_addr, uint64_t ram_addr, uint64_t size) { auto print_section = [](std::ofstream& output_file, const std::string& name, uint32_t rom_addr, uint32_t ram_addr, uint32_t size) {
if (rom_addr == (uint64_t)-1) { if (rom_addr == (uint32_t)-1) {
fmt::print(output_file, fmt::print(output_file,
"[[section]]\n" "[[section]]\n"
"name = \"{}\"\n" "name = \"{}\"\n"
@ -1416,15 +1408,12 @@ void dump_context(const N64Recomp::Context& context, const std::unordered_map<ui
const auto find_syms_it = data_syms.find((uint16_t)section_index); const auto find_syms_it = data_syms.find((uint16_t)section_index);
if (find_syms_it != data_syms.end() && !find_syms_it->second.empty()) { if (find_syms_it != data_syms.end() && !find_syms_it->second.empty()) {
if (section.name.ends_with(".bss")) {
fmt::print("asdasd {}\n", section.name);
}
print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size); print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size);
// Dump other symbols into the data context file. // Dump other symbols into the data context file.
fmt::print(data_context_file, "symbols = [\n"); fmt::print(data_context_file, "symbols = [\n");
for (const DataSymbol& cur_sym : find_syms_it->second) { for (const N64Recomp::DataSymbol& cur_sym : find_syms_it->second) {
fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram); fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram);
} }
@ -1432,13 +1421,13 @@ void dump_context(const N64Recomp::Context& context, const std::unordered_map<ui
} }
} }
const auto find_abs_syms_it = data_syms.find((uint16_t)-1); const auto find_abs_syms_it = data_syms.find(N64Recomp::SectionAbsolute);
if (find_abs_syms_it != data_syms.end() && !find_abs_syms_it->second.empty()) { if (find_abs_syms_it != data_syms.end() && !find_abs_syms_it->second.empty()) {
// Dump absolute symbols into the data context file. // Dump absolute symbols into the data context file.
print_section(data_context_file, "ABSOLUTE_SYMS", (uint64_t)-1, 0, 0); print_section(data_context_file, "ABSOLUTE_SYMS", (uint32_t)-1, 0, 0);
fmt::print(data_context_file, "symbols = [\n"); fmt::print(data_context_file, "symbols = [\n");
for (const DataSymbol& cur_sym : find_abs_syms_it->second) { for (const N64Recomp::DataSymbol& cur_sym : find_abs_syms_it->second) {
fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram); fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram);
} }
@ -1471,6 +1460,41 @@ static void setup_context_for_elf(N64Recomp::Context& context, const ELFIO::elfi
context.rom.reserve(8 * 1024 * 1024); context.rom.reserve(8 * 1024 * 1024);
} }
bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_path, Context& out, const ElfParsingConfig& elf_config, bool for_dumping_context, DataSymbolMap& data_syms_out, bool& found_entrypoint_out) {
ELFIO::elfio elf_file;
if (!elf_file.load(elf_file_path.string())) {
fmt::print("Failed to load provided elf file\n");
return false;
}
if (elf_file.get_class() != ELFIO::ELFCLASS32) {
fmt::print("Incorrect elf class\n");
return false;
}
if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) {
fmt::print("Incorrect endianness\n");
return false;
}
setup_context_for_elf(out, elf_file);
// Read all of the sections in the elf and look for the symbol table section
ELFIO::section* symtab_section = read_sections(out, elf_config, elf_file);
// If no symbol table was found then exit
if (symtab_section == nullptr) {
fmt::print("No symbol table section found\n");
return false;
}
// Read all of the symbols in the elf and look for the entrypoint function
found_entrypoint_out = read_symbols(out, elf_file, symtab_section, elf_config, for_dumping_context, data_syms_out);
return true;
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
auto exit_failure = [] (const std::string& error_str) { auto exit_failure = [] (const std::string& error_str) {
fmt::vprint(stderr, error_str, fmt::make_format_args()); fmt::vprint(stderr, error_str, fmt::make_format_args());
@ -1478,7 +1502,7 @@ int main(int argc, char** argv) {
}; };
// TODO expose a way to dump the context from the command line. // TODO expose a way to dump the context from the command line.
bool dumping_context = false; bool dumping_context = true;// false;
if (argc != 2) { if (argc != 2) {
fmt::print("Usage: {} [config file]\n", argv[0]); fmt::print("Usage: {} [config file]\n", argv[0]);
@ -1517,22 +1541,8 @@ int main(int argc, char** argv) {
// Build a context from the provided elf file. // Build a context from the provided elf file.
if (!config.elf_path.empty()) { if (!config.elf_path.empty()) {
ELFIO::elfio elf_file; // Lists of data symbols organized by section, only used if dumping context.
std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>> data_syms;
if (!elf_file.load(config.elf_path.string())) {
exit_failure("Failed to load provided elf file\n");
}
if (elf_file.get_class() != ELFIO::ELFCLASS32) {
exit_failure("Incorrect elf class\n");
}
if (elf_file.get_encoding() != ELFIO::ELFDATA2MSB) {
exit_failure("Incorrect endianness\n");
}
setup_context_for_elf(context, elf_file);
context.relocatable_sections = std::move(relocatable_sections);
// Import symbols from any reference symbols files that were provided. // Import symbols from any reference symbols files that were provided.
if (!config.func_reference_syms_file_path.empty()) { if (!config.func_reference_syms_file_path.empty()) {
@ -1555,27 +1565,24 @@ int main(int argc, char** argv) {
} }
} }
// Read all of the sections in the elf and look for the symbol table section N64Recomp::ElfParsingConfig elf_config {
ELFIO::section* symtab_section = read_sections(context, config, elf_file); .bss_section_suffix = config.bss_section_suffix,
.relocatable_sections = std::move(relocatable_sections),
.has_entrypoint = config.has_entrypoint,
.entrypoint_address = config.entrypoint,
.use_absolute_symbols = config.use_absolute_symbols,
.unpaired_lo16_warnings = config.unpaired_lo16_warnings
};
// If no symbol table was found then exit
if (symtab_section == nullptr) {
exit_failure("No symbol table section found\n");
}
// Manually sized functions
for (const auto& func_size : config.manual_func_sizes) { for (const auto& func_size : config.manual_func_sizes) {
context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); elf_config.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes);
} }
// Lists of data symbols organized by section, only used if dumping context. bool found_entrypoint_func;
std::unordered_map<uint16_t, std::vector<DataSymbol>> data_syms; N64Recomp::Context::from_elf_file(config.elf_path, context, elf_config, dumping_context, data_syms, found_entrypoint_func);
// Read all of the symbols in the elf and look for the entrypoint function
bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols, dumping_context, data_syms);
// Add any manual functions // Add any manual functions
add_manual_functions(context, elf_file, config.manual_functions); add_manual_functions(context, config.manual_functions);
if (config.has_entrypoint && !found_entrypoint_func) { if (config.has_entrypoint && !found_entrypoint_func) {
exit_failure("Could not find entrypoint function\n"); exit_failure("Could not find entrypoint function\n");
@ -1586,7 +1593,7 @@ int main(int argc, char** argv) {
// Sort the data syms by address so the output is nicer. // Sort the data syms by address so the output is nicer.
for (auto& [section_index, section_syms] : data_syms) { for (auto& [section_index, section_syms] : data_syms) {
std::sort(section_syms.begin(), section_syms.end(), std::sort(section_syms.begin(), section_syms.end(),
[](const DataSymbol& a, const DataSymbol& b) { [](const N64Recomp::DataSymbol& a, const N64Recomp::DataSymbol& b) {
return a.vram < b.vram; return a.vram < b.vram;
} }
); );