From 4522b501a84b8364a260d945ef3b5512f16132b4 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Sun, 23 Jun 2024 16:22:59 -0400 Subject: [PATCH] Added data symbol context dumping --- src/main.cpp | 279 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 184 insertions(+), 95 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index bd01977..af3944f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -672,11 +672,33 @@ std::unordered_set renamed_funcs{ "_matherr", }; -bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols, bool do_renaming) { +struct DataSymbol { + uint32_t vram; + std::string name; + + DataSymbol(uint32_t vram, std::string&& name) : vram(vram), name(std::move(name)) {} +}; + +bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint, bool has_entrypoint, bool use_absolute_symbols, bool dumping_context, std::unordered_map>& data_syms) { bool found_entrypoint_func = false; ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); + std::unordered_map bss_section_to_target_section{}; + + // Create a mapping of bss section to the corresponding non-bss section. This is only used when dumping context in order + // for patches and mods to correctly relocate symbols in bss. This mapping only matters for relocatable sections. + if (dumping_context) { + // Process bss and reloc sections + for (size_t cur_section_index = 0; cur_section_index < context.sections.size(); cur_section_index++) { + const RecompPort::Section& cur_section = context.sections[cur_section_index]; + // Check if a bss section was found that corresponds with this section. + if (cur_section.bss_section_index != (uint16_t)-1) { + bss_section_to_target_section[cur_section.bss_section_index] = cur_section_index; + } + } + } + for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) { std::string name; ELFIO::Elf64_Addr value; @@ -687,6 +709,7 @@ bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, EL unsigned char other; bool ignored = false; bool reimplemented = false; + bool recorded_symbol = false; // Read symbol properties symbols.get_symbol(sym_index, name, value, size, bind, type, @@ -709,109 +732,136 @@ bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, EL continue; } - if (section_index >= context.sections.size()) { - continue; - } - - // Check if this symbol is the entrypoint - if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) { - if (found_entrypoint_func) { - fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); - return false; + if (section_index < context.sections.size()) { + // Check if this symbol is the entrypoint + if (has_entrypoint && value == entrypoint && type == ELFIO::STT_FUNC) { + if (found_entrypoint_func) { + fmt::print(stderr, "Ambiguous entrypoint: {}\n", name); + return false; + } + found_entrypoint_func = true; + fmt::print("Found entrypoint, original name: {}\n", name); + size = 0x50; // dummy size for entrypoints, should cover them all + name = "recomp_entrypoint"; } - found_entrypoint_func = true; - fmt::print("Found entrypoint, original name: {}\n", name); - size = 0x50; // dummy size for entrypoints, should cover them all - name = "recomp_entrypoint"; - } - // Check if this symbol has a size override - auto size_find = context.manually_sized_funcs.find(name); - if (size_find != context.manually_sized_funcs.end()) { - size = size_find->second; - type = ELFIO::STT_FUNC; - } - - if (do_renaming) { - if (reimplemented_funcs.contains(name)) { - reimplemented = true; - name = name + "_recomp"; - ignored = true; - } else if (ignored_funcs.contains(name)) { - name = name + "_recomp"; - ignored = true; + // Check if this symbol has a size override + auto size_find = context.manually_sized_funcs.find(name); + if (size_find != context.manually_sized_funcs.end()) { + size = size_find->second; + type = ELFIO::STT_FUNC; } - } - auto& section = context.sections[section_index]; - - // Check if this symbol is a function or has no type (like a regular glabel would) - // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls - if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) { - if (do_renaming) { - if (renamed_funcs.contains(name)) { + if (!dumping_context) { + if (reimplemented_funcs.contains(name)) { + reimplemented = true; name = name + "_recomp"; - ignored = false; + ignored = true; + } else if (ignored_funcs.contains(name)) { + name = name + "_recomp"; + ignored = true; } } - if (section_index < context.sections.size()) { - auto section_offset = value - elf_file.sections[section_index]->get_address(); - const uint32_t* words = reinterpret_cast(elf_file.sections[section_index]->get_data() + section_offset); - uint32_t vram = static_cast(value); - uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; - uint32_t rom_address = static_cast(section_offset + section.rom_addr); + auto& section = context.sections[section_index]; - section.function_addrs.push_back(vram); - context.functions_by_vram[vram].push_back(context.functions.size()); - - // Find the entrypoint by rom address in case it doesn't have vram as its value - if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { - vram = entrypoint; - found_entrypoint_func = true; - name = "recomp_entrypoint"; - if (size == 0) { - num_instructions = 0x50 / 4; + // Check if this symbol is a function or has no type (like a regular glabel would) + // Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls + if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) { + if (!dumping_context) { + if (renamed_funcs.contains(name)) { + name = name + "_recomp"; + ignored = false; } } - // Suffix local symbols to prevent name conflicts. - if (bind == ELFIO::STB_LOCAL) { - name = fmt::format("{}_{:08X}", name, rom_address); - } - - if (num_instructions > 0) { - context.section_functions[section_index].push_back(context.functions.size()); - } - context.functions_by_name[name] = context.functions.size(); + if (section_index < context.sections.size()) { + auto section_offset = value - elf_file.sections[section_index]->get_address(); + const uint32_t* words = reinterpret_cast(elf_file.sections[section_index]->get_data() + section_offset); + uint32_t vram = static_cast(value); + uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0; + uint32_t rom_address = static_cast(section_offset + section.rom_addr); - std::vector insn_words(num_instructions); - insn_words.assign(words, words + num_instructions); + section.function_addrs.push_back(vram); + context.functions_by_vram[vram].push_back(context.functions.size()); - context.functions.emplace_back( - vram, - rom_address, - std::move(insn_words), - std::move(name), - section_index, - ignored, - reimplemented - ); - } else { - uint32_t vram = static_cast(value); - section.function_addrs.push_back(vram); - context.functions_by_vram[vram].push_back(context.functions.size()); - context.functions.emplace_back( - vram, - 0, - std::vector{}, - std::move(name), - section_index, - ignored, - reimplemented - ); + // Find the entrypoint by rom address in case it doesn't have vram as its value + if (has_entrypoint && rom_address == 0x1000 && type == ELFIO::STT_FUNC) { + vram = entrypoint; + found_entrypoint_func = true; + name = "recomp_entrypoint"; + if (size == 0) { + num_instructions = 0x50 / 4; + } + } + + // Suffix local symbols to prevent name conflicts. + if (bind == ELFIO::STB_LOCAL) { + name = fmt::format("{}_{:08X}", name, rom_address); + } + + if (num_instructions > 0) { + context.section_functions[section_index].push_back(context.functions.size()); + recorded_symbol = true; + } + context.functions_by_name[name] = context.functions.size(); + + std::vector insn_words(num_instructions); + insn_words.assign(words, words + num_instructions); + + context.functions.emplace_back( + vram, + rom_address, + std::move(insn_words), + name, + section_index, + ignored, + reimplemented + ); + } else { + // TODO is this case needed anymore? + fmt::print("asdasdasd: {}\n", name.c_str()); + uint32_t vram = static_cast(value); + section.function_addrs.push_back(vram); + context.functions_by_vram[vram].push_back(context.functions.size()); + context.functions.emplace_back( + vram, + 0, + std::vector{}, + name, + section_index, + ignored, + reimplemented + ); + } } } + + // The symbol wasn't detected as a function, so add it to the data symbols if the context is being dumped. + if (!recorded_symbol && dumping_context && !name.empty()) { + uint32_t vram = static_cast(value); + + // Place this symbol in the absolute symbol list (section -1) if it's in the absolute section. + uint16_t target_section_index = section_index; + if (section_index == ELFIO::SHN_ABS) { + target_section_index = (uint16_t)-1; + } + else if (section_index >= context.sections.size()) { + fmt::print("Symbol \"{}\" not in a valid section ({})\n", name, section_index); + } + + // Move this symbol into the corresponding non-bss section if it's in a bss section. + auto find_bss_it = bss_section_to_target_section.find(target_section_index); + if (find_bss_it != bss_section_to_target_section.end()) { + fmt::print("mapping {} to {}\n", context.sections[section_index].name, context.sections[find_bss_it->second].name); + target_section_index = find_bss_it->second; + } + + data_syms[target_section_index].emplace_back( + vram, + std::move(name) + ); + } } return found_entrypoint_func; @@ -1262,7 +1312,7 @@ std::vector reloc_names { "R_MIPS_GPREL16", }; -void dump_context(const RecompPort::Context& context, const std::filesystem::path& func_path, const std::filesystem::path& data_path) { +void dump_context(const RecompPort::Context& context, const std::unordered_map>& data_syms, const std::filesystem::path& func_path, const std::filesystem::path& data_path) { std::ofstream func_context_file {func_path}; std::ofstream data_context_file {data_path}; @@ -1285,7 +1335,6 @@ void dump_context(const RecompPort::Context& context, const std::filesystem::pat const std::vector& section_funcs = context.section_functions[section_index]; if (!section_funcs.empty()) { print_section(func_context_file, section.name, section.rom_addr, section.ram_addr, section.size); - print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size); // Dump relocs into the function context file. if (!section.relocs.empty()) { @@ -1313,11 +1362,39 @@ void dump_context(const RecompPort::Context& context, const std::filesystem::pat func.name, func.vram, func.words.size() * sizeof(func.words[0])); } - // Dump variables into the data context file. - - fmt::print(func_context_file, "]\n\n"); } + + const auto find_syms_it = data_syms.find((uint16_t)section_index); + if (find_syms_it != data_syms.end() && !find_syms_it->second.empty()) { + if (section.name.ends_with(".bss")) { + fmt::print("asdasd {}\n", section.name); + } + print_section(data_context_file, section.name, section.rom_addr, section.ram_addr, section.size); + + // Dump other symbols into the data context file. + fmt::print(data_context_file, "symbols = [\n"); + + for (const DataSymbol& cur_sym : find_syms_it->second) { + fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram); + } + + fmt::print(data_context_file, "]\n\n"); + } + } + + const auto find_abs_syms_it = data_syms.find((uint16_t)-1); + if (find_abs_syms_it != data_syms.end() && !find_abs_syms_it->second.empty()) { + // Dump absolute symbols into the data context file. + fmt::print(data_context_file, + "[absolute]\n\n"); + fmt::print(data_context_file, "symbols = [\n"); + + for (const DataSymbol& cur_sym : find_abs_syms_it->second) { + fmt::print(data_context_file, " {{ name = \"{}\", vram = 0x{:08X} }},\n", cur_sym.name, cur_sym.vram); + } + + fmt::print(data_context_file, "]\n\n"); } } @@ -1416,8 +1493,11 @@ int main(int argc, char** argv) { context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); } + // Lists of data symbols organized by section, only used if dumping context. + std::unordered_map> data_syms; + // Read all of the symbols in the elf and look for the entrypoint function - bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols, !dumping_context); + bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, config.entrypoint, config.has_entrypoint, config.use_absolute_symbols, dumping_context, data_syms); // Add any manual functions add_manual_functions(context, elf_file, config.manual_functions); @@ -1428,7 +1508,16 @@ int main(int argc, char** argv) { if (dumping_context) { fmt::print("Dumping context\n"); - dump_context(context, "dump.toml", "data_dump.toml"); + // Sort the data syms by address so the output is nicer. + for (auto& [section_index, section_syms] : data_syms) { + std::sort(section_syms.begin(), section_syms.end(), + [](const DataSymbol& a, const DataSymbol& b) { + return a.vram < b.vram; + } + ); + } + + dump_context(context, data_syms, "dump.toml", "data_dump.toml"); return 0; } }