diff --git a/CMakeLists.txt b/CMakeLists.txt index 0be4a0b..0db7a7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -137,3 +137,20 @@ target_link_libraries(RSPRecomp fmt rabbitizer tomlplusplus::tomlplusplus) target_sources(RSPRecomp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/RSPRecomp/src/rsp_recomp.cpp) + +# Mod tool +project(RecompModTool) +add_executable(RecompModTool) + +target_sources(RecompModTool PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src/config.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/mod_symbols.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/RecompModTool/main.cpp +) + +target_include_directories(RecompModTool PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/lib/ELFIO +) + +target_link_libraries(RecompModTool fmt tomlplusplus::tomlplusplus N64RecompElf) + diff --git a/RecompModTool/main.cpp b/RecompModTool/main.cpp new file mode 100644 index 0000000..2265bb3 --- /dev/null +++ b/RecompModTool/main.cpp @@ -0,0 +1,307 @@ +#include +#include +#include +#include +#include "fmt/format.h" +#include "n64recomp.h" +#include + +struct ModConfig { + std::filesystem::path output_syms_path; + std::filesystem::path output_binary_path; + std::filesystem::path elf_path; + std::filesystem::path func_reference_syms_file_path; + std::vector data_reference_syms_file_paths; +}; + +static std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) { + if (!child.empty()) { + return parent / child; + } + return child; +} + +static std::vector get_data_syms_paths(const toml::array* data_syms_paths_array, const std::filesystem::path& basedir) { + std::vector ret; + + // Reserve room for all the funcs in the map. + ret.reserve(data_syms_paths_array->size()); + data_syms_paths_array->for_each([&ret, &basedir](auto&& el) { + if constexpr (toml::is_string) { + ret.emplace_back(concat_if_not_empty(basedir, el.template value_exact().value())); + } + else { + throw toml::parse_error("Invalid type for data reference symbol file entry", el.source()); + } + }); + + return ret; +} + +ModConfig parse_mod_config(const std::filesystem::path& config_path, bool& good) { + ModConfig ret{}; + good = false; + + toml::table toml_data{}; + + try { + toml_data = toml::parse_file(config_path.native()); + std::filesystem::path basedir = config_path.parent_path(); + + const auto config_data = toml_data["config"]; + + // Output symbol file path + std::optional output_syms_path_opt = config_data["output_syms_path"].value(); + if (output_syms_path_opt.has_value()) { + ret.output_syms_path = concat_if_not_empty(basedir, output_syms_path_opt.value()); + } + else { + throw toml::parse_error("Mod toml is missing output symbol file path", config_data.node()->source()); + } + + // Output binary file path + std::optional output_binary_path_opt = config_data["output_binary_path"].value(); + if (output_binary_path_opt.has_value()) { + ret.output_binary_path = concat_if_not_empty(basedir, output_binary_path_opt.value()); + } + else { + throw toml::parse_error("Mod toml is missing output binary file path", config_data.node()->source()); + } + + // Elf file + std::optional elf_path_opt = config_data["elf_path"].value(); + if (elf_path_opt.has_value()) { + ret.elf_path = concat_if_not_empty(basedir, elf_path_opt.value()); + } + else { + throw toml::parse_error("Mod toml is missing elf file", config_data.node()->source()); + } + + // Function reference symbols file + std::optional func_reference_syms_file_opt = config_data["func_reference_syms_file"].value(); + if (func_reference_syms_file_opt.has_value()) { + ret.func_reference_syms_file_path = concat_if_not_empty(basedir, func_reference_syms_file_opt.value()); + } + else { + throw toml::parse_error("Mod toml is missing function reference symbol file", config_data.node()->source()); + } + + // Data reference symbols files + toml::node_view data_reference_syms_file_data = config_data["data_reference_syms_files"]; + if (data_reference_syms_file_data.is_array()) { + const toml::array* array = data_reference_syms_file_data.as_array(); + ret.data_reference_syms_file_paths = get_data_syms_paths(array, basedir); + } + else { + if (data_reference_syms_file_data) { + throw toml::parse_error("Mod toml is missing data reference symbol file list", config_data.node()->source()); + } + else { + throw toml::parse_error("Invalid data reference symbol file list", data_reference_syms_file_data.node()->source()); + } + } + } + catch (const toml::parse_error& err) { + std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl; + return {}; + } + + good = true; + return ret; +} + +N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context, bool& good) { + N64Recomp::ModContext ret{}; + good = false; + + // Make a vector containing 0, 1, 2, ... section count - 1 + std::vector section_order; + section_order.resize(input_context.sections.size()); + std::iota(section_order.begin(), section_order.end(), 0); + + // Sort the vector based on the rom address of the corresponding section. + std::sort(section_order.begin(), section_order.end(), + [&](uint16_t a, uint16_t b) { + return input_context.sections[a].rom_addr < input_context.sections[b].rom_addr; + } + ); + + uint32_t rom_to_ram = (uint32_t)-1; + size_t output_section_index = (size_t)-1; + ret.base_context.sections.resize(1); + + // Iterate over the input sections in their sorted order. + for (uint16_t section_index : section_order) { + const auto& cur_section = input_context.sections[section_index]; + uint32_t cur_rom_to_ram = cur_section.ram_addr - cur_section.rom_addr; + + // Stop checking sections once a non-allocated section has been reached. + if (cur_section.rom_addr == (uint32_t)-1) { + break; + } + + // Check if this section matches up with the previous section to merge them together. + if (rom_to_ram == cur_rom_to_ram) { + auto& section_out = ret.base_context.sections[output_section_index]; + uint32_t cur_section_end = cur_section.rom_addr + cur_section.size; + section_out.size = cur_section_end - section_out.rom_addr; + } + // Otherwise, create a new output section and advance to it. + else { + output_section_index++; + ret.base_context.sections.resize(output_section_index + 1); + ret.base_context.section_functions.resize(output_section_index + 1); + rom_to_ram = cur_rom_to_ram; + + auto& new_section = ret.base_context.sections[output_section_index]; + new_section.rom_addr = cur_section.rom_addr; + new_section.ram_addr = cur_section.ram_addr; + new_section.size = cur_section.size; + } + + // Check for special section names. + bool patch_section = cur_section.name == ".recomp_patch"; + bool force_patch_section = cur_section.name == ".recomp_force_patch"; + bool export_section = cur_section.name == ".recomp_export"; + + // Add the functions from the current input section to the current output section. + auto& section_out = ret.base_context.sections[output_section_index]; + + size_t starting_function_index = ret.base_context.functions.size(); + const auto& cur_section_funcs = input_context.section_functions[section_index]; + + for (size_t section_function_index = 0; section_function_index < cur_section_funcs.size(); section_function_index++) { + size_t output_func_index = ret.base_context.functions.size(); + size_t input_func_index = cur_section_funcs[section_function_index]; + const auto& cur_func = input_context.functions[input_func_index]; + + // If this is the patch section, create a replacement for this function. + if (patch_section || force_patch_section) { + // Find the corresponding symbol in the reference symbols. + auto find_sym_it = input_context.reference_symbols_by_name.find(cur_func.name); + if (find_sym_it == input_context.reference_symbols_by_name.end()) { + fmt::print("Function {} is marked as a patch but doesn't exist in the original ROM!\n", cur_func.name); + return {}; + } + + // Check that the reference symbol is actually a function. + const auto& reference_symbol = input_context.reference_symbols[find_sym_it->second]; + if (!reference_symbol.is_function) { + fmt::print("Function {0} is marked as a patch, but {0} was a variable in the original ROM!\n", cur_func.name); + return {}; + } + + const auto& reference_section = input_context.reference_sections[reference_symbol.section_index]; + + // Add a replacement for this function to the output context. + ret.replacements.emplace_back( + N64Recomp::FunctionReplacement { + .func_index = (uint32_t)output_func_index, + .original_section_vrom = reference_section.rom_addr, + .original_vram = reference_section.ram_addr + reference_symbol.section_offset, + .flags = force_patch_section ? N64Recomp::ReplacementFlags::Force : N64Recomp::ReplacementFlags{} + } + ); + } + + ret.base_context.section_functions[output_section_index].push_back(output_func_index); + + // Add this function to the output context. + ret.base_context.functions.emplace_back( + cur_func.vram, + cur_func.rom, + std::vector{}, // words + "", // name + (uint16_t)output_section_index, + false, // ignored + false, // reimplemented + false // stubbed + ); + + // Resize the words vector so the function has the correct size. No need to copy the words, as they aren't used when making a mod symbol file. + ret.base_context.functions[output_func_index].words.resize(cur_func.words.size()); + } + + // TODO relocs (including reference symbols and HI16 and LO16 patching for non-relocatable reference symbols) + + + // TODO exports + } + + good = true; + return ret; +} + +int main(int argc, const char** argv) { + if (argc != 2) { + fmt::print("Usage: {} [mod toml]\n", argv[0]); + return EXIT_SUCCESS; + } + + bool config_good; + ModConfig config = parse_mod_config(argv[1], config_good); + + if (!config_good) { + fmt::print(stderr, "Failed to read mod config file: {}\n", argv[1]); + return EXIT_FAILURE; + } + + N64Recomp::Context context{}; + + // Import symbols from symbols files that were provided. + { + // Create a new temporary context to read the function reference symbol file into, since it's the same format as the recompilation symbol file. + std::vector dummy_rom{}; + N64Recomp::Context reference_context{}; + if (!N64Recomp::Context::from_symbol_file(config.func_reference_syms_file_path, std::move(dummy_rom), reference_context, false)) { + fmt::print(stderr, "Failed to load provided function reference symbol file\n"); + return EXIT_FAILURE; + } + + // Use the reference context to build a reference symbol list for the actual context. + context.import_reference_context(reference_context); + } + + for (const std::filesystem::path& cur_data_sym_path : config.data_reference_syms_file_paths) { + if (!context.read_data_reference_syms(cur_data_sym_path)) { + fmt::print(stderr, "Failed to load provided data reference symbol file: {}\n", cur_data_sym_path.string()); + return EXIT_FAILURE; + } + } + + N64Recomp::ElfParsingConfig elf_config { + .bss_section_suffix = {}, + .manually_sized_funcs = {}, + .relocatable_sections = {}, + .has_entrypoint = false, + .entrypoint_address = 0, + .use_absolute_symbols = false, + .unpaired_lo16_warnings = false, + .all_sections_relocatable = true + }; + bool dummy_found_entrypoint; + N64Recomp::DataSymbolMap dummy_syms_map; + bool elf_good = N64Recomp::Context::from_elf_file(config.elf_path, context, elf_config, false, dummy_syms_map, dummy_found_entrypoint); + + if (!elf_good) { + fmt::print(stderr, "Failed to parse mod elf\n"); + return EXIT_FAILURE; + } + + if (context.sections.size() == 0) { + fmt::print(stderr, "No sections found in mod elf\n"); + return EXIT_FAILURE; + } + + bool mod_context_good; + N64Recomp::ModContext mod_context = build_mod_context(context, mod_context_good); + std::vector symbols_bin = N64Recomp::symbols_to_bin_v1(mod_context); + + std::ofstream output_syms_file{ config.output_syms_path, std::ios::binary }; + output_syms_file.write(reinterpret_cast(symbols_bin.data()), symbols_bin.size()); + + std::ofstream output_binary_file{ config.output_binary_path, std::ios::binary }; + output_binary_file.write(reinterpret_cast(context.rom.data()), context.rom.size()); + + return EXIT_SUCCESS; +} diff --git a/include/n64recomp.h b/include/n64recomp.h index dbe597b..925c8ae 100644 --- a/include/n64recomp.h +++ b/include/n64recomp.h @@ -96,6 +96,7 @@ namespace N64Recomp { int32_t entrypoint_address; bool use_absolute_symbols; bool unpaired_lo16_warnings; + bool all_sections_relocatable; }; struct DataSymbol { @@ -114,16 +115,17 @@ namespace N64Recomp { struct Context { std::vector
sections; std::vector functions; + // A list of the list of each function (by index in `functions`) in a given section + std::vector> section_functions; + // A mapping of vram address to every function with that address. std::unordered_map> functions_by_vram; // The target ROM being recompiled, TODO move this outside of the context to avoid making a copy for mod contexts. // Used for reading relocations and for the output binary feature. std::vector rom; - //// Only used by the CLI, TODO move these to a struct in the internal headers. + //// Only used by the CLI, TODO move this to a struct in the internal headers. // A mapping of function name to index in the functions vector - std::unordered_map functions_by_name; - // A list of the list of each function (by index in `functions`) in a given section - std::vector> section_functions; + std::unordered_map functions_by_name; //// Reference symbols (used for populating relocations for patches) // A list of the sections that contain the reference symbols. @@ -151,6 +153,8 @@ namespace N64Recomp { enum class ReplacementFlags : uint32_t { Force = 1 << 0, }; + inline ReplacementFlags operator&(ReplacementFlags lhs, ReplacementFlags rhs) { return ReplacementFlags(uint32_t(lhs) & uint32_t(rhs)); } + inline ReplacementFlags operator|(ReplacementFlags lhs, ReplacementFlags rhs) { return ReplacementFlags(uint32_t(lhs) | uint32_t(rhs)); } struct FunctionReplacement { uint32_t func_index; @@ -172,6 +176,7 @@ namespace N64Recomp { }; ModSymbolsError parse_mod_symbols(std::span data, std::span binary, const std::unordered_map& sections_by_vrom, Context& context_out, ModContext& mod_context_out); + std::vector symbols_to_bin_v1(const ModContext& mod_context); } #endif diff --git a/src/elf.cpp b/src/elf.cpp index 337945d..59d24ef 100644 --- a/src/elf.cpp +++ b/src/elf.cpp @@ -7,7 +7,6 @@ bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map>& data_syms) { bool found_entrypoint_func = false; ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section }; - fmt::print("Num symbols: {}\n", symbols.get_symbols_num()); std::unordered_map bss_section_to_target_section{}; @@ -234,7 +233,6 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP std::unordered_map bss_sections_by_name; // Iterate over every section to record rom addresses and find the symbol table - fmt::print("Sections\n"); for (const auto& section : elf_file.sections) { auto& section_out = context.sections[section->get_index()]; //fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size()); @@ -249,7 +247,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP symtab_section = section.get(); } - if (elf_config.relocatable_sections.contains(section_name)) { + if (elf_config.all_sections_relocatable || elf_config.relocatable_sections.contains(section_name)) { section_out.relocatable = true; } @@ -265,7 +263,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP // If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup. // Alternatively, if this recompilation uses reference symbols then record all reloc sections. - if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) { + if (elf_config.all_sections_relocatable || !context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) { reloc_sections_by_name[reloc_target_section] = section.get(); } } @@ -275,7 +273,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size()); // If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup - if (elf_config.relocatable_sections.contains(bss_target_section)) { + if (elf_config.all_sections_relocatable || elf_config.relocatable_sections.contains(bss_target_section)) { bss_sections_by_name[bss_target_section] = section.get(); } } @@ -559,7 +557,6 @@ bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_pat // If no symbol table was found then exit if (symtab_section == nullptr) { - fmt::print("No symbol table section found\n"); return false; } diff --git a/src/main.cpp b/src/main.cpp index 3c8b34d..d1c29e6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -336,7 +336,8 @@ int main(int argc, char** argv) { .has_entrypoint = config.has_entrypoint, .entrypoint_address = config.entrypoint, .use_absolute_symbols = config.use_absolute_symbols, - .unpaired_lo16_warnings = config.unpaired_lo16_warnings + .unpaired_lo16_warnings = config.unpaired_lo16_warnings, + .all_sections_relocatable = false, }; for (const auto& func_size : config.manual_func_sizes) { diff --git a/src/mod_symbols.cpp b/src/mod_symbols.cpp index dec60f0..53e61af 100644 --- a/src/mod_symbols.cpp +++ b/src/mod_symbols.cpp @@ -114,6 +114,7 @@ bool parse_v1(std::span data, const std::unordered_map data, const std::unordered_map(relocs[reloc_index].type); cur_reloc.target_section_offset = relocs[reloc_index].target_section_offset; uint32_t target_section_vrom = relocs[reloc_index].target_section_vrom; - if (target_section_vrom == (uint32_t)-1) { + if (target_section_vrom == 0) { cur_reloc.target_section = N64Recomp::SectionSelf; } else { @@ -144,7 +145,7 @@ bool parse_v1(std::span data, const std::unordered_map(data, offset, num_replacements); if (replacements == nullptr) { - printf("Failed to read replacements (count: %d)\n", num_replacements); + printf("Failed to read replacements (count: %zu)\n", num_replacements); return false; } @@ -205,3 +206,93 @@ N64Recomp::ModSymbolsError N64Recomp::parse_mod_symbols(std::span da return ModSymbolsError::Good; } + +template +void vec_put(std::vector& vec, const T* data) { + size_t start_size = vec.size(); + vec.resize(vec.size() + sizeof(T)); + memcpy(vec.data() + start_size, reinterpret_cast(data), sizeof(T)); +} + +std::vector N64Recomp::symbols_to_bin_v1(const N64Recomp::ModContext& mod_context) { + std::vector ret{}; + ret.reserve(1024); + const N64Recomp::Context& context = mod_context.base_context; + + const static FileHeader header { + .magic = {'N', '6', '4', 'R', 'S', 'Y', 'M', 'S'}, + .version = 1 + }; + + vec_put(ret, &header); + + FileSubHeaderV1 sub_header { + .num_sections = static_cast(context.sections.size()), + .num_replacements = static_cast(mod_context.replacements.size()), + }; + + vec_put(ret, &sub_header); + + for (size_t section_index = 0; section_index < context.sections.size(); section_index++) { + const Section& cur_section = context.sections[section_index]; + SectionHeaderV1 section_out { + .file_offset = cur_section.rom_addr, + .vram = cur_section.ram_addr, + .rom_size = cur_section.size, + .bss_size = cur_section.bss_size, + .num_funcs = static_cast(context.section_functions[section_index].size()), + .num_relocs = static_cast(cur_section.relocs.size()) + }; + + vec_put(ret, §ion_out); + + for (size_t func_index : context.section_functions[section_index]) { + const Function& cur_func = context.functions[func_index]; + FuncV1 func_out { + .section_offset = cur_func.vram - cur_section.ram_addr, + .size = (uint32_t)(cur_func.words.size() * sizeof(cur_func.words[0])) + }; + + vec_put(ret, &func_out); + } + + for (const Reloc& cur_reloc : cur_section.relocs) { + uint32_t target_section_vrom; + if (cur_reloc.target_section == SectionSelf) { + target_section_vrom = 0; + } + else if (cur_reloc.reference_symbol) { + target_section_vrom = context.reference_sections[cur_reloc.target_section].rom_addr; + } + else { + target_section_vrom = context.sections[cur_reloc.target_section].rom_addr; + } + RelocV1 reloc_out { + .section_offset = cur_reloc.address - cur_section.ram_addr, + .type = static_cast(cur_reloc.type), + .target_section_offset = cur_reloc.target_section_offset, + .target_section_vrom = target_section_vrom + }; + + vec_put(ret, &reloc_out); + } + } + + for (const FunctionReplacement& cur_replacement : mod_context.replacements) { + uint32_t flags = 0; + if ((cur_replacement.flags & ReplacementFlags::Force) == ReplacementFlags::Force) { + flags |= 0x1; + } + + ReplacementV1 replacement_out { + .func_index = cur_replacement.func_index, + .original_section_vrom = cur_replacement.original_section_vrom, + .original_vram = cur_replacement.original_vram, + .flags = flags + }; + + vec_put(ret, &replacement_out); + }; + + return ret; +}