WIP elf to mod tool, currently working without relocations or API exports/imports

This commit is contained in:
Mr-Wiseguy 2024-07-17 00:54:43 -04:00
parent d8dcb43d5a
commit a7a4134123
6 changed files with 431 additions and 13 deletions

View file

@ -137,3 +137,20 @@ target_link_libraries(RSPRecomp fmt rabbitizer tomlplusplus::tomlplusplus)
target_sources(RSPRecomp PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/RSPRecomp/src/rsp_recomp.cpp)
# Mod tool
# Declares the RecompModTool executable. It is compiled from src/config.cpp,
# src/mod_symbols.cpp and the tool's own entry point, RecompModTool/main.cpp.
project(RecompModTool)
add_executable(RecompModTool)
target_sources(RecompModTool PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/config.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/mod_symbols.cpp
${CMAKE_CURRENT_SOURCE_DIR}/RecompModTool/main.cpp
)
# The ELFIO directory is added to this target's private include path.
target_include_directories(RecompModTool PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/lib/ELFIO
)
# Libraries linked into the tool: fmt, toml++ and N64RecompElf.
target_link_libraries(RecompModTool fmt tomlplusplus::tomlplusplus N64RecompElf)

307
RecompModTool/main.cpp Normal file
View file

@ -0,0 +1,307 @@
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <numeric>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "fmt/format.h"
#include "n64recomp.h"
#include <toml++/toml.hpp>
// Settings read from the `config` table of the mod toml.
// Every non-empty path is joined onto the directory that contains the toml file.
struct ModConfig {
// Destination of the binary mod symbol data (toml key `output_syms_path`).
std::filesystem::path output_syms_path;
// Destination of the mod's binary data (toml key `output_binary_path`).
std::filesystem::path output_binary_path;
// The mod elf to convert (toml key `elf_path`).
std::filesystem::path elf_path;
// Symbol file providing the function reference symbols (toml key `func_reference_syms_file`).
std::filesystem::path func_reference_syms_file_path;
// Symbol files providing the data reference symbols (toml key `data_reference_syms_files`).
std::vector<std::filesystem::path> data_reference_syms_file_paths;
};
// Joins `child` onto `parent`, unless `child` is empty, in which case the empty
// path is returned unchanged so that "not provided" stays distinguishable.
static std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
    return child.empty() ? child : parent / child;
}
// Converts a toml array of strings into a list of paths, each resolved against `basedir`.
// Throws toml::parse_error (located at the offending element) if any entry is not a string.
static std::vector<std::filesystem::path> get_data_syms_paths(const toml::array* data_syms_paths_array, const std::filesystem::path& basedir) {
    std::vector<std::filesystem::path> paths;

    // One output path per array entry.
    paths.reserve(data_syms_paths_array->size());

    for (const toml::node& entry : *data_syms_paths_array) {
        if (!entry.is_string()) {
            throw toml::parse_error("Invalid type for data reference symbol file entry", entry.source());
        }
        paths.emplace_back(concat_if_not_empty(basedir, entry.value_exact<std::string>().value()));
    }

    return paths;
}
// Parses the mod configuration toml at `config_path`.
//
// All paths in the file are interpreted relative to the directory containing the toml.
// The `config` table must provide `output_syms_path`, `output_binary_path`, `elf_path`,
// `func_reference_syms_file` (strings) and `data_reference_syms_files` (array of strings).
//
// On success `good` is set to true and the populated config is returned. On any parse or
// validation error a message is printed to stderr, `good` is left false and a
// default-constructed ModConfig is returned.
ModConfig parse_mod_config(const std::filesystem::path& config_path, bool& good) {
    ModConfig ret{};
    good = false;

    try {
        toml::table toml_data = toml::parse_file(config_path.native());
        const std::filesystem::path basedir = config_path.parent_path();

        const auto config_data = toml_data["config"];
        // Without this check every `config_data.node()->source()` below would dereference null.
        if (!config_data) {
            throw toml::parse_error("Mod toml is missing config section", toml_data.source());
        }
        const toml::source_region& config_source = config_data.node()->source();

        // Reads a mandatory string entry from the config table and resolves it against basedir.
        const auto require_path = [&](std::string_view key, const char* missing_message) {
            const std::optional<std::string> value = config_data[key].value<std::string>();
            if (!value.has_value()) {
                throw toml::parse_error(missing_message, config_source);
            }
            return concat_if_not_empty(basedir, *value);
        };

        ret.output_syms_path = require_path("output_syms_path", "Mod toml is missing output symbol file path");
        ret.output_binary_path = require_path("output_binary_path", "Mod toml is missing output binary file path");
        ret.elf_path = require_path("elf_path", "Mod toml is missing elf file");
        ret.func_reference_syms_file_path = require_path("func_reference_syms_file", "Mod toml is missing function reference symbol file");

        // Data reference symbols files
        const auto data_reference_syms_file_data = config_data["data_reference_syms_files"];
        if (const toml::array* array = data_reference_syms_file_data.as_array()) {
            ret.data_reference_syms_file_paths = get_data_syms_paths(array, basedir);
        }
        else if (data_reference_syms_file_data) {
            // The key exists but isn't an array, so point the error at the offending node.
            throw toml::parse_error("Invalid data reference symbol file list", data_reference_syms_file_data.node()->source());
        }
        else {
            // The key doesn't exist, so there is no node to point at; use the config table instead.
            throw toml::parse_error("Mod toml is missing data reference symbol file list", config_source);
        }
    }
    catch (const toml::parse_error& err) {
        const toml::source_region& where = err.source();
        // The source path is optional in toml++, so fall back to the path that was requested.
        const std::string shown_path = where.path ? *where.path : config_path.string();
        std::cerr << "Syntax error parsing toml: " << shown_path << " (" << where.begin << "):\n" << err.description() << std::endl;
        return {};
    }

    good = true;
    return ret;
}
// Builds the mod context that gets serialized into the mod symbol file.
//
// Allocated input sections are visited in rom order. Consecutive sections that share the same
// ram-to-rom delta are merged into a single output section; any change in delta starts a new
// output section. Every function of an input section is added to the output section it was
// merged into. Functions located in `.recomp_patch` / `.recomp_force_patch` additionally get a
// replacement entry that points at the reference symbol with the same name.
//
// On success `good` is set to true. If a patch function has no matching reference symbol, or the
// matching reference symbol is not a function, an error is printed to stderr, `good` is left
// false and an empty context is returned.
N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context, bool& good) {
    N64Recomp::ModContext ret{};
    good = false;

    // Make a vector containing 0, 1, 2, ... section count - 1
    std::vector<uint16_t> section_order;
    section_order.resize(input_context.sections.size());
    std::iota(section_order.begin(), section_order.end(), 0);

    // Sort the vector based on the rom address of the corresponding section.
    std::sort(section_order.begin(), section_order.end(),
        [&](uint16_t a, uint16_t b) {
            return input_context.sections[a].rom_addr < input_context.sections[b].rom_addr;
        }
    );

    // ram-to-rom delta of the output section currently being built.
    uint32_t rom_to_ram = static_cast<uint32_t>(-1);

    // Iterate over the input sections in their sorted order.
    for (uint16_t section_index : section_order) {
        const auto& cur_section = input_context.sections[section_index];

        // Stop checking sections once a non-allocated section has been reached.
        if (cur_section.rom_addr == static_cast<uint32_t>(-1)) {
            break;
        }

        const uint32_t cur_rom_to_ram = cur_section.ram_addr - cur_section.rom_addr;

        // Merge with the previous output section when the delta matches. The emptiness check keeps
        // the first section from ever being "merged" into a section that doesn't exist yet.
        if (!ret.base_context.sections.empty() && rom_to_ram == cur_rom_to_ram) {
            auto& section_out = ret.base_context.sections.back();
            const uint32_t cur_section_end = cur_section.rom_addr + cur_section.size;
            section_out.size = cur_section_end - section_out.rom_addr;
        }
        // Otherwise, create a new output section and advance to it.
        else {
            // Grow both vectors together so that every output section has a function list.
            ret.base_context.sections.resize(ret.base_context.sections.size() + 1);
            ret.base_context.section_functions.resize(ret.base_context.sections.size());
            rom_to_ram = cur_rom_to_ram;

            auto& new_section = ret.base_context.sections.back();
            new_section.rom_addr = cur_section.rom_addr;
            new_section.ram_addr = cur_section.ram_addr;
            new_section.size = cur_section.size;
        }

        const size_t output_section_index = ret.base_context.sections.size() - 1;

        // Check for special section names.
        const bool patch_section = cur_section.name == ".recomp_patch";
        const bool force_patch_section = cur_section.name == ".recomp_force_patch";

        // Add the functions from the current input section to the current output section.
        for (size_t input_func_index : input_context.section_functions[section_index]) {
            const size_t output_func_index = ret.base_context.functions.size();
            const auto& cur_func = input_context.functions[input_func_index];

            // If this is the patch section, create a replacement for this function.
            if (patch_section || force_patch_section) {
                // Find the corresponding symbol in the reference symbols.
                auto find_sym_it = input_context.reference_symbols_by_name.find(cur_func.name);
                if (find_sym_it == input_context.reference_symbols_by_name.end()) {
                    fmt::print(stderr, "Function {} is marked as a patch but doesn't exist in the original ROM!\n", cur_func.name);
                    return {};
                }

                // Check that the reference symbol is actually a function.
                const auto& reference_symbol = input_context.reference_symbols[find_sym_it->second];
                if (!reference_symbol.is_function) {
                    fmt::print(stderr, "Function {0} is marked as a patch, but {0} was a variable in the original ROM!\n", cur_func.name);
                    return {};
                }

                const auto& reference_section = input_context.reference_sections[reference_symbol.section_index];

                // Add a replacement for this function to the output context.
                ret.replacements.emplace_back(
                    N64Recomp::FunctionReplacement {
                        .func_index = static_cast<uint32_t>(output_func_index),
                        .original_section_vrom = reference_section.rom_addr,
                        .original_vram = reference_section.ram_addr + reference_symbol.section_offset,
                        .flags = force_patch_section ? N64Recomp::ReplacementFlags::Force : N64Recomp::ReplacementFlags{}
                    }
                );
            }

            ret.base_context.section_functions[output_section_index].push_back(output_func_index);

            // Add this function to the output context. The words vector only needs the correct
            // size; its contents aren't used when making a mod symbol file, so nothing is copied.
            ret.base_context.functions.emplace_back(
                cur_func.vram,
                cur_func.rom,
                std::vector<uint32_t>(cur_func.words.size()), // words
                "", // name
                static_cast<uint16_t>(output_section_index),
                false, // ignored
                false, // reimplemented
                false // stubbed
            );
        }

        // TODO relocs (including reference symbols and HI16 and LO16 patching for non-relocatable reference symbols)
        // TODO exports (functions placed in the ".recomp_export" section)
    }

    good = true;
    return ret;
}
// Writes `size` bytes starting at `data` to a freshly created binary file at `path`.
// Returns false if the file could not be opened or the write did not complete.
static bool write_binary_output(const std::filesystem::path& path, const void* data, size_t size) {
    std::ofstream file{ path, std::ios::binary };
    if (!file.good()) {
        return false;
    }
    file.write(static_cast<const char*>(data), static_cast<std::streamsize>(size));
    // Flush explicitly so that buffered write errors are visible before reporting success.
    file.flush();
    return file.good();
}

// Entry point of the mod tool.
// Takes the path of a mod toml as its only argument, loads the reference symbols it names,
// parses the mod elf, and writes the mod symbol file and the mod binary.
// Returns EXIT_FAILURE if any step fails.
int main(int argc, const char** argv) {
    if (argc != 2) {
        fmt::print("Usage: {} [mod toml]\n", argv[0]);
        return EXIT_SUCCESS;
    }

    bool config_good = false;
    ModConfig config = parse_mod_config(argv[1], config_good);

    if (!config_good) {
        fmt::print(stderr, "Failed to read mod config file: {}\n", argv[1]);
        return EXIT_FAILURE;
    }

    N64Recomp::Context context{};

    // Import symbols from symbols files that were provided.
    {
        // Create a new temporary context to read the function reference symbol file into, since it's the same format as the recompilation symbol file.
        std::vector<uint8_t> dummy_rom{};
        N64Recomp::Context reference_context{};
        if (!N64Recomp::Context::from_symbol_file(config.func_reference_syms_file_path, std::move(dummy_rom), reference_context, false)) {
            fmt::print(stderr, "Failed to load provided function reference symbol file\n");
            return EXIT_FAILURE;
        }

        // Use the reference context to build a reference symbol list for the actual context.
        context.import_reference_context(reference_context);
    }

    for (const std::filesystem::path& cur_data_sym_path : config.data_reference_syms_file_paths) {
        if (!context.read_data_reference_syms(cur_data_sym_path)) {
            fmt::print(stderr, "Failed to load provided data reference symbol file: {}\n", cur_data_sym_path.string());
            return EXIT_FAILURE;
        }
    }

    N64Recomp::ElfParsingConfig elf_config {
        .bss_section_suffix = {},
        .manually_sized_funcs = {},
        .relocatable_sections = {},
        .has_entrypoint = false,
        .entrypoint_address = 0,
        .use_absolute_symbols = false,
        .unpaired_lo16_warnings = false,
        .all_sections_relocatable = true
    };
    bool dummy_found_entrypoint = false;
    N64Recomp::DataSymbolMap dummy_syms_map;
    bool elf_good = N64Recomp::Context::from_elf_file(config.elf_path, context, elf_config, false, dummy_syms_map, dummy_found_entrypoint);

    if (!elf_good) {
        fmt::print(stderr, "Failed to parse mod elf\n");
        return EXIT_FAILURE;
    }

    if (context.sections.size() == 0) {
        fmt::print(stderr, "No sections found in mod elf\n");
        return EXIT_FAILURE;
    }

    bool mod_context_good = false;
    N64Recomp::ModContext mod_context = build_mod_context(context, mod_context_good);

    // Don't emit output files from a context that failed to build.
    if (!mod_context_good) {
        fmt::print(stderr, "Failed to build mod context\n");
        return EXIT_FAILURE;
    }

    std::vector<uint8_t> symbols_bin = N64Recomp::symbols_to_bin_v1(mod_context);

    if (!write_binary_output(config.output_syms_path, symbols_bin.data(), symbols_bin.size())) {
        fmt::print(stderr, "Failed to write output symbol file: {}\n", config.output_syms_path.string());
        return EXIT_FAILURE;
    }

    if (!write_binary_output(config.output_binary_path, context.rom.data(), context.rom.size())) {
        fmt::print(stderr, "Failed to write output binary file: {}\n", config.output_binary_path.string());
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}

View file

@ -96,6 +96,7 @@ namespace N64Recomp {
int32_t entrypoint_address;
bool use_absolute_symbols;
bool unpaired_lo16_warnings;
bool all_sections_relocatable;
};
struct DataSymbol {
@ -114,16 +115,17 @@ namespace N64Recomp {
struct Context {
std::vector<Section> sections;
std::vector<Function> functions;
// A list of the list of each function (by index in `functions`) in a given section
std::vector<std::vector<size_t>> section_functions;
// A mapping of vram address to every function with that address.
std::unordered_map<uint32_t, std::vector<size_t>> functions_by_vram;
// The target ROM being recompiled, TODO move this outside of the context to avoid making a copy for mod contexts.
// Used for reading relocations and for the output binary feature.
std::vector<uint8_t> rom;
//// Only used by the CLI, TODO move these to a struct in the internal headers.
//// Only used by the CLI, TODO move this to a struct in the internal headers.
// A mapping of function name to index in the functions vector
std::unordered_map<std::string, size_t> functions_by_name;
// A list of the list of each function (by index in `functions`) in a given section
std::vector<std::vector<size_t>> section_functions;
std::unordered_map<std::string, size_t> functions_by_name;
//// Reference symbols (used for populating relocations for patches)
// A list of the sections that contain the reference symbols.
@ -151,6 +153,8 @@ namespace N64Recomp {
// Bit flags attached to a function replacement.
enum class ReplacementFlags : uint32_t {
    Force = 1 << 0,
};
// Bitwise AND of two flag sets. constexpr so flags can be tested in constant expressions.
constexpr ReplacementFlags operator&(ReplacementFlags lhs, ReplacementFlags rhs) {
    return static_cast<ReplacementFlags>(static_cast<uint32_t>(lhs) & static_cast<uint32_t>(rhs));
}
// Bitwise OR of two flag sets. constexpr so flags can be combined in constant expressions.
constexpr ReplacementFlags operator|(ReplacementFlags lhs, ReplacementFlags rhs) {
    return static_cast<ReplacementFlags>(static_cast<uint32_t>(lhs) | static_cast<uint32_t>(rhs));
}
struct FunctionReplacement {
uint32_t func_index;
@ -172,6 +176,7 @@ namespace N64Recomp {
};
ModSymbolsError parse_mod_symbols(std::span<const char> data, std::span<const uint8_t> binary, const std::unordered_map<uint32_t, uint16_t>& sections_by_vrom, Context& context_out, ModContext& mod_context_out);
std::vector<uint8_t> symbols_to_bin_v1(const ModContext& mod_context);
}
#endif

View file

@ -7,7 +7,6 @@
bool read_symbols(N64Recomp::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, const N64Recomp::ElfParsingConfig& elf_config, bool dumping_context, std::unordered_map<uint16_t, std::vector<N64Recomp::DataSymbol>>& data_syms) {
bool found_entrypoint_func = false;
ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
fmt::print("Num symbols: {}\n", symbols.get_symbols_num());
std::unordered_map<uint16_t, uint16_t> bss_section_to_target_section{};
@ -234,7 +233,6 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
std::unordered_map<std::string, ELFIO::section*> bss_sections_by_name;
// Iterate over every section to record rom addresses and find the symbol table
fmt::print("Sections\n");
for (const auto& section : elf_file.sections) {
auto& section_out = context.sections[section->get_index()];
//fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size());
@ -249,7 +247,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
symtab_section = section.get();
}
if (elf_config.relocatable_sections.contains(section_name)) {
if (elf_config.all_sections_relocatable || elf_config.relocatable_sections.contains(section_name)) {
section_out.relocatable = true;
}
@ -265,7 +263,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
// If this reloc section is for a section that has been marked as relocatable, record it in the reloc section lookup.
// Alternatively, if this recompilation uses reference symbols then record all reloc sections.
if (!context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) {
if (elf_config.all_sections_relocatable || !context.reference_sections.empty() || elf_config.relocatable_sections.contains(reloc_target_section)) {
reloc_sections_by_name[reloc_target_section] = section.get();
}
}
@ -275,7 +273,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
std::string bss_target_section = section_name.substr(0, section_name.size() - elf_config.bss_section_suffix.size());
// If this bss section is for a section that has been marked as relocatable, record it in the reloc section lookup
if (elf_config.relocatable_sections.contains(bss_target_section)) {
if (elf_config.all_sections_relocatable || elf_config.relocatable_sections.contains(bss_target_section)) {
bss_sections_by_name[bss_target_section] = section.get();
}
}
@ -559,7 +557,6 @@ bool N64Recomp::Context::from_elf_file(const std::filesystem::path& elf_file_pat
// If no symbol table was found then exit
if (symtab_section == nullptr) {
fmt::print("No symbol table section found\n");
return false;
}

View file

@ -336,7 +336,8 @@ int main(int argc, char** argv) {
.has_entrypoint = config.has_entrypoint,
.entrypoint_address = config.entrypoint,
.use_absolute_symbols = config.use_absolute_symbols,
.unpaired_lo16_warnings = config.unpaired_lo16_warnings
.unpaired_lo16_warnings = config.unpaired_lo16_warnings,
.all_sections_relocatable = false,
};
for (const auto& func_size : config.manual_func_sizes) {

View file

@ -114,6 +114,7 @@ bool parse_v1(std::span<const char> data, const std::unordered_map<uint32_t, uin
}
N64Recomp::Function& cur_func = ret.functions[start_func_index + func_index];
cur_func.vram = cur_section.ram_addr + funcs[func_index].section_offset;
cur_func.rom = cur_section.rom_addr + funcs[func_index].section_offset;
cur_func.words.resize(funcs[func_index].size / sizeof(uint32_t)); // Filled in later
cur_func.name = "mod_func_" + std::to_string(start_func_index + func_index);
@ -126,7 +127,7 @@ bool parse_v1(std::span<const char> data, const std::unordered_map<uint32_t, uin
cur_reloc.type = static_cast<N64Recomp::RelocType>(relocs[reloc_index].type);
cur_reloc.target_section_offset = relocs[reloc_index].target_section_offset;
uint32_t target_section_vrom = relocs[reloc_index].target_section_vrom;
if (target_section_vrom == (uint32_t)-1) {
if (target_section_vrom == 0) {
cur_reloc.target_section = N64Recomp::SectionSelf;
}
else {
@ -144,7 +145,7 @@ bool parse_v1(std::span<const char> data, const std::unordered_map<uint32_t, uin
const ReplacementV1* replacements = reinterpret_data<ReplacementV1>(data, offset, num_replacements);
if (replacements == nullptr) {
printf("Failed to read replacements (count: %d)\n", num_replacements);
printf("Failed to read replacements (count: %zu)\n", num_replacements);
return false;
}
@ -205,3 +206,93 @@ N64Recomp::ModSymbolsError N64Recomp::parse_mod_symbols(std::span<const char> da
return ModSymbolsError::Good;
}
// Appends the object representation of `*data` (sizeof(T) bytes) to the end of `vec`.
template <typename T>
void vec_put(std::vector<uint8_t>& vec, const T* data) {
    const uint8_t* const first_byte = reinterpret_cast<const uint8_t*>(data);
    vec.insert(vec.end(), first_byte, first_byte + sizeof(T));
}
// Serializes a mod context into the version 1 binary mod symbol format.
//
// Layout, in order: file header, sub header (section and replacement counts), then for each
// section its header followed by its function entries and its reloc entries, and finally one
// entry per function replacement. Each structure is appended as its raw in-memory bytes.
std::vector<uint8_t> N64Recomp::symbols_to_bin_v1(const N64Recomp::ModContext& mod_context) {
    std::vector<uint8_t> ret{};
    ret.reserve(1024);

    const N64Recomp::Context& context = mod_context.base_context;

    const static FileHeader header {
        .magic = {'N', '6', '4', 'R', 'S', 'Y', 'M', 'S'},
        .version = 1
    };

    vec_put(ret, &header);

    FileSubHeaderV1 sub_header {
        .num_sections = static_cast<uint32_t>(context.sections.size()),
        .num_replacements = static_cast<uint32_t>(mod_context.replacements.size()),
    };

    vec_put(ret, &sub_header);

    // Used for any section that has no entry in section_functions, so a context whose
    // section_functions is shorter than sections is written as having no functions there
    // instead of being read out of bounds.
    static const std::vector<size_t> no_functions{};

    for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
        const Section& cur_section = context.sections[section_index];
        const std::vector<size_t>& cur_section_funcs =
            section_index < context.section_functions.size() ? context.section_functions[section_index] : no_functions;

        SectionHeaderV1 section_out {
            .file_offset = cur_section.rom_addr,
            .vram = cur_section.ram_addr,
            .rom_size = cur_section.size,
            .bss_size = cur_section.bss_size,
            .num_funcs = static_cast<uint32_t>(cur_section_funcs.size()),
            .num_relocs = static_cast<uint32_t>(cur_section.relocs.size())
        };

        vec_put(ret, &section_out);

        for (size_t func_index : cur_section_funcs) {
            const Function& cur_func = context.functions[func_index];
            FuncV1 func_out {
                .section_offset = cur_func.vram - cur_section.ram_addr,
                .size = (uint32_t)(cur_func.words.size() * sizeof(cur_func.words[0]))
            };

            vec_put(ret, &func_out);
        }

        for (const Reloc& cur_reloc : cur_section.relocs) {
            // A target vrom of 0 is written for relocs that target their own section.
            uint32_t target_section_vrom;
            if (cur_reloc.target_section == SectionSelf) {
                target_section_vrom = 0;
            }
            else if (cur_reloc.reference_symbol) {
                target_section_vrom = context.reference_sections[cur_reloc.target_section].rom_addr;
            }
            else {
                target_section_vrom = context.sections[cur_reloc.target_section].rom_addr;
            }
            RelocV1 reloc_out {
                .section_offset = cur_reloc.address - cur_section.ram_addr,
                .type = static_cast<uint32_t>(cur_reloc.type),
                .target_section_offset = cur_reloc.target_section_offset,
                .target_section_vrom = target_section_vrom
            };

            vec_put(ret, &reloc_out);
        }
    }

    for (const FunctionReplacement& cur_replacement : mod_context.replacements) {
        uint32_t flags = 0;
        if ((cur_replacement.flags & ReplacementFlags::Force) == ReplacementFlags::Force) {
            flags |= 0x1;
        }

        ReplacementV1 replacement_out {
            .func_index = cur_replacement.func_index,
            .original_section_vrom = cur_replacement.original_section_vrom,
            .original_vram = cur_replacement.original_vram,
            .flags = flags
        };

        vec_put(ret, &replacement_out);
    }

    return ret;
}