Make mod tool emit relocs and patch binary for non-relocatable symbol references as needed

This commit is contained in:
Mr-Wiseguy 2024-07-22 00:01:47 -04:00
parent a7a4134123
commit ab80ff962e
5 changed files with 251 additions and 15 deletions

View file

@ -12,6 +12,7 @@ struct ModConfig {
std::filesystem::path elf_path;
std::filesystem::path func_reference_syms_file_path;
std::vector<std::filesystem::path> data_reference_syms_file_paths;
std::vector<std::filesystem::path> dependency_paths;
};
static std::filesystem::path concat_if_not_empty(const std::filesystem::path& parent, const std::filesystem::path& child) {
@ -21,14 +22,14 @@ static std::filesystem::path concat_if_not_empty(const std::filesystem::path& pa
return child;
}
static std::vector<std::filesystem::path> get_data_syms_paths(const toml::array* data_syms_paths_array, const std::filesystem::path& basedir) {
static std::vector<std::filesystem::path> get_toml_path_array(const toml::array* toml_array, const std::filesystem::path& basedir) {
std::vector<std::filesystem::path> ret;
// Reserve room for all the funcs in the map.
ret.reserve(data_syms_paths_array->size());
data_syms_paths_array->for_each([&ret, &basedir](auto&& el) {
ret.reserve(toml_array->size());
toml_array->for_each([&ret, &basedir](auto&& el) {
if constexpr (toml::is_string<decltype(el)>) {
ret.emplace_back(concat_if_not_empty(basedir, el.template value_exact<std::string>().value()));
ret.emplace_back(concat_if_not_empty(basedir, el.ref<std::string>()));
}
else {
throw toml::parse_error("Invalid type for data reference symbol file entry", el.source());
@ -38,6 +39,80 @@ static std::vector<std::filesystem::path> get_data_syms_paths(const toml::array*
return ret;
}
static bool read_dependency_file(const std::filesystem::path& dependency_path, N64Recomp::Context& context, std::vector<std::string>& import_symbol_mod_ids) {
toml::table toml_data{};
try {
toml_data = toml::parse_file(dependency_path.native());
const auto dependency_data = toml_data["dependency"];
if (!dependency_data.is_array()) {
if (dependency_data) {
throw toml::parse_error("No dependency array found", dependency_data.node()->source());
}
else {
throw toml::parse_error("Invalid dependency array", dependency_data.node()->source());
}
}
toml::array* dependency_array = dependency_data.as_array();
for (const auto& dependency_node : *dependency_array) {
if (!dependency_node.is_table()) {
throw toml::parse_error("Invalid dependency entry", dependency_node.source());
}
// Mod ID
toml::node_view mod_id_node = dependency_node[toml::path{"mod_id"}];
if (!mod_id_node.is_string()) {
if (mod_id_node) {
throw toml::parse_error("Invalid mod id", mod_id_node.node()->source());
}
else {
throw toml::parse_error("Dependency entry is missing mod id", dependency_node.source());
}
}
const std::string& mod_id = mod_id_node.ref<std::string>();
// Symbol list
toml::node_view functions_data = dependency_node[toml::path{"functions"}];
if (functions_data.is_array()) {
const toml::array* functions_array = functions_data.as_array();
for (const auto& function_node : *functions_array) {
if (!function_node.is_string()) {
throw toml::parse_error("Invalid dependency function", function_node.source());
}
const std::string& function_name = function_node.ref<std::string>();
context.reference_symbol_names.emplace_back(function_name);
context.reference_symbols_by_name[function_name] = context.reference_symbols.size();
context.reference_symbols.emplace_back(
N64Recomp::ReferenceSymbol {
.section_index = N64Recomp::SectionImport,
.section_offset = 0,
.is_function = true
}
);
import_symbol_mod_ids.emplace_back(mod_id);
}
}
else {
if (functions_data) {
throw toml::parse_error("Mod toml is missing data reference symbol file list", functions_data.node()->source());
}
else {
throw toml::parse_error("Invalid data reference symbol file list", functions_data.node()->source());
}
}
}
}
catch (const toml::parse_error& err) {
std::cerr << "Syntax error parsing symbol import file: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl;
return false;
}
return true;
}
ModConfig parse_mod_config(const std::filesystem::path& config_path, bool& good) {
ModConfig ret{};
good = false;
@ -90,7 +165,7 @@ ModConfig parse_mod_config(const std::filesystem::path& config_path, bool& good)
toml::node_view data_reference_syms_file_data = config_data["data_reference_syms_files"];
if (data_reference_syms_file_data.is_array()) {
const toml::array* array = data_reference_syms_file_data.as_array();
ret.data_reference_syms_file_paths = get_data_syms_paths(array, basedir);
ret.data_reference_syms_file_paths = get_toml_path_array(array, basedir);
}
else {
if (data_reference_syms_file_data) {
@ -100,6 +175,16 @@ ModConfig parse_mod_config(const std::filesystem::path& config_path, bool& good)
throw toml::parse_error("Invalid data reference symbol file list", data_reference_syms_file_data.node()->source());
}
}
// Imported symbols files (optional)
toml::node_view dependency_data = config_data["dependencies"];
if (dependency_data.is_array()) {
const toml::array* array = dependency_data.as_array();
ret.dependency_paths = get_toml_path_array(array, basedir);
}
else if (dependency_data) {
throw toml::parse_error("Invalid imported symbols file list", dependency_data.node()->source());
}
}
catch (const toml::parse_error& err) {
std::cerr << "Syntax error parsing toml: " << *err.source().path << " (" << err.source().begin << "):\n" << err.description() << std::endl;
@ -110,6 +195,10 @@ ModConfig parse_mod_config(const std::filesystem::path& config_path, bool& good)
return ret;
}
// Rounds `value` up to the next multiple of 16; values that are already multiples of 16 are
// returned unchanged. The addition is unsigned, so inputs above 0xFFFFFFF0 wrap around to 0.
static inline uint32_t round_up_16(uint32_t value) {
    constexpr uint32_t low_bits_mask = 15u;
    const uint32_t bumped = value + low_bits_mask;
    return bumped & ~low_bits_mask;
}
N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context, bool& good) {
N64Recomp::ModContext ret{};
good = false;
@ -122,10 +211,20 @@ N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context,
// Sort the vector based on the rom address of the corresponding section.
std::sort(section_order.begin(), section_order.end(),
[&](uint16_t a, uint16_t b) {
return input_context.sections[a].rom_addr < input_context.sections[b].rom_addr;
const auto& section_a = input_context.sections[a];
const auto& section_b = input_context.sections[b];
// Sort primarily by ROM address.
if (section_a.rom_addr != section_b.rom_addr) {
return section_a.rom_addr < section_b.rom_addr;
}
// Sort secondarily by RAM address.
return section_a.ram_addr < section_b.ram_addr;
}
);
// TODO avoid a copy here.
ret.base_context.rom = input_context.rom;
uint32_t rom_to_ram = (uint32_t)-1;
size_t output_section_index = (size_t)-1;
ret.base_context.sections.resize(1);
@ -135,9 +234,21 @@ N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context,
const auto& cur_section = input_context.sections[section_index];
uint32_t cur_rom_to_ram = cur_section.ram_addr - cur_section.rom_addr;
// Stop checking sections once a non-allocated section has been reached.
// Check if this is a non-allocated section.
if (cur_section.rom_addr == (uint32_t)-1) {
break;
// If so, check if it has a vram address directly after the current output section. If it does, then add this
// section's size to the output section's bss size.
if (output_section_index != -1 && cur_section.size != 0) {
auto& section_out = ret.base_context.sections[output_section_index];
uint32_t output_section_bss_start = section_out.ram_addr + section_out.size;
uint32_t output_section_bss_end = output_section_bss_start + section_out.bss_size;
// Check if the current section starts at the end of the output section, allowing for a range of matches to account for 16 byte section alignment.
if (cur_section.ram_addr >= output_section_bss_end && cur_section.ram_addr <= round_up_16(output_section_bss_end)) {
// Calculate the output section's bss size by using its non-bss end address and the current section's end address.
section_out.bss_size = cur_section.ram_addr + cur_section.size - output_section_bss_start;
}
}
continue;
}
// Check if this section matches up with the previous section to merge them together.
@ -178,8 +289,23 @@ N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context,
// If this is the patch section, create a replacement for this function.
if (patch_section || force_patch_section) {
// Find the corresponding symbol in the reference symbols.
bool original_func_exists = false;
auto find_sym_it = input_context.reference_symbols_by_name.find(cur_func.name);
// Check if the function was found.
if (find_sym_it == input_context.reference_symbols_by_name.end()) {
original_func_exists = false;
}
// Ignore reference symbols in the import section, as those are imports and not original symbols.
else if (input_context.reference_symbols[find_sym_it->second].section_index == N64Recomp::SectionImport) {
original_func_exists = false;
}
else {
original_func_exists = true;
}
// Check that the function being patched exists in the original reference symbols.
if (!original_func_exists) {
fmt::print("Function {} is marked as a patch but doesn't exist in the original ROM!\n", cur_func.name);
return {};
}
@ -222,10 +348,89 @@ N64Recomp::ModContext build_mod_context(const N64Recomp::Context& input_context,
ret.base_context.functions[output_func_index].words.resize(cur_func.words.size());
}
// TODO relocs (including reference symbols and HI16 and LO16 patching for non-relocatable reference symbols)
// Copy relocs and patch HI16/LO16/26 relocs for non-relocatable reference symbols
section_out.relocs.reserve(cur_section.relocs.size());
for (const auto& cur_reloc : cur_section.relocs) {
// Skip null relocs.
if (cur_reloc.type == N64Recomp::RelocType::R_MIPS_NONE) {
continue;
}
// Reloc to an imported symbol.
if (cur_reloc.reference_symbol && cur_reloc.target_section == N64Recomp::SectionImport) {
// Copy the reloc as-is.
section_out.relocs.emplace_back(cur_reloc);
}
// Reloc to a reference symbol.
else if (cur_reloc.reference_symbol) {
const auto& reloc_section = input_context.reference_sections[cur_reloc.target_section];
// Patch relocations to non-relocatable reference sections.
if (!reloc_section.relocatable) {
uint32_t reloc_target_address = reloc_section.ram_addr + cur_reloc.target_section_offset;
uint32_t reloc_rom_address = cur_reloc.address - cur_section.ram_addr + cur_section.rom_addr;
uint32_t* reloc_word_ptr = reinterpret_cast<uint32_t*>(ret.base_context.rom.data() + reloc_rom_address);
uint32_t reloc_word = byteswap(*reloc_word_ptr);
switch (cur_reloc.type) {
case N64Recomp::RelocType::R_MIPS_32:
// Don't patch MIPS32 relocations, as they've already been patched during elf parsing.
break;
case N64Recomp::RelocType::R_MIPS_NONE:
// Nothing to do.
break;
case N64Recomp::RelocType::R_MIPS_HI16:
reloc_word &= 0xFFFF0000;
reloc_word |= (reloc_target_address - (int16_t)(reloc_target_address & 0xFFFF)) >> 16 & 0xFFFF;
break;
case N64Recomp::RelocType::R_MIPS_LO16:
reloc_word &= 0xFFFF0000;
reloc_word |= reloc_target_address & 0xFFFF;
break;
case N64Recomp::RelocType::R_MIPS_26:
if (reloc_target_address & 0x3) {
fmt::print("R_MIPS_26 reloc at address 0x{:08X} in section {} has a target address not divisible by 4!\n",
cur_reloc.address, cur_section.name);
return {};
}
reloc_word &= 0xFC000000;
reloc_word |= (reloc_target_address >> 2) & 0x3FFFFFF;
break;
default:
fmt::print("Unsupported or unknown relocation type {} in reloc at address 0x{:08X} in section {}!\n",
(int)cur_reloc.type, cur_reloc.address, cur_section.name);
return {};
}
*reloc_word_ptr = byteswap(reloc_word);
}
// Copy relocations to relocatable reference sections as-is.
else {
section_out.relocs.emplace_back(cur_reloc);
}
}
// Reloc to an internal symbol.
else {
const N64Recomp::Section& target_section = input_context.sections[cur_reloc.target_section];
uint32_t target_rom_to_ram = target_section.ram_addr - target_section.rom_addr;
bool is_noload = target_section.rom_addr == (uint32_t)-1;
if (!is_noload && target_rom_to_ram != cur_rom_to_ram) {
fmt::print("Reloc at address 0x{:08X} in section {} points to a different section!\n",
cur_reloc.address, cur_section.name);
return {};
}
uint32_t output_section_offset = cur_reloc.target_section_offset + target_section.ram_addr - cur_section.ram_addr;
section_out.relocs.emplace_back(N64Recomp::Reloc{
.address = cur_reloc.address,
.target_section_offset = output_section_offset,
.symbol_index = 0,
.target_section = N64Recomp::SectionSelf,
.type = cur_reloc.type,
.reference_symbol = false,
});
}
}
// TODO exports
// TODO imports
}
good = true;
@ -262,6 +467,16 @@ int main(int argc, const char** argv) {
context.import_reference_context(reference_context);
}
size_t import_section_symbol_start = context.reference_symbols.size();
std::vector<std::string> import_symbol_mod_ids{};
for (const std::filesystem::path& dependency_path : config.dependency_paths) {
if (!read_dependency_file(dependency_path, context, import_symbol_mod_ids)) {
fmt::print(stderr, "Failed to read dependency file: {}\n", dependency_path.string());
return EXIT_FAILURE;
}
}
for (const std::filesystem::path& cur_data_sym_path : config.data_reference_syms_file_paths) {
if (!context.read_data_reference_syms(cur_data_sym_path)) {
fmt::print(stderr, "Failed to load provided data reference symbol file: {}\n", cur_data_sym_path.string());
@ -301,7 +516,7 @@ int main(int argc, const char** argv) {
output_syms_file.write(reinterpret_cast<const char*>(symbols_bin.data()), symbols_bin.size());
std::ofstream output_binary_file{ config.output_binary_path, std::ios::binary };
output_binary_file.write(reinterpret_cast<const char*>(context.rom.data()), context.rom.size());
output_binary_file.write(reinterpret_cast<const char*>(mod_context.base_context.rom.data()), mod_context.base_context.rom.size());
return EXIT_SUCCESS;
}

View file

@ -59,6 +59,7 @@ namespace N64Recomp {
constexpr uint16_t SectionSelf = (uint16_t)-1;
constexpr uint16_t SectionAbsolute = (uint16_t)-2;
constexpr uint16_t SectionImport = (uint16_t)-3; // Imported symbols for mods
struct Section {
uint32_t rom_addr = 0;
uint32_t ram_addr = 0;

View file

@ -612,6 +612,7 @@ void N64Recomp::Context::import_reference_context(const N64Recomp::Context& refe
for (const N64Recomp::Function& func_in: reference_context.functions) {
const N64Recomp::Section& func_section = reference_context.sections[func_in.section_index];
// TODO Check if reference_symbols_by_name already contains the name and show a conflict error if so.
reference_symbols_by_name.emplace(func_in.name, reference_symbols.size());
reference_symbols.emplace_back(N64Recomp::ReferenceSymbol{
@ -708,6 +709,7 @@ bool N64Recomp::Context::read_data_reference_syms(const std::filesystem::path& d
throw toml::parse_error("Reference data symbol entry is missing required field(s)", data_sym_el.source());
}
// TODO Check if reference_symbols_by_name already contains the name and show a conflict error if so.
this->reference_symbols_by_name.emplace(name.value(), reference_symbols.size());
this->reference_symbols.emplace_back(

View file

@ -390,7 +390,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
bool found_rel_symbol = symbol_accessor.get_symbol(
rel_symbol, rel_symbol_name, rel_symbol_value, rel_symbol_size, rel_symbol_bind, rel_symbol_type, rel_symbol_section_index, rel_symbol_other);
uint32_t rel_section_vram = section_out.ram_addr;
uint32_t rel_section_vram = 0;
uint32_t rel_symbol_offset = 0;
// Check if the symbol is undefined to know whether to look for it in the reference symbols.
@ -412,7 +412,13 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
bool target_section_relocatable = false;
if (reloc_out.target_section != N64Recomp::SectionAbsolute && context.reference_sections[reloc_out.target_section].relocatable) {
if (reloc_out.target_section == N64Recomp::SectionImport) {
target_section_relocatable = true;
}
else if (reloc_out.target_section == N64Recomp::SectionAbsolute) {
target_section_relocatable = false;
}
else if (context.reference_sections[reloc_out.target_section].relocatable) {
target_section_relocatable = true;
}
@ -425,6 +431,7 @@ ELFIO::section* read_sections(N64Recomp::Context& context, const N64Recomp::ElfP
else {
reloc_out.reference_symbol = false;
reloc_out.target_section = rel_symbol_section_index;
rel_section_vram = context.sections[rel_symbol_section_index].ram_addr;
}
// Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf)

View file

@ -8,6 +8,8 @@ struct FileHeader {
// Sub-header for version 1 of the mod symbol file format, made up of four 32-bit counts.
// NOTE(review): presumably each count gives the number of entries of the named kind stored in the
// file, and this layout must match between the V1 writer and parser — confirm before reordering,
// resizing or inserting fields.
struct FileSubHeaderV1 {
uint32_t num_sections; // presumably the number of section entries — TODO confirm
uint32_t num_replacements; // presumably the number of function replacement entries — TODO confirm
uint32_t num_exports; // presumably the number of exported symbol entries — TODO confirm
uint32_t num_imports; // presumably the number of imported symbol entries — TODO confirm
};
struct SectionHeaderV1 {
@ -38,6 +40,9 @@ struct ReplacementV1 {
uint32_t flags; // force
};
// Sentinel values stored in a V1 reloc's target_section_vrom field instead of a section vrom.
// parse_v1 maps SectionSelfVromV1 to N64Recomp::SectionSelf and SectionImportVromV1 to
// N64Recomp::SectionImport; symbols_to_bin_v1 performs the reverse mapping when writing.
constexpr uint32_t SectionSelfVromV1 = 0xFFFFFFFF;
constexpr uint32_t SectionImportVromV1 = 0xFFFFFFFE;
template <typename T>
const T* reinterpret_data(std::span<const char> data, size_t& offset, size_t count = 1) {
if (offset + (sizeof(T) * count) > data.size()) {
@ -127,9 +132,12 @@ bool parse_v1(std::span<const char> data, const std::unordered_map<uint32_t, uin
cur_reloc.type = static_cast<N64Recomp::RelocType>(relocs[reloc_index].type);
cur_reloc.target_section_offset = relocs[reloc_index].target_section_offset;
uint32_t target_section_vrom = relocs[reloc_index].target_section_vrom;
if (target_section_vrom == 0) {
if (target_section_vrom == SectionSelfVromV1) {
cur_reloc.target_section = N64Recomp::SectionSelf;
}
else if (target_section_vrom == SectionImportVromV1) {
cur_reloc.target_section = N64Recomp::SectionImport;
}
else {
// TODO lookup by section index by original vrom
auto find_section_it = sections_by_vrom.find(target_section_vrom);
@ -259,7 +267,10 @@ std::vector<uint8_t> N64Recomp::symbols_to_bin_v1(const N64Recomp::ModContext& m
for (const Reloc& cur_reloc : cur_section.relocs) {
uint32_t target_section_vrom;
if (cur_reloc.target_section == SectionSelf) {
target_section_vrom = 0;
target_section_vrom = SectionSelfVromV1;
}
else if (cur_reloc.target_section == SectionImport) {
target_section_vrom = SectionImportVromV1;
}
else if (cur_reloc.reference_symbol) {
target_section_vrom = context.reference_sections[cur_reloc.target_section].rom_addr;