mirror of
				https://github.com/hedge-dev/XenonRecomp.git
				synced 2025-10-30 07:11:38 +00:00 
			
		
		
		
	Compare commits
	
		
			10 commits
		
	
	
		
			f2a92cf8fc
			...
			8247910df0
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 8247910df0 | ||
|   | 8fc280bed9 | ||
|   | 4452868029 | ||
|   | 9a4dc311c7 | ||
|   | 444ee2bda1 | ||
|   | 2365f4d697 | ||
|   | 21f1a81aa3 | ||
|   | 6dbbc6ea14 | ||
|   | fe3fdbdda5 | ||
|   | b18a1a6206 | 
					 2 changed files with 303 additions and 0 deletions
				
			
		
							
								
								
									
										297
									
								
								Auto_Function_Parser.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										297
									
								
								Auto_Function_Parser.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,297 @@ | |||
| ## | ||||
| ## Searches for functions in .text that are referenced by functions in .pdata | ||||
| ## | ||||
| ## Input:  | ||||
| ## Decompiled code - Created in IDA Pro 9.0SP1 with File -> Produce File -> Create HTML File... | ||||
| ## CLI output from a XenonRecomp run - When trying to compile with XenonRecomp, use > to save the output from the terminal | ||||
| ## | ||||
| ## Output:  | ||||
| ## XenonRecomp config - Function block for TOML to be inputted into XenonRecomp  | ||||
| ## | ||||
| 
 | ||||
| import sys | ||||
| import re | ||||
| 
 | ||||
| # Check if correct number of input arguments were given | ||||
| if len(sys.argv) != 4: | ||||
|     sys.exit("Auto_Function_Parser.py [IDA HTML] [XenonRecomp log] [Output TOML]") | ||||
| 
 | ||||
| # Filepath input arguments | ||||
| ida_html = sys.argv[1] | ||||
| xenonrecomp_log = sys.argv[2] | ||||
| output_file = sys.argv[3] | ||||
| 
 | ||||
| # Disable extra debug output  | ||||
| debug = False | ||||
| 
 | ||||
| ## | ||||
| ## Parse XenonRecomp log | ||||
| ## | ||||
| 
 | ||||
| # The starting index of the erroneous switch statement address in the XenonRecomp log | ||||
| switch_idx = 22 | ||||
| 
 | ||||
| # Initialize list to store erroneous switch statement addresses | ||||
| switch_addrs = [] | ||||
|      | ||||
| print("Parsing XenonRecomp log...") | ||||
| # Import each line of XenonRecomp log | ||||
| with open(xenonrecomp_log, 'r') as file: | ||||
|     # Read each line in the file | ||||
|     for line in file:  | ||||
|         # If this line describes an error, it has the address of a problematic switch statement | ||||
|         if re.search('ERROR: Switch case at ', line): | ||||
|             # Save the address as integer | ||||
|             switch_addrs.append(line[switch_idx:switch_idx+8]) | ||||
| 
 | ||||
| # Save only unique addresses and sort | ||||
| switch_addrs = set(switch_addrs) | ||||
| 
 | ||||
| ## | ||||
| ## Parse IDA HTML | ||||
| ## | ||||
| 
 | ||||
| # Initialize list to store start and end of functions  | ||||
| functs = [] | ||||
| 
 | ||||
| # Count how many functions have been added | ||||
| num_functs = 0 | ||||
| 
 | ||||
| # Function for adding to function list and incrementing count | ||||
| def add_function(new_start_addr, prev_end_addr, start_type): | ||||
|     global num_functs | ||||
|     # If an end address for the last added function was specified | ||||
|     if prev_end_addr != None: | ||||
|         # Set end address for last added function | ||||
|         functs[num_functs-1][1] = prev_end_addr | ||||
|     # Add a new function to the list with the specified starting address | ||||
|     functs.append([new_start_addr, 0, [], start_type]) | ||||
|     # Increment the number of functions | ||||
|     num_functs = num_functs+1 | ||||
| 
 | ||||
| # Mark if we are in .text section | ||||
| in_text = False | ||||
| 
 | ||||
| # Mark if we should end parsing | ||||
| end_parse = False | ||||
| 
 | ||||
| # Initialize address of last bctr instruction to 0 | ||||
| bctr_addr = '00000000' | ||||
| 
 | ||||
| # Initialize address of last blr instruction to 0 | ||||
| blr_addr = '00000000' | ||||
| 
 | ||||
| # Initialize address of last 'End of function' comment to 0 | ||||
| eof_addr = '00000000' | ||||
| 
 | ||||
| # Initialize address of last restgprlr instruction to 0 | ||||
| restgprlr_addr = '00000000' | ||||
| 
 | ||||
| # Initialize address of last padding to 0 | ||||
| pad_addr = 0 | ||||
| 
 | ||||
| # Import each line of decompiled code | ||||
| print("Parsing IDA HTML...") | ||||
| with open(ida_html, 'r') as file: | ||||
|     # Read each line in the file | ||||
|     for line in file: | ||||
|         if not end_parse: | ||||
|             # If in .text | ||||
|             if in_text: | ||||
|                 # Get the current address | ||||
|                 colon_idx = line.find(':') | ||||
|                 curr_addr = line[colon_idx+1:colon_idx+9] | ||||
| 
 | ||||
|                 # Check if this is the start of a function | ||||
|                 if re.search(r'\.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr+'</span><span class="c[0-9]*">: *</span><span class="c[0-9]*"># [A-Z][A-Z][A-Z][A-Z] XREF:', line): | ||||
|                     # Save current address as integer | ||||
|                     curr_addr_int = int(curr_addr, 16) | ||||
| 
 | ||||
|                     # If this is not the first function being added | ||||
|                     if num_functs > 0: | ||||
|                         # If last address had padding or restgprlr instruction, then this function was already added | ||||
|                         if curr_addr_int-4 == pad_addr or curr_addr_int-4 == restgprlr_addr: | ||||
|                             # Set function type for start address | ||||
|                             functs[num_functs-1][3] = 'sub' | ||||
|                         else: | ||||
|                             # Check if this function is part of latest added function | ||||
|                             is_nested_funct = False | ||||
|                             nested_functs = functs[num_functs-1][2] | ||||
|                             for nested_funct in nested_functs: | ||||
|                                 if nested_funct == curr_addr: | ||||
|                                     is_nested_funct = True | ||||
|                                     break | ||||
|                              | ||||
|                             # If last address was not padding and not nested in latest function | ||||
|                             if not is_nested_funct: | ||||
|                                 # Add new function and last function's end address | ||||
|                                 add_function(curr_addr_int, curr_addr_int, 'sub') | ||||
|                     else: | ||||
|                         # Add new function | ||||
|                         add_function(curr_addr_int, None, 'sub') | ||||
| 
 | ||||
|                 # If this is a location | ||||
|                 elif re.search(r'^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line): | ||||
|                     curr_addr_int = int(curr_addr, 16) | ||||
|                     curr_funct = functs[num_functs-1] | ||||
|                     # If previous address was a blr instruction | ||||
|                     if curr_addr_int-4 == int(blr_addr, 16): | ||||
|                         # If previous address had an 'End of function' comment or if there was a bctr with the comment | ||||
|                         if blr_addr == eof_addr or bctr_addr == eof_addr: | ||||
|                             # Find a XREF pointing to a .text address | ||||
|                             xref_idx = line.find('XREF: .text:') | ||||
|                             if xref_idx > -1: | ||||
|                                 underscore_idx = line.find('_', xref_idx) | ||||
|                                 if underscore_idx > -1: | ||||
|                                     xref = line[underscore_idx+1:underscore_idx+9] | ||||
|                                 else: | ||||
|                                     xref = line[xref_idx+12:xref_idx+20] | ||||
|                             else: | ||||
|                                 xref = None | ||||
| 
 | ||||
|                             # Couldn't find XREF pointing to .text address or the XREF is after this address | ||||
|                             if xref == None or int(xref, 16) > curr_addr_int: | ||||
|                                 # Add as new function | ||||
|                                 add_function(curr_addr_int, curr_addr_int, 'loc') | ||||
| 
 | ||||
|                     else: | ||||
|                         # Find address of function that references this | ||||
|                         xref_idx = line.find('CODE XREF: sub_') | ||||
|                         # If it was found | ||||
|                         if xref_idx > -1: | ||||
|                             # Store as nested function in latest function | ||||
|                             functs[num_functs-1][2].append(line[xref_idx+15:xref_idx+23]) | ||||
| 
 | ||||
|                 # Check if this line is padding | ||||
|                 elif num_functs > 0 and re.search(r'<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line): | ||||
|                     # Convert current address to integer  | ||||
|                     curr_addr_int = int(curr_addr, 16) | ||||
| 
 | ||||
|                     # Add a new function at the line after padding, and end the current function at this padding address | ||||
|                     add_function(curr_addr_int+4, curr_addr_int, None) | ||||
|                      | ||||
|                     # Save padding address | ||||
|                     pad_addr = curr_addr_int | ||||
| 
 | ||||
|                 # Check for blr instruction | ||||
|                 elif re.search('<span class="c[0-9]*">blr$', line): | ||||
|                     blr_addr = curr_addr | ||||
| 
 | ||||
|                 # Check for 'End of function' comment | ||||
|                 elif re.search('End of function ', line): | ||||
|                     eof_addr = curr_addr | ||||
|      | ||||
|                 # Check for bctr instruction | ||||
|                 elif re.search('<span class="c[0-9]*">bctr$', line): | ||||
|                     bctr_addr = curr_addr | ||||
| 
 | ||||
|                 # Check for restgprlr instruction | ||||
|                 elif re.search('<span class="c[0-9]*">b         </span><span class="c[0-9]*">__restgprlr_[0-9][0-9]$', line): | ||||
|                     # Convert current address to integer  | ||||
|                     curr_addr_int = int(curr_addr, 16) | ||||
| 
 | ||||
|                     # Add a new function at the line after restgprlr instruction, and end the current function at this address | ||||
|                     add_function(curr_addr_int+4, curr_addr_int, None) | ||||
|                      | ||||
|                     restgprlr_addr = curr_addr_int | ||||
| 
 | ||||
|             # If not in .text | ||||
|             else: | ||||
|                 # If .text section header found | ||||
|                 if re.search(r'<span class="c[0-9]*">\.section "\.text"', line): | ||||
|                     in_text = True | ||||
| 
 | ||||
| ## | ||||
| ## Find .text functions that are referenced by .pdata functions | ||||
| ## | ||||
| 
 | ||||
| # Initialize list for functions that need to be added to toml | ||||
| output_functs = [] | ||||
| 
 | ||||
| # Look for related functions for every unique errored switch statement | ||||
| print("Searching for needed functions...") | ||||
| for switch_addr in switch_addrs: | ||||
|     # Start looking at first subroutine | ||||
|     curr_funct_idx = 0 | ||||
| 
 | ||||
|     # Save current switch statement address as integer | ||||
|     switch_addr_int = int(switch_addr, 16) | ||||
| 
 | ||||
|     # The related function for this switch statement has not been found yet | ||||
|     search_for_funct = True | ||||
| 
 | ||||
|     # Start search for function relating to switch statement | ||||
|     while(search_for_funct): | ||||
|         curr_funct = functs[curr_funct_idx] | ||||
|         # If switch address is after this function's start | ||||
|         curr_funct_start = curr_funct[0] | ||||
|         if(switch_addr_int > curr_funct_start): | ||||
|             # If switch address is before this function's end | ||||
|             curr_funct_end = curr_funct[1] | ||||
|             if(switch_addr_int <= curr_funct_end): | ||||
|                 # Save current function's start address and the function's length | ||||
|                 if debug: | ||||
|                     output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr]) | ||||
|                 else: | ||||
|                     output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start)]) | ||||
|              | ||||
|                 # Don't need to continue search for this switch statement | ||||
|                 search_for_funct = False | ||||
| 
 | ||||
|             # Look in next function | ||||
|             curr_funct_idx = curr_funct_idx + 1 | ||||
| 
 | ||||
|         # Related function was not found | ||||
|         else: | ||||
|             print(f"WARNING: Function relating to {switch_addr} not found! Skipping.") | ||||
|             # Don't need to continue search for this switch statement | ||||
|             search_for_funct = False | ||||
| 
 | ||||
| # Remove duplicates | ||||
| if not debug:  | ||||
|     output_functs = list(set(tuple(funct) for funct in output_functs)) | ||||
| 
 | ||||
| # Make sure there are no functions with the same starting address but different lengths | ||||
| if not debug: | ||||
|     for i in range(len(output_functs)): | ||||
|         for j in range(i+1, len(output_functs)): | ||||
|             curr_funct_start = output_functs[i][0] | ||||
|             if curr_funct_start == output_functs[j][0]: | ||||
|                 print(f"WARNING: {curr_funct_start} has multiple entries of different lengths, manually find correct one.") | ||||
| 
 | ||||
| print(f"{len(output_functs)} functions found!") | ||||
| 
 | ||||
| ## | ||||
| ## Output all found functions to TOML in correct format | ||||
| ## | ||||
| 
 | ||||
| # Create formatted string to export to TOML | ||||
| output_str = "functions = [" | ||||
| 
 | ||||
| # Append all function addresses and lengths to formatted string | ||||
| print("Outputting to formatted file...") | ||||
| for funct in output_functs: | ||||
|     # Format hex to uppercase  | ||||
|     curr_funct_start = '0x'+funct[0][2:].upper() | ||||
|     curr_funct_end = '0x'+funct[1][2:].upper() | ||||
| 
 | ||||
|     # Format function  | ||||
|     curr_funct = "\n    { address = "+curr_funct_start+", size = "+curr_funct_end | ||||
|     if debug: | ||||
|         curr_funct = curr_funct+", src = "+funct[2] | ||||
|     curr_funct = curr_funct+" }," | ||||
| 
 | ||||
|     # Add to complete output string | ||||
|     output_str = output_str+curr_funct | ||||
| 
 | ||||
| # Delete last comma | ||||
| output_str = output_str[:len(output_str)-1] | ||||
| 
 | ||||
| # Add last bracket | ||||
| output_str = output_str+"\n]" | ||||
| 
 | ||||
| # Output to file | ||||
| with open(output_file, "w") as file: | ||||
|     file.write(output_str) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -190,6 +190,12 @@ functions = [ | |||
| 
 | ||||
| You can define function boundaries explicitly using the `functions` property if XenonAnalyse fails to analyze them correctly, for example, with functions containing jump tables. | ||||
| 
 | ||||
| You can automatically generate these using the FunctionParser.py script. You will need to create a HTML of your decompiled XEX with IDA using `File -> Produce File -> Create HTML File...` and save the terminal output from running XenonRecomp by appending `> [output log file path]` to the command. | ||||
| 
 | ||||
| ``` | ||||
| python3 FunctionParser.py [input IDA HTML file path] [input XenonRecomp log file path] [output function list file path] | ||||
| ``` | ||||
| 
 | ||||
| #### Invalid Instruction Skips | ||||
| 
 | ||||
| ```toml | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue