From b18a1a62067ed877f26f699eaf1fa4836e7831b2 Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sat, 8 Mar 2025 12:47:45 -0500
Subject: [PATCH 1/9] Add parser.py to build XenonRecomp function list (mostly correct)

---
 parser.py | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 parser.py
diff --git a/parser.py b/parser.py
new file mode 100644
index 0000000..907b3bc
--- /dev/null
+++ b/parser.py
@@ -0,0 +1,221 @@
+##
+## Searches for functions in .text that are referenced by functions in .pdata
+##
+## Input: 
+## Decompiled code - Created in IDA Pro 9.0SP1 with File -> Produce File -> Create HTML File...
+## CLI output from a XenonRecomp run - When trying to compile with XenonRecomp, use > to save the output from the terminal
+##
+## Output: 
+## XenonRecomp config - Function block for TOML to be inputted into XenonRecomp 
+##
+
+import sys
+import re
+
+# Check if correct number of input arguments were given
+if len(sys.argv) != 4:
+    sys.exit("parser.py [IDA HTML] [XenonRecomp log] [Output TOML]")
+
+# Filepath input arguments
+ida_html = sys.argv[1]
+xenonrecomp_log = sys.argv[2]
+output_file = sys.argv[3]
+
+##
+## Parse XenonRecomp log
+##
+
+# The starting index of the erroneous switch statement address in the XenonRecomp log
+switch_idx = 22
+
+# Initialize list to store erroneous switch statement addresses
+switch_addrs = []
+    
+print("Parsing XenonRecomp log...")
+# Import each line of XenonRecomp log
+with open(xenonrecomp_log, 'r') as file:
+    # Read each line in the file
+    for line in file: 
+        # If this line describes an error, it has the address of a problematic switch statement
+        if re.search('ERROR', line) != None:
+            # Save the address as integer
+            switch_addrs.append(line[switch_idx:switch_idx+8])
+
+# Save only unique addresses and sort
+switch_addrs = set(switch_addrs)
+
+##
+## Parse IDA HTML
+##
+
+# See if current function is referenced by the inputted comparison address 
+def compare_xref_addr(line, compare_addr):
+    # Get the address of the referencing function
+    xref_idx = line.find('CODE XREF: sub_')
+    # If there is not a referencing function or it is in a different file, this doesn't need to be verified
+    if xref_idx == -1:
+        return True
+    else:
+        xref = line[xref_idx+15:xref_idx+23]
+
+    # Check equality between XREF address and comparison address
+    return xref == compare_addr
+
+# Initialize list to store start and end of functions 
+functs = []
+
+# Count how many functions have been added
+num_functs = 0
+
+# Mark if we are in .text section
+in_text = False
+
+# Mark if we should end parsing
+end_parse = False
+
+# Initialize address of last padding to 0
+pad_addr = '00000000'
+
+# Import each line of decompiled code
+print("Parsing IDA HTML...")
+with open(ida_html, 'r') as file:
+    # Read each line in the file
+    for line in file:
+        if not end_parse:
+            # If in .text
+            if in_text:
+                # Get the current address
+                colon_idx = line.find(':')
+                curr_addr = line[colon_idx+1:colon_idx+9]
+
+                # Check if this is the start of a function
+                if re.search('^\.text:'+curr_addr+' </s pan><span class="c[0-9]*">sub_'+curr_addr, line):
+                    # Check if this is a new function and not part of a switch
+                    if num_functs > 0:
+                        # If the referencing function is not the last added function, then it is not part of a switch
+                        equal_xref = compare_xref_addr(line, functs[num_functs-1][0])
+                        if equal_xref:
+                            # Add this address as a new function
+                            functs.append([curr_addr, 0])
+                            num_functs = num_functs+1
+                            # Convert addresses to integer for comparison
+                            curr_addr_int = int(curr_addr, 16)
+                            pad_addr_int = int(pad_addr, 16)
+                            # If previous address was padding, end last function at the padding
+                            if curr_addr_int-4 == pad_addr_int:
+                                functs[num_functs-2][1] = pad_addr_int
+                            # Else, end last function as this address
+                            else:
+                                functs[num_functs-2][1] = curr_addr_int
+
+                    # If this is the first function to be added, don't need to check if it is part of a switch
+                    else:
+                        # Add this address as a new function
+                        functs.append([curr_addr, 0])
+                        num_functs = num_functs+1
+
+                # If this is not the start of a function
+                else:
+                    # Check if it is a nested loc_ or def_
+                    if re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">[ld][oe][cf]_'+curr_addr, line):
+                        # If the referencing function is not the last added function, then it is not part of a switch
+                        if not compare_xref_addr(line, functs[num_functs-1][0]):
+                            # Add this address as a new function
+                            functs.append([curr_addr, 0])
+                            num_functs = num_functs+1
+                            # Convert addresses to integer for comparison
+                            curr_addr_int = int(curr_addr, 16)
+                            pad_addr_int = int(pad_addr, 16)
+                            # If previous address was padding, end last function at the padding
+                            if curr_addr_int-4 == pad_addr_int:
+                                functs[num_functs-2][1] = pad_addr_int
+                            # End the last function at the previous address
+                            else:
+                                functs[num_functs-2][1] = curr_addr_int
+                    
+                    # Check if this line is padding
+                    elif re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
+                        # Save address of most recently found padding
+                        pad_addr = curr_addr
+
+                    # Check if we are still in .text
+                    elif re.search('\.text:', line) == None:
+                        # If not, end parsing
+                            end_parse = True
+
+            # If not in .text
+            else:
+                # If .text section header found
+                if re.search('<span class="c[0-9]*">\.section &quot;\.text&quot;', line) != None:
+                    in_text = True
+
+##
+## Find .text functions that are referenced by .pdata functions
+##
+
+# Initialize list for functions that need to be added to toml
+output_functs = []
+
+# Look for related functions for every unique errored switch statement
+print("Searching for needed functions...")
+for switch_addr in switch_addrs:
+    # Start looking at first subroutine
+    curr_funct_idx = 0
+
+    # Save current switch statement address as integer
+    switch_addr_int = int(switch_addr, 16)
+
+    # The related function for this switch statement has not been found yet
+    search_for_funct = True
+
+    # Start search for function relating to switch statement
+    while(search_for_funct):
+        curr_funct = functs[curr_funct_idx]
+        # If switch address is after this function's start
+        curr_funct_start = int(curr_funct[0], 16)
+        if(switch_addr_int > curr_funct_start):
+            # If switch address is before this function's end
+            curr_funct_end = curr_funct[1]
+            if(switch_addr_int <= curr_funct_end):
+                # Save current function's start address and the function's length
+                output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start)])
+                # Don't need to continue search for this switch statement
+                search_for_funct = False
+
+            # Look in next function
+            curr_funct_idx = curr_funct_idx + 1
+
+        # Related function was not found
+        else:
+            print(f"WARNING: Function relating to {switch_addr} not found")
+            # Don't need to continue search for this switch statement
+            search_for_funct = False
+
+print(f"{len(output_functs)} functions found!")                
+
+# Create formatted string to export to TOML
+output_str = "functions = ["
+
+# Append all function addresses and lengths to formatted string
+for funct in output_functs:
+    # Format hex to uppercase 
+    curr_funct_start = '0x'+funct[0][2:].upper()
+    curr_funct_end = '0x'+funct[1][2:].upper()
+
+    # Format function 
+    curr_funct = "\n    { address = "+curr_funct_start+", size = "+curr_funct_end+" },"
+
+    # Add to complete output string
+    output_str = output_str+curr_funct
+
+# Delete last comma
+output_str = output_str[:len(output_str)-1]
+
+# Add last bracket
+output_str = output_str+"\n]"
+
+# Output to file
+with open(output_file, "w") as file:
+    file.write(output_str)
+
+

From fe3fdbdda5a5f5051f43c4743aa576997b666636 Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sat, 8 Mar 2025 13:00:48 -0500
Subject: [PATCH 2/9] Fix '</s pan>' regex typo and negate XREF check

---
 parser.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/parser.py b/parser.py
index 907b3bc..8577788 100644
--- a/parser.py
+++ b/parser.py
@@ -89,12 +89,11 @@ with open(ida_html, 'r') as file:
                 curr_addr = line[colon_idx+1:colon_idx+9]
 
                 # Check if this is the start of a function
-                if re.search('^\.text:'+curr_addr+' </s pan><span class="c[0-9]*">sub_'+curr_addr, line):
+                if re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr, line):
                     # Check if this is a new function and not part of a switch
                     if num_functs > 0:
                         # If the referencing function is not the last added function, then it is not part of a switch
-                        equal_xref = compare_xref_addr(line, functs[num_functs-1][0])
-                        if equal_xref:
+                        if not compare_xref_addr(line, functs[num_functs-1][0]):
                             # Add this address as a new function
                             functs.append([curr_addr, 0])
                             num_functs = num_functs+1

From 6dbbc6ea147517d96fb551f8210f1acaf9d8db20 Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sat, 8 Mar 2025 21:22:26 -0500
Subject: [PATCH 3/9] only gets address/size of 0x82893088 and size of
 0x82CF7080 wrong

---
 parser.py | 126 +++++++++++++++++++++++++++---------------------------
 1 file changed, 62 insertions(+), 64 deletions(-)

diff --git a/parser.py b/parser.py
index 8577788..7297e38 100644
--- a/parser.py
+++ b/parser.py
@@ -48,31 +48,33 @@ switch_addrs = set(switch_addrs)
 ## Parse IDA HTML
 ##
 
-# See if current function is referenced by the inputted comparison address 
-def compare_xref_addr(line, compare_addr):
-    # Get the address of the referencing function
-    xref_idx = line.find('CODE XREF: sub_')
-    # If there is not a referencing function or it is in a different file, this doesn't need to be verified
-    if xref_idx == -1:
-        return True
-    else:
-        xref = line[xref_idx+15:xref_idx+23]
-
-    # Check equality between XREF address and comparison address
-    return xref == compare_addr
-
 # Initialize list to store start and end of functions 
 functs = []
 
 # Count how many functions have been added
 num_functs = 0
 
+# Function for adding to function list and incrementing count
+def add_function(new_start_addr, prev_end_addr):
+    global num_functs
+    # If an end address for the last added function was specified
+    if prev_end_addr != None:
+        # Set end address for last added function
+        functs[num_functs-1][1] = prev_end_addr
+    # Add a new function to the list with the specified starting address
+    functs.append([new_start_addr, 0, []])
+    # Increment the number of functions
+    num_functs = num_functs+1
+
 # Mark if we are in .text section
 in_text = False
 
 # Mark if we should end parsing
 end_parse = False
 
+# Initialize address of last blr instruction to 0
+blr_addr = '00000000'
+
 # Initialize address of last padding to 0
 pad_addr = '00000000'
 
@@ -89,58 +91,54 @@ with open(ida_html, 'r') as file:
                 curr_addr = line[colon_idx+1:colon_idx+9]
 
                 # Check if this is the start of a function
-                if re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr, line):
-                    # Check if this is a new function and not part of a switch
+                if re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr+'</span><span class="c[0-9]*">: *</span><span class="c[0-9]*"># [A-Z][A-Z][A-Z][A-Z] XREF:.*', line):
+                    # Save current address as integer
+                    curr_addr_int = int(curr_addr, 16)
+
                     if num_functs > 0:
-                        # If the referencing function is not the last added function, then it is not part of a switch
-                        if not compare_xref_addr(line, functs[num_functs-1][0]):
-                            # Add this address as a new function
-                            functs.append([curr_addr, 0])
-                            num_functs = num_functs+1
-                            # Convert addresses to integer for comparison
-                            curr_addr_int = int(curr_addr, 16)
-                            pad_addr_int = int(pad_addr, 16)
-                            # If previous address was padding, end last function at the padding
-                            if curr_addr_int-4 == pad_addr_int:
-                                functs[num_functs-2][1] = pad_addr_int
-                            # Else, end last function as this address
-                            else:
-                                functs[num_functs-2][1] = curr_addr_int
-
-                    # If this is the first function to be added, don't need to check if it is part of a switch
+                        # If last address had padding, then this function was already added
+                        if not curr_addr_int-4 == int(pad_addr, 16):
+                            # Check if this function is part of latest added function
+                            is_nested_funct = False
+                            nested_functs = functs[num_functs-1][2]
+                            for nested_funct in nested_functs:
+                                is_nested_funct = nested_funct==curr_addr
+                            
+                            # If last address was not padding and not nested in latest function
+                            if not is_nested_funct:
+                                # If this is not the first function being added
+                                if num_functs > 0:
+                                    # Add new function and last function's end address
+                                    add_function(curr_addr_int, curr_addr_int)
                     else:
-                        # Add this address as a new function
-                        functs.append([curr_addr, 0])
-                        num_functs = num_functs+1
+                        # Add new function
+                        add_function(curr_addr_int, None)
 
-                # If this is not the start of a function
-                else:
-                    # Check if it is a nested loc_ or def_
-                    if re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">[ld][oe][cf]_'+curr_addr, line):
-                        # If the referencing function is not the last added function, then it is not part of a switch
-                        if not compare_xref_addr(line, functs[num_functs-1][0]):
-                            # Add this address as a new function
-                            functs.append([curr_addr, 0])
-                            num_functs = num_functs+1
-                            # Convert addresses to integer for comparison
-                            curr_addr_int = int(curr_addr, 16)
-                            pad_addr_int = int(pad_addr, 16)
-                            # If previous address was padding, end last function at the padding
-                            if curr_addr_int-4 == pad_addr_int:
-                                functs[num_functs-2][1] = pad_addr_int
-                            # End the last function at the previous address
-                            else:
-                                functs[num_functs-2][1] = curr_addr_int
-                    
-                    # Check if this line is padding
-                    elif re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
-                        # Save address of most recently found padding
-                        pad_addr = curr_addr
+                # If this is a location
+                elif re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line):
+                    curr_addr_int = int(curr_addr, 16)
+                    # If previous address was a blr instruction
+                    if curr_addr_int-4 == blr_addr:
+                        print(curr_addr)
+                        add_function(curr_addr_int, curr_addr_int)
+                    # If not, store as nested function in latest function
+                    else:
+                        # Find address of function that references this
+                        xref_idx = line.find('XREF: sub_')
+                        # If it was found
+                        if xref_idx > -1:
+                            # Store as nested function in latest function
+                            functs[num_functs-1][2].append(line[xref_idx+10:xref_idx+18])
 
-                    # Check if we are still in .text
-                    elif re.search('\.text:', line) == None:
-                        # If not, end parsing
-                            end_parse = True
+                # Check if this line is padding
+                elif num_functs > 0 and re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
+                    curr_addr_int = int(curr_addr, 16)
+                    # Add a new function at the line after padding, and end the current function at this padding address
+                    add_function(curr_addr_int+4, curr_addr_int)
+
+                # Check for blr instruction
+                elif re.search('<span class="c[0-9]*">blr', line):
+                    blr_addr = curr_addr 
 
             # If not in .text
             else:
@@ -171,13 +169,13 @@ for switch_addr in switch_addrs:
     while(search_for_funct):
         curr_funct = functs[curr_funct_idx]
         # If switch address is after this function's start
-        curr_funct_start = int(curr_funct[0], 16)
+        curr_funct_start = curr_funct[0]
         if(switch_addr_int > curr_funct_start):
             # If switch address is before this function's end
             curr_funct_end = curr_funct[1]
             if(switch_addr_int <= curr_funct_end):
                 # Save current function's start address and the function's length
-                output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start)])
+                output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr])
                 # Don't need to continue search for this switch statement
                 search_for_funct = False
 
@@ -202,7 +200,7 @@ for funct in output_functs:
     curr_funct_end = '0x'+funct[1][2:].upper()
 
     # Format function 
-    curr_funct = "\n    { address = "+curr_funct_start+", size = "+curr_funct_end+" },"
+    curr_funct = "\n    { address = "+curr_funct_start+", size = "+curr_funct_end+" src = "+funct[2]+" },"
 
     # Add to complete output string
     output_str = output_str+curr_funct

From 21f1a81aa30520d71d35b54d55100f68e87da8cd Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sun, 9 Mar 2025 12:53:48 -0400
Subject: [PATCH 4/9] missing 3 functions, 0x82F08730 starts at wrong address

---
 README.md |   6 +++
 parser.py | 120 +++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 01d2542..b13285e 100644
--- a/README.md
+++ b/README.md
@@ -188,6 +188,12 @@ functions = [
 
 You can define function boundaries explicitly using the `functions` property if XenonAnalyse fails to analyze them correctly, for example, with functions containing jump tables.
 
+You can automatically generate these using the FunctionParser.py script. You will need to create a HTML of your decompiled XEX with IDA using `File -> Produce File -> Create HTML File...` and save the terminal output from running XenonRecomp by appending `> [output log file path]` to the command.
+
+```
+python3 FunctionParser.py [input IDA HTML file path] [input XenonRecomp log file path] [output function list file path]
+```
+
 #### Invalid Instruction Skips
 
 ```toml
diff --git a/parser.py b/parser.py
index 7297e38..d3699b6 100644
--- a/parser.py
+++ b/parser.py
@@ -21,6 +21,9 @@ ida_html = sys.argv[1]
 xenonrecomp_log = sys.argv[2]
 output_file = sys.argv[3]
 
+# Disable extra debug output 
+debug = False
+
 ##
 ## Parse XenonRecomp log
 ##
@@ -37,7 +40,7 @@ with open(xenonrecomp_log, 'r') as file:
     # Read each line in the file
     for line in file: 
         # If this line describes an error, it has the address of a problematic switch statement
-        if re.search('ERROR', line) != None:
+        if re.search('ERROR: Switch case at ', line) != None:
             # Save the address as integer
             switch_addrs.append(line[switch_idx:switch_idx+8])
 
@@ -55,14 +58,14 @@ functs = []
 num_functs = 0
 
 # Function for adding to function list and incrementing count
-def add_function(new_start_addr, prev_end_addr):
+def add_function(new_start_addr, prev_end_addr, start_type):
     global num_functs
     # If an end address for the last added function was specified
     if prev_end_addr != None:
         # Set end address for last added function
         functs[num_functs-1][1] = prev_end_addr
     # Add a new function to the list with the specified starting address
-    functs.append([new_start_addr, 0, []])
+    functs.append([new_start_addr, 0, [], start_type])
     # Increment the number of functions
     num_functs = num_functs+1
 
@@ -75,8 +78,11 @@ end_parse = False
 # Initialize address of last blr instruction to 0
 blr_addr = '00000000'
 
+# Initialize address of last bctr instruction to 0
+bctr_addr = '00000000'
+
 # Initialize address of last padding to 0
-pad_addr = '00000000'
+pad_addr = 0
 
 # Import each line of decompiled code
 print("Parsing IDA HTML...")
@@ -95,32 +101,80 @@ with open(ida_html, 'r') as file:
                     # Save current address as integer
                     curr_addr_int = int(curr_addr, 16)
 
+                    # If this is not the first function being added
                     if num_functs > 0:
                         # If last address had padding, then this function was already added
-                        if not curr_addr_int-4 == int(pad_addr, 16):
+                        if curr_addr_int-4 == pad_addr:
+                            # Set function type for start address
+                            functs[num_functs-1][3] = 'sub'
+                        else:
                             # Check if this function is part of latest added function
                             is_nested_funct = False
                             nested_functs = functs[num_functs-1][2]
                             for nested_funct in nested_functs:
-                                is_nested_funct = nested_funct==curr_addr
+                                if nested_funct == curr_addr:
+                                    is_nested_funct = True
+                                    break
                             
                             # If last address was not padding and not nested in latest function
                             if not is_nested_funct:
-                                # If this is not the first function being added
-                                if num_functs > 0:
-                                    # Add new function and last function's end address
-                                    add_function(curr_addr_int, curr_addr_int)
+                                # Add new function and last function's end address
+                                add_function(curr_addr_int, curr_addr_int, 'sub')
                     else:
                         # Add new function
-                        add_function(curr_addr_int, None)
+                        add_function(curr_addr_int, None, 'sub')
 
                 # If this is a location
                 elif re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line):
                     curr_addr_int = int(curr_addr, 16)
+                    curr_funct = functs[num_functs-1]
                     # If previous address was a blr instruction
-                    if curr_addr_int-4 == blr_addr:
-                        print(curr_addr)
-                        add_function(curr_addr_int, curr_addr_int)
+                    if curr_addr_int-4 == int(blr_addr, 16):
+                        # If last added function is a subroutine and has no nested functions
+                        if curr_funct[3] == 'sub' and not curr_funct[2]:
+                            xref_idx = line.find('XREF: sub_')
+                            # If XREF is a subroutine
+                            if xref_idx > -1:
+                                xref = line[xref_idx+10:xref_idx+18]
+                                # If the XREF is equivalent to the last function's start address
+                                if int(xref, 16) == curr_funct[0]:
+                                    # Store as nested function in latest function
+                                    functs[num_functs-1][2].append(xref)
+                                # If not, add this address as a new function
+                                else: 
+                                    add_function(curr_addr_int, curr_addr_int, 'loc')
+                            # If not, add this address as new function
+                            else:
+                                add_function(curr_addr_int, curr_addr_int, 'loc')
+
+                        # If last added function is not a subroutine or has nested functions:
+                        else:
+                            # Check for XREF to subroutine
+                            xref_idx = line.find('XREF: sub_')
+                            if xref_idx > -1:
+                                xref = line[xref_idx+10:xref_idx+18]
+                            # If not found, check for XREF to .text address
+                            else:
+                                xref_idx = line.find('XREF: .text:')
+                                if xref_idx > -1:
+                                    underscore_idx = line.find('_', xref_idx)
+                                    # If referencing sub_, loc_, etc.
+                                    if underscore_idx > -1:
+                                        xref = line[underscore_idx+1:underscore_idx+9]
+                                    # Else, there's only the address after .text
+                                    else:
+                                        xref = line[xref_idx+12:xref_idx+20]
+                                else:
+                                    xref = '-1'
+                                
+                            # If XREF points to subroutine or .text address before current address
+                            if int(xref, 16) < curr_addr_int:
+                                # Store as nested function
+                                functs[num_functs-1][2].append(xref)
+                            # If not, add this address as new funciton
+                            else:
+                                add_function(curr_addr_int, curr_addr_int, 'loc')
+
                     # If not, store as nested function in latest function
                     else:
                         # Find address of function that references this
@@ -132,9 +186,14 @@ with open(ida_html, 'r') as file:
 
                 # Check if this line is padding
                 elif num_functs > 0 and re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
+                    # Convert current address to integer 
                     curr_addr_int = int(curr_addr, 16)
+
                     # Add a new function at the line after padding, and end the current function at this padding address
-                    add_function(curr_addr_int+4, curr_addr_int)
+                    add_function(curr_addr_int+4, curr_addr_int, None)
+                    
+                    # Save padding address
+                    pad_addr = curr_addr_int
 
                 # Check for blr instruction
                 elif re.search('<span class="c[0-9]*">blr', line):
@@ -175,7 +234,11 @@ for switch_addr in switch_addrs:
             curr_funct_end = curr_funct[1]
             if(switch_addr_int <= curr_funct_end):
                 # Save current function's start address and the function's length
-                output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr])
+                if debug:
+                    output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start), switch_addr])
+                else:
+                    output_functs.append([hex(curr_funct_start), hex(curr_funct_end-curr_funct_start)])
+            
                 # Don't need to continue search for this switch statement
                 search_for_funct = False
 
@@ -184,23 +247,42 @@ for switch_addr in switch_addrs:
 
         # Related function was not found
         else:
-            print(f"WARNING: Function relating to {switch_addr} not found")
+            print(f"WARNING: Function relating to {switch_addr} not found! Skipping.")
             # Don't need to continue search for this switch statement
             search_for_funct = False
 
-print(f"{len(output_functs)} functions found!")                
+# Remove duplicates
+if not debug: 
+    output_functs = list(set(tuple(funct) for funct in output_functs))
+
+# Make sure there are no functions with the same starting address but different lengths
+for i in range(len(output_functs)):
+    for j in range(i+1, len(output_functs)):
+        curr_funct_start = output_functs[i][0]
+        if curr_funct_start == output_functs[j][0]:
+            print(f"WARNING: {curr_funct_start} has multiple entries of different lengths, manually find correct one.")
+
+print(f"{len(output_functs)} functions found!")
+
+##
+## Output all found functions to TOML in correct format
+##
 
 # Create formatted string to export to TOML
 output_str = "functions = ["
 
 # Append all function addresses and lengths to formatted string
+print("Outputting to formatted file...")
 for funct in output_functs:
     # Format hex to uppercase 
     curr_funct_start = '0x'+funct[0][2:].upper()
     curr_funct_end = '0x'+funct[1][2:].upper()
 
     # Format function 
-    curr_funct = "\n    { address = "+curr_funct_start+", size = "+curr_funct_end+" src = "+funct[2]+" },"
+    curr_funct = "\n    { address = "+curr_funct_start+", size = "+curr_funct_end
+    if debug:
+        curr_funct = curr_funct+", src = "+funct[2]
+    curr_funct = curr_funct+" },"
 
     # Add to complete output string
     output_str = output_str+curr_funct

From 2365f4d69761cc04a7627edbdf2dcc5b36285f21 Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sun, 9 Mar 2025 17:56:37 -0400
Subject: [PATCH 5/9] missing 3 functions, but otherwise correct

---
 parser.py | 94 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 45 insertions(+), 49 deletions(-)

diff --git a/parser.py b/parser.py
index d3699b6..05da7f0 100644
--- a/parser.py
+++ b/parser.py
@@ -75,11 +75,17 @@ in_text = False
 # Mark if we should end parsing
 end_parse = False
 
+# Initialize address of last bctr instruction to 0
+bctr_addr = '00000000'
+
 # Initialize address of last blr instruction to 0
 blr_addr = '00000000'
 
-# Initialize address of last bctr instruction to 0
-bctr_addr = '00000000'
+# Initialize address of last 'End of function' comment to 0
+eof_addr = '00000000'
+
+# Initialize address of last restgprlr instruction to 0
+restgprlr_addr = '00000000'
 
 # Initialize address of last padding to 0
 pad_addr = 0
@@ -103,8 +109,8 @@ with open(ida_html, 'r') as file:
 
                     # If this is not the first function being added
                     if num_functs > 0:
-                        # If last address had padding, then this function was already added
-                        if curr_addr_int-4 == pad_addr:
+                        # If last address had padding or restgprlr instruction, then this function was already added
+                        if curr_addr_int-4 == pad_addr or curr_addr_int-4 == restgprlr_addr:
                             # Set function type for start address
                             functs[num_functs-1][3] = 'sub'
                         else:
@@ -130,59 +136,31 @@ with open(ida_html, 'r') as file:
                     curr_funct = functs[num_functs-1]
                     # If previous address was a blr instruction
                     if curr_addr_int-4 == int(blr_addr, 16):
-                        # If last added function is a subroutine and has no nested functions
-                        if curr_funct[3] == 'sub' and not curr_funct[2]:
-                            xref_idx = line.find('XREF: sub_')
-                            # If XREF is a subroutine
+                        # If previous address had an 'End of function' comment or if there was a bctr with the comment
+                        if blr_addr == eof_addr or bctr_addr == eof_addr:
+                            # Find a XREF pointing to a .text address
+                            xref_idx = line.find('XREF: .text:')
                             if xref_idx > -1:
-                                xref = line[xref_idx+10:xref_idx+18]
-                                # If the XREF is equivalent to the last function's start address
-                                if int(xref, 16) == curr_funct[0]:
-                                    # Store as nested function in latest function
-                                    functs[num_functs-1][2].append(xref)
-                                # If not, add this address as a new function
-                                else: 
-                                    add_function(curr_addr_int, curr_addr_int, 'loc')
-                            # If not, add this address as new function
-                            else:
-                                add_function(curr_addr_int, curr_addr_int, 'loc')
-
-                        # If last added function is not a subroutine or has nested functions:
-                        else:
-                            # Check for XREF to subroutine
-                            xref_idx = line.find('XREF: sub_')
-                            if xref_idx > -1:
-                                xref = line[xref_idx+10:xref_idx+18]
-                            # If not found, check for XREF to .text address
-                            else:
-                                xref_idx = line.find('XREF: .text:')
-                                if xref_idx > -1:
-                                    underscore_idx = line.find('_', xref_idx)
-                                    # If referencing sub_, loc_, etc.
-                                    if underscore_idx > -1:
-                                        xref = line[underscore_idx+1:underscore_idx+9]
-                                    # Else, there's only the address after .text
-                                    else:
-                                        xref = line[xref_idx+12:xref_idx+20]
+                                underscore_idx = line.find('_', xref_idx)
+                                if underscore_idx > -1:
+                                    xref = line[underscore_idx+1:underscore_idx+9]
                                 else:
-                                    xref = '-1'
-                                
-                            # If XREF points to subroutine or .text address before current address
-                            if int(xref, 16) < curr_addr_int:
-                                # Store as nested function
-                                functs[num_functs-1][2].append(xref)
-                            # If not, add this address as new funciton
+                                    xref = line[xref_idx+12:xref_idx+20]
                             else:
+                                xref = None
+
+                            # Couldn't find XREF pointing to .text address or the XREF is after this address
+                            if xref == None or int(xref, 16) > curr_addr_int:
+                                # Add as new function
                                 add_function(curr_addr_int, curr_addr_int, 'loc')
 
-                    # If not, store as nested function in latest function
                     else:
                         # Find address of function that references this
-                        xref_idx = line.find('XREF: sub_')
+                        xref_idx = line.find('CODE XREF: sub_')
                         # If it was found
                         if xref_idx > -1:
                             # Store as nested function in latest function
-                            functs[num_functs-1][2].append(line[xref_idx+10:xref_idx+18])
+                            functs[num_functs-1][2].append(line[xref_idx+15:xref_idx+23])
 
                 # Check if this line is padding
                 elif num_functs > 0 and re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
@@ -196,8 +174,26 @@ with open(ida_html, 'r') as file:
                     pad_addr = curr_addr_int
 
                 # Check for blr instruction
-                elif re.search('<span class="c[0-9]*">blr', line):
-                    blr_addr = curr_addr 
+                elif re.search('<span class="c[0-9]*">blr$', line):
+                    blr_addr = curr_addr
+
+                # Check for 'End of function' comment
+                elif re.search('End of function ', line):
+                    eof_addr = curr_addr
+    
+                # Check for bctr instruction
+                elif re.search('<span class="c[0-9]*">bctr$', line):
+                    bctr_addr = curr_addr
+
+                # Check for restgprlr instruction
+                elif re.search('<span class="c[0-9]*">b         </span><span class="c[0-9]*">__restgprlr_[0-9][0-9]$', line):
+                    # Convert current address to integer 
+                    curr_addr_int = int(curr_addr, 16)
+
+                    # Add a new function at the line after restgprlr instruction, and end the current function at this address
+                    add_function(curr_addr_int+4, curr_addr_int, None)
+                    
+                    restgprlr_addr = curr_addr_int
 
             # If not in .text
             else:

From 444ee2bda177dc25aa8c151acc1553ba474dbb7f Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sun, 9 Mar 2025 19:10:49 -0400
Subject: [PATCH 6/9] don't run duplicate check during debug because we allow
 duplicates during that, rename file

---
 parser.py => Auto_Function_Parser.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
 rename parser.py => Auto_Function_Parser.py (97%)

diff --git a/parser.py b/Auto_Function_Parser.py
similarity index 97%
rename from parser.py
rename to Auto_Function_Parser.py
index 05da7f0..cce8094 100644
--- a/parser.py
+++ b/Auto_Function_Parser.py
@@ -252,11 +252,12 @@ if not debug:
     output_functs = list(set(tuple(funct) for funct in output_functs))
 
 # Make sure there are no functions with the same starting address but different lengths
-for i in range(len(output_functs)):
-    for j in range(i+1, len(output_functs)):
-        curr_funct_start = output_functs[i][0]
-        if curr_funct_start == output_functs[j][0]:
-            print(f"WARNING: {curr_funct_start} has multiple entries of different lengths, manually find correct one.")
+if not debug:
+    for i in range(len(output_functs)):
+        for j in range(i+1, len(output_functs)):
+            curr_funct_start = output_functs[i][0]
+            if curr_funct_start == output_functs[j][0]:
+                print(f"WARNING: {curr_funct_start} has multiple entries of different lengths, manually find correct one.")
 
 print(f"{len(output_functs)} functions found!")
 

From 9a4dc311c7776fd6618987fd1f1d6f09fdff623c Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sun, 9 Mar 2025 19:12:39 -0400
Subject: [PATCH 7/9] fix name in script

---
 Auto_Function_Parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Auto_Function_Parser.py b/Auto_Function_Parser.py
index cce8094..7177b8e 100644
--- a/Auto_Function_Parser.py
+++ b/Auto_Function_Parser.py
@@ -14,7 +14,7 @@ import re
 
 # Check if correct number of input arguments were given
 if len(sys.argv) != 4:
-    sys.exit("parser.py [IDA HTML] [XenonRecomp log] [Output TOML]")
+    sys.exit("Auto_Function_Parser.py [IDA HTML] [XenonRecomp log] [Output TOML]")
 
 # Filepath input arguments
 ida_html = sys.argv[1]

From 4452868029753602a884068b63aa72c3dbba6805 Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Sun, 9 Mar 2025 20:53:46 -0400
Subject: [PATCH 8/9] When checking for a subroutine, don't require .text to be
 at the beginning of the line; this accounts for some weird HTML formatting

---
 Auto_Function_Parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Auto_Function_Parser.py b/Auto_Function_Parser.py
index 7177b8e..2e584c2 100644
--- a/Auto_Function_Parser.py
+++ b/Auto_Function_Parser.py
@@ -103,7 +103,7 @@ with open(ida_html, 'r') as file:
                 curr_addr = line[colon_idx+1:colon_idx+9]
 
                 # Check if this is the start of a function
-                if re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr+'</span><span class="c[0-9]*">: *</span><span class="c[0-9]*"># [A-Z][A-Z][A-Z][A-Z] XREF:.*', line):
+                if re.search('.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr+'</span><span class="c[0-9]*">: *</span><span class="c[0-9]*"># [A-Z][A-Z][A-Z][A-Z] XREF:', line):
                     # Save current address as integer
                     curr_addr_int = int(curr_addr, 16)
 

From 8fc280bed99903d7bfaf1003e18cfec0c627141d Mon Sep 17 00:00:00 2001
From: JillianTo <jtoschool388@gmail.com>
Date: Tue, 11 Mar 2025 15:54:36 -0400
Subject: [PATCH 9/9] use raw strings to avoid invalid escape sequence
 SyntaxWarnings in Python 3.12

---
 Auto_Function_Parser.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Auto_Function_Parser.py b/Auto_Function_Parser.py
index 2e584c2..d86a23e 100644
--- a/Auto_Function_Parser.py
+++ b/Auto_Function_Parser.py
@@ -40,7 +40,7 @@ with open(xenonrecomp_log, 'r') as file:
     # Read each line in the file
     for line in file: 
         # If this line describes an error, it has the address of a problematic switch statement
-        if re.search('ERROR: Switch case at ', line) != None:
+        if re.search('ERROR: Switch case at ', line):
             # Save the address as integer
             switch_addrs.append(line[switch_idx:switch_idx+8])
 
@@ -103,7 +103,7 @@ with open(ida_html, 'r') as file:
                 curr_addr = line[colon_idx+1:colon_idx+9]
 
                 # Check if this is the start of a function
-                if re.search('.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr+'</span><span class="c[0-9]*">: *</span><span class="c[0-9]*"># [A-Z][A-Z][A-Z][A-Z] XREF:', line):
+                if re.search(r'\.text:'+curr_addr+' </span><span class="c[0-9]*">sub_'+curr_addr+'</span><span class="c[0-9]*">: *</span><span class="c[0-9]*"># [A-Z][A-Z][A-Z][A-Z] XREF:', line):
                     # Save current address as integer
                     curr_addr_int = int(curr_addr, 16)
 
@@ -131,7 +131,7 @@ with open(ida_html, 'r') as file:
                         add_function(curr_addr_int, None, 'sub')
 
                 # If this is a location
-                elif re.search('^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line):
+                elif re.search(r'^\.text:'+curr_addr+' </span><span class="c[0-9]*">loc_'+curr_addr, line):
                     curr_addr_int = int(curr_addr, 16)
                     curr_funct = functs[num_functs-1]
                     # If previous address was a blr instruction
@@ -163,7 +163,7 @@ with open(ida_html, 'r') as file:
                             functs[num_functs-1][2].append(line[xref_idx+15:xref_idx+23])
 
                 # Check if this line is padding
-                elif num_functs > 0 and re.search('<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
+                elif num_functs > 0 and re.search(r'<span class="c[0-9]*">\.long </span><span class="c[0-9]*">0$', line):
                     # Convert current address to integer 
                     curr_addr_int = int(curr_addr, 16)
 
@@ -198,7 +198,7 @@ with open(ida_html, 'r') as file:
             # If not in .text
             else:
                 # If .text section header found
-                if re.search('<span class="c[0-9]*">\.section &quot;\.text&quot;', line) != None:
+                if re.search(r'<span class="c[0-9]*">\.section &quot;\.text&quot;', line):
                     in_text = True
 
 ##