shithub: pokecrystal

Download patch

ref: 3359121ba732f702fa3dbbc06357e3b5085a9067
parent: e471fbeb3217107e94acca38213f8f9ffed98665
author: Bryan Bishop <kanzure@gmail.com>
date: Wed Jun 20 23:37:13 EDT 2012

graph.py - parse pokecrystal into a function graph for d3.js

--- /dev/null
+++ b/extras/graph.py
@@ -1,0 +1,143 @@
+#!/usr/bin/python
+# author: Bryan Bishop <kanzure@gmail.com>
+# date: 2012-06-20
+
+import networkx as nx
+
+from romstr import RomStr, DisAsm, \
+    relative_jumps, call_commands, \
+    relative_unconditional_jumps
+
+class RomGraph(nx.DiGraph):
+    """ Graphs various functions pointing to each other.
+
+        TODO: Bank switches are nasty. They should be detected. Otherwise,
+        functions will point to non-functions within the same bank. Another way
+        to detect bankswitches is retroactively. By disassembling one function
+        after another within the function banks, it can be roughly assumed that
+        anything pointing to something else (within the same bank) is really
+        actually a bankswitch. An even better method to handle bankswitches
+        would be to just detect those situations in the asm (but I presently
+        forget how bankswitches are performed in pokecrystal).
+    """
+
+    # some areas shouldn't be parsed as asm
+    exclusions = []
+
+    # where is the first function located?
+    start_address = 0x150
+
+    # and where is a good place to stop?
+    end_address = 0x4000 * 0x01 # only do the first bank? sure..
+
+    # where is the rom stored?
+    rompath = "../baserom.gbc"
+
+    def __init__(self, rom=None, **kwargs):
+        """ Loads and parses the ROM into a function graph.
+        """
+        # continue the initialization
+        nx.DiGraph.__init__(self, **kwargs)
+
+        # use the rom that was passed in, or load one from rompath
+        if rom is None:
+            self.load_rom()
+        else:
+            self.rom = rom
+
+        # start parsing the ROM
+        self.parse()
+
+    def load_rom(self):
+        """ Creates a RomStr from rompath.
+        """
+        # a ROM is binary data, so don't let text mode mangle the bytes
+        with open(self.rompath, "rb") as file_handler:
+            self.rom = RomStr(file_handler.read())
+
+    def parse(self):
+        """ Parses the ROM starting with the first function address. Each
+            function is disassembled and parsed to find where else it leads to.
+            Every address it uses becomes an edge in this graph, and the
+            parsed functions are kept on self.functions, keyed by address.
+        """
+        # parsed functions by address, and addresses still waiting for a turn
+        functions = {}
+        other_addresses = set()
+        address = self.start_address
+        count = 0 # number of functions parsed so far (capped below)
+
+        while True:
+            if count > 100:
+                break
+
+            if address >= self.end_address or address in functions:
+                # can't keep going linearly; fall back to a queued address,
+                # dropping any that were parsed since they were queued
+                other_addresses.difference_update(functions)
+                if not other_addresses:
+                    # no more addresses detected- exit loop
+                    break
+                # parse some other address possibly in a remote bank
+                address = other_addresses.pop()
+
+            # parse the asm
+            func = self.rom.to_asm(address)
+
+            # store this parsed function
+            functions[address] = func
+
+            # where does this function jump to?
+            used_addresses = set(func.used_addresses())
+
+            # add this information to the graph
+            for used_address in used_addresses:
+                # only add this remote address if it's not yet parsed
+                if used_address not in functions:
+                    other_addresses.add(used_address)
+
+                # add this other address to the graph
+                self.add_node(used_address)
+
+                # add this as an edge between the two nodes
+                self.add_edge(address, used_address)
+
+            # setup the next function to be parsed
+            address = func.last_address
+
+            count += 1
+
+        self.functions = functions
+
+    def pretty_printer(self):
+        """ Shows some text output describing which nodes point to which other
+            nodes.
+        """
+        print(self.edges())
+
+    def to_d3(self):
+        """ Writes the graph as JSON to graphs.json, for use with d3.js.
+        """
+        import networkx.readwrite.json_graph as json_graph
+        content = json_graph.dumps(self)
+        # the with block closes the file even if the write fails
+        with open("graphs.json", "w") as fh:
+            fh.write(content)
+
+class RedGraph(RomGraph):
+    """ Placeholder for a pokered graph. Nothing red-specific is implemented.
+    """
+    # where is the rom stored?
+    rompath = "../pokered-baserom.gbc"
+
+class CryGraph(RomGraph):
+    """ Function graph for the pokecrystal ROM.
+    """
+    # areas that shouldn't be parsed as asm (presumably [start, end] pairs)
+    exclusions = [[0x000, 0x149]]
+    rompath = "../baserom.gbc"
+
+if __name__ == "__main__":
+    graph = CryGraph() # loads and parses the rom at CryGraph.rompath
+    graph.pretty_printer()
+    graph.to_d3()
--- a/extras/romstr.py
+++ b/extras/romstr.py
@@ -11,9 +11,9 @@
     0xe9, # jp hl
     0xc9, # ret
 ] # possibly also:
-    # 0xc3, # jp
+    # 0xc3,  # jp
     # 0xc18, # jr
-    # 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0xc3, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8,
+    # 0xda, 0xe9, 0xd2, 0xc2, 0xca, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8,
     # 0xd0, 0xc0, 0xc8, 0xc9
 
 spacing = "\t"
@@ -124,7 +124,7 @@
             that will be parsed, so that large patches of data aren't parsed as
             code.
         """
-        if "0x" in address:
+        if type(address) == str and "0x" in address:
             address = int(address, 16)
 
         start_address = address
@@ -302,6 +302,7 @@
 
                             opstr2 = base_opstr[:base_opstr.find("x")].lower() + insertion + base_opstr[base_opstr.find("x")+1:].lower()
                             asm_command["formatted_with_labels"] = opstr2
+                            asm_command["target_address"] = target_address
 
                         current_byte_number += 1
                         offset += 1
@@ -331,6 +332,7 @@
 
                         opstr2 = base_opstr[:base_opstr.find("?")].lower() + insertion + base_opstr[base_opstr.find("?")+1:].lower()
                         asm_command["formatted_with_labels"] = opstr2
+                        asm_command["target_address"] = target_address
 
                         current_byte_number += 2
                         offset += 2
@@ -423,18 +425,31 @@
             offset += 1
 
         # also save the last command if necessary
-        if asm_commands[asm_commands.keys()[-1]] is not asm_command:
+        if len(asm_commands.keys()) > 0 and asm_commands[asm_commands.keys()[-1]] is not asm_command:
             asm_commands[asm_command["address"]] = asm_command
 
         # store the set of commands on this object
         self.asm_commands = asm_commands
 
-        self.end_address = offset + 1
+        self.end_address  = offset + 1
+        self.last_address = self.end_address
 
     def has_outstanding_labels(self, asm_commands, offset):
         """ Checks if there are any labels that haven't yet been created.
         """ # is this really necessary??
         return False
+
+    def used_addresses(self):
+        """ Returns the set of unique target addresses recorded on this
+            function's commands (the addresses it will probably call).
+        """
+        addresses = set()
+
+        for command in self.asm_commands.values():
+            if "target_address" in command:
+                addresses.add(command["target_address"])
+
+        return addresses
 
     def __str__(self):
         """ ASM pretty printer.
--