shithub: pokecrystal

--- /dev/null

+++ b/extras/pkmnasm/asmlex.py

@@ -1,0 +1,494 @@

+# -*- coding: utf-8 -*-

+import ply.lex as lex

+import sys, os

+FILENAME = '' # Current filename

+_tokens = ('STRING', 'NEWLINE', 'LABEL',

+        'ID', 'COMMA', 'PLUS', 'MINUS', 'LP', 'RP', 'MUL', 'DIV', 'POW',

+        'UMINUS', 'APO', 'INTEGER', 'ADDR', 'RB', 'LB',

+        'LOCALLABEL', 'LSHIFT', 'RSHIFT', 'BITWISE_OR', 'BITWISE_AND',

+        'LOGICAL_NOT', 'BITWISE_COMPLEMENT',

+    )

+reserved_instructions = {

+    'adc': 'ADC',

+    'add': 'ADD',

+    'and': 'AND',

+    'bit': 'BIT',

+    'call': 'CALL',

+    'ccf': 'CCF',

+    'cp': 'CP',

+    'cpd': 'CPD',

+    'cpdr': 'CPDR',

+    'cpi': 'CPI',

+    'cpir': 'CPIR',

+    'cpl': 'CPL',

+    'daa': 'DAA',

+    'dec': 'DEC',

+    'di': 'DI',

+    'djnz': 'DJNZ',

+    'ei': 'EI',

+    'ex': 'EX',

+    'exx': 'EXX',

+    'halt': 'HALT',

+    'im': 'IM',

+    'in': 'IN',

+    'inc': 'INC',

+    'ind': 'IND',

+    'indr': 'INDR',

+    'ini': 'INI',

+    'inir': 'INIR',

+    'jp': 'JP',

+    'jr': 'JR',

+    'ld': 'LD',

+    'ldd': 'LDD',

+    'lddr': 'LDDR',

+    'ldi': 'LDI',

+    'ldir': 'LDIR',

+    'neg': 'NEG',

+    'nop': 'NOP',

+    'or': 'OR',

+    'otdr': 'OTDR',

+    'otir': 'OTIR',

+    'out': 'OUT',

+    'outd': 'OUTD',

+    'outi': 'OUTI',

+    'pop': 'POP',

+    'push': 'PUSH',

+    'res': 'RES',

+    'ret': 'RET',

+    'reti': 'RETI',

+    'retn': 'RETN',

+    'rl': 'RL',

+    'rla': 'RLA',

+    'rlc': 'RLC',

+    'rlca': 'RLCA',

+    'rld': 'RLD',

+    'rr': 'RR',

+    'rra': 'RRA',

+    'rrc': 'RRC',

+    'rrca': 'RRCA',

+    'rrd': 'RRD',

+    'rst': 'RST',

+    'sbc': 'SBC',

+    'scf': 'SCF',

+    'set': 'SET',

+    'sla': 'SLA',

+    'sll': 'SLL',

+    'sra': 'SRA',

+    'srl': 'SRL',

+    'sub': 'SUB',

+    'xor': 'XOR',

+    }

+pseudo = { # pseudo ops

+    'align': 'ALIGN',

+    'org': 'ORG',

+    'defb': 'DEFB',

+    'defm': 'DEFB',

+    'db'  : 'DEFB',

+    'defs': 'DEFS',

+    'defw': 'DEFW',

+    'ds'  : 'DEFS',

+    'dw'  : 'DEFW',

+    'equ': 'EQU',

+    'proc': 'PROC',

+    'endp': 'ENDP',

+    'local': 'LOCAL',

+    'end': 'END',

+    'incbin': 'INCBIN'

+    }

+regs8 = {'a': 'A',

+    'b': 'B', 'c': 'C',

+    'd': 'D', 'e': 'E',

+    'h': 'H', 'l': 'L',

+    'i': 'I', 'r': 'R',

+    'ixh': 'IXH', 'ixl': 'IXL',

+    'iyh': 'IYH', 'iyl': 'IYL'

+    }

+regs16 = {

+    'af': 'AF',

+    'bc': 'BC',

+    'de': 'DE',

+    'hl': 'HL',

+    'ix': 'IX',

+    'iy': 'IY',

+    'sp': 'SP'

+}

+flags = {

+    'z' : 'Z',

+    'nz' : 'NZ',

+    'nc' : 'NC',

+    'po' : 'PO',

+    'pe' : 'PE',

+    'p' : 'P',

+    'm' : 'M',

+}

+preprocessor = {

+    'init' : '_INIT',

+    'line' : '_LINE'

+}

+# List of token names.

+_tokens = _tokens \

+        + tuple(reserved_instructions.values()) \

+        + tuple(pseudo.values()) \

+        + tuple(regs8.values()) \

+        + tuple(regs16.values()) \

+        + tuple(flags.values()) \

+        + tuple(preprocessor.values())

+def get_uniques(l):

+    ''' Returns a list with no repeated elements.

+    '''

+    result = []

+    for i in l:

+        if i not in result:

+            result.append(i)

+    return result

+tokens = get_uniques(_tokens)

+class Lexer(object):

+    ''' Custom lexer class, so that multiple instances can be created.

+    This lexer is just a wrapper of the current FILESTACK[-1] lexer

+    '''

+    states = (

+        ('preproc', 'exclusive'),

+    )

+    # -------------- TOKEN ACTIONS --------------

+    def __set_lineno(self, value):

+        ''' Setter for lexer.lineno

+        '''

+        self.lex.lineno = value

+    def __get_lineno(self):

+        ''' Getter for lexer.lineno

+        '''

+        if self.lex is None:

+            return 0

+        return self.lex.lineno

+    lineno = property(__get_lineno, __set_lineno)

+    def t_INITIAL_preproc_skip(self, t):

+        r'[ \t]+'

+        pass    # Ignore spaces and tabs

+    def t_CHAR(self, t):

+        r"'.'" # A single char

+        t.value = ord(t.value[1])

+        t.type = 'INTEGER'

+        return t

+    def t_HEXA(self, t):

+        r'([0-9][0-9a-fA-F]*[hH])|(\$[0-9a-fA-F]+)'

+        if t.value[0] == '$':

+            t.value = t.value[1:] # Remove initial '$'

+        else:

+            t.value = t.value[:-1] # Remove last 'h'

+        t.value = int(t.value, 16) # Convert the hex digits to an integer

+        t.type = 'INTEGER'

+        return t

+    def t_BIN(self, t):

+        r'(%[01]+)|([01]+[bB])' # A Binary integer

+        # Note: 00B is binary 0, but

+        # 00Bh is hex 0B (decimal 11). So this pattern must come

+        # after HEXA

+        if t.value[0] == '%':

+            t.value = t.value[1:] # Remove initial %

+        else:

+            t.value = t.value[:-1] # Remove last 'b'

+        t.value = int(t.value, 2) # Convert the binary digits to an integer

+        t.type = 'INTEGER'

+        return t

+    def t_INITIAL_preproc_INTEGER(self, t):

+        r'[0-9]+' # an integer decimal number

+        t.value = int(t.value)

+        return t

+    def t_INITIAL_ID(self, t):

+        r'[_a-zA-Z.]([.]?[_a-zA-Z0-9\\@\#]+)*[:]?(\\\W)?' # Any identifier

+        tmp = t.value # Saves original value

+        if tmp[-1] == ':':

+            t.type = 'LABEL'

+            t.value = tmp[:-1]

+            return t

+        if tmp[0] == "." and (tmp[-2:] == "\@" or tmp[-3:] == "\@:"):

+            t.type = "LOCALLABEL"

+            t.value = tmp[1:]

+            return t

+        t.value = tmp.upper() # Convert it to uppercase, since our internal tables use uppercase

+        id = tmp.lower()

+        t.type = reserved_instructions.get(id)

+        if t.type is not None: return t

+        t.type = pseudo.get(id)

+        if t.type is not None: return t

+        t.type = regs8.get(id)

+        if t.type is not None: return t

+        t.type = flags.get(id)

+        if t.type is not None: return t

+        t.type = regs16.get(id, 'ID')

+        if t.type == 'ID':

+            t.value = tmp # Restores original value

+        return t

+    def t_preproc_ID(self, t):

+        r'[_a-zA-Z][_a-zA-Z0-9]*' # preprocessor directives

+        t.type = preprocessor.get(t.value.lower(), 'ID')

+        return t

+    def t_COMMA(self, t):

+        r','

+        return t

+    def t_ADDR(self, t):

+        r'\$'

+        return t

+    def t_LP(self, t):

+        r'\('

+        return t

+    def t_RP(self, t):

+        r'\)'

+        return t

+    def t_RB(self, t):

+        r'\['

+        return t

+    def t_LB(self, t):

+        r'\]'

+        return t

+    def t_LSHIFT(self, t):

+        r'<<'

+        return t

+    def t_RSHIFT(self, t):

+        r'>>'

+        return t

+    def t_BITWISE_OR(self, t):

+        r'\|'

+        return t

+    def t_BITWISE_AND(self, t):

+        r'\&'

+        return t

+    def t_BITWISE_COMPLEMENT(self, t):

+        r'~'

+        return t

+    def t_LOGICAL_NOT(self, t):

+        r'\!'

+        return t

+    def t_PLUS(self, t):

+        r'\+'

+        return t

+    def t_MINUS(self, t):

+        r'\-'

+        return t

+    def t_MUL(self, t):

+        r'\*'

+        return t

+    def t_DIV(self, t):

+        r'\/'

+        return t

+    def t_POW(self, t):

+        r'\^'

+        return t

+    def t_APO(self, t):

+        r"'"

+        return t

+    def t_INITIAL_preproc_STRING(self, t):

+        r'"[^"]*"' # a double-quoted string

+        t.value = t.value[1:-1] # Remove quotes

+        return t

+    def t_INITIAL_preproc_error(self, t):

+        ''' error handling rule

+        '''

+        self.error("illegal character '%s'" % t.value[0])

+    def t_INITIAL_preproc_CONTINUE(self, t):

+        r'\\\r?\n'

+        t.lexer.lineno += 1

+        # Allows line breaking

+    def t_COMMENT(self, t):

+        r';.*'

+        # Skip to end of line (the newline itself is not consumed)

+    def t_INITIAL_preproc_NEWLINE(self, t):

+        r'\r?\n'

+        t.lexer.lineno += 1

+        t.lexer.begin('INITIAL')

+        return t

+    def t_INITIAL_SHARP(self, t):

+        r'\#'

+        if self.find_column(t) == 1:

+            t.lexer.begin('preproc')

+        else:

+            self.error("illegal character '%s'" % t.value[0])

+    def __init__(self):

+        ''' Creates a new GLOBAL lexer instance

+        '''

+        self.lex = None

+        self.filestack = [] # Current filename, and line number being parsed

+        self.input_data = ''

+        self.tokens = tokens

+        self.next_token = None # if set to something, this will be returned once

+    def input(self, str):

+        ''' Defines input string, removing current lexer.

+        '''

+        self.input_data = str

+        self.lex = lex.lex(object = self)

+        self.lex.input(self.input_data)

+    def token(self):

+        return self.lex.token()

+    def find_column(self, token):

+        ''' Compute column:

+                - token is a token instance

+        '''

+        i = token.lexpos

+        while i > 0:

+            if self.input_data[i - 1] == '\n': break

+            i -= 1

+        column = token.lexpos - i + 1

+        return column

+    def msg(self, str):

+        ''' Prints an error msg.

+        '''

+        #print '%s:%i %s' % (FILENAME, self.lex.lineno, str)

+        print '%s:%s %s' % (FILENAME, "?", str)

+    def error(self, str):

+        ''' Prints an error msg, and exits.

+        '''

+        self.msg('Error: %s' % str)

+        sys.exit(1)

+    def warning(self, str):

+        ''' Emits a warning and continues execution.

+        '''

+        self.msg('Warning: %s' % str)

+# Needed for states

+tmp = lex.lex(object = Lexer(), lextab = 'zxbasmlextab')

+if __name__ == '__main__':

+    FILENAME = sys.argv[1]

+    tmp.input(open(sys.argv[1]).read())

+    tok = tmp.token()

+    while tok:

+        print tok

+        tok = tmp.token()

--

⑨