ethminer/serpent/parser.py


								import re


								# Number of spaces at the beginning of a line

								def spaces(ln):
								    """Return how many space characters ``ln`` starts with.

								    Only ' ' is counted; a tab or any other character ends the run.
								    """
								    without_indent = ln.lstrip(' ')
								    return len(ln) - len(without_indent)


								# Main parse function

								def parse(document):
								    """Parse a complete program supplied as a single string.

								    The text is split on newline characters and the resulting list of
								    lines is passed to parse_lines, whose result is returned as is.
								    """
								    source_lines = document.split('\n')
								    return parse_lines(source_lines)


								def strip_line(ln):
								    """Trim ``ln`` and cut it off at the first ``//``.

								    Surrounding whitespace is removed first; if the trimmed text contains
								    ``//``, everything from that point on is dropped.  The text kept in
								    front of the comment is not trimmed a second time, and the search is a
								    plain substring search, so a ``//`` inside quotes is cut as well.
								    """
								    trimmed = ln.strip()
								    comment_at = trimmed.find('//')
								    if comment_at == -1:
								        return trimmed
								    return trimmed[:comment_at]


								# Parse the statement-level structure, including if and while statements

								def parse_lines(lns):
								    """Parse a list of source lines into a nested-list syntax tree.

								    Each unindented, non-empty line is a statement.  The indented lines
								    that follow it form its child block, which is de-indented by the
								    smallest indentation found and parsed recursively.  Only 'if',
								    'else', 'else if' and 'while' statements may (and must) own a child
								    block.  'else if' / 'else' clauses are folded into the preceding 'if'
								    node instead of being emitted as separate statements.

								    Returns the single parsed statement when there is exactly one,
								    otherwise ``['seq', stmt1, stmt2, ...]``.

								    Raises Exception when the first statement is indented, when a block
								    statement has no child block, when a plain statement has one, or when
								    an 'else' / 'else if' has no preceding statement.
								    """
								    o = []
								    i = 0
								    while i < len(lns):
								        main = lns[i]
								        # Skip blank lines and lines that contain nothing but a // comment.
								        # Handing an empty statement to parse_line would fail, and the
								        # child-block scan below already ignores such lines.
								        if len(strip_line(main)) == 0:
								            i += 1
								            continue
								        if spaces(main) > 0:
								            raise Exception("Line "+str(i)+" indented too much!")
								        main = strip_line(main)
								        # Collect the indented lines that belong to this statement
								        indent = 99999999
								        i += 1
								        child_lns = []
								        while i < len(lns):
								            if len(strip_line(lns[i])) > 0:
								                sp = spaces(lns[i])
								                # An unindented line starts the next statement
								                if sp == 0: break
								                indent = min(sp, indent)
								                child_lns.append(lns[i])
								            i += 1
								        # Build a real list (not a lazy map object) because its length is
								        # inspected below and it is passed on to a recursive call.
								        child_block = [x[indent:] for x in child_lns]
								        # Calls parse_line to parse the individual line
								        out = parse_line(main)
								        # Include the child block into the parsed expression.  Note that
								        # `i` in the messages below is the index reached after the child
								        # block was consumed.
								        if out[0] in ['if', 'else', 'while', 'else if']:
								            if len(child_block) == 0:
								                raise Exception("If/else/while statement must have sub-clause! (%d)" % i)
								            else:
								                out.append(parse_lines(child_block))
								        else:
								            if len(child_block) > 0:
								                raise Exception("Not an if/else/while statement, can't have sub-clause! (%d)" % i)
								        # This is somewhat complicated. Essentially, it converts something like
								        # "if c1 then s1 elif c2 then s2 elif c3 then s3 else s4" (with appropriate
								        # indenting) to [ if c1 s1 [ if c2 s2 [ if c3 s3 s4 ] ] ]
								        if out[0] == 'else if':
								            if len(o) == 0: raise Exception("Cannot start with else if! (%d)" % i)
								            u = o[-1]
								            # A 4-element node already has an else branch; descend into it
								            while len(u) == 4: u = u[-1]
								            u.append(['if'] + out[1:])
								        elif out[0] == 'else':
								            if len(o) == 0: raise Exception("Cannot start with else! (%d)" % i)
								            u = o[-1]
								            while len(u) == 4: u = u[-1]
								            u.append(out[1])
								        else:
								            # Normal case: just add the parsed line to the output
								            o.append(out)
								    return o[0] if len(o) == 1 else ['seq'] + o


								# Tokens contain one or more chars of the same type, with a few exceptions

								def chartype(c):
								    """Classify ``c`` into one of the tokenizer's character classes.

								    Returns 'alphanum' for letters, digits and '.', 'space' for a space or
								    tab, 'brack' for any of ( ) [ ], 'dquote' / 'squote' for the two quote
								    characters, and 'symb' for everything else.
								    """
								    classes = (
								        ('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.', 'alphanum'),
								        ('\t ', 'space'),
								        ('()[]', 'brack'),
								    )
								    for members, label in classes:
								        if c in members:
								            return label
								    if c == '"':
								        return 'dquote'
								    if c == "'":
								        return 'squote'
								    return 'symb'


								# Converts something like "b[4] = x+2 > y*-3" to

								# [ 'b', '[', '4', ']', '=', 'x', '+', '2', '>', 'y', '*', '-', '3' ]

								def tokenize(ln):
								    """Split one source line into a list of token strings.

								    Consecutive characters of the same class (see chartype) form one
								    token, with these exceptions: every bracket is its own token, a run
								    of symbols ending in '-' is split so the '-' stands alone, and a
								    quoted string (including its quotes) is a single token.  Inside a
								    string a backslash followed by x and two hex digits, a backslash
								    followed by n, and a backslash followed by any other character are
								    unescaped.  A final ':' token is dropped.

								    Returns the token list, which is empty for an empty or blank line.
								    Raises Exception for an unterminated string or a malformed hex escape.
								    """
								    tp = 'space'
								    i = 0
								    o = []
								    # Pieces of the token being built.  A list that is mutated in place
								    # lets the nested helper reset it without a module-level global.
								    cur = []
								    # Finish a token and start a new one
								    def nxt():
								        tok = ''.join(cur)
								        del cur[:]
								        if len(tok) >= 2 and tok[-1] == '-':
								            # eg. "*-" becomes "*" followed by a separate "-"
								            o.extend([tok[:-1], '-'])
								        elif len(tok.strip()) >= 1:
								            o.append(tok)
								    # Main loop
								    while i < len(ln):
								        c = chartype(ln[i])
								        # Inside a string
								        if tp == 'squote' or tp == 'dquote':
								            if c == tp:
								                # Matching closing quote ends the string token
								                cur.append(ln[i])
								                nxt()
								                i += 1
								                tp = 'space'
								            elif ln[i:i+2] == '\\x':
								                digits = ln[i+2:i+4]
								                if len(digits) != 2 or any(d not in '0123456789abcdefABCDEF' for d in digits):
								                    raise Exception("Invalid \\x escape in string: "+ln)
								                # chr(int(.., 16)) works on Python 2 and 3, unlike
								                # str.decode('hex')
								                cur.append(chr(int(digits, 16)))
								                i += 4
								            elif ln[i:i+2] == '\\n':
								                cur.append('\x0a')
								                i += 2
								            elif ln[i] == '\\':
								                # A backslash with nothing after it cannot be closed
								                if i + 1 >= len(ln):
								                    raise Exception("Unclosed string: "+ln)
								                cur.append(ln[i+1])
								                i += 2
								            else:
								                cur.append(ln[i])
								                i += 1
								        # Not inside a string
								        else:
								            # The current token continues only while an alphanumeric run
								            # or a symbol run is being extended; any other transition
								            # (brackets, spaces, quotes, class changes) ends it.
								            if not (c == tp and c in ('alphanum', 'symb')):
								                nxt()
								            cur.append(ln[i])
								            tp = c
								            i += 1
								    nxt()
								    # Drop a trailing colon; guard against lines that produced no tokens
								    if o and o[-1] in [':',':\n','\n']: o.pop()
								    if tp in ['squote','dquote']: raise Exception("Unclosed string: "+ln)
								    return o


								# This is the part where we turn a token list into an abstract syntax tree

								# Binding level of each operator, grouped by level.  shunting_yard applies
								# an operator waiting on its stack before a newly read one when the waiting
								# operator's number is strictly smaller.
								# NOTE(review): '/' sits at level 3 and '%' at level 4 while '*', '#/' and
								# '#%' sit at level 2 -- confirm this is intended.
								precedence = {
								    '!': 0,
								    '^': 1,
								    '*': 2, '#/': 2, '#%': 2,
								    '/': 3, '+': 3, '-': 3,
								    '%': 4, '<': 4, '<=': 4, '>': 4, '>=': 4,
								    '==': 5,
								    'and': 6, '&&': 6,
								    'or': 7, '||': 7,
								}


								def toktype(token):
								    """Return the syntactic category of ``token``.

								    ``token`` is None, a token string, or an already-built sub-tree (any
								    non-string value).  The result is None for None, or one of
								    'left_paren', 'right_paren', 'comma', 'colon', 'unary_operation',
								    'compound' (non-string values), 'binary_operation' (keys of the
								    precedence table) and 'alphanum' (names, numbers and quoted strings).

								    Raises Exception for a string that fits none of these categories.
								    """
								    if token is None:
								        return None
								    # List membership (not a set) so that unhashable sub-trees can be
								    # tested without raising
								    if token in ['(', '[']:
								        return 'left_paren'
								    if token in [')', ']']:
								        return 'right_paren'
								    if token == ',':
								        return 'comma'
								    if token == ':':
								        return 'colon'
								    if token in ['!']:
								        return 'unary_operation'
								    if not isinstance(token, str):
								        return 'compound'
								    if token in precedence:
								        return 'binary_operation'
								    # Raw string: '\-' and '\.' are not valid escapes in a normal literal
								    if re.match(r'^[0-9a-zA-Z\-\.]*$', token):
								        return 'alphanum'
								    # Quoted string literal: opens and closes with the same quote
								    if token[0] in ['"', "'"] and token[0] == token[-1]:
								        return 'alphanum'
								    raise Exception("Invalid token: "+token)


								# https://en.wikipedia.org/wiki/Shunting-yard_algorithm

								#

								# The algorithm works by maintaining three stacks: iq, stack, oq. Initially,

								# the tokens are placed in order on the iq. Then, one by one, the tokens are

								# processed. Values are moved immediately to the output queue. Operators are

								# pushed onto the stack, but if an operator comes along with lower precedence

								# then all operators on the stack with higher precedence are applied first.

								# For example:

								# iq = 2 + 3 * 5 + 7, stack = \, oq = \

								# iq = + 3 * 5 + 7, stack = \, oq = 2

								# iq = 3 * 5 + 7, stack = +, oq = 2

								# iq = * 5 + 7, stack = +, oq = 2 3

								# iq = 5 + 7, stack = + *, oq = 2 3 (since * > + in precedence)

								# iq = + 7, stack = + *, oq = 2 3 5

								# iq = 7, stack = + +, oq = 2 [* 3 5] (since + > * in precedence)

								# iq = \, stack = + +, oq = 2 [* 3 5] 7

								# iq = \, stack = +, oq = 2 [+ [* 3 5] 7]

								# iq = \, stack = \, oq = [+ 2 [+ [* 3 5] 7] ]

								#

								# Functions, where function arguments begin with a left bracket preceded by

								# the function name, are separated by commas, and end with a right bracket,

								# are also included in this algorithm, though in a different way

								def shunting_yard(tokens):
								    """Turn a flat token sequence into a nested-list expression tree.

								    ``tokens`` is a sequence of token strings for one expression.  Values
								    go straight to the output queue (oq); operators wait on ``stack``
								    until they are applied, at which point they are combined with their
								    operands into ``[op, left, right]`` (or ``[op, operand]`` for '!')
								    directly on the oq.  Bracketed groups become ``[name, arg, ...]`` for
								    calls, ``['access', name, index...]`` for indexing, ``['array_lit',
								    item...]`` for a bare ``[...]``, and the inner expression itself for
								    plain parentheses.  A '-' that does not follow a value or a closing
								    bracket is treated as ``0 - x``.

								    Returns the single tree left on the oq.  Raises Exception when the
								    oq does not end up holding exactly one item.
								    """
								    oq = []
								    stack = []
								    prev,tok = None,None
								    # The normal Shunting-Yard algorithm simply converts expressions into
								    # reverse polish notation. Here, we try to be slightly more ambitious
								    # and build up the AST directly on the output queue
								    # eg. say oq = [ 2, 5, 3 ] and we add "+" then "*"
								    # we get first [ 2, [ +, 5, 3 ] ] then [ [ *, 2, [ +, 5, 3 ] ] ]
								    def popstack(stack,oq):
								        tok = stack.pop()
								        typ = toktype(tok)
								        if typ == 'binary_operation':
								            a,b = oq.pop(), oq.pop()
								            oq.append([ tok, b, a])
								        elif typ == 'unary_operation':
								            a = oq.pop()
								            oq.append([ tok, a ])
								        elif typ == 'right_paren':
								            # Gather everything back to the matching left bracket on the
								            # oq; the first gathered item is the function name or 'id'
								            args = []
								            while toktype(oq[-1]) != 'left_paren':
								                args.insert(0,oq.pop())
								            oq.pop()
								            if tok == ']' and args[0] != 'id':
								                oq.append(['access'] + args)
								            elif tok == ']':
								                oq.append(['array_lit'] + args[1:])
								            elif tok == ')' and len(args) and args[0] != 'id':
								                oq.append(args)
								            else:
								                oq.append(args[1])
								        # Any other token type (eg. a left bracket) is simply discarded
								    # The main loop; iterating directly avoids repeatedly popping from
								    # the front of a list
								    for upcoming in tokens:
								        prev,tok = tok,upcoming
								        typ = toktype(tok)
								        if typ == 'alphanum':
								            oq.append(tok)
								        elif typ == 'left_paren':
								            # Handle cases like 3 * (2 + 5) by using 'id' as a default function
								            # name
								            if toktype(prev) != 'alphanum' and toktype(prev) != 'right_paren':
								                oq.append('id')
								            # Say the statement is "... f(45...". At the start, we would have f
								            # as the last item on the oq. So we move it onto the stack, put the
								            # leftparen on the oq, and move f back to the oq, so we have ( f
								            # as the last two items on the oq. We also put the leftparen on the
								            # stack so we have a separator on both the stack and the oq
								            stack.append(oq.pop())
								            oq.append(tok)
								            oq.append(stack.pop())
								            stack.append(tok)
								        elif typ == 'right_paren':
								            # eg. f(27, 3 * 5 + 4). First, we finish evaluating all the
								            # arithmetic inside the last argument. Then, we run popstack
								            # to coalesce all of the function arguments sitting on the
								            # oq into a single list
								            while len(stack) and toktype(stack[-1]) != 'left_paren':
								                popstack(stack,oq)
								            if len(stack):
								                stack.pop()
								            stack.append(tok)
								            popstack(stack,oq)
								        elif typ == 'unary_operation' or typ == 'binary_operation':
								            # -5 -> 0 - 5
								            if tok == '-' and toktype(prev) not in ['alphanum', 'right_paren']:
								                oq.append('0')
								            # Handle BEDMAS operator precedence
								            prec = precedence[tok]
								            while len(stack) and toktype(stack[-1]) == 'binary_operation' and precedence[stack[-1]] < prec:
								                popstack(stack,oq)
								            stack.append(tok)
								        elif typ == 'comma':
								            # Finish evaluating all arithmetic before the comma
								            while len(stack) and toktype(stack[-1]) != 'left_paren':
								                popstack(stack,oq)
								        elif typ == 'colon':
								            # Colon is like a comma except it stays in the argument list.
								            # Stop at the enclosing left bracket, as the comma case does,
								            # so the bracket and any operators outside the current
								            # argument list stay on the stack.
								            while len(stack) and toktype(stack[-1]) != 'left_paren':
								                popstack(stack,oq)
								            oq.append(tok)
								    # Apply whatever operators are still waiting
								    while len(stack):
								        popstack(stack,oq)
								    if len(oq) == 1:
								        return oq[0]
								    else:
								        raise Exception("Wrong number of items left on stack: "+str(oq))


								def parse_line(ln):
								    """Parse a single statement line into its tree form.

								    The line is tokenized and dispatched on its leading tokens:
								    'if' / 'while' give ``[keyword, condition]``; 'else if' and 'elif'
								    give ``['else if', condition]``; a lone 'else' gives ``['else']``; a
								    line containing an '=' token gives ``['set', target, value]`` split at
								    the first '='; anything else is parsed as a plain expression.
								    """
								    tokens = tokenize(ln.strip())
								    head = tokens[0]
								    if head in ('if', 'while'):
								        return [head, shunting_yard(tokens[1:])]
								    if len(tokens) >= 2 and head == 'else' and tokens[1] == 'if':
								        return ['else if', shunting_yard(tokens[2:])]
								    if head == 'elif':
								        return ['else if', shunting_yard(tokens[1:])]
								    if len(tokens) == 1 and head == 'else':
								        return ['else']
								    if '=' in tokens:
								        # Split at the first '=': left side is the target, right the value
								        eqplace = tokens.index('=')
								        target = shunting_yard(tokens[:eqplace])
								        value = shunting_yard(tokens[eqplace+1:])
								        return ['set', target, value]
								    return shunting_yard(tokens)