import re

# Number of spaces at the beginning of a line
def spaces(ln):
    spaces = 0
    while spaces < len(ln) and ln[spaces] == ' ': spaces += 1
    return spaces

# Main parse function
def parse(document):
    return parse_lines(document.split('\n'))

def strip_line(ln):
    ln2 = ln.strip()
    if '//' in ln2:
        return ln2[:ln2.find('//')]
    else:
        return ln2

# Parse the statement-level structure, including if and while statements
def parse_lines(lns):
    o = []
    i = 0
    while i < len(lns):
        main = lns[i]
        # Skip empty lines
        if len(main.strip()) == 0:
            i += 1
            continue
        if spaces(main) > 0:
            raise Exception("Line "+str(i)+" indented too much!")
        main = strip_line(main)
        # Grab the child block of an if statement
        start_child_block = i+1
        indent = 99999999
        i += 1
        child_lns = []
        while i < len(lns):
            if len(strip_line(lns[i])) > 0:
                sp = spaces(lns[i])
                if sp == 0: break
                indent = min(sp,indent)
                child_lns.append(lns[i])
            i += 1
        child_block = map(lambda x:x[indent:],child_lns)
        # Calls parse_line to parse the individual line
        out = parse_line(main)
        # Include the child block into the parsed expression
        if out[0] in ['if', 'else', 'while', 'else if']:
            if len(child_block) == 0:
                raise Exception("If/else/while statement must have sub-clause! (%d)" % i)
            else:
                out.append(parse_lines(child_block))
        else:
            if len(child_block) > 0:
                raise Exception("Not an if/else/while statement, can't have sub-clause! (%d)" % i)
        # This is somewhat complicated. Essentially, it converts something like
        # "if c1 then s1 elif c2 then s2 elif c3 then s3 else s4" (with appropriate
        # indenting) to [ if c1 s1 [ if c2 s2 [ if c3 s3 s4 ] ] ]
        if out[0] == 'else if':
            if len(o) == 0: raise Exception("Cannot start with else if! (%d)" % i)
            u = o[-1]
            while len(u) == 4: u = u[-1]
            u.append(['if'] + out[1:])
        elif out[0] == 'else':
            if len(o) == 0: raise Exception("Cannot start with else! (%d)" % i)
            u = o[-1]
            while len(u) == 4: u = u[-1]
            u.append(out[1])
        else:
            # Normal case: just add the parsed line to the output
            o.append(out)
    return o[0] if len(o) == 1 else ['seq'] + o

# Tokens contain one or more chars of the same type, with a few exceptions
def chartype(c):
    if c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.':
        return 'alphanum'
    elif c in '\t ': return 'space'
    elif c in '()[]': return 'brack'
    elif c == '"': return 'dquote'
    elif c == "'": return 'squote'
    else: return 'symb'

# Converts something like "b[4] = x+2 > y*-3" to
# [ 'b', '[', '4', ']', '=', 'x', '+', '2', '>', 'y', '*', '-', '3' ]
def tokenize(ln):
    tp = 'space'
    i = 0
    o = []
    global cur
    cur = ''
    # Finish a token and start a new one
    def nxt():
        global cur
        if len(cur) >= 2 and cur[-1] == '-':
            o.extend([cur[:-1],'-'])
        elif len(cur.strip()) >= 1:
            o.append(cur)
        cur = ''
    # Main loop
    while i < len(ln):
        c = chartype(ln[i])
        # Inside a string
        if tp == 'squote' or tp == "dquote":
            if c == tp:
                cur += ln[i]
                nxt()
                i += 1
                tp = 'space'
            elif ln[i:i+2] == '\\x':
                cur += ln[i+2:i+4].decode('hex')
                i += 4
            elif ln[i:i+2] == '\\n':
                cur += '\x0a'
                i += 2
            elif ln[i] == '\\':
                cur += ln[i+1]
                i += 2
            else:
                cur += ln[i]
                i += 1
        # Not inside a string
        else:
            if c == 'brack' or tp == 'brack': nxt()
            elif c == 'space': nxt()
            elif c != 'space' and tp == 'space': nxt()
            elif c == 'symb' and tp != 'symb': nxt()
            elif c == 'alphanum' and tp == 'symb': nxt()
            elif c == 'squote' or c == "dquote": nxt()
            cur += ln[i]
            tp = c
            i += 1
    nxt()
    if o[-1] in [':',':\n','\n']: o.pop()
    if tp in ['squote','dquote']: raise Exception("Unclosed string: "+ln)
    return o

# This is the part where we turn a token list into an abstract syntax tree
precedence = {
    '^': 1,
    '*': 2,
    '/': 3,
    '%': 4,
    '#/': 2,
    '#%': 2,
    '+': 3,
    '-': 3,
    '<': 4,
    '<=': 4,
    '>': 4,
    '>=': 4,
    '==': 5,
    'and': 6,
    '&&': 6,
    'or': 7,
    '||': 7,
    '!': 0
}

def toktype(token):
    if token is None: return None
    elif token in ['(','[']: return 'left_paren'
    elif token in [')',']']: return 'right_paren'
    elif token == ',': return 'comma'
    elif token == ':': return 'colon'
    elif token in ['!']: return 'unary_operation' 
    elif not isinstance(token,str): return 'compound'
    elif token in precedence: return 'binary_operation'
    elif re.match('^[0-9a-zA-Z\-\.]*$',token): return 'alphanum'
    elif token[0] in ['"',"'"] and token[0] == token[-1]: return 'alphanum'
    else: raise Exception("Invalid token: "+token)

# https://en.wikipedia.org/wiki/Shunting-yard_algorithm
#
# The algorithm works by maintaining three stacks: iq, stack, oq. Initially,
# the tokens are placed in order on the iq. Then, one by one, the tokens are
# processed. Values are moved immediately to the output queue. Operators are
# pushed onto the stack, but if an operator comes along with lower precendence
# then all operators on the stack with higher precedence are applied first.
# For example:
# iq = 2 + 3 * 5 + 7, stack = \, oq = \
# iq = + 3 * 5 + 7, stack = \, oq = 2
# iq = 3 * 5 + 7, stack = +, oq = 2
# iq = * 5 + 7, stack = +, oq = 2 3
# iq = 5 + 7, stack = + *, oq = 2 3 (since * > + in precedence)
# iq = + 7, stack = + *, oq = 2 3 5
# iq = 7, stack = + +, oq = 2 [* 3 5] (since + > * in precedence)
# iq = \, stack = + +, oq = 2 [* 3 5] 7
# iq = \, stack = +, oq = 2 [+ [* 3 5] 7]
# iq = \, stack = \, oq = [+ 2 [+ [* 3 5] 7] ]
#
# Functions, where function arguments begin with a left bracket preceded by
# the function name, are separated by commas, and end with a right bracket,
# are also included in this algorithm, though in a different way
def shunting_yard(tokens):
    iq = [x for x in tokens]
    oq = []
    stack = []
    prev,tok = None,None
    # The normal Shunting-Yard algorithm simply converts expressions into
    # reverse polish notation. Here, we try to be slightly more ambitious
    # and build up the AST directly on the output queue
    # eg. say oq = [ 2, 5, 3 ] and we add "+" then "*"
    # we get first [ 2, [ +, 5, 3 ] ] then [ [ *, 2, [ +, 5, 3 ] ] ]
    def popstack(stack,oq):
        tok = stack.pop()
        typ = toktype(tok)
        if typ == 'binary_operation':
            a,b = oq.pop(), oq.pop()
            oq.append([ tok, b, a])
        elif typ == 'unary_operation':
            a = oq.pop()
            oq.append([ tok, a ])
        elif typ == 'right_paren':
            args = []
            while toktype(oq[-1]) != 'left_paren':
                args.insert(0,oq.pop())
            oq.pop()
            if tok == ']' and args[0] != 'id':
                oq.append(['access'] + args)
            elif tok == ']':
                oq.append(['array_lit'] + args[1:])
            elif tok == ')' and len(args) and args[0] != 'id':
                oq.append(args)
            else:
                oq.append(args[1])
    # The main loop
    while len(iq) > 0:
        prev = tok
        tok = iq.pop(0)
        typ = toktype(tok)
        if typ == 'alphanum':
            oq.append(tok)
        elif typ == 'left_paren':
            # Handle cases like 3 * (2 + 5) by using 'id' as a default function
            # name
            if toktype(prev) != 'alphanum' and toktype(prev) != 'right_paren':
                oq.append('id')
            # Say the statement is "... f(45...". At the start, we would have f
            # as the last item on the oq. So we move it onto the stack, put the
            # leftparen on the oq, and move f back to the stack, so we have ( f
            # as the last two items on the oq. We also put the leftparen on the
            # stack so we have a separator on both the stack and the oq
            stack.append(oq.pop())
            oq.append(tok)
            oq.append(stack.pop())
            stack.append(tok)
        elif typ == 'right_paren':
            # eg. f(27, 3 * 5 + 4). First, we finish evaluating all the
            # arithmetic inside the last argument. Then, we run popstack
            # to coalesce all of the function arguments sitting on the
            # oq into a single list
            while len(stack) and toktype(stack[-1]) != 'left_paren':
                popstack(stack,oq)
            if len(stack):
                stack.pop()
            stack.append(tok)
            popstack(stack,oq)
        elif typ == 'unary_operation' or typ == 'binary_operation':
            # -5 -> 0 - 5
            if tok == '-' and toktype(prev) not in ['alphanum', 'right_paren']:
                oq.append('0')
            # Handle BEDMAS operator precedence
            prec = precedence[tok]
            while len(stack) and toktype(stack[-1]) == 'binary_operation' and precedence[stack[-1]] < prec:
                popstack(stack,oq)
            stack.append(tok)
        elif typ == 'comma':
            # Finish evaluating all arithmetic before the comma
            while len(stack) and toktype(stack[-1]) != 'left_paren':
                popstack(stack,oq)
        elif typ == 'colon':
            # Colon is like a comma except it stays in the argument list
            while len(stack) and toktype(stack[-1]) != 'right_paren':
                popstack(stack,oq)
            oq.append(tok)
    while len(stack):
        popstack(stack,oq)
    if len(oq) == 1:
        return oq[0]
    else:
        raise Exception("Wrong number of items left on stack: "+str(oq))

def parse_line(ln):
    tokens = tokenize(ln.strip())
    if tokens[0] == 'if' or tokens[0] == 'while':
        return [ tokens[0], shunting_yard(tokens[1:]) ]
    elif len(tokens) >= 2 and tokens[0] == 'else' and tokens[1] == 'if':
        return [ 'else if', shunting_yard(tokens[2:]) ]
    elif len(tokens) >= 1 and tokens[0] == 'elif':
        return [ 'else if', shunting_yard(tokens[1:]) ]
    elif len(tokens) == 1 and tokens[0] == 'else':
        return [ 'else' ]
    elif '=' in tokens:
        eqplace = tokens.index('=')
        return [ 'set', shunting_yard(tokens[:eqplace]), shunting_yard(tokens[eqplace+1:]) ]
    else:
        return shunting_yard(tokens)