peon/src/frontend/parser.nim

1301 lines
49 KiB
Nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## A recursive-descent top-down parser implementation
import std/strformat
import std/strutils
import std/os
import meta/token
import meta/ast
import meta/errors
import lexer as l
import ../util/symbols
import ../config
export token, ast, errors
type
  LoopContext {.pure.} = enum
    ## Tracks whether the parser is currently
    ## inside a loop body (for break/continue checks)
    Loop, None
  Precedence {.pure.} = enum
    ## Operator precedence
    ## clearly stolen from
    ## nim
    # Listed from loosest- to tightest-binding
    Arrow = 0,
    Assign,
    Or,
    And,
    Compare,
    Bitwise,
    Addition,
    Multiplication,
    Power,
    None # Used for stuff that isn't an operator
  OperatorTable = ref object
    ## A table for storing and
    ## handling the precedence
    ## of operators
    # All registered operator lexemes (for fast membership tests)
    tokens: seq[string]
    # Operator lexemes bucketed by their precedence level
    precedence: array[Precedence, seq[string]]
  Parser* = ref object
    ## A recursive-descent top-down
    ## parser implementation
    # Index into self.tokens
    current: int
    # The name of the file being parsed.
    # Only meaningful for parse errors
    file: string
    # The list of tokens representing
    # the source code to be parsed.
    # In most cases, those will come
    # from the builtin lexer, but this
    # behavior is not enforced and the
    # tokenizer is entirely separate from
    # the parser
    tokens: seq[Token]
    # Little internal attribute that tells
    # us if we're inside a loop or not. This
    # allows us to detect errors like break
    # being used outside loops
    currentLoop: LoopContext
    # Stores the current function
    # being parsed. This is a reference
    # to either a FunDecl or LambdaExpr
    # AST node and is nil when the parser
    # is at the top-level. It allows the
    # parser to detect errors like return
    # outside functions
    currentFunction: Declaration
    # Stores the current scope depth (0 = global, > 0 local)
    scopeDepth: int
    # Operator table
    operators: OperatorTable
    # The AST node
    tree: seq[Declaration]
    # Stores line data
    lines: seq[tuple[start, stop: int]]
    # The source of the current module
    source: string
    # Keeps track of imported modules
    modules: seq[tuple[name: string, loaded: bool]]
  ParseError* = ref object of PeonException
    ## A parsing error, carrying enough context
    ## to produce a nice diagnostic message
    parser*: Parser
    file*: string
    token*: Token
    module*: string
proc newOperatorTable: OperatorTable =
  ## Creates and returns an empty
  ## operator table
  result = OperatorTable(tokens: @[])
  # Make sure every precedence bucket starts out empty
  for level in Precedence:
    result.precedence[level] = @[]
proc addOperator(self: OperatorTable, lexeme: string) =
  ## Adds an operator to the table. Its precedence
  ## is inferred from the operator's lexeme (the
  ## criteria are similar to Nim's)
  if lexeme in self.tokens:
    return # We've already added it!
  # Default precedence if no rule below matches
  var prec = Power
  # NOTE: the order of these checks matters — the first matching
  # rule wins, so e.g. "=>" is classified as Arrow before the
  # Assign rule can see its trailing '='
  if lexeme.len() >= 2 and lexeme[^2..^1] in ["->", "~>", "=>"]:
    prec = Arrow
  elif lexeme == "and":
    prec = Precedence.And
  elif lexeme == "or":
    prec = Precedence.Or
  elif lexeme.endsWith("=") and lexeme[0] notin {'<', '>', '!', '?', '~', '='} or lexeme == "=":
    # Assignment-like operators (=, +=, ...) but not comparisons (<=, ==, ...)
    prec = Assign
  elif lexeme[0] in {'$', } or lexeme == "**":
    prec = Power
  elif lexeme[0] in {'*', '%', '/', '\\'}:
    prec = Multiplication
  elif lexeme[0] in {'+', '-'}:
    prec = Addition
  elif lexeme in [">>", "<<", "|", "~", "&", "^"]:
    prec = Bitwise
  elif lexeme[0] in {'<', '>', '=', '!'}:
    prec = Compare
  self.tokens.add(lexeme)
  self.precedence[prec].add(lexeme)
proc getPrecedence(self: OperatorTable, lexeme: string): Precedence =
  ## Looks up the precedence level of the given
  ## operator lexeme. Unregistered lexemes map
  ## to Precedence.None
  result = Precedence.None
  for (level, entries) in self.precedence.pairs():
    if lexeme in entries:
      return level
proc newParser*: Parser =
  ## Creates a fresh Parser object with all
  ## of its state reset to sensible defaults
  result = Parser(current: 0,
                  file: "",
                  tokens: @[],
                  currentFunction: nil,
                  currentLoop: LoopContext.None,
                  scopeDepth: 0,
                  operators: newOperatorTable(),
                  tree: @[],
                  source: "")
# Public getters for improved error formatting
proc getCurrent*(self: Parser): int {.inline.} = self.current
proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >=
    self.tokens.high() or
    self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1])
# NOTE(review): getCurrentToken indexes self.tokens[^1] when out of
# bounds — this raises IndexDefect on an empty token list; presumably
# callers guarantee a non-empty stream (the lexer always emits EOF) — TODO confirm
proc getCurrentFunction*(self: Parser): Declaration {.inline.} = self.currentFunction
proc getFile*(self: Parser): string {.inline.} = self.file
proc getModule*(self: Parser): string {.inline.} = self.getFile().splitFile().name
proc getLines*(self: Parser): seq[tuple[start, stop: int]] = self.lines
proc getSource*(self: Parser): string = self.source
proc getRelPos*(self: Parser, line: int): tuple[start, stop: int] = self.lines[line - 1]
# Creates a synthetic EOF token (line -1 marks it as artificial)
template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
# Expects a semicolon terminating a statement. Relies on an
# injected `self` being visible at the call site
template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg, tok)
proc peek(self: Parser, distance: int = 0): Token =
  ## Returns the token at the given distance from
  ## the current position without consuming anything.
  ## Out-of-bounds accesses (in either direction)
  ## yield a synthetic EOF token; negative distances
  ## look back at already-consumed tokens
  let pos = self.current + distance
  if self.tokens.high() == -1 or pos > self.tokens.high() or pos < 0:
    result = endOfFile
  else:
    result = self.tokens[pos]
proc done(self: Parser): bool {.inline.} =
  ## Tells whether the parser has reached the end
  ## of input. An explicit EOF token is expected
  ## to terminate the token stream
  self.peek().kind == EndOfFile
proc step(self: Parser, n: int = 1): Token =
  ## Consumes the current token and returns it,
  ## advancing the parser. At EOF, the EOF token
  ## is returned and the position does not move.
  # NOTE(review): the `n` parameter is currently unused — the
  # parser always advances by exactly one token
  if self.done():
    return self.peek()
  result = self.tokens[self.current]
  self.current += 1
proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseError].} =
  ## Raises a ParseError exception carrying the given message
  ## and offending token (defaults to the parser's current
  ## token when none is provided)
  raise ParseError(msg: message, token: if token.isNil(): self.getCurrentToken() else: token, file: self.file, module: self.getModule(), parser: self)
# Why do we allow strings or enum members of TokenType? Well, it's simple:
# symbols like ":" and "=" are both valid operator names (therefore they are
# tokenized as symbols), but they are also used in a context where they are just
# separators (for example, the colon is used in type declarations). Since we can't
# tell at tokenization time which of the two contexts we're in, we just treat everything
# as a symbol and in the cases where we need a specific token we just match the string
# directly
proc check[T: TokenType or string](self: Parser, kind: T,
           distance: int = 0): bool =
  ## Checks whether the token at the given distance matches
  ## the expected kind (by token type) or lexeme (by string),
  ## without consuming it. The distance is forwarded to
  ## self.peek()
  when T is TokenType:
    result = self.peek(distance).kind == kind
  else:
    result = self.peek(distance).lexeme == kind
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
  ## Checks the current token against each of the
  ## given kinds, returning true on the first hit.
  ## Assumes at most one entry can match at any
  ## given position
  result = false
  for candidate in kind:
    if self.check(candidate):
      return true
proc match[T: TokenType or string](self: Parser, kind: T): bool =
  ## Like self.check(), but additionally consumes
  ## the token when it matches
  result = self.check(kind)
  if result:
    discard self.step()
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
  ## Tries to match (and consume) the current token
  ## against each of the given kinds, stopping at
  ## the first successful match. Assumes at most one
  ## entry can match at any given position
  result = false
  for candidate in kind:
    if self.match(candidate):
      return true
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) =
  ## Behaves like self.match(), except that
  ## when a token doesn't match, an error
  ## is raised. If no error message is
  ## given, a default one is used
  if not self.match(kind):
    if message.len() == 0:
      self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead", token)
    else:
      # Bug fix: the caller-supplied token was previously dropped on
      # the custom-message path, misplacing the error location
      self.error(message, token)
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} =
  ## Behaves like self.expect(), except that
  ## an error is raised only if none of the
  ## given token kinds matches
  for k in kind:
    # Bug fix: this used to call self.match(kind) (the whole array)
    # instead of self.match(k), looping uselessly over the same check
    if self.match(k):
      return
  if message.len() == 0:
    self.error(&"""expecting any of the following tokens: {kind.join(", ")}, but got {self.peek().kind} instead""", token)
  else:
    # Bug fix: with a custom message no error was raised at all,
    # silently accepting invalid input
    self.error(message, token)
# Forward declarations (these procs are mutually recursive with
# the expression/statement handlers defined above their bodies)
proc expression(self: Parser): Expression
proc expressionStatement(self: Parser): Statement
proc statement(self: Parser): Statement
proc varDecl(self: Parser, isLet: bool = false,
             isConst: bool = false): Declaration
proc parseFunExpr(self: Parser): LambdaExpr
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
             isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
# End of forward declarations
proc primary(self: Parser): Expression =
  ## Parses primary expressions such
  ## as integer literals and keywords
  ## that map to builtin types (true,
  ## false, nil, etc.)
  case self.peek().kind:
    of True:
      result = newTrueExpr(self.step())
    of False:
      result = newFalseExpr(self.step())
    of TokenType.NotANumber:
      result = newNanExpr(self.step())
    of Nil:
      result = newNilExpr(self.step())
    of Float:
      result = newFloatExpr(self.step())
    of Integer:
      result = newIntExpr(self.step())
    of Identifier:
      result = newIdentExpr(self.step(), self.scopeDepth)
    of LeftParen:
      # Parenthesized (grouping) expression
      let tok = self.step()
      result = newGroupingExpr(self.expression(), tok)
      self.expect(RightParen, "unterminated parenthesized expression")
    of Yield:
      let tok = self.step()
      if self.currentFunction.isNil():
        self.error("'yield' cannot be used outside functions", tok)
      elif self.currentFunction.token.kind != Generator:
        # It's easier than doing conversions for lambda/funDecl
        self.error("'yield' cannot be used outside generators", tok)
      if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]):
        # Expression delimiters
        result = newYieldExpr(self.expression(), tok)
      else:
        # Empty yield
        result = newYieldExpr(newNilExpr(Token()), tok)
    of Await:
      let tok = self.step()
      if self.currentFunction.isNil():
        self.error("'await' cannot be used outside functions", tok)
      if self.currentFunction.token.kind != Coroutine:
        self.error("'await' can only be used inside coroutines", tok)
      result = newAwaitExpr(self.expression(), tok)
    of RightParen, RightBracket, RightBrace:
      # This is *technically* unnecessary: the parser would
      # throw an error regardless, but it's a little bit nicer
      # when the error message is more specific
      self.error(&"unmatched '{self.peek().lexeme}'")
    of Hex:
      result = newHexExpr(self.step())
    of Octal:
      result = newOctExpr(self.step())
    of Binary:
      result = newBinExpr(self.step())
    of String:
      result = newStrExpr(self.step())
    of Infinity:
      result = newInfExpr(self.step())
    of Function:
      # Anonymous function (lambda) expression
      discard self.step()
      result = Expression(self.funDecl(isLambda=true))
    of Coroutine:
      discard self.step()
      result = Expression(self.funDecl(isAsync=true, isLambda=true))
    of Generator:
      discard self.step()
      result = Expression(self.funDecl(isGenerator=true, isLambda=true))
    of TokenType.Var:
      # var/ref/ptr type modifiers used in expression position
      discard self.step()
      result = newVarExpr(self.expression(), self.peek(-1))
    of TokenType.Ref:
      discard self.step()
      result = newRefExpr(self.expression(), self.peek(-1))
    of TokenType.Ptr:
      discard self.step()
      result = newPtrExpr(self.expression(), self.peek(-1))
    else:
      self.error("invalid syntax")
proc makeCall(self: Parser, callee: Expression): CallExpr =
  ## Utility function called iteratively by self.call()
  ## to parse a function call
  let tok = self.peek(-1)
  # Keyword-argument names seen so far, used to reject duplicates
  var argNames: seq[IdentExpr] = @[]
  var arguments: tuple[positionals: seq[Expression], keyword: seq[tuple[
      name: IdentExpr, value: Expression]]] = (positionals: @[],
      keyword: @[])
  var argument: Expression = nil
  var argCount = 0
  if not self.check(RightParen):
    while true:
      if argCount >= 255:
        self.error("call can not have more than 255 arguments")
        break
      argument = self.expression()
      # A top-level '=' in an argument marks it as a keyword
      # argument of the form name=value
      if argument.kind == binaryExpr and BinaryExpr(argument).operator.lexeme == "=":
        # TODO: This will explode with slices!
        if IdentExpr(BinaryExpr(argument).a) in argNames:
          self.error("duplicate keyword argument in call")
        argNames.add(IdentExpr(BinaryExpr(argument).a))
        arguments.keyword.add((name: IdentExpr(BinaryExpr(argument).a), value: BinaryExpr(argument).b))
      elif arguments.keyword.len() == 0:
        arguments.positionals.add(argument)
      else:
        self.error("positional argument cannot follow keyword argument in call")
      if not self.match(Comma):
        break
      # NOTE(review): argCount is only incremented when a comma
      # follows, so the 255-argument limit is checked one argument
      # late — TODO confirm intended
      argCount += 1
  self.expect(RightParen)
  result = newCallExpr(callee, arguments, tok)
proc parseGenericArgs(self: Parser) =
  ## Parses function generic arguments
  ## like function[type](arg)
  # TODO: not implemented yet — this is a stub and
  # consumes nothing
  discard
proc call(self: Parser): Expression =
  ## Parses function calls and object field
  ## accessing
  result = self.primary()
  # Keep folding postfix operations (calls, attribute access,
  # generic instantiations) onto the result left-to-right
  while true:
    if self.match(LeftParen):
      result = self.makeCall(result)
    elif self.match(Dot):
      self.expect(Identifier, "expecting attribute name after '.'")
      result = newGetItemExpr(result, newIdentExpr(self.peek(-1), self.scopeDepth), self.peek(-1))
    elif self.match(LeftBracket):
      # Generic call syntax: callee[T](...)
      self.parseGenericArgs() # TODO
      result = self.makeCall(result)
    else:
      break
## Operator parsing handlers
proc unary(self: Parser): Expression =
  ## Parses unary expressions
  let next = self.peek()
  if next.kind in [Identifier, Symbol] and next.lexeme in self.operators.tokens:
    # A registered operator in prefix position: recurse for the operand
    result = newUnaryExpr(self.step(), self.unary())
  else:
    result = self.call()
proc parsePow(self: Parser): Expression =
  ## Parses power expressions
  result = self.unary()
  # Left-fold any further Power-level operators onto the result
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Power:
    let op = self.step()
    let rhs = self.unary()
    result = newBinaryExpr(result, op, rhs)
proc parseMul(self: Parser): Expression =
  ## Parses multiplication and division
  ## expressions
  result = self.parsePow()
  # Left-fold any further Multiplication-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Multiplication:
    let op = self.step()
    let rhs = self.parsePow()
    result = newBinaryExpr(result, op, rhs)
proc parseAdd(self: Parser): Expression =
  ## Parses addition and subtraction
  ## expressions
  result = self.parseMul()
  # Left-fold any further Addition-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Addition:
    let op = self.step()
    let rhs = self.parseMul()
    result = newBinaryExpr(result, op, rhs)
proc parseBitwise(self: Parser): Expression =
  ## Parses bitwise expressions
  result = self.parseAdd()
  # Left-fold any further Bitwise-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Bitwise:
    let op = self.step()
    let rhs = self.parseAdd()
    result = newBinaryExpr(result, op, rhs)
proc parseCmp(self: Parser): Expression =
  ## Parses comparison expressions
  result = self.parseBitwise()
  var operator: Token
  var right: Expression
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Compare:
    operator = self.step()
    # Bug fix: the right operand used to be parsed with parseAdd(),
    # which skipped the Bitwise precedence level on the right-hand
    # side of comparisons (e.g. "a == b | c" parsed incorrectly)
    right = self.parseBitwise()
    result = newBinaryExpr(result, operator, right)
proc parseAnd(self: Parser): Expression =
  ## Parses logical and expressions
  result = self.parseCmp()
  # Left-fold any further And-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Precedence.And:
    let op = self.step()
    let rhs = self.parseCmp()
    result = newBinaryExpr(result, op, rhs)
proc parseOr(self: Parser): Expression =
  ## Parses logical or expressions
  result = self.parseAnd()
  # Left-fold any further Or-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
    let op = self.step()
    let rhs = self.parseAnd()
    result = newBinaryExpr(result, op, rhs)
proc parseAssign(self: Parser): Expression =
  ## Parses assignment expressions
  result = self.parseOr()
  if self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Assign:
    let tok = self.step()
    var value = self.expression()
    # Only certain expression kinds are valid assignment targets
    case result.kind:
      of identExpr, sliceExpr:
        result = newAssignExpr(result, value, tok)
      of getItemExpr:
        # Assigning to an attribute access becomes a set-item node
        result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
      else:
        self.error("invalid assignment target", tok)
proc parseArrow(self: Parser): Expression =
  ## Parses arrow expressions
  result = self.parseAssign()
  var operator: Token
  var right: Expression
  # Bug fix: this loop used to test for Precedence.Or (a copy-paste
  # from parseOr), so Arrow-level operators were never parsed here
  # and logical-or operators were re-parsed at the wrong level
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Arrow:
    operator = self.step()
    right = self.parseAssign()
    result = newBinaryExpr(result, operator, right)
## End of operator parsing handlers
proc assertStmt(self: Parser): Statement =
  ## Parses "assert" statements, which
  ## raise an error if the expression
  ## fed into them is false
  let keyword = self.peek(-1)
  let condition = self.expression()
  endOfLine("missing semicolon after 'assert'")
  result = newAssertStmt(condition, keyword)
proc beginScope(self: Parser) =
  ## Opens a new lexical scope
  self.scopeDepth += 1
proc endScope(self: Parser) =
  ## Closes the current lexical scope
  self.scopeDepth -= 1
proc blockStmt(self: Parser): Statement =
  ## Parses block statements. A block
  ## statement simply opens a new local
  ## scope
  self.beginScope()
  let tok = self.peek(-1)
  var code: seq[Declaration] = @[]
  while not self.check(RightBrace) and not self.done():
    code.add(self.declaration())
    # declaration() may return nil; drop such entries so the
    # resulting block only contains real AST nodes
    if code[^1].isNil():
      code.delete(code.high())
  self.expect(RightBrace, "expecting '}'")
  result = newBlockStmt(code, tok)
  self.endScope()
proc breakStmt(self: Parser): Statement =
  ## Parses a break statement, which is
  ## only valid within a loop body
  let keyword = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'break' cannot be used outside loops")
  endOfLine("missing semicolon after 'break'")
  result = newBreakStmt(keyword)
proc deferStmt(self: Parser): Statement =
  ## Parses defer statements, which schedule
  ## an expression to run when the enclosing
  ## function exits
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    self.error("'defer' cannot be used outside functions")
  # Bug fix: the deferred expression must be parsed *before* the
  # statement's terminating semicolon. The old code expected the
  # semicolon first, so "defer expr;" could never parse
  result = newDeferStmt(self.expression(), tok)
  endOfLine("missing semicolon after 'defer'")
proc continueStmt(self: Parser): Statement =
  ## Parses a continue statement, which is
  ## only valid within a loop body
  let keyword = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'continue' cannot be used outside loops")
  endOfLine("missing semicolon after 'continue'")
  result = newContinueStmt(keyword)
proc returnStmt(self: Parser): Statement =
  ## Parses return statements
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    self.error("'return' cannot be used outside functions")
  var value: Expression
  if not self.check(Semicolon):
    # Since return can be used on its own too
    # we need to check if there's an actual value
    # to return or not
    value = self.expression()
  endOfLine("missing semicolon after 'return'")
  result = newReturnStmt(value, tok)
  # Record the explicit return on the enclosing function node
  # (FunDecl and LambdaExpr store the flag separately)
  case self.currentFunction.kind:
    of NodeKind.funDecl:
      FunDecl(self.currentFunction).hasExplicitReturn = true
    else:
      LambdaExpr(self.currentFunction).hasExplicitReturn = true
proc yieldStmt(self: Parser): Statement =
  ## Parses yield statements
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    # Bug fix: the message was missing the word "used"
    # (it read "'yield' cannot be outside functions")
    self.error("'yield' cannot be used outside functions")
  elif self.currentFunction.token.kind != Generator:
    self.error("'yield' can only be used inside generators")
  if not self.check(Semicolon):
    result = newYieldStmt(self.expression(), tok)
  else:
    # A bare "yield;" yields nil implicitly
    result = newYieldStmt(newNilExpr(Token(lexeme: "nil")), tok)
  endOfLine("missing semicolon after 'yield'")
proc awaitStmt(self: Parser): Statement =
  ## Parses await statements
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    self.error("'await' cannot be used outside functions")
  if self.currentFunction.token.kind != Coroutine:
    self.error("'await' can only be used inside coroutines")
  # Bug fix: parse the awaited expression *before* expecting the
  # terminating semicolon. The old code expected the semicolon
  # first, so "await expr;" could never parse successfully
  result = newAwaitStmt(self.expression(), tok)
  endOfLine("missing semicolon after 'await'")
proc raiseStmt(self: Parser): Statement =
  ## Parses raise statements
  let keyword = self.peek(-1)
  var exception: Expression
  if not self.check(Semicolon):
    # Raise can be used on its own, in which
    # case it re-raises the last active exception
    exception = self.expression()
  endOfLine("missing semicolon after 'raise'")
  result = newRaiseStmt(exception, keyword)
proc forEachStmt(self: Parser): Statement =
  ## Parses C#-like foreach loops
  let tok = self.peek(-1)
  # Save and restore the enclosing loop context so nested loops
  # don't clobber each other's break/continue validity
  let enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(Identifier)
  let identifier = newIdentExpr(self.peek(-1), self.scopeDepth)
  self.expect("in")
  let expression = self.expression()
  self.expect(LeftBrace)
  result = newForEachStmt(identifier, expression, self.blockStmt(), tok)
  self.currentLoop = enclosingLoop
proc findOperators(self: Parser, tokens: seq[Token])
proc importStmt(self: Parser, fromStmt: bool = false): Statement =
  ## Parses import statements. This is a little
  ## convoluted because we need to pre-parse the
  ## module to import the operators from it
  if self.scopeDepth > 0:
    self.error("import statements are only allowed at the top level")
  var tok: Token
  if fromStmt:
    tok = self.peek(-2)
  else:
    tok = self.peek(-1)
  # Accumulate the (possibly relative) module path, one
  # path component at a time
  var moduleName = ""
  while not self.check(Semicolon) and not self.done():
    if self.match(".."):
      if not self.check("/"):
        self.error("expecting '/' after '..' in import statement")
      moduleName &= "../"
    elif self.match("/"):
      self.expect(Identifier, "expecting identifier after '/' in import statement")
      moduleName &= &"/{self.peek(-1).lexeme}"
    elif self.match(Identifier):
      moduleName &= self.peek(-1).lexeme
    else:
      break
  endOfLine("missing semicolon after import statement")
  moduleName &= ".pn"
  result = newImportStmt(newIdentExpr(Token(kind: Identifier, lexeme: moduleName, line: self.peek(-1).line), self.scopeDepth), tok)
  var lexer = newLexer()
  lexer.fillSymbolTable()
  # Search the module lookup paths for the file; an empty entry
  # means "relative to the importing file"
  var path = ""
  for i, searchPath in moduleLookupPaths:
    if searchPath == "":
      path = joinPath(getCurrentDir(), joinPath(splitPath(self.file).head, moduleName))
    else:
      path = joinPath(getCurrentDir(), joinPath(searchPath, moduleName))
    if fileExists(path):
      break
    elif i == moduleLookupPaths.high():
      # Bug fix: this used to compare i against searchPath.high()
      # (the last index of the *string*), so the "module not found"
      # error could fire on the wrong iteration or never at all
      self.error(&"""could not import '{path}': module not found""")
  try:
    # Pre-parse the imported module (persist=true keeps its operators),
    # saving and restoring our own parsing state around the call
    var source = readFile(path)
    var tree = self.tree
    var current = self.current
    var tokens = self.tokens
    var src = self.source
    var file = self.file
    discard self.parse(lexer.lex(source, path), file=path, source=source, lines=lexer.getLines(), persist=true)
    self.file = file
    self.source = src
    self.tree = tree
    self.current = current
    self.tokens = tokens
  except IOError:
    self.error(&"""could not import '{path}': {getCurrentExceptionMsg()}""")
  except OSError:
    self.error(&"""could not import '{path}': {getCurrentExceptionMsg()} [errno {osLastError()}]""")
proc tryStmt(self: Parser): Statement =
  ## Parses try/except/else/finally blocks
  let tok = self.peek(-1)
  self.expect(LeftBrace, "expecting '{' after 'try'")
  var body = self.blockStmt()
  var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[]
  var finallyClause: Statement
  var elseClause: Statement
  while self.match(Except):
    if self.match(LeftBrace):
      # Bare "except {": a catch-all handler, so there is no
      # exception name to record.
      # Bug fix: the two branches had their exc values swapped —
      # the bare except stored a bogus identifier (built from the
      # brace token) while the named except discarded its name
      handlers.add((body: self.blockStmt(), exc: IdentExpr(nil)))
    else:
      self.expect(Identifier, "expecting exception name after 'except'")
      let excName = newIdentExpr(self.peek(-1))
      self.expect(LeftBrace, "expecting '{' after exception name")
      handlers.add((body: self.blockStmt(), exc: excName))
  if self.match(Else):
    self.expect(LeftBrace, "expecting '{' after 'else' name")
    elseClause = self.blockStmt()
  if self.match(Finally):
    self.expect(LeftBrace, "expecting '{' after 'finally'")
    finallyClause = self.blockStmt()
  if handlers.len() == 0 and elseClause.isNil() and finallyClause.isNil():
    self.error("expecting 'except', 'finally' or 'else' statement after 'try' block", tok)
  for i, handler in handlers:
    if handler.exc.isNil() and i != handlers.high():
      # Bug fix: the error used to reference handler.exc.token, but
      # handler.exc is nil on this path; point at the handler body's
      # token instead
      self.error("catch-all exception handler with bare 'except' must come last in try statement", handler.body.token)
  result = newTryStmt(body, handlers, finallyClause, elseClause, tok)
proc whileStmt(self: Parser): Statement =
  ## Parses a C-style while loop statement
  let keyword = self.peek(-1)
  self.beginScope()
  # Remember the surrounding loop context so nested loops
  # restore it correctly
  let outerLoop = self.currentLoop
  let condition = self.expression()
  self.expect(LeftBrace)
  self.currentLoop = Loop
  result = newWhileStmt(condition, self.blockStmt(), keyword)
  self.currentLoop = outerLoop
  self.endScope()
proc ifStmt(self: Parser): Statement =
  ## Parses if statements
  let keyword = self.peek(-1)
  let condition = self.expression()
  self.expect(LeftBrace)
  let thenBranch = self.blockStmt()
  var elseBranch: Statement
  if self.match(Else):
    # "else if" chains recurse; a plain "else" takes a block
    if self.match(If):
      elseBranch = self.ifStmt()
    else:
      self.expect(LeftBrace, "expecting 'if' or block statement")
      elseBranch = self.blockStmt()
  result = newIfStmt(condition, thenBranch, elseBranch, keyword)
proc exportStmt(self: Parser): Statement =
  ## Parses export statements
  var exported: IdentExpr
  let tok = self.peek(-1)
  if not self.match(Identifier):
    self.error("expecting identifier after 'export' in export statement")
  exported = newIdentExpr(self.peek(-1))
  # Bug fix: the error message used to say "after 'raise'"
  # (copy-pasted from raiseStmt)
  endOfLine("missing semicolon after 'export'")
  result = newExportStmt(exported, tok)
template checkDecl(self: Parser, isPrivate: bool) =
  ## Handy utility template that avoids us from copy
  ## pasting the same checks to all declaration handlers.
  ## Rejects exported (public) names declared inside a
  ## local scope
  if not isPrivate and self.scopeDepth > 0:
    self.error("cannot bind public names inside local scopes")
proc parsePragmas(self: Parser): seq[Pragma] =
  ## Parses a bracket-delimited pragma list of the
  ## form [name, name: arg, name: (arg, arg), ...].
  ## Pragma arguments must be literals
  var
    name: IdentExpr
    args: seq[LiteralExpr]
    exp: Expression
    names: seq[string]
  while not self.match("]") and not self.done():
    args = @[]
    self.expect(Identifier, "expecting pragma name")
    if self.peek(-1).lexeme in names:
      self.error("duplicate pragmas are not allowed")
    names.add(self.peek(-1).lexeme)
    name = newIdentExpr(self.peek(-1), self.scopeDepth)
    if not self.match(":"):
      # No argument: a bare pragma closing the list
      if self.match("]"):
        result.add(newPragma(name, @[]))
        break
    elif self.match("("):
      # Parenthesized argument list: name: (a, b, ...)
      # Bug fix: the loop used to *consume* ')' in its condition and
      # then expect a LeftParen afterwards, so any valid argument
      # list such as (a, b) raised a spurious "unterminated
      # parenthesis" error. We now only peek in the loop and expect
      # the closing parenthesis once, after the loop
      while not self.check(")") and not self.done():
        exp = self.primary()
        if not exp.isLiteral():
          self.error("pragma arguments can only be literals", exp.token)
        args.add(LiteralExpr(exp))
        if not self.match(","):
          break
      self.expect(")", "unterminated parenthesis in pragma arguments")
    else:
      # Single argument: name: a
      exp = self.primary()
      if not exp.isLiteral():
        self.error("pragma arguments can only be literals", exp.token)
      args.add(LiteralExpr(exp))
    result.add(newPragma(name, args))
    if self.match(","):
      continue
proc varDecl(self: Parser, isLet: bool = false,
             isConst: bool = false): Declaration =
  ## Parses variable declarations
  var tok = self.peek(-1)
  var value: Expression
  self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
  var name = newIdentExpr(self.peek(-1), self.scopeDepth)
  # A trailing '*' marks the name as exported (public)
  let isPrivate = not self.match("*")
  self.checkDecl(isPrivate)
  var valueType: IdentExpr
  var hasInit = false
  var pragmas: seq[Pragma] = @[]
  if self.match(":"):
    # We don't enforce it here because
    # the compiler may be able to infer
    # the type later!
    self.expect(Identifier, "expecting type name after ':'")
    valueType = newIdentExpr(self.peek(-1), self.scopeDepth)
  if self.match("="):
    hasInit = true
    # The initializer may itself be a function expression
    if self.match([Function, Coroutine, Generator]):
      value = self.parseFunExpr()
    else:
      value = self.expression()
    if isConst and not value.isConst():
      self.error("constant initializer is not a constant")
  elif tok.kind != TokenType.Var:
    # let/const declarations always require an initializer
    self.error(&"{tok.lexeme} declaration requires an initializer")
  self.expect(Semicolon, "expecting semicolon after declaration")
  # Pragmas (if any) follow the declaration's semicolon
  if self.match(TokenType.Pragma):
    for pragma in self.parsePragmas():
      pragmas.add(pragma)
  case tok.kind:
    of TokenType.Var:
      result = newVarDecl(name, value, isPrivate = isPrivate, token = tok,
                          valueType = valueType, pragmas = (@[]))
    of Const:
      result = newVarDecl(name, value, isPrivate = isPrivate, token = tok,
                          isConst = true, valueType = valueType, pragmas = (@[]))
    of Let:
      result = newVarDecl(name, value, isPrivate = isPrivate, token = tok,
                          isLet = isLet, valueType = valueType, pragmas = (@[]))
    else:
      discard # Unreachable
  if not hasInit and VarDecl(result).valueType.isNil():
    self.error("expecting initializer or explicit type annotation, but neither was found", result.token)
  # Pragmas are attached after construction (the constructors
  # above deliberately receive an empty seq)
  result.pragmas = pragmas
proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression]],
                        parameter: var tuple[name: IdentExpr, valueType: Expression],
                        defaults: var seq[Expression]) =
  ## Helper to parse declaration arguments and avoid code duplication
  while not self.check(RightParen):
    if arguments.len > 255:
      self.error("cannot have more than 255 arguments in function declaration", self.peek(-1))
    self.expect(Identifier, "expecting parameter name")
    parameter.name = newIdentExpr(self.peek(-1), self.scopeDepth)
    if self.match(":"):
      parameter.valueType = self.expression()
      # Backfill the type onto preceding untyped parameters so
      # that "a, b: int" types both a and b as int
      for i in countdown(arguments.high(), 0):
        if arguments[i].valueType != nil:
          break
        arguments[i].valueType = parameter.valueType
    else:
      parameter.valueType = nil
    if parameter in arguments:
      self.error("duplicate parameter name in function declaration", parameter.name.token)
    arguments.add(parameter)
    if self.match("="):
      defaults.add(self.expression())
    elif defaults.len() > 0:
      # Once a default appears, all following parameters need one
      self.error("positional argument cannot follow default argument in function declaration", parameter.name.token)
    if not self.match(Comma):
      break
  self.expect(RightParen)
  # Every parameter must have a type by now (explicit or backfilled)
  for argument in arguments:
    if argument.valueType.isNil():
      self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
proc parseFunExpr(self: Parser): LambdaExpr =
  ## Parses the return value of a function
  ## when it is another function. Works
  ## recursively
  var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
  var defaults: seq[Expression] = @[]
  # peek(-1) is the fn/coroutine/generator keyword that led here;
  # it determines the lambda's async/generator flags
  result = newLambdaExpr(arguments, defaults, nil, isGenerator=self.peek(-1).kind == Generator,
                         isAsync=self.peek(-1).kind == Coroutine, token=self.peek(-1),
                         returnType=nil, depth=self.scopeDepth)
  var parameter: tuple[name: IdentExpr, valueType: Expression]
  if self.match(LeftParen):
    self.parseDeclArguments(arguments, parameter, defaults)
  if self.match(":"):
    if self.match([Function, Coroutine, Generator]):
      # The return type is itself a function type: recurse
      result.returnType = self.parseFunExpr()
    else:
      result.returnType = self.expression()
  result.arguments = arguments
  result.defaults = defaults
proc parseGenericConstraint(self: Parser): Expression =
  ## Recursively parses a generic constraint
  ## and returns it as an expression
  result = self.expression() # First value is always an identifier of some sort
  if not self.check(RightBracket):
    case self.peek().lexeme:
      of "|":
        # Union constraint: a | b (right-recursive)
        result = newBinaryExpr(result, self.step(), self.parseGenericConstraint())
      of "~":
        # Exclusion constraint: ~a
        result = newUnaryExpr(self.step(), result)
      else:
        self.error("invalid type constraint in generic declaration")
proc parseGenerics(self: Parser, decl: Declaration) =
  ## Parses generics in declarations
  var generic: tuple[name: IdentExpr, cond: Expression]
  # Each entry is "name: constraint", separated by commas and
  # terminated by a closing bracket
  while not self.check(RightBracket) and not self.done():
    self.expect(Identifier, "expecting generic type name")
    generic.name = newIdentExpr(self.peek(-1), self.scopeDepth)
    self.expect(":", "expecting type constraint after generic name")
    generic.cond = self.parseGenericConstraint()
    decl.generics.add(generic)
    if not self.match(Comma):
      break
  self.expect(RightBracket)
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
             isLambda: bool = false, isOperator: bool = false): Declaration = # Can't use just FunDecl because it can also return LambdaExpr!
    ## Parses all types of functions, coroutines, generators and operators
    ## (with or without a name, where applicable). Returns a FunDecl for
    ## named declarations and a LambdaExpr for anonymous functions
    # The keyword that triggered this call (fn/coroutine/generator/operator)
    let tok = self.peek(-1)
    # Saved and restored at the end so nested function declarations don't
    # clobber the parser's notion of the enclosing function
    var enclosingFunction = self.currentFunction
    var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
    var defaults: seq[Expression] = @[]
    var returnType: Expression
    var pragmas: seq[Pragma] = @[]
    if not isLambda and self.match(Identifier):
        # We do this extra check because we might
        # be called from a context where it's
        # ambiguous whether we're parsing a declaration
        # or an expression. Fortunately anonymous functions
        # are nameless, so we can sort the ambiguity by checking
        # if there's an identifier after the keyword
        self.currentFunction = newFunDecl(newIdentExpr(self.peek(-1), self.scopeDepth), arguments, defaults, newBlockStmt(@[], Token()),
                                          isAsync=isAsync,
                                          isGenerator=isGenerator,
                                          isPrivate=true,
                                          token=tok,
                                          returnType=nil,
                                          depth=self.scopeDepth)
        # A trailing '*' after the name exports the function
        if self.match("*"):
            FunDecl(self.currentFunction).isPrivate = false
        self.checkDecl(FunDecl(self.currentFunction).isPrivate)
        # Optional generic parameter list: fn foo[T: ...](...)
        if self.match(LeftBracket):
            self.parseGenerics(self.currentFunction)
    elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")):
        # We do a bit of hacking to pretend we never
        # wanted to parse this as a declaration in
        # the first place and pass control over to
        # expressionStatement(), which will in turn
        # go all the way up to primary(), which will
        # call us back with isLambda=true, allowing us
        # to actually parse the function as an expression
        while not self.check(tok.kind): # We rewind back to the token that caused us to be called
            dec(self.current)
        result = Declaration(self.expressionStatement())
        self.currentFunction = enclosingFunction
        return result
    elif isLambda:
        # Anonymous function: build a lambda node with an empty body
        # that gets filled in below
        self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=isGenerator, isAsync=isAsync, token=tok,
                                             returnType=nil, depth=self.scopeDepth)
    if self.match(":"):
        # Function has explicit return type
        if self.match([Function, Coroutine, Generator]):
            # The function's return type is another
            # function. We specialize this case because
            # the type declaration for a function lacks
            # the braces that would qualify it as an
            # expression
            returnType = self.parseFunExpr()
        else:
            returnType = self.expression()
    if self.match(LeftParen):
        # Parenthesized argument list, optionally followed by
        # a return type of its own
        var parameter: tuple[name: IdentExpr, valueType: Expression]
        self.parseDeclArguments(arguments, parameter, defaults)
        if self.match(":"):
            # Function's return type
            if self.match([Function, Coroutine, Generator]):
                returnType = self.parseFunExpr()
            else:
                returnType = self.expression()
    if self.currentFunction.kind == funDecl:
        if not self.match(Semicolon):
            # If we don't find a semicolon,
            # it's not a forward declaration
            self.expect(LeftBrace)
            # Pragmas may appear right after the opening brace
            if self.match(TokenType.Pragma):
                for pragma in self.parsePragmas():
                    pragmas.add(pragma)
            FunDecl(self.currentFunction).body = self.blockStmt()
        else:
            # This is a forward declaration, so we explicitly
            # nullify the function's body to tell the compiler
            # to look for it elsewhere in the file later
            FunDecl(self.currentFunction).body = nil
            if self.match(TokenType.Pragma):
                for pragma in self.parsePragmas():
                    pragmas.add(pragma)
        FunDecl(self.currentFunction).arguments = arguments
        FunDecl(self.currentFunction).returnType = returnType
    else:
        # Lambdas must always have a body (no forward declarations)
        self.expect(LeftBrace)
        if self.match(TokenType.Pragma):
            for pragma in self.parsePragmas():
                pragmas.add(pragma)
        LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt()
        LambdaExpr(Expression(self.currentFunction)).arguments = arguments
        LambdaExpr(Expression(self.currentFunction)).returnType = returnType
    result = self.currentFunction
    if isOperator:
        # Operators need at least one operand, a name, and fully
        # typed arguments
        if arguments.len() == 0:
            self.error("cannot declare operator without arguments")
        elif isLambda:
            self.error("cannot declare anonymous operator")
        for argument in arguments:
            if argument.valueType == nil:
                self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
    # Restore the outer function context before returning
    self.currentFunction = enclosingFunction
    result.pragmas = pragmas
proc expression(self: Parser): Expression =
    ## Parses a single expression by delegating to the
    ## highest-level rule of the precedence chain
    self.parseArrow()
proc expressionStatement(self: Parser): Statement =
    ## Parses an expression statement: a bare expression
    ## followed by its statement terminator (a semicolon)
    let parsed = self.expression()
    # Enforce the terminator before wrapping the node
    endOfLine("missing expression terminator", parsed.token)
    result = Statement(newExprStmt(parsed, parsed.token))
proc statement(self: Parser): Statement =
    ## Parses a single statement. Statements introduced by a
    ## keyword dispatch to their dedicated parsing routine;
    ## anything else is treated as an expression statement
    let kind = self.peek().kind
    case kind:
        of If, Assert, Raise, Break, Continue, Return, Import, Export,
           From, While, Foreach, LeftBrace, Yield, Await, Defer, Try:
            # Consume the introducing keyword, then dispatch on it
            discard self.step()
            case kind:
                of If:
                    result = self.ifStmt()
                of Assert:
                    result = self.assertStmt()
                of Raise:
                    result = self.raiseStmt()
                of Break:
                    result = self.breakStmt()
                of Continue:
                    result = self.continueStmt()
                of Return:
                    result = self.returnStmt()
                of Import:
                    result = self.importStmt()
                of Export:
                    result = self.exportStmt()
                of From:
                    # TODO
                    # from module import a [, b, c as d]
                    result = self.importStmt(fromStmt=true)
                of While:
                    result = self.whileStmt()
                of Foreach:
                    result = self.forEachStmt()
                of LeftBrace:
                    result = self.blockStmt()
                of Yield:
                    result = self.yieldStmt()
                of Await:
                    result = self.awaitStmt()
                of Defer:
                    result = self.deferStmt()
                of Try:
                    result = self.tryStmt()
                else:
                    discard  # Unreachable: outer branch covers exactly these kinds
        else:
            result = self.expressionStatement()
proc typeDecl(self: Parser): TypeDecl =
    ## Parses type declarations: plain objects, reference
    ## objects ("ref object") and enumerations ("enum")
    # The 'type' keyword itself (consumed by our caller)
    let token = self.peek(-1)
    self.expect(Identifier, "expecting type name after 'type'")
    # A trailing '*' after the name exports the type
    let isPrivate = not self.match("*")
    self.checkDecl(isPrivate)
    var name = newIdentExpr(self.peek(-1), self.scopeDepth)
    var fields: seq[tuple[name: IdentExpr, valueType: Expression, isPrivate: bool]] = @[]
    var defaults: seq[Expression] = @[]
    var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[]
    var pragmas: seq[Pragma] = @[]
    result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false)
    # Optional generic parameter list: type Foo[T: ...]
    if self.match(LeftBracket):
        self.parseGenerics(result)
    self.expect("=", "expecting '=' after type name")
    # The keyword after '=' decides what kind of type this is
    case self.step().lexeme:
        of "ref":
            self.expect("object", "expecting 'object' after 'ref'")
            result.isRef = true
        of "enum":
            result.isEnum = true
        of "object":
            discard # Default case
        else:
            self.error("invalid syntax")
    # Single inheritance via 'of' (not available for enums)
    if not result.isEnum and self.match("of"):
        self.expect(Identifier, "expecting parent type name after 'of'")
        # NOTE(review): unlike other newIdentExpr() call sites in this file,
        # this one does not pass self.scopeDepth -- confirm whether relying
        # on the default depth is intended here
        result.parent = newIdentExpr(self.peek(-1))
    self.expect(LeftBrace, "expecting '{' after type declaration")
    # Pragmas may appear right after the opening brace
    if self.match(TokenType.Pragma):
        for pragma in self.parsePragmas():
            pragmas.add(pragma)
    var
        argName: IdentExpr
        argPrivate: bool
        argType: Expression
    # Body: 'name[*]: Type [= default];' entries for objects,
    # comma-separated bare names for enums
    while not self.match(RightBrace) and not self.done():
        self.expect(Identifier, "expecting field name")
        argName = newIdentExpr(self.peek(-1), self.scopeDepth)
        if not result.isEnum:
            # Fields can be exported individually with '*'
            argPrivate = not self.match("*")
            self.expect(":", "expecting ':' after field name")
            argType = self.expression()
            result.fields.add((argName, argType, argPrivate))
            # NOTE(review): a default is only appended for fields that declare
            # one, so fields and defaults need not line up index-by-index --
            # verify against how the compiler consumes these sequences
            if self.match("="):
                result.defaults.add(self.expression())
        else:
            # Enum members carry no type annotation or visibility of their own
            result.fields.add((argName, nil, false))
        if not result.isEnum:
            self.expect(";", "expecting semicolon after type field declaration")
        else:
            # The comma is optional before the closing brace
            if not self.check(RightBrace):
                self.expect(",", "expecting comma after enum field declaration")
    result.pragmas = pragmas
proc declaration(self: Parser): Declaration =
    ## Parses a top-level declaration. Constructs that are
    ## not declarations fall through to statement parsing.
    ## May return nil (pragmas, comments), which callers
    ## are expected to filter out of the tree
    let tok = self.peek()
    case tok.kind:
        of TokenType.Var, Const, Let:
            discard self.step()
            result = self.varDecl(isLet = tok.kind == Let,
                                  isConst = tok.kind == Const)
        of Function, Coroutine, Generator, Operator:
            # All function-like declarations share a single parsing
            # routine, differentiated only by these flags
            discard self.step()
            result = self.funDecl(isAsync = tok.kind == Coroutine,
                                  isGenerator = tok.kind == Generator,
                                  isOperator = tok.kind == Operator)
        of TokenType.Pragma:
            # Free-standing pragmas go straight into the syntax
            # tree; result is deliberately left nil
            discard self.step()
            for p in self.parsePragmas():
                self.tree.add(p)
        of Type:
            discard self.step()
            result = self.typeDecl()
        of Comment:
            discard self.step() # TODO: Docstrings and stuff
        else:
            result = Declaration(self.statement())
proc findOperators(self: Parser, tokens: seq[Token]) =
    ## Pre-pass over a token stream that registers all
    ## user-defined operators with the parser's operator
    ## table before actual parsing begins
    let last = tokens.high()
    for i, token in tokens:
        if token.kind == Operator:
            # The operator's lexeme is the token right after the
            # 'operator' keyword, so the stream cannot end here
            if i == last:
                self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
            self.operators.addOperator(tokens[i + 1].lexeme)
        # Since we're iterating the whole list anyway, also enforce
        # the lexer's invariant that the input ends with an EOF token
        if i == last and token.kind != EndOfFile:
            self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] =
    ## Parses a sequence of tokens into a sequence of AST
    ## nodes. When persist is true, the operator table from
    ## a previous run is kept instead of being rebuilt
    # Reset all per-run parser state
    self.current = 0
    self.scopeDepth = 0
    self.tokens = tokens
    self.file = file
    self.source = source
    self.lines = lines
    self.currentLoop = LoopContext.None
    self.currentFunction = nil
    self.tree = @[]
    if not persist:
        self.operators = newOperatorTable()
    # Pre-pass: register custom operators so precedence
    # parsing can recognize them
    self.findOperators(tokens)
    while not self.done():
        let decl = self.declaration()
        # Some constructs (pragmas, comments) produce no node
        # of their own: don't keep nil placeholders around
        if decl != nil:
            self.tree.add(decl)
    result = self.tree