# Copyright 2022 Mattia Giambirtone & All Contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ## A recursive-descent top-down parser implementation import std/strformat import std/strutils import std/tables import std/os import meta/token import meta/ast import meta/errors import lexer as l import ../util/symbols export token, ast, errors type LoopContext {.pure.} = enum Loop, None Precedence {.pure.} = enum ## Operator precedence ## clearly stolen from ## nim Arrow = 0, Assign, Or, And, Compare, Addition, Multiplication, Power, None # Used for stuff that isn't an operator OperatorTable = ref object ## A table for storing and ## handling the precedence ## of operators tokens: seq[string] precedence: TableRef[Precedence, seq[string]] Parser* = ref object ## A recursive-descent top-down ## parser implementation # Index into self.tokens current: int # The name of the file being parsed. # Only meaningful for parse errors file: string # The list of tokens representing # the source code to be parsed. # In most cases, those will come # from the builtin lexer, but this # behavior is not enforced and the # tokenizer is entirely separate from # the parser tokens: seq[Token] # Little internal attribute that tells # us if we're inside a loop or not. This # allows us to detect errors like break # being used outside loops currentLoop: LoopContext # Stores the current function # being parsed. This is a reference # to either a FunDecl or LambdaExpr # AST node and is nil when the parser # is at the top-level. It allows the # parser to detect errors like return # outside functions currentFunction: Declaration # Stores the current scope depth (0 = global, > 0 local) scopeDepth: int # TODO scopes: seq[Declaration] operators: OperatorTable # The AST node tree: seq[Declaration] # Stores line data lines: seq[tuple[start, stop: int]] # The source of the current module source: string ParseError* = ref object of PeonException parser*: Parser file*: string token*: Token module*: string proc newOperatorTable: OperatorTable = ## Initializes a new OperatorTable ## object new(result) result.tokens = @[] result.precedence = newTable[Precedence, seq[string]]() for prec in Precedence: result.precedence[prec] = @[] proc addOperator(self: OperatorTable, lexeme: string) = ## Adds an operator to the table. Its precedence ## is inferred from the operator's lexeme (the ## criteria are similar to Nim's) if lexeme in self.tokens: return # We've already added it! 
var prec = Power if lexeme.len() >= 2 and lexeme[^2..^1] in ["->", "~>", "=>"]: prec = Arrow elif lexeme.endsWith("=") and lexeme[0] notin {'<', '>', '!', '?', '~', '='} or lexeme == "=": prec = Assign elif lexeme[0] in {'$', } or lexeme == "**": prec = Power elif lexeme[0] in {'*', '%', '/', '\\'}: prec = Multiplication elif lexeme[0] in {'+', '-', '|', '~'}: prec = Addition elif lexeme[0] in {'<', '>', '=', '!'}: prec = Compare elif lexeme == "and": prec = Precedence.And elif lexeme == "or": prec = Precedence.Or self.tokens.add(lexeme) self.precedence[prec].add(lexeme) proc getPrecedence(self: OperatorTable, lexeme: string): Precedence = ## Gets the precedence of a given operator for (prec, operators) in self.precedence.pairs(): if lexeme in operators: return prec proc newParser*: Parser = ## Initializes a new Parser object new(result) result.current = 0 result.file = "" result.tokens = @[] result.currentFunction = nil result.currentLoop = LoopContext.None result.scopeDepth = 0 result.operators = newOperatorTable() result.tree = @[] result.source = "" # Public getters for improved error formatting proc getCurrent*(self: Parser): int {.inline.} = self.current proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >= self.tokens.high() or self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1]) proc getCurrentFunction*(self: Parser): Declaration {.inline.} = self.currentFunction proc getFile*(self: Parser): string {.inline.} = self.file proc getModule*(self: Parser): string {.inline.} = self.getFile().splitFile().name proc getLines*(self: Parser): seq[tuple[start, stop: int]] = self.lines proc getSource*(self: Parser): string = self.source proc getRelPos*(self: Parser, line: int): tuple[start, stop: int] = self.lines[line - 1] template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1) template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg, tok) proc peek(self: Parser, distance: int = 0): Token = ## Peeks at the token at the given distance. ## If the distance is out of bounds, an EOF ## token is returned. A negative distance may ## be used to retrieve previously consumed ## tokens if self.tokens.high() == -1 or self.current + distance > self.tokens.high( ) or self.current + distance < 0: result = endOfFile else: result = self.tokens[self.current + distance] proc done(self: Parser): bool {.inline.} = ## Returns true if we're at the ## end of the file. Note that the ## parser expects an explicit ## EOF token to signal the end ## of the file result = self.peek().kind == EndOfFile proc step(self: Parser, n: int = 1): Token = ## Steps n tokens into the input, ## returning the last consumed one if self.done(): result = self.peek() else: result = self.tokens[self.current] self.current += 1 proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseError].} = ## Raises a ParseError exception raise ParseError(msg: message, token: if token.isNil(): self.getCurrentToken() else: token, file: self.file, module: self.getModule(), parser: self) # Why do we allow strings or enum members of TokenType? Well, it's simple: # symbols like ":" and "=" are both valid operator names (therefore they are # tokenized as symbols), but they are also used in a context where they are just # separators (for example, the colon is used in type declarations). 
Since we can't # tell at tokenization time which of the two contexts we're in, we just treat everything # as a symbol and in the cases where we need a specific token we just match the string # directly proc check[T: TokenType or string](self: Parser, kind: T, distance: int = 0): bool = ## Checks if the given token at the given distance ## matches the expected kind and returns a boolean. ## The distance parameter is passed directly to ## self.peek() when T is TokenType: self.peek(distance).kind == kind else: when T is string: self.peek(distance).lexeme == kind proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = ## Calls self.check() in a loop with each entry of ## the given openarray of token kinds and returns ## at the first match. Note that this assumes ## that only one token may match at a given ## position for k in kind: if self.check(k): return true return false proc match[T: TokenType or string](self: Parser, kind: T): bool = ## Behaves like self.check(), except that when a token ## matches it is also consumed if self.check(kind): discard self.step() result = true else: result = false proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = ## Calls self.match() in a loop with each entry of ## the given openarray of token kinds and returns ## at the first match. Note that this assumes ## that only one token may exist at a given ## position for k in kind: if self.match(k): return true result = false proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) = ## Behaves like self.match(), except that ## when a token doesn't match, an error ## is raised. If no error message is ## given, a default one is used if not self.match(kind): if message.len() == 0: self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead", token) else: self.error(message) proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} = ## Behaves like self.expect(), except that ## an error is raised only if none of the ## given token kinds matches for k in kind: if self.match(kind): return if message.len() == 0: self.error(&"""expecting any of the following tokens: {kind.join(", ")}, but got {self.peek().kind} instead""", token) # Forward declarations proc expression(self: Parser): Expression proc expressionStatement(self: Parser): Statement proc statement(self: Parser): Statement proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration proc declaration(self: Parser): Declaration # End of forward declarations proc primary(self: Parser): Expression = ## Parses primary expressions such ## as integer literals and keywords ## that map to builtin types (true, ## false, nil, etc.) 
case self.peek().kind: of True: result = newTrueExpr(self.step()) of False: result = newFalseExpr(self.step()) of TokenType.NotANumber: result = newNanExpr(self.step()) of Nil: result = newNilExpr(self.step()) of Float: result = newFloatExpr(self.step()) of Integer: result = newIntExpr(self.step()) of Identifier: result = newIdentExpr(self.step(), self.scopeDepth) of LeftParen: let tok = self.step() result = newGroupingExpr(self.expression(), tok) self.expect(RightParen, "unterminated parenthesized expression") of Yield: let tok = self.step() if self.currentFunction.isNil(): self.error("'yield' cannot be used outside functions", tok) elif self.currentFunction.token.kind != Generator: # It's easier than doing conversions for lambda/funDecl self.error("'yield' cannot be used outside generators", tok) if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]): # Expression delimiters result = newYieldExpr(self.expression(), tok) else: # Empty yield result = newYieldExpr(newNilExpr(Token()), tok) of Await: let tok = self.step() if self.currentFunction.isNil(): self.error("'await' cannot be used outside functions", tok) if self.currentFunction.token.kind != Coroutine: self.error("'await' can only be used inside coroutines", tok) result = newAwaitExpr(self.expression(), tok) of RightParen, RightBracket, RightBrace: # This is *technically* unnecessary: the parser would # throw an error regardless, but it's a little bit nicer # when the error message is more specific self.error(&"unmatched '{self.peek().lexeme}'") of Hex: result = newHexExpr(self.step()) of Octal: result = newOctExpr(self.step()) of Binary: result = newBinExpr(self.step()) of String: result = newStrExpr(self.step()) of Infinity: result = newInfExpr(self.step()) of Function: discard self.step() result = Expression(self.funDecl(isLambda=true)) of Coroutine: discard self.step() result = Expression(self.funDecl(isAsync=true, isLambda=true)) of Generator: discard self.step() result = Expression(self.funDecl(isGenerator=true, isLambda=true)) of TokenType.Var: discard self.step() result = newVarExpr(self.expression(), self.peek(-1)) of TokenType.Ref: discard self.step() result = newRefExpr(self.expression(), self.peek(-1)) of TokenType.Ptr: discard self.step() result = newPtrExpr(self.expression(), self.peek(-1)) else: self.error("invalid syntax") proc makeCall(self: Parser, callee: Expression): CallExpr = ## Utility function called iteratively by self.call() ## to parse a function call let tok = self.peek(-1) var argNames: seq[IdentExpr] = @[] var arguments: tuple[positionals: seq[Expression], keyword: seq[tuple[ name: IdentExpr, value: Expression]]] = (positionals: @[], keyword: @[]) var argument: Expression = nil var argCount = 0 if not self.check(RightParen): while true: if argCount >= 255: self.error("call can not have more than 255 arguments") break argument = self.expression() if argument.kind == binaryExpr and BinaryExpr(argument).operator.lexeme == "=": # TODO: This will explode with slices! 
                echo argument
                if IdentExpr(BinaryExpr(argument).a) in argNames:
                    self.error("duplicate keyword argument in call")
                argNames.add(IdentExpr(BinaryExpr(argument).a))
                arguments.keyword.add((name: IdentExpr(BinaryExpr(argument).a), value: BinaryExpr(argument).b))
            elif arguments.keyword.len() == 0:
                arguments.positionals.add(argument)
            else:
                self.error("positional argument cannot follow keyword argument in call")
            if not self.match(Comma):
                break
            argCount += 1
    self.expect(RightParen)
    result = newCallExpr(callee, arguments, tok)


proc parseGenericArgs(self: Parser) =
    ## Parses function generic arguments
    ## like function[type](arg)
    discard


proc call(self: Parser): Expression =
    ## Parses function calls and object field
    ## accessing
    result = self.primary()
    while true:
        if self.match(LeftParen):
            result = self.makeCall(result)
        elif self.match(Dot):
            self.expect(Identifier, "expecting attribute name after '.'")
            result = newGetItemExpr(result, newIdentExpr(self.peek(-1), self.scopeDepth), self.peek(-1))
        elif self.match(LeftBracket):
            self.parseGenericArgs()  # TODO
            result = self.makeCall(result)
        else:
            break


## Operator parsing handlers


proc unary(self: Parser): Expression =
    ## Parses unary expressions
    if self.peek().kind in [Identifier, Symbol] and self.peek().lexeme in self.operators.tokens:
        result = newUnaryExpr(self.step(), self.unary())
    else:
        result = self.call()


proc parsePow(self: Parser): Expression =
    ## Parses power expressions
    result = self.unary()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Power:
        operator = self.step()
        right = self.unary()
        result = newBinaryExpr(result, operator, right)


proc parseMul(self: Parser): Expression =
    ## Parses multiplication and division
    ## expressions
    result = self.parsePow()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Multiplication:
        operator = self.step()
        right = self.parsePow()
        result = newBinaryExpr(result, operator, right)


proc parseAdd(self: Parser): Expression =
    ## Parses addition and subtraction
    ## expressions
    result = self.parseMul()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Addition:
        operator = self.step()
        right = self.parseMul()
        result = newBinaryExpr(result, operator, right)


proc parseCmp(self: Parser): Expression =
    ## Parses comparison expressions
    result = self.parseAdd()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Compare:
        operator = self.step()
        right = self.parseAdd()
        result = newBinaryExpr(result, operator, right)


proc parseAnd(self: Parser): Expression =
    ## Parses logical and expressions
    result = self.parseCmp()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Precedence.And:
        operator = self.step()
        right = self.parseCmp()
        result = newBinaryExpr(result, operator, right)


proc parseOr(self: Parser): Expression =
    ## Parses logical or expressions
    result = self.parseAnd()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
        operator = self.step()
        right = self.parseAnd()
        result = newBinaryExpr(result, operator, right)


proc parseAssign(self: Parser): Expression =
    ## Parses assignment expressions
    result = self.parseOr()
    if self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Assign:
        let tok = self.step()
        var value = self.expression()
        case result.kind:
            of identExpr, sliceExpr:
                result = newAssignExpr(result, value, tok)
            of getItemExpr:
                result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
            else:
                self.error("invalid assignment target", tok)


proc parseArrow(self: Parser): Expression =
    ## Parses arrow expressions
    result = self.parseAssign()
    var operator: Token
    var right: Expression
    while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Arrow:
        operator = self.step()
        right = self.parseAssign()
        result = newBinaryExpr(result, operator, right)


## End of operator parsing handlers


proc assertStmt(self: Parser): Statement =
    ## Parses "assert" statements, which
    ## raise an error if the expression
    ## fed into them is false
    let tok = self.peek(-1)
    var expression = self.expression()
    endOfLine("missing statement terminator after 'assert'")
    result = newAssertStmt(expression, tok)


proc beginScope(self: Parser) =
    ## Begins a new lexical scope
    inc(self.scopeDepth)


proc endScope(self: Parser) =
    ## Ends the current lexical scope
    dec(self.scopeDepth)


proc blockStmt(self: Parser): Statement =
    ## Parses block statements. A block
    ## statement simply opens a new local
    ## scope
    self.beginScope()
    let tok = self.peek(-1)
    var code: seq[Declaration] = @[]
    while not self.check(RightBrace) and not self.done():
        code.add(self.declaration())
        if code[^1].isNil():
            code.delete(code.high())
    self.expect(RightBrace, "expecting '}'")
    result = newBlockStmt(code, tok)
    self.endScope()


proc breakStmt(self: Parser): Statement =
    ## Parses break statements
    let tok = self.peek(-1)
    if self.currentLoop != Loop:
        self.error("'break' cannot be used outside loops")
    endOfLine("missing statement terminator after 'break'")
    result = newBreakStmt(tok)


proc deferStmt(self: Parser): Statement =
    ## Parses defer statements
    let tok = self.peek(-1)
    if self.currentFunction.isNil():
        self.error("'defer' cannot be used outside functions")
    result = newDeferStmt(self.expression(), tok)
    endOfLine("missing statement terminator after 'defer'")


proc continueStmt(self: Parser): Statement =
    ## Parses continue statements
    let tok = self.peek(-1)
    if self.currentLoop != Loop:
        self.error("'continue' cannot be used outside loops")
    endOfLine("missing statement terminator after 'continue'")
    result = newContinueStmt(tok)


proc returnStmt(self: Parser): Statement =
    ## Parses return statements
    let tok = self.peek(-1)
    if self.currentFunction.isNil():
        self.error("'return' cannot be used outside functions")
    var value: Expression
    if not self.check(Semicolon):
        # Since return can be used on its own too
        # we need to check if there's an actual value
        # to return or not
        value = self.expression()
    endOfLine("missing statement terminator after 'return'")
    result = newReturnStmt(value, tok)
    case self.currentFunction.kind:
        of NodeKind.funDecl:
            FunDecl(self.currentFunction).hasExplicitReturn = true
        else:
            LambdaExpr(self.currentFunction).hasExplicitReturn = true


proc yieldStmt(self: Parser): Statement =
    ## Parses yield statements
    let tok = self.peek(-1)
    if self.currentFunction.isNil():
        self.error("'yield' cannot be used outside functions")
    elif self.currentFunction.token.kind != Generator:
        self.error("'yield' can only be used inside generators")
    if not self.check(Semicolon):
        result = newYieldStmt(self.expression(), tok)
    else:
        result = newYieldStmt(newNilExpr(Token(lexeme: "nil")), tok)
    endOfLine("missing statement terminator after 'yield'")


proc awaitStmt(self: Parser): Statement =
    ## Parses await statements
    let tok = self.peek(-1)
    if self.currentFunction.isNil():
        self.error("'await' cannot be used outside functions")
    if self.currentFunction.token.kind != Coroutine:
        self.error("'await' can only be used inside coroutines")
    result = newAwaitStmt(self.expression(), tok)
    endOfLine("missing statement terminator after 'await'")


proc raiseStmt(self: Parser): Statement =
    ## Parses raise statements
    var exception: Expression
    let tok = self.peek(-1)
    if not self.check(Semicolon):
        # Raise can be used on its own, in which
        # case it re-raises the last active exception
        exception = self.expression()
    endOfLine("missing statement terminator after 'raise'")
    result = newRaiseStmt(exception, tok)


proc forEachStmt(self: Parser): Statement =
    ## Parses C#-like foreach loops
    let tok = self.peek(-1)
    let enclosingLoop = self.currentLoop
    self.currentLoop = Loop
    self.expect(Identifier)
    let identifier = newIdentExpr(self.peek(-1), self.scopeDepth)
    self.expect("in")
    let expression = self.expression()
    self.expect(LeftBrace)
    result = newForEachStmt(identifier, expression, self.blockStmt(), tok)
    self.currentLoop = enclosingLoop


proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
proc findOperators(self: Parser, tokens: seq[Token])


proc importStmt(self: Parser, fromStmt: bool = false): Statement =
    ## Parses import statements
    if self.scopeDepth > 0:
        self.error("import statements are only allowed at the top level")
    var tok: Token
    if fromStmt:
        tok = self.peek(-2)
    else:
        tok = self.peek(-1)
    # TODO: New AST node
    self.expect(Identifier, "expecting module name(s) after import statement")
    endOfLine("missing statement terminator after 'import'")
    result = newImportStmt(newIdentExpr(self.peek(-2), self.scopeDepth), tok)
    var filename = ImportStmt(result).moduleName.token.lexeme & ".pn"
    var lexer = newLexer()
    lexer.fillSymbolTable()
    let path = joinPath(splitPath(self.file).head, filename)
    # TODO: This is obviously horrible.
It's just a test try: self.findOperators(lexer.lex(readFile(path), filename)) except IOError: self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""") except OSError: self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()} [errno {osLastError()}]""") proc tryStmt(self: Parser): Statement = ## Parses try/except/else/finally blocks let tok = self.peek(-1) var body = self.statement() var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[] var finallyClause: Statement var elseClause: Statement var excName: Expression var handlerBody: Statement while self.match(Except): excName = self.expression() if excName.kind == identExpr: handlerBody = self.statement() handlers.add((body: handlerBody, exc: IdentExpr(excName))) else: excName = nil if self.match(Else): elseClause = self.statement() if self.match(Finally): finallyClause = self.statement() if handlers.len() == 0 and elseClause.isNil() and finallyClause.isNil(): self.error("expecting 'except', 'finally' or 'else' statement after 'try' block", tok) for i, handler in handlers: if handler.exc.isNil() and i != handlers.high(): self.error("catch-all exception handler with bare 'except' must come last in try statement", handler.exc.token) result = newTryStmt(body, handlers, finallyClause, elseClause, tok) proc whileStmt(self: Parser): Statement = ## Parses a C-style while loop statement let tok = self.peek(-1) self.beginScope() let enclosingLoop = self.currentLoop let condition = self.expression() self.expect(LeftBrace) self.currentLoop = Loop result = newWhileStmt(condition, self.blockStmt(), tok) self.currentLoop = enclosingLoop self.endScope() #[ proc forStmt(self: Parser): Statement = ## Parses a C-style for loop self.beginScope() let tok = self.peek(-1) var enclosingLoop = self.currentLoop self.currentLoop = Loop self.expect(LeftParen, "expecting '(' after 'for'") var initializer: ASTNode = nil var condition: Expression = nil var increment: Expression = nil if self.match(Semicolon): discard elif self.match(TokenType.Var): initializer = self.varDecl() if not VarDecl(initializer).isPrivate: self.error("cannot declare public for loop initializer") else: initializer = self.expressionStatement() if not self.check(Semicolon): condition = self.expression() self.expect(Semicolon, "expecting ';' after for loop condition") if not self.check(RightParen): increment = self.expression() self.expect(RightParen, "unterminated for loop increment") var body = self.statement() if not increment.isNil(): # The increment runs after each iteration, so we # inject it into the block as the last statement body = newBlockStmt(@[Declaration(body), newExprStmt(increment, increment.token)], tok) if condition.isNil(): ## An empty condition is functionally ## equivalent to "true" condition = newTrueExpr(Token(lexeme: "true")) # We can use a while loop, which in this case works just as well body = newWhileStmt(condition, body, tok) if not initializer.isNil(): # Nested blocks, so the initializer is # only executed once body = newBlockStmt(@[Declaration(initializer), Declaration(body)], tok) # This desgugars the following code: # for (var i = 0; i < 10; i += 1) { # print(i); # } # To the semantically equivalent snippet # below: # { # var i = 0; # while (i < 10) { # print(i); # i += 1; # } # } result = body self.currentLoop = enclosingLoop self.endScope() ]# proc ifStmt(self: Parser): Statement = ## Parses if statements let tok = self.peek(-1) let condition = self.expression() self.expect(LeftBrace) let thenBranch = self.blockStmt() var 
elseBranch: Statement if self.match(Else): if self.match(If): elseBranch = self.ifStmt() else: self.expect(LeftBrace, "expecting 'if' or block statement") elseBranch = self.blockStmt() result = newIfStmt(condition, thenBranch, elseBranch, tok) template checkDecl(self: Parser, isPrivate: bool) = ## Handy utility template that avoids us from copy ## pasting the same checks to all declaration handlers if not isPrivate and self.scopeDepth > 0: self.error("cannot bind public names inside local scopes") proc parsePragmas(self: Parser): seq[Pragma] = ## Parses pragmas var name: IdentExpr args: seq[LiteralExpr] exp: Expression names: seq[string] while not self.match("]") and not self.done(): args = @[] self.expect(Identifier, "expecting pragma name") if self.peek(-1).lexeme in names: self.error("duplicate pragmas are not allowed") names.add(self.peek(-1).lexeme) name = newIdentExpr(self.peek(-1), self.scopeDepth) if not self.match(":"): if self.match("]"): result.add(newPragma(name, @[])) break elif self.match("("): while not self.match(")") and not self.done(): exp = self.primary() if not exp.isLiteral(): self.error("pragma arguments can only be literals", exp.token) args.add(LiteralExpr(exp)) if not self.match(","): break self.expect(LeftParen, "unterminated parenthesis in pragma arguments") else: exp = self.primary() if not exp.isLiteral(): self.error("pragma arguments can only be literals", exp.token) args.add(LiteralExpr(exp)) result.add(newPragma(name, args)) if self.match(","): continue proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration = ## Parses variable declarations var tok = self.peek(-1) var value: Expression self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'") var name = newIdentExpr(self.peek(-1), self.scopeDepth) let isPrivate = not self.match("*") self.checkDecl(isPrivate) var valueType: IdentExpr var hasInit = false var pragmas: seq[Pragma] = @[] if self.match(":"): # We don't enforce it here because # the compiler may be able to infer # the type later! 
self.expect(Identifier, "expecting type name after ':'") valueType = newIdentExpr(self.peek(-1), self.scopeDepth) if self.match("="): hasInit = true value = self.expression() if isConst and not value.isConst(): self.error("constant initializer is not a constant") else: if tok.kind != TokenType.Var: self.error(&"{tok.lexeme} declaration requires an initializer") value = newNilExpr(Token(lexeme: "nil")) self.expect(Semicolon, "expecting semicolon after declaration") if self.match(TokenType.Pragma): for pragma in self.parsePragmas(): pragmas.add(pragma) case tok.kind: of TokenType.Var: result = newVarDecl(name, value, isPrivate = isPrivate, token = tok, valueType = valueType, pragmas = (@[])) of Const: result = newVarDecl(name, value, isPrivate = isPrivate, token = tok, isConst = true, valueType = valueType, pragmas = (@[])) of Let: result = newVarDecl(name, value, isPrivate = isPrivate, token = tok, isLet = isLet, valueType = valueType, pragmas = (@[])) else: discard # Unreachable if not hasInit and VarDecl(result).valueType.isNil(): self.error("expecting initializer or explicit type annotation, but neither was found", result.token) result.pragmas = pragmas proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression]], parameter: var tuple[name: IdentExpr, valueType: Expression], defaults: var seq[Expression]) = ## Helper to parse declaration arguments and avoid code duplication while not self.check(RightParen): if arguments.len > 255: self.error("cannot have more than 255 arguments in function declaration", self.peek(-1)) self.expect(Identifier, "expecting parameter name") parameter.name = newIdentExpr(self.peek(-1), self.scopeDepth) if self.match(":"): parameter.valueType = self.expression() for i in countdown(arguments.high(), 0): if arguments[i].valueType != nil: break arguments[i].valueType = parameter.valueType else: parameter.valueType = nil if parameter in arguments: self.error("duplicate parameter name in function declaration", parameter.name.token) arguments.add(parameter) if self.match("="): defaults.add(self.expression()) elif defaults.len() > 0: self.error("positional argument cannot follow default argument in function declaration", parameter.name.token) if not self.match(Comma): break self.expect(RightParen) for argument in arguments: if argument.valueType.isNil(): self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration") proc parseFunExpr(self: Parser): LambdaExpr = ## Parses the return value of a function ## when it is another function. 
Works ## recursively var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[] var defaults: seq[Expression] = @[] result = newLambdaExpr(arguments, defaults, nil, isGenerator=self.peek(-1).kind == Generator, isAsync=self.peek(-1).kind == Coroutine, token=self.peek(-1), returnType=nil, depth=self.scopeDepth) var parameter: tuple[name: IdentExpr, valueType: Expression] if self.match(LeftParen): self.parseDeclArguments(arguments, parameter, defaults) if self.match(":"): if self.match([Function, Coroutine, Generator]): result.returnType = self.parseFunExpr() else: result.returnType = self.expression() proc parseGenerics(self: Parser, decl: Declaration) = ## Parses generics in declarations var gen: tuple[name: IdentExpr, cond: Expression] while not self.check(RightBracket) and not self.done(): self.expect(Identifier, "expecting generic type name") gen.name = newIdentExpr(self.peek(-1), self.scopeDepth) self.expect(":", "expecting type constraint after generic name") gen.cond = self.expression() decl.generics.add(gen) if not self.match(Comma): break self.expect(RightBracket) proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration = # Can't use just FunDecl because it can also return LambdaExpr! ## Parses all types of functions, coroutines, generators and operators ## (with or without a name, where applicable) let tok = self.peek(-1) var enclosingFunction = self.currentFunction var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[] var defaults: seq[Expression] = @[] var returnType: Expression var pragmas: seq[Pragma] = @[] if not isLambda and self.match(Identifier): # We do this extra check because we might # be called from a context where it's # ambiguous whether we're parsing a declaration # or an expression. Fortunately anonymous functions # are nameless, so we can sort the ambiguity by checking # if there's an identifier after the keyword self.currentFunction = newFunDecl(newIdentExpr(self.peek(-1), self.scopeDepth), arguments, defaults, newBlockStmt(@[], Token()), isAsync=isAsync, isGenerator=isGenerator, isPrivate=true, token=tok, returnType=nil, depth=self.scopeDepth) if self.match("*"): FunDecl(self.currentFunction).isPrivate = false self.checkDecl(FunDecl(self.currentFunction).isPrivate) if self.match(LeftBracket): self.parseGenerics(self.currentFunction) elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")): # We do a bit of hacking to pretend we never # wanted to parse this as a declaration in # the first place and pass control over to # expressionStatement(), which will in turn # go all the way up to primary(), which will # call us back with isLambda=true, allowing us # to actually parse the function as an expression while not self.check(tok.kind): # We rewind back to the token that caused us to be called dec(self.current) result = Declaration(self.expressionStatement()) self.currentFunction = enclosingFunction return result elif isLambda: self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=isGenerator, isAsync=isAsync, token=tok, returnType=nil, depth=self.scopeDepth) self.scopes.add(FunDecl(self.currentFunction)) if self.match(":"): # Function has explicit return type if self.match([Function, Coroutine, Generator]): # The function's return type is another # function. 
We specialize this case because # the type declaration for a function lacks # the braces that would qualify it as an # expression returnType = self.parseFunExpr() else: returnType = self.expression() if self.match(LeftParen): var parameter: tuple[name: IdentExpr, valueType: Expression] self.parseDeclArguments(arguments, parameter, defaults) if self.match(":"): # Function's return type if self.match([Function, Coroutine, Generator]): returnType = self.parseFunExpr() else: returnType = self.expression() if self.currentFunction.kind == funDecl: if not self.match(Semicolon): # If we don't find a semicolon, # it's not a forward declaration self.expect(LeftBrace) if self.match(TokenType.Pragma): for pragma in self.parsePragmas(): pragmas.add(pragma) FunDecl(self.currentFunction).body = self.blockStmt() else: # This is a forward declaration, so we explicitly # nullify the function's body to tell the compiler # to look for it elsewhere in the file later FunDecl(self.currentFunction).body = nil if self.match(TokenType.Pragma): for pragma in self.parsePragmas(): pragmas.add(pragma) FunDecl(self.currentFunction).arguments = arguments FunDecl(self.currentFunction).returnType = returnType else: self.expect(LeftBrace) if self.match(TokenType.Pragma): for pragma in self.parsePragmas(): pragmas.add(pragma) LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt() LambdaExpr(Expression(self.currentFunction)).arguments = arguments LambdaExpr(Expression(self.currentFunction)).returnType = returnType result = self.currentFunction if isOperator: if arguments.len() == 0: self.error("cannot declare operator without arguments") elif isLambda: self.error("cannot declare anonymous operator") for argument in arguments: if argument.valueType == nil: self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration") self.currentFunction = enclosingFunction result.pragmas = pragmas proc expression(self: Parser): Expression = ## Parses expressions result = self.parseArrow() # Highest-level expression proc expressionStatement(self: Parser): Statement = ## Parses expression statements, which ## are expressions followed by a semicolon var expression = self.expression() endOfLine("missing expression terminator", expression.token) result = Statement(newExprStmt(expression, expression.token)) proc statement(self: Parser): Statement = ## Parses statements case self.peek().kind: of If: discard self.step() result = self.ifStmt() of Assert: discard self.step() result = self.assertStmt() of Raise: discard self.step() result = self.raiseStmt() of Break: discard self.step() result = self.breakStmt() of Continue: discard self.step() result = self.continueStmt() of Return: discard self.step() result = self.returnStmt() of Import: discard self.step() result = self.importStmt() of From: # TODO # from module import a [, b, c as d] discard self.step() result = self.importStmt(fromStmt=true) of While: discard self.step() result = self.whileStmt() #[ of For: discard self.step() result = self.forStmt() ]# of Foreach: discard self.step() result = self.forEachStmt() of LeftBrace: discard self.step() result = self.blockStmt() of Yield: discard self.step() result = self.yieldStmt() of Await: discard self.step() result = self.awaitStmt() of Defer: discard self.step() result = self.deferStmt() of Try: discard self.step() result = self.tryStmt() else: result = self.expressionStatement() proc typeDecl(self: Parser): TypeDecl = ## Parses type declarations let token = self.peek(-1) self.expect(Identifier, 
"expecting type name after 'type'") let isPrivate = not self.match("*") self.checkDecl(isPrivate) var name = newIdentExpr(self.peek(-1), self.scopeDepth) var fields: seq[tuple[name: IdentExpr, valueType: Expression, isPrivate: bool]] = @[] var defaults: seq[Expression] = @[] var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[] var pragmas: seq[Pragma] = @[] result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil) if self.match(LeftBracket): self.parseGenerics(result) self.expect("=", "expecting '=' after type name") result.valueType = self.expression() self.expect(LeftBrace, "expecting '{' after type declaration") if self.match(TokenType.Pragma): for pragma in self.parsePragmas(): pragmas.add(pragma) var argName: IdentExpr argPrivate: bool argType: Expression while not self.match(RightBrace) and not self.done(): self.expect(Identifier, "expecting field name") argName = newIdentExpr(self.peek(-1), self.scopeDepth) argPrivate = not self.match("*") self.expect(":", "expecting ':' after field name") argType = self.expression() result.fields.add((argName, argType, argPrivate)) if self.match("="): result.defaults.add(self.expression()) self.expect(";", "expecting semicolon after field declaration") result.pragmas = pragmas proc declaration(self: Parser): Declaration = ## Parses declarations case self.peek().kind: of TokenType.Var, Const, Let: let keyword = self.step() result = self.varDecl(isLet = keyword.kind == Let, isConst = keyword.kind == Const) of Function: discard self.step() result = self.funDecl() of Coroutine: discard self.step() result = self.funDecl(isAsync=true) of Generator: discard self.step() result = self.funDecl(isGenerator=true) of Operator: discard self.step() result = self.funDecl(isOperator=true) of TokenType.Pragma: discard self.step() for p in self.parsePragmas(): self.tree.add(p) of Type: discard self.step() result = self.typeDecl() of Comment: discard self.step() # TODO: Docstrings and stuff else: result = Declaration(self.statement()) proc findOperators(self: Parser, tokens: seq[Token]) = ## Finds operators in a token stream for i, token in tokens: # We do a first pass over the tokens # to find operators. Note that this # relies on the lexer ending the input # with an EOF token if token.kind == Operator: if i == tokens.high(): self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token) self.operators.addOperator(tokens[i + 1].lexeme) if i == tokens.high() and token.kind != EndOfFile: # Since we're iterating this list anyway might as # well perform some extra checks self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token) proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] = ## Parses a sequence of tokens into a sequence of AST nodes self.tokens = tokens self.file = file self.current = 0 self.currentLoop = LoopContext.None self.currentFunction = nil self.scopeDepth = 0 if not persist: self.operators = newOperatorTable() self.tree = @[] self.source = source self.lines = lines self.findOperators(tokens) while not self.done(): self.tree.add(self.declaration()) if self.tree[^1] == nil: self.tree.delete(self.tree.high()) result = self.tree