# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## A recursive-descent top-down parser implementation

import strformat
import strutils


import meta/token
import meta/ast
import meta/errors


export token, ast, errors


type

  LoopContext = enum
    ## Tells the parser whether it is currently inside a loop body
    Loop, None

  Parser* = ref object
    ## A recursive-descent top-down
    ## parser implementation

    # Index into self.tokens
    current: int
    # The name of the file being parsed.
    # Only meaningful for parse errors
    file: string
    # The list of tokens representing
    # the source code to be parsed.
    # In most cases, those will come
    # from the builtin lexer, but this
    # behavior is not enforced and the
    # tokenizer is entirely separate from
    # the parser
    tokens: seq[Token]
    # Little internal attribute that tells
    # us if we're inside a loop or not. This
    # allows us to detect errors like break
    # being used outside loops
    currentLoop: LoopContext
    # Stores the current function
    # being parsed. This is a reference
    # to either a FunDecl or LambdaExpr
    # AST node and is nil when the parser
    # is at the top-level. It allows the
    # parser to detect errors like return
    # outside functions
    currentFunction: Declaration
    # Stores the current scope depth (0 = global, > 0 local)
    scopeDepth: int
    # We store user-defined operators for later use
    operators: seq[string]


proc newParser*(): Parser =
  ## Initializes a new Parser object with an empty token
  ## list, no file name and top-level (global) state
  new(result)
  result.current = 0
  result.file = ""
  result.tokens = @[]
  result.currentFunction = nil
  result.currentLoop = None
  result.scopeDepth = 0
  result.operators = @[]


# Handy templates to make our life easier, thanks nim!

template endOfFile: Token =
  ## Builds a synthetic EOF token (line -1, empty lexeme)
  Token(kind: EndOfFile, lexeme: "", line: -1)


template endOfLine(msg: string) =
  ## Expects a semicolon, raising a parse error with the given
  ## message otherwise. Relies on a `self` parser being in scope
  ## at the call site
  self.expect(Semicolon, msg)


# Public getters for improved error formatting

proc getCurrent*(self: Parser): int {.inline.} =
  ## Returns the parser's current index into the token list
  self.current


proc getCurrentToken*(self: Parser): Token =
  ## Returns the token used to anchor error messages: the token
  ## right before the current index, or the last token of the
  ## input when the index is at/after the last position or no
  ## token has been consumed yet. A synthetic EOF token is
  ## returned when the token list is empty
  if self.tokens.len() == 0:
    # Indexing with [^1] below would raise on an empty list
    return endOfFile
  if self.getCurrent() >= self.tokens.high() or self.getCurrent() - 1 < 0:
    return self.tokens[^1]
  else:
    return self.tokens[self.current - 1]


proc peek(self: Parser, distance: int = 0): Token =
  ## Peeks at the token at the given distance.
  ## If the distance is out of bounds, an EOF
  ## token is returned. A negative distance may
  ## be used to retrieve previously consumed
  ## tokens
  let index = self.current + distance
  # high() is -1 for an empty list, so this also covers that case
  if index < 0 or index > self.tokens.high():
    result = endOfFile
  else:
    result = self.tokens[index]


proc done(self: Parser): bool =
  ## Returns true if we're at the
  ## end of the file. Note that the
  ## parser expects an explicit
  ## EOF token to signal the end
  ## of the file
  result = self.peek().kind == EndOfFile


proc step(self: Parser, n: int = 1): Token =
  ## Steps n tokens into the input,
  ## returning the last consumed one.
  ## Stepping stops advancing once the
  ## end of the input is reached, in which
  ## case the EOF token is returned. If n
  ## is less than 1 nothing is consumed and
  ## the previously consumed token is returned
  result = self.peek(-1)
  for _ in 1 .. n:
    if self.done():
      result = self.peek()
      break
    result = self.tokens[self.current]
    self.current += 1


proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} =
  ## Raises a formatted ParseError exception that includes
  ## the file name, the line of the token being looked at and
  ## the lexeme returned by getCurrentToken()
  var lexeme = self.getCurrentToken().lexeme
  var errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at '{lexeme}' -> {message}"
  raise newException(ParseError, errorMessage)


proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
  ## Checks if the given token at the given distance
  ## matches the expected kind and returns a boolean.
  ## The distance parameter is passed directly to
  ## self.peek()
  self.peek(distance).kind == kind


proc check(self: Parser, kind: openarray[TokenType]): bool =
  ## Calls self.check() in a loop with each entry of
  ## the given openarray of token kinds and returns
  ## at the first match. Note that this assumes
  ## that only one token may match at a given
  ## position
  for k in kind:
    if self.check(k):
      return true
  return false


proc match(self: Parser, kind: TokenType): bool =
  ## Behaves like self.check(), except that when a token
  ## matches it is also consumed
  if self.check(kind):
    discard self.step()
    result = true
  else:
    result = false


proc match(self: Parser, kind: openarray[TokenType]): bool =
  ## Calls self.match() in a loop with each entry of
  ## the given openarray of token kinds and returns
  ## at the first match. Note that this assumes
  ## that only one token may exist at a given
  ## position
  for k in kind:
    if self.match(k):
      return true
  result = false


proc expect(self: Parser, kind: TokenType, message: string = "") =
  ## Behaves like self.match(), except that
  ## when a token doesn't match, an error
  ## is raised. If no error message is
  ## given, a default one is used
  if not self.match(kind):
    if message.len() == 0:
      self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead")
    else:
      self.error(message)


proc expect(self: Parser, kinds: openarray[TokenType], message: string = "") =
  ## Behaves like self.expect(), except that
  ## an error is raised only if none of the
  ## given token kinds matches. If no error
  ## message is given, a default one is used
  for kind in kinds:
    if self.match(kind):
      return
  if message.len() == 0:
    self.error(&"""expecting any of the following tokens: {kinds.join(", ")}, but got {self.peek().kind} instead""")
  else:
    # A custom message must raise too, otherwise
    # a mismatch would go unnoticed
    self.error(message)


# Forward declarations
proc expression(self: Parser): Expression
proc expressionStatement(self: Parser): Statement
proc statement(self: Parser): Statement
proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration


proc primary(self: Parser): Expression =
  ## Parses primary expressions such
  ## as integer literals and keywords
  ## that map to builtin types (true,
  ## false, nil, etc.), as well as
  ## grouping/tuple, list, set and dict
  ## literals, yield/await expressions
  ## and anonymous functions
  case self.peek().kind:
  of True:
    result = newTrueExpr(self.step())
  of False:
    result = newFalseExpr(self.step())
  of TokenType.NotANumber:
    result = newNanExpr(self.step())
  of Nil:
    result = newNilExpr(self.step())
  of Float:
    result = newFloatExpr(self.step())
  of Integer:
    result = newIntExpr(self.step())
  of Identifier:
    result = newIdentExpr(self.step())
  of LeftParen:
    let tok = self.step()
    if self.match(RightParen):
      # This yields an empty tuple
      result = newTupleExpr(@[], tok)
    else:
      result = self.expression()
      if self.match(Comma):
        # A comma after the first element
        # turns the grouping into a tuple
        var tupleObject = newTupleExpr(@[result], tok)
        while not self.check(RightParen):
          tupleObject.members.add(self.expression())
          if not self.match(Comma):
            break
        result = tupleObject
        self.expect(RightParen, "unterminated tuple literal")
      else:
        self.expect(RightParen, "unterminated parenthesized expression")
        result = newGroupingExpr(result, tok)
  of LeftBracket:
    let tok = self.step()
    if self.match(RightBracket):
      # This yields an empty list
      result = newListExpr(@[], tok)
    else:
      var listObject = newListExpr(@[], tok)
      while not self.check(RightBracket):
        listObject.members.add(self.expression())
        if not self.match(Comma):
          break
      result = listObject
      self.expect(RightBracket, "unterminated list literal")
  of LeftBrace:
    let tok = self.step()
    if self.match(RightBrace):
      # This yields an empty dictionary, not an empty set!
      # For empty sets, there will be a builtin set() type
      # that can be instantiated with no arguments
      result = newDictExpr(@[], @[], tok)
    else:
      result = self.expression()
      if self.match(Comma) or self.check(RightBrace):
        var setObject = newSetExpr(@[result], tok)
        while not self.check(RightBrace):
          setObject.members.add(self.expression())
          if not self.match(Comma):
            break
        result = setObject
        self.expect(RightBrace, "unterminated set literal")
      elif self.match(Colon):
        var dictObject = newDictExpr(@[result], @[self.expression()], tok)
        if self.match(RightBrace):
          return dictObject
        if self.match(Comma):
          while not self.check(RightBrace):
            dictObject.keys.add(self.expression())
            self.expect(Colon)
            dictObject.values.add(self.expression())
            if not self.match(Comma):
              break
        self.expect(RightBrace, "unterminated dict literal")
        result = dictObject
      else:
        # Without this branch the opening brace would be
        # silently dropped and the bare expression returned
        self.error("unterminated set or dict literal")
  of Yield:
    let tok = self.step()
    if self.currentFunction == nil:
      self.error("'yield' cannot be used outside functions")
    elif self.currentFunction.token.kind != Generator:
      # It's easier than doing conversions for lambda/funDecl
      self.error("'yield' cannot be used outside generators")
    if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]):
      # Expression delimiters
      result = newYieldExpr(self.expression(), tok)
    else:
      # Empty yield
      result = newYieldExpr(newNilExpr(Token()), tok)
  of Await:
    let tok = self.step()
    if self.currentFunction == nil:
      self.error("'await' cannot be used outside functions")
    if self.currentFunction.token.kind != Coroutine:
      self.error("'await' can only be used inside coroutines")
    result = newAwaitExpr(self.expression(), tok)
  of RightParen, RightBracket, RightBrace:
    # This is *technically* unnecessary: the parser would
    # throw an error regardless, but it's a little bit nicer
    # when the error message is more specific
    self.error(&"unmatched '{self.peek().lexeme}'")
  of Hex:
    result = newHexExpr(self.step())
  of Octal:
    result = newOctExpr(self.step())
  of Binary:
    result = newBinExpr(self.step())
  of String:
    result = newStrExpr(self.step())
  of Infinity:
    result = newInfExpr(self.step())
  of Function:
    discard self.step()
    result = Expression(self.funDecl(isLambda=true))
  of Coroutine:
    discard self.step()
    result = Expression(self.funDecl(isAsync=true, isLambda=true))
  of Generator:
    discard self.step()
    result = Expression(self.funDecl(isGenerator=true, isLambda=true))
  else:
    self.error("invalid syntax")


proc makeCall(self: Parser, callee: Expression): Expression =
  ## Utility function called iteratively by self.call()
  ## to parse a function call. The opening parenthesis
  ## must have been consumed already. Positional arguments
  ## must precede keyword ones (name = value)
  let tok = self.peek(-1)
  var argNames: seq[IdentExpr] = @[]
  var arguments: tuple[positionals: seq[Expression],
                       keyword: seq[tuple[name: IdentExpr, value: Expression]]] =
    (positionals: @[], keyword: @[])
  var argument: Expression = nil
  var argCount = 0
  if not self.check(RightParen):
    while true:
      if argCount >= 255:
        self.error("call can not have more than 255 arguments")
        break
      argument = self.expression()
      if argument.kind == assignExpr:
        # TODO: This will explode with slices!
        # NOTE(review): whether `in` detects duplicates here depends
        # on how equality is defined for IdentExpr - confirm
        if IdentExpr(AssignExpr(argument).name) in argNames:
          self.error("duplicate keyword argument in call")
        argNames.add(IdentExpr(AssignExpr(argument).name))
        arguments.keyword.add((name: IdentExpr(AssignExpr(argument).name), value: AssignExpr(argument).value))
      elif arguments.keyword.len() == 0:
        arguments.positionals.add(argument)
      else:
        self.error("positional argument cannot follow keyword argument in call")
      if not self.match(Comma):
        break
      argCount += 1
  self.expect(RightParen)
  result = newCallExpr(callee, arguments, tok)


proc call(self: Parser): Expression =
  ## Parses function calls, object field
  ## accessing and slicing expressions
  result = self.primary()
  while true:
    if self.match(LeftParen):
      result = self.makeCall(result)
    elif self.match(Dot):
      self.expect(Identifier, "expecting attribute name after '.'")
      result = newGetItemExpr(result, newIdentExpr(self.peek(-1)), self.peek(-1))
    elif self.match(LeftBracket):
      # Slicing such as a[1:2]
      let tok = self.peek(-1)
      var ends: seq[ASTNode] = @[]
      while not self.check(RightBracket) and not self.done():
        if self.check(Colon):
          # An omitted bound is represented by a nil expression
          ends.add(newNilExpr(Token()))
          discard self.step()
        else:
          ends.add(self.expression())
          discard self.match(Colon)
      self.expect(RightBracket, "expecting ']'")
      result = newSliceExpr(result, ends, tok)
    else:
      break


proc unary(self: Parser): Expression =
  ## Parses unary expressions
  if self.match([Minus, Tilde, LogicalNot, Plus]):
    result = newUnaryExpr(self.peek(-1), self.unary())
  else:
    result = self.call()


proc customUnaryOperator(self: Parser): Expression =
  ## Parses user-defined unary expressions
  if self.peek().lexeme in self.operators:
    discard self.step()
    result = newUnaryExpr(self.peek(-1), self.customUnaryOperator())
  else:
    result = self.unary()


proc pow(self: Parser): Expression =
  ## Parses exponentiation expressions
  result = self.customUnaryOperator()
  while self.match(DoubleStar):
    let operator = self.peek(-1)
    let right = self.customUnaryOperator()
    result = newBinaryExpr(result, operator, right)


proc mul(self: Parser): Expression =
  ## Parses multiplication and division expressions
  result = self.pow()
  while self.match([Slash, Percentage, FloorDiv, Star]):
    let operator = self.peek(-1)
    let right = self.pow()
    result = newBinaryExpr(result, operator, right)


proc add(self: Parser): Expression =
  ## Parses addition and subtraction expressions
  result = self.mul()
  while self.match([Plus, Minus]):
    let operator = self.peek(-1)
    let right = self.mul()
    result = newBinaryExpr(result, operator, right)


proc comparison(self: Parser): Expression =
  ## Parses other comparison expressions
  ## and some other operators
  result = self.add()
  while self.match([LessThan, GreaterThan, LessOrEqual, GreaterOrEqual, Is, As, Of, IsNot]):
    let operator = self.peek(-1)
    let right = self.add()
    result = newBinaryExpr(result, operator, right)


proc equality(self: Parser): Expression =
  ## Parses equality expressions
  result = self.comparison()
  while self.match([DoubleEqual, NotEqual]):
    let operator = self.peek(-1)
    let right = self.comparison()
    result = newBinaryExpr(result, operator, right)


proc logicalAnd(self: Parser): Expression =
  ## Parses logical and expressions
  ## (a and b)
  result = self.equality()
  while self.match(LogicalAnd):
    let operator = self.peek(-1)
    let right = self.equality()
    result = newBinaryExpr(result, operator, right)


proc logicalOr(self: Parser): Expression =
  ## Parses logical or expressions
  ## (a or b)
  result = self.logicalAnd()
  while self.match(LogicalOr):
    let operator = self.peek(-1)
    let right = self.logicalAnd()
    result = newBinaryExpr(result, operator, right)


proc bitwiseAnd(self: Parser): Expression =
  ## Parses a & b expressions
  result = self.logicalOr()
  while self.match(Ampersand):
    let operator = self.peek(-1)
    let right = self.logicalOr()
    result = newBinaryExpr(result, operator, right)


proc bitwiseOr(self: Parser): Expression =
  ## Parses a | b expressions
  result = self.bitwiseAnd()
  while self.match(Pipe):
    let operator = self.peek(-1)
    let right = self.bitwiseAnd()
    result = newBinaryExpr(result, operator, right)


proc customBinaryOperator(self: Parser): Expression =
  ## Parses user-defined binary operators
  result = self.bitwiseOr()
  while self.peek().lexeme in self.operators:
    let operator = self.step()
    let right = self.bitwiseOr()
    result = newBinaryExpr(result, operator, right)


proc assignment(self: Parser): Expression =
  ## Parses assignment, the highest-level
  ## expression (including stuff like a.b = 1).
  ## Slice assignments are also parsed here
  result = self.customBinaryOperator()
  if self.match([Equal, InplaceAdd, InplaceSub, InplaceDiv, InplaceMod,
                 InplacePow, InplaceMul, InplaceXor, InplaceAnd, InplaceOr,
                 InplaceFloorDiv, InplaceRightShift, InplaceLeftShift]):
    let tok = self.peek(-1)
    var value = self.expression()
    if result.kind in {identExpr, sliceExpr}:
      result = newAssignExpr(result, value, tok)
    elif result.kind == getItemExpr:
      result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
    else:
      self.error("invalid assignment target")


proc assertStmt(self: Parser): Statement =
  ## Parses "assert" statements, which
  ## raise an error if the expression
  ## fed into them is falsey
  let tok = self.peek(-1)
  var expression = self.expression()
  endOfLine("missing semicolon after assert statement")
  result = newAssertStmt(expression, tok)


proc beginScope(self: Parser) =
  ## Begins a new lexical scope
  inc(self.scopeDepth)


proc endScope(self: Parser) =
  ## Ends the current lexical scope
  dec(self.scopeDepth)


proc blockStmt(self: Parser): Statement =
  ## Parses block statements. A block
  ## statement simply opens a new local
  ## scope. The opening brace must have
  ## been consumed already
  self.beginScope()
  let tok = self.peek(-1)
  var code: seq[Declaration] = @[]
  while not self.check(RightBrace) and not self.done():
    code.add(self.declaration())
  self.expect(RightBrace, "expecting '}'")
  result = newBlockStmt(code, tok)
  self.endScope()


proc breakStmt(self: Parser): Statement =
  ## Parses break statements
  let tok = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'break' cannot be used outside loops")
  endOfLine("missing semicolon after break statement")
  result = newBreakStmt(tok)


proc deferStmt(self: Parser): Statement =
  ## Parses defer statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'defer' cannot be used outside functions")
  result = newDeferStmt(self.expression(), tok)
  endOfLine("missing semicolon after defer statement")


proc continueStmt(self: Parser): Statement =
  ## Parses continue statements
  let tok = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'continue' cannot be used outside loops")
  endOfLine("missing semicolon after continue statement")
  result = newContinueStmt(tok)


proc returnStmt(self: Parser): Statement =
  ## Parses return statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'return' cannot be used outside functions")
  var value: Expression = newNilExpr(Token(lexeme: "nil"))
  if not self.check(Semicolon):
    # Since return can be used on its own too
    # (in which case it implicitly returns nil),
    # we need to check if there's an actual value
    # to return or not
    value = self.expression()
  endOfLine("missing semicolon after return statement")
  result = newReturnStmt(value, tok)


proc yieldStmt(self: Parser): Statement =
  ## Parses yield statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'yield' cannot be outside functions")
  elif self.currentFunction.token.kind != Generator:
    self.error("'yield' can only be used inside generators")
  if not self.check(Semicolon):
    result = newYieldStmt(self.expression(), tok)
  else:
    # A bare yield yields nil
    result = newYieldStmt(newNilExpr(Token()), tok)
  endOfLine("missing semicolon after yield statement")


proc awaitStmt(self: Parser): Statement =
  ## Parses await statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'await' cannot be used outside functions")
  if self.currentFunction.token.kind != Coroutine:
    self.error("'await' can only be used inside coroutines")
  result = newAwaitStmt(self.expression(), tok)
  endOfLine("missing semicolon after await statement")


proc raiseStmt(self: Parser): Statement =
  ## Parses raise statements
  var exception: Expression
  let tok = self.peek(-1)
  if not self.check(Semicolon):
    # Raise can be used on its own, in which
    # case it re-raises the last active exception
    exception = self.expression()
  endOfLine("missing semicolon after raise statement")
  result = newRaiseStmt(exception, tok)


proc forEachStmt(self: Parser): Statement =
  ## Parses C#-like foreach loops
  let tok = self.peek(-1)
  var enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(LeftParen, "expecting '(' after 'foreach'")
  self.expect(Identifier)
  var identifier = newIdentExpr(self.peek(-1))
  self.expect(Colon)
  var expression = self.expression()
  self.expect(RightParen)
  var body = self.statement()
  result = newForEachStmt(identifier, expression, body, tok)
  self.currentLoop = enclosingLoop


proc importStmt(self: Parser, fromStmt: bool = false): Statement =
  ## Parses import statements. When fromStmt is true the
  ## statement's token is taken two positions back instead
  ## of one
  var tok: Token
  if fromStmt:
    tok = self.peek(-2)
  else:
    tok = self.peek(-1)
  # TODO: New AST node
  self.expect(Identifier, "expecting module name(s) after import statement")
  result = newImportStmt(newIdentExpr(self.peek(-1)), tok)
  endOfLine("missing semicolon after import statement")


proc tryStmt(self: Parser): Statement =
  ## Parses try/except/else/finally blocks. Each except clause
  ## names either a single exception or a tuple of exceptions,
  ## optionally followed by "as <alias>". A tuple registers one
  ## handler per member, all sharing the same body and alias
  let tok = self.peek(-1)
  var body = self.statement()
  var handlers: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]] = @[]
  var finallyClause: Statement
  var elseClause: Statement
  var asName: IdentExpr
  var excName: Expression
  var handlerBody: Statement
  while self.match(Except):
    excName = self.expression()
    # The alias only ever applies to the clause that declared it
    asName = nil
    if excName.kind == binaryExpr and BinaryExpr(excName).operator.kind == As:
      let aliased = BinaryExpr(excName)
      # Validate before converting: converting a node of the
      # wrong kind to IdentExpr is not a recoverable parse error
      if aliased.b.kind != identExpr:
        self.error("expecting alias name after 'except ... as'")
      elif aliased.a.kind notin {identExpr, tupleExpr}:
        self.error("expecting exception name")
      asName = IdentExpr(aliased.b)
      excName = aliased.a
    if excName.kind == identExpr:
      handlerBody = self.statement()
      handlers.add((body: handlerBody, exc: IdentExpr(excName), name: asName))
    elif excName.kind == tupleExpr:
      # This allows to do except (a, b, c) as SomeError {...}
      # TODO: Consider adding the ability to make exc a sequence
      # instead of adding the same body with different exception
      # types each time
      handlerBody = self.statement()
      for element in TupleExpr(excName).members:
        if element.kind != identExpr:
          self.error("expecting exception name")
        handlers.add((body: handlerBody, exc: IdentExpr(element), name: asName))
    else:
      # Anything else used to be dropped silently, which
      # made the whole except clause vanish from the AST
      self.error("expecting exception name")
  if self.match(Else):
    elseClause = self.statement()
  if self.match(Finally):
    finallyClause = self.statement()
  if handlers.len() == 0 and elseClause == nil and finallyClause == nil:
    self.error("expecting 'except', 'finally' or 'else' statement after 'try' block")
  for i, handler in handlers:
    # NOTE(review): nothing in this proc currently produces a
    # handler with a nil exception, so this check looks dormant
    if handler.exc == nil and i != handlers.high():
      self.error("catch-all exception handler with bare 'except' must come last in try statement")
  result = newTryStmt(body, handlers, finallyClause, elseClause, tok)


proc whileStmt(self: Parser): Statement =
  ## Parses a C-style while loop statement
  let tok = self.peek(-1)
  self.beginScope()
  var enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(LeftParen, "expecting '(' before while loop condition")
  var condition = self.expression()
  self.expect(RightParen, "unterminated while loop condition")
  result = newWhileStmt(condition, self.statement(), tok)
  self.currentLoop = enclosingLoop
  self.endScope()


proc forStmt(self: Parser): Statement =
  ## Parses a C-style for loop, desugaring
  ## it into an equivalent while loop
  self.beginScope()
  let tok = self.peek(-1)
  var enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(LeftParen, "expecting '(' after 'for'")
  var initializer: ASTNode = nil
  var condition: Expression = nil
  var increment: Expression = nil
  if self.match(Semicolon):
    # No initializer
    discard
  elif self.match(Var):
    initializer = self.varDecl()
    if not VarDecl(initializer).isPrivate:
      self.error("cannot declare public for loop initializer")
  else:
    initializer = self.expressionStatement()
  if not self.check(Semicolon):
    condition = self.expression()
  self.expect(Semicolon, "expecting ';' after for loop condition")
  if not self.check(RightParen):
    increment = self.expression()
  self.expect(RightParen, "unterminated for loop increment")
  var body = self.statement()
  if increment != nil:
    # The increment runs after each iteration, so we
    # inject it into the block as the last statement
    body = newBlockStmt(@[Declaration(body), newExprStmt(increment, increment.token)], tok)
  if condition == nil:
    ## An empty condition is functionally
    ## equivalent to "true"
    condition = newTrueExpr(Token())
  # We can use a while loop, which in this case works just as well
  body = newWhileStmt(condition, body, tok)
  if initializer != nil:
    # Nested blocks, so the initializer is
    # only executed once
    body = newBlockStmt(@[Declaration(initializer), Declaration(body)], tok)
  # This desugars the following code:
  # for (var i = 0; i < 10; i += 1) {
  #   print(i);
  # }
  # To the semantically equivalent snippet
  # below:
  # {
  #   var i = 0;
  #   while (i < 10) {
  #     print(i);
  #     i += 1;
  #   }
  # }
  result = body
  self.currentLoop = enclosingLoop
  self.endScope()


proc ifStmt(self: Parser): Statement =
  ## Parses if statements
  let tok = self.peek(-1)
  self.expect(LeftParen, "expecting '(' before if condition")
  var condition = self.expression()
  self.expect(RightParen, "expecting ')' after if condition")
  var thenBranch = self.statement()
  var elseBranch: Statement = nil
  if self.match(Else):
    elseBranch = self.statement()
  result = newIfStmt(condition, thenBranch, elseBranch, tok)


template checkDecl(self: Parser, isPrivate: bool) =
  ## Handy utility template that avoids us from copy
  ## pasting the same checks to all declaration handlers
  if not isPrivate and self.currentFunction != nil:
    self.error("cannot bind public names inside functions")
  if not isPrivate and self.scopeDepth > 0:
    self.error("cannot bind public names inside local scopes")


proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration =
  ## Parses variable declarations (var, let and const). The
  ## declaring keyword must have been consumed already. Only
  ## "var" declarations may omit the initializer, in which
  ## case the value defaults to nil
  let tok = self.peek(-1)
  var value: Expression
  self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
  let name = newIdentExpr(self.peek(-1))
  # A trailing star marks the name as public
  let isPrivate = not self.match(Star)
  self.checkDecl(isPrivate)
  var valueType: IdentExpr
  if self.match(Colon):
    # We don't enforce it here because
    # the compiler may be able to infer
    # the type later!
    self.expect(Identifier, "expecting type name after ':'")
    valueType = newIdentExpr(self.peek(-1))
  if self.match(Equal):
    value = self.expression()
    if isConst and not value.isConst():
      self.error("constant initializer is not a constant")
  else:
    if tok.kind != Var:
      self.error(&"{tok.lexeme} declaration requires an initializer")
    value = newNilExpr(Token())
  self.expect(Semicolon, &"expecting semicolon after declaration")
  case tok.kind:
  of Var:
    result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, closedOver=false, valueType=valueType)
  of Const:
    result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, isConst=true, closedOver=false, valueType=valueType)
  of Let:
    result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, isLet=isLet, closedOver=false, valueType=valueType)
  else:
    discard  # Unreachable


proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration =
  ## Parses functions, coroutines, generators, anonymous functions and custom operators.
  ## The declaring keyword must have been consumed already. While the function is being
  ## parsed, self.currentFunction points to its node; the enclosing function is restored
  ## before returning
  let tok = self.peek(-1)
  var enclosingFunction = self.currentFunction
  var arguments: seq[tuple[name: IdentExpr, valueType: IdentExpr]] = @[]
  var defaults: seq[Expression] = @[]
  var returnType: IdentExpr
  if not isLambda and self.check(Identifier):
    # We do this extra check because we might
    # be called from a context where it's
    # ambiguous whether we're parsing a declaration
    # or an expression. Fortunately anonymous functions
    # are nameless, so we can sort the ambiguity by checking
    # if there's an identifier after the keyword
    self.expect(Identifier, &"expecting function name after '{tok.lexeme}'")
    self.checkDecl(not self.check(Star))
    self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()),
                                      isAsync=isAsync, isGenerator=isGenerator, isPrivate=true,
                                      token=tok, closedOver=false)
    FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1))
    if self.match(Star):
      FunDecl(self.currentFunction).isPrivate = false
  elif not isLambda and self.check([LeftBrace, Colon]):
    # We do a bit of hacking to pretend we never
    # wanted to parse this as a declaration in
    # the first place and pass control over to
    # expressionStatement(), which will in turn
    # go all the way up to primary(), which will
    # call us back with isLambda=true, allowing us
    # to actually parse the function as an expression
    dec(self.current)
    result = Declaration(self.expressionStatement())
    self.currentFunction = enclosingFunction
    return result
  elif isLambda:
    self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()),
                                         isGenerator=isGenerator, isAsync=isAsync, token=tok)
  elif not isOperator:
    self.error("funDecl: invalid state")
  else:
    # Without a name no node is created for the operator, so
    # carrying on would operate on the enclosing function (or nil)
    self.error(&"expecting operator name after '{tok.lexeme}'")
  if self.match(Colon):
    # A function without an explicit
    # return type is the same as a void
    # function in C (i.e. no return type)
    self.expect([Identifier, Nil], "expecting function return type after ':'")
    returnType = newIdentExpr(self.peek(-1))
  if not self.match(LeftBrace):
    # Not an argument-less function: parse the parameter list
    var parameter: tuple[name: IdentExpr, valueType: IdentExpr]
    self.expect(LeftParen)
    while not self.check(RightParen):
      # Checked before adding, so that at most 255 are accepted
      if arguments.len >= 255:
        self.error("cannot have more than 255 arguments in function declaration")
      self.expect(Identifier, "expecting parameter name")
      parameter.name = newIdentExpr(self.peek(-1))
      self.expect(Colon, "expecting ':' after parameter name")
      self.expect(Identifier, "expecting parameter type")
      parameter.valueType = newIdentExpr(self.peek(-1))
      # NOTE(review): whether `in` detects duplicates here depends
      # on how equality is defined for IdentExpr - confirm
      if parameter in arguments:
        self.error("duplicate parameter name in function declaration")
      arguments.add(parameter)
      if self.match(Equal):
        defaults.add(self.expression())
      elif defaults.len() > 0:
        self.error("positional argument cannot follow default argument in function declaration")
      if not self.match(Comma):
        break
    self.expect(RightParen)
    if self.match(Colon):
      # Function's return type
      self.expect(Identifier, "expecting return type after ':'")
      returnType = newIdentExpr(self.peek(-1))
    self.expect(LeftBrace)
  if self.currentFunction.kind == funDecl:
    if not self.match(Semicolon):
      # If we don't find a semicolon,
      # it's not a forward declaration
      FunDecl(self.currentFunction).body = self.blockStmt()
    else:
      # This is a forward declaration so we explicitly
      # nullify the function's body to tell the compiler
      # to look for it elsewhere in the file later
      # NOTE(review): this branch is only reached after an
      # opening brace has been consumed - confirm intended syntax
      FunDecl(self.currentFunction).body = nil
    FunDecl(self.currentFunction).arguments = arguments
    FunDecl(self.currentFunction).returnType = returnType
  else:
    LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt()
    LambdaExpr(Expression(self.currentFunction)).arguments = arguments
    LambdaExpr(Expression(self.currentFunction)).returnType = returnType
  result = self.currentFunction
  if isOperator:
    # isOperator is only true for functions
    # with a name (since nameless operators
    # don't make much sense)
    if arguments.len() == 0:
      self.error("cannot declare argument-less operator")
    elif arguments.len() > 2:
      self.error("cannot declare operator with more than 2 arguments")
    elif FunDecl(result).returnType == nil:
      self.error("operator cannot have void return type")
  self.currentFunction = enclosingFunction


proc expression(self: Parser): Expression =
  ## Parses expressions
  result = self.assignment()


proc expressionStatement(self: Parser): Statement =
  ## Parses expression statements, which
  ## are expressions followed by a semicolon
  var expression = self.expression()
  endOfLine("missing semicolon after expression")
  result = Statement(newExprStmt(expression, expression.token))


proc statement(self: Parser): Statement =
  ## Parses statements. The keyword introducing the statement
  ## is consumed here before dispatching to the dedicated
  ## handler; anything else is parsed as an expression statement
  case self.peek().kind:
  of If:
    discard self.step()
    result = self.ifStmt()
  of Assert:
    discard self.step()
    result = self.assertStmt()
  of Raise:
    discard self.step()
    result = self.raiseStmt()
  of Break:
    discard self.step()
    result = self.breakStmt()
  of Continue:
    discard self.step()
    result = self.continueStmt()
  of Return:
    discard self.step()
    result = self.returnStmt()
  of Import:
    discard self.step()
    result = self.importStmt()
  of From:
    # TODO
    # from module import a [, b, c as d]
    discard self.step()
    result = self.importStmt(fromStmt=true)
  of While:
    discard self.step()
    result = self.whileStmt()
  of For:
    discard self.step()
    result = self.forStmt()
  of Foreach:
    discard self.step()
    result = self.forEachStmt()
  of LeftBrace:
    discard self.step()
    result = self.blockStmt()
  of Yield:
    discard self.step()
    result = self.yieldStmt()
  of Await:
    discard self.step()
    result = self.awaitStmt()
  of Defer:
    discard self.step()
    result = self.deferStmt()
  of Try:
    discard self.step()
    result = self.tryStmt()
  else:
    result = self.expressionStatement()


proc declaration(self: Parser): Declaration =
  ## Parses declarations, falling back to
  ## statements for everything else
  case self.peek().kind:
  of Var, Const, Let:
    let keyword = self.step()
    result = self.varDecl(isLet=keyword.kind == Let, isConst=keyword.kind == Const)
  of Function:
    discard self.step()
    result = self.funDecl()
  of Coroutine:
    discard self.step()
    result = self.funDecl(isAsync=true)
  of Generator:
    discard self.step()
    result = self.funDecl(isGenerator=true)
  of Operator:
    discard self.step()
    result = self.funDecl(isOperator=true)
  of Type, Comment, TokenType.Whitespace, TokenType.Tab:
    # NOTE(review): the token is skipped and nil is
    # returned, which callers then store as-is
    discard self.step()  # TODO
  else:
    result = Declaration(self.statement())


proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
  ## Parses a series of tokens into a list of AST nodes.
  ## All parser state is reset first, so the same Parser
  ## object can be reused across calls. The file name is
  ## only used in error messages. Raises a ParseError if
  ## the input is malformed
  self.tokens = tokens
  self.file = file
  self.current = 0
  self.currentLoop = None
  self.currentFunction = nil
  self.scopeDepth = 0
  self.operators = @[]
  for i, token in self.tokens:
    # We do a first pass over the tokens
    # to find user-defined operators.
    # Note that this relies on the lexer
    # ending the input with an EOF token
    if token.kind == Operator:
      if i == self.tokens.high():
        self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
      # The operator's name is the token right after the keyword
      self.operators.add(self.tokens[i + 1].lexeme)
    if i == self.tokens.high() and token.kind != EndOfFile:
      # Since we're iterating this list anyway might as
      # well perform some extra checks
      self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
  while not self.done():
    result.add(self.declaration())