# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## A recursive-descent top-down parser implementation

import strformat
import strutils
import tables

import meta/token
import meta/ast
import meta/errors


export token, ast, errors


type
  LoopContext {.pure.} = enum
    ## Tells the parser whether it is
    ## currently inside a loop body
    Loop, None

  Precedence {.pure.} = enum
    ## Operator precedence
    ## clearly stolen from
    ## nim. Members are listed
    ## from loosest to tightest
    ## binding
    Arrow = 0,
    Assign,
    Or,
    And,
    Compare,
    Addition,
    Multiplication,
    Power,
    None # Used for stuff that isn't an operator

  OperatorTable = ref object
    ## A table for storing and
    ## handling the precedence
    ## of operators
    # Every operator lexeme registered so far
    tokens: seq[string]
    # Operator lexemes grouped by precedence level
    precedence: TableRef[Precedence, seq[string]]

  Parameter = tuple[name: IdentExpr, valueType: Expression,
                    mutable: bool, isRef: bool, isPtr: bool]
    ## Shorthand for the tuple describing a single
    ## parameter in a function declaration

  Parser* = ref object
    ## A recursive-descent top-down
    ## parser implementation

    # Index into self.tokens
    current: int
    # The name of the file being parsed.
    # Only meaningful for parse errors
    file: string
    # The list of tokens representing
    # the source code to be parsed.
    # In most cases, those will come
    # from the builtin lexer, but this
    # behavior is not enforced and the
    # tokenizer is entirely separate from
    # the parser
    tokens: seq[Token]
    # Little internal attribute that tells
    # us if we're inside a loop or not. This
    # allows us to detect errors like break
    # being used outside loops
    currentLoop: LoopContext
    # Stores the current function
    # being parsed. This is a reference
    # to either a FunDecl or LambdaExpr
    # AST node and is nil when the parser
    # is at the top-level. It allows the
    # parser to detect errors like return
    # outside functions
    currentFunction: Declaration
    # Stores the current scope depth (0 = global, > 0 local)
    scopeDepth: int
    # Operators discovered by parse() before the
    # actual parsing begins
    operators: OperatorTable


proc newOperatorTable: OperatorTable =
  ## Initializes a new OperatorTable
  ## object with an empty bucket for
  ## every precedence level
  new(result)
  result.tokens = @[]
  result.precedence = newTable[Precedence, seq[string]]()
  for prec in Precedence:
    result.precedence[prec] = @[]


proc addOperator(self: OperatorTable, lexeme: string) =
  ## Adds an operator to the table. Its precedence
  ## is inferred from the operator's lexeme (the
  ## criteria are similar to Nim's). Empty lexemes
  ## and lexemes that were already registered are
  ## ignored. Lexemes matching none of the rules
  ## are stored under Precedence.None
  if lexeme.len() == 0:
    # Nothing to classify: indexing lexeme[0]
    # below would fail on an empty string
    return
  if lexeme in self.tokens:
    return # We've already added it!
  var prec = Precedence.high()
  if lexeme.len() >= 2 and lexeme[^2..^1] in ["->", "~>", "=>"]:
    prec = Precedence.Arrow
  elif lexeme.endsWith("=") and
      lexeme[0] notin {'<', '>', '!', '?', '~', '='}:
    prec = Precedence.Assign
  elif lexeme[0] in {'$'} or lexeme == "**":
    prec = Precedence.Power
  elif lexeme[0] in {'*', '%', '/', '\\'}:
    prec = Precedence.Multiplication
  elif lexeme[0] in {'+', '-', '|', '~'}:
    prec = Precedence.Addition
  elif lexeme[0] in {'<', '>', '=', '!'}:
    prec = Precedence.Compare
  elif lexeme == "and":
    prec = Precedence.And
  elif lexeme == "or":
    prec = Precedence.Or
  self.tokens.add(lexeme)
  self.precedence[prec].add(lexeme)


proc getPrecedence(self: OperatorTable, lexeme: string): Precedence =
  ## Gets the precedence of a given operator.
  ## Lexemes that were never registered yield
  ## Precedence.None
  for (prec, operators) in self.precedence.pairs():
    if lexeme in operators:
      return prec
  # Without this explicit fallback the zero value
  # of the enum (Arrow) would be returned for
  # anything that isn't an operator
  result = Precedence.None


proc newParser*: Parser =
  ## Initializes a new Parser object
  new(result)
  result.current = 0
  result.file = ""
  result.tokens = @[]
  result.currentFunction = nil
  result.currentLoop = LoopContext.None
  result.scopeDepth = 0
  result.operators = newOperatorTable()


# Handy templates to make our life easier, thanks nim!
template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
template endOfLine(msg: string) = self.expect(Semicolon, msg)


# Public getters for improved error formatting
proc getCurrent*(self: Parser): int {.inline.} = self.current


proc getCurrentToken*(self: Parser): Token {.inline.} =
  ## Returns the token used to report errors: the
  ## most recently consumed one, or the last token
  ## of the input when the parser is at the very
  ## beginning or at (or past) the final token. A
  ## synthetic EOF token is returned if there are
  ## no tokens at all
  if self.tokens.len() == 0:
    result = endOfFile
  elif self.current >= self.tokens.high() or self.current - 1 < 0:
    result = self.tokens[^1]
  else:
    result = self.tokens[self.current - 1]


proc peek(self: Parser, distance: int = 0): Token =
  ## Peeks at the token at the given distance.
  ## If the distance is out of bounds, an EOF
  ## token is returned. A negative distance may
  ## be used to retrieve previously consumed
  ## tokens
  let index = self.current + distance
  if self.tokens.high() == -1 or index > self.tokens.high() or index < 0:
    result = endOfFile
  else:
    result = self.tokens[index]


proc done(self: Parser): bool =
  ## Returns true if we're at the
  ## end of the file. Note that the
  ## parser expects an explicit
  ## EOF token to signal the end
  ## of the file
  result = self.peek().kind == EndOfFile


proc step(self: Parser, n: int = 1): Token =
  ## Steps n tokens into the input,
  ## returning the last consumed one.
  ## Stepping stops early at the end
  ## of the file, in which case the
  ## EOF token is returned and is not
  ## consumed. If n is less than 1,
  ## nothing is consumed and the result
  ## is left zero-initialized
  for _ in 1..n:
    if self.done():
      result = self.peek()
      break
    result = self.tokens[self.current]
    self.current += 1


proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} =
  ## Raises a formatted ParseError exception
  let lexeme = self.getCurrentToken().lexeme
  let errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at '{lexeme}' -> {message}"
  raise newException(ParseError, errorMessage)


# Why do we allow strings or enum members of TokenType? Well, it's simple:
# symbols like ":" and "=" are both valid operator names (therefore they are
# tokenized as symbols), but they are also used in a context where they are
# just separators (for example, the colon is used in type declarations).
# Since we can't tell at tokenization time which of the two contexts we're
# in, we just treat everything as a symbol and in the cases where we need a
# specific token we just match the string directly
proc check[T: TokenType or string](self: Parser, kind: T,
                                   distance: int = 0): bool =
  ## Checks if the given token at the given distance
  ## matches the expected kind and returns a boolean.
  ## The distance parameter is passed directly to
  ## self.peek(). Token kinds are compared against
  ## the token's kind, strings against its lexeme
  when T is TokenType:
    self.peek(distance).kind == kind
  else:
    when T is string:
      self.peek(distance).lexeme == kind


proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
  ## Calls self.check() in a loop with each entry of
  ## the given openarray of token kinds and returns
  ## at the first match. Note that this assumes
  ## that only one token may match at a given
  ## position
  for k in kind:
    if self.check(k):
      return true
  return false


proc match[T: TokenType or string](self: Parser, kind: T): bool =
  ## Behaves like self.check(), except that when a token
  ## matches it is also consumed
  if self.check(kind):
    discard self.step()
    result = true
  else:
    result = false


proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
  ## Calls self.match() in a loop with each entry of
  ## the given openarray of token kinds and returns
  ## at the first match. Note that this assumes
  ## that only one token may exist at a given
  ## position
  for k in kind:
    if self.match(k):
      return true
  result = false


proc expect[T: TokenType or string](self: Parser, kind: T,
                                    message: string = "") =
  ## Behaves like self.match(), except that
  ## when a token doesn't match, an error
  ## is raised. If no error message is
  ## given, a default one is used
  if not self.match(kind):
    if message.len() == 0:
      self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead")
    else:
      self.error(message)


proc expect[T: TokenType or string](self: Parser, kind: openarray[T],
                                    message: string = "") =
  ## Behaves like self.expect(), except that
  ## an error is raised only if none of the
  ## given token kinds matches. If no error
  ## message is given, a default one listing
  ## the accepted kinds is used
  if self.match(kind):
    return
  if message.len() == 0:
    let expected = kind.join(", ")
    self.error(&"expecting any of the following tokens: {expected}, but got {self.peek().kind} instead")
  else:
    self.error(message)


# Forward declarations
proc expression(self: Parser): Expression
proc expressionStatement(self: Parser): Statement
proc statement(self: Parser): Statement
proc varDecl(self: Parser, isLet: bool = false,
             isConst: bool = false): Declaration
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
             isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration
# End of forward declarations


proc primary(self: Parser): Expression =
  ## Parses primary expressions such
  ## as integer literals and keywords
  ## that map to builtin types (true,
  ## false, nil, etc.)
  case self.peek().kind:
  of True:
    result = newTrueExpr(self.step())
  of False:
    result = newFalseExpr(self.step())
  of TokenType.NotANumber:
    result = newNanExpr(self.step())
  of Nil:
    result = newNilExpr(self.step())
  of Float:
    result = newFloatExpr(self.step())
  of Integer:
    result = newIntExpr(self.step())
  of Identifier:
    result = newIdentExpr(self.step())
  of LeftParen:
    let tok = self.step()
    result = newGroupingExpr(self.expression(), tok)
    self.expect(RightParen, "unterminated parenthesized expression")
  of Yield:
    let tok = self.step()
    if self.currentFunction == nil:
      self.error("'yield' cannot be used outside functions")
    elif self.currentFunction.token.kind != Generator:
      # It's easier than doing conversions for lambda/funDecl
      self.error("'yield' cannot be used outside generators")
    if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]):
      # Expression delimiters
      result = newYieldExpr(self.expression(), tok)
    else:
      # Empty yield: same placeholder token that yieldStmt uses
      result = newYieldExpr(newNilExpr(Token(lexeme: "nil")), tok)
  of Await:
    let tok = self.step()
    if self.currentFunction == nil:
      self.error("'await' cannot be used outside functions")
    if self.currentFunction.token.kind != Coroutine:
      self.error("'await' can only be used inside coroutines")
    result = newAwaitExpr(self.expression(), tok)
  of RightParen, RightBracket, RightBrace:
    # This is *technically* unnecessary: the parser would
    # throw an error regardless, but it's a little bit nicer
    # when the error message is more specific
    self.error(&"unmatched '{self.peek().lexeme}'")
  of Hex:
    result = newHexExpr(self.step())
  of Octal:
    result = newOctExpr(self.step())
  of Binary:
    result = newBinExpr(self.step())
  of String:
    result = newStrExpr(self.step())
  of Infinity:
    result = newInfExpr(self.step())
  of Function:
    discard self.step()
    result = Expression(self.funDecl(isLambda=true))
  of Coroutine:
    discard self.step()
    result = Expression(self.funDecl(isAsync=true, isLambda=true))
  of Generator:
    discard self.step()
    result = Expression(self.funDecl(isGenerator=true, isLambda=true))
  else:
    self.error("invalid syntax")


proc makeCall(self: Parser, callee: Expression): Expression =
  ## Utility function called iteratively by self.call()
  ## to parse a function call. The opening parenthesis
  ## has already been consumed by the caller
  let tok = self.peek(-1)
  var argNames: seq[IdentExpr] = @[]
  var arguments: tuple[positionals: seq[Expression],
                       keyword: seq[tuple[name: IdentExpr,
                                          value: Expression]]] =
    (positionals: @[], keyword: @[])
  var argument: Expression = nil
  var argCount = 0
  if not self.check(RightParen):
    while true:
      if argCount >= 255:
        self.error("call can not have more than 255 arguments")
        break
      argument = self.expression()
      if argument.kind == assignExpr:
        # An assignment in argument position is a keyword argument
        # TODO: This will explode with slices!
        if IdentExpr(AssignExpr(argument).name) in argNames:
          self.error("duplicate keyword argument in call")
        argNames.add(IdentExpr(AssignExpr(argument).name))
        arguments.keyword.add((name: IdentExpr(AssignExpr(argument).name),
                               value: AssignExpr(argument).value))
      elif arguments.keyword.len() == 0:
        arguments.positionals.add(argument)
      else:
        self.error("positional argument cannot follow keyword argument in call")
      if not self.match(Comma):
        break
      argCount += 1
  self.expect(RightParen)
  result = newCallExpr(callee, arguments, tok)


proc call(self: Parser): Expression =
  ## Parses function calls, object field
  ## accessing and slicing expressions
  result = self.primary()
  while true:
    if self.match(LeftParen):
      result = self.makeCall(result)
    elif self.match(Dot):
      self.expect(Identifier, "expecting attribute name after '.'")
      result = newGetItemExpr(result, newIdentExpr(self.peek(-1)),
                              self.peek(-1))
    elif self.match(LeftBracket):
      # Slicing such as a[1:2], which is then
      # translated to `[]`(a, 1, 2)
      let tok = self.peek(-1)
      var ends: seq[Expression] = @[]
      while not self.check(RightBracket) and not self.done():
        if self.check(":"):
          # A missing bound is recorded as nil
          ends.add(newNilExpr(Token(lexeme: "nil")))
          discard self.step()
        else:
          ends.add(self.expression())
          discard self.match(":")
      self.expect(RightBracket, "expecting ']'")
      result = newSliceExpr(result, ends, tok)
    else:
      break


## Operator parsing handlers

proc unary(self: Parser): Expression =
  ## Parses prefix operators: any registered
  ## operator lexeme found in prefix position
  ## is treated as a unary operator
  if self.peek().lexeme in self.operators.tokens:
    result = newUnaryExpr(self.step(), self.unary())
  else:
    result = self.call()


proc parsePow(self: Parser): Expression =
  ## Parses binary operators with Power precedence
  result = self.unary()
  while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Power:
    let operator = self.step()
    let right = self.unary()
    result = newBinaryExpr(result, operator, right)


proc parseMul(self: Parser): Expression =
  ## Parses binary operators with Multiplication precedence
  result = self.parsePow()
  while self.operators.getPrecedence(self.peek().lexeme) ==
      Precedence.Multiplication:
    let operator = self.step()
    let right = self.parsePow()
    result = newBinaryExpr(result, operator, right)


proc parseAdd(self: Parser): Expression =
  ## Parses binary operators with Addition precedence
  result = self.parseMul()
  while self.operators.getPrecedence(self.peek().lexeme) ==
      Precedence.Addition:
    let operator = self.step()
    let right = self.parseMul()
    result = newBinaryExpr(result, operator, right)


proc parseCmp(self: Parser): Expression =
  ## Parses binary operators with Compare precedence
  result = self.parseAdd()
  while self.operators.getPrecedence(self.peek().lexeme) ==
      Precedence.Compare:
    let operator = self.step()
    let right = self.parseAdd()
    result = newBinaryExpr(result, operator, right)


proc parseAnd(self: Parser): Expression =
  ## Parses binary operators with And precedence
  result = self.parseCmp()
  while self.operators.getPrecedence(self.peek().lexeme) == Precedence.And:
    let operator = self.step()
    let right = self.parseCmp()
    result = newBinaryExpr(result, operator, right)


proc parseOr(self: Parser): Expression =
  ## Parses binary operators with Or precedence
  result = self.parseAnd()
  while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
    let operator = self.step()
    let right = self.parseAnd()
    result = newBinaryExpr(result, operator, right)


proc parseAssign(self: Parser): Expression =
  ## Parses assignments. Only names, slices and
  ## attribute accesses are valid targets
  result = self.parseOr()
  if self.operators.getPrecedence(self.peek().lexeme) == Precedence.Assign:
    let tok = self.step()
    var value = self.expression()
    case result.kind:
    of identExpr, sliceExpr:
      result = newAssignExpr(result, value, tok)
    of getItemExpr:
      result = newSetItemExpr(GetItemExpr(result).obj,
                              GetItemExpr(result).name, value, tok)
    else:
      self.error("invalid assignment target")


proc parseArrow(self: Parser): Expression =
  ## Parses binary operators with Arrow precedence,
  ## the loosest-binding level
  result = self.parseAssign()
  while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Arrow:
    let operator = self.step()
    let right = self.parseAssign()
    result = newBinaryExpr(result, operator, right)


## End of operator parsing handlers


proc assertStmt(self: Parser): Statement =
  ## Parses "assert" statements, which
  ## raise an error if the expression
  ## fed into them is falsey
  let tok = self.peek(-1)
  var expression = self.expression()
  endOfLine("missing semicolon after assert statement")
  result = newAssertStmt(expression, tok)


proc beginScope(self: Parser) =
  ## Begins a new lexical scope
  inc(self.scopeDepth)


proc endScope(self: Parser) =
  ## Ends the current lexical scope
  dec(self.scopeDepth)


proc blockStmt(self: Parser): Statement =
  ## Parses block statements. A block
  ## statement simply opens a new local
  ## scope. The opening brace has already
  ## been consumed by the caller
  self.beginScope()
  let tok = self.peek(-1)
  var code: seq[Declaration] = @[]
  while not self.check(RightBrace) and not self.done():
    code.add(self.declaration())
  self.expect(RightBrace, "expecting '}'")
  result = newBlockStmt(code, tok)
  self.endScope()


proc breakStmt(self: Parser): Statement =
  ## Parses break statements
  let tok = self.peek(-1)
  if self.currentLoop != LoopContext.Loop:
    self.error("'break' cannot be used outside loops")
  endOfLine("missing semicolon after break statement")
  result = newBreakStmt(tok)


proc deferStmt(self: Parser): Statement =
  ## Parses defer statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'defer' cannot be used outside functions")
  result = newDeferStmt(self.expression(), tok)
  endOfLine("missing semicolon after defer statement")


proc continueStmt(self: Parser): Statement =
  ## Parses continue statements
  let tok = self.peek(-1)
  if self.currentLoop != LoopContext.Loop:
    self.error("'continue' cannot be used outside loops")
  endOfLine("missing semicolon after continue statement")
  result = newContinueStmt(tok)


proc returnStmt(self: Parser): Statement =
  ## Parses return statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'return' cannot be used outside functions")
  var value: Expression
  if not self.check(Semicolon):
    # Since return can be used on its own too
    # we need to check if there's an actual value
    # to return or not
    value = self.expression()
  endOfLine("missing semicolon after return statement")
  result = newReturnStmt(value, tok)


proc yieldStmt(self: Parser): Statement =
  ## Parses yield statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'yield' cannot be used outside functions")
  elif self.currentFunction.token.kind != Generator:
    self.error("'yield' can only be used inside generators")
  if not self.check(Semicolon):
    result = newYieldStmt(self.expression(), tok)
  else:
    # Empty yield
    result = newYieldStmt(newNilExpr(Token(lexeme: "nil")), tok)
  endOfLine("missing semicolon after yield statement")


proc awaitStmt(self: Parser): Statement =
  ## Parses await statements
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'await' cannot be used outside functions")
  if self.currentFunction.token.kind != Coroutine:
    self.error("'await' can only be used inside coroutines")
  result = newAwaitStmt(self.expression(), tok)
  endOfLine("missing semicolon after await statement")


proc raiseStmt(self: Parser): Statement =
  ## Parses raise statements
  var exception: Expression
  let tok = self.peek(-1)
  if not self.check(Semicolon):
    # Raise can be used on its own, in which
    # case it re-raises the last active exception
    exception = self.expression()
  endOfLine("missing semicolon after raise statement")
  result = newRaiseStmt(exception, tok)


proc forEachStmt(self: Parser): Statement =
  ## Parses C#-like foreach loops
  let tok = self.peek(-1)
  let enclosingLoop = self.currentLoop
  self.currentLoop = LoopContext.Loop
  self.expect(LeftParen, "expecting '(' after 'foreach'")
  self.expect(Identifier)
  var identifier = newIdentExpr(self.peek(-1))
  self.expect(":")
  var expression = self.expression()
  self.expect(RightParen)
  var body = self.statement()
  result = newForEachStmt(identifier, expression, body, tok)
  self.currentLoop = enclosingLoop


proc importStmt(self: Parser, fromStmt: bool = false): Statement =
  ## Parses import statements. When fromStmt is true
  ## the statement's token is taken two positions
  ## back instead of one
  var tok: Token
  if fromStmt:
    tok = self.peek(-2)
  else:
    tok = self.peek(-1)
  # TODO: New AST node
  self.expect(Identifier, "expecting module name(s) after import statement")
  result = newImportStmt(newIdentExpr(self.peek(-1)), tok)
  endOfLine("missing semicolon after import statement")


proc tryStmt(self: Parser): Statement =
  ## Parses try/except/else/finally blocks.
  ## An 'except' immediately followed by '{'
  ## is a bare (catch-all) handler and is
  ## recorded with a nil exception name
  let tok = self.peek(-1)
  var body = self.statement()
  var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[]
  var finallyClause: Statement
  var elseClause: Statement
  var excName: Expression
  var handlerBody: Statement
  while self.match(Except):
    excName = nil
    if not self.check(LeftBrace):
      excName = self.expression()
      if excName.kind != identExpr:
        # Silently dropping the handler here would
        # leave its body to be parsed as unrelated code
        self.error("expecting exception name after 'except'")
    handlerBody = self.statement()
    handlers.add((body: handlerBody, exc: IdentExpr(excName)))
  if self.match(Else):
    elseClause = self.statement()
  if self.match(Finally):
    finallyClause = self.statement()
  if handlers.len() == 0 and elseClause == nil and finallyClause == nil:
    self.error("expecting 'except', 'finally' or 'else' statement after 'try' block")
  for i, handler in handlers:
    if handler.exc == nil and i != handlers.high():
      self.error("catch-all exception handler with bare 'except' must come last in try statement")
  result = newTryStmt(body, handlers, finallyClause, elseClause, tok)


proc whileStmt(self: Parser): Statement =
  ## Parses a C-style while loop statement
  let tok = self.peek(-1)
  self.beginScope()
  let enclosingLoop = self.currentLoop
  self.currentLoop = LoopContext.Loop
  self.expect(LeftParen, "expecting '(' before while loop condition")
  var condition = self.expression()
  self.expect(RightParen, "unterminated while loop condition")
  result = newWhileStmt(condition, self.statement(), tok)
  self.currentLoop = enclosingLoop
  self.endScope()


proc forStmt(self: Parser): Statement =
  ## Parses a C-style for loop and desugars
  ## it into an equivalent while loop
  self.beginScope()
  let tok = self.peek(-1)
  let enclosingLoop = self.currentLoop
  self.currentLoop = LoopContext.Loop
  self.expect(LeftParen, "expecting '(' after 'for'")
  var initializer: ASTNode = nil
  var condition: Expression = nil
  var increment: Expression = nil
  # Both varDecl() and expressionStatement() consume
  # the semicolon that terminates the initializer
  if self.match(Semicolon):
    discard
  elif self.match(Var):
    initializer = self.varDecl()
    if not VarDecl(initializer).isPrivate:
      self.error("cannot declare public for loop initializer")
  else:
    initializer = self.expressionStatement()
  if not self.check(Semicolon):
    condition = self.expression()
  self.expect(Semicolon, "expecting ';' after for loop condition")
  if not self.check(RightParen):
    increment = self.expression()
  self.expect(RightParen, "unterminated for loop increment")
  var body = self.statement()
  if increment != nil:
    # The increment runs after each iteration, so we
    # inject it into the block as the last statement
    body = newBlockStmt(@[Declaration(body),
                          newExprStmt(increment, increment.token)], tok)
  if condition == nil:
    ## An empty condition is functionally
    ## equivalent to "true"
    condition = newTrueExpr(Token(lexeme: "true"))
  # We can use a while loop, which in this case works just as well
  body = newWhileStmt(condition, body, tok)
  if initializer != nil:
    # Nested blocks, so the initializer is
    # only executed once
    body = newBlockStmt(@[Declaration(initializer), Declaration(body)], tok)
  # This desugars the following code:
  # for (var i = 0; i < 10; i += 1) {
  #   print(i);
  # }
  # To the semantically equivalent snippet
  # below:
  # {
  #   var i = 0;
  #   while (i < 10) {
  #     print(i);
  #     i += 1;
  #   }
  # }
  result = body
  self.currentLoop = enclosingLoop
  self.endScope()


proc ifStmt(self: Parser): Statement =
  ## Parses if statements
  let tok = self.peek(-1)
  self.expect(LeftParen, "expecting '(' before if condition")
  var condition = self.expression()
  self.expect(RightParen, "expecting ')' after if condition")
  var thenBranch = self.statement()
  var elseBranch: Statement = nil
  if self.match(Else):
    elseBranch = self.statement()
  result = newIfStmt(condition, thenBranch, elseBranch, tok)


template checkDecl(self: Parser, isPrivate: bool) =
  ## Handy utility template that avoids us from copy
  ## pasting the same checks to all declaration handlers
  if not isPrivate and self.currentFunction != nil:
    self.error("cannot bind public names inside functions")
  if not isPrivate and self.scopeDepth > 0:
    self.error("cannot bind public names inside local scopes")


proc varDecl(self: Parser, isLet: bool = false,
             isConst: bool = false): Declaration =
  ## Parses variable declarations. The declaring
  ## keyword (var, let or const) has already been
  ## consumed by the caller
  var tok = self.peek(-1)
  var value: Expression
  self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
  var name = newIdentExpr(self.peek(-1))
  # A trailing '*' marks the name as public
  let isPrivate = not self.match("*")
  self.checkDecl(isPrivate)
  var valueType: IdentExpr
  if self.match(":"):
    # We don't enforce it here because
    # the compiler may be able to infer
    # the type later!
    self.expect(Identifier, "expecting type name after ':'")
    valueType = newIdentExpr(self.peek(-1))
  if self.match("="):
    value = self.expression()
    if isConst and not value.isConst():
      self.error("constant initializer is not a constant")
  else:
    if tok.kind != Var:
      self.error(&"{tok.lexeme} declaration requires an initializer")
    value = newNilExpr(Token(lexeme: "nil"))
  self.expect(Semicolon, "expecting semicolon after declaration")
  case tok.kind:
  of Var:
    result = newVarDecl(name, value, isPrivate=isPrivate, token=tok,
                        valueType=valueType, pragmas=(@[]))
  of Const:
    result = newVarDecl(name, value, isPrivate=isPrivate, token=tok,
                        isConst=true, valueType=valueType, pragmas=(@[]))
  of Let:
    result = newVarDecl(name, value, isPrivate=isPrivate, token=tok,
                        isLet=isLet, valueType=valueType, pragmas=(@[]))
  else:
    discard # Unreachable


proc parseDeclArguments(self: Parser, arguments: var seq[Parameter],
                        parameter: var Parameter,
                        defaults: var seq[Expression]) =
  ## Helper to parse declaration arguments and avoid code
  ## duplication. The opening parenthesis has already been
  ## consumed; the closing one is consumed here. Parsed
  ## parameters are appended to arguments and default values
  ## to defaults, while parameter is scratch space reused for
  ## every entry. A type annotation also applies to the
  ## untyped parameters right before it (as in "a, b: int")
  while not self.check(RightParen):
    if arguments.len() >= 255:
      self.error("cannot have more than 255 arguments in function declaration")
    self.expect(Identifier, "expecting parameter name")
    parameter.name = newIdentExpr(self.peek(-1))
    if self.match(":"):
      parameter.mutable = false
      parameter.isPtr = false
      parameter.isRef = false
      if self.match(Var):
        parameter.mutable = true
      elif self.match(Ptr):
        parameter.isPtr = true
      elif self.match(Ref):
        parameter.isRef = true
      parameter.valueType = self.expression()
      # Back-fill the type (and its modifiers) into the
      # preceding parameters that were left untyped
      for i in countdown(arguments.high(), 0):
        if arguments[i].valueType != nil:
          break
        arguments[i].valueType = parameter.valueType
        arguments[i].mutable = parameter.mutable
        arguments[i].isRef = parameter.isRef
        arguments[i].isPtr = parameter.isPtr
    else:
      parameter.valueType = nil
    # Duplicates are detected by name: comparing whole
    # tuples would also compare types and modifiers
    for argument in arguments:
      if argument.name.token.lexeme == parameter.name.token.lexeme:
        self.error("duplicate parameter name in function declaration")
    arguments.add(parameter)
    if self.match("="):
      defaults.add(self.expression())
    elif defaults.len() > 0:
      self.error("positional argument cannot follow default argument in function declaration")
    if not self.match(Comma):
      break
  self.expect(RightParen)
  for argument in arguments:
    if argument.valueType == nil:
      self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")


proc parseReturnType(self: Parser): Expression =
  ## Parses the return type of a function once the ':'
  ## that introduces it has been consumed. If the type
  ## is itself a function, coroutine or generator, a
  ## body-less lambda describing its signature is
  ## returned. We specialize this case because the type
  ## declaration for a function lacks the braces that
  ## would qualify it as an expression
  if not self.match([Function, Coroutine, Generator]):
    return self.expression()
  let keyword = self.peek(-1)
  var arguments: seq[Parameter] = @[]
  var defaults: seq[Expression] = @[]
  var parameter: Parameter
  if self.match(LeftParen):
    self.parseDeclArguments(arguments, parameter, defaults)
  # The node is built only after the arguments have been
  # parsed: seqs are copied when passed to the constructor,
  # so building it earlier would store empty lists
  result = newLambdaExpr(arguments, defaults, nil,
                         isGenerator=keyword.kind == Generator,
                         isAsync=keyword.kind == Coroutine,
                         token=keyword, returnType=nil)
  if self.match(":"):
    LambdaExpr(result).returnType = self.expression()


proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
             isLambda: bool = false, isOperator: bool = false): Declaration =
  ## Parses functions, coroutines, generators, anonymous
  ## functions and operators. The declaring keyword has
  ## already been consumed by the caller. A declaration
  ## whose signature is followed by a semicolon instead
  ## of a body is a forward declaration and gets a nil body
  let tok = self.peek(-1)
  let enclosingFunction = self.currentFunction
  var arguments: seq[Parameter] = @[]
  var defaults: seq[Expression] = @[]
  var returnType: Expression
  if not isLambda and self.check(Identifier):
    # We do this extra check because we might
    # be called from a context where it's
    # ambiguous whether we're parsing a declaration
    # or an expression. Fortunately anonymous functions
    # are nameless, so we can sort the ambiguity by checking
    # if there's an identifier after the keyword
    self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
    self.checkDecl(not self.check("*"))
    self.currentFunction = newFunDecl(nil, arguments, defaults,
                                      newBlockStmt(@[], Token()),
                                      isAsync=isAsync,
                                      isGenerator=isGenerator,
                                      isPrivate=true, token=tok,
                                      pragmas=(@[]), returnType=nil)
    FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1))
    if self.match("*"):
      FunDecl(self.currentFunction).isPrivate = false
  elif not isLambda and
      (self.check([LeftBrace, LeftParen]) or self.check(":")):
    # We do a bit of hacking to pretend we never
    # wanted to parse this as a declaration in
    # the first place and pass control over to
    # expressionStatement(), which will in turn
    # go all the way up to primary(), which will
    # call us back with isLambda=true, allowing us
    # to actually parse the function as an expression
    dec(self.current)
    result = Declaration(self.expressionStatement())
    self.currentFunction = enclosingFunction
    return result
  elif isLambda:
    self.currentFunction = newLambdaExpr(arguments, defaults,
                                         newBlockStmt(@[], Token()),
                                         isGenerator=isGenerator,
                                         isAsync=isAsync, token=tok,
                                         returnType=nil)
  elif not isOperator:
    self.error("funDecl: invalid state")
  # NOTE(review): when isOperator is true and none of the branches
  # above matched, no node has been created. The next token is then
  # neither '{', '(' nor ':', so the expect(LeftParen) below raises
  # before self.currentFunction is ever used
  if self.match(":"):
    # Function has explicit return type
    returnType = self.parseReturnType()
  var isForwardDecl = false
  if not self.match(LeftBrace):
    self.expect(LeftParen)
    var parameter: Parameter
    self.parseDeclArguments(arguments, parameter, defaults)
    if self.match(":"):
      # Function's return type
      returnType = self.parseReturnType()
    if self.currentFunction.kind == funDecl and self.match(Semicolon):
      # A semicolon right after the signature means
      # this is a forward declaration
      isForwardDecl = true
    else:
      self.expect(LeftBrace)
  # NOTE(review): the parsed defaults are never written back to
  # the node created above, which only saw the initial empty
  # list - confirm whether later stages need them
  if self.currentFunction.kind == funDecl:
    if isForwardDecl:
      # This is a forward declaration so we explicitly
      # nullify the function's body to tell the compiler
      # to look for it elsewhere in the file later
      FunDecl(self.currentFunction).body = nil
    else:
      FunDecl(self.currentFunction).body = self.blockStmt()
    FunDecl(self.currentFunction).arguments = arguments
    FunDecl(self.currentFunction).returnType = returnType
  else:
    LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt()
    LambdaExpr(Expression(self.currentFunction)).arguments = arguments
    LambdaExpr(Expression(self.currentFunction)).returnType = returnType
  result = self.currentFunction
  if isOperator:
    if arguments.len() == 0:
      self.error("cannot declare operator without arguments")
    elif FunDecl(result).returnType == nil:
      self.error("operators must have a return type")
    for argument in arguments:
      if argument.valueType == nil:
        self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
  self.currentFunction = enclosingFunction


proc expression(self: Parser): Expression =
  ## Parses expressions
  result = self.parseArrow() # Highest-level expression


proc expressionStatement(self: Parser): Statement =
  ## Parses expression statements, which
  ## are expressions followed by a semicolon
  var expression = self.expression()
  endOfLine("missing semicolon after expression")
  result = Statement(newExprStmt(expression, expression.token))


proc statement(self: Parser): Statement =
  ## Parses statements. The keyword that introduces
  ## the statement is consumed here, so each handler
  ## can retrieve it with self.peek(-1)
  case self.peek().kind:
  of If:
    discard self.step()
    result = self.ifStmt()
  of Assert:
    discard self.step()
    result = self.assertStmt()
  of Raise:
    discard self.step()
    result = self.raiseStmt()
  of Break:
    discard self.step()
    result = self.breakStmt()
  of Continue:
    discard self.step()
    result = self.continueStmt()
  of Return:
    discard self.step()
    result = self.returnStmt()
  of Import:
    discard self.step()
    result = self.importStmt()
  of From:
    # TODO
    # from module import a [, b, c as d]
    discard self.step()
    result = self.importStmt(fromStmt=true)
  of While:
    discard self.step()
    result = self.whileStmt()
  of For:
    discard self.step()
    result = self.forStmt()
  of Foreach:
    discard self.step()
    result = self.forEachStmt()
  of LeftBrace:
    discard self.step()
    result = self.blockStmt()
  of Yield:
    discard self.step()
    result = self.yieldStmt()
  of Await:
    discard self.step()
    result = self.awaitStmt()
  of Defer:
    discard self.step()
    result = self.deferStmt()
  of Try:
    discard self.step()
    result = self.tryStmt()
  else:
    result = self.expressionStatement()


proc declaration(self: Parser): Declaration =
  ## Parses declarations, falling back to
  ## statements when the next token doesn't
  ## start one
  case self.peek().kind:
  of Var, Const, Let:
    let keyword = self.step()
    result = self.varDecl(isLet=keyword.kind == Let,
                          isConst=keyword.kind == Const)
  of Function:
    discard self.step()
    result = self.funDecl()
  of Coroutine:
    discard self.step()
    result = self.funDecl(isAsync=true)
  of Generator:
    discard self.step()
    result = self.funDecl(isGenerator=true)
  of Operator:
    discard self.step()
    result = self.funDecl(isOperator=true)
  of Type, TokenType.Whitespace, TokenType.Tab, Comment:
    # TODO: Comments, pragmas, docstrings
    discard self.step() # TODO
    return newNilExpr(Token(lexeme: "nil"))
  else:
    result = Declaration(self.statement())


proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
  ## Parses a series of tokens into a list of AST
  ## nodes. The parser's state is reset on every
  ## call. The file name is only used in error
  ## messages
  self.tokens = tokens
  self.file = file
  self.current = 0
  self.currentLoop = LoopContext.None
  self.currentFunction = nil
  self.scopeDepth = 0
  self.operators = newOperatorTable()
  for i, token in self.tokens:
    # We do a first pass over the tokens
    # to find operators. Note that this
    # relies on the lexer ending the input
    # with an EOF token
    if token.kind == Operator:
      if i == self.tokens.high():
        self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
      # The token after the keyword names the operator
      self.operators.addOperator(self.tokens[i + 1].lexeme)
    if i == self.tokens.high() and token.kind != EndOfFile:
      # Since we're iterating this list anyway might as
      # well perform some extra checks
      self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
  while not self.done():
    result.add(self.declaration())