# peon/src/frontend/parser.nim
# (1080 lines, 40 KiB, Nim)

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## A recursive-descent top-down parser implementation
import strformat
import strutils
import meta/token
import meta/ast
import meta/errors
export token, ast, errors
type
  # Records whether the parser is currently
  # inside a loop body (Loop) or not (None)
  LoopContext = enum
    Loop, None

  Parser* = ref object
    ## A recursive-descent top-down
    ## parser implementation

    # Position inside self.tokens of the
    # next token to be consumed
    current: int
    # Name of the file being parsed. It is
    # only used when formatting parse errors
    file: string
    # The tokens that make up the source code
    # to parse. They usually come from the
    # builtin lexer, but the parser does not
    # enforce that: the tokenizer is a fully
    # separate component
    tokens: seq[Token]
    # Whether we are inside a loop or not.
    # Used to reject things like a break
    # statement outside of a loop
    currentLoop: LoopContext
    # The function currently being parsed:
    # either a FunDecl or a LambdaExpr node,
    # or nil when parsing at the top level.
    # Used to reject things like a return
    # statement outside of a function
    currentFunction: Declaration
    # Current scope depth (0 = global, > 0 local)
    scopeDepth: int
    # Lexemes of user-defined operators,
    # collected for later use
    operators: seq[string]
proc newParser*(): Parser =
  ## Creates a new Parser object with no tokens,
  ## an empty file name and a clean parsing state
  ## (not inside any loop, function or local scope)
  result = Parser(
    current: 0,
    file: "",
    tokens: @[],
    currentFunction: nil,
    currentLoop: None,
    scopeDepth: 0
  )
# Public getters for improved error formatting
proc getCurrent*(self: Parser): int {.inline.} =
  ## Returns the parser's current index
  ## into its list of tokens
  result = self.current
proc getCurrentToken*(self: Parser): Token =
  ## Returns the most recently consumed token, i.e.
  ## the one right before the parser's current index.
  ## If nothing has been consumed yet, or the index
  ## has reached (or passed) the last token, the last
  ## token of the input is returned instead. When the
  ## token list is empty a synthetic EndOfFile token
  ## is returned, rather than failing with an
  ## out-of-bounds access
  if self.tokens.len() == 0:
    # Nothing to index into: hand back the same kind of
    # placeholder that out-of-bounds peeks produce
    return Token(kind: EndOfFile, lexeme: "", line: -1)
  if self.getCurrent() >= self.tokens.high() or self.getCurrent() - 1 < 0:
    return self.tokens[^1]
  return self.tokens[self.current - 1]
# Handy templates to make our life easier, thanks nim!
template endOfFile: Token =
  ## Builds a placeholder EndOfFile token
  ## (empty lexeme, line number -1)
  Token(kind: EndOfFile, lexeme: "", line: -1)


template endOfLine(msg: string) =
  ## Expects a semicolon at the current position and
  ## raises a parse error with the given message if
  ## there isn't one. A parser named `self` must be
  ## in scope wherever this template is used
  self.expect(Semicolon, msg)
proc peek(self: Parser, distance: int = 0): Token =
  ## Returns the token located `distance` positions
  ## away from the current one, without consuming
  ## anything. Negative distances look back at tokens
  ## that were already consumed. Whenever the requested
  ## position falls outside the token list (or the list
  ## is empty), a placeholder EOF token is returned
  let index = self.current + distance
  if self.tokens.len() > 0 and index >= 0 and index <= self.tokens.high():
    result = self.tokens[index]
  else:
    result = endOfFile
proc done(self: Parser): bool =
  ## Tells whether the parser reached the end of
  ## the file, i.e. whether the current token is
  ## of kind EndOfFile. The parser expects the
  ## input to end with an explicit EOF token
  let upcoming = self.peek()
  result = upcoming.kind == EndOfFile
proc step(self: Parser, n: int = 1): Token =
  ## Steps n tokens into the input, returning
  ## the last consumed one. Stepping stops early
  ## once the end of the file is reached: in that
  ## case the current (EOF) token is returned and
  ## it is not consumed. If n is zero or negative
  ## nothing is consumed and the current token is
  ## returned as-is
  result = self.peek()
  for _ in 1..n:
    if self.done():
      # Never move past the EOF marker
      return self.peek()
    result = self.tokens[self.current]
    self.current += 1
proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} =
  ## Raises a ParseError whose message includes the
  ## file name, the line of the current token, the
  ## lexeme of the last consumed token and the given
  ## description of the problem
  let lexeme = self.getCurrentToken().lexeme
  raise newException(ParseError, &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at '{lexeme}' -> {message}")
proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
  ## Returns true if the token found at the given
  ## distance from the current position is of the
  ## given kind. Nothing is consumed. The distance
  ## is forwarded as-is to self.peek()
  result = kind == self.peek(distance).kind
proc check(self: Parser, kind: openarray[TokenType]): bool =
  ## Returns true if the current token is of any of
  ## the given kinds, trying them in order and
  ## stopping at the first one that matches. This
  ## assumes that only one kind can match at a
  ## given position
  result = false
  for candidate in kind:
    if self.check(candidate):
      result = true
      break
proc match(self: Parser, kind: TokenType): bool =
  ## Like self.check(), but a token that
  ## matches is consumed as well
  result = self.check(kind)
  if result:
    discard self.step()
proc match(self: Parser, kind: openarray[TokenType]): bool =
  ## Tries self.match() with each of the given token
  ## kinds, in order, and stops at the first one that
  ## matches (consuming the token). Returns false if
  ## none of them matches. This assumes that only one
  ## kind of token may exist at a given position
  for candidate in kind:
    if self.match(candidate):
      return true
  return false
proc expect(self: Parser, kind: TokenType, message: string = "") =
  ## Like self.match(), but raises a parse
  ## error when the current token is not of
  ## the expected kind. A default error message
  ## is used when the given one is empty
  if self.match(kind):
    return
  if message.len() > 0:
    self.error(message)
  else:
    self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead")
proc expect(self: Parser, kinds: openarray[TokenType], message: string = "") =
  ## Like the single-kind self.expect(), except that
  ## an error is raised only if none of the given
  ## token kinds matches. The first kind that matches
  ## is consumed. When no error message is given, a
  ## default one listing the expected kinds is used;
  ## otherwise the given message is raised
  for kind in kinds:
    if self.match(kind):
      return
  if message.len() == 0:
    self.error(&"""expecting any of the following tokens: {kinds.join(", ")}, but got {self.peek().kind} instead""")
  else:
    # A custom message must still produce an error:
    # falling through here would silently accept
    # an unexpected token
    self.error(message)
# Forward declarations. These procedures are implemented
# further down in this file but are called by procedures
# defined before them (for instance, primary() calls both
# expression() and funDecl()), so their signatures need
# to be known in advance
proc expression(self: Parser): Expression
proc expressionStatement(self: Parser): Statement
proc statement(self: Parser): Statement
proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration
proc primary(self: Parser): Expression =
  ## Parses primary expressions: literals (numbers,
  ## strings, true, false, nil, nan, inf), identifiers,
  ## parenthesized expressions, tuple/list/set/dict
  ## literals, yield and await expressions and anonymous
  ## functions, coroutines and generators. Raises a parse
  ## error for anything else
  case self.peek().kind:
    of True:
      result = newTrueExpr(self.step())
    of False:
      result = newFalseExpr(self.step())
    of TokenType.NotANumber:
      result = newNanExpr(self.step())
    of Nil:
      result = newNilExpr(self.step())
    of Float:
      result = newFloatExpr(self.step())
    of Integer:
      result = newIntExpr(self.step())
    of Identifier:
      result = newIdentExpr(self.step())
    of LeftParen:
      let tok = self.step()
      if self.match(RightParen):
        # This yields an empty tuple
        result = newTupleExpr(@[], tok)
      else:
        result = self.expression()
        if self.match(Comma):
          # A comma after the first expression
          # means this is a tuple literal
          var tupleObject = newTupleExpr(@[result], tok)
          while not self.check(RightParen):
            tupleObject.members.add(self.expression())
            if not self.match(Comma):
              break
          result = tupleObject
          self.expect(RightParen, "unterminated tuple literal")
        else:
          self.expect(RightParen, "unterminated parenthesized expression")
          result = newGroupingExpr(result, tok)
    of LeftBracket:
      let tok = self.step()
      if self.match(RightBracket):
        # This yields an empty list
        result = newListExpr(@[], tok)
      else:
        var listObject = newListExpr(@[], tok)
        while not self.check(RightBracket):
          listObject.members.add(self.expression())
          if not self.match(Comma):
            break
        result = listObject
        self.expect(RightBracket, "unterminated list literal")
    of LeftBrace:
      let tok = self.step()
      if self.match(RightBrace):
        # This yields an empty dictionary, not an empty set!
        # For empty sets, there will be a builtin set() type
        # that can be instantiated with no arguments
        result = newDictExpr(@[], @[], tok)
      else:
        result = self.expression()
        if self.match(Comma) or self.check(RightBrace):
          # Either "{a, ..." or "{a}": a set literal
          var setObject = newSetExpr(@[result], tok)
          while not self.check(RightBrace):
            setObject.members.add(self.expression())
            if not self.match(Comma):
              break
          result = setObject
          self.expect(RightBrace, "unterminated set literal")
        elif self.match(Colon):
          # "{a: b ...": a dictionary literal
          var dictObject = newDictExpr(@[result], @[self.expression()], tok)
          if self.match(RightBrace):
            return dictObject
          if self.match(Comma):
            while not self.check(RightBrace):
              dictObject.keys.add(self.expression())
              self.expect(Colon)
              dictObject.values.add(self.expression())
              if not self.match(Comma):
                break
          self.expect(RightBrace, "unterminated dict literal")
          result = dictObject
        else:
          # The first element is followed by neither ',',
          # '}' nor ':'. Without this check the opening
          # brace would be silently left unclosed
          self.error("unterminated set or dict literal")
    of Yield:
      let tok = self.step()
      if self.currentFunction == nil:
        self.error("'yield' cannot be used outside functions")
      elif self.currentFunction.token.kind != Generator:
        # It's easier than doing conversions for lambda/funDecl
        self.error("'yield' cannot be used outside generators")
      if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]):
        # The next token is not an expression
        # delimiter, so there is a value to yield
        result = newYieldExpr(self.expression(), tok)
      else:
        # Empty yield
        result = newYieldExpr(newNilExpr(Token()), tok)
    of Await:
      let tok = self.step()
      if self.currentFunction == nil:
        self.error("'await' cannot be used outside functions")
      if self.currentFunction.token.kind != Coroutine:
        self.error("'await' can only be used inside coroutines")
      result = newAwaitExpr(self.expression(), tok)
    of RightParen, RightBracket, RightBrace:
      # This is *technically* unnecessary: the parser would
      # throw an error regardless, but it's a little bit nicer
      # when the error message is more specific
      self.error(&"unmatched '{self.peek().lexeme}'")
    of Hex:
      result = newHexExpr(self.step())
    of Octal:
      result = newOctExpr(self.step())
    of Binary:
      result = newBinExpr(self.step())
    of String:
      result = newStrExpr(self.step())
    of Infinity:
      result = newInfExpr(self.step())
    of Function:
      # Anonymous function
      discard self.step()
      result = Expression(self.funDecl(isLambda=true))
    of Coroutine:
      # Anonymous coroutine
      discard self.step()
      result = Expression(self.funDecl(isAsync=true, isLambda=true))
    of Generator:
      # Anonymous generator
      discard self.step()
      result = Expression(self.funDecl(isGenerator=true, isLambda=true))
    else:
      self.error("invalid syntax")
proc makeCall(self: Parser, callee: Expression): Expression =
  ## Parses the argument list of a function call and
  ## builds the call expression for the given callee.
  ## It is invoked by self.call() right after the
  ## opening parenthesis has been consumed. Keyword
  ## arguments are arguments that parse as assignment
  ## expressions; positional arguments may not follow
  ## them
  let tok = self.peek(-1)
  var
    seenNames: seq[IdentExpr] = @[]
    arguments: tuple[positionals: seq[Expression], keyword: seq[tuple[name: IdentExpr, value: Expression]]] = (positionals: @[], keyword: @[])
    argCount = 0
  if not self.check(RightParen):
    while true:
      if argCount >= 255:
        self.error("call can not have more than 255 arguments")
      let argument = self.expression()
      if argument.kind == assignExpr:
        # TODO: This will explode with slices!
        let keywordName = IdentExpr(AssignExpr(argument).name)
        if keywordName in seenNames:
          self.error("duplicate keyword argument in call")
        seenNames.add(keywordName)
        arguments.keyword.add((name: keywordName, value: AssignExpr(argument).value))
      elif arguments.keyword.len() == 0:
        arguments.positionals.add(argument)
      else:
        self.error("positional argument cannot follow keyword argument in call")
      if not self.match(Comma):
        break
      argCount += 1
  self.expect(RightParen)
  result = newCallExpr(callee, arguments, tok)
proc call(self: Parser): Expression =
  ## Parses a primary expression followed by any
  ## number of call, attribute access and slicing
  ## suffixes, such as a(b), a.b and a[1:2]
  result = self.primary()
  while true:
    if self.match(LeftParen):
      result = self.makeCall(result)
    elif self.match(Dot):
      self.expect(Identifier, "expecting attribute name after '.'")
      let attribute = self.peek(-1)
      result = newGetItemExpr(result, newIdentExpr(attribute), attribute)
    elif self.match(LeftBracket):
      # Slicing such as a[1:2]
      let tok = self.peek(-1)
      var ends: seq[ASTNode] = @[]
      while not (self.check(RightBracket) or self.done()):
        if self.match(Colon):
          # A colon with nothing before it stands
          # for an omitted end, recorded as nil
          ends.add(newNilExpr(Token()))
        else:
          ends.add(self.expression())
          discard self.match(Colon)
      self.expect(RightBracket, "expecting ']'")
      result = newSliceExpr(result, ends, tok)
    else:
      break
proc unary(self: Parser): Expression =
  ## Parses unary expressions, i.e. expressions
  ## prefixed by any number of the builtin
  ## prefix operators matched below
  if not self.match([Minus, Tilde, LogicalNot, Plus]):
    return self.call()
  let operator = self.peek(-1)
  result = newUnaryExpr(operator, self.unary())
proc customUnaryOperator(self: Parser): Expression =
  ## Parses unary expressions whose operator
  ## is one of the user-defined operators
  ## collected in self.operators
  if self.peek().lexeme notin self.operators:
    return self.unary()
  let operator = self.step()
  result = newUnaryExpr(operator, self.customUnaryOperator())
proc pow(self: Parser): Expression =
  ## Parses exponentiation expressions, folding
  ## chained operators into a left-leaning tree
  result = self.customUnaryOperator()
  while self.match(DoubleStar):
    let operator = self.peek(-1)
    let right = self.customUnaryOperator()
    result = newBinaryExpr(result, operator, right)
proc mul(self: Parser): Expression =
  ## Parses multiplication, division, floor
  ## division and modulo expressions
  result = self.pow()
  while self.match([Slash, Percentage, FloorDiv, Star]):
    let operator = self.peek(-1)
    let right = self.pow()
    result = newBinaryExpr(result, operator, right)
proc add(self: Parser): Expression =
  ## Parses addition and subtraction expressions
  result = self.mul()
  while self.match([Plus, Minus]):
    let operator = self.peek(-1)
    let right = self.mul()
    result = newBinaryExpr(result, operator, right)
proc comparison(self: Parser): Expression =
  ## Parses ordering comparisons (<, >, <=, >=)
  ## as well as the is, is not, as and of operators
  result = self.add()
  while self.match([LessThan, GreaterThan, LessOrEqual, GreaterOrEqual, Is, As, Of, IsNot]):
    let operator = self.peek(-1)
    let right = self.add()
    result = newBinaryExpr(result, operator, right)
proc equality(self: Parser): Expression =
  ## Parses equality and inequality expressions
  result = self.comparison()
  while self.match([DoubleEqual, NotEqual]):
    let operator = self.peek(-1)
    let right = self.comparison()
    result = newBinaryExpr(result, operator, right)
proc logicalAnd(self: Parser): Expression =
  ## Parses logical and expressions
  ## (a and b)
  result = self.equality()
  while self.match(LogicalAnd):
    let operator = self.peek(-1)
    let right = self.equality()
    result = newBinaryExpr(result, operator, right)
proc logicalOr(self: Parser): Expression =
  ## Parses logical or expressions
  ## (a or b)
  result = self.logicalAnd()
  while self.match(LogicalOr):
    let operator = self.peek(-1)
    let right = self.logicalAnd()
    result = newBinaryExpr(result, operator, right)
proc bitwiseAnd(self: Parser): Expression =
  ## Parses a & b expressions. Since bitwiseOr()
  ## parses its operands through this procedure,
  ## & binds tighter than |
  result = self.logicalOr()
  while self.match(Ampersand):
    let operator = self.peek(-1)
    let right = self.logicalOr()
    result = newBinaryExpr(result, operator, right)


proc bitwiseOr(self: Parser): Expression =
  ## Parses a | b expressions
  result = self.bitwiseAnd()
  while self.match(Pipe):
    let operator = self.peek(-1)
    let right = self.bitwiseAnd()
    result = newBinaryExpr(result, operator, right)
proc customBinaryOperator(self: Parser): Expression =
  ## Parses binary expressions whose operator
  ## is one of the user-defined operators
  ## collected in self.operators
  result = self.bitwiseOr()
  while self.peek().lexeme in self.operators:
    let operator = self.step()
    let right = self.bitwiseOr()
    result = newBinaryExpr(result, operator, right)
proc assignment(self: Parser): Expression =
  ## Parses assignments, the highest-level kind of
  ## expression. Plain names and slices become
  ## assignment expressions, attribute accesses
  ## (like a.b = 1) become set-item expressions, and
  ## any other target is rejected with a parse error
  result = self.customBinaryOperator()
  if not self.match([Equal, InplaceAdd, InplaceSub, InplaceDiv, InplaceMod,
                     InplacePow, InplaceMul, InplaceXor, InplaceAnd, InplaceOr,
                     InplaceFloorDiv, InplaceRightShift, InplaceLeftShift]):
    # Not an assignment: return what we parsed
    return
  let tok = self.peek(-1)
  let value = self.expression()
  case result.kind:
    of identExpr, sliceExpr:
      result = newAssignExpr(result, value, tok)
    of getItemExpr:
      let target = GetItemExpr(result)
      result = newSetItemExpr(target.obj, target.name, value, tok)
    else:
      self.error("invalid assignment target")
proc assertStmt(self: Parser): Statement =
  ## Parses "assert" statements: an expression
  ## followed by a semicolon. They are meant to
  ## raise an error when the expression is falsey
  let tok = self.peek(-1)
  let condition = self.expression()
  endOfLine("missing semicolon after assert statement")
  result = newAssertStmt(condition, tok)
proc beginScope(self: Parser) =
  ## Enters a new lexical scope by
  ## increasing the scope depth by one
  self.scopeDepth += 1
proc endScope(self: Parser) =
  ## Leaves the current lexical scope by
  ## decreasing the scope depth by one
  self.scopeDepth -= 1
proc blockStmt(self: Parser): Statement =
  ## Parses block statements: a sequence of
  ## declarations terminated by a closing brace.
  ## The block is parsed inside a new local scope
  let tok = self.peek(-1)
  var code: seq[Declaration] = @[]
  self.beginScope()
  while not (self.check(RightBrace) or self.done()):
    code.add(self.declaration())
  self.expect(RightBrace, "expecting '}'")
  self.endScope()
  result = newBlockStmt(code, tok)
proc breakStmt(self: Parser): Statement =
  ## Parses break statements, rejecting
  ## those that appear outside of a loop
  let tok = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'break' cannot be used outside loops")
  endOfLine("missing semicolon after break statement")
  return newBreakStmt(tok)
proc deferStmt(self: Parser): Statement =
  ## Parses defer statements, rejecting
  ## those that appear outside of a function
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'defer' cannot be used outside functions")
  let deferred = self.expression()
  result = newDeferStmt(deferred, tok)
  endOfLine("missing semicolon after defer statement")
proc continueStmt(self: Parser): Statement =
  ## Parses continue statements, rejecting
  ## those that appear outside of a loop
  let tok = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'continue' cannot be used outside loops")
  endOfLine("missing semicolon after continue statement")
  return newContinueStmt(tok)
proc returnStmt(self: Parser): Statement =
  ## Parses return statements, rejecting those
  ## that appear outside of a function. A bare
  ## "return;" implicitly returns nil
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'return' cannot be used outside functions")
  var value: Expression
  if self.check(Semicolon):
    # No value was given: return nil implicitly
    value = newNilExpr(Token(lexeme: "nil"))
  else:
    value = self.expression()
  endOfLine("missing semicolon after return statement")
  result = newReturnStmt(value, tok)
proc yieldStmt(self: Parser): Statement =
  ## Parses yield statements, which are only
  ## allowed inside generators. A bare "yield;"
  ## yields a nil expression
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'yield' cannot be outside functions")
  elif self.currentFunction.token.kind != Generator:
    self.error("'yield' can only be used inside generators")
  var yielded: Expression
  if self.check(Semicolon):
    yielded = newNilExpr(Token())
  else:
    yielded = self.expression()
  result = newYieldStmt(yielded, tok)
  endOfLine("missing semicolon after yield statement")
proc awaitStmt(self: Parser): Statement =
  ## Parses await statements, which are
  ## only allowed inside coroutines
  let tok = self.peek(-1)
  if self.currentFunction == nil:
    self.error("'await' cannot be used outside functions")
  if self.currentFunction.token.kind != Coroutine:
    self.error("'await' can only be used inside coroutines")
  let awaited = self.expression()
  result = newAwaitStmt(awaited, tok)
  endOfLine("missing semicolon after await statement")
proc raiseStmt(self: Parser): Statement =
  ## Parses raise statements. The exception is
  ## optional: a bare "raise;" (which re-raises
  ## the last active exception) produces a node
  ## whose exception is nil
  let tok = self.peek(-1)
  var exception: Expression = nil
  if not self.check(Semicolon):
    exception = self.expression()
  endOfLine("missing semicolon after raise statement")
  return newRaiseStmt(exception, tok)
proc forEachStmt(self: Parser): Statement =
  ## Parses C#-like foreach loops, in the
  ## form foreach (name : expression) body
  let tok = self.peek(-1)
  # Remember the loop context we came from so
  # that it can be restored once we're done
  let enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(LeftParen, "expecting '(' after 'foreach'")
  self.expect(Identifier)
  let identifier = newIdentExpr(self.peek(-1))
  self.expect(Colon)
  let iterable = self.expression()
  self.expect(RightParen)
  let body = self.statement()
  self.currentLoop = enclosingLoop
  result = newForEachStmt(identifier, iterable, body, tok)
proc importStmt(self: Parser, fromStmt: bool = false): Statement =
  ## Parses import statements. When fromStmt is
  ## true, the node's token is taken two positions
  ## back from the current one instead of one
  # TODO: New AST node
  let tok = self.peek(if fromStmt: -2 else: -1)
  self.expect(Identifier, "expecting module name(s) after import statement")
  let moduleName = newIdentExpr(self.peek(-1))
  result = newImportStmt(moduleName, tok)
  endOfLine("missing semicolon after import statement")
proc tryStmt(self: Parser): Statement =
## Parses try/except/else/finally blocks
# NOTE(review): the indentation of this procedure is not visible
# in this view of the file, so the nesting described by the comments
# below is inferred from the order of the statements. Confirm it
# against the original source before relying on it
# The previously consumed token becomes the token of the try node
let tok = self.peek(-1)
# The statement guarded by the try
var body = self.statement()
# One entry per caught exception: the handler's body, the exception's
# identifier (exc) and the optional 'as' alias (name)
var handlers: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]] = @[]
var finallyClause: Statement
var elseClause: Statement
var asName: IdentExpr
var excName: Expression
var handlerBody: Statement
# Each iteration consumes one 'except' keyword and parses the
# expression that follows it
while self.match(Except):
excName = self.expression()
if excName.kind == identExpr:
# A single exception name followed by the handler's body
handlerBody = self.statement()
handlers.add((body: handlerBody, exc: IdentExpr(excName), name: asName))
asName = nil
elif excName.kind == binaryExpr and BinaryExpr(excName).operator.kind == As:
# "<left> as <alias>": the right operand becomes the alias and the left
# operand replaces excName
asName = IdentExpr(BinaryExpr(excName).b)
if BinaryExpr(excName).b.kind != identExpr:
self.error("expecting alias name after 'except ... as'")
elif BinaryExpr(excName).a.kind != identExpr:
self.error("expecting exception name")
excName = BinaryExpr(excName).a
# NOTE(review): the check above raises unless the left operand is an
# identifier, while the code below only registers handlers when excName is
# a tuple. It looks like "except Name as alias" never parses a handler body
# nor registers a handler -- confirm
# Note how we don't use elif here: when the if above sets excName to As'
# first operand, that might be a tuple, which we unpack below
if excName.kind == tupleExpr:
# This allows to do except (a, b, c) as SomeError {...}
# TODO: Consider adding the ability to make exc a sequence
# instead of adding the same body with different exception
# types each time
handlerBody = self.statement()
for element in TupleExpr(excName).members:
if element.kind != identExpr:
self.error("expecting exception name")
handlers.add((body: handlerBody, exc: IdentExpr(element), name: asName))
continue
else:
excName = nil
# The else and finally clauses are both optional
if self.match(Else):
elseClause = self.statement()
if self.match(Finally):
finallyClause = self.statement()
# A try block must be followed by at least one handler or clause
if handlers.len() == 0 and elseClause == nil and finallyClause == nil:
self.error("expecting 'except', 'finally' or 'else' statement after 'try' block")
# A handler without an exception name (exc == nil) is only allowed in last
# position.
# NOTE(review): none of the handlers.add() calls above appears able to store
# a nil exc, so this check may never trigger -- confirm
for i, handler in handlers:
if handler.exc == nil and i != handlers.high():
self.error("catch-all exception handler with bare 'except' must come last in try statement")
result = newTryStmt(body, handlers, finallyClause, elseClause, tok)
proc whileStmt(self: Parser): Statement =
  ## Parses a C-style while loop statement.
  ## The whole loop is parsed inside a new scope
  let tok = self.peek(-1)
  self.beginScope()
  # Remember the loop context we came from so
  # that it can be restored once we're done
  let enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(LeftParen, "expecting '(' before while loop condition")
  let condition = self.expression()
  self.expect(RightParen, "unterminated while loop condition")
  let body = self.statement()
  result = newWhileStmt(condition, body, tok)
  self.currentLoop = enclosingLoop
  self.endScope()
proc forStmt(self: Parser): Statement =
  ## Parses a C-style for loop and desugars it
  ## into an equivalent while loop. For example:
  ##
  ## for (var i = 0; i < 10; i += 1) {
  ##     print(i);
  ## }
  ##
  ## becomes the semantically equivalent:
  ##
  ## {
  ##     var i = 0;
  ##     while (i < 10) {
  ##         print(i);
  ##         i += 1;
  ##     }
  ## }
  self.beginScope()
  let tok = self.peek(-1)
  let enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(LeftParen, "expecting '(' after 'for'")
  var
    initializer: ASTNode = nil
    condition: Expression = nil
    increment: Expression = nil
  # First clause: the (optional) initializer. Variable
  # declarations and expression statements consume
  # their own trailing semicolon
  if self.match(Semicolon):
    discard
  elif self.match(Var):
    initializer = self.varDecl()
    if not VarDecl(initializer).isPrivate:
      self.error("cannot declare public for loop initializer")
  else:
    initializer = self.expressionStatement()
  # Second clause: the (optional) condition
  if not self.check(Semicolon):
    condition = self.expression()
  self.expect(Semicolon, "expecting ';' after for loop condition")
  # Third clause: the (optional) increment
  if not self.check(RightParen):
    increment = self.expression()
  self.expect(RightParen, "unterminated for loop increment")
  result = self.statement()
  if increment != nil:
    # The increment runs after each iteration, so it is
    # appended to the loop's body as its last statement
    result = newBlockStmt(@[Declaration(result), newExprStmt(increment, increment.token)], tok)
  if condition == nil:
    # A missing condition is functionally
    # equivalent to "true"
    condition = newTrueExpr(Token())
  # From here on a while loop works just as well
  result = newWhileStmt(condition, result, tok)
  if initializer != nil:
    # The loop is wrapped into an outer block so that
    # the initializer is only executed once
    result = newBlockStmt(@[Declaration(initializer), Declaration(result)], tok)
  self.currentLoop = enclosingLoop
  self.endScope()
proc ifStmt(self: Parser): Statement =
  ## Parses if statements. The else branch
  ## is optional and is nil when missing
  let tok = self.peek(-1)
  self.expect(LeftParen, "expecting '(' before if condition")
  let condition = self.expression()
  self.expect(RightParen, "expecting ')' after if condition")
  let thenBranch = self.statement()
  let elseBranch: Statement = if self.match(Else): self.statement() else: nil
  return newIfStmt(condition, thenBranch, elseBranch, tok)
template checkDecl(self: Parser, isPrivate: bool) =
  ## Shared sanity checks for declaration handlers:
  ## public (i.e. non-private) names can be bound
  ## neither inside functions nor inside local scopes
  if not isPrivate:
    if self.currentFunction != nil:
      self.error("cannot bind public names inside functions")
    if self.scopeDepth > 0:
      self.error("cannot bind public names inside local scopes")
proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration =
  ## Parses variable declarations. The keyword that
  ## introduced the declaration (var, let or const)
  ## must be the last consumed token. A name followed
  ## by a star is public. The type annotation is
  ## optional, and so is the initializer, but only
  ## for var declarations
  let tok = self.peek(-1)
  self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
  let name = newIdentExpr(self.peek(-1))
  let isPrivate = not self.match(Star)
  self.checkDecl(isPrivate)
  var valueType: IdentExpr
  if self.match(Colon):
    # The type is not mandatory here because
    # the compiler may be able to infer it
    # later!
    self.expect(Identifier, "expecting type name after ':'")
    valueType = newIdentExpr(self.peek(-1))
  var value: Expression
  if self.match(Equal):
    value = self.expression()
    if isConst and not value.isConst():
      self.error("constant initializer is not a constant")
  elif tok.kind != Var:
    self.error(&"{tok.lexeme} declaration requires an initializer")
  else:
    # Uninitialized variables default to nil
    value = newNilExpr(Token())
  self.expect(Semicolon, "expecting semicolon after declaration")
  case tok.kind:
    of Var:
      result = newVarDecl(name, value, isPrivate=isPrivate, token=tok,
                          closedOver=false, valueType=valueType)
    of Const:
      result = newVarDecl(name, value, isPrivate=isPrivate, token=tok,
                          isConst=true, closedOver=false, valueType=valueType)
    of Let:
      result = newVarDecl(name, value, isPrivate=isPrivate, token=tok,
                          isLet=isLet, closedOver=false, valueType=valueType)
    else:
      discard  # Unreachable
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration =
  ## Parses functions, coroutines, generators, anonymous functions and custom operators.
  ## The keyword that introduced the function must be the last consumed
  ## token. While the function is being parsed, self.currentFunction
  ## points to its (partially built) node; the previous value is
  ## restored before returning. A function may have at most 255
  ## parameters
  let tok = self.peek(-1)
  var enclosingFunction = self.currentFunction
  var arguments: seq[tuple[name: IdentExpr, valueType: IdentExpr]] = @[]
  var defaults: seq[Expression] = @[]
  var returnType: IdentExpr
  if not isLambda and self.check(Identifier):
    # We do this extra check because we might
    # be called from a context where it's
    # ambiguous whether we're parsing a declaration
    # or an expression. Fortunately anonymous functions
    # are nameless, so we can sort the ambiguity by checking
    # if there's an identifier after the keyword
    self.expect(Identifier, &"expecting function name after '{tok.lexeme}'")
    self.checkDecl(not self.check(Star))
    self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()),
                                      isAsync=isAsync, isGenerator=isGenerator, isPrivate=true,
                                      token=tok, closedOver=false)
    FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1))
    if self.match(Star):
      # A star after the name makes the function public
      FunDecl(self.currentFunction).isPrivate = false
  elif not isLambda and self.check([LeftBrace, Colon]):
    # We do a bit of hacking to pretend we never
    # wanted to parse this as a declaration in
    # the first place and pass control over to
    # expressionStatement(), which will in turn
    # go all the way up to primary(), which will
    # call us back with isLambda=true, allowing us
    # to actually parse the function as an expression
    dec(self.current)
    result = Declaration(self.expressionStatement())
    self.currentFunction = enclosingFunction
    return result
  elif isLambda:
    self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=isGenerator, isAsync=isAsync, token=tok)
  elif not isOperator:
    self.error("funDecl: invalid state")
  # NOTE(review): when isOperator is true and none of the branches
  # above is taken, self.currentFunction is left untouched (it may
  # even be nil), yet it is used further down -- confirm how operator
  # declarations are supposed to reach this point
  if self.match(Colon):
    # A function without an explicit
    # return type is the same as a void
    # function in C (i.e. no return type)
    self.expect([Identifier, Nil], "expecting function return type after ':'")
    returnType = newIdentExpr(self.peek(-1))
  if not self.match(LeftBrace):
    # No opening brace yet, so a parameter list comes first
    # (a brace right away means an argument-less function)
    var parameter: tuple[name: IdentExpr, valueType: IdentExpr]
    self.expect(LeftParen)
    while not self.check(RightParen):
      if arguments.len >= 255:
        # We already have 255 parameters and
        # we're about to parse one more
        self.error("cannot have more than 255 arguments in function declaration")
      self.expect(Identifier, "expecting parameter name")
      parameter.name = newIdentExpr(self.peek(-1))
      self.expect(Colon, "expecting ':' after parameter name")
      self.expect(Identifier, "expecting parameter type")
      parameter.valueType = newIdentExpr(self.peek(-1))
      if parameter in arguments:
        self.error("duplicate parameter name in function declaration")
      arguments.add(parameter)
      if self.match(Equal):
        defaults.add(self.expression())
      elif defaults.len() > 0:
        self.error("positional argument cannot follow default argument in function declaration")
      if not self.match(Comma):
        break
    self.expect(RightParen)
    if self.match(Colon):
      # Function's return type
      self.expect(Identifier, "expecting return type after ':'")
      returnType = newIdentExpr(self.peek(-1))
    self.expect(LeftBrace)
  # NOTE(review): the default values collected above are never
  # stored back into the node after parsing -- confirm whether
  # the node is expected to see them
  if self.currentFunction.kind == funDecl:
    if not self.match(Semicolon):
      # If we don't find a semicolon,
      # it's not a forward declaration
      FunDecl(self.currentFunction).body = self.blockStmt()
    else:
      # This is a forward declaration so we explicitly
      # nullify the function's body to tell the compiler
      # to look for it elsewhere in the file later
      FunDecl(self.currentFunction).body = nil
    FunDecl(self.currentFunction).arguments = arguments
    FunDecl(self.currentFunction).returnType = returnType
  else:
    LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt()
    LambdaExpr(Expression(self.currentFunction)).arguments = arguments
    LambdaExpr(Expression(self.currentFunction)).returnType = returnType
  result = self.currentFunction
  if isOperator:
    # isOperator is only true for functions
    # with a name (since nameless operators
    # don't make much sense)
    if arguments.len() == 0:
      self.error("cannot declare argument-less operator")
    elif arguments.len() > 2:
      self.error("cannot declare operator with more than 2 arguments")
    elif FunDecl(result).returnType == nil:
      self.error("operator cannot have void return type")
  self.currentFunction = enclosingFunction
proc expression(self: Parser): Expression =
  ## Parses expressions, starting from
  ## assignments (the highest-level kind)
  return self.assignment()
proc expressionStatement(self: Parser): Statement =
  ## Parses expression statements, i.e.
  ## expressions followed by a semicolon
  let parsed = self.expression()
  endOfLine("missing semicolon after expression")
  return Statement(newExprStmt(parsed, parsed.token))
proc statement(self: Parser): Statement =
  ## Parses statements. The kind of the current token
  ## selects the handler to call: the token is consumed
  ## first, then the handler parses the rest. Anything
  ## that does not start with a statement keyword (or an
  ## opening brace) is parsed as an expression statement
  result =
    case self.peek().kind
    of If:
      discard self.step()
      self.ifStmt()
    of Assert:
      discard self.step()
      self.assertStmt()
    of Raise:
      discard self.step()
      self.raiseStmt()
    of Break:
      discard self.step()
      self.breakStmt()
    of Continue:
      discard self.step()
      self.continueStmt()
    of Return:
      discard self.step()
      self.returnStmt()
    of Import:
      discard self.step()
      self.importStmt()
    of From:
      # TODO
      # from module import a [, b, c as d]
      discard self.step()
      self.importStmt(fromStmt=true)
    of While:
      discard self.step()
      self.whileStmt()
    of For:
      discard self.step()
      self.forStmt()
    of Foreach:
      discard self.step()
      self.forEachStmt()
    of LeftBrace:
      discard self.step()
      self.blockStmt()
    of Yield:
      discard self.step()
      self.yieldStmt()
    of Await:
      discard self.step()
      self.awaitStmt()
    of Defer:
      discard self.step()
      self.deferStmt()
    of Try:
      discard self.step()
      self.tryStmt()
    else:
      self.expressionStatement()
proc declaration(self: Parser): Declaration =
  ## Parses declarations: variables, functions,
  ## coroutines, generators and operators. Anything
  ## else is parsed as a statement. Type, comment,
  ## whitespace and tab tokens are currently just
  ## skipped, in which case nil is returned
  let kind = self.peek().kind
  case kind:
    of Var, Const, Let:
      discard self.step()
      result = self.varDecl(isLet=(kind == Let), isConst=(kind == Const))
    of Function:
      discard self.step()
      result = self.funDecl()
    of Coroutine:
      discard self.step()
      result = self.funDecl(isAsync=true)
    of Generator:
      discard self.step()
      result = self.funDecl(isGenerator=true)
    of Operator:
      discard self.step()
      result = self.funDecl(isOperator=true)
    of Type, Comment, TokenType.Whitespace, TokenType.Tab:
      discard self.step()  # TODO
    else:
      result = Declaration(self.statement())
proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
  ## Parses a series of tokens into a list of AST
  ## nodes. The parser's state is fully reset first,
  ## so the same parser object can be reused across
  ## calls. The file name is only used when
  ## formatting error messages
  self.tokens = tokens
  self.file = file
  self.current = 0
  self.currentLoop = None
  self.currentFunction = nil
  self.scopeDepth = 0
  self.operators = @[]
  # We do a first pass over the tokens to find
  # user-defined operators: the lexeme of the token
  # following each Operator token is recorded. Note
  # that this relies on the lexer ending the input
  # with an EOF token
  let last = self.tokens.high()
  for i, token in self.tokens:
    if token.kind == Operator:
      if i == last:
        self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
      self.operators.add(self.tokens[i + 1].lexeme)
    # Since we're iterating this list anyway we
    # might as well make sure it ends with EOF
    if i == last and token.kind != EndOfFile:
      self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
  # Second pass: the actual parsing
  while not self.done():
    result.add(self.declaration())