peon/src/frontend/parser.nim

1301 lines
49 KiB
Nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## A recursive-descent top-down parser implementation
import std/strformat
import std/strutils
import std/os
import meta/token
import meta/ast
import meta/errors
import lexer as l
import ../util/symbols
import ../config
export token, ast, errors
type
  LoopContext {.pure.} = enum
    ## Tracks whether the parser is currently
    ## inside a loop body (for break/continue checks)
    Loop, None
  Precedence {.pure.} = enum
    ## Operator precedence
    ## clearly stolen from
    ## nim
    # Listed from loosest- to tightest-binding
    Arrow = 0,
    Assign,
    Or,
    And,
    Compare,
    Bitwise,
    Addition,
    Multiplication,
    Power,
    None # Used for stuff that isn't an operator
  OperatorTable = ref object
    ## A table for storing and
    ## handling the precedence
    ## of operators
    # All registered operator lexemes (for fast membership tests)
    tokens: seq[string]
    # Operator lexemes bucketed by their precedence level
    precedence: array[Precedence, seq[string]]
  Parser* = ref object
    ## A recursive-descent top-down
    ## parser implementation
    # Index into self.tokens
    current: int
    # The name of the file being parsed.
    # Only meaningful for parse errors
    file: string
    # The list of tokens representing
    # the source code to be parsed.
    # In most cases, those will come
    # from the builtin lexer, but this
    # behavior is not enforced and the
    # tokenizer is entirely separate from
    # the parser
    tokens: seq[Token]
    # Little internal attribute that tells
    # us if we're inside a loop or not. This
    # allows us to detect errors like break
    # being used outside loops
    currentLoop: LoopContext
    # Stores the current function
    # being parsed. This is a reference
    # to either a FunDecl or LambdaExpr
    # AST node and is nil when the parser
    # is at the top-level. It allows the
    # parser to detect errors like return
    # outside functions
    currentFunction: Declaration
    # Stores the current scope depth (0 = global, > 0 local)
    scopeDepth: int
    # Operator table
    operators: OperatorTable
    # The AST node
    tree: seq[Declaration]
    # Stores line data
    lines: seq[tuple[start, stop: int]]
    # The source of the current module
    source: string
    # Keeps track of imported modules
    modules: seq[tuple[name: string, loaded: bool]]
  ParseError* = ref object of PeonException
    ## A parsing error, carrying enough context
    ## to produce a nice diagnostic message
    parser*: Parser
    file*: string
    token*: Token
    module*: string
proc newOperatorTable: OperatorTable =
  ## Creates and returns an empty
  ## operator table
  result = OperatorTable(tokens: @[])
  # Make sure every precedence bucket starts out empty
  for level in Precedence:
    result.precedence[level] = @[]
proc addOperator(self: OperatorTable, lexeme: string) =
  ## Adds an operator to the table. Its precedence
  ## is inferred from the operator's lexeme (the
  ## criteria are similar to Nim's)
  if lexeme in self.tokens:
    return # We've already added it!
  # Default precedence if no rule below matches
  var prec = Power
  # NOTE: the order of these checks matters — the first matching
  # rule wins, so e.g. "=>" is classified as Arrow before the
  # Assign rule can see its trailing '='
  if lexeme.len() >= 2 and lexeme[^2..^1] in ["->", "~>", "=>"]:
    prec = Arrow
  elif lexeme == "and":
    prec = Precedence.And
  elif lexeme == "or":
    prec = Precedence.Or
  elif lexeme.endsWith("=") and lexeme[0] notin {'<', '>', '!', '?', '~', '='} or lexeme == "=":
    # Assignment-like operators (=, +=, ...) but not comparisons (<=, ==, ...)
    prec = Assign
  elif lexeme[0] in {'$', } or lexeme == "**":
    prec = Power
  elif lexeme[0] in {'*', '%', '/', '\\'}:
    prec = Multiplication
  elif lexeme[0] in {'+', '-'}:
    prec = Addition
  elif lexeme in [">>", "<<", "|", "~", "&", "^"]:
    prec = Bitwise
  elif lexeme[0] in {'<', '>', '=', '!'}:
    prec = Compare
  self.tokens.add(lexeme)
  self.precedence[prec].add(lexeme)
proc getPrecedence(self: OperatorTable, lexeme: string): Precedence =
  ## Looks up the precedence level of the given
  ## operator lexeme. Unregistered lexemes map
  ## to Precedence.None
  result = Precedence.None
  for (level, entries) in self.precedence.pairs():
    if lexeme in entries:
      return level
proc newParser*: Parser =
  ## Creates a fresh Parser object with all
  ## of its state reset to sensible defaults
  result = Parser(current: 0,
                  file: "",
                  tokens: @[],
                  currentFunction: nil,
                  currentLoop: LoopContext.None,
                  scopeDepth: 0,
                  operators: newOperatorTable(),
                  tree: @[],
                  source: "")
# Public getters for improved error formatting
proc getCurrent*(self: Parser): int {.inline.} = self.current
proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >=
    self.tokens.high() or
    self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1])
# NOTE(review): getCurrentToken indexes self.tokens[^1] when out of
# bounds — this raises IndexDefect on an empty token list; presumably
# callers guarantee a non-empty stream (the lexer always emits EOF) — TODO confirm
proc getCurrentFunction*(self: Parser): Declaration {.inline.} = self.currentFunction
proc getFile*(self: Parser): string {.inline.} = self.file
proc getModule*(self: Parser): string {.inline.} = self.getFile().splitFile().name
proc getLines*(self: Parser): seq[tuple[start, stop: int]] = self.lines
proc getSource*(self: Parser): string = self.source
proc getRelPos*(self: Parser, line: int): tuple[start, stop: int] = self.lines[line - 1]
# Creates a synthetic EOF token (line -1 marks it as artificial)
template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
# Expects a semicolon terminating a statement. Relies on an
# injected `self` being visible at the call site
template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg, tok)
proc peek(self: Parser, distance: int = 0): Token =
  ## Returns the token at the given distance from
  ## the current position without consuming anything.
  ## Out-of-bounds accesses (in either direction)
  ## yield a synthetic EOF token; negative distances
  ## look back at already-consumed tokens
  let pos = self.current + distance
  if self.tokens.high() == -1 or pos > self.tokens.high() or pos < 0:
    result = endOfFile
  else:
    result = self.tokens[pos]
proc done(self: Parser): bool {.inline.} =
  ## Tells whether the parser has reached the end
  ## of input. An explicit EOF token is expected
  ## to terminate the token stream
  self.peek().kind == EndOfFile
proc step(self: Parser, n: int = 1): Token =
  ## Consumes the current token and returns it,
  ## advancing the parser. At EOF, the EOF token
  ## is returned and the position does not move.
  # NOTE(review): the `n` parameter is currently unused — the
  # parser always advances by exactly one token
  if self.done():
    return self.peek()
  result = self.tokens[self.current]
  self.current += 1
proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseError].} =
  ## Raises a ParseError exception carrying the given message
  ## and offending token (defaults to the parser's current
  ## token when none is provided)
  raise ParseError(msg: message, token: if token.isNil(): self.getCurrentToken() else: token, file: self.file, module: self.getModule(), parser: self)
# Why do we allow strings or enum members of TokenType? Well, it's simple:
# symbols like ":" and "=" are both valid operator names (therefore they are
# tokenized as symbols), but they are also used in a context where they are just
# separators (for example, the colon is used in type declarations). Since we can't
# tell at tokenization time which of the two contexts we're in, we just treat everything
# as a symbol and in the cases where we need a specific token we just match the string
# directly
proc check[T: TokenType or string](self: Parser, kind: T,
           distance: int = 0): bool =
  ## Checks whether the token at the given distance matches
  ## the expected kind (by token type) or lexeme (by string),
  ## without consuming it. The distance is forwarded to
  ## self.peek()
  when T is TokenType:
    result = self.peek(distance).kind == kind
  else:
    result = self.peek(distance).lexeme == kind
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
  ## Checks the current token against each of the
  ## given kinds, returning true on the first hit.
  ## Assumes at most one entry can match at any
  ## given position
  result = false
  for candidate in kind:
    if self.check(candidate):
      return true
proc match[T: TokenType or string](self: Parser, kind: T): bool =
  ## Like self.check(), but additionally consumes
  ## the token when it matches
  result = self.check(kind)
  if result:
    discard self.step()
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
  ## Tries to match (and consume) the current token
  ## against each of the given kinds, stopping at
  ## the first successful match. Assumes at most one
  ## entry can match at any given position
  result = false
  for candidate in kind:
    if self.match(candidate):
      return true
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) =
  ## Behaves like self.match(), except that
  ## when a token doesn't match, an error
  ## is raised. If no error message is
  ## given, a default one is used
  if not self.match(kind):
    if message.len() == 0:
      self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead", token)
    else:
      # Bug fix: the caller-supplied token was previously dropped on
      # the custom-message path, misplacing the error location
      self.error(message, token)
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} =
  ## Behaves like self.expect(), except that
  ## an error is raised only if none of the
  ## given token kinds matches
  for k in kind:
    # Bug fix: this used to call self.match(kind) (the whole array)
    # instead of self.match(k), looping uselessly over the same check
    if self.match(k):
      return
  if message.len() == 0:
    self.error(&"""expecting any of the following tokens: {kind.join(", ")}, but got {self.peek().kind} instead""", token)
  else:
    # Bug fix: with a custom message no error was raised at all,
    # silently accepting invalid input
    self.error(message, token)
# Forward declarations (these procs are mutually recursive with
# the expression/statement handlers defined above their bodies)
proc expression(self: Parser): Expression
proc expressionStatement(self: Parser): Statement
proc statement(self: Parser): Statement
proc varDecl(self: Parser, isLet: bool = false,
             isConst: bool = false): Declaration
proc parseFunExpr(self: Parser): LambdaExpr
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
             isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
# End of forward declarations
proc primary(self: Parser): Expression =
  ## Parses primary expressions such
  ## as integer literals and keywords
  ## that map to builtin types (true,
  ## false, nil, etc.)
  case self.peek().kind:
    of True:
      result = newTrueExpr(self.step())
    of False:
      result = newFalseExpr(self.step())
    of TokenType.NotANumber:
      result = newNanExpr(self.step())
    of Nil:
      result = newNilExpr(self.step())
    of Float:
      result = newFloatExpr(self.step())
    of Integer:
      result = newIntExpr(self.step())
    of Identifier:
      result = newIdentExpr(self.step(), self.scopeDepth)
    of LeftParen:
      # Parenthesized (grouping) expression
      let tok = self.step()
      result = newGroupingExpr(self.expression(), tok)
      self.expect(RightParen, "unterminated parenthesized expression")
    of Yield:
      let tok = self.step()
      if self.currentFunction.isNil():
        self.error("'yield' cannot be used outside functions", tok)
      elif self.currentFunction.token.kind != Generator:
        # It's easier than doing conversions for lambda/funDecl
        self.error("'yield' cannot be used outside generators", tok)
      if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]):
        # Expression delimiters
        result = newYieldExpr(self.expression(), tok)
      else:
        # Empty yield
        result = newYieldExpr(newNilExpr(Token()), tok)
    of Await:
      let tok = self.step()
      if self.currentFunction.isNil():
        self.error("'await' cannot be used outside functions", tok)
      if self.currentFunction.token.kind != Coroutine:
        self.error("'await' can only be used inside coroutines", tok)
      result = newAwaitExpr(self.expression(), tok)
    of RightParen, RightBracket, RightBrace:
      # This is *technically* unnecessary: the parser would
      # throw an error regardless, but it's a little bit nicer
      # when the error message is more specific
      self.error(&"unmatched '{self.peek().lexeme}'")
    of Hex:
      result = newHexExpr(self.step())
    of Octal:
      result = newOctExpr(self.step())
    of Binary:
      result = newBinExpr(self.step())
    of String:
      result = newStrExpr(self.step())
    of Infinity:
      result = newInfExpr(self.step())
    of Function:
      # Anonymous function (lambda) expression
      discard self.step()
      result = Expression(self.funDecl(isLambda=true))
    of Coroutine:
      discard self.step()
      result = Expression(self.funDecl(isAsync=true, isLambda=true))
    of Generator:
      discard self.step()
      result = Expression(self.funDecl(isGenerator=true, isLambda=true))
    of TokenType.Var:
      # var/ref/ptr type modifiers used in expression position
      discard self.step()
      result = newVarExpr(self.expression(), self.peek(-1))
    of TokenType.Ref:
      discard self.step()
      result = newRefExpr(self.expression(), self.peek(-1))
    of TokenType.Ptr:
      discard self.step()
      result = newPtrExpr(self.expression(), self.peek(-1))
    else:
      self.error("invalid syntax")
proc makeCall(self: Parser, callee: Expression): CallExpr =
  ## Utility function called iteratively by self.call()
  ## to parse a function call
  let tok = self.peek(-1)
  # Keyword-argument names seen so far, used to reject duplicates
  var argNames: seq[IdentExpr] = @[]
  var arguments: tuple[positionals: seq[Expression], keyword: seq[tuple[
      name: IdentExpr, value: Expression]]] = (positionals: @[],
      keyword: @[])
  var argument: Expression = nil
  var argCount = 0
  if not self.check(RightParen):
    while true:
      if argCount >= 255:
        self.error("call can not have more than 255 arguments")
        break
      argument = self.expression()
      # A top-level '=' in an argument marks it as a keyword
      # argument of the form name=value
      if argument.kind == binaryExpr and BinaryExpr(argument).operator.lexeme == "=":
        # TODO: This will explode with slices!
        if IdentExpr(BinaryExpr(argument).a) in argNames:
          self.error("duplicate keyword argument in call")
        argNames.add(IdentExpr(BinaryExpr(argument).a))
        arguments.keyword.add((name: IdentExpr(BinaryExpr(argument).a), value: BinaryExpr(argument).b))
      elif arguments.keyword.len() == 0:
        arguments.positionals.add(argument)
      else:
        self.error("positional argument cannot follow keyword argument in call")
      if not self.match(Comma):
        break
      # NOTE(review): argCount is only incremented when a comma
      # follows, so the 255-argument limit is checked one argument
      # late — TODO confirm intended
      argCount += 1
  self.expect(RightParen)
  result = newCallExpr(callee, arguments, tok)
proc parseGenericArgs(self: Parser) =
  ## Parses function generic arguments
  ## like function[type](arg)
  # TODO: not implemented yet — this is a stub and
  # consumes nothing
  discard
proc call(self: Parser): Expression =
  ## Parses function calls and object field
  ## accessing
  result = self.primary()
  # Keep folding postfix operations (calls, attribute access,
  # generic instantiations) onto the result left-to-right
  while true:
    if self.match(LeftParen):
      result = self.makeCall(result)
    elif self.match(Dot):
      self.expect(Identifier, "expecting attribute name after '.'")
      result = newGetItemExpr(result, newIdentExpr(self.peek(-1), self.scopeDepth), self.peek(-1))
    elif self.match(LeftBracket):
      # Generic call syntax: callee[T](...)
      self.parseGenericArgs() # TODO
      result = self.makeCall(result)
    else:
      break
## Operator parsing handlers
proc unary(self: Parser): Expression =
  ## Parses unary expressions
  let next = self.peek()
  if next.kind in [Identifier, Symbol] and next.lexeme in self.operators.tokens:
    # A registered operator in prefix position: recurse for the operand
    result = newUnaryExpr(self.step(), self.unary())
  else:
    result = self.call()
proc parsePow(self: Parser): Expression =
  ## Parses power expressions
  result = self.unary()
  # Left-fold any further Power-level operators onto the result
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Power:
    let op = self.step()
    let rhs = self.unary()
    result = newBinaryExpr(result, op, rhs)
proc parseMul(self: Parser): Expression =
  ## Parses multiplication and division
  ## expressions
  result = self.parsePow()
  # Left-fold any further Multiplication-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Multiplication:
    let op = self.step()
    let rhs = self.parsePow()
    result = newBinaryExpr(result, op, rhs)
proc parseAdd(self: Parser): Expression =
  ## Parses addition and subtraction
  ## expressions
  result = self.parseMul()
  # Left-fold any further Addition-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Addition:
    let op = self.step()
    let rhs = self.parseMul()
    result = newBinaryExpr(result, op, rhs)
proc parseBitwise(self: Parser): Expression =
  ## Parses bitwise expressions
  result = self.parseAdd()
  # Left-fold any further Bitwise-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Bitwise:
    let op = self.step()
    let rhs = self.parseAdd()
    result = newBinaryExpr(result, op, rhs)
proc parseCmp(self: Parser): Expression =
  ## Parses comparison expressions
  result = self.parseBitwise()
  var operator: Token
  var right: Expression
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Compare:
    operator = self.step()
    # Bug fix: the right operand used to be parsed with parseAdd(),
    # which skipped the Bitwise precedence level on the right-hand
    # side of comparisons (e.g. "a == b | c" parsed incorrectly)
    right = self.parseBitwise()
    result = newBinaryExpr(result, operator, right)
proc parseAnd(self: Parser): Expression =
  ## Parses logical and expressions
  result = self.parseCmp()
  # Left-fold any further And-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Precedence.And:
    let op = self.step()
    let rhs = self.parseCmp()
    result = newBinaryExpr(result, op, rhs)
proc parseOr(self: Parser): Expression =
  ## Parses logical or expressions
  result = self.parseAnd()
  # Left-fold any further Or-level operators
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
    let op = self.step()
    let rhs = self.parseAnd()
    result = newBinaryExpr(result, op, rhs)
proc parseAssign(self: Parser): Expression =
  ## Parses assignment expressions
  result = self.parseOr()
  if self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Assign:
    let tok = self.step()
    var value = self.expression()
    # Only certain expression kinds are valid assignment targets
    case result.kind:
      of identExpr, sliceExpr:
        result = newAssignExpr(result, value, tok)
      of getItemExpr:
        # Assigning to an attribute access becomes a set-item node
        result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
      else:
        self.error("invalid assignment target", tok)
proc parseArrow(self: Parser): Expression =
  ## Parses arrow expressions
  result = self.parseAssign()
  var operator: Token
  var right: Expression
  # Bug fix: this loop used to test for Precedence.Or (a copy-paste
  # from parseOr), so Arrow-level operators were never parsed here
  # and logical-or operators were re-parsed at the wrong level
  while self.peek().kind in [Identifier, Symbol] and self.operators.getPrecedence(self.peek().lexeme) == Arrow:
    operator = self.step()
    right = self.parseAssign()
    result = newBinaryExpr(result, operator, right)
## End of operator parsing handlers
proc assertStmt(self: Parser): Statement =
  ## Parses "assert" statements, which
  ## raise an error if the expression
  ## fed into them is false
  let keyword = self.peek(-1)
  let condition = self.expression()
  endOfLine("missing semicolon after 'assert'")
  result = newAssertStmt(condition, keyword)
proc beginScope(self: Parser) =
  ## Opens a new lexical scope
  self.scopeDepth += 1
proc endScope(self: Parser) =
  ## Closes the current lexical scope
  self.scopeDepth -= 1
proc blockStmt(self: Parser): Statement =
  ## Parses block statements. A block
  ## statement simply opens a new local
  ## scope
  self.beginScope()
  let tok = self.peek(-1)
  var code: seq[Declaration] = @[]
  while not self.check(RightBrace) and not self.done():
    code.add(self.declaration())
    # declaration() may return nil; drop such entries so the
    # resulting block only contains real AST nodes
    if code[^1].isNil():
      code.delete(code.high())
  self.expect(RightBrace, "expecting '}'")
  result = newBlockStmt(code, tok)
  self.endScope()
proc breakStmt(self: Parser): Statement =
  ## Parses a break statement, which is
  ## only valid within a loop body
  let keyword = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'break' cannot be used outside loops")
  endOfLine("missing semicolon after 'break'")
  result = newBreakStmt(keyword)
proc deferStmt(self: Parser): Statement =
  ## Parses defer statements, which schedule
  ## an expression to run when the enclosing
  ## function exits
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    self.error("'defer' cannot be used outside functions")
  # Bug fix: the deferred expression must be parsed *before* the
  # statement's terminating semicolon. The old code expected the
  # semicolon first, so "defer expr;" could never parse
  result = newDeferStmt(self.expression(), tok)
  endOfLine("missing semicolon after 'defer'")
proc continueStmt(self: Parser): Statement =
  ## Parses a continue statement, which is
  ## only valid within a loop body
  let keyword = self.peek(-1)
  if self.currentLoop != Loop:
    self.error("'continue' cannot be used outside loops")
  endOfLine("missing semicolon after 'continue'")
  result = newContinueStmt(keyword)
proc returnStmt(self: Parser): Statement =
  ## Parses return statements
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    self.error("'return' cannot be used outside functions")
  var value: Expression
  if not self.check(Semicolon):
    # Since return can be used on its own too
    # we need to check if there's an actual value
    # to return or not
    value = self.expression()
  endOfLine("missing semicolon after 'return'")
  result = newReturnStmt(value, tok)
  # Record the explicit return on the enclosing function node
  # (FunDecl and LambdaExpr store the flag separately)
  case self.currentFunction.kind:
    of NodeKind.funDecl:
      FunDecl(self.currentFunction).hasExplicitReturn = true
    else:
      LambdaExpr(self.currentFunction).hasExplicitReturn = true
proc yieldStmt(self: Parser): Statement =
  ## Parses yield statements
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    # Bug fix: the message was missing the word "used"
    # (it read "'yield' cannot be outside functions")
    self.error("'yield' cannot be used outside functions")
  elif self.currentFunction.token.kind != Generator:
    self.error("'yield' can only be used inside generators")
  if not self.check(Semicolon):
    result = newYieldStmt(self.expression(), tok)
  else:
    # A bare "yield;" yields nil implicitly
    result = newYieldStmt(newNilExpr(Token(lexeme: "nil")), tok)
  endOfLine("missing semicolon after 'yield'")
proc awaitStmt(self: Parser): Statement =
  ## Parses await statements
  let tok = self.peek(-1)
  if self.currentFunction.isNil():
    self.error("'await' cannot be used outside functions")
  if self.currentFunction.token.kind != Coroutine:
    self.error("'await' can only be used inside coroutines")
  # Bug fix: parse the awaited expression *before* expecting the
  # terminating semicolon. The old code expected the semicolon
  # first, so "await expr;" could never parse successfully
  result = newAwaitStmt(self.expression(), tok)
  endOfLine("missing semicolon after 'await'")
proc raiseStmt(self: Parser): Statement =
  ## Parses raise statements
  let keyword = self.peek(-1)
  var exception: Expression
  if not self.check(Semicolon):
    # Raise can be used on its own, in which
    # case it re-raises the last active exception
    exception = self.expression()
  endOfLine("missing semicolon after 'raise'")
  result = newRaiseStmt(exception, keyword)
proc forEachStmt(self: Parser): Statement =
  ## Parses C#-like foreach loops
  let tok = self.peek(-1)
  # Save and restore the enclosing loop context so nested loops
  # don't clobber each other's break/continue validity
  let enclosingLoop = self.currentLoop
  self.currentLoop = Loop
  self.expect(Identifier)
  let identifier = newIdentExpr(self.peek(-1), self.scopeDepth)
  self.expect("in")
  let expression = self.expression()
  self.expect(LeftBrace)
  result = newForEachStmt(identifier, expression, self.blockStmt(), tok)
  self.currentLoop = enclosingLoop
proc findOperators(self: Parser, tokens: seq[Token])
proc importStmt(self: Parser, fromStmt: bool = false): Statement =
  ## Parses import statements. This is a little
  ## convoluted because we need to pre-parse the
  ## module to import the operators from it
  if self.scopeDepth > 0:
    self.error("import statements are only allowed at the top level")
  var tok: Token
  if fromStmt:
    tok = self.peek(-2)
  else:
    tok = self.peek(-1)
  # Accumulate the (possibly relative) module path, one
  # path component at a time
  var moduleName = ""
  while not self.check(Semicolon) and not self.done():
    if self.match(".."):
      if not self.check("/"):
        self.error("expecting '/' after '..' in import statement")
      moduleName &= "../"
    elif self.match("/"):
      self.expect(Identifier, "expecting identifier after '/' in import statement")
      moduleName &= &"/{self.peek(-1).lexeme}"
    elif self.match(Identifier):
      moduleName &= self.peek(-1).lexeme
    else:
      break
  endOfLine("missing semicolon after import statement")
  moduleName &= ".pn"
  result = newImportStmt(newIdentExpr(Token(kind: Identifier, lexeme: moduleName, line: self.peek(-1).line), self.scopeDepth), tok)
  var lexer = newLexer()
  lexer.fillSymbolTable()
  # Search the module lookup paths for the file; an empty entry
  # means "relative to the importing file"
  var path = ""
  for i, searchPath in moduleLookupPaths:
    if searchPath == "":
      path = joinPath(getCurrentDir(), joinPath(splitPath(self.file).head, moduleName))
    else:
      path = joinPath(getCurrentDir(), joinPath(searchPath, moduleName))
    if fileExists(path):
      break
    elif i == moduleLookupPaths.high():
      # Bug fix: this used to compare i against searchPath.high()
      # (the last index of the *string*), so the "module not found"
      # error could fire on the wrong iteration or never at all
      self.error(&"""could not import '{path}': module not found""")
  try:
    # Pre-parse the imported module (persist=true keeps its operators),
    # saving and restoring our own parsing state around the call
    var source = readFile(path)
    var tree = self.tree
    var current = self.current
    var tokens = self.tokens
    var src = self.source
    var file = self.file
    discard self.parse(lexer.lex(source, path), file=path, source=source, lines=lexer.getLines(), persist=true)
    self.file = file
    self.source = src
    self.tree = tree
    self.current = current
    self.tokens = tokens
  except IOError:
    self.error(&"""could not import '{path}': {getCurrentExceptionMsg()}""")
  except OSError:
    self.error(&"""could not import '{path}': {getCurrentExceptionMsg()} [errno {osLastError()}]""")
proc tryStmt(self: Parser): Statement =
  ## Parses try/except/else/finally blocks
  let tok = self.peek(-1)
  self.expect(LeftBrace, "expecting '{' after 'try'")
  var body = self.blockStmt()
  var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[]
  var finallyClause: Statement
  var elseClause: Statement
  while self.match(Except):
    if self.match(LeftBrace):
      # Bare "except {": a catch-all handler, so there is no
      # exception name to record.
      # Bug fix: the two branches had their exc values swapped —
      # the bare except stored a bogus identifier (built from the
      # brace token) while the named except discarded its name
      handlers.add((body: self.blockStmt(), exc: IdentExpr(nil)))
    else:
      self.expect(Identifier, "expecting exception name after 'except'")
      let excName = newIdentExpr(self.peek(-1))
      self.expect(LeftBrace, "expecting '{' after exception name")
      handlers.add((body: self.blockStmt(), exc: excName))
  if self.match(Else):
    self.expect(LeftBrace, "expecting '{' after 'else' name")
    elseClause = self.blockStmt()
  if self.match(Finally):
    self.expect(LeftBrace, "expecting '{' after 'finally'")
    finallyClause = self.blockStmt()
  if handlers.len() == 0 and elseClause.isNil() and finallyClause.isNil():
    self.error("expecting 'except', 'finally' or 'else' statement after 'try' block", tok)
  for i, handler in handlers:
    if handler.exc.isNil() and i != handlers.high():
      # Bug fix: the error used to reference handler.exc.token, but
      # handler.exc is nil on this path; point at the handler body's
      # token instead
      self.error("catch-all exception handler with bare 'except' must come last in try statement", handler.body.token)
  result = newTryStmt(body, handlers, finallyClause, elseClause, tok)
proc whileStmt(self: Parser): Statement =
  ## Parses a C-style while loop statement
  let keyword = self.peek(-1)
  self.beginScope()
  # Remember the surrounding loop context so nested loops
  # restore it correctly
  let outerLoop = self.currentLoop
  let condition = self.expression()
  self.expect(LeftBrace)
  self.currentLoop = Loop
  result = newWhileStmt(condition, self.blockStmt(), keyword)
  self.currentLoop = outerLoop
  self.endScope()
proc ifStmt(self: Parser): Statement =
  ## Parses if statements
  let keyword = self.peek(-1)
  let condition = self.expression()
  self.expect(LeftBrace)
  let thenBranch = self.blockStmt()
  var elseBranch: Statement
  if self.match(Else):
    # "else if" chains recurse; a plain "else" takes a block
    if self.match(If):
      elseBranch = self.ifStmt()
    else:
      self.expect(LeftBrace, "expecting 'if' or block statement")
      elseBranch = self.blockStmt()
  result = newIfStmt(condition, thenBranch, elseBranch, keyword)
proc exportStmt(self: Parser): Statement =
  ## Parses export statements
  var exported: IdentExpr
  let tok = self.peek(-1)
  if not self.match(Identifier):
    self.error("expecting identifier after 'export' in export statement")
  exported = newIdentExpr(self.peek(-1))
  # Bug fix: the error message used to say "after 'raise'"
  # (copy-pasted from raiseStmt)
  endOfLine("missing semicolon after 'export'")
  result = newExportStmt(exported, tok)
template checkDecl(self: Parser, isPrivate: bool) =
  ## Handy utility template that avoids us from copy
  ## pasting the same checks to all declaration handlers.
  ## Rejects exported (public) names declared inside a
  ## local scope
  if not isPrivate and self.scopeDepth > 0:
    self.error("cannot bind public names inside local scopes")
proc parsePragmas(self: Parser): seq[Pragma] =
  ## Parses a bracket-delimited pragma list of the
  ## form [name, name: arg, name: (arg, arg), ...].
  ## Pragma arguments must be literals
  var
    name: IdentExpr
    args: seq[LiteralExpr]
    exp: Expression
    names: seq[string]
  while not self.match("]") and not self.done():
    args = @[]
    self.expect(Identifier, "expecting pragma name")
    if self.peek(-1).lexeme in names:
      self.error("duplicate pragmas are not allowed")
    names.add(self.peek(-1).lexeme)
    name = newIdentExpr(self.peek(-1), self.scopeDepth)
    if not self.match(":"):
      # No argument: a bare pragma closing the list
      if self.match("]"):
        result.add(newPragma(name, @[]))
        break
    elif self.match("("):
      # Parenthesized argument list: name: (a, b, ...)
      # Bug fix: the loop used to *consume* ')' in its condition and
      # then expect a LeftParen afterwards, so any valid argument
      # list such as (a, b) raised a spurious "unterminated
      # parenthesis" error. We now only peek in the loop and expect
      # the closing parenthesis once, after the loop
      while not self.check(")") and not self.done():
        exp = self.primary()
        if not exp.isLiteral():
          self.error("pragma arguments can only be literals", exp.token)
        args.add(LiteralExpr(exp))
        if not self.match(","):
          break
      self.expect(")", "unterminated parenthesis in pragma arguments")
    else:
      # Single argument: name: a
      exp = self.primary()
      if not exp.isLiteral():
        self.error("pragma arguments can only be literals", exp.token)
      args.add(LiteralExpr(exp))
    result.add(newPragma(name, args))
    if self.match(","):
      continue
proc varDecl(self: Parser, isLet: bool = false,
             isConst: bool = false): Declaration =
  ## Parses variable declarations
  var tok = self.peek(-1)
  var value: Expression
  self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
  var name = newIdentExpr(self.peek(-1), self.scopeDepth)
  # A trailing '*' marks the name as exported (public)
  let isPrivate = not self.match("*")
  self.checkDecl(isPrivate)
  var valueType: IdentExpr
  var hasInit = false
  var pragmas: seq[Pragma] = @[]
  if self.match(":"):
    # We don't enforce it here because
    # the compiler may be able to infer
    # the type later!
    self.expect(Identifier, "expecting type name after ':'")
    valueType = newIdentExpr(self.peek(-1), self.scopeDepth)
  if self.match("="):
    hasInit = true
    # The initializer may itself be a function expression
    if self.match([Function, Coroutine, Generator]):
      value = self.parseFunExpr()
    else:
      value = self.expression()
    if isConst and not value.isConst():
      self.error("constant initializer is not a constant")
  elif tok.kind != TokenType.Var:
    # let/const declarations always require an initializer
    self.error(&"{tok.lexeme} declaration requires an initializer")
  self.expect(Semicolon, "expecting semicolon after declaration")
  # Pragmas (if any) follow the declaration's semicolon
  if self.match(TokenType.Pragma):
    for pragma in self.parsePragmas():
      pragmas.add(pragma)
  case tok.kind:
    of TokenType.Var:
      result = newVarDecl(name, value, isPrivate = isPrivate, token = tok,
                          valueType = valueType, pragmas = (@[]))
    of Const:
      result = newVarDecl(name, value, isPrivate = isPrivate, token = tok,
                          isConst = true, valueType = valueType, pragmas = (@[]))
    of Let:
      result = newVarDecl(name, value, isPrivate = isPrivate, token = tok,
                          isLet = isLet, valueType = valueType, pragmas = (@[]))
    else:
      discard # Unreachable
  if not hasInit and VarDecl(result).valueType.isNil():
    self.error("expecting initializer or explicit type annotation, but neither was found", result.token)
  # Pragmas are attached after construction (the constructors
  # above deliberately receive an empty seq)
  result.pragmas = pragmas
proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression]],
                        parameter: var tuple[name: IdentExpr, valueType: Expression],
                        defaults: var seq[Expression]) =
  ## Helper to parse declaration arguments and avoid code duplication
  while not self.check(RightParen):
    if arguments.len > 255:
      self.error("cannot have more than 255 arguments in function declaration", self.peek(-1))
    self.expect(Identifier, "expecting parameter name")
    parameter.name = newIdentExpr(self.peek(-1), self.scopeDepth)
    if self.match(":"):
      parameter.valueType = self.expression()
      # Backfill the type onto preceding untyped parameters so
      # that "a, b: int" types both a and b as int
      for i in countdown(arguments.high(), 0):
        if arguments[i].valueType != nil:
          break
        arguments[i].valueType = parameter.valueType
    else:
      parameter.valueType = nil
    if parameter in arguments:
      self.error("duplicate parameter name in function declaration", parameter.name.token)
    arguments.add(parameter)
    if self.match("="):
      defaults.add(self.expression())
    elif defaults.len() > 0:
      # Once a default appears, all following parameters need one
      self.error("positional argument cannot follow default argument in function declaration", parameter.name.token)
    if not self.match(Comma):
      break
  self.expect(RightParen)
  # Every parameter must have a type by now (explicit or backfilled)
  for argument in arguments:
    if argument.valueType.isNil():
      self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
proc parseFunExpr(self: Parser): LambdaExpr =
  ## Parses the return value of a function
  ## when it is another function. Works
  ## recursively
  var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
  var defaults: seq[Expression] = @[]
  # peek(-1) is the fn/coroutine/generator keyword that led here;
  # it determines the lambda's async/generator flags
  result = newLambdaExpr(arguments, defaults, nil, isGenerator=self.peek(-1).kind == Generator,
                         isAsync=self.peek(-1).kind == Coroutine, token=self.peek(-1),
                         returnType=nil, depth=self.scopeDepth)
  var parameter: tuple[name: IdentExpr, valueType: Expression]
  if self.match(LeftParen):
    self.parseDeclArguments(arguments, parameter, defaults)
  if self.match(":"):
    if self.match([Function, Coroutine, Generator]):
      # The return type is itself a function type: recurse
      result.returnType = self.parseFunExpr()
    else:
      result.returnType = self.expression()
  result.arguments = arguments
  result.defaults = defaults
proc parseGenericConstraint(self: Parser): Expression =
  ## Recursively parses a generic constraint
  ## and returns it as an expression
  result = self.expression() # First value is always an identifier of some sort
  if not self.check(RightBracket):
    case self.peek().lexeme:
      of "|":
        # Union constraint: a | b (right-recursive)
        result = newBinaryExpr(result, self.step(), self.parseGenericConstraint())
      of "~":
        # Exclusion constraint: ~a
        result = newUnaryExpr(self.step(), result)
      else:
        self.error("invalid type constraint in generic declaration")
proc parseGenerics(self: Parser, decl: Declaration) =
  ## Parses generics in declarations
  var generic: tuple[name: IdentExpr, cond: Expression]
  # Each entry is "name: constraint", separated by commas and
  # terminated by a closing bracket
  while not self.check(RightBracket) and not self.done():
    self.expect(Identifier, "expecting generic type name")
    generic.name = newIdentExpr(self.peek(-1), self.scopeDepth)
    self.expect(":", "expecting type constraint after generic name")
    generic.cond = self.parseGenericConstraint()
    decl.generics.add(generic)
    if not self.match(Comma):
      break
  self.expect(RightBracket)
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
             isLambda: bool = false, isOperator: bool = false): Declaration = # Can't use just FunDecl because it can also return LambdaExpr!
    ## Parses all types of functions, coroutines, generators and operators
    ## (with or without a name, where applicable). Returns a FunDecl for
    ## named declarations and a LambdaExpr for anonymous functions
    # The keyword that triggered this call (fn/coroutine/generator/operator)
    let tok = self.peek(-1)
    # Saved and restored at the end so nested function declarations don't
    # clobber the parser's notion of the enclosing function
    var enclosingFunction = self.currentFunction
    var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
    var defaults: seq[Expression] = @[]
    var returnType: Expression
    var pragmas: seq[Pragma] = @[]
    if not isLambda and self.match(Identifier):
        # We do this extra check because we might
        # be called from a context where it's
        # ambiguous whether we're parsing a declaration
        # or an expression. Fortunately anonymous functions
        # are nameless, so we can sort the ambiguity by checking
        # if there's an identifier after the keyword
        self.currentFunction = newFunDecl(newIdentExpr(self.peek(-1), self.scopeDepth), arguments, defaults, newBlockStmt(@[], Token()),
                                          isAsync=isAsync,
                                          isGenerator=isGenerator,
                                          isPrivate=true,
                                          token=tok,
                                          returnType=nil,
                                          depth=self.scopeDepth)
        # A trailing '*' after the name exports the function
        if self.match("*"):
            FunDecl(self.currentFunction).isPrivate = false
        self.checkDecl(FunDecl(self.currentFunction).isPrivate)
        # Optional generic parameter list: fn foo[T: ...](...)
        if self.match(LeftBracket):
            self.parseGenerics(self.currentFunction)
    elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")):
        # We do a bit of hacking to pretend we never
        # wanted to parse this as a declaration in
        # the first place and pass control over to
        # expressionStatement(), which will in turn
        # go all the way up to primary(), which will
        # call us back with isLambda=true, allowing us
        # to actually parse the function as an expression
        while not self.check(tok.kind): # We rewind back to the token that caused us to be called
            dec(self.current)
        result = Declaration(self.expressionStatement())
        self.currentFunction = enclosingFunction
        return result
    elif isLambda:
        # Anonymous function: build a lambda node with an empty body
        # that gets filled in below
        self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=isGenerator, isAsync=isAsync, token=tok,
                                             returnType=nil, depth=self.scopeDepth)
    if self.match(":"):
        # Function has explicit return type
        if self.match([Function, Coroutine, Generator]):
            # The function's return type is another
            # function. We specialize this case because
            # the type declaration for a function lacks
            # the braces that would qualify it as an
            # expression
            returnType = self.parseFunExpr()
        else:
            returnType = self.expression()
    if self.match(LeftParen):
        # Parenthesized argument list, optionally followed by
        # a return type of its own
        var parameter: tuple[name: IdentExpr, valueType: Expression]
        self.parseDeclArguments(arguments, parameter, defaults)
        if self.match(":"):
            # Function's return type
            if self.match([Function, Coroutine, Generator]):
                returnType = self.parseFunExpr()
            else:
                returnType = self.expression()
    if self.currentFunction.kind == funDecl:
        if not self.match(Semicolon):
            # If we don't find a semicolon,
            # it's not a forward declaration
            self.expect(LeftBrace)
            # Pragmas may appear right after the opening brace
            if self.match(TokenType.Pragma):
                for pragma in self.parsePragmas():
                    pragmas.add(pragma)
            FunDecl(self.currentFunction).body = self.blockStmt()
        else:
            # This is a forward declaration, so we explicitly
            # nullify the function's body to tell the compiler
            # to look for it elsewhere in the file later
            FunDecl(self.currentFunction).body = nil
            if self.match(TokenType.Pragma):
                for pragma in self.parsePragmas():
                    pragmas.add(pragma)
        FunDecl(self.currentFunction).arguments = arguments
        FunDecl(self.currentFunction).returnType = returnType
    else:
        # Lambdas must always have a body (no forward declarations)
        self.expect(LeftBrace)
        if self.match(TokenType.Pragma):
            for pragma in self.parsePragmas():
                pragmas.add(pragma)
        LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt()
        LambdaExpr(Expression(self.currentFunction)).arguments = arguments
        LambdaExpr(Expression(self.currentFunction)).returnType = returnType
    result = self.currentFunction
    if isOperator:
        # Operators need at least one operand, a name, and fully
        # typed arguments
        if arguments.len() == 0:
            self.error("cannot declare operator without arguments")
        elif isLambda:
            self.error("cannot declare anonymous operator")
        for argument in arguments:
            if argument.valueType == nil:
                self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
    # Restore the outer function context before returning
    self.currentFunction = enclosingFunction
    result.pragmas = pragmas
proc expression(self: Parser): Expression =
    ## Parses a single expression by delegating to the
    ## highest-level rule of the precedence chain
    self.parseArrow()
proc expressionStatement(self: Parser): Statement =
    ## Parses an expression statement: a bare expression
    ## followed by its statement terminator (a semicolon)
    let parsed = self.expression()
    # Enforce the terminator before wrapping the node
    endOfLine("missing expression terminator", parsed.token)
    result = Statement(newExprStmt(parsed, parsed.token))
proc statement(self: Parser): Statement =
    ## Parses a single statement. Statements introduced by a
    ## keyword dispatch to their dedicated parsing routine;
    ## anything else is treated as an expression statement
    let kind = self.peek().kind
    case kind:
        of If, Assert, Raise, Break, Continue, Return, Import, Export,
           From, While, Foreach, LeftBrace, Yield, Await, Defer, Try:
            # Consume the introducing keyword, then dispatch on it
            discard self.step()
            case kind:
                of If:
                    result = self.ifStmt()
                of Assert:
                    result = self.assertStmt()
                of Raise:
                    result = self.raiseStmt()
                of Break:
                    result = self.breakStmt()
                of Continue:
                    result = self.continueStmt()
                of Return:
                    result = self.returnStmt()
                of Import:
                    result = self.importStmt()
                of Export:
                    result = self.exportStmt()
                of From:
                    # TODO
                    # from module import a [, b, c as d]
                    result = self.importStmt(fromStmt=true)
                of While:
                    result = self.whileStmt()
                of Foreach:
                    result = self.forEachStmt()
                of LeftBrace:
                    result = self.blockStmt()
                of Yield:
                    result = self.yieldStmt()
                of Await:
                    result = self.awaitStmt()
                of Defer:
                    result = self.deferStmt()
                of Try:
                    result = self.tryStmt()
                else:
                    discard  # Unreachable: outer branch covers exactly these kinds
        else:
            result = self.expressionStatement()
proc typeDecl(self: Parser): TypeDecl =
    ## Parses type declarations: plain objects, reference
    ## objects ("ref object") and enumerations ("enum")
    # The 'type' keyword itself (consumed by our caller)
    let token = self.peek(-1)
    self.expect(Identifier, "expecting type name after 'type'")
    # A trailing '*' after the name exports the type
    let isPrivate = not self.match("*")
    self.checkDecl(isPrivate)
    var name = newIdentExpr(self.peek(-1), self.scopeDepth)
    var fields: seq[tuple[name: IdentExpr, valueType: Expression, isPrivate: bool]] = @[]
    var defaults: seq[Expression] = @[]
    var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[]
    var pragmas: seq[Pragma] = @[]
    result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false)
    # Optional generic parameter list: type Foo[T: ...]
    if self.match(LeftBracket):
        self.parseGenerics(result)
    self.expect("=", "expecting '=' after type name")
    # The keyword after '=' decides what kind of type this is
    case self.step().lexeme:
        of "ref":
            self.expect("object", "expecting 'object' after 'ref'")
            result.isRef = true
        of "enum":
            result.isEnum = true
        of "object":
            discard # Default case
        else:
            self.error("invalid syntax")
    # Single inheritance via 'of' (not available for enums)
    if not result.isEnum and self.match("of"):
        self.expect(Identifier, "expecting parent type name after 'of'")
        # NOTE(review): unlike other newIdentExpr() call sites in this file,
        # this one does not pass self.scopeDepth -- confirm whether relying
        # on the default depth is intended here
        result.parent = newIdentExpr(self.peek(-1))
    self.expect(LeftBrace, "expecting '{' after type declaration")
    # Pragmas may appear right after the opening brace
    if self.match(TokenType.Pragma):
        for pragma in self.parsePragmas():
            pragmas.add(pragma)
    var
        argName: IdentExpr
        argPrivate: bool
        argType: Expression
    # Body: 'name[*]: Type [= default];' entries for objects,
    # comma-separated bare names for enums
    while not self.match(RightBrace) and not self.done():
        self.expect(Identifier, "expecting field name")
        argName = newIdentExpr(self.peek(-1), self.scopeDepth)
        if not result.isEnum:
            # Fields can be exported individually with '*'
            argPrivate = not self.match("*")
            self.expect(":", "expecting ':' after field name")
            argType = self.expression()
            result.fields.add((argName, argType, argPrivate))
            # NOTE(review): a default is only appended for fields that declare
            # one, so fields and defaults need not line up index-by-index --
            # verify against how the compiler consumes these sequences
            if self.match("="):
                result.defaults.add(self.expression())
        else:
            # Enum members carry no type annotation or visibility of their own
            result.fields.add((argName, nil, false))
        if not result.isEnum:
            self.expect(";", "expecting semicolon after type field declaration")
        else:
            # The comma is optional before the closing brace
            if not self.check(RightBrace):
                self.expect(",", "expecting comma after enum field declaration")
    result.pragmas = pragmas
proc declaration(self: Parser): Declaration =
    ## Parses a top-level declaration. Constructs that are
    ## not declarations fall through to statement parsing.
    ## May return nil (pragmas, comments), which callers
    ## are expected to filter out of the tree
    let tok = self.peek()
    case tok.kind:
        of TokenType.Var, Const, Let:
            discard self.step()
            result = self.varDecl(isLet = tok.kind == Let,
                                  isConst = tok.kind == Const)
        of Function, Coroutine, Generator, Operator:
            # All function-like declarations share a single parsing
            # routine, differentiated only by these flags
            discard self.step()
            result = self.funDecl(isAsync = tok.kind == Coroutine,
                                  isGenerator = tok.kind == Generator,
                                  isOperator = tok.kind == Operator)
        of TokenType.Pragma:
            # Free-standing pragmas go straight into the syntax
            # tree; result is deliberately left nil
            discard self.step()
            for p in self.parsePragmas():
                self.tree.add(p)
        of Type:
            discard self.step()
            result = self.typeDecl()
        of Comment:
            discard self.step() # TODO: Docstrings and stuff
        else:
            result = Declaration(self.statement())
proc findOperators(self: Parser, tokens: seq[Token]) =
    ## Pre-pass over a token stream that registers all
    ## user-defined operators with the parser's operator
    ## table before actual parsing begins
    let last = tokens.high()
    for i, token in tokens:
        if token.kind == Operator:
            # The operator's lexeme is the token right after the
            # 'operator' keyword, so the stream cannot end here
            if i == last:
                self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
            self.operators.addOperator(tokens[i + 1].lexeme)
        # Since we're iterating the whole list anyway, also enforce
        # the lexer's invariant that the input ends with an EOF token
        if i == last and token.kind != EndOfFile:
            self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] =
    ## Parses a sequence of tokens into a sequence of AST
    ## nodes. When persist is true, the operator table from
    ## a previous run is kept instead of being rebuilt
    # Reset all per-run parser state
    self.current = 0
    self.scopeDepth = 0
    self.tokens = tokens
    self.file = file
    self.source = source
    self.lines = lines
    self.currentLoop = LoopContext.None
    self.currentFunction = nil
    self.tree = @[]
    if not persist:
        self.operators = newOperatorTable()
    # Pre-pass: register custom operators so precedence
    # parsing can recognize them
    self.findOperators(tokens)
    while not self.done():
        let decl = self.declaration()
        # Some constructs (pragmas, comments) produce no node
        # of their own: don't keep nil placeholders around
        if decl != nil:
            self.tree.add(decl)
    result = self.tree