350 lines
11 KiB
Nim
350 lines
11 KiB
Nim
# Copyright 2020 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
import strformat
|
|
|
|
|
|
import meta/token
|
|
import meta/ast
|
|
|
|
export token, ast
|
|
|
|
|
|
|
|
|
|
type
  Parser* = ref object
    ## State of a recursive-descent, top-down parser
    current*: int
      ## Index into `tokens` of the next token to be consumed
    file: string
      ## Name of the file being parsed, reported in error messages
    errored*: bool
      ## Set to true once the first parsing error is recorded
    errorMessage*: string
      ## Formatted description of the first recorded error
    tokens*: seq[Token]
      ## The token stream handed to the parser
|
|
|
|
|
|
proc initParser*(self: Parser = nil): Parser =
  ## Initializes a new Parser object or resets an already
  ## existing one.
  ##
  ## When `self` is nil a fresh parser is allocated. Otherwise
  ## `self` is reset in place and the very same object is returned,
  ## so callers that discard the return value still observe the reset.
  if self.isNil:
    new(result)
  else:
    # Reuse the caller's object. Allocating unconditionally would leave
    # `self` untouched and let stale error state survive the reset
    result = self
  result.current = 0
  result.file = ""
  result.errored = false
  result.errorMessage = ""
  result.tokens = @[]
|
|
|
|
|
|
template endOfFile: Token =
  ## Builds a synthetic end-of-file token with an empty
  ## lexeme and a line number of -1
  Token(kind: TokenType.EndOfFile, lexeme: "", line: -1)
|
|
|
|
|
|
proc peek(self: Parser, distance: int = 0): Token =
  ## Returns the token located `distance` positions away from
  ## the current one without consuming anything. Negative
  ## distances look back at tokens that were already consumed.
  ## Whenever the requested position falls outside the token
  ## list (which is always the case for an empty list), a
  ## synthetic EOF token is returned instead
  let index = self.current + distance
  if index in (0 .. self.tokens.high()):
    result = self.tokens[index]
  else:
    result = endOfFile
|
|
|
|
|
|
|
|
proc done(self: Parser): bool =
  ## Tells whether the parser has reached the end of the
  ## file, i.e. whether the current token is an EOF token.
  ## The token list is expected to end with an explicit EOF
  ## token; positions outside the list (including any position
  ## in an empty list) also count as end of file because
  ## self.peek() yields an EOF token for them
  self.peek().kind == TokenType.EndOfFile
|
|
|
|
|
|
proc step(self: Parser, n: int = 1): Token =
  ## Steps n tokens into the input, returning the last
  ## consumed one.
  ##
  ## Stepping never moves past the end of the file: as soon as
  ## the parser is done, the token at the current position (an
  ## EOF token) is returned and the position is left untouched.
  ## With n < 1 nothing is consumed and the current token is
  ## returned
  result = self.peek()
  for _ in 1 .. n:
    if self.done():
      # Report the EOF token so callers can tell the input ran out
      result = self.peek()
      break
    result = self.tokens[self.current]
    self.current += 1
|
|
|
|
|
|
proc error(self: Parser, message: string) =
  ## Records a parsing error by setting the errored flag
  ## and building the error message. Only the first error
  ## is kept: once the parser has errored, further calls
  ## leave its state untouched
  if not self.errored:
    self.errored = true
    # At end of file, fall back to the lexeme of the token
    # right before the current position
    let lexeme =
      if self.done(): self.peek(-1).lexeme
      else: self.peek().lexeme
    self.errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at {lexeme} -> {message}"
|
|
|
|
|
|
proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
  ## Tells whether the token found at the given distance
  ## is of the expected kind. Nothing is consumed; the
  ## distance is forwarded as-is to self.peek()
  let candidate = self.peek(distance)
  result = candidate.kind == kind
|
|
|
|
|
|
proc check(self: Parser, kind: openarray[TokenType]): bool =
  ## Tries self.check() against every token kind in the
  ## given openarray, stopping at the first one that
  ## matches the current token. Returns false when none
  ## of them does (or when the openarray is empty)
  result = false
  for candidate in kind:
    if self.check(candidate):
      result = true
      break
|
|
|
|
|
|
proc match(self: Parser, kind: TokenType, distance: int = 0): bool =
  ## Same test as self.check(), but a successful match
  ## also advances the parser by one token
  result = self.check(kind, distance)
  if result:
    discard self.step()
|
|
|
|
|
|
proc match(self: Parser, kind: openarray[TokenType]): bool =
  ## Tries self.match() against every token kind in the
  ## given openarray, stopping at (and consuming) the
  ## first one that matches the current token. Returns
  ## false when none of them does
  result = false
  for candidate in kind:
    result = self.match(candidate)
    if result:
      break
|
|
|
|
|
|
proc expect(self: Parser, kind: TokenType, message: string = ""): bool =
  ## Same as self.match(), but a mismatch is reported
  ## through self.error(). The given message is used when
  ## it is not empty, otherwise a default one naming the
  ## expected and the found token kinds is generated
  result = self.match(kind)
  if not result:
    let reason =
      if message.len() > 0: message
      else: &"Expecting token of kind {kind}, found {self.peek().kind} instead"
    self.error(reason)
|
|
|
|
# Forward declaration: primary() calls expression() to parse
# parenthesized groups, but expression() is defined further down
proc expression(self: Parser): ASTNode
|
|
|
|
|
|
proc primary(self: Parser): ASTNode =
  ## Parses primary expressions: literals (true, false, nan,
  ## nil, floats and integers in decimal, hexadecimal, octal
  ## and binary form), identifiers and parenthesized groups.
  ##
  ## On a syntax error the error is recorded via self.error()
  ## and no node is assigned to the result
  case self.peek().kind
  of TokenType.True:
    result = newASTNode(self.step(), NodeKind.trueExpr)
  of TokenType.False:
    result = newASTNode(self.step(), NodeKind.falseExpr)
  of TokenType.NaN:
    result = newASTNode(self.step(), NodeKind.nanExpr)
  of TokenType.Nil:
    result = newASTNode(self.step(), NodeKind.nilExpr)
  of TokenType.Float:
    result = newASTNode(self.step(), NodeKind.floatExpr)
  of TokenType.Integer:
    result = newASTNode(self.step(), NodeKind.intExpr)
  of TokenType.Identifier:
    result = newASTNode(self.step(), NodeKind.identExpr)
  of TokenType.LeftParen:
    # Keep the opening parenthesis now: once the inner expression has
    # been parsed, its distance from the current position depends on
    # how many tokens that expression spans
    let paren = self.step()
    result = self.expression()
    if self.expect(TokenType.RightParen, "Unmatched '('"):
      result = newASTNode(paren, NodeKind.groupingExpr, @[result])
  of TokenType.RightParen:
    self.error("Unmatched ')'")
  of TokenType.Hex:
    result = newASTNode(self.step(), NodeKind.hexExpr)
  of TokenType.Octal:
    result = newASTNode(self.step(), NodeKind.octExpr)
  of TokenType.Binary:
    result = newASTNode(self.step(), NodeKind.binExpr)
  else:
    self.error("Invalid syntax")
|
|
|
|
|
|
proc make_call(self: Parser, callee: ASTNode): ASTNode =
  ## Helper used by self.call() to parse the argument list
  ## of a function-like call, up to and including the closing
  ## parenthesis. The children of the resulting call node are
  ## the callee followed by the arguments. Note that the callee
  ## counts toward the 255-entry limit
  var arguments = @[callee]
  if not self.check(TokenType.RightParen):
    var more = true
    while more:
      if arguments.len >= 255:
        self.error("Cannot have more than 255 arguments")
        more = false
      else:
        arguments.add(self.expression())
        # Keep going only while arguments are comma-separated
        more = self.match(TokenType.Comma)
  if self.expect(TokenType.RightParen):
    result = newASTNode(self.peek(-1), NodeKind.callExpr, arguments)
|
|
|
|
|
|
proc call(self: Parser): ASTNode =
  ## Parses a primary expression followed by any number
  ## of calls and attribute accesses ("dot syntax")
  result = self.primary()
  var chaining = true
  while chaining:
    if self.match(TokenType.LeftParen):
      result = self.make_call(result)
    elif self.match(TokenType.Dot):
      if self.expect(TokenType.Identifier, "Expecting attribute name after '.'"):
        # The identifier was just consumed, so the dot sits
        # two tokens behind the current position
        let dot = self.peek(-2)
        let attribute = newAstNode(self.peek(-1), NodeKind.identExpr)
        result = newASTNode(dot, NodeKind.getExpr, @[result, attribute])
    else:
      chaining = false
|
|
|
|
|
|
proc unary(self: Parser): ASTNode =
  ## Parses unary expressions (a leading minus or tilde
  ## applied to another unary expression), falling back
  ## to call expressions when no unary operator is found
  if not self.match([TokenType.Minus, TokenType.Tilde]):
    result = self.call()
  else:
    let op = self.peek(-1)
    let operand = self.unary()
    result = newASTNode(op, NodeKind.unaryExpr, @[operand])
|
|
|
|
|
|
proc pow(self: Parser): ASTNode =
  ## Parses exponentiation expressions. Chained operators
  ## are folded to the left, with unary expressions as
  ## operands
  result = self.unary()
  while self.match(TokenType.DoubleAsterisk):
    let op = self.peek(-1)
    let rhs = self.unary()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc mul(self: Parser): ASTNode =
  ## Parses multiplication, division, modulo and floor
  ## division expressions. Chained operators are folded to
  ## the left, with exponentiation expressions as operands
  result = self.pow()
  # NOTE(review): TokenType.Asterisk is assumed to be the name of the
  # multiplication token, by analogy with TokenType.DoubleAsterisk.
  # Confirm against meta/token
  const operators = [TokenType.Asterisk, TokenType.Slash,
                     TokenType.Percentage, TokenType.FloorDiv]
  while self.match(operators):
    let op = self.peek(-1)
    let rhs = self.pow()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc add(self: Parser): ASTNode =
  ## Parses addition and subtraction expressions. Chained
  ## operators are folded to the left, with the expressions
  ## parsed by self.mul() as operands
  result = self.mul()
  while self.match([TokenType.Plus, TokenType.Minus]):
    let op = self.peek(-1)
    let rhs = self.mul()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc comparison(self: Parser): ASTNode =
  ## Parses comparison expressions (<, >, <= and >=).
  ## Chained operators are folded to the left, with
  ## additive expressions as operands
  const operators = [TokenType.LessThan, TokenType.GreaterThan,
                     TokenType.LessOrEqual, TokenType.GreaterOrEqual]
  result = self.add()
  while self.match(operators):
    let op = self.peek(-1)
    let rhs = self.add()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc equality(self: Parser): ASTNode =
  ## Parses equality and inequality expressions. Chained
  ## operators are folded to the left, with comparison
  ## expressions as operands
  result = self.comparison()
  while self.match([TokenType.DoubleEqual, TokenType.NotEqual]):
    let op = self.peek(-1)
    let rhs = self.comparison()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc logical_and(self: Parser): ASTNode =
  ## Parses logical AND expressions. Chained operators
  ## are folded to the left, with equality expressions
  ## as operands
  result = self.equality()
  while self.match(TokenType.LogicalAnd):
    let op = self.peek(-1)
    let rhs = self.equality()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc logical_or(self: Parser): ASTNode =
  ## Parses logical OR expressions. Chained operators
  ## are folded to the left, with logical AND expressions
  ## as operands
  result = self.logical_and()
  while self.match(TokenType.LogicalOr):
    let op = self.peek(-1)
    let rhs = self.logical_and()
    result = newASTNode(op, NodeKind.binaryExpr, @[result, rhs])
|
|
|
|
|
|
proc binary(self: Parser): ASTNode =
  ## Parses binary expressions by delegating to
  ## self.logical_or(), the binary operator parsed
  ## first in the chain
  self.logical_or()
|
|
|
|
|
|
proc assignment(self: Parser): ASTNode =
  ## Parses assignment, the highest-level expression.
  ##
  ## An identifier on the left of '=' produces an assignExpr
  ## node, an attribute access produces a setExpr node. Any
  ## other target is reported as an error. When no '=' follows,
  ## the binary expression is returned unchanged
  result = self.binary()
  if self.match(TokenType.Equal):
    let tok = self.peek(-1)
    let value = self.assignment()
    if self.errored:
      # A failed sub-parse may not have produced a node at all,
      # so the target must not be inspected
      return
    if result.kind == NodeKind.identExpr:
      result = newASTNode(tok, NodeKind.assignExpr, @[result, value])
    elif result.kind == NodeKind.getExpr:
      result = newASTNode(tok, NodeKind.setExpr,
                          @[result.children[0], result.children[1], value])
    else:
      # Without this, the value would be silently dropped
      self.error("Invalid assignment target")
|
|
|
|
|
|
proc expression(self: Parser): ASTNode =
  ## Parses a full expression, starting from
  ## assignment
  result = self.assignment()
|
|
|
|
|
|
proc expressionStatement(self: Parser): ASTNode =
  ## Parses an expression statement, that is an expression
  ## terminated by a semicolon. A missing semicolon is
  ## recorded as an error, but a statement node is built
  ## regardless
  let inner = self.expression()
  discard self.expect(TokenType.Semicolon, "missing semicolon after expression")
  newAstNode(self.peek(-1), NodeKind.exprStmt, @[inner])
|
|
|
|
|
|
proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
  ## Parses a series of tokens into a sequence of AST
  ## nodes, one per expression statement. Parsing stops
  ## at the first error, in which case an empty sequence
  ## is returned and the error details are available in
  ## the parser's errored and errorMessage fields
  discard self.initParser()
  self.tokens = tokens
  self.file = file
  result = @[]
  while not self.done():
    result.add(self.expressionStatement())
    if self.errored:
      # Discard everything parsed so far
      result = @[]
      break
|