JustAnotherJAPL/src/backend/parser.nim

350 lines
11 KiB
Nim

# Copyright 2020 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import strformat
import meta/token
import meta/ast
export token, ast
type
  Parser* = ref object
    ## State of a recursive-descent (top-down) parser
    ## working over a pre-lexed sequence of tokens
    current*: int          ## Index into `tokens` of the next token to consume
    file: string           ## File name reported in error messages
    errored*: bool         ## True once an error has been recorded
    errorMessage*: string  ## Text of the first recorded error
    tokens*: seq[Token]    ## The token stream being parsed
proc initParser*(self: Parser = nil): Parser =
  ## Initializes a new Parser object
  ## or resets an already existing one.
  ##
  ## When `self` is nil a fresh parser is allocated.
  ## Otherwise `self` itself is reset in place and
  ## returned, so callers that discard the return
  ## value still observe the reset
  if self.isNil:
    new(result)
  else:
    # Reuse the caller's object: allocating unconditionally
    # would rebind `result` and leave `self` untouched
    result = self
  result.current = 0
  result.file = ""
  result.errored = false
  result.errorMessage = ""
  result.tokens = @[]
template endOfFile: Token =
  ## Expands to a synthetic end-of-file token with an
  ## empty lexeme and -1 as its line number
  Token(kind: TokenType.EndOfFile, lexeme: "", line: -1)
proc peek(self: Parser, distance: int = 0): Token =
  ## Returns the token located `distance` positions away
  ## from the current one without consuming anything.
  ## Negative distances look back at tokens that were
  ## already consumed. Whenever the requested position
  ## falls outside the token list (which is always the
  ## case for an empty list), a synthetic EOF token is
  ## returned instead
  let index = self.current + distance
  if index in 0 .. self.tokens.high():
    result = self.tokens[index]
  else:
    result = endOfFile
proc done(self: Parser): bool =
  ## Tells whether the parser reached the end of its
  ## input, i.e. whether the current token is an EOF
  ## token. The token list is expected to end with an
  ## explicit EOF token, but running past the end of
  ## the list (or having an empty list) counts as EOF
  ## too because self.peek() then yields a synthetic one
  self.peek().kind == TokenType.EndOfFile
proc step(self: Parser, n: int = 1): Token =
  ## Steps up to n tokens into the input,
  ## returning the last consumed one.
  ##
  ## Stepping stops early when the end of the
  ## file is reached: the EOF token is never
  ## consumed and is returned instead. If n is
  ## zero or negative nothing is consumed and
  ## the current token is returned
  result = self.peek()
  for _ in 1 .. n:
    if self.done():
      # Never move past EOF: hand back the EOF token itself
      result = self.peek()
      break
    result = self.tokens[self.current]
    self.current += 1
proc error(self: Parser, message: string) =
  ## Records a parsing error by setting the errored
  ## flag and building the error message. Only the
  ## first error is kept: later calls do nothing once
  ## the parser is already in the errored state
  if not self.errored:
    self.errored = true
    # At EOF there is no meaningful current lexeme, so
    # the previously consumed token is reported instead
    let lexeme =
      if self.done(): self.peek(-1).lexeme
      else: self.peek().lexeme
    self.errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at {lexeme} -> {message}"
proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
  ## Returns true when the token found at the given
  ## distance (forwarded as-is to self.peek()) is of
  ## the expected kind. Nothing is consumed
  let candidate = self.peek(distance)
  result = candidate.kind == kind
proc check(self: Parser, kind: openarray[TokenType]): bool =
  ## Tests the current token against every kind in the
  ## given openarray, stopping at the first one that
  ## matches. Returns false when none does. Since a
  ## position holds a single token, at most one of the
  ## kinds can match
  result = false
  for candidate in kind:
    if self.check(candidate):
      result = true
      break
proc match(self: Parser, kind: TokenType, distance: int = 0): bool =
  ## Like self.check(), but a successful check also
  ## consumes one token via self.step(). Note that
  ## the token consumed is always the current one,
  ## whatever distance was used for the check
  result = self.check(kind, distance)
  if result:
    discard self.step()
proc match(self: Parser, kind: openarray[TokenType]): bool =
  ## Tries self.match() with every kind in the given
  ## openarray, in order, and stops as soon as one of
  ## them matches (consuming that token). Returns false
  ## if no kind matched, in which case nothing is
  ## consumed
  result = false
  for candidate in kind:
    result = self.match(candidate)
    if result:
      break
proc expect(self: Parser, kind: TokenType, message: string = ""): bool =
  ## Like self.match(), but a mismatch is reported
  ## through self.error(). The caller's message is
  ## used when one is given, otherwise a generic one
  ## naming the expected and the actual token kind
  ## is generated
  result = self.match(kind)
  if not result:
    let reason =
      if message.len() == 0:
        &"Expecting token of kind {kind}, found {self.peek().kind} instead"
      else:
        message
    self.error(reason)
# Forward declaration: primary() below calls expression() to parse
# parenthesized groups before expression() itself has been defined
proc expression(self: Parser): ASTNode
proc primary(self: Parser): ASTNode =
  ## Parses primary expressions such
  ## as integer literals and keywords
  ## that map to types (true, false, etc),
  ## identifiers and parenthesized groups.
  ##
  ## When the current token cannot start a
  ## primary expression an error is recorded
  ## and the result is left at its default value
  case self.peek().kind:
    of TokenType.True:
      result = newASTNode(self.step(), NodeKind.trueExpr)
    of TokenType.False:
      result = newASTNode(self.step(), NodeKind.falseExpr)
    of TokenType.NaN:
      result = newASTNode(self.step(), NodeKind.nanExpr)
    of TokenType.Nil:
      result = newASTNode(self.step(), NodeKind.nilExpr)
    of TokenType.Float:
      result = newASTNode(self.step(), NodeKind.floatExpr)
    of TokenType.Integer:
      result = newASTNode(self.step(), NodeKind.intExpr)
    of TokenType.Identifier:
      result = newASTNode(self.step(), NodeKind.identExpr)
    of TokenType.LeftParen:
      # Keep the opening parenthesis itself: its position relative
      # to the cursor after parsing depends on how many tokens the
      # inner expression spans, so a fixed look-back is unreliable
      let leftParen = self.step()
      result = self.expression()
      if self.expect(TokenType.RightParen, "Unmatched '('"):
        result = newASTNode(leftParen, NodeKind.groupingExpr, @[result])
    of TokenType.RightParen:
      self.error("Unmatched ')'")
    of TokenType.Hex:
      result = newASTNode(self.step(), NodeKind.hexExpr)
    of TokenType.Octal:
      result = newASTNode(self.step(), NodeKind.octExpr)
    of TokenType.Binary:
      result = newASTNode(self.step(), NodeKind.binExpr)
    else:
      self.error("Invalid syntax")
proc make_call(self: Parser, callee: ASTNode): ASTNode =
  ## Utility function called iteratively by self.call()
  ## to parse a function-like call. The opening
  ## parenthesis must already have been consumed.
  ##
  ## The resulting call node's children are the callee
  ## followed by the arguments, and its token is the
  ## closing parenthesis. If the closing parenthesis is
  ## missing an error is recorded and the result is left
  ## at its default value
  var arguments: seq[ASTNode] = @[callee]
  if not self.check(TokenType.RightParen):
    while true:
      # arguments[0] is the callee, so the number of actual
      # arguments parsed so far is len(arguments) - 1
      if len(arguments) - 1 >= 255:
        self.error("Cannot have more than 255 arguments")
        break
      arguments.add(self.expression())
      if not self.match(TokenType.Comma):
        break
  if self.expect(TokenType.RightParen):
    result = newASTNode(self.peek(-1), NodeKind.callExpr, arguments)
proc call(self: Parser): ASTNode =
  ## Parses a primary expression followed by any number
  ## of call suffixes ("(...)") and attribute accesses
  ## ("dot syntax"), chaining them left to right
  result = self.primary()
  while true:
    if self.match(TokenType.LeftParen):
      result = self.make_call(result)
      continue
    if not self.match(TokenType.Dot):
      # Neither a call nor an attribute access: we're done
      break
    if self.expect(TokenType.Identifier, "Expecting attribute name after '.'"):
      # The cursor now sits right after the attribute name, so
      # the dot is two tokens back and the name one token back
      let dot = self.peek(-2)
      let attribute = newAstNode(self.peek(-1), NodeKind.identExpr)
      result = newASTNode(dot, NodeKind.getExpr, @[result, attribute])
proc unary(self: Parser): ASTNode =
  ## Parses unary expressions, i.e. an operand prefixed
  ## by any number of '-' or '~' operators. Without such
  ## a prefix this falls through to self.call()
  if not self.match([TokenType.Minus, TokenType.Tilde]):
    return self.call()
  # The operator was just consumed, so it is one token back
  let operator = self.peek(-1)
  let operand = self.unary()
  result = newASTNode(operator, NodeKind.unaryExpr, @[operand])
proc pow(self: Parser): ASTNode =
  ## Parses exponentiation expressions: unary operands
  ## joined by '**', folded into binary nodes from
  ## left to right
  result = self.unary()
  while self.match(TokenType.DoubleAsterisk):
    let operator = self.peek(-1)
    let rhs = self.unary()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, rhs])
proc mul(self: Parser): ASTNode =
  ## Parses multiplication and division expressions
  ## (multiplication, division, modulo and floor
  ## division), folding operands into binary nodes
  ## from left to right
  result = self.pow()
  # NOTE(review): the multiplication token kind was missing from this
  # list, so products were never parsed here. TokenType.Asterisk is
  # assumed to be its name (by analogy with DoubleAsterisk): confirm
  # against meta/token
  while self.match([TokenType.Asterisk, TokenType.Slash,
                    TokenType.Percentage, TokenType.FloorDiv]):
    let operator = self.peek(-1)
    let right = self.pow()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
proc add(self: Parser): ASTNode =
  ## Parses addition and subtraction expressions:
  ## operands parsed by self.mul() joined by '+' or
  ## '-', folded into binary nodes from left to right
  result = self.mul()
  while self.match([TokenType.Plus, TokenType.Minus]):
    let operator = self.peek(-1)
    let rhs = self.mul()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, rhs])
proc comparison(self: Parser): ASTNode =
  ## Parses comparison expressions: operands parsed by
  ## self.add() joined by a less-than, greater-than,
  ## less-or-equal or greater-or-equal operator, folded
  ## into binary nodes from left to right
  result = self.add()
  while self.match([TokenType.LessThan, TokenType.GreaterThan, TokenType.LessOrEqual, TokenType.GreaterOrEqual]):
    let operator = self.peek(-1)
    let rhs = self.add()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, rhs])
proc equality(self: Parser): ASTNode =
  ## Parses equality expressions: operands parsed by
  ## self.comparison() joined by an equality or
  ## inequality operator, folded into binary nodes
  ## from left to right
  result = self.comparison()
  while self.match([TokenType.DoubleEqual, TokenType.NotEqual]):
    let operator = self.peek(-1)
    let rhs = self.comparison()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, rhs])
proc logical_and(self: Parser): ASTNode =
  ## Parses logical AND expressions: operands parsed
  ## by self.equality() joined by the logical AND
  ## operator, folded into binary nodes from left
  ## to right
  result = self.equality()
  while self.match(TokenType.LogicalAnd):
    let operator = self.peek(-1)
    let rhs = self.equality()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, rhs])
proc logical_or(self: Parser): ASTNode =
  ## Parses logical OR expressions: operands parsed
  ## by self.logical_and() joined by the logical OR
  ## operator, folded into binary nodes from left
  ## to right
  result = self.logical_and()
  while self.match(TokenType.LogicalOr):
    let operator = self.peek(-1)
    let rhs = self.logical_and()
    result = newASTNode(operator, NodeKind.binaryExpr, @[result, rhs])
proc binary(self: Parser): ASTNode =
  ## Entry point for binary expressions: delegates
  ## to self.logical_or(), the lowest-precedence
  ## binary rule
  self.logical_or()
proc assignment(self: Parser): ASTNode =
  ## Parses assignment, the highest-level
  ## expression.
  ##
  ## The left-hand side is parsed as an ordinary
  ## expression first. If an '=' follows, it must
  ## be either an identifier (producing an
  ## assignExpr node) or an attribute access
  ## (producing a setExpr node). Any other target
  ## is reported through self.error() and the
  ## left-hand side is returned unchanged
  result = self.binary()
  if self.match(TokenType.Equal):
    let tok = self.peek(-1)
    # Assignment is right-associative, hence the recursion
    let value = self.assignment()
    if result.isNil:
      # The left-hand side failed to parse. An error was most likely
      # recorded already, in which case self.error() is a no-op; the
      # guard avoids reading .kind through a nil node
      self.error("Invalid assignment target")
    elif result.kind == NodeKind.identExpr:
      result = newASTNode(tok, NodeKind.assignExpr, @[result, value])
    elif result.kind == NodeKind.getExpr:
      result = newASTNode(tok, NodeKind.setExpr, @[result.children[0], result.children[1], value])
    else:
      # E.g. a literal or a call on the left of '=': reject it
      # rather than silently discarding the assignment
      self.error("Invalid assignment target")
proc expression(self: Parser): ASTNode =
  ## Parses a full expression by delegating to
  ## self.assignment()
  result = self.assignment()
proc expressionStatement(self: Parser): ASTNode =
  ## Parses an expression statement: an expression
  ## that must be terminated by a semicolon. A missing
  ## semicolon is reported through self.expect(), but
  ## a statement node is built either way, using the
  ## most recently consumed token as the node's token
  let inner = self.expression()
  discard self.expect(TokenType.Semicolon, "missing semicolon after expression")
  result = newAstNode(self.peek(-1), NodeKind.exprStmt, @[inner])
proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
  ## Parses a series of tokens into a list of AST nodes,
  ## one per top-level expression statement.
  ##
  ## `file` is only used to build error messages.
  ## Parsing stops at the first error: an empty list is
  ## returned and the details are available through
  ## the parser's `errored` and `errorMessage` fields
  # Reset the state in place so that a parser object reused
  # for a second run starts from the first token and does
  # not carry over a previous error
  self.current = 0
  self.errored = false
  self.errorMessage = ""
  self.tokens = tokens
  self.file = file
  while not self.done():
    result.add(self.expressionStatement())
    if self.errored:
      # Discard the partial program rather than hand back
      # a tree built from invalid input
      result = @[]
      break