Added basic expressions to recursive descent parser

2021-08-20 18:47:30 +02:00 · 2021-08-20 18:47:30 +02:00 · 1b4e8d6fab
parent 5bf36e5f2d
commit 1b4e8d6fab
2 changed files with 312 additions and 8 deletions
--- a/src/backend/meta/ast.nim
+++ b/src/backend/meta/ast.nim
@ -29,13 +29,10 @@ type
        ## node types, sorted by
        ## precedence

-        program = 0u8,
        # Declarations
-        structDecl,
+        classDecl = 0u8,
        funDecl,
        varDecl,
-        # An expression followed by a semicolon
-        exprStmt,
        # Statements
        forStmt,
        ifStmt,
@ -44,12 +41,28 @@ type
        continueStmt,
        whileStmt,
        blockStmt,
+        raiseStmt,
+        fromStmt,
+        importStmt,
+        # An expression followed by a semicolon
+        exprStmt,
        # Expressions
        assignExpr,
+        setExpr,  # Set expressions like a.b = "c"
        binaryExpr,
        unaryExpr,
        callExpr,
-        primaryExpr
+        getExpr,  # Get expressions like a.b
+        # Primary expressions
+        groupingExpr,  # Parenthesized expressions such as (true)
+        trueExpr,
+        falseExpr,
+        strExpr,
+        intExpr,
+        floatExpr,
+        nilExpr,
+        nanExpr,
+        identExpr,   # Identifier


    ASTNode* = ref object
@ -74,4 +87,12 @@ proc newASTNode*(token: Token, kind: NodeKind, children: seq[ASTNode] = @[]): AS
    result.children = children


-proc `$`*(self: ASTNode): string = &"ASTNode(token={self.token}, kind={self.kind}, children=[{self.children.join(\", \")}])"
+proc `$`*(self: ASTNode): string = 
+    result &= "ASTNode("
+    if self.token.kind != TokenType.EndOfFile:
+        result &= &"token={self.token}, "
+    result &= &"kind={self.kind}"
+    if self.children.len() > 0:
+        result &= &", children=[{self.children.join(\", \")}]"
+    result &= ")"
+    
--- a/src/backend/parser.nim
+++ b/src/backend/parser.nim
@ -13,13 +13,15 @@
 # Dissemination of this information or reproduction of this material
 # is strictly forbidden unless prior written permission is obtained
 # from Mattia Giambirtone
+import strformat


 import meta/token
 import meta/ast

-export `$`
-export ast
+export token, ast
+
+


 type Parser* = ref object
@ -30,3 +32,284 @@ type Parser* = ref object
    errored*: bool
    errorMessage*: string
    tokens: seq[Token]
+
+
+proc initParser*(self: Parser = nil): Parser = 
+    ## Initializes a new Parser object
+    ## or resets an already existing one
+    if self != nil:
+        result = self
+    new(result)
+    result.current = 0
+    result.file = ""
+    result.errored = false
+    result.errorMessage = ""
+    result.tokens = @[]
+
+
+template endOfFile: Token = Token(kind: TokenType.EndOfFile, lexeme: "EOF", line: -1)
+
+
+proc peek(self: Parser, distance: int = 0): Token =
+    ## Peeks at the token at the given distance.
+    ## If the distance is out of bounds, an EOF
+    ## token is returned. A negative distance may
+    ## be used to retrieve previously consumed
+    ## tokens
+    if self.tokens.high() == -1 or self.current + distance > self.tokens.high():
+        result = endOfFile
+    else:
+        result = self.tokens[self.current + distance]
+
+
+proc done(self: Parser): bool =
+    ## Returns true if we're at the
+    ## end of the file. Note that the
+    ## parser expects an explicit
+    ## EOF token to signal the end
+    ## of the file (unless the token
+    ## list is empty)
+    result = self.peek().kind == TokenType.EndOfFile
+
+
+proc step(self: Parser, n: int = 1): Token = 
+    ## Steps n tokens into the input,
+    ## returning the last consumed one
+    if self.done():
+        result = self.peek()
+    else:
+        result = self.tokens[self.current]
+        self.current += 1
+
+
+proc error(self: Parser, message: string) =
+    ## Sets the appropriate error fields
+    ## in the parser. If an error already
+    ## occurred, this function is a no-op
+    if self.errored:
+        return
+    self.errored = true
+    self.errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at '{self.peek().lexeme}' -> {message}"
+    
+
+proc check(self: Parser, kind: TokenType, distance: int = 0): bool = 
+    ## Checks if the given token at the given distance
+    ## matches the expected kind and returns a boolean.
+    ## The distance parameter is passed directly to
+    ## self.peek()
+    self.peek(distance).kind == kind
+
+
+proc check(self: Parser, kind: openarray[TokenType]): bool =
+    ## Calls self.check() in a loop with each entry of
+    ## the given openarray of token kinds and returns
+    ## at the first match. Note that this assumes
+    ## that only one token may exist at a given
+    ## position
+    for k in kind:
+        if self.check(k):
+            return true
+    return false
+
+
+proc match(self: Parser, kind: TokenType, distance: int = 0): bool =
+    ## Behaves like self.check(), except that when a token
+    ## matches it is consumed
+    if self.check(kind, distance):
+        discard self.step()
+        result = true
+    else:
+        result = false
+
+
+proc match(self: Parser, kind: openarray[TokenType]): bool =
+    ## Calls self.match() in a loop with each entry of
+    ## the given openarray of token kinds and returns
+    ## at the first match. Note that this assumes
+    ## that only one token may exist at a given
+    ## position
+    for k in kind:
+        if self.match(k):
+            return true
+    result = false
+
+
+proc expect(self: Parser, kind: TokenType, message: string = ""): bool = 
+    ## Behaves like self.match(), except that
+    ## when a token doesn't match an error
+    ## is "raised". If no error message is
+    ## given, a default one is used
+    if self.match(kind):
+        result = true
+    else:
+        result = false
+        if message.len() == 0:
+            self.error(&"Expecting token of kind {kind}, found {self.peek().kind} instead")
+        else:
+            self.error(message)
+
+# Forward declaration
+proc expression(self: Parser): ASTNode
+
+
+proc primary(self: Parser): ASTNode = 
+    ## Parses primary expressions such
+    ## as integer literals and keywords
+    ## that map to types (true, false, etc)
+    
+    case self.peek().kind:
+        of TokenType.True:
+            result = newASTNode(self.step(), NodeKind.trueExpr)
+        of TokenType.False:
+            result = newASTNode(self.step(), NodeKind.falseExpr)
+        of TokenType.NaN:
+            result = newASTNode(self.step(), NodeKind.nanExpr)
+        of TokenType.Nil:
+            result = newASTNode(self.step(), NodeKind.nilExpr)
+        of TokenType.Float:
+            result = newASTNode(self.step(), NodeKind.floatExpr)
+        of TokenType.Integer:
+            result = newASTNode(self.step(), NodeKind.intExpr)
+        of TokenType.Identifier:
+            result = newASTNode(self.step(), NodeKind.identExpr)
+        of TokenType.LeftParen:
+            discard self.step()
+            var expression = self.expression()
+            if self.expect(TokenType.RightParen, "Unmatched '('"):
+                result = newASTNode(self.peek(-1), NodeKind.groupingExpr, @[expression])
+        else:
+            self.error("Invalid syntax")
+
+
+proc make_call(self: Parser, callee: ASTNode): ASTNode =
+    ## Utility function called iteratively by self.call()
+    ## to parse a function-like call
+    var arguments: seq[ASTNode] = @[]
+    arguments.add(callee)
+    if not self.check(TokenType.RightParen):
+        while true:
+            if len(arguments) >= 255:
+                self.error("Cannot have more than 255 arguments")
+                break
+            arguments.add(self.expression())
+            if not self.match(TokenType.Comma):
+                break
+    if self.expect(TokenType.RightParen):
+        result = newASTNode(self.peek(-1), NodeKind.callExpr, arguments)
+
+
+proc call(self: Parser): ASTNode = 
+    ## Parses call expressions and object
+    ## accessing ("dot syntax")
+    var expression = self.primary()
+    while true:
+        if self.match(TokenType.LeftParen):
+            expression = self.make_call(expression)
+        elif self.match(TokenType.Dot):
+            if self.expect(TokenType.Identifier, "Expecting attribute name after '.'"):
+                expression = newASTNode(self.peek(-2), NodeKind.getExpr, @[newAstNode(self.peek(-1), NodeKind.identExpr, @[expression])])
+        else:
+            break
+    result = expression
+
+
+proc unary(self: Parser): ASTNode = 
+    ## Parses unary expressions
+    if self.match([TokenType.Minus, TokenType.Tilde]):
+        result = newASTNode(self.peek(-1), NodeKind.unaryExpr, @[self.unary()])
+    else:
+        result = self.call()
+
+
+proc pow(self: Parser): ASTNode =
+    ## Parses exponentiation expressions
+    result = self.unary()
+    var operator: Token
+    var right: ASTNode
+    while self.match(TokenType.DoubleAsterisk):
+        operator = self.peek(-1)
+        right = self.unary()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc mul(self: Parser): ASTNode =
+    ## Parses multiplication and division expressions
+    result = self.pow()
+    var operator: Token
+    var right: ASTNode
+    while self.match([TokenType.Slash, TokenType.Percentage, TokenType.FloorDiv]):
+        operator = self.peek(-1)
+        right = self.pow()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc add(self: Parser): ASTNode =
+    ## Parses addition and subtraction expressions
+    result = self.mul()
+    var operator: Token
+    var right: ASTNode
+    while self.match([TokenType.Plus, TokenType.Minus]):
+        operator = self.peek(-1)
+        right = self.mul()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc comparison(self: Parser): ASTNode =
+    ## Parses comparison expressions
+    result = self.add()
+    var operator: Token
+    var right: ASTNode
+    while self.match([TokenType.LessThan, TokenType.GreaterThan, TokenType.LessOrEqual, TokenType.GreaterOrEqual]):
+        operator = self.peek(-1)
+        right = self.add()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc equality(self: Parser): ASTNode =
+    ## Parses equality expressions
+    result = self.comparison()
+    var operator: Token
+    var right: ASTNode
+    while self.match([TokenType.DoubleEqual, TokenType.NotEqual]):
+        operator = self.peek(-1)
+        right = self.comparison()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc logical_and(self: Parser): ASTNode =
+    ## Parses logical AND expressions
+    result = self.equality()
+    var operator: Token
+    var right: ASTNode
+    while self.match(TokenType.LogicalAnd):
+        operator = self.peek(-1)
+        right = self.equality()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc logical_or(self: Parser): ASTNode =
+    ## Parses logical OR expressions
+    result = self.logical_and()
+    var operator: Token
+    var right: ASTNode
+    while self.match(TokenType.LogicalOr):
+        operator = self.peek(-1)
+        right = self.logical_and()
+        result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
+
+
+proc expression(self: Parser): ASTNode = self.logical_or()
+
+
+proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
+    ## Parses a series of tokens into an AST node
+    discard self.initParser()
+    self.tokens = tokens
+    self.file = file
+    var program: seq[ASTNode] = @[]
+    while not self.done():
+        program.add(self.expression())
+        if self.errored:
+            program = @[]
+            break
+    result = program