Initial work on the new compiler module

This commit is contained in:
Mattia Giambirtone 2023-03-27 09:53:56 +02:00
parent 32ae21d143
commit 8277472819
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
4 changed files with 1114 additions and 66 deletions

View File

@ -14,7 +14,8 @@ Peon is a multi-paradigm, statically-typed programming language inspired by C, N
features such as automatic type inference, parametrically polymorphic generic types, pure functions, closures, interfaces, single inheritance, features such as automatic type inference, parametrically polymorphic generic types, pure functions, closures, interfaces, single inheritance,
reference types, templates, coroutines, raw pointers and exceptions. reference types, templates, coroutines, raw pointers and exceptions.
The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory. The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory, although more garbage
collection strategies (such as generational GC or deferred reference counting) are planned to be added in the future.
Peon features a native cooperative concurrency model designed to take advantage of the inherent waiting of typical I/O workloads, without the use of more than one OS thread (wherever possible), allowing for much greater efficiency and a smaller memory footprint. The asynchronous model used forces developers to write code that is both easy to reason about, thanks to the [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) model that is core to peon's async event loop implementation, and works as expected every time (without dropping signals, exceptions, or task return values). Peon features a native cooperative concurrency model designed to take advantage of the inherent waiting of typical I/O workloads, without the use of more than one OS thread (wherever possible), allowing for much greater efficiency and a smaller memory footprint. The asynchronous model used forces developers to write code that is both easy to reason about, thanks to the [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) model that is core to peon's async event loop implementation, and works as expected every time (without dropping signals, exceptions, or task return values).
@ -41,7 +42,7 @@ will move through the input history (which is never reset). Also note that UTF-8
### TODO List ### TODO List
In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date): In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date):
- User-defined types - User-defined types
- Function calls ✅ - Function calls ✅
- Control flow (if-then-else, switch) ✅ - Control flow (if-then-else, switch) ✅
- Looping (while) ✅ - Looping (while) ✅
@ -57,7 +58,6 @@ In no particular order, here's a list of stuff that's done/to do (might be incom
- Named scopes/blocks ✅ - Named scopes/blocks ✅
- Inheritance - Inheritance
- Interfaces - Interfaces
- Indexing operator
- Generics ✅ - Generics ✅
- Automatic types ✅ - Automatic types ✅
- Iterators/Generators - Iterators/Generators
@ -76,12 +76,14 @@ In no particular order, here's a list of stuff that's done/to do (might be incom
Here's a random list of high-level features I would like peon to have and that I think are kinda neat (some may Here's a random list of high-level features I would like peon to have and that I think are kinda neat (some may
have been implemented already): have been implemented already):
- Reference types are not nullable by default (must use `#pragma[nullable]`) - Reference types are not nullable by default (must use `#pragma[nullable]`)
- The `commutative` pragma, which allows to define just one implementation of an operator
and have it become commutative
- Easy C/Nim interop via FFI - Easy C/Nim interop via FFI
- C/C++ backend - C/C++ backend
- Nim backend - Nim backend
- [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) (must-have!) - [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) (must-have!)
- Simple OOP (with multiple dispatch!) - Simple OOP (with multiple dispatch!)
- RTTI, with methods that dispatch at runtime based on the true type of a value - RTTI, with methods that dispatch at runtime based on the true (aka runtime) type of a value
- Limited compile-time evaluation (embed the Peon VM in the C/C++/Nim backend and use that to execute peon code at compile time) - Limited compile-time evaluation (embed the Peon VM in the C/C++/Nim backend and use that to execute peon code at compile time)

View File

@ -12,19 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import std/tables import std/tables
import std/strformat import std/strformat
import std/algorithm import std/algorithm

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@
import std/strformat import std/strformat
import std/strutils import std/strutils
import std/tables
import std/os import std/os
@ -31,9 +32,6 @@ export token, ast, errors
type type
LoopContext {.pure.} = enum
Loop, None
Precedence {.pure.} = enum Precedence {.pure.} = enum
## Operator precedence ## Operator precedence
## clearly stolen from ## clearly stolen from
@ -66,18 +64,16 @@ type
# Only meaningful for parse errors # Only meaningful for parse errors
file: string file: string
# The list of tokens representing # The list of tokens representing
# the source code to be parsed. # the source code to be parsed
# In most cases, those will come
# from the builtin lexer, but this
# behavior is not enforced and the
# tokenizer is entirely separate from
# the parser
tokens: seq[Token] tokens: seq[Token]
# Little internal attribute that tells # Just like scope depth tells us how
# us if we're inside a loop or not. This # many nested scopes are above us, the
# allows us to detect errors like break # loop depth tells us how many nested
# being used outside loops # loops are above us. It's just a simple
currentLoop: LoopContext # way of statically detecting stuff like
# the break statement being used outside
# loops. Maybe a bit overkill for a parser?
loopDepth: int
# Stores the current function # Stores the current function
# being parsed. This is a reference # being parsed. This is a reference
# to either a FunDecl or LambdaExpr # to either a FunDecl or LambdaExpr
@ -96,8 +92,13 @@ type
lines: seq[tuple[start, stop: int]] lines: seq[tuple[start, stop: int]]
# The source of the current module # The source of the current module
source: string source: string
# Keeps track of imported modules # Keeps track of imported modules.
modules: seq[tuple[name: string, loaded: bool]] # The key is the module's fully qualified
# path, while the boolean indicates whether
# it has been fully loaded. This is useful
# to avoid importing a module twice and to
# detect recursive dependency cycles
modules: TableRef[string, bool]
ParseError* = ref object of PeonException ParseError* = ref object of PeonException
## A parsing exception ## A parsing exception
parser*: Parser parser*: Parser
@ -140,7 +141,7 @@ proc newOperatorTable: OperatorTable =
result.tokens = @[] result.tokens = @[]
for prec in Precedence: for prec in Precedence:
result.precedence[prec] = @[] result.precedence[prec] = @[]
# These operators are currently not built-in # These operators are currently hardcoded
# due to compiler limitations # due to compiler limitations
result.addOperator("=") result.addOperator("=")
result.addOperator(".") result.addOperator(".")
@ -161,11 +162,12 @@ proc newParser*: Parser =
result.file = "" result.file = ""
result.tokens = @[] result.tokens = @[]
result.currentFunction = nil result.currentFunction = nil
result.currentLoop = LoopContext.None result.loopDepth = 0
result.scopeDepth = 0 result.scopeDepth = 0
result.operators = newOperatorTable() result.operators = newOperatorTable()
result.tree = @[] result.tree = @[]
result.source = "" result.source = ""
result.modules = newTable[string, bool]()
# Public getters for improved error formatting # Public getters for improved error formatting
@ -180,7 +182,7 @@ template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg,
proc peek(self: Parser, distance: int = 0): Token = proc peek(self: Parser, distance: int = 0): Token {.inline.} =
## Peeks at the token at the given distance. ## Peeks at the token at the given distance.
## If the distance is out of bounds, an EOF ## If the distance is out of bounds, an EOF
## token is returned. A negative distance may ## token is returned. A negative distance may
@ -201,7 +203,7 @@ proc done(self: Parser): bool {.inline.} =
result = self.peek().kind == EndOfFile result = self.peek().kind == EndOfFile
proc step(self: Parser, n: int = 1): Token = proc step(self: Parser, n: int = 1): Token {.inline.} =
## Steps n tokens into the input, ## Steps n tokens into the input,
## returning the last consumed one ## returning the last consumed one
if self.done(): if self.done():
@ -227,7 +229,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
# as a symbol and in the cases where we need a specific token we just match the string # as a symbol and in the cases where we need a specific token we just match the string
# directly # directly
proc check[T: TokenType or string](self: Parser, kind: T, proc check[T: TokenType or string](self: Parser, kind: T,
distance: int = 0): bool = distance: int = 0): bool {.inline.} =
## Checks if the given token at the given distance ## Checks if the given token at the given distance
## matches the expected kind and returns a boolean. ## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to ## The distance parameter is passed directly to
@ -239,7 +241,7 @@ proc check[T: TokenType or string](self: Parser, kind: T,
self.peek(distance).lexeme == kind self.peek(distance).lexeme == kind
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} =
## Calls self.check() in a loop with each entry of ## Calls self.check() in a loop with each entry of
## the given openarray of token kinds and returns ## the given openarray of token kinds and returns
## at the first match. Note that this assumes ## at the first match. Note that this assumes
@ -251,7 +253,7 @@ proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
return false return false
proc match[T: TokenType or string](self: Parser, kind: T): bool = proc match[T: TokenType or string](self: Parser, kind: T): bool {.inline.} =
## Behaves like self.check(), except that when a token ## Behaves like self.check(), except that when a token
## matches it is also consumed ## matches it is also consumed
if self.check(kind): if self.check(kind):
@ -261,7 +263,7 @@ proc match[T: TokenType or string](self: Parser, kind: T): bool =
result = false result = false
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} =
## Calls self.match() in a loop with each entry of ## Calls self.match() in a loop with each entry of
## the given openarray of token kinds and returns ## the given openarray of token kinds and returns
## at the first match. Note that this assumes ## at the first match. Note that this assumes
@ -273,7 +275,7 @@ proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
result = false result = false
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) = proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) {.inline.} =
## Behaves like self.match(), except that ## Behaves like self.match(), except that
## when a token doesn't match, an error ## when a token doesn't match, an error
## is raised. If no error message is ## is raised. If no error message is
@ -285,7 +287,7 @@ proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "",
self.error(message) self.error(message)
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} = proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.inline, used.} =
## Behaves like self.expect(), except that ## Behaves like self.expect(), except that
## an error is raised only if none of the ## an error is raised only if none of the
## given token kinds matches ## given token kinds matches
@ -307,6 +309,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
isLambda: bool = false, isOperator: bool = false, isTemplate: bool = false): Declaration isLambda: bool = false, isOperator: bool = false, isTemplate: bool = false): Declaration
proc declaration(self: Parser): Declaration proc declaration(self: Parser): Declaration
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
proc findOperators(self: Parser, tokens: seq[Token])
# End of forward declarations # End of forward declarations
@ -436,7 +439,7 @@ proc makeCall(self: Parser, callee: Expression): CallExpr =
proc parseGenericArgs(self: Parser) = proc parseGenericArgs(self: Parser) =
## Parses function generic arguments ## Parses function generic arguments
## like function[type](arg) ## like function[type](arg)
discard discard # TODO
proc call(self: Parser): Expression = proc call(self: Parser): Expression =
@ -596,12 +599,12 @@ proc assertStmt(self: Parser): Statement =
result.file = self.file result.file = self.file
proc beginScope(self: Parser) = proc beginScope(self: Parser) {.inline.} =
## Begins a new lexical scope ## Begins a new lexical scope
inc(self.scopeDepth) inc(self.scopeDepth)
proc endScope(self: Parser) = proc endScope(self: Parser) {.inline.} =
## Ends a new lexical scope ## Ends a new lexical scope
dec(self.scopeDepth) dec(self.scopeDepth)
@ -631,8 +634,7 @@ proc namedBlockStmt(self: Parser): Statement =
self.expect(Identifier, "expecting block name after 'block'") self.expect(Identifier, "expecting block name after 'block'")
var name = newIdentExpr(self.peek(-1), self.scopeDepth) var name = newIdentExpr(self.peek(-1), self.scopeDepth)
name.file = self.file name.file = self.file
let enclosingLoop = self.currentLoop inc(self.loopDepth)
self.currentLoop = Loop
self.expect(LeftBrace, "expecting '{' after 'block'") self.expect(LeftBrace, "expecting '{' after 'block'")
while not self.check(RightBrace) and not self.done(): while not self.check(RightBrace) and not self.done():
code.add(self.declaration()) code.add(self.declaration())
@ -642,14 +644,14 @@ proc namedBlockStmt(self: Parser): Statement =
result = newNamedBlockStmt(code, name, tok) result = newNamedBlockStmt(code, name, tok)
result.file = self.file result.file = self.file
self.endScope() self.endScope()
self.currentLoop = enclosingLoop dec(self.loopDepth)
proc breakStmt(self: Parser): Statement = proc breakStmt(self: Parser): Statement =
## Parses break statements ## Parses break statements
let tok = self.peek(-1) let tok = self.peek(-1)
var label: IdentExpr var label: IdentExpr
if self.currentLoop != Loop: if self.loopDepth == 0:
self.error("'break' cannot be used outside loops") self.error("'break' cannot be used outside loops")
if self.match(Identifier): if self.match(Identifier):
label = newIdentExpr(self.peek(-1), self.scopeDepth) label = newIdentExpr(self.peek(-1), self.scopeDepth)
@ -673,7 +675,7 @@ proc continueStmt(self: Parser): Statement =
## Parses continue statements ## Parses continue statements
let tok = self.peek(-1) let tok = self.peek(-1)
var label: IdentExpr var label: IdentExpr
if self.currentLoop != Loop: if self.loopDepth == 0:
self.error("'continue' cannot be used outside loops") self.error("'continue' cannot be used outside loops")
if self.match(Identifier): if self.match(Identifier):
label = newIdentExpr(self.peek(-1), self.scopeDepth) label = newIdentExpr(self.peek(-1), self.scopeDepth)
@ -747,8 +749,7 @@ proc raiseStmt(self: Parser): Statement =
proc forEachStmt(self: Parser): Statement = proc forEachStmt(self: Parser): Statement =
## Parses C#-like foreach loops ## Parses C#-like foreach loops
let tok = self.peek(-1) let tok = self.peek(-1)
let enclosingLoop = self.currentLoop inc(self.loopDepth)
self.currentLoop = Loop
self.expect(Identifier) self.expect(Identifier)
let identifier = newIdentExpr(self.peek(-1), self.scopeDepth) let identifier = newIdentExpr(self.peek(-1), self.scopeDepth)
self.expect("in") self.expect("in")
@ -756,10 +757,7 @@ proc forEachStmt(self: Parser): Statement =
self.expect(LeftBrace) self.expect(LeftBrace)
result = newForEachStmt(identifier, expression, self.blockStmt(), tok) result = newForEachStmt(identifier, expression, self.blockStmt(), tok)
result.file = self.file result.file = self.file
self.currentLoop = enclosingLoop dec(self.loopDepth)
proc findOperators(self: Parser, tokens: seq[Token])
proc importStmt(self: Parser, fromStmt: bool = false): Statement = proc importStmt(self: Parser, fromStmt: bool = false): Statement =
@ -806,6 +804,10 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
break break
elif i == searchPath.high(): elif i == searchPath.high():
self.error(&"""could not import '{path}': module not found""") self.error(&"""could not import '{path}': module not found""")
if not self.modules.getOrDefault(path, true):
self.error(&"coult not import '{path}' (recursive dependency detected)")
else:
self.modules[path] = false
try: try:
var source = readFile(path) var source = readFile(path)
var tree = self.tree var tree = self.tree
@ -819,6 +821,8 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
self.tree = tree self.tree = tree
self.current = current self.current = current
self.tokens = tokens self.tokens = tokens
# Module has been fully loaded and can now be used
self.modules[path] = true
except IOError: except IOError:
self.error(&"could not import '{path}': {getCurrentExceptionMsg()}") self.error(&"could not import '{path}': {getCurrentExceptionMsg()}")
except OSError: except OSError:
@ -859,14 +863,13 @@ proc whileStmt(self: Parser): Statement =
## Parses a C-style while loop statement ## Parses a C-style while loop statement
let tok = self.peek(-1) let tok = self.peek(-1)
self.beginScope() self.beginScope()
let enclosingLoop = self.currentLoop inc(self.loopDepth)
let condition = self.expression() let condition = self.expression()
self.expect(LeftBrace) self.expect(LeftBrace)
self.currentLoop = Loop
result = newWhileStmt(condition, self.blockStmt(), tok) result = newWhileStmt(condition, self.blockStmt(), tok)
result.file = self.file result.file = self.file
self.currentLoop = enclosingLoop
self.endScope() self.endScope()
dec(self.loopDepth)
proc ifStmt(self: Parser): Statement = proc ifStmt(self: Parser): Statement =
@ -1049,7 +1052,7 @@ proc parseFunExpr(self: Parser): LambdaExpr =
proc parseGenericConstraint(self: Parser): Expression = proc parseGenericConstraint(self: Parser): Expression =
## Recursivelt parses a generic constraint ## Recursively parses a generic constraint
## and returns it as an expression ## and returns it as an expression
result = self.expression() # First value is always an identifier of some sort result = self.expression() # First value is always an identifier of some sort
if not self.check(RightBracket): if not self.check(RightBracket):
@ -1301,6 +1304,7 @@ proc typeDecl(self: Parser): TypeDecl =
var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[] var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[]
var pragmas: seq[Pragma] = @[] var pragmas: seq[Pragma] = @[]
result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false) result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false)
result.file = self.file
if self.match(LeftBracket): if self.match(LeftBracket):
self.parseGenerics(result) self.parseGenerics(result)
self.expect("=", "expecting '=' after type name") self.expect("=", "expecting '=' after type name")
@ -1315,7 +1319,6 @@ proc typeDecl(self: Parser): TypeDecl =
result.isEnum = true result.isEnum = true
of "object": of "object":
discard self.step() discard self.step()
discard # Default case
else: else:
hasNone = true hasNone = true
if hasNone: if hasNone:
@ -1334,7 +1337,7 @@ proc typeDecl(self: Parser): TypeDecl =
self.expect(LeftBrace, "expecting '{' after type declaration") self.expect(LeftBrace, "expecting '{' after type declaration")
if self.match(TokenType.Pragma): if self.match(TokenType.Pragma):
for pragma in self.parsePragmas(): for pragma in self.parsePragmas():
pragmas.add(pragma) result.pragmas.add(pragma)
var var
argName: IdentExpr argName: IdentExpr
argPrivate: bool argPrivate: bool
@ -1356,8 +1359,6 @@ proc typeDecl(self: Parser): TypeDecl =
else: else:
if not self.check(RightBrace): if not self.check(RightBrace):
self.expect(",", "expecting comma after enum field declaration") self.expect(",", "expecting comma after enum field declaration")
result.pragmas = pragmas
result.file = self.file
proc declaration(self: Parser): Declaration = proc declaration(self: Parser): Declaration =
@ -1420,11 +1421,12 @@ proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[sta
self.lines = lines self.lines = lines
self.current = 0 self.current = 0
self.scopeDepth = 0 self.scopeDepth = 0
self.currentLoop = LoopContext.None self.loopDepth = 0
self.currentFunction = nil self.currentFunction = nil
self.tree = @[] self.tree = @[]
if not persist: if not persist:
self.operators = newOperatorTable() self.operators = newOperatorTable()
self.modules = newTable[string, bool]()
self.findOperators(tokens) self.findOperators(tokens)
while not self.done(): while not self.done():
self.tree.add(self.declaration()) self.tree.add(self.declaration())