Initial work on the new compiler module

This commit is contained in:
Mattia Giambirtone 2023-03-27 09:53:56 +02:00
parent 32ae21d143
commit 8277472819
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
4 changed files with 1114 additions and 66 deletions

View File

@ -14,7 +14,8 @@ Peon is a multi-paradigm, statically-typed programming language inspired by C, N
features such as automatic type inference, parametrically polymorphic generic types, pure functions, closures, interfaces, single inheritance,
reference types, templates, coroutines, raw pointers and exceptions.
The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory.
The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory, although more garbage
collection strategies (such as generational GC or deferred reference counting) are planned to be added in the future.
Peon features a native cooperative concurrency model designed to take advantage of the inherent waiting of typical I/O workloads, without the use of more than one OS thread (wherever possible), allowing for much greater efficiency and a smaller memory footprint. The asynchronous model used forces developers to write code that is both easy to reason about, thanks to the [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) model that is core to peon's async event loop implementation, and works as expected every time (without dropping signals, exceptions, or task return values).
@ -41,7 +42,7 @@ will move through the input history (which is never reset). Also note that UTF-8
### TODO List
In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date):
- User-defined types
- User-defined types
- Function calls ✅
- Control flow (if-then-else, switch) ✅
- Looping (while) ✅
@ -57,7 +58,6 @@ In no particular order, here's a list of stuff that's done/to do (might be incom
- Named scopes/blocks ✅
- Inheritance
- Interfaces
- Indexing operator
- Generics ✅
- Automatic types ✅
- Iterators/Generators
@ -76,12 +76,14 @@ In no particular order, here's a list of stuff that's done/to do (might be incom
Here's a random list of high-level features I would like peon to have and that I think are kinda neat (some may
have been implemented already):
- Reference types are not nullable by default (must use `#pragma[nullable]`)
- The `commutative` pragma, which allows defining just one implementation of an operator
and having it automatically become commutative
- Easy C/Nim interop via FFI
- C/C++ backend
- Nim backend
- [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) (must-have!)
- Simple OOP (with multiple dispatch!)
- RTTI, with methods that dispatch at runtime based on the true type of a value
- RTTI, with methods that dispatch at runtime based on the true (aka runtime) type of a value
- Limited compile-time evaluation (embed the Peon VM in the C/C++/Nim backend and use that to execute peon code at compile time)

View File

@ -12,19 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import std/tables
import std/strformat
import std/algorithm

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@
import std/strformat
import std/strutils
import std/tables
import std/os
@ -31,9 +32,6 @@ export token, ast, errors
type
LoopContext {.pure.} = enum
Loop, None
Precedence {.pure.} = enum
## Operator precedence
## clearly stolen from
@ -66,18 +64,16 @@ type
# Only meaningful for parse errors
file: string
# The list of tokens representing
# the source code to be parsed.
# In most cases, those will come
# from the builtin lexer, but this
# behavior is not enforced and the
# tokenizer is entirely separate from
# the parser
# the source code to be parsed
tokens: seq[Token]
# Little internal attribute that tells
# us if we're inside a loop or not. This
# allows us to detect errors like break
# being used outside loops
currentLoop: LoopContext
# Just like scope depth tells us how
# many nested scopes are above us, the
# loop depth tells us how many nested
# loops are above us. It's just a simple
# way of statically detecting stuff like
# the break statement being used outside
# loops. Maybe a bit overkill for a parser?
loopDepth: int
# Stores the current function
# being parsed. This is a reference
# to either a FunDecl or LambdaExpr
@ -96,8 +92,13 @@ type
lines: seq[tuple[start, stop: int]]
# The source of the current module
source: string
# Keeps track of imported modules
modules: seq[tuple[name: string, loaded: bool]]
# Keeps track of imported modules.
# The key is the module's fully qualified
# path, while the boolean indicates whether
# it has been fully loaded. This is useful
# to avoid importing a module twice and to
# detect recursive dependency cycles
modules: TableRef[string, bool]
ParseError* = ref object of PeonException
## A parsing exception
parser*: Parser
@ -140,7 +141,7 @@ proc newOperatorTable: OperatorTable =
result.tokens = @[]
for prec in Precedence:
result.precedence[prec] = @[]
# These operators are currently not built-in
# These operators are currently hardcoded
# due to compiler limitations
result.addOperator("=")
result.addOperator(".")
@ -161,11 +162,12 @@ proc newParser*: Parser =
result.file = ""
result.tokens = @[]
result.currentFunction = nil
result.currentLoop = LoopContext.None
result.loopDepth = 0
result.scopeDepth = 0
result.operators = newOperatorTable()
result.tree = @[]
result.source = ""
result.modules = newTable[string, bool]()
# Public getters for improved error formatting
@ -180,7 +182,7 @@ template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg,
proc peek(self: Parser, distance: int = 0): Token =
proc peek(self: Parser, distance: int = 0): Token {.inline.} =
## Peeks at the token at the given distance.
## If the distance is out of bounds, an EOF
## token is returned. A negative distance may
@ -201,7 +203,7 @@ proc done(self: Parser): bool {.inline.} =
result = self.peek().kind == EndOfFile
proc step(self: Parser, n: int = 1): Token =
proc step(self: Parser, n: int = 1): Token {.inline.} =
## Steps n tokens into the input,
## returning the last consumed one
if self.done():
@ -227,7 +229,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
# as a symbol and in the cases where we need a specific token we just match the string
# directly
proc check[T: TokenType or string](self: Parser, kind: T,
distance: int = 0): bool =
distance: int = 0): bool {.inline.} =
## Checks if the given token at the given distance
## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to
@ -239,7 +241,7 @@ proc check[T: TokenType or string](self: Parser, kind: T,
self.peek(distance).lexeme == kind
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} =
## Calls self.check() in a loop with each entry of
## the given openarray of token kinds and returns
## at the first match. Note that this assumes
@ -251,7 +253,7 @@ proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
return false
proc match[T: TokenType or string](self: Parser, kind: T): bool =
proc match[T: TokenType or string](self: Parser, kind: T): bool {.inline.} =
## Behaves like self.check(), except that when a token
## matches it is also consumed
if self.check(kind):
@ -261,7 +263,7 @@ proc match[T: TokenType or string](self: Parser, kind: T): bool =
result = false
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} =
## Calls self.match() in a loop with each entry of
## the given openarray of token kinds and returns
## at the first match. Note that this assumes
@ -273,7 +275,7 @@ proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
result = false
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) =
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) {.inline.} =
## Behaves like self.match(), except that
## when a token doesn't match, an error
## is raised. If no error message is
@ -285,7 +287,7 @@ proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "",
self.error(message)
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} =
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.inline, used.} =
## Behaves like self.expect(), except that
## an error is raised only if none of the
## given token kinds matches
@ -307,6 +309,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
isLambda: bool = false, isOperator: bool = false, isTemplate: bool = false): Declaration
proc declaration(self: Parser): Declaration
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
proc findOperators(self: Parser, tokens: seq[Token])
# End of forward declarations
@ -436,7 +439,7 @@ proc makeCall(self: Parser, callee: Expression): CallExpr =
proc parseGenericArgs(self: Parser) =
## Parses function generic arguments
## like function[type](arg)
discard
discard # TODO
proc call(self: Parser): Expression =
@ -596,12 +599,12 @@ proc assertStmt(self: Parser): Statement =
result.file = self.file
proc beginScope(self: Parser) =
proc beginScope(self: Parser) {.inline.} =
## Begins a new lexical scope
inc(self.scopeDepth)
proc endScope(self: Parser) =
proc endScope(self: Parser) {.inline.} =
## Ends a new lexical scope
dec(self.scopeDepth)
@ -631,8 +634,7 @@ proc namedBlockStmt(self: Parser): Statement =
self.expect(Identifier, "expecting block name after 'block'")
var name = newIdentExpr(self.peek(-1), self.scopeDepth)
name.file = self.file
let enclosingLoop = self.currentLoop
self.currentLoop = Loop
inc(self.loopDepth)
self.expect(LeftBrace, "expecting '{' after 'block'")
while not self.check(RightBrace) and not self.done():
code.add(self.declaration())
@ -642,14 +644,14 @@ proc namedBlockStmt(self: Parser): Statement =
result = newNamedBlockStmt(code, name, tok)
result.file = self.file
self.endScope()
self.currentLoop = enclosingLoop
dec(self.loopDepth)
proc breakStmt(self: Parser): Statement =
## Parses break statements
let tok = self.peek(-1)
var label: IdentExpr
if self.currentLoop != Loop:
if self.loopDepth == 0:
self.error("'break' cannot be used outside loops")
if self.match(Identifier):
label = newIdentExpr(self.peek(-1), self.scopeDepth)
@ -673,7 +675,7 @@ proc continueStmt(self: Parser): Statement =
## Parses continue statements
let tok = self.peek(-1)
var label: IdentExpr
if self.currentLoop != Loop:
if self.loopDepth == 0:
self.error("'continue' cannot be used outside loops")
if self.match(Identifier):
label = newIdentExpr(self.peek(-1), self.scopeDepth)
@ -747,8 +749,7 @@ proc raiseStmt(self: Parser): Statement =
proc forEachStmt(self: Parser): Statement =
## Parses C#-like foreach loops
let tok = self.peek(-1)
let enclosingLoop = self.currentLoop
self.currentLoop = Loop
inc(self.loopDepth)
self.expect(Identifier)
let identifier = newIdentExpr(self.peek(-1), self.scopeDepth)
self.expect("in")
@ -756,10 +757,7 @@ proc forEachStmt(self: Parser): Statement =
self.expect(LeftBrace)
result = newForEachStmt(identifier, expression, self.blockStmt(), tok)
result.file = self.file
self.currentLoop = enclosingLoop
proc findOperators(self: Parser, tokens: seq[Token])
dec(self.loopDepth)
proc importStmt(self: Parser, fromStmt: bool = false): Statement =
@ -806,6 +804,10 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
break
elif i == searchPath.high():
self.error(&"""could not import '{path}': module not found""")
if not self.modules.getOrDefault(path, true):
self.error(&"coult not import '{path}' (recursive dependency detected)")
else:
self.modules[path] = false
try:
var source = readFile(path)
var tree = self.tree
@ -819,6 +821,8 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
self.tree = tree
self.current = current
self.tokens = tokens
# Module has been fully loaded and can now be used
self.modules[path] = true
except IOError:
self.error(&"could not import '{path}': {getCurrentExceptionMsg()}")
except OSError:
@ -859,14 +863,13 @@ proc whileStmt(self: Parser): Statement =
## Parses a C-style while loop statement
let tok = self.peek(-1)
self.beginScope()
let enclosingLoop = self.currentLoop
inc(self.loopDepth)
let condition = self.expression()
self.expect(LeftBrace)
self.currentLoop = Loop
result = newWhileStmt(condition, self.blockStmt(), tok)
result.file = self.file
self.currentLoop = enclosingLoop
self.endScope()
dec(self.loopDepth)
proc ifStmt(self: Parser): Statement =
@ -1049,7 +1052,7 @@ proc parseFunExpr(self: Parser): LambdaExpr =
proc parseGenericConstraint(self: Parser): Expression =
## Recursivelt parses a generic constraint
## Recursively parses a generic constraint
## and returns it as an expression
result = self.expression() # First value is always an identifier of some sort
if not self.check(RightBracket):
@ -1301,6 +1304,7 @@ proc typeDecl(self: Parser): TypeDecl =
var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[]
var pragmas: seq[Pragma] = @[]
result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false)
result.file = self.file
if self.match(LeftBracket):
self.parseGenerics(result)
self.expect("=", "expecting '=' after type name")
@ -1315,7 +1319,6 @@ proc typeDecl(self: Parser): TypeDecl =
result.isEnum = true
of "object":
discard self.step()
discard # Default case
else:
hasNone = true
if hasNone:
@ -1334,7 +1337,7 @@ proc typeDecl(self: Parser): TypeDecl =
self.expect(LeftBrace, "expecting '{' after type declaration")
if self.match(TokenType.Pragma):
for pragma in self.parsePragmas():
pragmas.add(pragma)
result.pragmas.add(pragma)
var
argName: IdentExpr
argPrivate: bool
@ -1356,8 +1359,6 @@ proc typeDecl(self: Parser): TypeDecl =
else:
if not self.check(RightBrace):
self.expect(",", "expecting comma after enum field declaration")
result.pragmas = pragmas
result.file = self.file
proc declaration(self: Parser): Declaration =
@ -1420,11 +1421,12 @@ proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[sta
self.lines = lines
self.current = 0
self.scopeDepth = 0
self.currentLoop = LoopContext.None
self.loopDepth = 0
self.currentFunction = nil
self.tree = @[]
if not persist:
self.operators = newOperatorTable()
self.modules = newTable[string, bool]()
self.findOperators(tokens)
while not self.done():
self.tree.add(self.declaration())