Initial work on the new compiler module

This commit is contained in:
Mattia Giambirtone 2023-03-27 09:53:56 +02:00
parent 32ae21d143
commit 8277472819
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
4 changed files with 1114 additions and 66 deletions

View File

@ -14,7 +14,8 @@ Peon is a multi-paradigm, statically-typed programming language inspired by C, N
features such as automatic type inference, parametrically polymorphic generic types, pure functions, closures, interfaces, single inheritance, features such as automatic type inference, parametrically polymorphic generic types, pure functions, closures, interfaces, single inheritance,
reference types, templates, coroutines, raw pointers and exceptions. reference types, templates, coroutines, raw pointers and exceptions.
The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory. The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory, although more garbage
collection strategies (such as generational GC or deferred reference counting) are planned to be added in the future.
Peon features a native cooperative concurrency model designed to take advantage of the inherent waiting of typical I/O workloads, without the use of more than one OS thread (wherever possible), allowing for much greater efficiency and a smaller memory footprint. The asynchronous model used forces developers to write code that is both easy to reason about, thanks to the [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) model that is core to peon's async event loop implementation, and works as expected every time (without dropping signals, exceptions, or task return values). Peon features a native cooperative concurrency model designed to take advantage of the inherent waiting of typical I/O workloads, without the use of more than one OS thread (wherever possible), allowing for much greater efficiency and a smaller memory footprint. The asynchronous model used forces developers to write code that is both easy to reason about, thanks to the [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) model that is core to peon's async event loop implementation, and works as expected every time (without dropping signals, exceptions, or task return values).
@ -41,7 +42,7 @@ will move through the input history (which is never reset). Also note that UTF-8
### TODO List ### TODO List
In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date): In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date):
- User-defined types - User-defined types
- Function calls ✅ - Function calls ✅
- Control flow (if-then-else, switch) ✅ - Control flow (if-then-else, switch) ✅
- Looping (while) ✅ - Looping (while) ✅
@ -57,7 +58,6 @@ In no particular order, here's a list of stuff that's done/to do (might be incom
- Named scopes/blocks ✅ - Named scopes/blocks ✅
- Inheritance - Inheritance
- Interfaces - Interfaces
- Indexing operator
- Generics ✅ - Generics ✅
- Automatic types ✅ - Automatic types ✅
- Iterators/Generators - Iterators/Generators
@ -76,12 +76,14 @@ In no particular order, here's a list of stuff that's done/to do (might be incom
Here's a random list of high-level features I would like peon to have and that I think are kinda neat (some may Here's a random list of high-level features I would like peon to have and that I think are kinda neat (some may
have been implemented already): have been implemented already):
- Reference types are not nullable by default (must use `#pragma[nullable]`) - Reference types are not nullable by default (must use `#pragma[nullable]`)
- The `commutative` pragma, which allows to define just one implementation of an operator
and have it become commutative
- Easy C/Nim interop via FFI - Easy C/Nim interop via FFI
- C/C++ backend - C/C++ backend
- Nim backend - Nim backend
- [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) (must-have!) - [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) (must-have!)
- Simple OOP (with multiple dispatch!) - Simple OOP (with multiple dispatch!)
- RTTI, with methods that dispatch at runtime based on the true type of a value - RTTI, with methods that dispatch at runtime based on the true (aka runtime) type of a value
- Limited compile-time evaluation (embed the Peon VM in the C/C++/Nim backend and use that to execute peon code at compile time) - Limited compile-time evaluation (embed the Peon VM in the C/C++/Nim backend and use that to execute peon code at compile time)

View File

@ -12,19 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import std/tables import std/tables
import std/strformat import std/strformat
import std/algorithm import std/algorithm

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@
import std/strformat import std/strformat
import std/strutils import std/strutils
import std/tables
import std/os import std/os
@ -31,9 +32,6 @@ export token, ast, errors
type type
LoopContext {.pure.} = enum
Loop, None
Precedence {.pure.} = enum Precedence {.pure.} = enum
## Operator precedence ## Operator precedence
## clearly stolen from ## clearly stolen from
@ -66,18 +64,16 @@ type
# Only meaningful for parse errors # Only meaningful for parse errors
file: string file: string
# The list of tokens representing # The list of tokens representing
# the source code to be parsed. # the source code to be parsed
# In most cases, those will come
# from the builtin lexer, but this
# behavior is not enforced and the
# tokenizer is entirely separate from
# the parser
tokens: seq[Token] tokens: seq[Token]
# Little internal attribute that tells # Just like scope depth tells us how
# us if we're inside a loop or not. This # many nested scopes are above us, the
# allows us to detect errors like break # loop depth tells us how many nested
# being used outside loops # loops are above us. It's just a simple
currentLoop: LoopContext # way of statically detecting stuff like
# the break statement being used outside
# loops. Maybe a bit overkill for a parser?
loopDepth: int
# Stores the current function # Stores the current function
# being parsed. This is a reference # being parsed. This is a reference
# to either a FunDecl or LambdaExpr # to either a FunDecl or LambdaExpr
@ -96,8 +92,13 @@ type
lines: seq[tuple[start, stop: int]] lines: seq[tuple[start, stop: int]]
# The source of the current module # The source of the current module
source: string source: string
# Keeps track of imported modules # Keeps track of imported modules.
modules: seq[tuple[name: string, loaded: bool]] # The key is the module's fully qualified
# path, while the boolean indicates whether
# it has been fully loaded. This is useful
# to avoid importing a module twice and to
# detect recursive dependency cycles
modules: TableRef[string, bool]
ParseError* = ref object of PeonException ParseError* = ref object of PeonException
## A parsing exception ## A parsing exception
parser*: Parser parser*: Parser
@ -140,7 +141,7 @@ proc newOperatorTable: OperatorTable =
result.tokens = @[] result.tokens = @[]
for prec in Precedence: for prec in Precedence:
result.precedence[prec] = @[] result.precedence[prec] = @[]
# These operators are currently not built-in # These operators are currently hardcoded
# due to compiler limitations # due to compiler limitations
result.addOperator("=") result.addOperator("=")
result.addOperator(".") result.addOperator(".")
@ -161,11 +162,12 @@ proc newParser*: Parser =
result.file = "" result.file = ""
result.tokens = @[] result.tokens = @[]
result.currentFunction = nil result.currentFunction = nil
result.currentLoop = LoopContext.None result.loopDepth = 0
result.scopeDepth = 0 result.scopeDepth = 0
result.operators = newOperatorTable() result.operators = newOperatorTable()
result.tree = @[] result.tree = @[]
result.source = "" result.source = ""
result.modules = newTable[string, bool]()
# Public getters for improved error formatting # Public getters for improved error formatting
@ -180,7 +182,7 @@ template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg,
proc peek(self: Parser, distance: int = 0): Token = proc peek(self: Parser, distance: int = 0): Token {.inline.} =
## Peeks at the token at the given distance. ## Peeks at the token at the given distance.
## If the distance is out of bounds, an EOF ## If the distance is out of bounds, an EOF
## token is returned. A negative distance may ## token is returned. A negative distance may
@ -201,7 +203,7 @@ proc done(self: Parser): bool {.inline.} =
result = self.peek().kind == EndOfFile result = self.peek().kind == EndOfFile
proc step(self: Parser, n: int = 1): Token = proc step(self: Parser, n: int = 1): Token {.inline.} =
## Steps n tokens into the input, ## Steps n tokens into the input,
## returning the last consumed one ## returning the last consumed one
if self.done(): if self.done():
@ -227,7 +229,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
# as a symbol and in the cases where we need a specific token we just match the string # as a symbol and in the cases where we need a specific token we just match the string
# directly # directly
proc check[T: TokenType or string](self: Parser, kind: T, proc check[T: TokenType or string](self: Parser, kind: T,
distance: int = 0): bool = distance: int = 0): bool {.inline.} =
## Checks if the given token at the given distance ## Checks if the given token at the given distance
## matches the expected kind and returns a boolean. ## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to ## The distance parameter is passed directly to
@ -239,7 +241,7 @@ proc check[T: TokenType or string](self: Parser, kind: T,
self.peek(distance).lexeme == kind self.peek(distance).lexeme == kind
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} =
## Calls self.check() in a loop with each entry of ## Calls self.check() in a loop with each entry of
## the given openarray of token kinds and returns ## the given openarray of token kinds and returns
## at the first match. Note that this assumes ## at the first match. Note that this assumes
@ -251,7 +253,7 @@ proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
return false return false
proc match[T: TokenType or string](self: Parser, kind: T): bool = proc match[T: TokenType or string](self: Parser, kind: T): bool {.inline.} =
## Behaves like self.check(), except that when a token ## Behaves like self.check(), except that when a token
## matches it is also consumed ## matches it is also consumed
if self.check(kind): if self.check(kind):
@ -261,7 +263,7 @@ proc match[T: TokenType or string](self: Parser, kind: T): bool =
result = false result = false
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} =
## Calls self.match() in a loop with each entry of ## Calls self.match() in a loop with each entry of
## the given openarray of token kinds and returns ## the given openarray of token kinds and returns
## at the first match. Note that this assumes ## at the first match. Note that this assumes
@ -273,7 +275,7 @@ proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
result = false result = false
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) = proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) {.inline.} =
## Behaves like self.match(), except that ## Behaves like self.match(), except that
## when a token doesn't match, an error ## when a token doesn't match, an error
## is raised. If no error message is ## is raised. If no error message is
@ -285,7 +287,7 @@ proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "",
self.error(message) self.error(message)
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} = proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.inline, used.} =
## Behaves like self.expect(), except that ## Behaves like self.expect(), except that
## an error is raised only if none of the ## an error is raised only if none of the
## given token kinds matches ## given token kinds matches
@ -307,6 +309,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
isLambda: bool = false, isOperator: bool = false, isTemplate: bool = false): Declaration isLambda: bool = false, isOperator: bool = false, isTemplate: bool = false): Declaration
proc declaration(self: Parser): Declaration proc declaration(self: Parser): Declaration
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
proc findOperators(self: Parser, tokens: seq[Token])
# End of forward declarations # End of forward declarations
@ -436,7 +439,7 @@ proc makeCall(self: Parser, callee: Expression): CallExpr =
proc parseGenericArgs(self: Parser) = proc parseGenericArgs(self: Parser) =
## Parses function generic arguments ## Parses function generic arguments
## like function[type](arg) ## like function[type](arg)
discard discard # TODO
proc call(self: Parser): Expression = proc call(self: Parser): Expression =
@ -596,12 +599,12 @@ proc assertStmt(self: Parser): Statement =
result.file = self.file result.file = self.file
proc beginScope(self: Parser) = proc beginScope(self: Parser) {.inline.} =
## Begins a new lexical scope ## Begins a new lexical scope
inc(self.scopeDepth) inc(self.scopeDepth)
proc endScope(self: Parser) = proc endScope(self: Parser) {.inline.} =
## Ends a new lexical scope ## Ends a new lexical scope
dec(self.scopeDepth) dec(self.scopeDepth)
@ -631,8 +634,7 @@ proc namedBlockStmt(self: Parser): Statement =
self.expect(Identifier, "expecting block name after 'block'") self.expect(Identifier, "expecting block name after 'block'")
var name = newIdentExpr(self.peek(-1), self.scopeDepth) var name = newIdentExpr(self.peek(-1), self.scopeDepth)
name.file = self.file name.file = self.file
let enclosingLoop = self.currentLoop inc(self.loopDepth)
self.currentLoop = Loop
self.expect(LeftBrace, "expecting '{' after 'block'") self.expect(LeftBrace, "expecting '{' after 'block'")
while not self.check(RightBrace) and not self.done(): while not self.check(RightBrace) and not self.done():
code.add(self.declaration()) code.add(self.declaration())
@ -642,14 +644,14 @@ proc namedBlockStmt(self: Parser): Statement =
result = newNamedBlockStmt(code, name, tok) result = newNamedBlockStmt(code, name, tok)
result.file = self.file result.file = self.file
self.endScope() self.endScope()
self.currentLoop = enclosingLoop dec(self.loopDepth)
proc breakStmt(self: Parser): Statement = proc breakStmt(self: Parser): Statement =
## Parses break statements ## Parses break statements
let tok = self.peek(-1) let tok = self.peek(-1)
var label: IdentExpr var label: IdentExpr
if self.currentLoop != Loop: if self.loopDepth == 0:
self.error("'break' cannot be used outside loops") self.error("'break' cannot be used outside loops")
if self.match(Identifier): if self.match(Identifier):
label = newIdentExpr(self.peek(-1), self.scopeDepth) label = newIdentExpr(self.peek(-1), self.scopeDepth)
@ -673,7 +675,7 @@ proc continueStmt(self: Parser): Statement =
## Parses continue statements ## Parses continue statements
let tok = self.peek(-1) let tok = self.peek(-1)
var label: IdentExpr var label: IdentExpr
if self.currentLoop != Loop: if self.loopDepth == 0:
self.error("'continue' cannot be used outside loops") self.error("'continue' cannot be used outside loops")
if self.match(Identifier): if self.match(Identifier):
label = newIdentExpr(self.peek(-1), self.scopeDepth) label = newIdentExpr(self.peek(-1), self.scopeDepth)
@ -747,8 +749,7 @@ proc raiseStmt(self: Parser): Statement =
proc forEachStmt(self: Parser): Statement = proc forEachStmt(self: Parser): Statement =
## Parses C#-like foreach loops ## Parses C#-like foreach loops
let tok = self.peek(-1) let tok = self.peek(-1)
let enclosingLoop = self.currentLoop inc(self.loopDepth)
self.currentLoop = Loop
self.expect(Identifier) self.expect(Identifier)
let identifier = newIdentExpr(self.peek(-1), self.scopeDepth) let identifier = newIdentExpr(self.peek(-1), self.scopeDepth)
self.expect("in") self.expect("in")
@ -756,10 +757,7 @@ proc forEachStmt(self: Parser): Statement =
self.expect(LeftBrace) self.expect(LeftBrace)
result = newForEachStmt(identifier, expression, self.blockStmt(), tok) result = newForEachStmt(identifier, expression, self.blockStmt(), tok)
result.file = self.file result.file = self.file
self.currentLoop = enclosingLoop dec(self.loopDepth)
proc findOperators(self: Parser, tokens: seq[Token])
proc importStmt(self: Parser, fromStmt: bool = false): Statement = proc importStmt(self: Parser, fromStmt: bool = false): Statement =
@ -806,6 +804,10 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
break break
elif i == searchPath.high(): elif i == searchPath.high():
self.error(&"""could not import '{path}': module not found""") self.error(&"""could not import '{path}': module not found""")
if not self.modules.getOrDefault(path, true):
self.error(&"coult not import '{path}' (recursive dependency detected)")
else:
self.modules[path] = false
try: try:
var source = readFile(path) var source = readFile(path)
var tree = self.tree var tree = self.tree
@ -819,6 +821,8 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
self.tree = tree self.tree = tree
self.current = current self.current = current
self.tokens = tokens self.tokens = tokens
# Module has been fully loaded and can now be used
self.modules[path] = true
except IOError: except IOError:
self.error(&"could not import '{path}': {getCurrentExceptionMsg()}") self.error(&"could not import '{path}': {getCurrentExceptionMsg()}")
except OSError: except OSError:
@ -859,14 +863,13 @@ proc whileStmt(self: Parser): Statement =
## Parses a C-style while loop statement ## Parses a C-style while loop statement
let tok = self.peek(-1) let tok = self.peek(-1)
self.beginScope() self.beginScope()
let enclosingLoop = self.currentLoop inc(self.loopDepth)
let condition = self.expression() let condition = self.expression()
self.expect(LeftBrace) self.expect(LeftBrace)
self.currentLoop = Loop
result = newWhileStmt(condition, self.blockStmt(), tok) result = newWhileStmt(condition, self.blockStmt(), tok)
result.file = self.file result.file = self.file
self.currentLoop = enclosingLoop
self.endScope() self.endScope()
dec(self.loopDepth)
proc ifStmt(self: Parser): Statement = proc ifStmt(self: Parser): Statement =
@ -1049,7 +1052,7 @@ proc parseFunExpr(self: Parser): LambdaExpr =
proc parseGenericConstraint(self: Parser): Expression = proc parseGenericConstraint(self: Parser): Expression =
## Recursivelt parses a generic constraint ## Recursively parses a generic constraint
## and returns it as an expression ## and returns it as an expression
result = self.expression() # First value is always an identifier of some sort result = self.expression() # First value is always an identifier of some sort
if not self.check(RightBracket): if not self.check(RightBracket):
@ -1301,6 +1304,7 @@ proc typeDecl(self: Parser): TypeDecl =
var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[] var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[]
var pragmas: seq[Pragma] = @[] var pragmas: seq[Pragma] = @[]
result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false) result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false)
result.file = self.file
if self.match(LeftBracket): if self.match(LeftBracket):
self.parseGenerics(result) self.parseGenerics(result)
self.expect("=", "expecting '=' after type name") self.expect("=", "expecting '=' after type name")
@ -1315,7 +1319,6 @@ proc typeDecl(self: Parser): TypeDecl =
result.isEnum = true result.isEnum = true
of "object": of "object":
discard self.step() discard self.step()
discard # Default case
else: else:
hasNone = true hasNone = true
if hasNone: if hasNone:
@ -1334,7 +1337,7 @@ proc typeDecl(self: Parser): TypeDecl =
self.expect(LeftBrace, "expecting '{' after type declaration") self.expect(LeftBrace, "expecting '{' after type declaration")
if self.match(TokenType.Pragma): if self.match(TokenType.Pragma):
for pragma in self.parsePragmas(): for pragma in self.parsePragmas():
pragmas.add(pragma) result.pragmas.add(pragma)
var var
argName: IdentExpr argName: IdentExpr
argPrivate: bool argPrivate: bool
@ -1356,8 +1359,6 @@ proc typeDecl(self: Parser): TypeDecl =
else: else:
if not self.check(RightBrace): if not self.check(RightBrace):
self.expect(",", "expecting comma after enum field declaration") self.expect(",", "expecting comma after enum field declaration")
result.pragmas = pragmas
result.file = self.file
proc declaration(self: Parser): Declaration = proc declaration(self: Parser): Declaration =
@ -1420,11 +1421,12 @@ proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[sta
self.lines = lines self.lines = lines
self.current = 0 self.current = 0
self.scopeDepth = 0 self.scopeDepth = 0
self.currentLoop = LoopContext.None self.loopDepth = 0
self.currentFunction = nil self.currentFunction = nil
self.tree = @[] self.tree = @[]
if not persist: if not persist:
self.operators = newOperatorTable() self.operators = newOperatorTable()
self.modules = newTable[string, bool]()
self.findOperators(tokens) self.findOperators(tokens)
while not self.done(): while not self.done():
self.tree.add(self.declaration()) self.tree.add(self.declaration())