From 827747281954740d0be13a544ab525b4e2472c9e Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Mon, 27 Mar 2023 09:53:56 +0200 Subject: [PATCH] Initial work on the new compiler module --- README.md | 10 +- src/frontend/compiler/compiler.nim | 14 +- src/frontend/compiler/newcompiler.nim | 1056 +++++++++++++++++++++++++ src/frontend/parsing/parser.nim | 100 +-- 4 files changed, 1114 insertions(+), 66 deletions(-) create mode 100644 src/frontend/compiler/newcompiler.nim diff --git a/README.md b/README.md index 53f71f7..543d418 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ Peon is a multi-paradigm, statically-typed programming language inspired by C, N features such as automatic type inference, parametrically polymorphic generic types, pure functions, closures, interfaces, single inheritance, reference types, templates, coroutines, raw pointers and exceptions. -The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory. +The memory management model is rather simple: a Mark and Sweep garbage collector is employed to reclaim unused memory, although more garbage +collection strategies (such as generational GC or deferred reference counting) are planned to be added in the future. Peon features a native cooperative concurrency model designed to take advantage of the inherent waiting of typical I/O workloads, without the use of more than one OS thread (wherever possible), allowing for much greater efficiency and a smaller memory footprint. The asynchronous model used forces developers to write code that is both easy to reason about, thanks to the [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) model that is core to peon's async event loop implementation, and works as expected every time (without dropping signals, exceptions, or task return values). @@ -41,7 +42,7 @@ will move through the input history (which is never reset). 
Also note that UTF-8 ### TODO List In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date): - - User-defined types + - User-defined types - Function calls ✅ - Control flow (if-then-else, switch) ✅ - Looping (while) ✅ @@ -57,7 +58,6 @@ In no particular order, here's a list of stuff that's done/to do (might be incom - Named scopes/blocks ✅ - Inheritance - Interfaces - - Indexing operator - Generics ✅ - Automatic types ✅ - Iterators/Generators @@ -76,12 +76,14 @@ In no particular order, here's a list of stuff that's done/to do (might be incom Here's a random list of high-level features I would like peon to have and that I think are kinda neat (some may have been implemented alredady): - Reference types are not nullable by default (must use `#pragma[nullable]`) +- The `commutative` pragma, which allows to define just one implementation of an operator + and have it become commutative - Easy C/Nim interop via FFI - C/C++ backend - Nim backend - [Structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) (must-have!) - Simple OOP (with multiple dispatch!) -- RTTI, with methods that dispatch at runtime based on the true type of a value +- RTTI, with methods that dispatch at runtime based on the true (aka runtime) type of a value - Limited compile-time evaluation (embed the Peon VM in the C/C++/Nim backend and use that to execute peon code at compile time) diff --git a/src/frontend/compiler/compiler.nim b/src/frontend/compiler/compiler.nim index 041134e..3f082f5 100644 --- a/src/frontend/compiler/compiler.nim +++ b/src/frontend/compiler/compiler.nim @@ -12,19 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Copyright 2022 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. + import std/tables import std/strformat import std/algorithm diff --git a/src/frontend/compiler/newcompiler.nim b/src/frontend/compiler/newcompiler.nim new file mode 100644 index 0000000..cc6b19c --- /dev/null +++ b/src/frontend/compiler/newcompiler.nim @@ -0,0 +1,1056 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import std/os +import std/sets +import std/tables +import std/hashes +import std/strutils +import std/terminal +import std/sequtils +import std/algorithm +import std/strformat + +import errors +import config +import frontend/parsing/token +import frontend/parsing/ast +import frontend/parsing/lexer as l +import frontend/parsing/parser as p + + +type + # Just a bunch of convenience type aliases + TypedArgument* = tuple[name: string, kind: Type, default: TypedNode] + TypeConstraint* = tuple[match: bool, kind: Type] + + PragmaKind* = enum + ## A pragma type enumeration + + # "Immediate" pragmas are processed right + # when they are encountered. 
This is useful + # for some types of pragmas, such as those + # that flick compile-time switches on or off + # or that mark objects with some compile-time + # property. "Delayed" pragmas, on the other hand, + # are processed when their associated object is + # used in some way. For example, the "error" + # pragma, when associated to a function, causes + # the compiler to raise a static error when + # attempting to call said function, rather than + # doing so at declaration time. This allows the + # detection of errors such as trying to negate + # unsigned integers without explicitly hardcoding + # the check into the compiler (all that's needed + # is a definition of the `-` operator that will + # raise a static error once called) + Immediate, Delayed + + Pragma* = object + ## A pragma object. Pragmas are + ## (usually) hooks into compiler + ## functions or serve as markers + ## for objects (for example, to + ## signal that a function has no + ## side effects or that a type is + ## nullable) + kind*: PragmaKind + name*: string # All pragmas have names + arguments*: seq[Type] # The arguments to the pragma. Must be values computable at compile time + + TypeKind* = enum + ## An enumeration of compile-time + ## types + + # Intrinsic (aka "built-in") types + + # Signed and unsigned integer types + Int8, UInt8, Int16, UInt16, + Int32, UInt32, Int64, UInt64, + + # Floating point types + Float32, Float64, + + Char, # A single ASCII character + Byte, # Basically an alias for char + String, # A string. No encoding is specified + Function, # A function + TypeDecl, # A type declaration + Nil, # The nil type (aka null) + Nan, # The value of NaN (Not a Number) + Bool, # Booleans (true and false) + Inf, # Negative and positive infinity + # Note: nil, nan, true, false and inf are all singletons + Typevar, # A type variable is the type of a type. 
For example, the type of `int` is typevar + Generic, # A parametrically polymorphic generic type + Reference, # A managed (aka GC'ed) reference + Pointer, # An unmanaged (aka malloc()-ed) reference + Any, # The "any" type is a placeholder for a single type (similar to Python's builtins.object) + All, # The all type means "any type or no type at all". It is not exposed outside of the compiler + Union, # An untagged type union (acts like an exclusive "logical or" constraint) + Auto, # An automatic type. The compiler infers the true type of the object from its value when necessary + Enum, # An enumeration type + + Type* = ref object + ## A compile-time type + case kind: TypeKind: + of Generic: + # A generic type + constraints*: seq[TypeConstraint] # The type's generic constraints. For example, + # fn foo[T*: int & ~uint](...) {...} would map to [(true, int), (false, uint)] + name*: IdentExpr # The generic's name (in our example above, this would be "T") + asUnion*: bool # A generic constraint is treated like a "logical and", which means all + # of its constraints must be satisfied. This allows for parametric polymorphism to work, + # but it woudln't allow to make use of the type with only one of the types of the constraint, + # which is pretty useless. When this value is set to true, which it isn't by default, the + # constraints turn into an exclusive "logical or" instead, meaning that any type in the constraints + # is a valid instance of the type itself. This allows the compiler to typecheck the type for all + # possible types in the constraint and then let the user instantiate said type with any of the types + # in said constraint. The field's name means "treat this generic constraint like a type union" + of Union: + # A type union + types*: seq[TypeConstraint] + of Reference: + # A managed reference + nullable*: bool # Is null a valid value for this type? 
(false by default) + value*: Type # The type the reference points to + of Pointer: + # An unmanaged reference. Much + # like a raw pointer in C + data*: Type # The type we point to + of TypeDecl: + # A user-defined type + fields*: seq[TypedArgument] # List of fields in the object. May be empty + parent*: Type # The parent of this object if inheritance is used. May be nil + implements*: seq[Type] # The interfaces this object implements. May be empty + of Function: + # A function-like object. Wraps regular + # functions, lambdas, coroutines and generators + isLambda*: bool # Is this a lambda (aka anonymous) function? + isCoroutine*: bool # Is this a coroutine? + isGenerator*: bool # Is this a generator? + isAuto*: bool # Is this an automatic function? + arguments*: seq[TypedArgument] # The function's arguments + forwarded*: bool # Is this a forward declaration? + returnType*: Type # The function's return type + else: + discard + # Can this type be mutated? + mutable: bool + + TypedNode* = ref object + ## A typed AST node + node*: ASTNode # The original (typeless) AST node + value*: Type # The node's type + + NameKind* = enum + ## A name enumeration type + DeclType, # Any type declaration + Module + + Name* = ref object + ## A name object. Name objects associate + ## peon objects to identifiers + case kind*: NameKind + of Module: + path*: string # The module's path + else: + discard + ident*: IdentExpr # The name's identifier + file*: string # The file where this name is declared in + belongsTo*: Name # The function owning this name, if any + obj*: TypedNode # The name's associated object + owner*: Name # The module owning this name + depth*: int # The name's scope depth + isPrivate*: bool # Is this name private? + isConst*: bool # Is this name a constant? + isLet*: bool # Can this name's value be mutated? + isGeneric*: bool # Is this a generic type? + line*: int # The line where this name is declared + resolved*: bool # Has this name ever been used? 
+ node*: Declaration # The declaration associated with this name + exports*: HashSet[Name] # The modules to which this name is visible to + isBuiltin*: bool # Is this name a built-in? + isReal*: bool # Is this an actual name in user code? (The compiler + # generates some names for its internal use and they may even duplicate existing + # ones, so that is why we need this attribute) + + WarningKind* {.pure.} = enum + ## A warning enumeration type + UnreachableCode, UnusedName, ShadowOuterScope, + MutateOuterScope + + CompileMode* {.pure.} = enum + ## A compilation mode enumeration + Debug, Release + + CompileError* = ref object of PeonException + node*: ASTNode + function*: Declaration + compiler*: Compiler + + Compiler* = ref object + ## The peon compiler + ast: seq[Declaration] # The (typeless) AST of the current module + current: int # Index into self.ast of the current node we're compiling + file*: string # The current file being compiled (used only for error reporting) + depth*: int # The current scope depth. If > 0, we're in a local scope, otherwise it's global + replMode*: bool # Are we in REPL mode? + names*: seq[Name] # List of all currently declared names + lines*: seq[tuple[start, stop: int]] # Stores line data for error reporting + source*: string # The source of the current module, used for error reporting + # We store these objects to compile modules + lexer*: Lexer + parser*: Parser + isMainModule*: bool # Are we compiling the main module? + disabledWarnings*: seq[WarningKind] # List of disabled warnings + showMismatches*: bool # Whether to show detailed info about type mismatches when we dispatch + mode*: CompileMode # Are we compiling in debug mode or release mode? 
+ currentFunction*: Name # The current function being compiled + currentModule*: Name # The current module being compiled + parentModule*: Name # The module importing us, if any + modules*: HashSet[Name] # Currently imported modules + + +# Makes our name objects hashable +func hash(self: Name): Hash {.inline.} = self.ident.token.lexeme.hash() + +proc `$`*(self: Name): string = $(self[]) +proc `$`(self: Type): string = $(self[]) +proc `$`(self: TypedNode): string = $(self[]) + + +# Public getters for nicer error formatting +func getCurrentNode*(self: Compiler): ASTNode {.inline.} = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) +func getCurrentFunction*(self: Compiler): Declaration {.inline.} = (if self.currentFunction.isNil(): nil else: self.currentFunction.node) +func getSource*(self: Compiler): string {.inline.} = self.source + +# Utility functions + +proc peek*(self: Compiler, distance: int = 0): ASTNode = + ## Peeks at the AST node at the given distance. + ## If the distance is out of bounds, the last + ## AST node in the tree is returned. A negative + ## distance may be used to retrieve previously + ## consumed AST nodes + if self.ast.high() == -1 or self.current + distance > self.ast.high() or self.current + distance < 0: + result = self.ast[^1] + else: + result = self.ast[self.current + distance] + + +proc done*(self: Compiler): bool {.inline.} = + ## Returns true if the compiler is done + ## compiling, false otherwise + result = self.current > self.ast.high() + + +proc error*(self: Compiler, message: string, node: ASTNode = nil) {.inline.} = + ## Raises a CompileError exception + let node = if node.isNil(): self.getCurrentNode() else: node + raise CompileError(msg: message, node: node, line: node.token.line, file: node.file, compiler: self) + + +proc warning*(self: Compiler, kind: WarningKind, message: string, name: Name = nil, node: ASTNode = nil) = + ## Raises a warning. 
Note that warnings are always disabled in REPL mode + if self.replMode or kind in self.disabledWarnings: + return + var node: ASTNode = node + var fn: Declaration + if name.isNil(): + if node.isNil(): + node = self.getCurrentNode() + fn = self.getCurrentFunction() + else: + node = name.node + if node.isNil(): + node = self.getCurrentNode() + if not name.belongsTo.isNil(): + fn = name.belongsTo.node + else: + fn = self.getCurrentFunction() + var file = self.file + if not name.isNil(): + file = name.owner.file + var pos = node.getRelativeBoundaries() + if file notin ["", ""]: + file = relativePath(file, getCurrentDir()) + stderr.styledWrite(fgYellow, styleBright, "Warning in ", fgRed, &"{file}:{node.token.line}:{pos.start}") + if not fn.isNil() and fn.kind == funDecl: + stderr.styledWrite(fgYellow, styleBright, " in function ", fgRed, FunDecl(fn).name.token.lexeme) + stderr.styledWriteLine(styleBright, fgDefault, ": ", message) + try: + # We try to be as specific as possible with the warning message, pointing to the + # line it belongs to, but since warnings are not always raised from the source + # file they're generated in, we take into account the fact that retrieving the + # exact warning location may fail and bail out silently if it does + let line = readFile(file).splitLines()[node.token.line - 1].strip(chars={'\n'}) + stderr.styledWrite(fgYellow, styleBright, "Source line: ", resetStyle, fgDefault, line[0.. 
a.len(): + long = b + short = a + var i = 0 + for cond1 in short: + for cond2 in long: + if not self.compare(cond1.kind, cond2.kind) or cond1.match != cond2.match: + continue + inc(i) + return i >= short.len() + + +proc toIntrinsic*(name: string): Type = + ## Converts a string to an intrinsic + ## type if it is valid and returns nil + ## otherwise + if name == "any": + return Type(kind: Any) + elif name == "all": + return Type(kind: All) + elif name == "auto": + return Type(kind: Auto) + elif name in ["int", "int64", "i64"]: + return Type(kind: Int64) + elif name in ["uint64", "u64", "uint"]: + return Type(kind: UInt64) + elif name in ["int32", "i32"]: + return Type(kind: Int32) + elif name in ["uint32", "u32"]: + return Type(kind: UInt32) + elif name in ["int16", "i16", "short"]: + return Type(kind: Int16) + elif name in ["uint16", "u16"]: + return Type(kind: UInt16) + elif name in ["int8", "i8"]: + return Type(kind: Int8) + elif name in ["uint8", "u8"]: + return Type(kind: UInt8) + elif name in ["f64", "float", "float64"]: + return Type(kind: Float64) + elif name in ["f32", "float32"]: + return Type(kind: Float32) + elif name in ["byte", "b"]: + return Type(kind: Byte) + elif name in ["char", "c"]: + return Type(kind: Char) + elif name == "nan": + return Type(kind: TypeKind.Nan) + elif name == "nil": + return Type(kind: Nil) + elif name == "inf": + return Type(kind: TypeKind.Inf) + elif name == "bool": + return Type(kind: Bool) + elif name == "typevar": + return Type(kind: Typevar) + elif name == "string": + return Type(kind: String) + + +proc infer*(self: Compiler, node: LiteralExpr): Type = + ## Infers the type of a given literal expression + if node.isNil(): + return nil + case node.kind: + of intExpr, binExpr, octExpr, hexExpr: + let size = node.token.lexeme.split("'") + if size.len() == 1: + return Type(kind: Int64) + let typ = size[1].toIntrinsic() + if not self.compare(typ, nil): + return typ + else: + self.error(&"invalid type specifier '{size[1]}' for 
int", node) + of floatExpr: + let size = node.token.lexeme.split("'") + if size.len() == 1: + return Type(kind: Float64) + let typ = size[1].toIntrinsic() + if not typ.isNil(): + return typ + else: + self.error(&"invalid type specifier '{size[1]}' for float", node) + of trueExpr: + return Type(kind: Bool) + of falseExpr: + return Type(kind: Bool) + of strExpr: + return Type(kind: String) + else: + discard # Unreachable + + +proc infer*(self: Compiler, node: Expression): Type = + ## Infers the type of a given expression and + ## returns it + if node.isNil(): + return nil + case node.kind: + of NodeKind.identExpr: + result = self.identifier(IdentExpr(node), compile=false, strict=false) + of NodeKind.unaryExpr: + result = self.unary(UnaryExpr(node), compile=false) + of NodeKind.binaryExpr: + result = self.binary(BinaryExpr(node), compile=false) + of {NodeKind.intExpr, NodeKind.hexExpr, NodeKind.binExpr, NodeKind.octExpr, + NodeKind.strExpr, NodeKind.falseExpr, NodeKind.trueExpr, NodeKind.floatExpr + }: + result = self.infer(LiteralExpr(node)) + of NodeKind.callExpr: + result = self.call(CallExpr(node), compile=false) + of NodeKind.refExpr: + result = Type(kind: Reference, value: self.infer(Ref(node).value)) + of NodeKind.ptrExpr: + result = Type(kind: Pointer, data: self.infer(Ptr(node).value)) + of NodeKind.groupingExpr: + result = self.infer(GroupingExpr(node).expression) + of NodeKind.getItemExpr: + result = self.getItemExpr(GetItemExpr(node), compile=false) + of NodeKind.lambdaExpr: + result = self.lambdaExpr(LambdaExpr(node), compile=false) + else: + discard # TODO + + +proc inferOrError*(self: Compiler, node: Expression): Type = + ## Attempts to infer the type of + ## the given expression and raises an + ## error if it fails + result = self.infer(node) + if result.isNil(): + self.error("expression has no type", node) + + +proc stringify*(self: Compiler, typ: Type): string = + ## Returns the string representation of a + ## type object + if typ.isNil(): + return 
"nil" + case typ.value.kind: + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, TypeKind.Nan, Bool, + TypeKind.Inf, Auto: + result &= ($typ.value.kind).toLowerAscii() + of Pointer: + result &= &"ptr {self.stringify(typ.value)}" + of Reference: + result &= &"ref {self.stringify(typ.value)}" + of Any: + return "any" + of Union: + for i, condition in typ.types: + if i > 0: + result &= " | " + if not condition.match: + result &= "~" + result &= self.stringify(condition.kind) + of Generic: + for i, condition in typ.constraints: + if i > 0: + result &= " | " + if not condition.match: + result &= "~" + result &= self.stringify(condition.kind) + else: + discard + +proc stringify*(self: Compiler, typ: TypedNode): string = + ## Returns the string representation of a + ## type object + if typ.isNil(): + return "nil" + case typ.value.kind: + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, TypeKind.Nan, Bool, + TypeKind.Inf, Auto, Pointer, Reference, Any, + Union, Generic: + result &= self.stringify(typ.value) + of Function: + result &= "fn (" + for i, (argName, argType, argDefault) in typ.value.arguments: + result &= &"{argName}: {self.stringify(argType)}" + if not argDefault.isNil(): + result &= &" = {argDefault}" + if i < typ.value.arguments.len() - 1: + result &= ", " + result &= ")" + if not typ.value.returnType.isNil(): + result &= &": {self.stringify(typ.value.returnType)}" + var node = Declaration(typ.node) + if node.pragmas.len() > 0: + result &= " {" + for i, pragma in node.pragmas: + result &= &"{pragma.name.token.lexeme}" + if pragma.args.len() > 0: + result &= ": " + for j, arg in pragma.args: + result &= arg.token.lexeme + if j < pragma.args.high(): + result &= ", " + if i < node.pragmas.high(): + result &= ", " + else: + result &= "}" + else: + discard + + +proc findByName*(self: Compiler, name: string): seq[Name] = + ## Looks for objects that 
have been already declared + ## with the given name. Returns all objects that apply. + for obj in reversed(self.names): + if obj.ident.token.lexeme == name: + if obj.owner.path != self.currentModule.path: + if obj.isPrivate or self.currentModule notin obj.exports: + continue + result.add(obj) + + +proc findInModule*(self: Compiler, name: string, module: Name): seq[Name] = + ## Looks for objects that have been already declared as + ## public within the given module with the given name. + ## Returns all objects that apply. If the name is an + ## empty string, returns all objects within the given + ## module, regardless of whether they are exported to + ## the current one or not + if name == "": + for obj in reversed(self.names): + if not obj.isPrivate and obj.owner == module: + result.add(obj) + else: + for obj in self.findInModule("", module): + if obj.ident.token.lexeme == name and self.currentModule in obj.exports: + result.add(obj) + + +proc findByType*(self: Compiler, name: string, kind: Type): seq[Name] = + ## Looks for objects that have already been declared + ## with the given name and type. Returns all objects + ## that apply + for name in self.findByName(name): + if self.compare(name.obj.value, kind): + result.add(name) + + +proc findAtDepth*(self: Compiler, name: string, depth: int): seq[Name] {.used.} = + ## Looks for objects that have been already declared + ## with the given name at the given scope depth. + ## Returns all objects that apply + for obj in self.findByName(name): + if obj.depth == depth: + result.add(obj) + + +proc check*(self: Compiler, term: Expression, kind: Type) {.inline.} = + ## Checks the type of term against a known type. 
+ ## Raises an error if appropriate and returns + ## otherwise + let k = self.inferOrError(term) + if not self.compare(k, kind): + self.error(&"expecting value of type {self.stringify(kind)}, got {self.stringify(k)}", term) + elif k.kind == Any and kind.kind != Any: + self.error(&"any is not a valid type in this context") + + +proc isAny*(typ: Type): bool = + ## Returns true if the given type is + ## of (or contains) the any type + case typ.kind: + of Any: + return true + of Generic: + for condition in typ.constraints: + if condition.kind.isAny(): + return true + of Union: + for condition in typ.types: + if condition.kind.isAny(): + return true + else: + return false + + +proc match*(self: Compiler, name: string, kind: Type, node: ASTNode = nil, allowFwd: bool = true): Name = + ## Tries to find a matching function implementation + ## compatible with the given type and returns its + ## name object + var impl: seq[Name] = self.findByType(name, kind) + if impl.len() == 0: + let names = self.findByName(name) + var msg = &"failed to find a suitable implementation for '{name}'" + if names.len() > 0: + msg &= &", found {len(names)} potential candidate" + if names.len() > 1: + msg &= "s" + if self.showMismatches: + msg &= ":" + for name in names: + msg &= &"\n - in {relativePath(name.file, getCurrentDir())}:{name.ident.token.line}:{name.ident.token.relPos.start} -> {self.stringify(name.obj.value)}" + if name.obj.value.kind != Function: + msg &= ": not a callable" + elif kind.arguments.len() != name.obj.value.arguments.len(): + msg &= &": wrong number of arguments (expected {name.obj.value.arguments.len()}, got {kind.arguments.len()})" + else: + for i, arg in kind.arguments: + if not self.compare(arg.kind, name.obj.value.arguments[i].kind): + msg &= &": first mismatch at position {i + 1}: (expected {self.stringify(name.obj.value.arguments[i].kind)}, got {self.stringify(arg.kind)})" + break + else: + msg &= " (compile with --showMismatches for more details)" + else: + msg = 
&"call to undefined function '{name}'" + self.error(msg, node) + elif impl.len() > 1: + # If we happen to find more than one match, we try again + # and ignore forward declarations and automatic functions + impl = filterIt(impl, not it.obj.value.forwarded and not it.obj.value.isAuto) + if impl.len() > 1: + # If there's *still* more than one match, then it's an error + var msg = &"multiple matching implementations of '{name}' found" + if self.showMismatches: + msg &= ":" + for fn in reversed(impl): + msg &= &"\n- in {relativePath(fn.file, getCurrentDir())}, line {fn.line} of type {self.stringify(fn.obj.value)}" + else: + msg &= " (compile with --showMismatches for more details)" + self.error(msg, node) + # This is only true when we're called by self.patchForwardDeclarations() + if impl[0].obj.value.forwarded and not allowFwd: + self.error(&"expecting an implementation for function '{impl[0].ident.token.lexeme}' declared in module '{impl[0].owner.ident.token.lexeme}' at line {impl[0].ident.token.line} of type '{self.stringify(impl[0].obj.value)}'") + result = impl[0] + for (a, b) in zip(result.obj.value.arguments, kind.arguments): + if not a.kind.isAny() and b.kind.isAny(): + self.error("any is not a valid type in this context", node) + + +proc beginScope*(self: Compiler) = + ## Begins a new local scope by incrementing the current + ## scope's depth + inc(self.depth) + + +proc unpackGenerics*(self: Compiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) = + ## Recursively unpacks a type constraint in a generic type + case condition.kind: + of identExpr: + list.add((accept, self.inferOrError(condition))) + if list[^1].kind.kind == Auto: + self.error("automatic types cannot be used within generics", condition) + of binaryExpr: + let condition = BinaryExpr(condition) + case condition.operator.lexeme: + of "|": + self.unpackGenerics(condition.a, list) + self.unpackGenerics(condition.b, list) + else: + self.error("invalid type 
constraint in generic declaration", condition) + of unaryExpr: + let condition = UnaryExpr(condition) + case condition.operator.lexeme: + of "~": + self.unpackGenerics(condition.a, list, accept=false) + else: + self.error("invalid type constraint in generic declaration", condition) + else: + self.error("invalid type constraint in generic declaration", condition) + + +proc unpackUnion*(self: Compiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) = + ## Recursively unpacks a type union + case condition.kind: + of identExpr: + list.add((accept, self.inferOrError(condition))) + of binaryExpr: + let condition = BinaryExpr(condition) + case condition.operator.lexeme: + of "|": + self.unpackUnion(condition.a, list) + self.unpackUnion(condition.b, list) + else: + self.error("invalid type constraint in type union", condition) + of unaryExpr: + let condition = UnaryExpr(condition) + case condition.operator.lexeme: + of "~": + self.unpackUnion(condition.a, list, accept=false) + else: + self.error("invalid type constraint in type union", condition) + else: + self.error("invalid type constraint in type union", condition) + + +proc dispatchPragmas(self: Compiler, name: Name) = discard +proc dispatchDelayedPragmas(self: Compiler, name: Name) = discard + + +proc declare*(self: Compiler, node: ASTNode): Name {.discardable.} = + ## Statically declares a name into the current scope. + ## "Declaring" a name only means updating our internal + ## list of identifiers so that further calls to resolve() + ## correctly return them. There is no code to actually + ## declare a variable at runtime: the value is already + ## on the stack + var declaredName: string = "" + var n: Name + if self.names.high() > 16777215: + # If someone ever hits this limit in real-world scenarios, I swear I'll + # slap myself 100 times with a sign saying "I'm dumb". 
Mark my words + self.error("cannot declare more than 16777215 names at a time") + case node.kind: + of NodeKind.varDecl: + var node = VarDecl(node) + declaredName = node.name.token.lexeme + # Creates a new Name entry so that self.identifier emits the proper stack offset + self.names.add(Name(depth: self.depth, + ident: node.name, + isPrivate: node.isPrivate, + owner: self.currentModule, + file: self.file, + isConst: node.isConst, + obj: nil, # Done later + isLet: node.isLet, + line: node.token.line, + belongsTo: self.currentFunction, + kind: DeclType, + node: node, + isReal: true + )) + n = self.names[^1] + of NodeKind.funDecl: + var node = FunDecl(node) + declaredName = node.name.token.lexeme + var fn = Name(depth: self.depth, + isPrivate: node.isPrivate, + isConst: false, + owner: self.currentModule, + file: self.file, + obj: TypedNode(node: node, + value: Type(kind: Function, + returnType: nil, # We check it later + arguments: @[], + forwarded: node.body.isNil(), + isAuto: false) + ), + ident: node.name, + node: node, + isLet: false, + line: node.token.line, + kind: DeclType, + belongsTo: self.currentFunction, + isReal: true) + if node.generics.len() > 0: + n.isGeneric = true + var typ: Type + for argument in node.arguments: + typ = self.infer(argument.valueType) + if not typ.isNil() and typ.kind == Auto: + n.obj.value.isAuto = true + if n.isGeneric: + self.error("automatic types cannot be used within generics", argument.valueType) + break + typ = self.infer(node.returnType) + if not typ.isNil() and typ.kind == Auto: + n.obj.value.isAuto = true + if n.isGeneric: + self.error("automatic types cannot be used within generics", node.returnType) + self.names.add(fn) + self.prepareFunction(fn) + n = fn + of NodeKind.importStmt: + var node = ImportStmt(node) + # We change the name of the module internally so that + # if you import /path/to/mod, then doing mod.f() will + # still work without any extra work on our end. 
Note how + # we don't change the metadata about the identifier's + # position so that error messages still highlight the + # full path + let path = node.moduleName.token.lexeme + node.moduleName.token.lexeme = node.moduleName.token.lexeme.extractFilename() + self.names.add(Name(depth: self.depth, + owner: self.currentModule, + file: "", # The file of the module isn't known until it's compiled! + path: path, + ident: node.moduleName, + line: node.moduleName.token.line, + kind: NameKind.Module, + isPrivate: false, + isReal: true + )) + n = self.names[^1] + declaredName = self.names[^1].ident.token.lexeme + of NodeKind.typeDecl: + var node = ast.TypeDecl(node) + self.names.add(Name(kind: DeclType, + depth: self.depth, + owner: self.currentModule, + node: node, + ident: node.name, + line: node.token.line, + isPrivate: node.isPrivate, + isReal: true, + belongsTo: self.currentFunction, + obj: TypedNode(node: node, value: Type(kind: TypeDecl)) + ) + ) + n = self.names[^1] + declaredName = node.name.token.lexeme + if node.value.isNil(): + discard # TODO: Fields + else: + case node.value.kind: + of identExpr: + n.obj.value = self.inferOrError(node.value) + of binaryExpr: + # Type union + n.obj.value = Type(kind: Union, types: @[]) + self.unpackUnion(node.value, n.obj.value.types) + else: + discard + else: + discard # TODO: enums + if not n.isNil(): + self.dispatchPragmas(n) + for name in self.findByName(declaredName): + if name == n: + continue + # We don't check for name clashes with functions because self.match() does that + if name.kind == DeclType and name.depth == n.depth and name.owner == n.owner: + self.error(&"re-declaration of {declaredName} is not allowed (previously declared in {name.owner.ident.token.lexeme}:{name.ident.token.line}:{name.ident.token.relPos.start})") + # We emit a bunch of warnings, mostly for QoL + for name in self.names: + if name == n: + break + if name.ident.token.lexeme != declaredName: + continue + if name.owner != n.owner and 
(name.isPrivate or n.owner notin name.exports): + continue + if name.kind == DeclType: + if name.depth < n.depth: + self.warning(WarningKind.ShadowOuterScope, &"'{declaredName}' at depth {name.depth} shadows a name from an outer scope ({name.owner.file}.pn:{name.ident.token.line}:{name.ident.token.relPos.start})", n) + if name.owner != n.owner: + self.warning(WarningKind.ShadowOuterScope, &"'{declaredName}' at depth {name.depth} shadows a name from an outer module ({name.owner.file}.pn:{name.ident.token.line}:{name.ident.token.relPos.start})", n) + return n diff --git a/src/frontend/parsing/parser.nim b/src/frontend/parsing/parser.nim index 4c2750e..06a5dc7 100644 --- a/src/frontend/parsing/parser.nim +++ b/src/frontend/parsing/parser.nim @@ -16,6 +16,7 @@ import std/strformat import std/strutils +import std/tables import std/os @@ -31,9 +32,6 @@ export token, ast, errors type - - LoopContext {.pure.} = enum - Loop, None Precedence {.pure.} = enum ## Operator precedence ## clearly stolen from @@ -66,18 +64,16 @@ type # Only meaningful for parse errors file: string # The list of tokens representing - # the source code to be parsed. - # In most cases, those will come - # from the builtin lexer, but this - # behavior is not enforced and the - # tokenizer is entirely separate from - # the parser + # the source code to be parsed tokens: seq[Token] - # Little internal attribute that tells - # us if we're inside a loop or not. This - # allows us to detect errors like break - # being used outside loops - currentLoop: LoopContext + # Just like scope depth tells us how + # many nested scopes are above us, the + # loop depth tells us how many nested + # loops are above us. It's just a simple + # way of statically detecting stuff like + # the break statement being used outside + # loops. Maybe a bit overkill for a parser? + loopDepth: int # Stores the current function # being parsed. 
This is a reference # to either a FunDecl or LambdaExpr @@ -96,8 +92,13 @@ type lines: seq[tuple[start, stop: int]] # The source of the current module source: string - # Keeps track of imported modules - modules: seq[tuple[name: string, loaded: bool]] + # Keeps track of imported modules. + # The key is the module's fully qualified + # path, while the boolean indicates whether + # it has been fully loaded. This is useful + # to avoid importing a module twice and to + # detect recursive dependency cycles + modules: TableRef[string, bool] ParseError* = ref object of PeonException ## A parsing exception parser*: Parser @@ -140,7 +141,7 @@ proc newOperatorTable: OperatorTable = result.tokens = @[] for prec in Precedence: result.precedence[prec] = @[] - # These operators are currently not built-in + # These operators are currently hardcoded # due to compiler limitations result.addOperator("=") result.addOperator(".") @@ -161,11 +162,12 @@ proc newParser*: Parser = result.file = "" result.tokens = @[] result.currentFunction = nil - result.currentLoop = LoopContext.None + result.loopDepth = 0 result.scopeDepth = 0 result.operators = newOperatorTable() result.tree = @[] result.source = "" + result.modules = newTable[string, bool]() # Public getters for improved error formatting @@ -180,7 +182,7 @@ template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg, -proc peek(self: Parser, distance: int = 0): Token = +proc peek(self: Parser, distance: int = 0): Token {.inline.} = ## Peeks at the token at the given distance. ## If the distance is out of bounds, an EOF ## token is returned. 
A negative distance may @@ -201,7 +203,7 @@ proc done(self: Parser): bool {.inline.} = result = self.peek().kind == EndOfFile -proc step(self: Parser, n: int = 1): Token = +proc step(self: Parser, n: int = 1): Token {.inline.} = ## Steps n tokens into the input, ## returning the last consumed one if self.done(): @@ -227,7 +229,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr # as a symbol and in the cases where we need a specific token we just match the string # directly proc check[T: TokenType or string](self: Parser, kind: T, - distance: int = 0): bool = + distance: int = 0): bool {.inline.} = ## Checks if the given token at the given distance ## matches the expected kind and returns a boolean. ## The distance parameter is passed directly to @@ -239,7 +241,7 @@ proc check[T: TokenType or string](self: Parser, kind: T, self.peek(distance).lexeme == kind -proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = +proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} = ## Calls self.check() in a loop with each entry of ## the given openarray of token kinds and returns ## at the first match. Note that this assumes @@ -251,7 +253,7 @@ proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = return false -proc match[T: TokenType or string](self: Parser, kind: T): bool = +proc match[T: TokenType or string](self: Parser, kind: T): bool {.inline.} = ## Behaves like self.check(), except that when a token ## matches it is also consumed if self.check(kind): @@ -261,7 +263,7 @@ proc match[T: TokenType or string](self: Parser, kind: T): bool = result = false -proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = +proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool {.inline.} = ## Calls self.match() in a loop with each entry of ## the given openarray of token kinds and returns ## at the first match. 
Note that this assumes @@ -273,7 +275,7 @@ proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = result = false -proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) = +proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) {.inline.} = ## Behaves like self.match(), except that ## when a token doesn't match, an error ## is raised. If no error message is @@ -285,7 +287,7 @@ proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", self.error(message) -proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} = +proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.inline, used.} = ## Behaves like self.expect(), except that ## an error is raised only if none of the ## given token kinds matches @@ -307,6 +309,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false, isTemplate: bool = false): Declaration proc declaration(self: Parser): Declaration proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] +proc findOperators(self: Parser, tokens: seq[Token]) # End of forward declarations @@ -436,7 +439,7 @@ proc makeCall(self: Parser, callee: Expression): CallExpr = proc parseGenericArgs(self: Parser) = ## Parses function generic arguments ## like function[type](arg) - discard + discard # TODO proc call(self: Parser): Expression = @@ -596,12 +599,12 @@ proc assertStmt(self: Parser): Statement = result.file = self.file -proc beginScope(self: Parser) = +proc beginScope(self: Parser) {.inline.} = ## Begins a new lexical scope inc(self.scopeDepth) -proc endScope(self: Parser) = +proc endScope(self: Parser) {.inline.} = ## Ends a new lexical scope 
dec(self.scopeDepth) @@ -631,8 +634,7 @@ proc namedBlockStmt(self: Parser): Statement = self.expect(Identifier, "expecting block name after 'block'") var name = newIdentExpr(self.peek(-1), self.scopeDepth) name.file = self.file - let enclosingLoop = self.currentLoop - self.currentLoop = Loop + inc(self.loopDepth) self.expect(LeftBrace, "expecting '{' after 'block'") while not self.check(RightBrace) and not self.done(): code.add(self.declaration()) @@ -642,14 +644,14 @@ proc namedBlockStmt(self: Parser): Statement = result = newNamedBlockStmt(code, name, tok) result.file = self.file self.endScope() - self.currentLoop = enclosingLoop + dec(self.loopDepth) proc breakStmt(self: Parser): Statement = ## Parses break statements let tok = self.peek(-1) var label: IdentExpr - if self.currentLoop != Loop: + if self.loopDepth == 0: self.error("'break' cannot be used outside loops") if self.match(Identifier): label = newIdentExpr(self.peek(-1), self.scopeDepth) @@ -673,7 +675,7 @@ proc continueStmt(self: Parser): Statement = ## Parses continue statements let tok = self.peek(-1) var label: IdentExpr - if self.currentLoop != Loop: + if self.loopDepth == 0: self.error("'continue' cannot be used outside loops") if self.match(Identifier): label = newIdentExpr(self.peek(-1), self.scopeDepth) @@ -747,8 +749,7 @@ proc raiseStmt(self: Parser): Statement = proc forEachStmt(self: Parser): Statement = ## Parses C#-like foreach loops let tok = self.peek(-1) - let enclosingLoop = self.currentLoop - self.currentLoop = Loop + inc(self.loopDepth) self.expect(Identifier) let identifier = newIdentExpr(self.peek(-1), self.scopeDepth) self.expect("in") @@ -756,10 +757,7 @@ proc forEachStmt(self: Parser): Statement = self.expect(LeftBrace) result = newForEachStmt(identifier, expression, self.blockStmt(), tok) result.file = self.file - self.currentLoop = enclosingLoop - - -proc findOperators(self: Parser, tokens: seq[Token]) + dec(self.loopDepth) proc importStmt(self: Parser, fromStmt: bool = 
false): Statement = @@ -806,6 +804,10 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement = break elif i == searchPath.high(): self.error(&"""could not import '{path}': module not found""") + if not self.modules.getOrDefault(path, true): + self.error(&"could not import '{path}' (recursive dependency detected)") + else: + self.modules[path] = false try: var source = readFile(path) var tree = self.tree @@ -819,6 +821,8 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement = self.tree = tree self.current = current self.tokens = tokens + # Module has been fully loaded and can now be used + self.modules[path] = true except IOError: self.error(&"could not import '{path}': {getCurrentExceptionMsg()}") except OSError: @@ -859,14 +863,13 @@ proc whileStmt(self: Parser): Statement = ## Parses a C-style while loop statement let tok = self.peek(-1) self.beginScope() - let enclosingLoop = self.currentLoop + inc(self.loopDepth) let condition = self.expression() self.expect(LeftBrace) - self.currentLoop = Loop result = newWhileStmt(condition, self.blockStmt(), tok) result.file = self.file - self.currentLoop = enclosingLoop self.endScope() + dec(self.loopDepth) proc ifStmt(self: Parser): Statement = @@ -1049,7 +1052,7 @@ proc parseFunExpr(self: Parser): LambdaExpr = proc parseGenericConstraint(self: Parser): Expression = - ## Recursivelt parses a generic constraint + ## Recursively parses a generic constraint ## and returns it as an expression result = self.expression() # First value is always an identifier of some sort if not self.check(RightBracket): @@ -1301,6 +1304,7 @@ proc typeDecl(self: Parser): TypeDecl = var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[] var pragmas: seq[Pragma] = @[] result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, nil, false, false) + result.file = self.file if self.match(LeftBracket): self.parseGenerics(result) self.expect("=", "expecting '=' after type name") @@ -1315,7 
+1319,6 @@ proc typeDecl(self: Parser): TypeDecl = result.isEnum = true of "object": discard self.step() - discard # Default case else: hasNone = true if hasNone: @@ -1334,7 +1337,7 @@ proc typeDecl(self: Parser): TypeDecl = self.expect(LeftBrace, "expecting '{' after type declaration") if self.match(TokenType.Pragma): for pragma in self.parsePragmas(): - pragmas.add(pragma) + result.pragmas.add(pragma) var argName: IdentExpr argPrivate: bool @@ -1356,8 +1359,6 @@ proc typeDecl(self: Parser): TypeDecl = else: if not self.check(RightBrace): self.expect(",", "expecting comma after enum field declaration") - result.pragmas = pragmas - result.file = self.file proc declaration(self: Parser): Declaration = @@ -1420,11 +1421,12 @@ proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[sta self.lines = lines self.current = 0 self.scopeDepth = 0 - self.currentLoop = LoopContext.None + self.loopDepth = 0 self.currentFunction = nil self.tree = @[] if not persist: self.operators = newOperatorTable() + self.modules = newTable[string, bool]() self.findOperators(tokens) while not self.done(): self.tree.add(self.declaration())