# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../util/multibyte
import ../util/symbols
import lexer as l
import parser as p
import ../config

import std/tables
import std/strformat
import std/algorithm
import std/parseutils
import std/strutils
import std/sequtils
import std/sets
import std/os


export ast
export token
export multibyte


type
    TypeKind = enum
        ## An enumeration of compile-time
        ## types
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf, Typevar, Generic,
        Reference, Pointer
        Any  # Any is used internally in a few cases,
             # for example when looking for operators
             # when only the type of the arguments is of
             # interest
    Type = ref object
        ## A wrapper around
        ## compile-time types

        # Set by infer() on the type of a 'var' expression
        mutable: bool
        case kind: TypeKind:
            of Function:
                isLambda: bool
                isGenerator: bool
                isCoroutine: bool
                # Generic functions are not compiled when first
                # resolved (see resolve() and findByName())
                isGeneric: bool
                # Argument names paired with their types
                args: seq[tuple[name: string, kind: Type]]
                # nil for functions that return nothing
                returnType: Type
                isBuiltinFunction: bool
                # Name of the built-in operation; it is looked up
                # by handleBuiltinFunction() to pick an opcode
                builtinOp: string
                # The declaration node of this function
                fun: FunDecl
                isClosure: bool
                # NOTE(review): presumably the size of the closure's
                # environment; confirm against endScope()
                envLen: int
                # Walked recursively by flatten()
                children: seq[Type]
                parent: Type
                retJumps: seq[int]
            of CustomType:
                fields: TableRef[string, Type]
            of Reference, Pointer:
                # The type being referenced/pointed to
                value: Type
            of Generic:
                # cond represents a type constraint. For
                # example, fn foo[T: int & !uint](...) {...}
                # would map to [(true, int), (false, uint)]
                cond: seq[tuple[match: bool, kind: Type]]
                name: string
            else:
                discard

# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode


type
    NameKind {.pure.} = enum
        ## A name enumeration type
        None, Module, Argument, Var, Function, CustomType, Enum

    Name = ref object
        ## A compile-time wrapper around
        ## statically resolved names

        # The name's identifier
        ident: IdentExpr
        # Type of the identifier (NOT of the value!)
        kind: NameKind
        # Owner of the identifier (module)
        owner: string
        # Scope depth
        depth: int
        # Is this name private?
        isPrivate: bool
        # Is this a constant?
        isConst: bool
        # Can this name's value be mutated?
        isLet: bool
        # The name's type
        valueType: Type
        # For functions, this marks where the function's
        # code begins
        codePos: int
        # The function that owns this variable (may be nil!)
        belongsTo: Name
        # Where is this node declared in the file?
        line: int
        # Has this name been closed over?
        isClosedOver: bool
        # Has this name been referenced at least once?
        resolved: bool
        # The AST node associated with this node. This
        # is needed because we compile declarations only
        # if they're actually used
        node: Declaration
        # Is this name exported? (Only makes sense if isPrivate
        # equals false)
        exported: bool

    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements

        # Position in the bytecode where the loop starts
        start: int
        # Scope depth where the loop is located
        depth: int
        # Jump offsets into our bytecode that we need to
        # patch. Used for break statements
        breakJumps: seq[int]

    Compiler* = ref object
        ## A wrapper around the Peon compiler's state

        # The bytecode chunk where we write code to
        chunk: Chunk
        # The output of our parser (AST)
        ast: seq[Declaration]
        # The current AST node we're looking at
        current: int
        # The current file being compiled (used only for
        # error reporting)
        file: string
        # Compile-time "simulation" of the stack at
        # runtime to load variables that have stack
        # behavior more efficiently
        names: seq[Name]
        # The current scope depth. If > 0, we're
        # in a local scope, otherwise it's global
        depth: int
        # Scope ownership data
        scopeOwners: seq[tuple[owner: Name, depth: int]]
        # The current function being compiled
        currentFunction: Name
        # The current loop being compiled (used to
        # keep track of where to jump)
        currentLoop: Loop
        # Are we in REPL mode? If so, Pop instructions
        # for expression statements at the top level are
        # swapped for a special PopRepl instruction that
        # prints the result of the expression once it is
        # evaluated
        replMode: bool
        # The current module being compiled
        # (used to restrict access to statically
        # defined variables at compile time)
        currentModule: string
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
# This sequence is emptied each time a
        # function declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
        # List of closed-over variables
        closures: seq[Name]
        # Compiler procedures called by pragmas
        compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, name: Name)]
        # Stores line data for error reporting
        lines: seq[tuple[start, stop: int]]
        # The source of the current module,
        # used for error reporting
        source: string
        # Currently imported modules
        modules: HashSet[string]
        # Stores the position of all jumps
        jumps: seq[tuple[patched: bool, offset: int]]
        # List of CFI start offsets into our CFI data
        cfiOffsets: seq[tuple[start, stop, pos: int, fn: Name]]
        # We store these objects to compile modules
        lexer: Lexer
        parser: Parser
        # Are we compiling the main module?
        isMainModule: bool

    CompileError* = ref object of PeonException
        ## The exception raised by Compiler.error(): it carries
        ## the compiler instance, the offending node and the
        ## file/module that were being compiled
        compiler*: Compiler
        node*: ASTNode
        file*: string
        module*: string


# Forward declarations
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil, incremental: bool = false, isMainModule: bool = true): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl, name: Name)
proc specialize(self: Compiler, name: Name, args: seq[Expression]): Name
proc matchImpl(self: Compiler, name: string, kind: Type, node: ASTNode = nil): Name
proc infer(self: Compiler, node: LiteralExpr, allowGeneric: bool = false): Type
proc infer(self: Compiler, node: Expression, allowGeneric: bool = false): Type
proc inferOrError[T: LiteralExpr | Expression](self: Compiler, node: T, allowGeneric: bool = false): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByModule(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name]
proc compare(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, pos: int)
proc handleMagicPragma(self: Compiler, pragma: Pragma, name: Name)
proc handlePurePragma(self: Compiler, pragma: Pragma, name: Name)
proc dispatchPragmas(self: Compiler, name: Name)
proc funDecl(self: Compiler, node: FunDecl, name: Name)
proc typeDecl(self: Compiler, node: TypeDecl, name: Name)
proc compileModule(self: Compiler, moduleName: string)
proc generateCall(self: Compiler, fn: Name, args: seq[Expression], line: int)
# End of forward declarations


proc newCompiler*(replMode: bool = false): Compiler =
    ## Initializes a new Compiler object. The "magic" and
    ## "pure" pragma handlers are registered here and the
    ## lexer's symbol table is filled
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.depth = 0
    result.lines = @[]
    result.jumps = @[]
    result.currentFunction = nil
    result.replMode = replMode
    result.currentModule = ""
    result.compilerProcs = newTable[string, proc (self: Compiler, pragma: Pragma, name: Name)]()
    result.compilerProcs["magic"] = handleMagicPragma
    result.compilerProcs["pure"] = handlePurePragma
    result.source = ""
    result.scopeOwners = @[]
    result.lexer = newLexer()
    result.lexer.fillSymbolTable()
    result.parser = newParser()
    result.isMainModule = false
    result.closures = @[]


## Public getters for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the most recently consumed AST node (the last
    ## node once the compiler has walked past the end of the
    ## tree). Returns nil if there is no AST at all and the
    ## first node if nothing has been consumed yet, so that
    ## error() can always build its exception instead of
    ## tripping over an out-of-bounds index
    if self.ast.len() == 0:
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    result = self.ast[max(self.current - 1, 0)]


proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the declaration of the function being
    ## compiled, or nil when there is none
    (if self.currentFunction.isNil(): nil else: self.currentFunction.valueType.fun)


proc getFile*(self: Compiler): string {.inline.} = self.file
proc getModule*(self: Compiler): string {.inline.} = self.currentModule
proc getLines*(self: Compiler): seq[tuple[start, stop: int]] = self.lines
proc getSource*(self: Compiler): string = self.source
# Line numbers are 1-based here: entry line - 1 of self.lines is returned
proc getRelPos*(self: Compiler, line: int): tuple[start, stop: int] = self.lines[line - 1]


## Utility functions
proc `$`*(self: Name): string = $self[]
proc `$`(self: Type): string = $self[]


proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance.
    ## If the distance is out of bounds, the last
    ## AST node in the tree is returned. A negative
    ## distance may be used to retrieve previously
    ## consumed AST nodes. If the tree is empty there
    ## is no node to fall back to and nil is returned
    if self.ast.len() == 0:
        return nil
    if self.current + distance > self.ast.high() or self.current + distance < 0:
        result = self.ast[^1]
    else:
        result = self.ast[self.current + distance]


proc done(self: Compiler): bool {.inline.} =
    ## Returns true if the compiler is done
    ## compiling, false otherwise
    result = self.current > self.ast.high()


proc error(self: Compiler, message: string, node: ASTNode = nil) {.raises: [CompileError], inline.} =
    ## Raises a CompileError exception. When no node is
    ## given, the current node is attached to the error
    raise CompileError(msg: message, node: if node.isNil(): self.getCurrentNode() else: node, file: self.file, module: self.currentModule, compiler: self)


proc step(self: Compiler): ASTNode {.inline.} =
    ## Steps to the next node and returns
    ## the consumed one
    result = self.peek()
    if not self.done():
        self.current += 1


proc emitByte(self: Compiler, byt: OpCode | uint8, line: int) {.inline.} =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled
    self.chunk.write(uint8 byt, line)


proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
    ## Handy helper method to write arbitrary bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    for b in bytarr:
        self.emitByte(b, line)


proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s.
    ## Types that have no constant encoding here leave the
    ## result zero-initialized
    var lit: string
    if typ.kind in [UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64]:
        lit = val.token.lexeme
        if "'" in lit:
            # Drop the size specifier (e.g. 12'u8 -> 12): everything
            # from the last apostrophe onwards is removed
            lit.setLen(lit.rfind('\''))
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(parseInt(lit))])
        of Int16, UInt16:
            result = self.chunk.writeConstant(parseInt(lit).toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(parseInt(lit).toQuad())
        of Int64:
            result = self.chunk.writeConstant(parseInt(lit).toLong())
        of UInt64:
            result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
        of String:
            # The lexeme includes both quotes, and emitConstant
            # advertises a payload of len - 2 bytes: strip the
            # closing quote as well as the opening one so that
            # exactly that many bytes are written
            result = self.chunk.writeConstant(val.token.lexeme[1..^2].toBytes())
        of Float32:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
        of Float64:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[8, uint8]](f))
        else:
            discard


proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits a constant instruction along
    ## with its operand. For strings, the
    ## payload length (lexeme minus quotes)
    ## is emitted before the operand
    case kind.kind:
        of Int64:
            self.emitByte(LoadInt64, obj.token.line)
        of UInt64:
            self.emitByte(LoadUInt64, obj.token.line)
        of Int32:
            self.emitByte(LoadInt32, obj.token.line)
        of UInt32:
            self.emitByte(LoadUInt32, obj.token.line)
        of Int16:
            self.emitByte(LoadInt16, obj.token.line)
        of UInt16:
            self.emitByte(LoadUInt16, obj.token.line)
        of Int8:
            self.emitByte(LoadInt8, obj.token.line)
        of UInt8:
            self.emitByte(LoadUInt8, obj.token.line)
        of String:
            self.emitByte(LoadString, obj.token.line)
            let str = LiteralExpr(obj).literal.lexeme
            if str.len() >= 16777216:
                self.error("string constants cannot be larger than 16777215 bytes")
            self.emitBytes((str.len() - 2).toTriple(), obj.token.line)
        of Float32:
            self.emitByte(LoadFloat32, obj.token.line)
        of Float64:
            self.emitByte(LoadFloat64, obj.token.line)
        else:
            discard  # TODO
    self.emitBytes(self.makeConstant(obj, kind), obj.token.line)


proc setJump(self: Compiler, offset: int, jmp: array[3, uint8]) =
    ## Sets a jump at the given
    ## offset to the given value
    self.chunk.code[offset + 1] = jmp[0]
    self.chunk.code[offset + 2] = jmp[1]
# Final statement of setJump(self, offset, jmp: array[3, uint8]); the
# signature and first two assignments are on the previous source line
    self.chunk.code[offset + 3] = jmp[2]


proc setJump(self: Compiler, offset: int, jmp: seq[uint8]) =
    ## Sets a jump at the given
    ## offset to the given value.
    ## Only the first three bytes
    ## of jmp are used
    self.chunk.code[offset + 1] = jmp[0]
    self.chunk.code[offset + 2] = jmp[1]
    self.chunk.code[offset + 3] = jmp[2]


proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted relative
    ## jump using emitJump. The offset argument
    ## is the identifier returned by emitJump
    ## (an index into self.jumps)
    let jump: int = self.chunk.code.len() - self.jumps[offset].offset
    if jump < 0:
        self.error("invalid jump size (< 0), did the bytecode size change without fixJumps being called?")
    if jump > 16777215:
        # TODO: Emit consecutive jumps?
        self.error("cannot jump more than 16777215 instructions")
    # The 4 bytes of the jump instruction itself (opcode
    # plus 3-byte operand) are not part of the distance
    self.setJump(self.jumps[offset].offset, (jump - 4).toTriple())
    self.jumps[offset].patched = true


proc emitJump(self: Compiler, opcode: OpCode, line: int): int =
    ## Emits a dummy jump offset to be patched later
    ## and returns a unique identifier for that jump
    ## to be passed to patchJump
    self.emitByte(opcode, line)
    # The recorded offset is the position of the opcode byte
    self.jumps.add((patched: false, offset: self.chunk.code.high()))
    self.emitBytes(0.toTriple(), line)
    result = self.jumps.high()


proc fixCFIOffsets(self: Compiler, oldLen: int, modifiedAt: int) =
    ## Fixes CFI offsets after the size of our
    ## bytecode has changed. oldLen is the size
    ## of the bytecode before the change and
    ## modifiedAt is where the change happened:
    ## only entries starting at or after that
    ## position are shifted
    # NOTE(review): the source's indentation was lost; every
    # update below is assumed to sit inside the
    # 'cfi.start >= modifiedAt' check - confirm
    if oldLen == self.chunk.code.len():
        return
    let offset = self.chunk.code.len() - oldLen
    for cfi in self.cfiOffsets.mitems():
        if cfi.start >= modifiedAt:
            # Rewrite the 3-byte start offset stored in the CFI data...
            let newStart = (cfi.start + offset).toTriple()
            self.chunk.cfi[cfi.pos] = newStart[0]
            self.chunk.cfi[cfi.pos + 1] = newStart[1]
            self.chunk.cfi[cfi.pos + 2] = newStart[2]
            # ...then do the same for the 3 bytes right after it
            let oldStop = [self.chunk.cfi[cfi.pos + 3], self.chunk.cfi[cfi.pos + 4], self.chunk.cfi[cfi.pos + 5]].fromTriple().int
            let newStop = (oldStop + offset).toTriple()
            self.chunk.cfi[cfi.pos + 3] = newStop[0]
            self.chunk.cfi[cfi.pos + 4] = newStop[1]
            self.chunk.cfi[cfi.pos + 5] = newStop[2]
            # Keep our own bookkeeping in sync with the chunk
            cfi.fn.codePos += offset
            cfi.start += offset
            cfi.stop += offset


proc fixJumps(self: Compiler, oldLen: int, modifiedAt: int) =
# Body of fixJumps; its signature closes the previous source line
    ## Fixes jump offsets after the size
    ## of our bytecode has changed
    # NOTE(review): the source's indentation was lost; the nesting
    # in this proc and in the ones below is reconstructed - confirm
    if oldLen == self.chunk.code.len():
        return
    let offset = self.chunk.code.len() - oldLen
    for jump in self.jumps.mitems():
        if jump.offset >= modifiedAt:
            # While all already-patched jumps need
            # to have their jump offsets fixed, we
            # also need to update our internal jumps
            # list in cases where we shifted the jump
            # instruction itself into the code!
            jump.offset += offset
            self.setJump(jump.offset, self.chunk.code[jump.offset + 1..jump.offset + 3])


proc resolve(self: Compiler, name: string): Name =
    ## Traverses all existing namespaces and returns
    ## the first object with the given name. Returns
    ## nil when the name can't be found. Note that
    ## when a type or function declaration is first
    ## resolved, it is also compiled on-the-fly
    # Names are scanned from the most recently declared one
    for obj in reversed(self.names):
        if obj.ident.token.lexeme == name:
            # Arguments are only visible from the function they belong to
            if obj.kind == NameKind.Argument and obj.belongsTo != self.currentFunction:
                continue
            if obj.owner != self.currentModule:
                # We don't own this name, but we
                # may still have access to it
                if obj.isPrivate:
                    # Name is private in its owner
                    # module, so we definitely can't
                    # use it
                    continue
                elif obj.exported:
                    # The name is public in its owner
                    # module and said module has explicitly
                    # exported it to us: we can use it
                    result = obj
                    break
                # If the name is public but not exported in
                # its owner module, then we act as if it's
                # private. This is to avoid namespace pollution
                # from imports (i.e. if module A imports modules
                # C and D and module B imports module A, then B
                # might not want to also have access to C's and D's
                # names as they might clash with its own stuff)
                continue
            result = obj
            break
    if not result.isNil() and not result.resolved:
        # There's no reason to compile a declaration
        # unless it is used at least once: this way
        # not only do we save space if a name is declared
        # but never used, it also makes it easier to
        # implement generics and lets us emit warnings for
        # unused names once they go out of scope. Yay!
        result.resolved = true
        # Now we just dispatch to one of our functions to
        # compile the declaration
        case result.kind:
            of NameKind.CustomType:
                self.typeDecl(TypeDecl(result.node), result)
            of NameKind.Function:
                # Generic functions need to be compiled at
                # the call site because we need to know the
                # type of the arguments, but regular functions
                # can be precompiled as soon as we resolve them
                if not result.valueType.isGeneric:
                    self.funDecl(FunDecl(result.node), result)
            else:
                discard


proc resolve(self: Compiler, name: IdentExpr): Name =
    ## Version of resolve that takes Identifier
    ## AST nodes instead of strings
    return self.resolve(name.token.lexeme)


proc resolveOrError[T: IdentExpr | string](self: Compiler, name: T): Name =
    ## Calls self.resolve() and errors out with an appropriate
    ## message if it returns nil
    result = self.resolve(name)
    if result.isNil():
        # Only the IdentExpr variant can attach a node to the error
        when T is IdentExpr:
            self.error(&"reference to undefined name '{name.token.lexeme}'", name)
        when T is string:
            self.error(&"reference to undefined name '{name}'")


proc getStackPos(self: Compiler, name: Name): int =
    ## Returns the predicted call stack position of a
    ## given name, relative to the current frame
    var found = false
    # Counting starts at 2, not 0.
    # NOTE(review): presumably the first slots of a frame are
    # reserved by the VM - confirm against the runtime
    result = 2
    for variable in self.names:
        if variable.kind in [NameKind.Module, NameKind.CustomType, NameKind.Enum, NameKind.Function, NameKind.None]:
            # These names don't have a runtime representation on the call stack, so we skip them
            continue
        elif variable.kind == NameKind.Argument and variable.depth > self.depth:
            # Argument of a function we haven't compiled yet (or one that we're
            # not in).
# Ignore it, as it won't exist at runtime
            continue
        # NOTE(review): the source's indentation was lost; the three
        # checks below are assumed to be nested under the belongsTo
        # test and the module-ownership test to be its sibling - confirm
        elif not variable.belongsTo.isNil():
            if variable.belongsTo.valueType.isBuiltinFunction:
                # Builtin functions don't exist at runtime either, so variables belonging to them
                # are not present in the stack
                continue
            elif variable.valueType.kind == Generic:
                # Generics are also a purely compile-time construct and are therefore
                # ignored as far as stack positioning goes
                continue
            elif variable.belongsTo != name.belongsTo:
                # Since referencing a function immediately compiles it, this means
                # that if there's a function A with an argument x that calls another
                # function B with an argument also named x, that second "x" would
                # shadow the first one, leading to an incorrect stack offset
                continue
        elif variable.owner != self.currentModule:
            # We don't own this variable, so we check
            # if the owner exported it to us. If not,
            # we skip it and pretend it doesn't exist
            if variable.isPrivate or not variable.exported:
                continue
        if name == variable:
            # After all of these checks, we can
            # finally check whether the two names
            # match (note: this also includes scope
            # depth)
            found = true
            break
        inc(result)
    if not found:
        result = -1


proc getClosurePos(self: Compiler, name: Name): int =
    ## Returns the position of a name in a closure's
    ## environment, or -1 if the name is not in it.
    ## -1 is also returned when no function is being
    ## compiled or the current function is not a closure
    if self.currentFunction.isNil() or not self.currentFunction.valueType.isClosure:
        return -1
    # find() yields the index of the first match or -1
    result = self.closures.find(name)


proc compare(self: Compiler, a, b: Type): bool =
    ## Compares two type objects
    ## for equality (works with nil!)
# The nil code here is for void functions (when
    # we compare their return types)
    # NOTE(review): the source's indentation was lost; the nesting
    # in this span is reconstructed - confirm
    if a.isNil():
        return b.isNil() or b.kind == Any
    elif b.isNil():
        return a.isNil() or a.kind == Any
    elif a.kind == Any or b.kind == Any:
        # This is needed internally: user code
        # cannot generate code for matching
        # arbitrary types, but we need it for
        # function calls and stuff like that
        # since peon doesn't have return type
        # inference
        return true
    elif a.kind != b.kind and not (a.kind == Generic or b.kind == Generic):
        # Next, we see the type discriminant:
        # If they're different, then they can't
        # be the same type! For generics, we match
        # those later, as we need access to the type
        # discriminant inside a case statement
        return false
    if a.kind != Generic and b.kind != Generic:
        case a.kind:
            # If all previous checks pass, it's time
            # to go through each possible type peon
            # supports and compare it
            of Int8, UInt8, Int16, UInt16, Int32,
               UInt32, Int64, UInt64, Float32, Float64,
               Char, Byte, String, Nil, Nan, Bool, Inf:
                # A value type's type is always equal to
                # another one's
                return true
            of Reference, Pointer:
                # Here we already know that both
                # a and b are of either of the two
                # types in this branch, so we just need
                # to compare their values
                return self.compare(a.value, b.value)
            of Function:
                # Functions are a bit trickier
                if a.args.len() != b.args.len():
                    return false
                if a.isCoroutine != b.isCoroutine:
                    return false
                if not self.compare(a.returnType, b.returnType):
                    return false
                for (argA, argB) in zip(a.args, b.args):
                    if not self.compare(argA.kind, argB.kind):
                        return false
                return true
            else:
                discard  # TODO: Custom types
    else:
        # At least one side is a generic: its constraints are
        # tried in order and the first one that matches decides
        case a.kind:
            of Generic:
                # Generic types
                case b.kind:
                    of Generic:
                        for c1 in a.cond:
                            for c2 in b.cond:
                                if self.compare(c1.kind, c2.kind):
                                    return c1.match == c2.match
                    else:
                        for constraint in a.cond:
                            if self.compare(constraint.kind, b):
                                return constraint.match
            else:
                discard
        case b.kind:
            of Generic:
                # Generic types
                case a.kind:
                    of Generic:
                        for c1 in a.cond:
                            for c2 in b.cond:
                                if self.compare(c1.kind, c2.kind):
                                    return c1.match == c2.match
                    else:
                        for constraint in b.cond:
                            if self.compare(constraint.kind, a):
                                return constraint.match
            else:
                discard
    return false


proc toIntrinsic(name: string): Type =
    ## Converts a string to an intrinsic
    ## type if it is valid and returns nil
    ## otherwise
    if name == "all":
        return Type(kind: Any)
    elif name in ["int", "int64", "i64"]:
        return Type(kind: Int64)
    elif name in ["uint64", "u64", "uint"]:
        return Type(kind: UInt64)
    elif name in ["int32", "i32"]:
        return Type(kind: Int32)
    elif name in ["uint32", "u32"]:
        return Type(kind: UInt32)
    elif name in ["int16", "i16", "short"]:
        return Type(kind: Int16)
    elif name in ["uint16", "u16"]:
        return Type(kind: UInt16)
    elif name in ["int8", "i8"]:
        return Type(kind: Int8)
    elif name in ["uint8", "u8"]:
        return Type(kind: UInt8)
    elif name in ["f64", "float", "float64"]:
        return Type(kind: Float64)
    elif name in ["f32", "float32"]:
        return Type(kind: Float32)
    elif name in ["byte", "b"]:
        return Type(kind: Byte)
    elif name in ["char", "c"]:
        return Type(kind: Char)
    elif name == "nan":
        return Type(kind: Nan)
    elif name == "nil":
        return Type(kind: Nil)
    elif name == "inf":
        return Type(kind: Inf)
    elif name == "bool":
        return Type(kind: Bool)
    elif name == "typevar":
        return Type(kind: Typevar)
    elif name == "string":
        return Type(kind: String)
    else:
        return nil


proc infer(self: Compiler, node: LiteralExpr, allowGeneric: bool = false): Type =
    ## Infers the type of a given literal expression
    ## (if the expression is nil, nil is returned).
    ## Numeric literals may carry a size specifier
    ## after an apostrophe (e.g. 1'u8), which picks
    ## the type; without one, integers are Int64 and
    ## floats are Float64
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                return Type(kind: Int64)
            let typ = size[1].toIntrinsic()
            # compare(typ, nil) is true for nil and for 'all',
            # neither of which is a usable specifier
            if not self.compare(typ, nil):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for int")
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                return Type(kind: Float64)
            let typ = size[1].toIntrinsic()
            if not typ.isNil():
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for float")
        of nilExpr:
            return Type(kind: Nil)
        of trueExpr:
            return Type(kind: Bool)
        of falseExpr:
            return Type(kind: Bool)
        of nanExpr:
            return Type(kind: TypeKind.Nan)
        of infExpr:
            return Type(kind: TypeKind.Inf)
        of strExpr:
            return Type(kind: String)
        else:
            discard  # TODO


proc infer(self: Compiler, node: Expression, allowGeneric: bool = false): Type =
    ## Infers the type of a given expression and
    ## returns it (if the node is nil, nil is
    ## returned). Always returns a concrete type
    ## unless allowGeneric is set to true. Nil is
    ## also returned when no type can be determined
    ## (e.g. an unknown name or a call to a function
    ## that returns nothing)
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            var name = self.resolve(node)
            if not name.isNil():
                result = name.valueType
                if not result.isNil() and result.kind == Generic and not allowGeneric:
                    if name.belongsTo.isNil():
                        name = self.resolve(result.name)
                        if not name.isNil():
                            result = name.valueType
                    else:
                        # Look the name up among the arguments
                        # of the function that owns it
                        for arg in name.belongsTo.valueType.args:
                            if node.token.lexeme == arg.name:
                                result = arg.kind
            else:
                # Not a declared name: it may still be a builtin type
                result = node.name.lexeme.toIntrinsic()
        of unaryExpr:
            let node = UnaryExpr(node)
            let impl = self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.infer(node.a))]), node)
            result = impl.valueType.returnType
            # The return type is nil for operators that return nothing
            if not result.isNil() and result.kind == Generic and not allowGeneric:
                result = self.specialize(impl, @[node.a]).valueType.returnType
        of binaryExpr:
            let node = BinaryExpr(node)
            let impl = self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.infer(node.a)), ("", self.infer(node.b))]), node)
            result = impl.valueType.returnType
            # The return type is nil for operators that return nothing
            if not result.isNil() and result.kind == Generic and not allowGeneric:
                result = self.specialize(impl, @[node.a, node.b]).valueType.returnType
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr
           }:
            result = self.infer(LiteralExpr(node))
        of lambdaExpr:
            var node = LambdaExpr(node)
            result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
            if not node.returnType.isNil():
                result.returnType = self.infer(node.returnType)
            for argument in node.arguments:
                result.args.add((argument.name.token.lexeme, self.infer(argument.valueType)))
        of callExpr:
            var node = CallExpr(node)
            case node.callee.kind:
                of identExpr:
                    let resolved = self.resolve(IdentExpr(node.callee))
                    if not resolved.isNil():
                        case resolved.valueType.kind:
                            of Function:
                                result = resolved.valueType.returnType
                            else:
                                result = resolved.valueType
                    else:
                        result = nil
                of lambdaExpr:
                    result = self.infer(LambdaExpr(node.callee).returnType)
                of callExpr:
                    result = self.infer(CallExpr(node.callee))
                    if not result.isNil():
                        result = result.returnType
                else:
                    discard  # Unreachable
        of varExpr:
            result = self.infer(Var(node).value)
            # The wrapped expression may have no type: leave the
            # nil for inferOrError to report instead of crashing.
            # NOTE(review): this flags the Type object returned by
            # infer() itself, which may be shared with other names
            if not result.isNil():
                result.mutable = true
        of refExpr:
            result = Type(kind: Reference, value: self.infer(Ref(node).value))
        of ptrExpr:
            result = Type(kind: Pointer, value: self.infer(Ptr(node).value))
        of groupingExpr:
            result = self.infer(GroupingExpr(node).expression)
        else:
            discard  # Unreachable


proc inferOrError[T: LiteralExpr | Expression](self: Compiler, node: T, allowGeneric: bool = false): Type =
    ## Attempts to infer the type of
    ## the given expression and raises an
    ## error with an appropriate message if
    ## it fails
    result = self.infer(node, allowGeneric)
    if result.isNil():
        case node.kind:
            of identExpr:
                self.error(&"reference to undefined name '{IdentExpr(node).token.lexeme}'", node)
            of callExpr:
                let node = CallExpr(node)
                if node.callee.kind == identExpr:
                    self.error(&"call to undefined function '{IdentExpr(node.callee).token.lexeme}'", node)
                else:
                    self.error("expression has no type", node)
            else:
                self.error("expression has no type", node)


proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a
    ## type object ("nil" for a nil type). Kinds
    ## without a branch below yield an empty string
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            result &= ($typ.kind).toLowerAscii()
        of Pointer:
            result &= &"ptr {self.typeToStr(typ.value)}"
        of Reference:
            result &= &"ref {self.typeToStr(typ.value)}"
        of Function:
            result &= "fn ("
            for i, (argName, argType) in typ.args:
                result &= &"{argName}: "
                # Argument types may be nil when inference failed:
                # the recursive call below prints those as "nil"
                if not argType.isNil() and argType.mutable:
                    result &= "var "
                result &= self.typeToStr(argType)
                if i < typ.args.len() - 1:
                    result &= ", "
            result &= ")"
            if not typ.returnType.isNil():
                result &= &": {self.typeToStr(typ.returnType)}"
        of Generic:
            # Constraints are joined by '|' and
            # negated ones are prefixed with '~'
            for i, condition in typ.cond:
                if i > 0:
                    result &= " | "
                if not condition.match:
                    result &= "~"
                result &= self.typeToStr(condition.kind)
        else:
            discard


proc findByName(self: Compiler, name: string): seq[Name] =
    ## Looks for objects that have been already declared
    ## with the given name. Returns all objects that apply.
    ## As with resolve(), this will cause type and function
    ## declarations to be compiled on-the-fly
    for obj in reversed(self.names):
        if obj.ident.token.lexeme == name:
            if obj.owner != self.currentModule:
                # Names from other modules must be public and exported
                if obj.isPrivate or not obj.exported:
                    continue
            result.add(obj)
    for n in result:
        if n.resolved:
            continue
        n.resolved = true
        case n.kind:
            of NameKind.CustomType:
                self.typeDecl(TypeDecl(n.node), n)
            of NameKind.Function:
                if not n.valueType.isGeneric:
                    self.funDecl(FunDecl(n.node), n)
            else:
                discard


proc findByModule(self: Compiler, name: string): seq[Name] =
    ## Looks for objects that have been already declared AS
    ## public within the given module.
# Returns all objects that apply
    for obj in reversed(self.names):
        if not obj.isPrivate and obj.owner == name:
            result.add(obj)


proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name] =
    ## Looks for objects that have already been declared
    ## with the given name and type. If depth is not -1,
    ## it also compares the name's scope depth. Returns
    ## all objects that apply
    for obj in self.findByName(name):
        if self.compare(obj.valueType, kind) and (depth == -1 or depth == obj.depth):
            result.add(obj)


proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] {.used.} =
    ## Looks for objects that have been already declared
    ## with the given name at the given scope depth.
    ## Returns all objects that apply
    for obj in self.findByName(name):
        if obj.depth == depth:
            result.add(obj)


proc matchImpl(self: Compiler, name: string, kind: Type, node: ASTNode = nil): Name =
    ## Tries to find a matching function implementation
    ## compatible with the given type and returns its
    ## name object. Raises a CompileError (via error())
    ## when there is no match or more than one; in the
    ## former case every same-named candidate is listed
    ## along with the reason it was rejected
    # NOTE(review): the source's indentation was lost; the nesting
    # in this span is reconstructed - confirm
    let impl = self.findByType(name, kind)
    if impl.len() == 0:
        var msg = &"cannot find a suitable implementation for '{name}'"
        let names = self.findByName(name)
        if names.len() > 0:
            msg &= &", found {len(names)} potential candidate"
            if names.len() > 1:
                msg &= "s"
            msg &= ": "
            for name in names:
                msg &= &"\n - in module '{name.owner}' at line {name.ident.token.line} of type '{self.typeToStr(name.valueType)}'"
                # A candidate may have no type at all: treat it like any
                # other non-function instead of dereferencing nil
                if name.valueType.isNil() or name.valueType.kind != Function:
                    msg &= ", not a callable"
                elif kind.args.len() != name.valueType.args.len():
                    msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})"
                else:
                    for i, arg in kind.args:
                        let expected = name.valueType.args[i].kind
                        # Either side may be nil when inference failed, so
                        # a missing type counts as "not mutable" here and
                        # is left to compare(), which handles nil
                        let wantsVar = not expected.isNil() and expected.mutable
                        let isVar = not arg.kind.isNil() and arg.kind.mutable
                        if wantsVar and not isVar:
                            msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'"
                            break
                        elif not self.compare(arg.kind, expected):
                            msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(expected)}', got '{self.typeToStr(arg.kind)}' instead"
                            break
        self.error(msg, node)
    elif impl.len() > 1:
        var msg = &"multiple matching implementations of '{name}' found:\n"
        for fn in reversed(impl):
            msg &= &"- in module '{fn.owner}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
        self.error(msg, node)
    result = impl[0]


proc check(self: Compiler, term: Expression, kind: Type, allowAny: bool = false) =
    ## Checks the type of term against a known type.
    ## Raises an error if appropriate and returns
    ## otherwise
    let k = self.inferOrError(term)
    if k.kind == Any and not allowAny:
        # Any should only be used internally: error!
        self.error("'all' is not a valid type in this context", term)
    elif not self.compare(k, kind):
        self.error(&"expecting value of type '{self.typeToStr(kind)}', got '{self.typeToStr(k)}' instead", term)


proc handleBuiltinFunction(self: Compiler, fn: Type, args: seq[Expression], line: int) =
    ## Emits instructions for builtin functions
    ## such as addition or subtraction. Raises an
    ## error if the builtin's name is not known
    if fn.builtinOp notin ["LogicalOr", "LogicalAnd"]:
        # The logical operators compile their own operands
        # (see below). For everything else, with two
        # operands the second one is compiled first
        if len(args) == 2:
            self.expression(args[1])
            self.expression(args[0])
        elif len(args) == 1:
            self.expression(args[0])
    const codes: Table[string, OpCode] = {"Negate": Negate,
                                          "NegateFloat32": NegateFloat32,
                                          "NegateFloat64": NegateFloat64,
                                          "Add": Add,
                                          "Subtract": Subtract,
                                          "Divide": Divide,
                                          "Multiply": Multiply,
                                          "SignedDivide": SignedDivide,
                                          "AddFloat64": AddFloat64,
                                          "SubtractFloat64": SubtractFloat64,
                                          "DivideFloat64": DivideFloat64,
                                          "MultiplyFloat64": MultiplyFloat64,
                                          "AddFloat32": AddFloat32,
                                          "SubtractFloat32": SubtractFloat32,
                                          "DivideFloat32": DivideFloat32,
                                          "MultiplyFloat32": MultiplyFloat32,
                                          "Pow": Pow,
                                          "SignedPow": SignedPow,
                                          "PowFloat32": PowFloat32,
                                          "PowFloat64": PowFloat64,
                                          "Mod": Mod,
                                          "SignedMod": SignedMod,
                                          "ModFloat32": ModFloat32,
                                          "ModFloat64": ModFloat64,
                                          "Or": Or,
                                          "And": And,
                                          "Xor": Xor,
                                          "Not": Not,
                                          "LShift": LShift,
                                          "RShift": RShift,
                                          "Equal": Equal,
                                          "NotEqual": NotEqual,
                                          "LessThan": LessThan,
                                          "GreaterThan": GreaterThan,
                                          "LessOrEqual": LessOrEqual,
                                          "GreaterOrEqual": GreaterOrEqual,
                                          "PrintInt64": PrintInt64,
                                          "PrintUInt64": PrintUInt64,
                                          "PrintInt32": PrintInt32,
                                          "PrintUInt32": PrintUInt32,
                                          "PrintInt16": PrintInt16,
                                          "PrintUInt16": PrintUInt16,
                                          "PrintInt8": PrintInt8,
                                          "PrintUInt8": PrintUInt8,
                                          "PrintFloat64": PrintFloat64,
                                          "PrintFloat32": PrintFloat32,
                                          "PrintHex": PrintHex,
                                          "PrintBool": PrintBool,
                                          "PrintNan": PrintNan,
                                          "PrintInf": PrintInf,
                                          "PrintString": PrintString,
                                          "SysClock64": SysClock64,
                                          "LogicalNot": LogicalNot,
                                          "NegInf": LoadNInf
                                          }.to_table()
    if fn.builtinOp in codes:
        self.emitByte(codes[fn.builtinOp], line)
        return
    # Some builtin operations are slightly more complex
    # so we handle them separately
    case fn.builtinOp:
        of "LogicalOr":
            # The jump emitted after the first operand
            # is patched to land right after the second
            self.expression(args[0])
            let jump = self.emitJump(JumpIfTrue, line)
            self.expression(args[1])
            self.patchJump(jump)
        of "LogicalAnd":
            self.expression(args[0])
            var jump = self.emitJump(JumpIfFalseOrPop, line)
            self.expression(args[1])
            self.patchJump(jump)
        else:
            self.error(&"unknown built-in: '{fn.builtinOp}'", fn.fun)


proc beginScope(self: Compiler) =
    ## Begins a new local scope by incrementing the current
    ## scope's depth and recording which function owns it
    inc(self.depth)
    self.scopeOwners.add((self.currentFunction, self.depth))


# Flattens our weird function tree into a linear
# list
proc flattenImpl(self: Type, to: var seq[Type]) =
    ## Appends self and then, recursively, each
    ## of its children to the given sequence
    to.add(self)
    for child in self.children:
        flattenImpl(child, to)


proc flatten(self: Type): seq[Type] = flattenImpl(self, result)


proc endScope(self: Compiler) =
    ## Ends the current local scope
    if self.depth < 0:
        self.error("cannot call endScope with depth < 0 (This is an internal error and most likely a bug)")
    discard self.scopeOwners.pop()
    dec(self.depth)
    var names: seq[Name] = @[]
    var popCount = 0
    if self.depth == -1 and not self.isMainModule:
        # When we're compiling another module, we don't
        # close its global scope because self.compileModule()
        # needs access to it
        return
    for name in self.names:
        if name.depth > self.depth:
names.add(name) #[if not name.resolved: # TODO: Emit a warning? continue]# if name.owner != self.currentModule and self.depth > -1: # Names coming from other modules only go out of scope # when the global scope is closed (i.e. at the end of # the module) continue if name.kind == NameKind.Var: inc(popCount) elif name.kind == NameKind.Argument: if not name.belongsTo.valueType.isBuiltinFunction and name.belongsTo.resolved and not name.belongsTo.valueType.isGeneric: # We don't pop arguments to builtin functions because those don't # actually have scopes: their arguments are temporaries on the stack inc(popCount) elif name.kind == NameKind.Function and name.valueType.children.len() > 0 and name.depth == 0: # When a closure goes out of scope, its environment is reclaimed. # This includes the environments of every other closure that may # have been contained within it, too var i = 0 var envLen = 0 var lastEnvLen = 0 # Why this? Well, it's simple: if a function returns # a closure, that function becomes a closure too. The # environments of closures are aligned one after the # other, so if a and b are both closures, but only b # closes over a value, both a and b will have an envLen # of 1, which would cause us to emit one extra PopClosure # instruction than what's actually needed. We can account # for this easily by checking if the contained function's # environment is larger than the contained one, which will # guarantee there actually is some value that the contained # function is closing over for fn in flatten(name.valueType): if fn.isClosure and fn.envLen > lastEnvLen: envLen += fn.envLen lastEnvLen = fn.envLen for y in 0.. 
1: # If we're popping more than one variable, # we emit a bunch of PopN instructions until # the pop count is greater than zero while popCount > 0: self.emitByte(PopN, self.peek().token.line) self.emitBytes(popCount.toDouble(), self.peek().token.line) popCount -= popCount.toDouble().fromDouble().int elif popCount == 1: # We only emit PopN if we're popping more than one value self.emitByte(PopC, self.peek().token.line) # This seems *really* slow, but # what else should I do? Nim doesn't # allow the removal of items during # seq iteration so ¯\_(ツ)_/¯ var idx = 0 while idx < self.names.len(): for name in names: if self.names[idx] == name: self.names.delete(idx) inc(idx)


proc unpackGenerics(self: Compiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) =
    ## Recursively unpacks a type constraint in a generic type.
    ## Every identifier found in condition is appended to list
    ## as a (match, kind) pair, where match tells whether the
    ## type is accepted (true) or rejected (false). accept is
    ## the polarity inherited from the enclosing expression
    case condition.kind:
        of identExpr:
            list.add((accept, self.inferOrError(condition)))
        of binaryExpr:
            let condition = BinaryExpr(condition)
            case condition.operator.lexeme:
                of "|":
                    # Both operands inherit the current polarity, so
                    # that a negated union such as ~(A | B) rejects
                    # A and B instead of silently accepting them
                    self.unpackGenerics(condition.a, list, accept)
                    self.unpackGenerics(condition.b, list, accept)
                else:
                    self.error("invalid type constraint in generic declaration", condition)
        of unaryExpr:
            let condition = UnaryExpr(condition)
            case condition.operator.lexeme:
                of "~":
                    # Negation flips the current polarity
                    self.unpackGenerics(condition.a, list, accept=not accept)
                else:
                    self.error("invalid type constraint in generic declaration", condition)
        else:
            self.error("invalid type constraint in generic declaration", condition)


proc declareName(self: Compiler, node: ASTNode, mutable: bool = false) = ## Statically declares a name into the current scope. ## "Declaring" a name only means updating our internal ## list of identifiers so that further calls to resolve() ## correctly return them.
There is no code to actually ## declare a variable at runtime: the value is already ## on the stack var declaredName: string = "" var n: Name case node.kind: of NodeKind.varDecl: var node = VarDecl(node) if self.names.high() > 16777215: # If someone ever hits this limit in real-world scenarios, I swear I'll # slap myself 100 times with a sign saying "I'm dumb". Mark my words self.error("cannot declare more than 16777215 variables at a time") declaredName = node.name.token.lexeme # Creates a new Name entry so that self.identifier emits the proper stack offset self.names.add(Name(depth: self.depth, ident: node.name, isPrivate: node.isPrivate, owner: self.currentModule, isConst: node.isConst, valueType: nil, # Done later isLet: node.isLet, line: node.token.line, belongsTo: self.currentFunction, kind: NameKind.Var, node: node )) n = self.names[^1] if mutable: self.names[^1].valueType.mutable = true of NodeKind.funDecl: var node = FunDecl(node) declaredName = node.name.token.lexeme var fn = Name(depth: self.depth, isPrivate: node.isPrivate, isConst: false, owner: self.currentModule, valueType: Type(kind: Function, returnType: nil, # We check it later args: @[], fun: node, children: @[]), ident: node.name, node: node, isLet: false, line: node.token.line, kind: NameKind.Function, belongsTo: self.currentFunction) n = fn # First we declare the function's generics, if it has any. 
# This is because the function's return type may in itself # be a generic, so it needs to exist first var constraints: seq[tuple[match: bool, kind: Type]] = @[] for gen in node.generics: self.unpackGenerics(gen.cond, constraints) self.names.add(Name(depth: fn.depth + 1, isPrivate: true, valueType: Type(kind: Generic, name: gen.name.token.lexeme, mutable: false, cond: constraints), codePos: 0, isLet: false, line: fn.node.token.line, belongsTo: fn, ident: gen.name, owner: self.currentModule)) constraints = @[] if not node.returnType.isNil(): fn.valueType.returnType = self.inferOrError(node.returnType, allowGeneric=true) self.names.add(fn) # We now declare and typecheck the function's # arguments for argument in FunDecl(fn.node).arguments: if self.names.high() > 16777215: self.error("cannot declare more than 16777215 variables at a time") self.names.add(Name(depth: fn.depth + 1, isPrivate: true, owner: self.currentModule, isConst: false, ident: argument.name, valueType: self.inferOrError(argument.valueType, allowGeneric=true), codePos: 0, isLet: false, line: argument.name.token.line, belongsTo: fn, kind: NameKind.Argument )) fn.valueType.args.add((self.names[^1].ident.token.lexeme, self.names[^1].valueType)) if node.generics.len() > 0: fn.valueType.isGeneric = true of NodeKind.importStmt: var node = ImportStmt(node) var name = node.moduleName.token.lexeme.extractFilename().replace(".pn", "") declaredName = name self.names.add(Name(depth: self.depth, owner: self.currentModule, ident: newIdentExpr(Token(kind: Identifier, lexeme: name, line: node.moduleName.token.line)), line: node.moduleName.token.line, kind: NameKind.Module, isPrivate: false )) n = self.names[^1] else: discard # TODO: Types, enums self.dispatchPragmas(n) for name in self.findByName(declaredName): if name == n: continue elif (name.kind == NameKind.Var and name.depth == self.depth) or name.kind in [NameKind.Module, NameKind.CustomType, NameKind.Enum]: self.error(&"attempt to redeclare 
'{name.ident.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")


proc emitLoop(self: Compiler, begin: int, line: int) =
    ## Emits a JumpBackwards instruction with the correct
    ## jump offset. begin is the bytecode position to jump
    ## back to, line the source line attached to the jump
    let offset = self.chunk.code.high() - begin + 4
    if offset > 16777215:
        # TODO: Emit consecutive jumps?
        self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(JumpBackwards, line)
    self.emitBytes(offset.toTriple(), line)


proc patchBreaks(self: Compiler) =
    ## Patches the jumps emitted by
    ## breakStmt. This is needed
    ## because the size of code
    ## to skip is not known before
    ## the loop is fully compiled
    for brk in self.currentLoop.breakJumps:
        self.patchJump(brk)


proc handleMagicPragma(self: Compiler, pragma: Pragma, name: Name) =
    ## Handles the "magic" pragma. Assumes the given name is already
    ## declared. Marks the function as a builtin whose operation is
    ## the pragma's (unquoted) string argument and drops its body
    if pragma.args.len() != 1:
        self.error("'magic' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'magic' pragma: wrong type of argument (constant string expected)")
    elif name.node.kind != NodeKind.funDecl:
        self.error("'magic' pragma is not valid in this context")
    let node = FunDecl(name.node)
    name.valueType.isBuiltinFunction = true
    # [1..^2] strips the first and last character of the lexeme
    name.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2]
    # The magic pragma ignores the function's body
    node.body = nil


proc handlePurePragma(self: Compiler, pragma: Pragma, name: Name) =
    ## Handles the "pure" pragma, which is only valid on
    ## function declarations and lambdas
    case name.node.kind:
        of NodeKind.funDecl:
            FunDecl(name.node).isPure = true
        of lambdaExpr:
            LambdaExpr(name.node).isPure = true
        else:
            self.error("'pure' pragma is not valid in this context")


proc dispatchPragmas(self: Compiler, name: Name) =
    ## Dispatches pragmas bound to objects. Does nothing
    ## when there is no name or the name has no AST node
    # declareName() hands us a nil name for node kinds it
    # does not handle yet, so the name itself has to be
    # checked before its node is
    if name.isNil() or name.node.isNil():
        return
    var pragmas: seq[Pragma] = @[]
    case name.node.kind:
        of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
            pragmas = Declaration(name.node).pragmas
        of lambdaExpr:
            pragmas = LambdaExpr(name.node).pragmas
        else:
            discard  # Unreachable
    for pragma in pragmas:
        if pragma.name.token.lexeme notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        self.compilerProcs[pragma.name.token.lexeme](self, pragma, name)


proc patchReturnAddress(self: Compiler, pos: int) =
    ## Patches the return address of a function
    ## call. pos is the index in the constants
    ## table where the 8-byte placeholder starts;
    ## it is overwritten with the current length
    ## of the bytecode
    let address = self.chunk.code.len().toLong()
    for i in 0..7:
        self.chunk.consts[pos + i] = address[i]


proc terminateProgram(self: Compiler, pos: int) =
    ## Utility to terminate a peon program. pos is
    ## the value previously returned by beginProgram
    self.endScope()
    self.emitByte(OpCode.Return, self.peek().token.line)
    self.emitByte(0, self.peek().token.line)   # Entry point has no return value (TODO: Add easter eggs, cuz why not)
    self.patchReturnAddress(pos)


proc beginProgram(self: Compiler): int =
    ## Utility to begin a peon program's
    ## bytecode. Returns the position of
    ## a dummy return address of the program's
    ## entry point to be patched by terminateProgram

    # Every peon program has a hidden entry point in
    # which user code is wrapped. Think of it as if
    # peon is implicitly writing the main() function
    # of your program and putting all of your code in
    # there. While we call our entry point just like
    # any regular peon function, we can't use our handy
    # helper generateCall() because we need to keep track
    # of where our program ends (which we don't know yet).
    # To fix this, we emit dummy offsets and patch them
    # later, once we know the boundaries of our hidden main()
    let main = Name(depth: 0,
                    isPrivate: true,
                    isConst: false,
                    isLet: false,
                    owner: self.currentModule,
                    valueType: Type(kind: Function,
                                    returnType: nil,
                                    args: @[],
                                    ),
                    codePos: self.chunk.code.len() + 12,
                    ident: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                    kind: NameKind.Function,
                    line: -1)
    self.names.add(main)
    self.scopeOwners.add((main, 0))
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(main.codePos.toLong()), 1)
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), 1)
    # The dummy return address is the last 8-byte constant written
    result = self.chunk.consts.len() - 8
    self.emitByte(Call, 1)
    self.emitBytes(0.toTriple(), 1)


## End of utility functions

proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such
    ## as singletons, strings and numbers.
    ## Numeric literals are parsed first so
    ## that out-of-range values are reported
    ## as errors
    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue, node.token.line)
        of falseExpr:
            self.emitByte(LoadFalse, node.token.line)
        of nilExpr:
            self.emitByte(LoadNil, node.token.line)
        of infExpr:
            self.emitByte(LoadInf, node.token.line)
        of nanExpr:
            self.emitByte(LoadNan, node.token.line)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        of intExpr:
            let y = IntExpr(node)
            let kind = self.infer(y)
            if kind.kind in [Int64, Int32, Int16, Int8]:
                var x: int
                try:
                    discard parseInt(y.literal.lexeme, x)
                except ValueError:
                    self.error("integer value out of range")
            else:
                var x: uint64
                try:
                    discard parseBiggestUInt(y.literal.lexeme, x)
                except ValueError:
                    self.error("integer value out of range")
            self.emitConstant(y, kind)
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            try:
                discard parseHex(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            # The constant is emitted through a decimal integer
            # node built from the parsed value
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, self.infer(y))
        of binExpr:
            var x: int
            var y = BinExpr(node)
            try:
                discard parseBin(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, self.infer(y))
        of octExpr:
            var x: int
            var y = OctExpr(node)
            try:
                discard parseOct(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, self.infer(y))
        of floatExpr:
            var x: float
            var y = FloatExpr(node)
            try:
                discard parseFloat(y.literal.lexeme, x)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(y, self.infer(y))
        of awaitExpr:
            var y = AwaitExpr(node)
            self.expression(y.expression)
            self.emitByte(OpCode.Await, node.token.line)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")


proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
    ## Emits the code to call a unary operator.
    ## The emitted bytecode is tagged with the line
    ## of the expression, not with the line where
    ## the operator was declared
    self.generateCall(fn, @[op.a], op.token.line)


proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
    ## Emits the code to call a binary operator.
    ## The emitted bytecode is tagged with the line
    ## of the expression, not with the line where
    ## the operator was declared
    self.generateCall(fn, @[op.a, op.b], op.token.line)


proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as decimal
    ## and bitwise negation by looking up the
    ## implementation of the operator that matches
    ## the operand's type and calling it
    let valueType = self.infer(node.a)
    let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", valueType)]), node)
    self.callUnaryOp(funct, node)


proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expression by looking up
    ## the implementation of the operator that matches
    ## the operands' types and calling it
    let typeOfA = self.infer(node.a)
    let typeOfB = self.infer(node.b)
    let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", typeOfA), ("", typeOfB)]), node)
    self.callBinaryOp(funct, node)


proc identifier(self: Compiler, node: IdentExpr) = ##
Compiles access to identifiers var s = self.resolveOrError(node) if s.isConst: # Constants are always emitted as Load* instructions # no matter the scope depth self.emitConstant(node, self.infer(node)) else: if s.kind == NameKind.Function: # Functions have no runtime representation, they're just # a location to jump to, but we pretend they aren't and # resolve them to their address into our bytecode when # they're referenced self.emitByte(LoadUInt64, node.token.line) self.emitBytes(self.chunk.writeConstant(s.codePos.toLong()), node.token.line) elif s.depth > 0 and self.depth > 0 and not self.currentFunction.isNil() and s.depth != self.depth and self.scopeOwners[s.depth].owner != self.currentFunction: # Loads a closure variable. Stored in a separate "closure array" in the VM that does not # align its semantics with the call stack. This makes closures work as expected and is # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway) if not s.isClosedOver: var fn = self.currentFunction.valueType while true: fn.isClosure = true fn.envLen += 1 if fn.parent.isNil(): break fn = fn.parent s.isClosedOver = true self.closures.add(s) let stackIdx = self.getStackPos(s).toTriple() let closeIdx = self.closures.high().toTriple() let oldLen = self.chunk.code.len() # This madness makes it so that we can insert bytecode # at arbitrary offsets into our alredy compiled code and # have our metadata be up to date self.chunk.code.insert(StoreClosure.uint8, s.belongsTo.codePos) self.chunk.code.insert(stackIdx[0], s.belongsTo.codePos + 1) self.chunk.code.insert(stackIdx[1], s.belongsTo.codePos + 2) self.chunk.code.insert(stackIdx[2], s.belongsTo.codePos + 3) self.chunk.code.insert(closeIdx[0], s.belongsTo.codePos + 4) self.chunk.code.insert(closeIdx[1], s.belongsTo.codePos + 5) self.chunk.code.insert(closeIdx[2], s.belongsTo.codePos + 6) self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(s.belongsTo.codePos)) + 1] += 7 self.fixJumps(oldLen, 
s.belongsTo.codePos) self.fixCFIOffsets(oldLen, s.belongsTo.codePos) let pos = self.getClosurePos(s) if pos == -1: self.error(&"cannot compute closure offset for '{s.ident.token.lexeme}'", s.ident) self.emitByte(LoadClosure, node.token.line) self.emitBytes(pos.toTriple(), node.token.line) else: # Static name resolution, loads value at index in the stack. Very fast. Much wow. self.emitByte(LoadVar, node.token.line) # No need to check for -1 here: we already did a nil check above! self.emitBytes(self.getStackPos(s).toTriple(), node.token.line) proc assignment(self: Compiler, node: ASTNode) = ## Compiles assignment expressions case node.kind: of assignExpr: let node = AssignExpr(node) let name = IdentExpr(node.name) var r = self.resolveOrError(name) if r.isConst: self.error(&"cannot assign to '{name.token.lexeme}' (value is a constant)", name) elif r.isLet: self.error(&"cannot reassign '{name.token.lexeme}' (value is immutable)", name) self.expression(node.value) if not r.isClosedOver: self.emitByte(StoreVar, node.token.line) self.emitBytes(self.getStackPos(r).toTriple(), node.token.line) else: # Loads a closure variable. Stored in a separate "closure array" in the VM that does not # align its semantics with the call stack. 
This makes closures work as expected and is # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway) self.emitByte(StoreClosure, node.token.line) self.emitBytes(self.getClosurePos(r).toTriple(), node.token.line) of setItemExpr: let node = SetItemExpr(node) let typ = self.infer(node) if typ.isNil(): self.error(&"cannot determine the type of '{node.name.token.lexeme}'") # TODO else: self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")


proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles block statements, which create a new
    ## local scope
    self.beginScope()
    for decl in node.code:
        self.declaration(decl)
    self.endScope()


proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code. The condition must be a Bool
    self.check(node.condition, Type(kind: Bool))
    self.expression(node.condition)
    # Skips the "then" branch when the condition is false
    let jump = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.thenBranch)
    # Skips the "else" branch once the "then" branch is done
    let jump2 = self.emitJump(JumpForwards, node.token.line)
    self.patchJump(jump)
    if not node.elseBranch.isNil():
        self.statement(node.elseBranch)
    self.patchJump(jump2)


proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops and
    ## desugared C-style for loops. The
    ## condition must be a Bool
    self.check(node.condition, Type(kind: Bool))
    let start = self.chunk.code.high()
    self.expression(node.condition)
    let jump = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.body)
    self.emitLoop(start, node.token.line)
    self.patchJump(jump)


proc generateCall(self: Compiler, fn: Type, args: seq[Expression], line: int) =
    ## Version of generateCall that takes Type objects
    ## instead of Name objects. The function is assumed
    ## to be on the stack. Every argument is type-checked
    ## against the corresponding parameter of fn
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    # Position of the dummy return address, patched below
    let pos = self.chunk.consts.len() - 8
    for i, argument in reversed(args):
        # We pass the arguments in reverse
        # because of how stacks work. They'll
        # be reversed again at runtime
        self.check(argument, fn.args[^(i + 1)].kind)
        self.expression(argument)
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    if not fn.isClosure:
        self.emitByte(Call, line)
    else:
        self.emitByte(CallClosure, line)
    self.emitBytes(args.len().toTriple(), line)
    if fn.isClosure:
        self.emitBytes(fn.envLen.toTriple(), line)
    self.patchReturnAddress(pos)


proc generateCall(self: Compiler, fn: Name, args: seq[Expression], line: int) =
    ## Small wrapper that abstracts emitting a call instruction
    ## for a given function. Builtin functions are not called at
    ## all: their instructions are emitted in place
    if fn.valueType.isBuiltinFunction:
        self.handleBuiltinFunction(fn.valueType, args, line)
        return
    # First we load the callee
    case fn.kind:
        of NameKind.Var:
            self.identifier(VarDecl(fn.node).name)
        of NameKind.Function:
            self.emitByte(LoadUInt64, line)
            self.emitBytes(self.chunk.writeConstant(fn.codePos.toLong()), line)
        else:
            discard
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    # Position of the dummy return address, patched below
    let pos = self.chunk.consts.len() - 8
    for arg in reversed(args):
        self.expression(arg)
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    if not fn.valueType.isClosure:
        self.emitByte(Call, line)
    else:
        self.emitByte(CallClosure, line)
    self.emitBytes(args.len().toTriple(), line)
    if fn.valueType.isClosure:
        self.emitBytes(fn.valueType.envLen.toTriple(), line)
    self.patchReturnAddress(pos)


proc specialize(self: Compiler, name: Name, args: seq[Expression]): Name =
    ## Specializes a generic type by
    ## instantiating a concrete version
    ## of it. Returns a deep copy of name
    ## whose generic parameters are replaced
    ## with the types inferred from args; the
    ## copy's arguments are also declared as
    ## new names
    var mapping: TableRef[string, Type] = newTable[string, Type]()
    var kind: Type
    result = deepCopy(name)
    result.valueType.isGeneric = false
    case name.kind:
        of NameKind.Function:
            # This first loop checks if a user tries to reassign a generic's
            # name to a different type
            for i, (paramName, typ) in result.valueType.args:
                if typ.kind != Generic:
                    continue
                kind = self.infer(args[i])
                if typ.name in mapping and not self.compare(kind, mapping[typ.name]):
                    self.error(&"expected generic argument '{typ.name}' to be of type {self.typeToStr(mapping[typ.name])}, got {self.typeToStr(kind)} instead")
                mapping[typ.name] = kind
                result.valueType.args[i].kind = kind
            for (argExpr, argName) in zip(args, result.valueType.args):
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                self.names.add(Name(depth: name.depth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    ident: newIdentExpr(Token(lexeme: argName.name)),
                                    valueType: argName.kind,
                                    codePos: 0,
                                    isLet: false,
                                    line: name.line,
                                    belongsTo: result,
                                    kind: NameKind.Argument
                                    ))
            # Functions without a return type have a nil returnType,
            # so it must be checked before it is dereferenced
            let returnType = result.valueType.returnType
            if not returnType.isNil() and returnType.kind == Generic:
                if returnType.name notin mapping:
                    # None of the arguments binds this generic, so there
                    # is nothing to replace it with
                    self.error(&"cannot infer the return type of '{name.ident.token.lexeme}': generic '{returnType.name}' is not bound by any argument")
                result.valueType.returnType = mapping[returnType.name]
            # self.funDecl(FunDecl(result.node), result)
        else:
            discard  # TODO: Custom user-defined types


proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} = ## Compiles code to call a chain of function calls var args: seq[tuple[name: string, kind: Type]] = @[] var argExpr: seq[Expression] = @[] var kind: Type # TODO: Keyword arguments var i = node.arguments.positionals.len() for argument in node.arguments.positionals: dec(i) kind = self.infer(argument) if kind.isNil(): if node.callee.kind != identExpr: self.error(&"cannot infer the type of argument {i + 1} in call") else: self.error(&"cannot infer the type of argument {i + 1} in call to '{node.callee.token.lexeme}'") args.add(("", kind)) argExpr.add(argument) case node.callee.kind: of identExpr: # Calls like hi() result = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args), node) if result.valueType.isGeneric: # We can't instantiate a concrete version # of a generic function without the types # of its arguments, so we wait until the # very last moment to compile it, once # that info is available to us result = self.specialize(result, argExpr) self.funDecl(FunDecl(result.node), result) # Now we call it
self.generateCall(result, argExpr, node.token.line) of NodeKind.callExpr: # Calling a call expression, like hello()() var node: Expression = node var all: seq[CallExpr] = @[] while CallExpr(node).callee.kind == callExpr: all.add(CallExpr(CallExpr(node).callee)) node = CallExpr(node).callee for exp in reversed(all): self.callExpr(exp) case all[^1].callee.kind: of identExpr: let fn = self.resolve(IdentExpr(all[^1].callee)) self.generateCall(fn.valueType.returnType, argExpr, fn.line) else: discard # TODO: Lambdas # TODO: Calling lambdas on-the-fly (i.e. on the same line) else: let typ = self.infer(node) if typ.isNil(): self.error(&"expression has no type") else: self.error(&"object of type '{self.typeToStr(typ)}' is not callable") proc expression(self: Compiler, node: Expression) = ## Compiles all expressions case node.kind: of NodeKind.callExpr: self.callExpr(CallExpr(node)) # TODO of getItemExpr: discard # TODO: Get rid of this of pragmaExpr: discard # TODO # Note that for setItem and assign we don't convert # the node to its true type because that type information # would be lost in the call anyway. The differentiation # happens in self.assignment() of setItemExpr, assignExpr: # TODO: Get rid of this self.assignment(node) of identExpr: self.identifier(IdentExpr(node)) of unaryExpr: # Unary expressions such as ~5 and -3 self.unary(UnaryExpr(node)) of groupingExpr: # Grouping expressions like (2 + 1) self.expression(GroupingExpr(node).expression) of binaryExpr: # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 self.binary(BinaryExpr(node)) of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, infExpr, nanExpr, floatExpr, nilExpr: # Since all of these AST nodes share the # same overall structure and the kind # field is enough to tell one from the # other, why bother with specialized # cases when one is enough? 
self.literal(node) else: self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")


proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements. An await statement
    ## is like an await expression, but parsed in the
    ## context of statements for usage outside expressions,
    ## meaning it can be used standalone. It's basically the
    ## same as an await expression followed by a semicolon.
    ## Await expressions and statements are the only native
    ## construct to run coroutines from within an already
    ## asynchronous context (which should be orchestrated
    ## by an event loop). They block in the caller until
    ## the callee returns
    self.expression(node.expression)
    self.emitByte(OpCode.Await, node.token.line)


proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ## The deferred expression is compiled into a scratch
    ## chunk and its bytecode is stashed in self.deferred
    ## instead of being emitted in place
    var oldChunk = self.chunk
    var chunk = newChunk()
    # The scratch chunk starts out with a copy of the
    # current metadata
    chunk.consts = self.chunk.consts
    chunk.lines = self.chunk.lines
    chunk.cfi = self.chunk.cfi
    self.chunk = chunk
    self.expression(node.expression)
    for b in chunk.code:
        self.deferred.add(b)
    self.chunk = oldChunk
    # The scratch chunk's constants already begin with a copy
    # of ours, so we take them over as they are: appending them
    # would duplicate the whole table and shift every constant
    # the deferred code refers to
    self.chunk.consts = chunk.consts
    # NOTE(review): lines and cfi were seeded with copies too, so
    # appending them presumably duplicates the old entries as well.
    # Left untouched because the intended layout of that metadata
    # for deferred code cannot be confirmed from here
    self.chunk.lines &= chunk.lines
    self.chunk.cfi &= chunk.cfi


proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements
    # NOTE(review): the check runs even when node.value is nil
    # (a bare return): confirm that check()/inferOrError() cope
    # with a nil expression
    self.check(node.value, self.currentFunction.valueType.returnType)
    if not node.value.isNil():
        self.expression(node.value)
        self.emitByte(OpCode.SetResult, node.token.line)
    # Since the "set result" part and "exit the function" part
    # of our return mechanism are already decoupled into two
    # separate opcodes, we perform the former and then jump to
    # the function's last return statement, which is always emitted
    # by funDecl() at the end of the function's lifecycle, greatly
    # simplifying the design, since now there's just one return
    # instruction to jump to instead of many potential points
    # where the function returns from. Note that depending on whether
    # the function has any local variables or not, this jump might be
    # patched to jump to the function's PopN/PopC instruction(s) rather
    # than straight to the return statement
    self.currentFunction.valueType.retJumps.add(self.emitJump(JumpForwards, node.token.line))


proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements
    self.expression(node.expression)
    self.emitByte(OpCode.Yield, node.token.line)


proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements
    self.expression(node.exception)
    self.emitByte(OpCode.Raise, node.token.line)


proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the next iteration in a loop
    if self.currentLoop.start > 16777215:
        self.error("too much code to jump over in continue statement")
    self.emitByte(Jump, node.token.line)
    self.emitBytes(self.currentLoop.start.toTriple(), node.token.line)


proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps to the end of the loop. The jump is
    ## recorded in the current loop so that it can be
    ## patched once the loop is fully compiled
    self.currentLoop.breakJumps.add(self.emitJump(OpCode.JumpForwards, node.token.line))
    if self.currentLoop.depth > self.depth:
        # Breaking out of a loop closes its scope
        self.endScope()


proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (raise
    ## AssertionError if the expression is falsey)
    self.expression(node.expression)
    self.emitByte(OpCode.Assert, node.token.line)


proc forEachStmt(self: Compiler, node: ForEachStmt) =
    ## Compiles foreach loops
    # TODO


proc importStmt(self: Compiler, node: ImportStmt) =
    ## Imports a module at compile time. The module
    ## is compiled first and its name is declared
    ## afterwards; I/O and OS failures are reported
    ## as compile errors
    let filename = splitPath(node.moduleName.token.lexeme).tail
    try:
        self.compileModule(node.moduleName.token.lexeme)
        self.declareName(node)
    except IOError:
        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")
    except OSError:
        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()} [errno {osLastError()}]""")


proc exportStmt(self: Compiler, node: ExportStmt) =
    ## Exports a name at compile time to
    ## all modules importing us. Private
    ## names cannot be exported
    var name = self.resolveOrError(node.name)
    if name.isPrivate:
        self.error("cannot export private names")
    name.exported = true
    case name.kind:
        of NameKind.Module:
            # We need to export everything
            # this module defines!
            for member in self.findByModule(name.ident.token.lexeme):
                member.exported = true
        of NameKind.Function:
            # Exports every function sharing this name
            for overload in self.findByName(name.ident.token.lexeme):
                if overload.kind != NameKind.Function:
                    continue
                overload.exported = true
        else:
            discard


proc printRepl(self: Compiler, typ: Type, node: Expression) =
    ## Emits instruction to print
    ## peon types in REPL mode. Types
    ## without a dedicated instruction
    ## are printed with PrintHex
    case typ.kind:
        of Int64:
            self.emitByte(PrintInt64, node.token.line)
        of UInt64:
            self.emitByte(PrintUInt64, node.token.line)
        of Int32:
            self.emitByte(PrintInt32, node.token.line)
        of UInt32:
            self.emitByte(PrintUInt32, node.token.line)
        of Int16:
            self.emitByte(PrintInt16, node.token.line)
        of UInt16:
            self.emitByte(PrintUInt16, node.token.line)
        of Int8:
            self.emitByte(PrintInt8, node.token.line)
        of UInt8:
            self.emitByte(PrintUInt8, node.token.line)
        of Float64:
            self.emitByte(PrintFloat64, node.token.line)
        of Float32:
            self.emitByte(PrintFloat32, node.token.line)
        of Bool:
            self.emitByte(PrintBool, node.token.line)
        of Nan:
            self.emitByte(PrintNan, node.token.line)
        of Inf:
            self.emitByte(PrintInf, node.token.line)
        of String:
            self.emitByte(PrintString, node.token.line)
        else:
            self.emitByte(PrintHex, node.token.line)


proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements by dispatching
    ## on the kind of the node
    case node.kind:
        of exprStmt:
            let expression = ExprStmt(node).expression
            let kind = self.infer(expression)
            self.expression(expression)
            if kind.isNil():
                # The expression has no type and produces no value,
                # so we don't have to pop anything
                discard
            elif self.replMode:
                self.printRepl(kind, expression)
            else:
                self.emitByte(Pop, node.token.line)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.exportStmt:
            self.exportStmt(ExportStmt(node))
        of NodeKind.whileStmt:
            # Note: Our parser already desugars
            # for loops to while loops
            let loop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.depth, breakJumps: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            # Restores the enclosing loop (if any)
            self.currentLoop = loop
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            self.expression(Expression(node))


proc varDecl(self: Compiler, node: VarDecl, name: Name) =
    ## Compiles variable declarations. The variable's
    ## type is stored into name and its value is written
    ## to the variable's slot on the stack

    # Our parser guarantees that the variable declaration
    # will have a type declaration or a value (or both)
    var typ: Type
    if node.value.isNil():
        # Variable has no value: the type declaration
        # takes over
        typ = self.inferOrError(node.valueType)
    elif node.valueType.isNil:
        # Variable has no type declaration: the type
        # of its value takes over
        typ = self.inferOrError(node.value)
    else:
        # Variable has both a type declaration and
        # a value: the value's type must match the
        # type declaration
        let expected = self.inferOrError(node.valueType)
        self.check(node.value, expected)
        # If this doesn't fail, then we're good
        typ = expected
    name.valueType = typ
    # NOTE(review): node.value is compiled even when it is nil
    # (first branch above): confirm how a declaration without a
    # value is supposed to be initialized
    self.expression(node.value)
    self.emitByte(StoreVar, node.token.line)
    self.emitBytes(self.getStackPos(name).toTriple(), node.token.line)


proc typeDecl(self: Compiler, node: TypeDecl, name: Name) =
    ## Compiles type declarations
    # TODO


proc funDecl(self: Compiler, node: FunDecl, name: Name) = ## Compiles function declarations if node.token.kind == Operator and node.name.token.lexeme in [".", ]: self.error(&"Due to current compiler limitations, the '{node.name.token.lexeme}' operator cannot be overridden", node.name) if name.valueType.isBuiltinFunction: return var node = node var jmp: int # We
store the current function var function = self.currentFunction if not self.currentFunction.isNil(): self.currentFunction.valueType.children.add(name.valueType) name.valueType.parent = function.valueType self.currentFunction = name if not node.body.isNil(): # A function's code is just compiled linearly # and then jumped over jmp = self.emitJump(JumpForwards, node.token.line) name.codePos = self.chunk.code.len() # We let our debugger know this function's boundaries self.chunk.cfi.add(self.chunk.code.high().toTriple()) self.cfiOffsets.add((start: self.chunk.code.high(), stop: 0, pos: self.chunk.cfi.len() - 3, fn: name)) var cfiOffset = self.cfiOffsets[^1] let idx = self.chunk.cfi.len() self.chunk.cfi.add(0.toTriple()) # Patched it later self.chunk.cfi.add(uint8(node.arguments.len())) if not node.name.isNil(): self.chunk.cfi.add(name.ident.token.lexeme.len().toDouble()) var s = name.ident.token.lexeme if s.len() >= uint16.high().int: s = node.name.token.lexeme[0..uint16.high()] self.chunk.cfi.add(s.toBytes()) else: self.chunk.cfi.add(0.toDouble()) if BlockStmt(node.body).code.len() == 0: raise newException(IndexDefect, "") self.error("cannot declare function with empty body") # Since the deferred array is a linear # sequence of instructions and we want # to keep track to whose function's each # set of deferred instruction belongs, # we record the length of the deferred # array before compiling the function # and use this info later to compile # the try/finally block with the deferred # code var deferStart = self.deferred.len() self.beginScope() for decl in BlockStmt(node.body).code: self.declaration(decl) let typ = self.currentFunction.valueType.returnType var hasVal: bool = false case self.currentFunction.valueType.fun.kind: of NodeKind.funDecl: hasVal = self.currentFunction.valueType.fun.hasExplicitReturn of NodeKind.lambdaExpr: hasVal = LambdaExpr(self.currentFunction.node).hasExplicitReturn else: discard # Unreachable if not hasVal and not typ.isNil(): # There is 
no explicit return statement anywhere in the function's # body: while this is not a tremendously useful piece of information # (since the presence of at least one doesn't mean all control flow # cases are covered), it definitely is an error worth reporting self.error("function has an explicit return type, but no return statement was found", node) hasVal = hasVal and not typ.isNil() for jump in self.currentFunction.valueType.retJumps: self.patchJump(jump) self.endScope() # Terminates the function's context self.emitByte(OpCode.Return, self.peek().token.line) if hasVal: self.emitByte(1, self.peek().token.line) else: self.emitByte(0, self.peek().token.line) let stop = self.chunk.code.len().toTriple() self.chunk.cfi[idx] = stop[0] self.chunk.cfi[idx + 1] = stop[1] self.chunk.cfi[idx + 2] = stop[2] cfiOffset.stop = self.chunk.code.len() # Currently defer is not functional, so we # just pop the instructions for _ in deferStart..self.deferred.high(): discard self.deferred.pop() # Well, we've compiled everything: time to patch # the jump offset self.patchJump(jmp) else: discard # TODO: Forward declarations # Restores the enclosing function (if any). # Makes nested calls work (including recursion) self.currentFunction = function proc declaration(self: Compiler, node: Declaration) = ## Handles all declarations. 
They are not compiled ## right away, but rather only when they're referenced ## the first time case node.kind: of NodeKind.varDecl, NodeKind.funDecl, NodeKind.typeDecl: self.declareName(node) if node.kind == NodeKind.varDecl: # We compile this immediately because we # need to keep the stack in the right state # at runtime self.varDecl(VarDecl(node), self.names[^1]) else: self.statement(Statement(node)) proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil, incremental: bool = false, isMainModule: bool = true): Chunk = ## Compiles a sequence of AST nodes into a chunk ## object if chunk.isNil(): self.chunk = newChunk() else: self.chunk = chunk self.ast = ast self.file = file self.depth = 0 self.currentFunction = nil self.currentModule = self.file.extractFilename().replace(".pn", "") self.current = 0 self.lines = lines self.source = source self.isMainModule = isMainModule if not incremental: self.jumps = @[] let pos = self.beginProgram() while not self.done(): self.declaration(Declaration(self.step())) self.terminateProgram(pos) # TODO: REPL is broken, we need a new way to make # incremental compilation resume from where it stopped! 
result = self.chunk proc compileModule(self: Compiler, moduleName: string) = ## Compiles an imported module into an existing chunk ## using the compiler's internal parser and lexer objects var path = "" for i, searchPath in moduleLookupPaths: if searchPath == "": path = joinPath(getCurrentDir(), joinPath(splitPath(self.file).head, moduleName)) else: path = joinPath(getCurrentDir(), joinPath(searchPath, moduleName)) if fileExists(path): break elif i == searchPath.high(): self.error(&"""could not import '{path}': module not found""") if self.modules.contains(path): return let source = readFile(path) let current = self.current let ast = self.ast let file = self.file let module = self.currentModule let lines = self.lines let src = self.source self.isMainModule = false discard self.compile(self.parser.parse(self.lexer.lex(source, path), path, self.lexer.getLines(), source, persist=true), path, self.lexer.getLines(), source, chunk=self.chunk, incremental=true, isMainModule=false) self.depth = 0 self.current = current self.ast = ast self.file = file self.currentModule = module self.lines = lines self.source = src self.modules.incl(path)