# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## The code generator for Peon bytecode
import std/tables
import std/strformat
import std/algorithm
import std/parseutils
import std/strutils
import std/sequtils
import std/sets
import std/os


import opcodes
import frontend/compiler/compiler
import frontend/parsing/lexer
import frontend/parsing/parser
import frontend/parsing/ast
import util/multibyte


export opcodes


type
    CompilerFunc = object
        ## An internal compiler function called
        ## by pragmas
        kind: PragmaKind
        handler: proc (self: BytecodeCompiler, pragma: Pragma, name: Name)

    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements

        # Position in the bytecode where the loop starts
        start: int
        # Scope depth where the loop is located
        depth: int
        # Jump offsets into our bytecode that we need to
        # patch. Used for break statements
        breakJumps: seq[int]

    NamedBlock = ref object
        ## A "named block object", similar
        ## to a loop object. Used to emit
        ## appropriate jump offsets
        start: int
        depth: int
        breakJumps: seq[int]
        name: string
        broken: bool

    BytecodeCompiler* = ref object of Compiler
        ## A wrapper around the Peon compiler's state

        # The bytecode chunk where we write code to
        chunk: Chunk
        # The current loop being compiled (used to
        # keep track of where to jump)
        currentLoop: Loop
        # Stack of named blocks
        namedBlocks: seq[NamedBlock]
        # Compiler procedures called by pragmas
        compilerProcs: TableRef[string, CompilerFunc]
        # Stores the position of all jumps
        jumps: seq[tuple[patched: bool, offset: int]]
        # Metadata about function locations
        functions: seq[tuple[start, stop, pos: int, fn: Name]]
        forwarded: seq[tuple[name: Name, pos: int]]
        # The topmost occupied stack slot
        # in the current frame (0-indexed)
        stackIndex: int
        lambdas: seq[LambdaExpr]


# Forward declarations
proc compile*(self: BytecodeCompiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              incremental: bool = false, isMainModule: bool = true, disabledWarnings: seq[WarningKind] = @[], showMismatches: bool = false,
              mode: CompileMode = Debug): Chunk
proc statement(self: BytecodeCompiler, node: Statement)
proc declaration(self: BytecodeCompiler, node: Declaration)
proc varDecl(self: BytecodeCompiler, node: VarDecl)
proc specialize(self: BytecodeCompiler, typ: Type, args: seq[Expression]): Type {.discardable.}
proc patchReturnAddress(self: BytecodeCompiler, pos: int)
proc handleMagicPragma(self: BytecodeCompiler, pragma: Pragma, name: Name)
proc handlePurePragma(self: BytecodeCompiler, pragma: Pragma, name: Name)
proc handleErrorPragma(self: BytecodeCompiler, pragma: Pragma, name: Name)
method dispatchPragmas(self: BytecodeCompiler, name: Name)
method dispatchDelayedPragmas(self: BytecodeCompiler, name: Name)
proc funDecl(self: BytecodeCompiler, node: FunDecl, name: Name)
proc compileModule(self: BytecodeCompiler, module: Name)
proc generateCall(self: BytecodeCompiler, fn: Name, args: seq[Expression], line: int)
method prepareFunction(self: BytecodeCompiler, fn: Name)
# End of forward declarations


proc newBytecodeCompiler*(replMode: bool = false): BytecodeCompiler =
    ## Initializes a new BytecodeCompiler object. The
    ## "magic", "pure" and "error" pragma handlers are
    ## registered here. Note that the chunk field is
    ## left unset by this constructor
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.depth = 0
    result.lines = @[]
    result.jumps = @[]
    result.lambdas = @[]
    result.currentFunction = nil
    result.replMode = replMode
    result.currentModule = nil
    result.compilerProcs = newTable[string, CompilerFunc]()
    result.compilerProcs["magic"] = CompilerFunc(kind: Immediate, handler: handleMagicPragma)
    result.compilerProcs["pure"] = CompilerFunc(kind: Immediate, handler: handlePurePragma)
    result.compilerProcs["error"] = CompilerFunc(kind: Delayed, handler: handleErrorPragma)
    result.source = ""
    result.lexer = newLexer()
    result.lexer.fillSymbolTable()
    result.parser = newParser()
    result.isMainModule = false
    result.forwarded = @[]
    result.disabledWarnings = @[]
    result.functions = @[]
    result.stackIndex = 1


## Low-level code generation helpers

proc emitByte(self: BytecodeCompiler, byt: OpCode | uint8, line: int) {.inline.} =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled
    self.chunk.write(uint8 byt, line)


proc emitBytes(self: BytecodeCompiler, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
    ## Handy helper method to write arbitrary bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    for b in bytarr:
        self.emitByte(b, line)


proc printRepl(self: BytecodeCompiler, typ: Type, node: Expression) =
    ## Emits instruction to print
    ## peon types in REPL mode. Types
    ## without a dedicated print opcode
    ## fall back to PrintHex
    case typ.kind:
        of Int64:
            self.emitByte(PrintInt64, node.token.line)
        of UInt64:
            self.emitByte(PrintUInt64, node.token.line)
        of Int32:
            self.emitByte(PrintInt32, node.token.line)
        of UInt32:
            # Unsigned values need the unsigned opcode (this
            # used to emit PrintInt32, unlike the 'print' builtin)
            self.emitByte(PrintUInt32, node.token.line)
        of Int16:
            self.emitByte(PrintInt16, node.token.line)
        of UInt16:
            self.emitByte(PrintUInt16, node.token.line)
        of Int8:
            self.emitByte(PrintInt8, node.token.line)
        of UInt8:
            self.emitByte(PrintUInt8, node.token.line)
        of Float64:
            self.emitByte(PrintFloat64, node.token.line)
        of Float32:
            self.emitByte(PrintFloat32, node.token.line)
        of Bool:
            self.emitByte(PrintBool, node.token.line)
        of TypeKind.Nan:
            self.emitByte(PrintNan, node.token.line)
        of TypeKind.Inf:
            self.emitByte(PrintInf, node.token.line)
        of TypeKind.String:
            self.emitByte(PrintString, node.token.line)
        else:
            self.emitByte(PrintHex, node.token.line)


proc makeConstant(self: BytecodeCompiler, val: Expression, typ: Type): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s. For
    ## unsupported type kinds nothing is written and the result
    ## is left zero-initialized
    var lit: string
    if typ.kind in [UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64]:
        lit = val.token.lexeme
        if "'" in lit:
            # Drop the size specifier, i.e. everything
            # from the last apostrophe onwards
            lit = lit[0 ..< lit.rfind('\'')]
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(parseInt(lit))])
        of Int16, UInt16:
            result = self.chunk.writeConstant(parseInt(lit).toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(parseInt(lit).toQuad())
        of Int64:
            result = self.chunk.writeConstant(parseInt(lit).toLong())
        of UInt64:
            result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
        of String:
            # NOTE(review): this slice strips the opening quote only, so the
            # closing quote is stored as well, while emitConstant announces
            # a length of len - 2. Looks harmless but wasteful; confirm
            # against the VM before changing it
            result = self.chunk.writeConstant(val.token.lexeme[1..^1].toBytes())
        of Float32:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
        of Float64:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[8, uint8]](f))
        else:
            discard


proc emitConstant(self: BytecodeCompiler, obj: Expression, kind: Type) =
    ## Emits a constant instruction along
    ## with its operand. Strings additionally
    ## carry their length (quotes excluded) as
    ## a 3-byte operand before the constant index
    case kind.kind:
        of Int64:
            self.emitByte(LoadInt64, obj.token.line)
        of UInt64:
            self.emitByte(LoadUInt64, obj.token.line)
        of Int32:
            self.emitByte(LoadInt32, obj.token.line)
        of UInt32:
            self.emitByte(LoadUInt32, obj.token.line)
        of Int16:
            self.emitByte(LoadInt16, obj.token.line)
        of UInt16:
            self.emitByte(LoadUInt16, obj.token.line)
        of Int8:
            self.emitByte(LoadInt8, obj.token.line)
        of UInt8:
            self.emitByte(LoadUInt8, obj.token.line)
        of String:
            self.emitByte(LoadString, obj.token.line)
            let str = LiteralExpr(obj).literal.lexeme
            if str.len() >= 16777216:
                self.error("string constants cannot be larger than 16777215 bytes")
            self.emitBytes((str.len() - 2).toTriple(), obj.token.line)
        of Float32:
            self.emitByte(LoadFloat32, obj.token.line)
        of Float64:
            self.emitByte(LoadFloat64, obj.token.line)
        else:
            discard  # TODO
    self.emitBytes(self.makeConstant(obj, kind), obj.token.line)


proc setJump(self: BytecodeCompiler, offset: int, jmp: array[3, uint8]) =
    ## Sets a jump at the given
    ## offset to the given value.
    ## The offset is the position
    ## of the jump opcode: the operand
    ## lives in the three bytes after it
    self.chunk.code[offset + 1] = jmp[0]
    self.chunk.code[offset + 2] = jmp[1]
    self.chunk.code[offset + 3] = jmp[2]


proc setJump(self: BytecodeCompiler, offset: int, jmp: seq[uint8]) =
    ## Sets a jump at the given
    ## offset to the given value.
    ## Only the first three items
    ## of jmp are used
    self.chunk.code[offset + 1] = jmp[0]
    self.chunk.code[offset + 2] = jmp[1]
    self.chunk.code[offset + 3] = jmp[2]


proc emitJump(self: BytecodeCompiler, opcode: OpCode, line: int): int =
    ## Emits a dummy jump offset to be patched later
    ## and returns a unique identifier for that jump
    ## to be passed to patchJump
    self.emitByte(opcode, line)
    # We record where the opcode itself sits
    self.jumps.add((patched: false, offset: self.chunk.code.high()))
    self.emitBytes(0.toTriple(), line)
    result = self.jumps.high()


proc fixFunctionOffsets(self: BytecodeCompiler, where, oldLen: int) =
    ## Fixes function offsets after the size of our
    ## bytecode has changed. The where argument is
    ## the position where the change happened and
    ## oldLen is the size of the code before it
    if oldLen == self.chunk.code.len():
        return
    let offset = self.chunk.code.len() - oldLen
    var newOffset: array[3, uint8]
    var tmp: int
    for function in self.functions.mitems():
        if function.start >= where:
            # First triple of the entry: taken from function.start
            newOffset = (function.start + offset).toTriple()
            self.chunk.functions[function.pos] = newOffset[0]
            self.chunk.functions[function.pos + 1] = newOffset[1]
            self.chunk.functions[function.pos + 2] = newOffset[2]
            # Second triple of the entry: read back from the
            # chunk, shifted and stored again
            tmp = [self.chunk.functions[function.pos + 3], self.chunk.functions[function.pos + 4], self.chunk.functions[function.pos + 5]].fromTriple().int
            newOffset = (tmp + offset).toTriple()
            self.chunk.functions[function.pos + 3] = newOffset[0]
            self.chunk.functions[function.pos + 4] = newOffset[1]
            self.chunk.functions[function.pos + 5] = newOffset[2]
            function.start += offset
            function.stop += offset


proc fixJumps(self: BytecodeCompiler, where, oldLen: int) =
    ## Fixes jump offsets after the size
    ## of our bytecode has changed
    if oldLen == self.chunk.code.len():
        return
    let offset = self.chunk.code.len() - oldLen
    for jump in self.jumps.mitems():
        if jump.offset >= where:
            # While all already-patched jumps need
            # to have their jump offsets fixed, we
            # also need to update our internal jumps
            # list in cases where we shifted the jump
            # instruction itself into the code!
            jump.offset += offset
            # NOTE(review): this writes back the operand that is
            # already stored at the new position, so the encoded
            # jump distance is not actually changed here
            self.setJump(jump.offset, self.chunk.code[jump.offset + 1..jump.offset + 3])


proc fixLines(self: BytecodeCompiler, where, count: int, added: bool = true) =
    ## Fixes the line metadata of our
    ## bytecode chunk after the size of
    ## the code segment has changed. The
    ## "count" argument represents how
    ## many bytes were added or deleted
    ## from the code and the "added" argument
    ## tells fixLines that either count
    ## instructions were injected (added = true,
    ## the default) or that count instructions
    ## were removed (added = false). The where
    ## argument is the position where the code
    ## change was performed
    if added:
        # We don't do any bounds checking here because I doubt
        # there's ever going to be even close to int.high()
        # instructions on a line :P
        inc(self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(where)) + 1], count)
    else:
        if self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(where)) + 1] > 0:
            dec(self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(where)) + 1], count)


proc fixNames(self: BytecodeCompiler, where, oldLen: int) =
    ## Fixes the codePos field of our name objects
    ## after the size of the bytecode has changed
    let offset = self.chunk.code.len() - oldLen
    for name in self.names:
        if name.codePos > where:
            name.codePos += offset
            # NOTE(review): nesting reconstructed; functions are assumed to
            # move together with their codePos -- confirm against upstream
            if name.valueType.kind == Function:
                name.valueType.location += offset


proc insertAt(self: BytecodeCompiler, where: int, opcode: OpCode, data: openarray[uint8]): int {.used.} =
    ## Inserts the given instruction into the
    ## chunk's code segment and updates internal
    ## metadata to reflect this change. Returns
    ## the new location where the code was added
    ## plus one (useful for consecutive calls)
    result = where
    let oldLen = self.chunk.code.len()
    self.chunk.code.insert(uint8(opcode), where)
    inc(result)
    for i, item in data:
        self.chunk.code.insert(item, where + i + 1)
        inc(result)
    # Changing the size of our code segment forces us
    # to update all metadata that refers to a position
    # into it
    self.fixJumps(where, oldLen)
    self.fixLines(where, self.chunk.code.len() - oldLen, true)
    self.fixNames(where, oldLen)
    # The signature is (where, oldLen): the arguments
    # used to be passed the other way around
    self.fixFunctionOffsets(where, oldLen)


proc patchJump(self: BytecodeCompiler, offset: int) =
    ## Patches a previously emitted relative
    ## jump using emitJump. The offset argument
    ## is the identifier returned by emitJump
    var jump: int = self.chunk.code.len() - self.jumps[offset].offset
    if jump < 0:
        self.error("jump size cannot be negative (This is an internal error and most likely a bug)")
    if jump > 16777215:
        # TODO: Emit consecutive jumps using insertAt
        self.error("cannot jump more than 16777215 instructions")
    if jump > 0:
        # The jump instruction itself (opcode + 3-byte
        # operand) is not part of the distance to skip
        self.setJump(self.jumps[offset].offset, (jump - 4).toTriple())
        self.jumps[offset].patched = true


proc handleBuiltinFunction(self: BytecodeCompiler, fn: Type, args: seq[Expression], line: int) =
    ## Emits instructions for builtin functions
    ## such as addition or subtraction
    if fn.builtinOp notin ["LogicalOr", "LogicalAnd"]:
        # The logical operators compile their operands
        # themselves because they short circuit
        if len(args) == 2:
            self.expression(args[1])
            self.expression(args[0])
        elif len(args) == 1:
            self.expression(args[0])
    const codes: Table[string, OpCode] = {"Negate": Negate,
                                          "NegateFloat32": NegateFloat32,
                                          "NegateFloat64": NegateFloat64,
                                          "Add": Add,
                                          "Subtract": Subtract,
                                          "Divide": Divide,
                                          "Multiply": Multiply,
                                          "SignedDivide": SignedDivide,
                                          "AddFloat64": AddFloat64,
                                          "SubtractFloat64": SubtractFloat64,
                                          "DivideFloat64": DivideFloat64,
                                          "MultiplyFloat64": MultiplyFloat64,
                                          "AddFloat32": AddFloat32,
                                          "SubtractFloat32": SubtractFloat32,
                                          "DivideFloat32": DivideFloat32,
                                          "MultiplyFloat32": MultiplyFloat32,
                                          "Pow": Pow,
                                          "SignedPow": SignedPow,
                                          "PowFloat32": PowFloat32,
                                          "PowFloat64": PowFloat64,
                                          "Mod": Mod,
                                          "SignedMod": SignedMod,
                                          "ModFloat32": ModFloat32,
                                          "ModFloat64": ModFloat64,
                                          "Or": Or,
                                          "And": And,
                                          "Xor": Xor,
                                          "Not": Not,
                                          "LShift": LShift,
                                          "RShift": RShift,
                                          "Equal": Equal,
                                          "NotEqual": NotEqual,
                                          "LessThan": LessThan,
                                          "GreaterThan": GreaterThan,
                                          "LessOrEqual": LessOrEqual,
                                          "GreaterOrEqual": GreaterOrEqual,
                                          "SignedLessThan": SignedLessThan,
                                          "SignedGreaterThan": SignedGreaterThan,
                                          "SignedLessOrEqual": SignedLessOrEqual,
                                          "SignedGreaterOrEqual": SignedGreaterOrEqual,
                                          "Float32LessThan": Float32LessThan,
                                          "Float32GreaterThan": Float32GreaterThan,
                                          "Float32LessOrEqual": Float32LessOrEqual,
                                          "Float32GreaterOrEqual": Float32GreaterOrEqual,
                                          "Float64LessThan": Float64LessThan,
                                          "Float64GreaterThan": Float64GreaterThan,
                                          "Float64LessOrEqual": Float64LessOrEqual,
                                          "Float64GreaterOrEqual": Float64GreaterOrEqual,
                                          "PrintString": PrintString,
                                          "SysClock64": SysClock64,
                                          "LogicalNot": LogicalNot,
                                          "NegInf": LoadNInf,
                                          "Identity": Identity
                                          }.to_table()
    if fn.builtinOp == "print":
        let typ = self.inferOrError(args[0])
        case typ.kind:
            of Int64:
                self.emitByte(PrintInt64, line)
            of Int32:
                self.emitByte(PrintInt32, line)
            of Int16:
                self.emitByte(PrintInt16, line)
            of Int8:
                self.emitByte(PrintInt8, line)
            of UInt64:
                self.emitByte(PrintUInt64, line)
            of UInt32:
                self.emitByte(PrintUInt32, line)
            of UInt16:
                self.emitByte(PrintUInt16, line)
            of UInt8:
                self.emitByte(PrintUInt8, line)
            of Float64:
                self.emitByte(PrintFloat64, line)
            of Float32:
                self.emitByte(PrintFloat32, line)
            of String:
                self.emitByte(PrintString, line)
            of Bool:
                self.emitByte(PrintBool, line)
            of TypeKind.Nan:
                self.emitByte(PrintNan, line)
            of TypeKind.Inf:
                self.emitByte(PrintInf, line)
            of Function:
                # Functions are printed as a descriptive string
                # that is built at compile time
                self.emitByte(LoadString, line)
                var loc: string = typ.location.toHex()
                # Strips leading zeroes, keeping at least one digit
                while loc[0] == '0' and loc.len() > 1:
                    loc = loc[1..^1]
                var str: string
                if typ.isLambda:
                    str = &"anonymous function at 0x{loc}"
                else:
                    str = &"function '{FunDecl(typ.fun).name.token.lexeme}' at 0x{loc}"
                self.emitBytes(str.len().toTriple(), line)
                self.emitBytes(self.chunk.writeConstant(str.toBytes()), line)
                self.emitByte(PrintString, line)
            else:
                self.error(&"invalid type {self.stringify(typ)} for built-in 'print'", args[0])
        return
    if fn.builtinOp in codes:
        self.emitByte(codes[fn.builtinOp], line)
        return
    # Some builtin operations are slightly more complex
    # so we handle them separately
    case fn.builtinOp:
        of "LogicalOr":
            self.expression(args[0])
            let jump = self.emitJump(JumpIfTrue, line)
            self.expression(args[1])
            self.patchJump(jump)
        of "LogicalAnd":
            self.expression(args[0])
            let jump = self.emitJump(JumpIfFalseOrPop, line)
            self.expression(args[1])
            self.patchJump(jump)
        else:
            self.error(&"unknown built-in: '{fn.builtinOp}'", fn.fun)


proc patchForwardDeclarations(self: BytecodeCompiler) =
    ## Patches forward declarations and looks
    ## for their implementations so that calls
    ## to them work properly
    var impl: Name
    var pos: array[8, uint8]
    for (forwarded, position) in self.forwarded:
        impl = self.match(forwarded.ident.token.lexeme, forwarded.valueType, allowFwd=false)
        if forwarded.isPrivate != impl.isPrivate:
            self.error(&"implementation of '{impl.ident.token.lexeme}' has a mismatching visibility modifier from its forward declaration", impl.ident)
        if position == 0:
            continue
        # Overwrites the 8-byte placeholder in the constant
        # table with the address of the implementation
        pos = impl.codePos.toLong()
        for i, b in pos:
            self.chunk.consts[position + i] = b


proc endScope(self: BytecodeCompiler) =
    ## Ends the current local scope: emits the
    ## instructions that pop the variables going
    ## out of scope, warns about unused names and
    ## removes the dead names from the name list
    if self.depth < 0:
        self.error("cannot call endScope with depth < 0 (This is an internal error and most likely a bug)")
    dec(self.depth)
    # We keep track both of which names are going out of scope
    # and how many actually need to be popped off the call stack
    # at runtime (since only variables and function arguments
    # actually materialize at runtime)
    var names: seq[Name] = @[]
    var popCount = 0
    for name in self.names:
        if self.replMode and name.depth == 0:
            continue
        # We only pop names in scopes deeper than ours
        if name.depth > self.depth:
            if name.depth == 0 and not self.isMainModule:
                # Global names coming from other modules only go out of scope
                # when the global scope of the main module is closed (i.e. at
                # the end of the whole program)
                continue
            names.add(name)
            # Now we have to actually emit the pop instructions. First
            # off, we skip the names that will not exist at runtime,
            # because there's no need to emit any instructions to pop them
            # (we still remove them from the name list later so they can't
            # be referenced anymore, of course)
            if name.kind notin [NameKind.Var, NameKind.Argument]:
                continue
            elif name.kind == NameKind.Argument and not name.belongsTo.isNil():
                if name.belongsTo.isBuiltin:
                    # Arguments to builtin functions become temporaries on the
                    # stack and are popped automatically
                    continue
                if name.belongsTo.valueType.isAuto:
                    # Automatic functions do not materialize
                    # at runtime, so their arguments don't either
                    continue
                # This name has been generated internally by the
                # compiler and is a copy of an already existing
                # one, so we only need to pop its "real" counterpart
                if not name.isReal:
                    continue
            inc(popCount)
            if not name.resolved:
                # We emit warnings for names that are declared but never used
                case name.kind:
                    of NameKind.Var:
                        if not name.ident.token.lexeme.startsWith("_") and name.isPrivate:
                            self.warning(UnusedName, &"'{name.ident.token.lexeme}' is declared but not used (add '_' prefix to silence warning)", name)
                    of NameKind.Argument:
                        if not name.ident.token.lexeme.startsWith("_") and name.isPrivate:
                            if not name.belongsTo.isNil() and not name.belongsTo.isBuiltin and name.belongsTo.isReal and name.belongsTo.resolved:
                                # Builtin functions never use their arguments. We also don't emit this
                                # warning if the function was generated internally by the compiler (for
                                # example as a result of generic specialization) because such objects do
                                # not exist in the user's code and are likely duplicated anyway
                                self.warning(UnusedName, &"argument '{name.ident.token.lexeme}' is unused (add '_' prefix to silence warning)", name)
                    else:
                        discard
    dec(self.stackIndex, popCount)
    if popCount > 1:
        # If we're popping more than one variable,
        # we emit a bunch of PopN instructions until
        # the pop count reaches zero
        while popCount > 0:
            self.emitByte(PopN, self.peek().token.line)
            self.emitBytes(popCount.toDouble(), self.peek().token.line)
            popCount -= popCount.toDouble().fromDouble().int
    elif popCount == 1:
        # We only emit PopN if we're popping more than one value
        self.emitByte(PopC, self.peek().token.line)
    # Drops every name that went out of scope in a single
    # pass, instead of deleting items while scanning the list
    self.names.keepItIf(it notin names)


proc unpackGenerics(self: BytecodeCompiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) =
    ## Recursively unpacks a type constraint in a generic type.
    ## Each type found is appended to list together with a flag
    ## that tells whether it is accepted or rejected
    case condition.kind:
        of identExpr:
            list.add((accept, self.inferOrError(condition)))
            if list[^1].kind.kind == Auto:
                self.error("automatic types cannot be used within generics", condition)
        of binaryExpr:
            let condition = BinaryExpr(condition)
            case condition.operator.lexeme:
                of "|":
                    # Both sides inherit our polarity, so that
                    # ~(a | b) rejects a as well as b
                    self.unpackGenerics(condition.a, list, accept)
                    self.unpackGenerics(condition.b, list, accept)
                else:
                    self.error("invalid type constraint in generic declaration", condition)
        of unaryExpr:
            let condition = UnaryExpr(condition)
            case condition.operator.lexeme:
                of "~":
                    self.unpackGenerics(condition.a, list, accept=false)
                else:
                    self.error("invalid type constraint in generic declaration", condition)
        else:
            self.error("invalid type constraint in generic declaration", condition)


proc unpackUnion(self: BytecodeCompiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) =
    ## Recursively unpacks a type union. Each type
    ## found is appended to list together with a flag
    ## that tells whether it is accepted or rejected
    case condition.kind:
        of identExpr:
            list.add((accept, self.inferOrError(condition)))
        of binaryExpr:
            let condition = BinaryExpr(condition)
            case condition.operator.lexeme:
                of "|":
                    # Both sides inherit our polarity, so that
                    # ~(a | b) rejects a as well as b
                    self.unpackUnion(condition.a, list, accept)
                    self.unpackUnion(condition.b, list, accept)
                else:
                    self.error("invalid type constraint in type union", condition)
        of unaryExpr:
            let condition = UnaryExpr(condition)
            case condition.operator.lexeme:
                of "~":
                    self.unpackUnion(condition.a, list, accept=false)
                else:
                    self.error("invalid type constraint in type union", condition)
        else:
            self.error("invalid type constraint in type union", condition)


proc emitLoop(self: BytecodeCompiler, begin: int, line: int) =
    ## Emits a JumpBackwards instruction with the correct
    ## jump offset
    let offset = self.chunk.code.high() - begin + 4
    if offset > 16777215:
        # TODO: Emit consecutive jumps?
        self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(JumpBackwards, line)
    self.emitBytes(offset.toTriple(), line)


proc patchBreaks(self: BytecodeCompiler) =
    ## Patches the jumps emitted by
    ## breakStmt. This is needed
    ## because the size of code
    ## to skip is not known before
    ## the loop is fully compiled
    for brk in self.currentLoop.breakJumps:
        self.patchJump(brk)
    for blk in self.namedBlocks:
        for brk in blk.breakJumps:
            self.patchJump(brk)


proc handleMagicPragma(self: BytecodeCompiler, pragma: Pragma, name: Name) =
    ## Handles the "magic" pragma. Assumes the given name is already
    ## declared
    if pragma.args.len() != 1:
        self.error("'magic' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'magic' pragma: wrong type of argument (constant string expected)")
    elif name.node.kind == NodeKind.funDecl:
        name.isBuiltin = true
        # Strips the quotes from the string literal
        name.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2]
    elif name.node.kind == NodeKind.typeDecl:
        name.valueType = pragma.args[0].token.lexeme[1..^2].toIntrinsic()
        # The nil check must come before we look at the type's kind
        if name.valueType.isNil():
            self.error("'magic' pragma: wrong argument value", pragma.args[0])
        if name.valueType.kind == All:
            self.error("don't even think about it (compiler-chan is angry at you)", pragma)
        name.isBuiltin = true
    else:
        self.error("'magic' pragma is not valid in this context")


proc handleErrorPragma(self: BytecodeCompiler, pragma: Pragma, name: Name) =
    ## Handles the "error" pragma by raising a
    ## compile error with the user-provided message
    if pragma.args.len() != 1:
        self.error("'error' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'error' pragma: wrong type of argument (constant string expected)")
    elif not name.isNil() and name.node.kind != NodeKind.funDecl:
        self.error("'error' pragma is not valid in this context")
    self.error(pragma.args[0].token.lexeme[1..^2])


proc handlePurePragma(self: BytecodeCompiler, pragma: Pragma, name: Name) =
    ## Handles the "pure" pragma
    case name.node.kind:
        of NodeKind.funDecl:
            FunDecl(name.node).isPure = true
        of NodeKind.lambdaExpr:
            LambdaExpr(name.node).isPure = true
        else:
            self.error("'pure' pragma is not valid in this context")


method dispatchPragmas(self: BytecodeCompiler, name: Name) =
    ## Dispatches pragmas bound to objects. Only
    ## handlers of the Immediate kind are run here
    if name.node.isNil():
        return
    var pragmas: seq[Pragma] = @[]
    case name.node.kind:
        of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
            pragmas = Declaration(name.node).pragmas
        of NodeKind.lambdaExpr:
            pragmas = LambdaExpr(name.node).pragmas
        else:
            discard  # Unreachable
    var f: CompilerFunc
    for pragma in pragmas:
        if pragma.name.token.lexeme notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        f = self.compilerProcs[pragma.name.token.lexeme]
        if f.kind != Immediate:
            continue
        f.handler(self, pragma, name)


method dispatchDelayedPragmas(self: BytecodeCompiler, name: Name) =
    ## Dispatches pragmas bound to objects once they
    ## are called. Only applies to functions
    if name.node.isNil():
        return
    var pragmas: seq[Pragma] = @[]
    pragmas = Declaration(name.node).pragmas
    var f: CompilerFunc
    for pragma in pragmas:
        if pragma.name.token.lexeme notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        f = self.compilerProcs[pragma.name.token.lexeme]
        if f.kind == Immediate:
            continue
        f.handler(self, pragma, name)


proc patchReturnAddress(self: BytecodeCompiler, pos: int) =
    ## Patches the return address of a function
    ## call. The pos argument is the position of
    ## the 8-byte placeholder in the constant table,
    ## which is overwritten with the current length
    ## of the code segment
    let address = self.chunk.code.len().toLong()
    for i, b in address:
        self.chunk.consts[pos + i] = b


proc generateCall(self: BytecodeCompiler, fn: Type, args: seq[Expression], line: int) {.used.} =
    ## Version of generateCall that takes Type objects
    ## instead of Name objects (used for lambdas and
    ## consequent calls). The function's address is
    ## assumed to be on the stack
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    # Position of the dummy return address we just wrote
    let pos = self.chunk.consts.len() - 8
    for i, argument in reversed(args):
        # We pass the arguments in reverse
        # because of how stacks work. They'll
        # be reversed again at runtime
        self.check(argument, fn.args[^(i + 1)].kind)
        self.expression(argument)
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    self.emitByte(Call, line)
    self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(pos)


method prepareFunction(self: BytecodeCompiler, fn: Name) =
    ## "Prepares" a function declaration by declaring
    ## its arguments and typechecking it

    # First we declare the function's generics, if it has any.
    # This is because the function's return type may in itself
    # be a generic, so it needs to exist first
    var constraints: seq[tuple[match: bool, kind: Type]] = @[]
    for gen in fn.node.generics:
        self.unpackGenerics(gen.cond, constraints)
        self.names.add(Name(depth: fn.depth + 1,
                            isPrivate: true,
                            valueType: Type(kind: Generic, name: gen.name.token.lexeme, cond: constraints),
                            codePos: 0,
                            isLet: false,
                            line: fn.node.token.line,
                            belongsTo: fn,
                            ident: gen.name,
                            owner: self.currentModule,
                            file: self.file))
        constraints = @[]
    # We now declare and typecheck the function's
    # arguments
    let idx = self.stackIndex
    self.stackIndex = 1
    var node = FunDecl(fn.node)
    # Default values are matched against the trailing arguments
    # (assumes node.defaults only holds values for those). The
    # old check did not depend on the argument being visited, so
    # it paired defaults with the leading arguments and read past
    # the end of node.defaults when only some arguments had one
    let firstDefault = node.arguments.len() - node.defaults.len()
    for argIdx, argument in node.arguments:
        if self.names.high() > 16777215:
            self.error("cannot declare more than 16777215 variables at a time")
        inc(self.stackIndex)
        self.names.add(Name(depth: fn.depth + 1,
                            isPrivate: true,
                            owner: fn.owner,
                            file: fn.file,
                            isConst: false,
                            ident: argument.name,
                            valueType: if not fn.valueType.isAuto: self.inferOrError(argument.valueType) else: Type(kind: Any),
                            codePos: 0,
                            isLet: false,
                            line: argument.name.token.line,
                            belongsTo: fn,
                            kind: NameKind.Argument,
                            node: argument.name,
                            position: self.stackIndex,
                            isReal: not node.isTemplate
                            ))
        # Arguments without a default get a nil expression
        var default: Expression
        if argIdx >= firstDefault:
            # There's a default argument!
            default = node.defaults[argIdx - firstDefault]
        fn.valueType.args.add((self.names[^1].ident.token.lexeme, self.names[^1].valueType, default))
    # The function needs a return type too!
    if not FunDecl(fn.node).returnType.isNil():
        fn.valueType.returnType = self.inferOrError(FunDecl(fn.node).returnType)
    fn.position = self.stackIndex
    self.stackIndex = idx
    if node.isTemplate:
        fn.valueType.compiled = true


proc prepareAutoFunction(self: BytecodeCompiler, fn: Name, args: seq[tuple[name: string, kind: Type, default: Expression]]): Name =
    ## "Prepares" an automatic function declaration
    ## by declaring a concrete version of it along
    ## with its arguments. The given name is left
    ## untouched: a deep copy of it is declared and
    ## returned
    let idx = self.stackIndex
    self.stackIndex = 1
    var node = FunDecl(fn.node)
    var fn = deepCopy(fn)
    fn.valueType.isAuto = false
    fn.valueType.compiled = false
    self.names.add(fn)
    # We now declare the function's arguments
    # using the concrete types we were given
    for (argument, val) in zip(node.arguments, args):
        if self.names.high() > 16777215:
            self.error("cannot declare more than 16777215 variables at a time")
        inc(self.stackIndex)
        self.names.add(Name(depth: fn.depth + 1,
                            isPrivate: true,
                            owner: fn.owner,
                            file: fn.file,
                            isConst: false,
                            ident: argument.name,
                            valueType: val.kind,
                            codePos: 0,
                            isLet: false,
                            line: argument.name.token.line,
                            belongsTo: fn,
                            kind: NameKind.Argument,
                            node: argument.name,
                            position: self.stackIndex,
                            isReal: not node.isTemplate
                            ))
    if node.isTemplate:
        fn.valueType.compiled = true
    fn.valueType.args = args
    fn.position = self.stackIndex
    self.stackIndex = idx
    return fn


proc generateCall(self: BytecodeCompiler, fn: Name, args: seq[Expression], line: int) =
    ## Small wrapper that abstracts emitting a call instruction
    ## for a given function
    self.dispatchDelayedPragmas(fn)
    if fn.isBuiltin:
        self.handleBuiltinFunction(fn.valueType, args, line)
        return
    case fn.kind:
        of NameKind.Var:
            self.identifier(VarDecl(fn.node).name)
        of NameKind.Function:
            self.emitByte(LoadUInt64, line)
            self.emitBytes(self.chunk.writeConstant(fn.codePos.toLong()), line)
        else:
            discard  # Unreachable
    if fn.valueType.forwarded:
        # Remembers where the address constant starts so that
        # patchForwardDeclarations can fill it in later
        self.forwarded.add((fn, self.chunk.consts.high() - 7))
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    # Position of the dummy return address we just wrote
    let pos = self.chunk.consts.len() - 8
    for arg in reversed(args):
        self.expression(arg)
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    self.emitByte(Call, line)
    self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(pos)


proc specialize(self: BytecodeCompiler, typ: Type, args: seq[Expression]): Type {.discardable.} =
    ## Specializes a generic type.
    ## Used for typechecking at the
    ## call site. The given type is
    ## not modified: a specialized
    ## deep copy of it is returned
    var mapping: TableRef[string, Type] = newTable[string, Type]()
    var kind: Type
    result = deepCopy(typ)
    case result.kind:
        of TypeKind.Function:
            # This first loop checks if a user tries to reassign a generic's
            # name to a different type
            for i, (name, typ, default) in result.args:
                if typ.kind != Generic:
                    continue
                kind = self.inferOrError(args[i])
                if typ.name in mapping and not self.compare(kind, mapping[typ.name]):
                    self.error(&"expecting generic argument '{typ.name}' to be of type {self.stringify(mapping[typ.name])}, got {self.stringify(kind)}", args[i])
                mapping[typ.name] = kind
                result.args[i].kind = kind
            if not result.returnType.isNil() and result.returnType.kind == Generic:
                if result.returnType.name in mapping:
                    result.returnType = mapping[result.returnType.name]
                else:
                    self.error(&"unknown generic argument name '{result.returnType.name}'", result.fun)
        else:
            discard  # TODO: Custom user-defined types


proc terminateProgram(self: BytecodeCompiler, pos: int) =
    ## Utility to terminate a peon program. The
    ## pos argument is the value returned by
    ## beginProgram
    self.patchForwardDeclarations()
    self.endScope()
    if self.replMode:
        self.emitByte(ReplExit, self.peek().token.line)
    else:
        self.emitByte(OpCode.Return, self.peek().token.line)
        self.emitByte(0, self.peek().token.line)   # Entry point has no return value
    self.patchReturnAddress(pos)


proc beginProgram(self: BytecodeCompiler): int =
    ## Utility to begin a peon program's
    ## bytecode. Returns the position of
    ## a dummy return address of the program's
    ## entry point to be patched by terminateProgram
    if self.currentModule.isNil():
        # We declare the program's main module
        var mainModule = Name(kind: NameKind.Module,
                              depth: 0,
                              isPrivate: true,
                              isConst: false,
                              isLet: false,
                              owner: nil,
                              file: self.file,
                              path: self.file,
                              codePos: 0,
                              ident: newIdentExpr(Token(lexeme: self.file, kind: Identifier)),
                              resolved: true,
                              line: 1)
        self.names.add(mainModule)
        self.currentModule = mainModule
    # Every peon program has a hidden entry point in
    # which user code is wrapped. Think of it as if
    # peon is implicitly writing the main() function
    # of your program and putting all of your code in
    # there. While we call our entry point just like
    # any regular peon function, we can't use our handy
    # helper generateCall() because we need to keep track
    # of where our program ends (which we don't know yet).
    # To fix this, we emit dummy offsets and patch them
    # later, once we know the boundaries of our hidden main()
    var main = Name(depth: 0,
                    isPrivate: true,
                    isConst: false,
                    isLet: false,
                    owner: self.currentModule,
                    file: self.file,
                    valueType: Type(kind: Function,
                                    returnType: nil,
                                    args: @[],
                                    ),
                    # Skips the 12 bytes of call preamble emitted below
                    # (two 4-byte loads and a 4-byte Call)
                    codePos: self.chunk.code.len() + 12,
                    ident: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                    kind: NameKind.Function,
                    resolved: true,
                    line: 1)
    self.names.add(main)
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(main.codePos.toLong()), 1)
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), 1)
    result = self.chunk.consts.len() - 8
    self.emitByte(Call, 1)
    self.emitBytes(0.toTriple(), 1)


method literal(self: BytecodeCompiler, node: ASTNode, compile: bool = true): Type {.discardable.} =
    ## Emits instructions for literals such
    ## as singletons, strings and numbers
    case node.kind:
        of trueExpr:
            result = Type(kind: Bool)
            if compile:
                self.emitByte(LoadTrue, node.token.line)
        of falseExpr:
            result = Type(kind: Bool)
            if compile:
                self.emitByte(LoadFalse, node.token.line)
        of strExpr:
            result = Type(kind: String)
            if compile:
                self.emitConstant(LiteralExpr(node), Type(kind: String))
        of intExpr:
            let y = IntExpr(node)
            let kind = self.infer(y)
            result = kind
            if kind.kind in [Int64, Int32, Int16, Int8]:
                var x: int
                try:
                    discard parseInt(y.literal.lexeme, x)
                except ValueError:
                    self.error("integer value out of range")
            else:
                var x: uint64
                try:
                    discard parseBiggestUInt(y.literal.lexeme, x)
                except ValueError:
                    self.error("integer value out of range")
            if compile:
                self.emitConstant(y, kind)
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            result = self.infer(y)
            try:
                discard parseHex(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x)),
                                        relPos: (start: y.token.relPos.start, stop: y.token.relPos.start + len($x))
                                        )
                                  )
if compile: self.emitConstant(node, result) of binExpr: var x: int var y = BinExpr(node) result = self.infer(y) try: discard parseBin(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") let node = newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)), relPos: (start: y.token.relPos.start, stop: y.token.relPos.start + len($x)) ) ) if compile: self.emitConstant(node, result) of octExpr: var x: int var y = OctExpr(node) result = self.infer(y) try: discard parseOct(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") let node = newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)), relPos: (start: y.token.relPos.start, stop: y.token.relPos.start + len($x)) ) ) if compile: self.emitConstant(node, result) of floatExpr: var x: float var y = FloatExpr(node) result = self.infer(y) try: discard parseFloat(y.literal.lexeme, x) except ValueError: self.error("floating point value out of range") if compile: self.emitConstant(y, result) of awaitExpr: discard # TODO else: self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)") method unary(self: BytecodeCompiler, node: UnaryExpr, compile: bool = true): Type {.discardable.} = ## Compiles all unary expressions var default: Expression let fn = Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.inferOrError(node.a), default)]) let impl = self.match(node.token.lexeme, fn, node) result = impl.valueType if impl.isGeneric: result = self.specialize(result, @[node.a]) result = result.returnType if compile: self.generateCall(impl, @[node.a], impl.line) method binary(self: BytecodeCompiler, node: BinaryExpr, compile: bool = true): Type {.discardable.} = ## Compiles all binary expressions var default: Expression let fn = Type(kind: Function, returnType: Type(kind: Any), args: @[("", 
self.inferOrError(node.a), default), ("", self.inferOrError(node.b), default)]) let impl = self.match(node.token.lexeme, fn, node) result = impl.valueType if impl.isGeneric: result = self.specialize(result, @[node.a, node.b]) result = result.returnType if compile: self.generateCall(impl, @[node.a, node.b], impl.line) method identifier(self: BytecodeCompiler, node: IdentExpr, name: Name = nil, compile: bool = true, strict: bool = true): Type {.discardable.} = ## Compiles access to identifiers var s = name if s.isNil(): if strict: s = self.resolveOrError(node) else: s = self.resolve(node) if s.isNil() and not strict: return nil result = s.valueType if not compile: return result var node = s.ident if s.isConst: # Constants are always emitted as Load* instructions # no matter the scope depth if strict: self.emitConstant(VarDecl(s.node).value, self.inferOrError(node)) else: self.emitConstant(VarDecl(s.node).value, self.infer(node)) elif s.kind == NameKind.Function: # Functions have no runtime representation, they're just # a location to jump to, but we pretend they aren't and # resolve them to their address into our bytecode when # they're referenced self.emitByte(LoadUInt64, node.token.line) self.emitBytes(self.chunk.writeConstant(s.codePos.toLong()), node.token.line) elif s.isBuiltin: case s.ident.token.lexeme: of "nil": self.emitByte(LoadNil, node.token.line) of "nan": self.emitByte(LoadNan, node.token.line) of "inf": self.emitByte(LoadInf, node.token.line) else: discard # Unreachable else: if not s.belongsTo.isNil() and s.belongsTo.valueType.fun.kind == funDecl and FunDecl(s.belongsTo.valueType.fun).isTemplate: discard else: if s.depth > 0: # Loads a regular variable from the current frame self.emitByte(LoadVar, s.ident.token.line) # No need to check for -1 here: we already did a nil check above! 
self.emitBytes(s.position.toTriple(), s.ident.token.line) else: self.emitByte(LoadGlobal, s.ident.token.line) self.emitBytes(s.position.toTriple(), s.ident.token.line) method assignment(self: BytecodeCompiler, node: ASTNode, compile: bool = true): Type {.discardable.} = ## Compiles assignment expressions case node.kind: of assignExpr: let node = AssignExpr(node) let name = IdentExpr(node.name) var r = self.resolveOrError(name) if r.isConst: self.error(&"cannot assign to '{name.token.lexeme}' (value is a constant)", name) elif r.isLet: self.error(&"cannot reassign '{name.token.lexeme}' (value is immutable)", name) self.check(node.value, r.valueType) self.expression(node.value, compile) var position = r.position if r.depth < self.depth and r.belongsTo != self.currentFunction: self.warning(WarningKind.MutateOuterScope, &"mutation of '{r.ident.token.lexeme}' declared in outer scope ({r.owner.file}.pn:{r.ident.token.line}:{r.ident.token.relPos.start})", nil, node) result = r.valueType if not compile: return self.emitByte(StoreVar, node.token.line) self.emitBytes(position.toTriple(), node.token.line) of setItemExpr: let node = SetItemExpr(node) let name = IdentExpr(node.name) var r = self.resolveOrError(name) if r.isConst: self.error(&"cannot assign to '{name.token.lexeme}' (value is a constant)", name) elif r.isLet: self.error(&"cannot reassign '{name.token.lexeme}' (value is immutable)", name) if r.valueType.kind != CustomType: self.error("only types have fields", node) else: self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") method call(self: BytecodeCompiler, node: CallExpr, compile: bool = true): Type {.discardable.} = ## Compiles code to call a chain of function calls var args: seq[tuple[name: string, kind: Type, default: Expression]] = @[] var argExpr: seq[Expression] = @[] var default: Expression var kind: Type for i, argument in node.arguments.positionals: kind = self.infer(argument) # We 
don't use inferOrError so that we can raise a more appropriate error message later if kind.isNil(): if argument.kind == NodeKind.identExpr: self.error(&"reference to undefined name '{argument.token.lexeme}'", argument) self.error(&"positional argument {i + 1} in function call has no type", argument) args.add(("", kind, default)) argExpr.add(argument) for i, argument in node.arguments.keyword: kind = self.infer(argument.value) if kind.isNil(): if argument.value.kind == NodeKind.identExpr: self.error(&"reference to undefined name '{argument.value.token.lexeme}'", argument.value) self.error(&"keyword argument '{argument.name.token.lexeme}' in function call has no type", argument.value) args.add((argument.name.token.lexeme, kind, default)) argExpr.add(argument.value) case node.callee.kind: of NodeKind.identExpr: # Calls like hi() var impl = self.match(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: All), args: args), node) result = impl.valueType if impl.isGeneric: result = self.specialize(result, argExpr) if impl.valueType.isAuto: impl = self.prepareAutoFunction(impl, args) result = impl.valueType if result.fun.kind == NodeKind.lambdaExpr: self.lambdaExpr(LambdaExpr(result.fun), compile=compile) elif not impl.valueType.compiled: self.funDecl(FunDecl(result.fun), impl) result = result.returnType if compile: if impl.valueType.fun.kind == funDecl and FunDecl(impl.valueType.fun).isTemplate: for arg in reversed(argExpr): self.expression(arg) let code = BlockStmt(FunDecl(impl.valueType.fun).body).code for i, decl in code: if i < code.high(): self.declaration(decl) else: # The last expression in a template # is its "return value", so we compute # it, but don't pop it off the stack if decl.kind == exprStmt: self.expression(ExprStmt(decl).expression) else: self.declaration(decl) else: self.generateCall(impl, argExpr, node.token.line) of NodeKind.callExpr: # Calling a call expression, like hello()() var node: Expression = node var all: 
seq[CallExpr] = @[] # Since there can be as many consecutive calls as # the user wants, we need to "extract" all of them while CallExpr(node).callee.kind == callExpr: all.add(CallExpr(CallExpr(node).callee)) node = CallExpr(node).callee # Now that we know how many call expressions we # need to compile, we start from the outermost # one and work our way to the innermost call for exp in all: result = self.call(exp, compile) if compile and result.kind == Function: self.generateCall(result, argExpr, node.token.line) result = result.returnType of NodeKind.getItemExpr: var node = GetItemExpr(node.callee) let impl = self.match(node.name.token.lexeme, self.getItemExpr(node, compile=false, matching=Type(kind: Function, args: args, returnType: Type(kind: All))), node) result = impl.valueType if impl.isGeneric: result = self.specialize(result, argExpr) result = result.returnType self.generateCall(impl, argExpr, node.token.line) of NodeKind.lambdaExpr: var node = LambdaExpr(node.callee) var impl = self.lambdaExpr(node, compile=compile) result = impl.returnType if compile: self.generateCall(impl, argExpr, node.token.line) else: let typ = self.infer(node) if typ.isNil(): self.error(&"expression has no type", node) else: self.error(&"object of type '{self.stringify(typ)}' is not callable", node) method getItemExpr(self: BytecodeCompiler, node: GetItemExpr, compile: bool = true, matching: Type = nil): Type {.discardable.} = ## Compiles accessing to fields of a type or ## module namespace. If the compile flag is set ## to false, no code is generated for resolving ## the attribute. 
Returns the type of the object ## that is resolved case node.obj.kind: of identExpr: let name = self.resolveOrError(IdentExpr(node.obj)) case name.kind: of NameKind.Module: var values = self.findInModule(node.name.token.lexeme, name) if len(values) == 0: self.error(&"reference to undefined name '{node.name.token.lexeme}' in module '{name.ident.token.lexeme}'") elif len(values) > 1 and matching.isNil(): self.error(&"ambiguous reference for '{node.name.token.lexeme}' in module '{name.ident.token.lexeme}'") if not matching.isNil(): for name in values: if self.compare(name.valueType, matching): result = name.valueType return if len(values) == 1: result = values[0].valueType else: self.error(&"ambiguous reference for '{node.name.token.lexeme}' in module '{name.ident.token.lexeme}'") if compile: self.identifier(nil, values[0]) else: self.error("invalid syntax", node.obj) else: self.error("invalid syntax", node) proc blockStmt(self: BytecodeCompiler, node: BlockStmt, compile: bool = true) = ## Compiles block statements, which create ## a new local scope self.beginScope() var last: Declaration for decl in node.code: if not last.isNil(): case last.kind: of breakStmt, continueStmt: self.warning(UnreachableCode, &"code after '{last.token.lexeme}' statement is unreachable", nil, decl) else: discard self.declaration(decl) last = decl self.endScope() method lambdaExpr(self: BytecodeCompiler, node: LambdaExpr, compile: bool = true): Type {.discardable.} = ## Compiles lambda functions as expressions result = Type(kind: Function, isLambda: true, fun: node, location: 0, compiled: true) let function = self.currentFunction var default: Expression var name: Name var i = 0 let stackIdx = self.stackIndex self.stackIndex = 2 for argument in node.arguments: if self.names.high() > 16777215: self.error("cannot declare more than 16777215 variables at a time") name = Name(depth: self.depth + 1, isPrivate: true, owner: self.currentModule, file: self.currentModule.file, isConst: false, ident: 
argument.name, valueType: self.inferOrError(argument.valueType), codePos: 0, isLet: false, line: argument.name.token.line, belongsTo: nil, # TODO kind: NameKind.Argument, node: argument.name, position: self.stackIndex ) if name.valueType.kind == Auto: self.error("due to current compiler limitations, automatic types cannot be used in lambdas", name.ident) if compile: self.names.add(name) inc(self.stackIndex) if node.arguments.high() - node.defaults.high() <= node.arguments.high(): # There's a default argument! result.args.add((name.ident.token.lexeme, name.valueType, node.defaults[i])) inc(i) else: # This argument has no default result.args.add((name.ident.token.lexeme, name.valueType, default)) # The function needs a return type too! if not node.returnType.isNil(): result.returnType = self.inferOrError(node.returnType) self.currentFunction = Name(depth: self.depth, isPrivate: true, isConst: false, owner: self.currentModule, file: self.file, valueType: result, ident: nil, node: node, isLet: false, line: node.token.line, kind: NameKind.Function, belongsTo: function, isReal: true, ) if compile and node notin self.lambdas and not node.body.isNil(): self.lambdas.add(node) let jmp = self.emitJump(JumpForwards, node.token.line) if BlockStmt(node.body).code.len() == 0: self.error("cannot construct lambda with empty body") var last: Declaration self.beginScope() result.location = self.chunk.code.len() for decl in BlockStmt(node.body).code: if not last.isNil(): if last.kind == returnStmt: self.warning(UnreachableCode, "code after 'return' statement is unreachable", nil, decl) self.declaration(decl) last = decl let typ = self.currentFunction.valueType.returnType var hasVal: bool = false case self.currentFunction.valueType.fun.kind: of NodeKind.funDecl: hasVal = FunDecl(self.currentFunction.valueType.fun).hasExplicitReturn of NodeKind.lambdaExpr: hasVal = LambdaExpr(self.currentFunction.valueType.fun).hasExplicitReturn else: discard # Unreachable if not hasVal and not 
typ.isNil(): # There is no explicit return statement anywhere in the function's # body: while this is not a tremendously useful piece of information # (since the presence of at least one doesn't mean all control flow # cases are covered), it definitely is an error worth reporting self.error("function has an explicit return type, but no return statement was found", node) hasVal = hasVal and not typ.isNil() for jump in self.currentFunction.valueType.retJumps: self.patchJump(jump) # Terminates the function's context self.emitByte(OpCode.Return, self.peek().token.line) if hasVal: self.emitByte(1, self.peek().token.line) else: self.emitByte(0, self.peek().token.line) # Well, we've compiled everything: time to patch # the jump offset self.patchJump(jmp) self.emitByte(LoadUInt64, node.token.line) self.emitBytes(self.chunk.writeConstant(result.location.toLong()), node.token.line) self.endScope() # Restores the enclosing function (if any). # Makes nested calls work (including recursion) self.currentFunction = function self.stackIndex = stackIdx method expression(self: BytecodeCompiler, node: Expression, compile: bool = true): Type {.discardable.} = ## Compiles all expressions case node.kind: of NodeKind.callExpr: return self.call(CallExpr(node), compile) of NodeKind.getItemExpr: return self.getItemExpr(GetItemExpr(node), compile) of NodeKind.pragmaExpr: discard # TODO # Note that for setItem and assign we don't convert # the node to its true type because that type information # would be lost in the call anyway. 
The differentiation # happens in self.assignment() of NodeKind.setItemExpr, NodeKind.assignExpr: return self.assignment(node, compile) of NodeKind.identExpr: return self.identifier(IdentExpr(node), compile=compile) of NodeKind.unaryExpr: # Unary expressions such as ~5 and -3 return self.unary(UnaryExpr(node), compile) of NodeKind.groupingExpr: # Grouping expressions like (2 + 1) return self.expression(GroupingExpr(node).expression, compile) of NodeKind.binaryExpr: # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 return self.binary(BinaryExpr(node)) of NodeKind.intExpr, NodeKind.hexExpr, NodeKind.binExpr, NodeKind.octExpr, NodeKind.strExpr, NodeKind.falseExpr, NodeKind.trueExpr, NodeKind.floatExpr: # Since all of these AST nodes share the # same overall structure and the kind # field is enough to tell one from the # other, why bother with specialized # cases when one is enough? return self.literal(node, compile) of NodeKind.lambdaExpr: return self.lambdaExpr(LambdaExpr(node), compile) else: self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") proc ifStmt(self: BytecodeCompiler, node: IfStmt) = ## Compiles if/else statements for conditional ## execution of code self.check(node.condition, Type(kind: Bool)) self.expression(node.condition) let jump = self.emitJump(JumpIfFalsePop, node.token.line) self.statement(node.thenBranch) let jump2 = self.emitJump(JumpForwards, node.token.line) self.patchJump(jump) if not node.elseBranch.isNil(): self.statement(node.elseBranch) self.patchJump(jump2) proc whileStmt(self: BytecodeCompiler, node: WhileStmt) = ## Compiles C-style while loops and ## desugared C-style for loops self.check(node.condition, Type(kind: Bool)) let start = self.chunk.code.high() self.expression(node.condition) let jump = self.emitJump(JumpIfFalsePop, node.token.line) self.statement(node.body) self.emitLoop(start, node.token.line) self.patchJump(jump) # TODO proc awaitStmt(self: 
BytecodeCompiler, node: AwaitStmt) = ## Compiles await statements # TODO proc deferStmt(self: BytecodeCompiler, node: DeferStmt) = ## Compiles defer statements # TODO proc yieldStmt(self: BytecodeCompiler, node: YieldStmt) = ## Compiles yield statements # TODO proc raiseStmt(self: BytecodeCompiler, node: RaiseStmt) = ## Compiles raise statements # TODO proc assertStmt(self: BytecodeCompiler, node: AssertStmt) = ## Compiles assert statements # TODO # TODO proc forEachStmt(self: BytecodeCompiler, node: ForEachStmt) = ## Compiles foreach loops proc returnStmt(self: BytecodeCompiler, node: ReturnStmt) = ## Compiles return statements if self.currentFunction.valueType.returnType.isNil() and not node.value.isNil(): self.error("cannot return a value from a void function", node.value) elif not self.currentFunction.valueType.returnType.isNil() and node.value.isNil(): self.error("bare return statement is only allowed in void functions", node) if not node.value.isNil(): if self.currentFunction.valueType.returnType.kind == Auto: self.currentFunction.valueType.returnType = self.inferOrError(node.value) self.check(node.value, self.currentFunction.valueType.returnType) self.expression(node.value) self.emitByte(OpCode.SetResult, node.token.line) # Since the "set result" part and "exit the function" part # of our return mechanism are already decoupled into two # separate opcodes, we perform the former and then jump to # the function's last return statement, which is always emitted # by funDecl() at the end of the function's lifecycle, greatly # simplifying the design, since now there's just one return # instruction to jump to instead of many potential points # where the function returns from. 
Note that depending on whether # the function has any local variables or not, this jump might be # patched to jump to the function's PopN/PopC instruction(s) rather # than straight to the return statement self.currentFunction.valueType.retJumps.add(self.emitJump(JumpForwards, node.token.line)) proc continueStmt(self: BytecodeCompiler, node: ContinueStmt, compile: bool = true) = ## Compiles continue statements. A continue statement can be ## used to jump to the beginning of a loop or block if node.label.isNil(): if self.currentLoop.start > 16777215: self.error("too much code to jump over in continue statement") if compile: self.emitByte(Jump, node.token.line) self.emitBytes(self.currentLoop.start.toTriple(), node.token.line) else: var blocks: seq[NamedBlock] = @[] var found: bool = false for blk in reversed(self.namedBlocks): blocks.add(blk) if blk.name == node.label.token.lexeme: found = true break if not found: self.error(&"unknown block name '{node.label.token.lexeme}'", node.label) if compile: self.emitByte(Jump, node.token.line) self.emitBytes(blocks[^1].start.toTriple(), node.token.line) proc importStmt(self: BytecodeCompiler, node: ImportStmt, compile: bool = true) = ## Imports a module. 
This creates a new "virtual" ## (i.e simulated) module namespace and injects all ## of the module's public names into the current module self.declare(node) var module = self.names[^1] try: if compile: self.compileModule(module) # Importing a module automatically exports # its public names to us for name in self.findInModule("", module): name.exportedTo.incl(self.currentModule.path) for module in self.modules.values(): if self.currentModule.path in module.exportedTo: for name in self.findInModule("", module): name.exportedTo.incl(self.currentModule.path) except IOError: self.error(&"could not import '{module.ident.token.lexeme}': {getCurrentExceptionMsg()}") except OSError: self.error(&"could not import '{module.ident.token.lexeme}': {getCurrentExceptionMsg()} [errno {osLastError()}]") proc exportStmt(self: BytecodeCompiler, node: ExportStmt, compile: bool = true) = ## Exports a name at compile time to ## all modules importing us. The user ## needs to explicitly tell the compiler ## which of the names it imported, if any, ## should be made available to other modules ## importing it in order to avoid namespace ## pollution var name = self.resolveOrError(node.name) if name.isPrivate: self.error("cannot export private names") name.exportedTo.incl(self.parentModule.path) case name.kind: of NameKind.Module: # We need to export everything # this module defines! for name in self.findInModule("", name): name.exportedTo.incl(self.parentModule.path) of NameKind.Function: # Only exporting a single function (or, well # all of its implementations) for name in self.findByName(name.ident.token.lexeme): if name.kind != NameKind.Function: continue name.exportedTo.incl(self.parentModule.path) else: self.error("unsupported export type") proc breakStmt(self: BytecodeCompiler, node: BreakStmt) = ## Compiles break statements. 
A break statement is used ## to jump at the end of a loop or outside of a given ## block if node.label.isNil(): # Jumping out of a loop self.currentLoop.breakJumps.add(self.emitJump(OpCode.JumpForwards, node.token.line)) if self.currentLoop.depth > self.depth: # Breaking out of a loop closes its scope self.endScope() else: # Jumping out of a block var blocks: seq[NamedBlock] = @[] var found: bool = false for blk in reversed(self.namedBlocks): blocks.add(blk) if blk.name == node.label.token.lexeme: for blk in blocks: blk.broken = true found = true break if not found: self.error(&"unknown block name '{node.label.token.lexeme}'", node.label) proc namedBlock(self: BytecodeCompiler, node: NamedBlockStmt) = ## Compiles named blocks self.namedBlocks.add(NamedBlock(start: self.chunk.code.len(), # Creates a new block entry depth: self.depth, breakJumps: @[], name: NamedBlockStmt(node).name.token.lexeme)) self.beginScope() var blk = self.namedBlocks[^1] var last: Declaration for decl in node.code: if not last.isNil(): case last.kind: of NodeKind.breakStmt, NodeKind.continueStmt: self.warning(UnreachableCode, &"code after '{last.token.lexeme}' statement is unreachable", nil, decl) else: discard if blk.broken: blk.breakJumps.add(self.emitJump(OpCode.JumpForwards, node.token.line)) self.declaration(decl) last = decl self.patchBreaks() self.endScope() discard self.namedBlocks.pop() proc switchStmt(self: BytecodeCompiler, node: SwitchStmt) = ## Compiles C-style switch statements self.expression(node.switch) let typeOfA = self.inferOrError(node.switch) var ifJump: int = -1 var thenJumps: seq[int] = @[] var fn: Type var impl: Name var default: Expression # Note that, unlike C switch statements, we don't # cascade to other branches once the first one matches for branch in node.branches: # We duplicate the top of the stack so we can safely # pop the topmost expression without losing its value # for later comparisons self.emitByte(DupTop, branch.body.token.line) 
self.expression(branch.cond) # We look for a matching equality implementation fn = Type(kind: Function, returnType: Type(kind: Bool), args: @[("", typeOfA, default), ("", self.inferOrError(branch.cond), default)]) impl = self.match("==", fn, node) self.generateCall(impl, @[node.switch, branch.cond], impl.line) ifJump = self.emitJump(JumpIfFalsePop, branch.body.token.line) self.blockStmt(branch.body) thenJumps.add(self.emitJump(JumpForwards, branch.body.token.line)) self.patchJump(ifJump) if not node.default.isNil(): self.blockStmt(node.default) for jump in thenJumps: self.patchJump(jump) self.emitByte(OpCode.Pop, node.token.line) proc statement(self: BytecodeCompiler, node: Statement) = ## Compiles all statements case node.kind: of exprStmt: # An expression statement is just a statement # followed by a statement terminator (semicolon) let expression = ExprStmt(node).expression let kind = self.infer(expression) self.expression(expression) if kind.isNil(): # The expression has no type and produces no value, # so we don't have to pop anything discard elif self.replMode: self.printRepl(kind, expression) else: self.emitByte(Pop, node.token.line) of NodeKind.switchStmt: self.switchStmt(SwitchStmt(node)) of NodeKind.namedBlockStmt: self.namedBlock(NamedBlockStmt(node)) of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) of NodeKind.assertStmt: self.assertStmt(AssertStmt(node)) of NodeKind.raiseStmt: self.raiseStmt(RaiseStmt(node)) of NodeKind.breakStmt: self.breakStmt(BreakStmt(node)) of NodeKind.continueStmt: self.continueStmt(ContinueStmt(node)) of NodeKind.returnStmt: self.returnStmt(ReturnStmt(node)) of NodeKind.importStmt: self.importStmt(ImportStmt(node)) of NodeKind.exportStmt: self.exportStmt(ExportStmt(node)) of NodeKind.whileStmt: let loop = self.currentLoop self.currentLoop = Loop(start: self.chunk.code.len(), depth: self.depth, breakJumps: @[]) self.whileStmt(WhileStmt(node)) self.patchBreaks() self.currentLoop = loop of NodeKind.forEachStmt: 
self.forEachStmt(ForEachStmt(node)) of NodeKind.blockStmt: self.blockStmt(BlockStmt(node)) of NodeKind.yieldStmt: self.yieldStmt(YieldStmt(node)) of NodeKind.awaitStmt: self.awaitStmt(AwaitStmt(node)) of NodeKind.deferStmt: self.deferStmt(DeferStmt(node)) of NodeKind.tryStmt: discard else: self.expression(Expression(node))


proc varDecl(self: BytecodeCompiler, node: VarDecl) =
    ## Compiles variable declarations: resolves the type of
    ## the variable, emits the code for its value followed by
    ## an AddVar instruction and finally declares the name,
    ## binding it to the stack slot that was just occupied
    var typ: Type
    # Our parser guarantees that the variable declaration
    # will have a type declaration or a value (or both)
    if node.value.isNil():
        # Variable has no value: the type declaration
        # takes over
        typ = self.inferOrError(node.valueType)
        if typ.kind == Auto:
            self.error("automatic types require initialization", node)
    elif node.valueType.isNil():
        # Variable has no type declaration: the type
        # of its value takes over
        typ = self.inferOrError(node.value)
    else:
        # Variable has both a type declaration and
        # a value: the value's type must match the
        # type declaration
        let expected = self.inferOrError(node.valueType)
        if expected.kind != Auto:
            self.check(node.value, expected)
            # If this doesn't fail, then we're good
            typ = expected
        else:
            # Let the compiler infer the type (this
            # is the default behavior already, but
            # some users may prefer to be explicit!)
            typ = self.infer(node.value)
    # NOTE(review): node.value is handed to expression() even
    # when it is nil (declaration without a value) -- confirm
    # that expression() copes with a nil node
    self.expression(node.value)
    self.emitByte(AddVar, node.token.line)
    inc(self.stackIndex)
    # We declare the name only now in order to make
    # sure that stuff like var n = n; works as expected.
    # If we declared it early, we'd have a duplicate with
    # no type that would shadow the original value, which
    # is no good
    let name = self.declare(node)
    name.position = self.stackIndex
    name.valueType = typ


proc funDecl(self: BytecodeCompiler, node: FunDecl, name: Name) =
    ## Compiles function declarations. Forward declarations
    ## (functions with a nil body) are only recorded in
    ## self.forwarded, while builtins and templates emit no
    ## code at all. Any other function is compiled in place,
    ## behind a forward jump that skips over its body, and its
    ## boundaries, arity and name are appended to the chunk's
    ## function table
    if node.token.kind == Operator and node.name.token.lexeme in [".", "="]:
        self.error(&"Due to compiler limitations, the '{node.name.token.lexeme}' operator cannot be currently overridden", node.name)
    # We store the current function to restore
    # it later
    let function = self.currentFunction
    if node.body.isNil():
        # When we stumble across a forward declaration,
        # we record it for later so we can look it up at
        # the end of the module
        self.forwarded.add((name, 0))
        name.valueType.forwarded = true
        self.currentFunction = function
        return
    self.currentFunction = name
    if self.currentFunction.isBuiltin:
        self.currentFunction = function
        return
    let stackIdx = self.stackIndex
    self.stackIndex = name.position
    if not node.isTemplate:
        # A function's code is just compiled linearly
        # and then jumped over
        name.valueType.compiled = true
        let jmp = self.emitJump(JumpForwards, node.token.line)
        name.codePos = self.chunk.code.len()
        name.valueType.location = name.codePos
        # We let our debugger know this function's boundaries
        self.chunk.functions.add(self.chunk.code.len().toTriple())
        self.functions.add((start: self.chunk.code.len(), stop: 0, pos: self.chunk.functions.len() - 3, fn: name))
        # Tuples are value types, so we remember the index of our
        # entry rather than taking a copy of it: writing to a copy
        # would never update self.functions. We can't just use ^1
        # later on either, because nested functions append their own
        # entries while our body is being compiled
        let fnIdx = self.functions.high()
        let idx = self.chunk.functions.len()
        self.chunk.functions.add(0.toTriple())  # Patched later
        self.chunk.functions.add(uint8(node.arguments.len()))
        if not node.name.isNil():
            # The name is stored with a 2-byte length prefix: we
            # truncate it first, so that the prefix always matches
            # the number of bytes that are actually written
            var fnName = name.ident.token.lexeme
            if fnName.len() > uint16.high().int:
                fnName = fnName[0 ..< uint16.high().int]
            self.chunk.functions.add(fnName.len().toDouble())
            self.chunk.functions.add(fnName.toBytes())
        else:
            self.chunk.functions.add(0.toDouble())
        if BlockStmt(node.body).code.len() == 0:
            self.error("cannot declare function with empty body")
        var last: Declaration
        self.beginScope()
        for decl in BlockStmt(node.body).code:
            if not last.isNil() and last.kind == returnStmt:
                self.warning(UnreachableCode, "code after 'return' statement is unreachable", nil, decl)
            self.declaration(decl)
            last = decl
        let typ = self.currentFunction.valueType.returnType
        var hasVal: bool = false
        case self.currentFunction.valueType.fun.kind:
            of NodeKind.funDecl:
                hasVal = FunDecl(self.currentFunction.valueType.fun).hasExplicitReturn
            of NodeKind.lambdaExpr:
                hasVal = LambdaExpr(self.currentFunction.valueType.fun).hasExplicitReturn
            else:
                discard  # Unreachable
        if not hasVal and not typ.isNil():
            # There is no explicit return statement anywhere in the function's
            # body: while this is not a tremendously useful piece of information
            # (since the presence of at least one doesn't mean all control flow
            # cases are covered), it definitely is an error worth reporting
            self.error("function has an explicit return type, but no return statement was found", node)
        hasVal = hasVal and not typ.isNil()
        for jump in self.currentFunction.valueType.retJumps:
            self.patchJump(jump)
        self.endScope()
        # Terminates the function's context
        let stop = self.chunk.code.len().toTriple()
        self.emitByte(OpCode.Return, self.peek().token.line)
        # The operand of Return is 1 when the function
        # returns a value and 0 otherwise
        if hasVal:
            self.emitByte(1, self.peek().token.line)
        else:
            self.emitByte(0, self.peek().token.line)
        # Patches the placeholder end offset in the chunk's function table
        self.chunk.functions[idx] = stop[0]
        self.chunk.functions[idx + 1] = stop[1]
        self.chunk.functions[idx + 2] = stop[2]
        self.functions[fnIdx].stop = self.chunk.code.len()
        # Well, we've compiled everything: time to patch
        # the jump offset
        self.patchJump(jmp)
    # Restores the enclosing function (if any).
    # Makes nested calls work (including recursion)
    self.currentFunction = function
    self.stackIndex = stackIdx


proc typeDecl(self: BytecodeCompiler, node: TypeDecl, name: Name) =
    ## Compiles type declarations. Reports an error when a
    ## field of a non-ref type has the very type that is
    ## being declared
    for field in node.fields:
        if self.compare(self.inferOrError(field.valueType), name.valueType) and not node.isRef:
            self.error(&"illegal type recursion for non-ref type '{name.ident.token.lexeme}'")


proc declaration(self: BytecodeCompiler, node: Declaration) =
    ## Compiles declarations, statements and expressions
    ## recursively
    case node.kind:
        of NodeKind.funDecl:
            let name = self.declare(node)
            if not name.valueType.isAuto:
                # We can't compile automatic functions right
                # away because we need to know the type of the
                # arguments in their signature, and this info is
                # not available at declaration time
                self.funDecl(FunDecl(node), name)
            if name.isGeneric:
                # After we're done compiling a generic
                # function, we pull a magic trick: since
                # from here on the user will be able to
                # call this with any of the types in the
                # generic constraint, we switch every generic
                # to a type union (which, conveniently, have an
                # identical layout) so that the compiler will
                # typecheck the function as if its arguments
                # were all types of the constraint at once,
                # while still allowing the user to call it with
                # any type in said constraint
                for argument in name.valueType.args:
                    if argument.kind.kind == Generic:
                        argument.kind.asUnion = true
                if not name.valueType.returnType.isNil() and name.valueType.returnType.kind == Generic:
                    name.valueType.returnType.asUnion = true
        of NodeKind.typeDecl:
            self.typeDecl(TypeDecl(node), self.declare(node))
        of NodeKind.varDecl:
            self.varDecl(VarDecl(node))
        else:
            self.statement(Statement(node))


proc compile*(self: BytecodeCompiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              incremental: bool = false, isMainModule: bool = true, disabledWarnings: seq[WarningKind] = @[], showMismatches: bool = false,
              mode: CompileMode = Debug): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object. A new chunk is created when none is given.
    ## In REPL mode the AST, the source and the line info
    ## are appended to the existing ones instead of replacing
    ## them. Unless incremental is true, the recorded jumps
    ## and the table of imported modules are reset. Returns
    ## the chunk the code was written to
    if chunk.isNil():
        self.chunk = newChunk()
    else:
        self.chunk = chunk
    self.file = file
    self.depth = 0
    self.currentFunction = nil
    if self.replMode:
        self.ast &= ast
        self.source &= "\n" & source
        self.lines &= lines
    else:
        self.ast = ast
        self.current = 0
        self.stackIndex = 1
        self.lines = lines
        self.source = source
    self.isMainModule = isMainModule
    self.disabledWarnings = disabledWarnings
    self.showMismatches = showMismatches
    self.mode = mode
    if not incremental:
        self.jumps = @[]
        self.modules = newTable[string, Name]()
    let pos = self.beginProgram()
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.terminateProgram(pos)
    result = self.chunk


proc compileModule(self: BytecodeCompiler, module: Name) =
    ## Compiles an imported module into an existing chunk
    ## using the compiler's internal parser and lexer objects.
    ## The module's file is looked up in each entry of
    ## moduleLookupPaths (an empty entry stands for the directory
    ## of the file being compiled) and an error is reported if
    ## it can't be found in any of them. Modules that were
    ## already imported are not compiled again
    var path = ""
    let moduleName = module.path & ".pn"
    for i, searchPath in moduleLookupPaths:
        if searchPath == "":
            path = absolutePath(joinPath(splitPath(self.file).head, moduleName))
        else:
            path = joinPath(searchPath, moduleName)
        if fileExists(path):
            break
        elif i == moduleLookupPaths.high():
            # We ran out of lookup paths. Note that the index must be
            # compared with the last index of the list of paths and not
            # with that of the current path, which is just a string
            self.error(&"""could not import '{path}': module not found""")
    if self.modules.hasKey(module.path):
        return
    let source = readFile(path)
    # Saves the state of the module that is currently being
    # compiled, since compile() is about to overwrite it
    let current = self.current
    let ast = self.ast
    let file = self.file
    let lines = self.lines
    let src = self.source
    let currentModule = self.currentModule
    let mainModule = self.isMainModule
    let parentModule = self.parentModule
    let replMode = self.replMode
    self.replMode = false
    self.parentModule = currentModule
    self.currentModule = module
    let start = self.chunk.code.len()
    discard self.compile(self.parser.parse(self.lexer.lex(source, path), path, self.lexer.getLines(), self.lexer.getSource(), persist=true),
                         path, self.lexer.getLines(), self.lexer.getSource(), chunk=self.chunk, incremental=true,
                         isMainModule=false, self.disabledWarnings, self.showMismatches, self.mode)
    # Mark the end of a new module
    self.chunk.modules.extend(start.toTriple())
    self.chunk.modules.extend(self.chunk.code.high().toTriple())
    # I swear to god if someone ever creates a peon module with a name that's
    # longer than 2^16 bytes I will hit them with a metal pipe. Mark my words
    self.chunk.modules.extend(self.currentModule.ident.token.lexeme.len().toDouble())
    self.chunk.modules.extend(self.currentModule.ident.token.lexeme.toBytes())
    module.file = path
    # No need to save the old scope depth: import statements are
    # only allowed at the top level!
    self.depth = 0
    # Restores the state of the importing module
    self.current = current
    self.ast = ast
    self.file = file
    self.currentModule = currentModule
    self.isMainModule = mainModule
    self.parentModule = parentModule
    self.replMode = replMode
    self.lines = lines
    self.source = src
    self.modules[module.path] = module