# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../config
import ../util/multibyte

import strformat
import algorithm
import parseutils
import strutils
import sequtils
import os


export ast
export token
export multibyte


type
  TypeKind* = enum
    ## An enumeration of compile-time
    ## types
    Int8, UInt8, Int16, UInt16, Int32,
    UInt32, Int64, UInt64, Float32, Float64,
    Char, Byte, String, Function, CustomType,
    Nil, Nan, Bool, Inf, Typedesc, Generic,
    # Any is used internally in a few cases,
    # for example when looking for operators
    # when only the type of the arguments is of
    # interest
    Any
  Type* = ref object
    ## A wrapper around
    ## compile-time types
    node*: ASTNode
    case kind*: TypeKind:
      of Function:
        args*: seq[Type]
        returnType*: Type
      else:
        discard

# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode


type
  Name = ref object
    ## A compile-time wrapper around
    ## statically resolved names

    # Name of the identifier
    name: IdentExpr
    # Owner of the identifier (module)
    owner: string
    # Scope depth
    depth: int
    # Is this name private?
    isPrivate: bool
    # Is this a constant?
    isConst: bool
    # Can this name's value be mutated?
    isLet: bool
    # The name's type
    valueType: Type
    # For functions, this marks where the function's
    # code begins. For variables, this stores their
    # position in the stack (used for closures)
    codePos: int
  Loop = object
    ## A "loop object" used
    ## by the compiler to emit
    ## appropriate jump offsets
    ## for continue and break
    ## statements

    # Position in the bytecode where the loop starts
    start: int
    # Scope depth where the loop is located
    depth: int
    # Absolute jump offsets into our bytecode that we need to
    # patch. Used for break statements
    breakPos: seq[int]

  Compiler* = ref object
    ## A wrapper around the Peon compiler's state

    # The bytecode chunk where we write code to
    chunk: Chunk
    # The output of our parser (AST)
    ast: seq[Declaration]
    # The current AST node we're looking at
    current: int
    # The current file being compiled (used only for
    # error reporting)
    file: string
    # Compile-time "simulation" of the stack at
    # runtime to load variables that have stack
    # behavior more efficiently
    names: seq[Name]
    # Beginning of stack frames for function calls
    frames: seq[int]
    # The current scope depth. If > 0, we're
    # in a local scope, otherwise it's global
    scopeDepth: int
    # The current function being compiled
    currentFunction: FunDecl
    # Are optimizations turned on?
    enableOptimizations*: bool
    # The current loop being compiled (used to
    # keep track of where to jump)
    currentLoop: Loop
    # The current module being compiled
    # (used to restrict access to statically
    # defined variables at compile time)
    currentModule: string
    # Each time a defer statement is
    # compiled, its code is emitted
    # here. Later, if there is any code
    # to defer in the current function,
    # funDecl will wrap the function's code
    # inside an implicit try/finally block
    # and add this code in the finally branch.
    # This sequence is emptied each time a
    # function declaration is compiled and stores only
    # deferred code for the current function (may
    # be empty)
    deferred: seq[uint8]
    # List of closed-over variables
    closedOver: seq[Name]


proc newCompiler*(enableOptimizations: bool = true): Compiler =
  ## Initializes a new Compiler object. The bytecode chunk,
  ## the current loop, the deferred code and the list of
  ## closed-over variables are left at their default values
  new(result)
  result.ast = @[]
  result.current = 0
  result.file = ""
  result.names = @[]
  result.scopeDepth = 0
  result.currentFunction = nil
  result.enableOptimizations = enableOptimizations
  result.currentModule = ""
  result.frames = @[]


## Forward declarations
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type): seq[Name]
proc compareTypes(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, retAddr: int)
## End of forward declarations


## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
  ## Returns the most recently consumed AST node (the
  ## one at self.current - 1). The last node is returned
  ## once the whole tree has been consumed, the first one
  ## if nothing has been consumed yet and nil if there
  ## are no nodes at all
  if self.ast.len() == 0:
    return nil
  if self.current >= self.ast.len():
    return self.ast[^1]
  # Clamped so that asking for the current node before the
  # first call to step() does not index the tree at -1
  return self.ast[max(self.current - 1, 0)]


proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
  ## Returns the function declaration currently being compiled
  self.currentFunction


proc getFile*(self: Compiler): string {.inline.} =
  ## Returns the name of the file currently being compiled
  self.file


proc getModule*(self: Compiler): string {.inline.} =
  ## Returns the name of the module currently being compiled
  self.currentModule


## Utility functions

proc peek(self: Compiler, distance: int = 0): ASTNode =
  ## Peeks at the AST node at the given distance.
  ## If the distance is out of bounds, the last
  ## AST node in the tree is returned. A negative
  ## distance may be used to retrieve previously
  ## consumed AST nodes. Returns nil if the tree
  ## has no nodes at all
  if self.ast.len() == 0:
    # There is no "last node" to fall back to
    return nil
  let index = self.current + distance
  if index > self.ast.high() or index < 0:
    result = self.ast[^1]
  else:
    result = self.ast[index]


proc done(self: Compiler): bool =
  ## Returns true if the compiler is done
  ## compiling, false otherwise
  result = self.current > self.ast.high()


proc error(self: Compiler, message: string) {.raises: [CompileError].} =
  ## Raises a CompileError exception carrying the given
  ## message together with the current node, file and module
  raise CompileError(msg: message, node: self.getCurrentNode(), file: self.file, module: self.currentModule)


proc step(self: Compiler): ASTNode =
  ## Steps to the next node and returns
  ## the consumed one
  result = self.peek()
  if not self.done():
    self.current += 1


proc emitByte(self: Compiler, byt: OpCode | uint8) =
  ## Emits a single byte, writing it to
  ## the current chunk being compiled. The
  ## line of the node returned by peek() is
  ## passed along to the chunk
  when DEBUG_TRACE_COMPILER:
    echo &"DEBUG - Compiler: Emitting {$byt}"
  self.chunk.write(uint8 byt, self.peek().token.line)


proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) =
  ## Handy helper method to write arbitrary bytes into
  ## the current chunk, calling emitByte on each of its
  ## elements
  for b in bytarr:
    self.emitByte(b)


proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
  ## Adds a constant to the current chunk's constant table
  ## and returns its index as a 3-byte array of uint8s.
  ## Only integer types are handled: for any other type
  ## nothing is written and the returned array is zeroed
  var v: int
  discard parseInt(val.token.lexeme, v)
  case typ.kind:
    of UInt8, Int8:
      result = self.chunk.writeConstant([uint8(v)])
    of Int16, UInt16:
      result = self.chunk.writeConstant(v.toDouble())
    of Int32, UInt32:
      result = self.chunk.writeConstant(v.toQuad())
    of Int64, UInt64:
      result = self.chunk.writeConstant(v.toLong())
    else:
      discard


proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
  ## Emits a LoadConstant instruction along
  ## with its operand. Only Int64 constants get
  ## a load instruction for now, but the operand
  ## is always emitted
  case self.inferType(obj).kind:
    of Int64:
      self.emitByte(LoadInt64)
    else:
      discard # TODO
  self.emitBytes(self.makeConstant(obj, kind))


proc emitJump(self: Compiler, opcode: OpCode): int =
  ## Emits a dummy jump offset to be patched later. Assumes
  ## the largest offset (emits 4 bytes, one for the given jump
  ## opcode, while the other 3 are for the jump offset which is set
  ## to the maximum unsigned 24 bit integer). If the shorter
  ## 16 bit alternative is later found to be better suited, patchJump
  ## will fix this. This function returns the absolute index into the
  ## chunk's bytecode array where the given placeholder instruction was written
  self.emitByte(opcode)
  self.emitBytes((0xffffff).toTriple())
  result = self.chunk.code.len() - 4


proc patchJump(self: Compiler, offset: int) =
  ## Patches a previously emitted relative
  ## jump using emitJump. Since emitJump assumes
  ## a long jump, this also shrinks the jump
  ## offset and changes the bytecode instruction if possible
  ## (i.e. jump is in 16 bit range), but the converse is also
  ## true (i.e. it might change a regular jump into a long one).
  ## Note that shrinking removes one byte from the chunk, which
  ## moves every byte emitted after the jump instruction
  var jump: int = self.chunk.code.len() - offset
  if jump > 16777215:
    self.error("cannot jump more than 16777216 bytecode instructions")
  if jump < uint16.high().int:
    case OpCode(self.chunk.code[offset]):
      of LongJumpForwards:
        self.chunk.code[offset] = JumpForwards.uint8()
        jump -= 4
      of LongJumpBackwards:
        self.chunk.code[offset] = JumpBackwards.uint8()
        jump -= 4
      of LongJumpIfFalse:
        self.chunk.code[offset] = JumpIfFalse.uint8()
      of LongJumpIfFalsePop:
        self.chunk.code[offset] = JumpIfFalsePop.uint8()
      of LongJumpIfFalseOrPop:
        self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
      of JumpForwards, JumpBackwards:
        # We do this because a relative jump
        # does not normally take into account
        # its argument, which is hardcoded in
        # the bytecode itself
        jump -= 3
      else:
        discard
    self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty)
    let offsetArray = (jump - 1).toDouble() # -1 since we got rid of 1 byte!
    self.chunk.code[offset + 1] = offsetArray[0]
    self.chunk.code[offset + 2] = offsetArray[1]
  else:
    case OpCode(self.chunk.code[offset]):
      of JumpForwards:
        self.chunk.code[offset] = LongJumpForwards.uint8()
        jump -= 3
      of JumpBackwards:
        self.chunk.code[offset] = LongJumpBackwards.uint8()
        jump -= 3
      of JumpIfFalse:
        self.chunk.code[offset] = LongJumpIfFalse.uint8()
      of JumpIfFalsePop:
        self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
      of JumpIfFalseOrPop:
        self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
      of LongJumpForwards, LongJumpBackwards:
        # We do this because a relative jump
        # does not normally take into account
        # its argument, which is hardcoded in
        # the bytecode itself
        jump -= 4
      else:
        discard
    let offsetArray = jump.toTriple()
    self.chunk.code[offset + 1] = offsetArray[0]
    self.chunk.code[offset + 2] = offsetArray[1]
    self.chunk.code[offset + 3] = offsetArray[2]


proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
  ## Traverses self.names backwards and returns the
  ## first name object with the given name. Returns
  ## nil when the name can't be found. This function
  ## has no concept of scope depth, because getStackPos
  ## does that job. Note that private names declared in
  ## other modules will not be resolved!
  for obj in reversed(self.names):
    if obj.name.token.lexeme == name.token.lexeme:
      if obj.isPrivate and obj.owner != self.currentModule:
        # There may be a name in the current module that
        # matches, so we skip this
        continue
      return obj
  return nil


proc getStackPos(self: Compiler, name: IdentExpr,
                 depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
  ## Iterates the internal list of declared names backwards and
  ## returns a tuple (closedOver, pos) that tells the caller whether the
  ## the name is to be emitted as a closure as well as its predicted
  ## stack/closure array position. Returns (false, -1) if the variable's
  ## location can not be determined at compile time (this is an error!).
  ## Note that private names declared in other modules will not be resolved!

  # Both lists are walked by index so that the reported position always
  # matches the entry being inspected, even when an entry is skipped
  for i in countdown(self.names.high(), 0):
    let variable = self.names[i]
    if name.name.lexeme != variable.name.name.lexeme:
      continue
    if variable.isPrivate and variable.owner != self.currentModule:
      continue
    elif variable.depth == depth or variable.depth == 0:
      # variable.depth == 0 for globals!
      return (false, i)
    elif variable.depth > 0:
      for j in countdown(self.closedOver.high(), 0):
        if self.closedOver[j].name.token.lexeme == name.name.lexeme:
          return (true, j)
  return (false, -1)


proc detectClosureVariable(self: Compiler, name: Name,
                           depth: int = self.scopeDepth) =
  ## Detects if the given name is used in a local scope deeper
  ## than the given one and modifies the code emitted for it
  ## to store it as a closure variable if it is. Does nothing if the name
  ## hasn't been declared yet or is unreachable (for example if it's
  ## declared as private in another module). This function must be called
  ## each time a name is referenced in order for closed-over variables
  ## to be emitted properly, otherwise the runtime may behave
  ## unpredictably or crash
  if name == nil:
    return
  if name.depth < depth:
    # Ding! The given name is closed over: we need to
    # change the NoOp instructions that self.declareName
    # put in place for us into a StoreHeap. We don't need to change
    # other pieces of code because self.identifier() already
    # emits LoadHeap if it detects the variable is closed over,
    # whether or not this function is called
    self.closedOver.add(name)
    if self.closedOver.len() >= 16777216:
      self.error("too many consecutive closed-over variables (max is 16777216)")
    let idx = self.closedOver.high().toTriple()
    self.chunk.code[name.codePos] = StoreHeap.uint8
    self.chunk.code[name.codePos + 1] = idx[0]
    self.chunk.code[name.codePos + 2] = idx[1]
    self.chunk.code[name.codePos + 3] = idx[2]


proc compareTypesWithNullNode(self: Compiler, a, b: Type): bool =
  ## Compares two types without using information from
  ## AST nodes. Only function types can compare equal here:
  ## they match when they take the same number of arguments,
  ## their argument types match pairwise and their return types
  ## match (a return type of kind Any matches anything)
  if a == nil:
    return b == nil
  elif b == nil:
    return a == nil
  if a.kind != b.kind:
    return false
  case a.kind:
    of Function:
      if a.args.len() != b.args.len():
        return false
      elif not self.compareTypes(a.returnType, b.returnType):
        # A missing (nil) return type is never a wildcard
        let wildcardA = a.returnType != nil and a.returnType.kind == Any
        let wildcardB = b.returnType != nil and b.returnType.kind == Any
        if not wildcardA and not wildcardB:
          return false
      for (argA, argB) in zip(a.args, b.args):
        if not self.compareTypes(argA, argB):
          return false
      return true
    else:
      discard


proc compareTypes(self: Compiler, a, b: Type): bool =
  ## Compares two type objects
  ## for equality (works with nil!). Function
  ## types that both carry an AST node are compared
  ## through their declarations (name, arguments and
  ## return type), otherwise compareTypesWithNullNode
  ## is used. Kinds without a dedicated branch never
  ## compare equal
  if a == nil:
    return b == nil
  elif b == nil:
    return a == nil
  if a.kind != b.kind:
    return false
  case a.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, Nan, Bool, Inf:
      return true
    of Function:
      if a.node == nil or b.node == nil:
        return self.compareTypesWithNullNode(a, b)
      let
        a = FunDecl(a.node)
        b = FunDecl(b.node)
        typeOfA = self.inferType(a.returnType)
        typeOfB = self.inferType(b.returnType)
      if a.name.token.lexeme != b.name.token.lexeme:
        return false
      elif a.arguments.len() != b.arguments.len():
        return false
      elif not self.compareTypes(typeOfA, typeOfB):
        # A missing (nil) return type is never a wildcard
        let wildcardA = typeOfA != nil and typeOfA.kind == Any
        let wildcardB = typeOfB != nil and typeOfB.kind == Any
        if not wildcardA and not wildcardB:
          return false
      for (argA, argB) in zip(a.arguments, b.arguments):
        if argA.mutable != argB.mutable:
          return false
        elif argA.isRef != argB.isRef:
          return false
        elif argA.isPtr != argB.isPtr:
          return false
        elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
          return false
      return true
    else:
      discard


proc toIntrinsic(name: string): Type =
  ## Converts a string to an intrinsic
  ## type if it is valid and returns nil
  ## otherwise
  if name in ["int", "int64", "i64"]:
    return Type(kind: Int64)
  elif name in ["uint64", "u64"]:
    return Type(kind: UInt64)
  elif name in ["int32", "i32"]:
    return Type(kind: Int32)
  elif name in ["uint32", "u32"]:
    return Type(kind: UInt32)
  elif name in ["int16", "i16"]:
    return Type(kind: Int16)
  elif name in ["uint16", "u16"]:
    return Type(kind: UInt16)
  elif name in ["int8", "i8"]:
    return Type(kind: Int8)
  elif name in ["uint8", "u8"]:
    return Type(kind: UInt8)
  elif name in ["f64", "float", "float64"]:
    return Type(kind: Float64)
  elif name in ["f32", "float32"]:
    return Type(kind: Float32)
  elif name == "byte":
    return Type(kind: Byte)
  elif name == "char":
    return Type(kind: Char)
  elif name == "nan":
    return Type(kind: Nan)
  elif name == "nil":
    return Type(kind: Nil)
  elif name == "inf":
    return Type(kind: Inf)
  elif name == "bool":
    return Type(kind: Bool)
  elif name == "type":
    return Type(kind: Typedesc)
  else:
    return nil


proc inferType(self: Compiler, node: LiteralExpr): Type =
  ## Infers the type of a given literal expression.
  ## Numeric literals may carry a size specifier after
  ## a single quote (e.g. 1'u8): integers default to
  ## Int64 and floats to Float64 when there is none.
  ## Returns nil for nil nodes and for literal kinds
  ## that are not handled yet
  if node == nil:
    return nil
  case node.kind:
    of intExpr, binExpr, octExpr, hexExpr:
      let size = node.token.lexeme.split("'")
      if len(size) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1:
        return Type(node: node, kind: Int64)
      let typ = size[1].toIntrinsic()
      if not self.compareTypes(typ, nil):
        return typ
      else:
        self.error(&"invalid type specifier '{size[1]}' for int")
    of floatExpr:
      let size = node.token.lexeme.split("'")
      if len(size) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1 or size[1] == "f64":
        return Type(node: node, kind: Float64)
      let typ = size[1].toIntrinsic()
      if not self.compareTypes(typ, nil):
        return typ
      else:
        self.error(&"invalid type specifier '{size[1]}' for float")
    of nilExpr:
      return Type(node: node, kind: Nil)
    of trueExpr:
      return Type(node: node, kind: Bool)
    of falseExpr:
      return Type(node: node, kind: Bool)
    of nanExpr:
      return Type(node: node, kind: TypeKind.Nan)
    of infExpr:
      return Type(node: node, kind: TypeKind.Inf)
    else:
      discard # TODO


proc toIntrinsic(self: Compiler, typ: Expression): Type =
  ## Gets an expression's intrinsic type, if
  ## possible. Returns nil for nil expressions
  ## and for expression kinds other than literals
  ## and identifiers
  if typ == nil:
    return nil
  case typ.kind:
    of trueExpr, falseExpr, intExpr, floatExpr:
      return typ.token.lexeme.toIntrinsic()
    of identExpr:
      let inferred = self.inferType(typ)
      if inferred == nil:
        return typ.token.lexeme.toIntrinsic()
      return inferred
    else:
      discard


proc inferType(self: Compiler, node: Expression): Type =
  ## Infers the type of a given expression and
  ## returns it. Returns nil if the expression is
  ## nil, if its type can't be determined or if the
  ## operands of a binary expression have different
  ## types
  if node == nil:
    return nil
  case node.kind:
    of identExpr:
      let node = IdentExpr(node)
      let name = self.resolve(node)
      if name != nil:
        return name.valueType
      else:
        # Not a declared name: it may be the name of a builtin type
        result = node.name.lexeme.toIntrinsic()
        if result != nil:
          result.node = node
    of unaryExpr:
      return self.inferType(UnaryExpr(node).a)
    of binaryExpr:
      let node = BinaryExpr(node)
      var a = self.inferType(node.a)
      var b = self.inferType(node.b)
      if not self.compareTypes(a, b):
        return nil
      return a
    of {intExpr, hexExpr, binExpr, octExpr,
        strExpr, falseExpr, trueExpr, infExpr,
        nanExpr, floatExpr, nilExpr}:
      return self.inferType(LiteralExpr(node))
    else:
      discard # Unreachable


proc typeToStr(self: Compiler, typ: Type): string =
  ## Returns the string representation of a
  ## type object. A missing (nil) type is rendered
  ## as "nil" and kinds without a dedicated branch
  ## as an empty string
  if typ == nil:
    # Reached, for instance, when printing the return
    # type of a function that does not declare one
    return "nil"
  case typ.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, TypeKind.Nan, Bool,
       TypeKind.Inf:
      return ($typ.kind).toLowerAscii()
    of Function:
      result = "function ("
      if typ.node == nil:
        # Signatures that are not backed by a declaration (like
        # the ones unary() and binary() build to look up operators)
        # can only be described through their argument types
        for i, argument in typ.args:
          result &= self.typeToStr(argument)
          if i < typ.args.len() - 1:
            result &= ", "
        result &= ")"
      else:
        case typ.node.kind:
          of funDecl:
            var node = FunDecl(typ.node)
            for i, argument in node.arguments:
              result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.valueType))}"
              if i < node.arguments.len() - 1:
                result &= ", "
            result &= ")"
          of lambdaExpr:
            var node = LambdaExpr(typ.node)
            for i, argument in node.arguments:
              # NOTE(review): the funDecl branch infers from argument.valueType
              # while this one infers from argument.name -- confirm this is intended
              result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}"
              if i < node.arguments.len() - 1:
                result &= ", "
            result &= ")"
          else:
            discard # Unreachable
      result &= &": {self.typeToStr(typ.returnType)}"
    else:
      discard


proc inferType(self: Compiler, node: Declaration): Type =
  ## Infers the type of a given declaration
  ## and returns it. Functions must have been
  ## declared already to have a type, while
  ## undeclared variables fall back to the type
  ## of their value
  if node == nil:
    return nil
  case node.kind:
    of funDecl:
      var node = FunDecl(node)
      let resolved = self.resolve(node.name)
      if resolved != nil:
        return resolved.valueType
    of NodeKind.varDecl:
      var node = VarDecl(node)
      let resolved = self.resolve(node.name)
      if resolved != nil:
        return resolved.valueType
      else:
        return self.inferType(node.value)
    else:
      return # Unreachable

## End of utility functions


proc literal(self: Compiler, node: ASTNode) =
  ## Emits instructions for literals such
  ## as singletons, strings, numbers and
  ## collections. Hexadecimal, binary and octal
  ## literals are converted to decimal integer
  ## expressions before being emitted
  case node.kind:
    of trueExpr:
      self.emitByte(LoadTrue)
    of falseExpr:
      self.emitByte(LoadFalse)
    of nilExpr:
      self.emitByte(LoadNil)
    of infExpr:
      self.emitByte(LoadInf)
    of nanExpr:
      self.emitByte(LoadNan)
    of strExpr:
      self.emitConstant(LiteralExpr(node), Type(kind: String))
    # TODO: Take size specifier into account!
    of intExpr:
      var x: int
      var y = IntExpr(node)
      try:
        discard parseInt(y.literal.lexeme, x)
      except ValueError:
        self.error("integer value out of range")
      self.emitConstant(y, Type(kind: Int64))
    of hexExpr:
      var x: int
      var y = HexExpr(node)
      try:
        discard parseHex(y.literal.lexeme, x)
      except ValueError:
        self.error("integer value out of range")
      let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                  pos: (start: y.token.pos.start,
                                        stop: y.token.pos.start + len($x))
                                  )
                            )
      self.emitConstant(node, Type(kind: Int64))
    of binExpr:
      var x: int
      var y = BinExpr(node)
      try:
        discard parseBin(y.literal.lexeme, x)
      except ValueError:
        self.error("integer value out of range")
      let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                  pos: (start: y.token.pos.start,
                                        stop: y.token.pos.start + len($x))
                                  )
                            )
      self.emitConstant(node, Type(kind: Int64))
    of octExpr:
      var x: int
      var y = OctExpr(node)
      try:
        discard parseOct(y.literal.lexeme, x)
      except ValueError:
        self.error("integer value out of range")
      let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                  pos: (start: y.token.pos.start,
                                        stop: y.token.pos.start + len($x))
                                  )
                            )
      self.emitConstant(node, Type(kind: Int64))
    of floatExpr:
      var x: float
      var y = FloatExpr(node)
      try:
        discard parseFloat(y.literal.lexeme, x)
      except ValueError:
        self.error("floating point value out of range")
      self.emitConstant(y, Type(kind: Float64))
    of awaitExpr:
      var y = AwaitExpr(node)
      self.expression(y.expression)
      self.emitByte(OpCode.Await)
    else:
      self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")


proc matchImpl(self: Compiler, name: string, kind: Type): Name =
  ## Tries to find a matching function implementation
  ## compatible with the given type and returns its
  ## name object. Raises a CompileError listing the
  ## candidates if there is no match, or the matches
  ## if there is more than one
  let impl = self.findByType(name, kind)
  if impl.len() == 0:
    var msg = &"cannot find a suitable implementation for '{name}'"
    let names = self.findByName(name)
    if names.len() > 0:
      msg &= &", found {len(names)} candidate"
      if names.len() > 1:
        msg &= "s"
      msg &= ": "
      for name in names:
        msg &= &"\n - '{name.name.token.lexeme}' of type '{self.typeToStr(name.valueType)}'"
        if name.valueType.kind != Function:
          msg &= ", not a callable"
        elif kind.args.len() != name.valueType.args.len():
          msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})"
        else:
          for i, arg in kind.args:
            if not self.compareTypes(arg, name.valueType.args[i]):
              msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i])}', got '{self.typeToStr(arg)}' instead"
              # Only the first mismatch is reported for each candidate
              break
    self.error(msg)
  elif impl.len() > 1:
    var msg = &"multiple matching implementations of '{name}' found:\n"
    for fn in reversed(impl):
      var node = FunDecl(fn.valueType.node)
      msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n"
    self.error(msg)
  return impl[0]


proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
  ## Emits the code to call a unary operator

  # Pushes the return address
  self.emitByte(LoadUInt32)
  # We patch it later!
  let idx = self.chunk.consts.len()
  self.emitBytes(self.chunk.writeConstant((0xffffffff'u32).toQuad()))
  self.expression(op.a) # Pushes the arguments onto the stack
  self.emitByte(Call) # Creates a stack frame
  self.emitBytes(fn.codePos.toTriple())
  self.emitBytes(1.toTriple())
  self.patchReturnAddress(idx)


proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
  ## Emits the code to call a binary operator

  # Pushes the return address
  self.emitByte(LoadUInt32)
  # We patch it later!
  let idx = self.chunk.consts.len()
  self.emitBytes(self.chunk.writeConstant((0xffffffff'u32).toQuad()))
  self.expression(op.a) # Pushes the arguments onto the stack
  self.expression(op.b)
  self.emitByte(Call) # Creates a stack frame
  self.emitBytes(fn.codePos.toTriple())
  # NOTE(review): this is the same operand callUnaryOp emits even though
  # two arguments are pushed here -- confirm what Call expects
  self.emitBytes(1.toTriple())
  self.patchReturnAddress(idx)


proc unary(self: Compiler, node: UnaryExpr) =
  ## Compiles unary expressions such as decimal
  ## and bitwise negation by calling the operator
  ## implementation that matches the operand's type
  let valueType = self.inferType(node.a)
  let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), node: nil, args: @[valueType]))
  self.callUnaryOp(funct, node)


proc binary(self: Compiler, node: BinaryExpr) =
  ## Compiles all binary expressions by calling
  ## the operator implementation that matches the
  ## types of both operands
  let typeOfA = self.inferType(node.a)
  let typeOfB = self.inferType(node.b)
  let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), node: nil, args: @[typeOfA, typeOfB]))
  self.callBinaryOp(funct, node)
  # TODO: Get rid of old code
  #[
  case node.operator.kind:
    of NoMatch:
      # a and b
      self.expression(node.a)
      var jump: int
      if self.enableOptimizations:
        jump = self.emitJump(JumpIfFalseOrPop)
      else:
        jump = self.emitJump(JumpIfFalse)
        self.emitByte(Pop)
      self.expression(node.b)
      self.patchJump(jump)
    of EndOfFile:
      # a or b
      self.expression(node.a)
      let jump = self.emitJump(JumpIfTrue)
      self.expression(node.b)
      self.patchJump(jump)
    else:
      self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug!)")
  ]#


proc declareName(self: Compiler, node: Declaration) =
  ## Statically declares a name into the current scope.
  ## "Declaring" a name only means updating our internal
  ## list of identifiers so that further calls to resolve()
  ## correctly return them. There is no code to actually
  ## declare a variable at runtime: the value is already
  ## there on the stack. Declaring a function also declares
  ## its arguments, one scope deeper than the function itself
  case node.kind:
    of NodeKind.varDecl:
      var node = VarDecl(node)
      # Creates a new Name entry so that self.identifier emits the proper stack offset
      if self.names.high() > 16777215:
        # If someone ever hits this limit in real-world scenarios, I swear I'll
        # slap myself 100 times with a sign saying "I'm dumb". Mark my words
        self.error("cannot declare more than 16777216 variables at a time")
      for name in self.findByName(node.name.token.lexeme):
        if name.name.token.lexeme == node.name.token.lexeme and name.depth == self.scopeDepth and name.valueType.node.kind == NodeKind.varDecl:
          self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.valueType.node.token.line}")
      let inferred = self.inferType(node.value)
      if inferred == nil:
        # Without this check the lookup of the type's kind below
        # would dereference nil
        self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
      self.names.add(Name(depth: self.scopeDepth,
                          name: node.name,
                          isPrivate: node.isPrivate,
                          owner: self.currentModule,
                          isConst: node.isConst,
                          valueType: Type(kind: inferred.kind, node: node),
                          codePos: self.chunk.code.len(),
                          isLet: node.isLet))
      # Placeholder that detectClosureVariable turns into a
      # StoreHeap if the variable ends up being closed over
      self.emitBytes([NoOp, NoOp, NoOp, NoOp])
    of NodeKind.funDecl:
      var node = FunDecl(node)
      # TODO: Emit some optional debugging
      # metadata to let the VM know where a function's
      # code begins and ends (similar to what gcc does with
      # CFI in object files) to build stack traces
      self.names.add(Name(depth: self.scopeDepth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function, node: node,
                                          returnType: self.inferType(node.returnType),
                                          args: @[]),
                          codePos: self.chunk.code.high(),
                          name: node.name,
                          isLet: false))
      let fn = self.names[^1]
      for argument in node.arguments:
        if self.names.high() > 16777215:
          self.error("cannot declare more than 16777216 variables at a time")
        # wait, no LoadVar?? Yes! That's because when calling functions,
        # arguments will already be on the stack so there's no need to
        # load them here
        self.names.add(Name(depth: self.scopeDepth + 1,
                            isPrivate: true,
                            owner: self.currentModule,
                            isConst: false,
                            name: argument.name,
                            valueType: nil,
                            codePos: self.chunk.code.len(),
                            isLet: false))
        self.names[^1].valueType = self.inferType(argument.valueType)
        # We check if the argument's type is a generic
        if self.names[^1].valueType == nil and argument.valueType.kind == identExpr:
          for gen in node.generics:
            if gen.name == IdentExpr(argument.valueType):
              self.names[^1].valueType = Type(kind: Generic)
              break
        # If it's still nil, it's an error!
        if self.names[^1].valueType == nil:
          self.error(&"cannot determine the type of argument '{self.names[^1].name.token.lexeme}'")
        self.names[^1].valueType.node = argument.name
        fn.valueType.args.add(self.names[^1].valueType)
    else:
      discard # Unreachable


proc identifier(self: Compiler, node: IdentExpr) =
  ## Compiles access to identifiers. Constants are
  ## emitted as constants, other names are loaded
  ## from the stack or from the closure array
  let s = self.resolve(node)
  if s == nil:
    self.error(&"reference to undeclared name '{node.token.lexeme}'")
  elif s.isConst:
    # Constants are always emitted as Load* instructions
    # no matter the scope depth
    self.emitConstant(node, self.inferType(node))
  else:
    self.detectClosureVariable(s)
    let t = self.getStackPos(node)
    var index = t.pos
    # We don't check if index is -1 because if it
    # were, self.resolve() would have returned nil
    if not t.closedOver:
      # Static name resolution, loads value at index in the stack. Very fast. Much wow.
      self.emitByte(LoadVar)
      if self.scopeDepth > 0:
        inc(index) # Skip the return address!
      self.emitBytes((index - self.frames[self.scopeDepth]).toTriple())
    else:
      # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics.
      # This makes closures work as expected and is not comparatively slower than indexing our stack (since they're both
      # dynamic arrays at runtime anyway)
      self.emitByte(LoadHeap)
      self.emitBytes(self.closedOver.high().toTriple())


proc findByName(self: Compiler, name: string): seq[Name] =
  ## Looks for objects that have been already declared
  ## with the given name. Returns all objects that apply,
  ## most recently declared first
  for obj in reversed(self.names):
    if obj.name.token.lexeme == name:
      result.add(obj)


proc findByType(self: Compiler, name: string, kind: Type): seq[Name] =
  ## Looks for objects that have already been declared
  ## with the given name and type
  for obj in self.findByName(name):
    if self.compareTypes(obj.valueType, kind):
      result.add(obj)


proc assignment(self: Compiler, node: ASTNode) =
  ## Compiles assignment expressions. Assigning to
  ## undeclared names, to constants or to names that
  ## can't be reassigned is an error
  case node.kind:
    of assignExpr:
      let node = AssignExpr(node)
      let name = IdentExpr(node.name)
      let r = self.resolve(name)
      if r == nil:
        self.error(&"assignment to undeclared name '{name.token.lexeme}'")
      elif r.isConst:
        self.error(&"cannot assign to '{name.token.lexeme}' (constant)")
      elif r.isLet:
        self.error(&"cannot reassign '{name.token.lexeme}'")
      self.expression(node.value)
      let t = self.getStackPos(name)
      let index = t.pos
      if index != -1:
        if not t.closedOver:
          self.emitByte(StoreVar)
        else:
          self.emitByte(StoreHeap)
        self.emitBytes(index.toTriple())
      else:
        # Reports the name being assigned to rather than
        # the token of the assignment expression itself
        self.error(&"reference to undeclared name '{name.token.lexeme}'")
    of setItemExpr:
      let node = SetItemExpr(node)
      let typ = self.inferType(node)
      if typ == nil:
        self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
      # TODO
    else:
      self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")


proc beginScope(self: Compiler) =
  ## Begins a new local scope by incrementing the current
  ## scope's depth
  inc(self.scopeDepth)


proc endScope(self: Compiler) =
  ## Ends the current local scope. Every name declared in
  ## the scope being closed is forgotten, the placeholder
  ## bytes of variables that were never closed over are
  ## removed from the chunk and the code to pop the scope's
  ## values off the stack is emitted
  if self.scopeDepth == 0:
    self.error("cannot call endScope with scopeDepth == 0 (This is an internal error and most likely a bug)")
  dec(self.scopeDepth)
  var popped: int = 0
  # Names are visited from the most recent one down so that removing
  # an entry never shifts the index of one we still have to look at
  for i in countdown(self.names.high(), 0):
    let name = self.names[i]
    if name.depth <= self.scopeDepth:
      continue
    inc(popped)
    if name.valueType.kind != Function and name.codePos + 3 < self.chunk.code.len() and
        OpCode(self.chunk.code[name.codePos]) == NoOp:
      # The variable was never closed over, so its 4 placeholder
      # bytes are useless. Each deletion shifts the following bytes
      # back by one, so the same position is deleted every time
      for _ in 1..4:
        self.chunk.code.delete(name.codePos)
    self.names.delete(i)
    if not self.enableOptimizations:
      # All variables with a scope depth larger than the current one
      # are now out of scope. Begone, you're now homeless!
      self.emitByte(Pop)
  if self.enableOptimizations:
    if popped > 1:
      # If we're popping less than 65535 variables, then
      # we can emit a PopN instruction. This is true for
      # 99.99999% of the use cases of the language (who the
      # hell is going to use 65 THOUSAND local variables?), but
      # if you'll ever use more then Peon will emit a PopN instruction
      # for the first 65 thousand and change local variables and then
      # emit another batch of plain ol' Pop instructions for the rest
      if popped <= uint16.high().int():
        self.emitByte(PopN)
        self.emitBytes(popped.toDouble())
      else:
        self.emitByte(PopN)
        self.emitBytes(uint16.high().int.toDouble())
        for _ in 1..(popped - uint16.high().int()):
          self.emitByte(Pop)
    elif popped == 1:
      # We only emit PopN if we're popping more than one value
      self.emitByte(Pop)


proc blockStmt(self: Compiler, node: BlockStmt) =
  ## Compiles block statements, which create a new
  ## local scope.
  self.beginScope()
  for decl in node.code:
    self.declaration(decl)
  self.endScope()


proc ifStmt(self: Compiler, node: IfStmt) =
  ## Compiles if/else statements for conditional
  ## execution of code
  self.expression(node.condition)
  var jumpCode: OpCode
  if self.enableOptimizations:
    jumpCode = JumpIfFalsePop
  else:
    jumpCode = JumpIfFalse
  let jump = self.emitJump(jumpCode)
  if not self.enableOptimizations:
    self.emitByte(Pop)
  self.statement(node.thenBranch)
  # NOTE(review): the conditional jump is patched before the jump over the
  # else branch is emitted, so it appears to land on that jump rather than
  # on the else branch itself. Reordering is not trivial because patchJump
  # may remove a byte from the chunk -- confirm against the VM
  self.patchJump(jump)
  if node.elseBranch != nil:
    let jump = self.emitJump(JumpForwards)
    self.statement(node.elseBranch)
    self.patchJump(jump)


proc emitLoop(self: Compiler, begin: int) =
  ## Emits a JumpBackwards instruction with the correct
  ## jump offset, using the long variant when the offset
  ## does not fit in 16 bits
  var offset: int
  case OpCode(self.chunk.code[begin + 1]): # The jump instruction
    of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
       LongJumpIfFalsePop, LongJumpIfTrue:
      offset = self.chunk.code.len() - begin + 4
    else:
      offset = self.chunk.code.len() - begin
  if offset > uint16.high().int:
    if offset > 16777215:
      self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(LongJumpBackwards)
    self.emitBytes(offset.toTriple())
  else:
    self.emitByte(JumpBackwards)
    self.emitBytes(offset.toDouble())


proc whileStmt(self: Compiler, node: WhileStmt) =
  ## Compiles C-style while loops and
  ## desugared C-style for loops
  let start = self.chunk.code.len()
  self.expression(node.condition)
  var jump: int
  if self.enableOptimizations:
    jump = self.emitJump(JumpIfFalsePop)
  else:
    jump = self.emitJump(JumpIfFalse)
    self.emitByte(Pop)
  self.statement(node.body)
  # NOTE(review): the exit jump is patched before the backwards jump is
  # emitted, so it appears to land on the backwards jump instead of after
  # it. Reordering is not trivial because patchJump may remove a byte from
  # the chunk -- confirm against the VM
  self.patchJump(jump)
  self.emitLoop(start)


proc expression(self: Compiler, node: Expression) =
  ## Compiles all expressions. Expressions whose type
  ## can't be inferred are rejected, except for identifiers
  ## (which are reported by self.identifier)
  if self.inferType(node) == nil:
    if node.kind != identExpr:
      # So we can raise a more appropriate
      # error in self.identifier()
      self.error("expression has no type")
  case node.kind:
    of callExpr:
      discard # TODO
    of getItemExpr:
      discard # TODO
    # Note that for setItem and assign we don't convert
    # the node to its true type because that type information
    # would be lost in the call anyway. The differentiation
    # happens in self.assignment()
    of setItemExpr, assignExpr:
      self.assignment(node)
    of identExpr:
      self.identifier(IdentExpr(node))
    of unaryExpr:
      # Unary expressions such as ~5 and -3
      self.unary(UnaryExpr(node))
    of groupingExpr:
      # Grouping expressions like (2 + 1)
      self.expression(GroupingExpr(node).expression)
    of binaryExpr:
      # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
      self.binary(BinaryExpr(node))
    of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
       infExpr, nanExpr, floatExpr, nilExpr:
      # Since all of these AST nodes share the
      # same overall structure and the kind
      # field is enough to tell one from the
      # other, why bother with specialized
      # cases when one is enough?
      self.literal(node)
    else:
      self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")


proc awaitStmt(self: Compiler, node: AwaitStmt) =
  ## Compiles await statements. An await statement
  ## is like an await expression, but parsed in the
  ## context of statements for usage outside expressions,
  ## meaning it can be used standalone. It's basically the
  ## same as an await expression followed by a semicolon.
  ## Await expressions are the only native construct to
  ## run coroutines from within an already asynchronous
  ## context (which should be orchestrated by an event loop).
  ## They block in the caller until the callee returns
  self.expression(node.expression)
  self.emitByte(OpCode.Await)


proc deferStmt(self: Compiler, node: DeferStmt) =
  ## Compiles defer statements. A defer statement
  ## is executed right before its containing function
  ## exits (either because of a return or an exception).
  ## The code of the deferred expression is moved from
  ## the chunk into self.deferred
  let current = self.chunk.code.len()
  self.expression(node.expression)
  # The bytes are copied in the order they were emitted and the
  # chunk is truncated afterwards: removing them one at a time
  # while iterating would scramble the bytes still to be copied
  for i in countup(current, self.chunk.code.high()):
    self.deferred.add(self.chunk.code[i])
  self.chunk.code.setLen(current)


proc returnStmt(self: Compiler, node: ReturnStmt) =
  ## Compiles return statements. An empty return
  ## implicitly returns nil. The type of the returned
  ## value must match the return type of the function
  ## being compiled
  let returnType = self.inferType(node.value)
  let typ = self.inferType(self.currentFunction)
  if typ == nil:
    # Happens when there is no current function or when
    # it can't be resolved
    self.error("cannot determine the type of the function this return statement belongs to")
  ## Having the return type
  if returnType == nil and typ.returnType != nil:
    self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', but expression has no type")
  elif typ.returnType == nil and returnType != nil:
    # The function declares no return type, yet a value is returned
    self.error("non-empty return statement is not allowed in void functions")
  elif not self.compareTypes(returnType, typ.returnType):
    self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead")
  if node.value != nil:
    self.expression(node.value)
    self.emitByte(OpCode.ReturnValue)
  else:
    self.emitByte(OpCode.Return)


proc yieldStmt(self: Compiler, node: YieldStmt) =
  ## Compiles yield statements
  self.expression(node.expression)
  self.emitByte(OpCode.Yield)


proc raiseStmt(self: Compiler, node: RaiseStmt) =
  ## Compiles raise statements
  self.expression(node.exception)
  self.emitByte(OpCode.Raise)


proc continueStmt(self: Compiler, node: ContinueStmt) =
  ## Compiles continue statements. A continue statements
  ## jumps to the next iteration in a loop
  if self.currentLoop.start <= 65535:
    self.emitByte(Jump)
    self.emitBytes(self.currentLoop.start.toDouble())
  else:
    if self.currentLoop.start > 16777215:
      self.error("too much code to jump over in continue statement")
    self.emitByte(LongJump)
    self.emitBytes(self.currentLoop.start.toTriple())


proc breakStmt(self: Compiler, node: BreakStmt) =
  ## Compiles break statements.
A continue statement ## jumps to the next iteration in a loop # Emits dummy jump offset, this is # patched later self.currentLoop.breakPos.add(self.emitJump(OpCode.Jump)) if self.currentLoop.depth > self.scopeDepth: # Breaking out of a loop closes its scope self.endScope() proc patchBreaks(self: Compiler) = ## Patches "break" opcodes with ## actual jumps. This is needed ## because the size of code ## to skip is not known before ## the loop is fully compiled for brk in self.currentLoop.breakPos: self.chunk.code[brk] = JumpForwards.uint8() self.patchJump(brk) proc assertStmt(self: Compiler, node: AssertStmt) = ## Compiles assert statements (raise ## AssertionError if the expression is falsey) self.expression(node.expression) self.emitByte(OpCode.Assert) proc statement(self: Compiler, node: Statement) = ## Compiles all statements case node.kind: of exprStmt: var expression = ExprStmt(node).expression self.expression(expression) self.emitByte(Pop) # Expression statements discard their value. Their main use case is side effects in function calls of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) of NodeKind.assertStmt: self.assertStmt(AssertStmt(node)) of NodeKind.raiseStmt: self.raiseStmt(RaiseStmt(node)) of NodeKind.breakStmt: self.breakStmt(BreakStmt(node)) of NodeKind.continueStmt: self.continueStmt(ContinueStmt(node)) of NodeKind.returnStmt: self.returnStmt(ReturnStmt(node)) of NodeKind.importStmt: discard of NodeKind.whileStmt, NodeKind.forStmt: ## Our parser already desugars for loops to ## while loops! 
let loop = self.currentLoop self.currentLoop = Loop(start: self.chunk.code.len(), depth: self.scopeDepth, breakPos: @[]) self.whileStmt(WhileStmt(node)) self.patchBreaks() self.currentLoop = loop of NodeKind.forEachStmt: discard of NodeKind.blockStmt: self.blockStmt(BlockStmt(node)) of NodeKind.yieldStmt: self.yieldStmt(YieldStmt(node)) of NodeKind.awaitStmt: self.awaitStmt(AwaitStmt(node)) of NodeKind.deferStmt: self.deferStmt(DeferStmt(node)) of NodeKind.tryStmt: discard else: self.expression(Expression(node)) proc varDecl(self: Compiler, node: VarDecl) = ## Compiles variable declarations let kind = self.inferType(node.valueType) let typ = self.inferType(node.value) if kind == nil and typ == nil: self.error(&"'{node.name.token.lexeme}' has no type") elif not self.compareTypes(typ, kind): if kind != nil: self.error(&"expected value of type '{self.typeToStr(kind)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(typ)}'") self.expression(node.value) self.declareName(node) proc funDecl(self: Compiler, node: FunDecl) = ## Compiles function declarations # A function's code is just compiled linearly # and then jumped over let jmp = self.emitJump(JumpForwards) var function = self.currentFunction self.declareName(node) self.frames.add(self.names.high()) # TODO: Forward declarations if node.body != nil: if BlockStmt(node.body).code.len() == 0: self.error("cannot declare function with empty body") let fnType = self.inferType(node) let impl = self.findByType(node.name.token.lexeme, fnType) if impl.len() > 1: # Oh-oh! We found more than one implementation of # the same function with the same name! Error! 
var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n" for fn in reversed(impl): var node = FunDecl(fn.valueType.node) discard self.typeToStr(fn.valueType) msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" self.error(msg) # We store the current function self.currentFunction = node # Since the deferred array is a linear # sequence of instructions and we want # to keep track to whose function's each # set of deferred instruction belongs, # we record the length of the deferred # array before compiling the function # and use this info later to compile # the try/finally block with the deferred # code var deferStart = self.deferred.len() # We let our debugger know a function is starting let start = self.chunk.code.high() self.blockStmt(BlockStmt(node.body)) # Yup, we're done. That was easy, huh? # But, after all, functions are just named # scopes, and we compile them just like that: # we declare their name and arguments (before # their body so recursion works) and then just # handle them as a block statement (which takes # care of incrementing self.scopeDepth so locals # are resolved properly). There's a need for a bit # of boilerplate code to make closures work, but # that's about it # Function is ending! 
self.chunk.cfi.add(start.toTriple()) self.chunk.cfi.add(self.chunk.code.high().toTriple()) self.chunk.cfi.add(self.frames[^1].toTriple()) self.chunk.cfi.add(uint8(node.arguments.len())) if not system.`==`(node.name, nil): self.chunk.cfi.add(node.name.token.lexeme.len().toDouble()) var s = node.name.token.lexeme if node.name.token.lexeme.len() >= uint16.high().int: s = node.name.token.lexeme[0..uint16.high()] self.chunk.cfi.add(s.toBytes()) else: self.chunk.cfi.add(0.toDouble()) case self.currentFunction.kind: of NodeKind.funDecl: if not self.currentFunction.hasExplicitReturn: let typ = self.inferType(self.currentFunction) if self.currentFunction.returnType == nil and typ.returnType != nil: self.error("non-empty return statement is not allowed in void functions") if self.currentFunction.returnType != nil: self.error("function has an explicit return type, but no return statement was found") self.emitByte(OpCode.Return) of NodeKind.lambdaExpr: if not LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn: self.emitByte(OpCode.Return) else: discard # Unreachable # Currently defer is not functional so we # just pop the instructions for i in countup(deferStart, self.deferred.len() - 1, 1): self.deferred.delete(i) self.patchJump(jmp) # This makes us compile nested functions correctly self.currentFunction = function discard self.frames.pop() proc patchReturnAddress(self: Compiler, retAddr: int) = ## Patches the return address of a function ## call. 
This is called at each iteration of ## the compiler's loop let address = self.chunk.code.len().toQuad() self.chunk.consts[retAddr] = address[0] self.chunk.consts[retAddr + 1] = address[1] self.chunk.consts[retAddr + 2] = address[2] self.chunk.consts[retAddr + 3] = address[3] proc declaration(self: Compiler, node: Declaration) = ## Compiles all declarations case node.kind: of NodeKind.varDecl: self.varDecl(VarDecl(node)) of NodeKind.funDecl: self.funDecl(FunDecl(node)) else: self.statement(Statement(node)) proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk = ## Compiles a sequence of AST nodes into a chunk ## object self.chunk = newChunk() self.ast = ast self.file = file self.names = @[] self.scopeDepth = 0 self.currentFunction = nil self.currentModule = self.file.extractFilename() self.current = 0 self.frames = @[0] while not self.done(): self.declaration(Declaration(self.step())) if self.ast.len() > 0: # *Technically* an empty program is a valid program self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope result = self.chunk if self.ast.len() > 0 and self.scopeDepth != 0: self.error(&"invalid state: invalid scopeDepth value (expected 0, got {self.scopeDepth}), did you forget to call endScope/beginScope?")