# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../config
import ../util/multibyte

import strformat
import algorithm
import parseutils
import strutils
import sequtils


export ast
export token
export multibyte


type
    TypeKind* = enum
        ## An enumeration of compile-time
        ## types
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf
    Type* = ref object
        ## A wrapper around
        ## compile-time types
        node*: ASTNode
        case kind*: TypeKind:
            of Function:
                returnType*: Type
            else:
                discard

# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode


type
    Name = ref object
        ## A compile-time wrapper around
        ## statically resolved names

        # Name of the identifier
        name: IdentExpr
        # Owner of the identifier (module)
        owner: string
        # Scope depth
        depth: int
        # Is this name private?
        isPrivate: bool
        # Is this a constant?
        isConst: bool
        # Can this name's value be mutated?
        isLet: bool
        # The name's type
        valueType: Type
        # For variables, the position in the bytecode
        # where its StoreVar instruction was emitted.
        # For functions, this marks where the function's
        # code begins
        codePos: int
    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements

        # Position in the bytecode where the loop starts
        start: int
        # Scope depth where the loop is located
        depth: int
        # Absolute jump offsets into our bytecode that we need to
        # patch. Used for break statements
        breakPos: seq[int]

    Compiler* = ref object
        ## A wrapper around the Peon compiler's state

        # The bytecode chunk where we write code to
        chunk: Chunk
        # The output of our parser (AST)
        ast: seq[Declaration]
        # The current AST node we're looking at
        current: int
        # The current file being compiled (used only for
        # error reporting)
        file: string
        # Compile-time "simulation" of the stack at
        # runtime to load variables that have stack
        # behavior more efficiently
        names: seq[Name]
        # The current scope depth. If > 0, we're
        # in a local scope, otherwise it's global
        scopeDepth: int
        # The current function being compiled
        currentFunction: FunDecl
        # Are optimizations turned on?
        enableOptimizations*: bool
        # The current loop being compiled (used to
        # keep track of where to jump)
        currentLoop: Loop
        # The current module being compiled
        # (used to restrict access to statically
        # defined variables at compile time)
        currentModule: string
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
        # This sequence is emptied each time a
        # fun declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
        # List of closed-over variables
        closedOver: seq[IdentExpr]


proc newCompiler*(enableOptimizations: bool = true): Compiler =
    ## Initializes a new Compiler object. Note that the
    ## bytecode chunk is not allocated here
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.scopeDepth = 0
    result.currentFunction = nil
    result.enableOptimizations = enableOptimizations
    result.currentModule = ""


## Forward declarations
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
## End of forward declarations

## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the most recently consumed AST node, or
    ## the last node of the tree once the compiler has
    ## run past its end. Returns the first node if nothing
    ## has been consumed yet and nil if the AST is empty
    if self.ast.len() == 0:
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    # Before the first call to step(), self.current is 0 and
    # self.current - 1 would be an invalid (negative) index
    result = self.ast[max(self.current - 1, 0)]


## Utility functions

proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance.
    ## If the distance is out of bounds, the last
    ## AST node in the tree is returned. A negative
    ## distance may be used to retrieve previously
    ## consumed AST nodes. The AST must not be empty
    if self.ast.high() == -1 or self.current + distance > self.ast.high() or self.current + distance < 0:
        result = self.ast[^1]
    else:
        result = self.ast[self.current + distance]


proc done(self: Compiler): bool =
    ## Returns true if the compiler is done
    ## compiling, false otherwise
    result = self.current > self.ast.high()


proc error(self: Compiler, message: string) {.raises: [CompileError, ValueError].} =
    ## Raises a formatted CompileError exception. Location
    ## information is taken from the current AST node and is
    ## omitted when there is none (i.e. the AST is empty)
    let node = self.getCurrentNode()
    if node == nil:
        raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' -> {message}")
    var tok = node.token
    raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {tok.line} at '{tok.lexeme}' -> {message}")


proc step(self: Compiler): ASTNode =
    ## Steps to the next node and returns
    ## the consumed one
    result = self.peek()
    if not self.done():
        self.current += 1


proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled. The
    ## byte is tagged with the line of the node
    ## returned by peek()
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    self.chunk.write(uint8 byt, self.peek().token.line)


proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Emits multiple bytes instead of a single one. This is useful
    ## to emit operators along with their operands or for multi-byte
    ## instructions that are longer than one byte
    self.emitByte(uint8 byt1)
    self.emitByte(uint8 byt2)


proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
    ## Handy helper method to write an array of 2 bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    self.emitBytes(bytarr[0], bytarr[1])


proc emitBytes(self: Compiler, bytarr: openarray[uint8]) =
    ## Handy helper method to write an array of bytes of any
    ## length (e.g. a 3-byte operand) into the current chunk,
    ## calling emitByte on each of its elements
    for b in bytarr:
        self.emitByte(b)


proc makeConstant(self: Compiler, val: Expression, kind: Type): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s
    result = self.chunk.addConstant(val, kind)


proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits a LoadConstant instruction along
    ## with its operand. The opcode is chosen from
    ## the given type, which is also the type the
    ## constant is registered with
    if kind == nil:
        self.error("cannot emit a constant whose type is unknown (This is an internal error and most likely a bug!)")
    # We dispatch on the type supplied by the caller rather than
    # inferring it again from obj: inference returns nil for some
    # literals (e.g. strings), which would be dereferenced here
    case kind.kind:
        of Int64:
            self.emitByte(LoadInt64)
        else:
            discard  # TODO
    self.emitBytes(self.makeConstant(obj, kind))


proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits a dummy jump offset to be patched later. Assumes
    ## the largest offset (emits 4 bytes, one for the given jump
    ## opcode, while the other 3 are for the jump offset which is set
    ## to the maximum unsigned 24 bit integer). If the shorter
    ## 16 bit alternative is later found to be better suited, patchJump
    ## will fix this. This function returns the absolute index into the
    ## chunk's bytecode array where the given placeholder instruction was written
    self.emitByte(opcode)
    self.emitBytes((0xffffff).toTriple())
    result = self.chunk.code.len() - 4


proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted relative
    ## jump using emitJump. Since emitJump assumes
    ## a long jump, this also shrinks the jump
    ## offset and changes the bytecode instruction if possible
    ## (i.e. jump is in 16 bit range), but the converse is also
    ## true (i.e. it might change a regular jump into a long one).
    ## The jump distance is measured from the jump's opcode to the
    ## current end of the chunk
    # NOTE(review): shrinking removes one byte from the chunk, so any
    # bytecode position recorded *after* offset (pending jumps, break
    # positions, Name.codePos) is shifted by one. Callers that hold
    # such positions must account for this
    var jump: int = self.chunk.code.len() - offset
    if jump > 16777215:
        self.error("cannot jump more than 16777216 bytecode instructions")
    if jump < uint16.high().int:
        case OpCode(self.chunk.code[offset]):
            of LongJumpForwards:
                self.chunk.code[offset] = JumpForwards.uint8()
            of LongJumpBackwards:
                self.chunk.code[offset] = JumpBackwards.uint8()
            of LongJumpIfFalse:
                self.chunk.code[offset] = JumpIfFalse.uint8()
            of LongJumpIfFalsePop:
                self.chunk.code[offset] = JumpIfFalsePop.uint8()
            of LongJumpIfFalseOrPop:
                self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
            else:
                discard
        self.chunk.code.delete(offset + 1)   # Discards the first 8 bits of the jump offset (which are empty)
        let offsetArray = (jump - 1).toDouble()  # -1 since we got rid of 1 byte!
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
    else:
        case OpCode(self.chunk.code[offset]):
            of JumpForwards:
                self.chunk.code[offset] = LongJumpForwards.uint8()
            of JumpBackwards:
                self.chunk.code[offset] = LongJumpBackwards.uint8()
            of JumpIfFalse:
                self.chunk.code[offset] = LongJumpIfFalse.uint8()
            of JumpIfFalsePop:
                self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
            of JumpIfFalseOrPop:
                self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
            else:
                discard
        let offsetArray = jump.toTriple()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
        self.chunk.code[offset + 3] = offsetArray[2]


proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
    ## Traverses self.names backwards and returns the
    ## first name object with the given name. Returns
    ## nil when the name can't be found. This function
    ## has no concept of scope depth (the depth parameter
    ## is currently unused), because getStackPos
    ## does that job. Note that private names declared in
    ## other modules will not be resolved!
    for obj in reversed(self.names):
        if obj.name.token.lexeme == name.token.lexeme:
            if obj.isPrivate and obj.owner != self.currentModule:
                continue  # There may be a name in the current module that
                          # matches, so we skip this
            return obj
    return nil


proc getStackPos(self: Compiler, name: IdentExpr,
                 depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
    ## Iterates the internal list of declared names backwards and
    ## returns a tuple (closedOver, pos) that tells the caller whether the
    ## the name is to be emitted as a closure as well as its predicted
    ## stack/closure array position. Returns (false, -1) if the variable's
    ## location can not be determined at compile time (this is an error!).
    ## Note that private names declared in other modules will not be resolved!
    # We walk by index so that the reported stack position always matches
    # the entry being inspected, even when entries are skipped
    for i in countdown(self.names.high(), 0):
        let variable = self.names[i]
        if name.name.lexeme != variable.name.name.lexeme:
            continue
        if variable.isPrivate and variable.owner != self.currentModule:
            continue
        if variable.depth == depth or variable.depth == 0:
            # variable.depth == 0 for globals!
            return (false, i)
        elif variable.depth > 0:
            # The most recently closed-over entry with this name wins. The
            # position returned is the entry's real index in self.closedOver
            for j in countdown(self.closedOver.high(), 0):
                if self.closedOver[j].name.lexeme == name.name.lexeme:
                    return (true, j)
    return (false, -1)


proc detectClosureVariable(self: Compiler, name: IdentExpr,
                           depth: int = self.scopeDepth) =
    ## Detects if the given name is used in a local scope deeper
    ## than the given one and modifies the code emitted for it
    ## to store it as a closure variable if it is. Does nothing if the name
    ## hasn't been declared yet or is unreachable (for example if it's
    ## declared as private in another module). This function must be called
    ## each time a name is referenced in order for closed-over variables
    ## to be emitted properly, otherwise the runtime may behave
    ## unpredictably or crash
    let entry = self.resolve(name)
    if entry == nil:
        return
    if entry.depth < depth:
        # Ding! The given name is closed over: we need to
        # change the StoreVar instruction that created this
        # name entry into a StoreHeap. We don't need to change
        # other pieces of code because self.identifier() already
        # emits LoadHeap if it detects the variable is closed over,
        # whether or not this function is called
        # NOTE(review): a new closure slot is appended on *every* call, so
        # a name referenced more than once gets several slots and only the
        # newest one matches the patched store below -- confirm whether
        # entries should be deduplicated
        self.closedOver.add(entry.name)
        if self.closedOver.len() >= 16777216:
            self.error("too many consecutive closed-over variables (max is 16777216)")
        let idx = self.closedOver.high().toTriple()
        self.chunk.code[entry.codePos] = StoreHeap.uint8
        self.chunk.code[entry.codePos + 1] = idx[0]
        self.chunk.code[entry.codePos + 2] = idx[1]
        self.chunk.code[entry.codePos + 3] = idx[2]


proc compareTypes(self: Compiler, a, b: Type): bool =
    ## Compares two type objects
    ## for equality (works with nil!).
    ## Function types are compared by name,
    ## arity, return type and arguments. Kinds
    ## with no comparison rule (e.g. CustomType)
    ## never compare equal
    if a == nil:
        return b == nil
    elif b == nil:
        return a == nil
    if a.kind != b.kind:
        return false
    case a.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, Nan, Bool, Inf:
            return true
        of Function:
            # NOTE(review): assumes both nodes are function
            # declarations (lambdas are not handled here)
            let
                a = FunDecl(a.node)
                b = FunDecl(b.node)
            if a.name.token.lexeme != b.name.token.lexeme:
                return false
            elif a.arguments.len() != b.arguments.len():
                return false
            elif not self.compareTypes(self.inferType(a.returnType), self.inferType(b.returnType)):
                return false
            for (argA, argB) in zip(a.arguments, b.arguments):
                if argA.mutable != argB.mutable:
                    return false
                elif argA.isRef != argB.isRef:
                    return false
                elif argA.isPtr != argB.isPtr:
                    return false
                elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
                    return false
            return true
        else:
            discard


proc toIntrinsic(name: string): Type =
    ## Converts a string to an intrinsic
    ## type if it is valid and returns nil
    ## otherwise
    if name in ["int", "int64", "i64"]:
        return Type(kind: Int64)
    elif name in ["uint64", "u64"]:
        return Type(kind: UInt64)
    elif name in ["int32", "i32"]:
        return Type(kind: Int32)
    elif name in ["uint32", "u32"]:
        return Type(kind: UInt32)
    elif name in ["int16", "i16"]:
        return Type(kind: Int16)
    elif name in ["uint16", "u16"]:
        return Type(kind: UInt16)
    elif name in ["int8", "i8"]:
        return Type(kind: Int8)
    elif name in ["uint8", "u8"]:
        return Type(kind: UInt8)
    elif name in ["f64", "float", "float64"]:
        return Type(kind: Float64)
    elif name in ["f32", "float32"]:
        return Type(kind: Float32)
    elif name == "byte":
        return Type(kind: Byte)
    elif name == "char":
        return Type(kind: Char)
    elif name == "nan":
        return Type(kind: Nan)
    elif name == "nil":
        return Type(kind: Nil)
    elif name == "inf":
        return Type(kind: Inf)
    elif name == "bool":
        return Type(kind: Bool)
    else:
        return nil


proc inferType(self: Compiler, node: LiteralExpr): Type =
    ## Infers the type of a given literal expression.
    ## Numeric literals may carry a size specifier after
    ## a single quote (e.g. 'i32). Returns nil for nil
    ## nodes and for literal kinds that are not handled
    ## yet (such as strings)
    if node == nil:
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                return Type(node: node, kind: Int64)
            let typ = size[1].toIntrinsic()
            if not self.compareTypes(typ, nil):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for int")
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                return Type(node: node, kind: Float64)
            let typ = size[1].toIntrinsic()
            if not self.compareTypes(typ, nil):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for float")
        of nilExpr:
            return Type(node: node, kind: Nil)
        of trueExpr:
            return Type(node: node, kind: Bool)
        of falseExpr:
            return Type(node: node, kind: Bool)
        of nanExpr:
            return Type(node: node, kind: TypeKind.Nan)
        of infExpr:
            return Type(node: node, kind: TypeKind.Inf)
        else:
            discard  # TODO


proc toIntrinsic(self: Compiler, typ: Expression): Type =
    ## Gets an expression's
    ## intrinsic type, if possible.
    ## Returns nil otherwise
    if typ == nil:
        return nil
    case typ.kind:
        of trueExpr, falseExpr, intExpr, floatExpr:
            return typ.token.lexeme.toIntrinsic()
        of identExpr:
            let inferred = self.inferType(typ)
            if inferred == nil:
                return typ.token.lexeme.toIntrinsic()
            return inferred
        else:
            discard


proc inferType(self: Compiler, node: Expression): Type =
    ## Infers the type of a given expression and
    ## returns it. Returns nil if the type can
    ## not be determined
    if node == nil:
        return nil
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            let name = self.resolve(node)
            if name != nil:
                return name.valueType
            else:
                # Not a declared name: it may still be the
                # name of a built-in type
                return node.name.lexeme.toIntrinsic()
        of unaryExpr:
            return self.inferType(UnaryExpr(node).a)
        of binaryExpr:
            let node = BinaryExpr(node)
            var a = self.inferType(node.a)
            var b = self.inferType(node.b)
            # Both operands must agree on their type
            if not self.compareTypes(a, b):
                return nil
            return a
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr
           }:
            return self.inferType(LiteralExpr(node))
        else:
            discard  # Unreachable


proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a
    ## type object. A nil type (e.g. an untyped
    ## value or a function without a return type)
    ## is rendered as "nil"
    if typ == nil:
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            return ($typ.kind).toLowerAscii()
        of Function:
            result = "function ("
            case typ.node.kind:
                of funDecl:
                    var node = FunDecl(typ.node)
                    for i, argument in node.arguments:
                        result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}"
                        if i < node.arguments.len() - 1:
                            result &= ", "
                    result &= ")"
                of lambdaExpr:
                    var node = LambdaExpr(typ.node)
                    for i, argument in node.arguments:
                        result &= &"{argument.name.token.lexeme}: {argument.valueType}"
                        if i < node.arguments.len() - 1:
                            result &= ", "
                    result &= ")"
                else:
                    discard  # Unreachable
            result &= &": {self.typeToStr(typ.returnType)}"
        else:
            discard


proc inferType(self: Compiler, node: Declaration): Type =
    ## Infers the type of a given declaration
    ## and returns it. Returns nil if the type
    ## can not be determined
    if node == nil:
        return nil
    case node.kind:
        of funDecl:
            var node = FunDecl(node)
            let resolved = self.resolve(node.name)
            if resolved != nil:
                return resolved.valueType
        of NodeKind.varDecl:
            var node = VarDecl(node)
            let resolved = self.resolve(node.name)
            if resolved != nil:
                return resolved.valueType
            else:
                return self.inferType(node.value)
        else:
            return  # Unreachable

## End of utility functions

proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such
    ## as singletons, strings, numbers and
    ## collections
    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue)
        of falseExpr:
            self.emitByte(LoadFalse)
        of nilExpr:
            self.emitByte(LoadNil)
        of infExpr:
            self.emitByte(LoadInf)
        of nanExpr:
            self.emitByte(LoadNan)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        # TODO: Take size specifier into account!
        of intExpr:
            var x: int
            var y = IntExpr(node)
            try:
                discard parseInt(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(y, Type(kind: Int64))
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            try:
                discard parseHex(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            # The literal is re-encoded as a plain decimal integer
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, Type(kind: Int64))
        of binExpr:
            var x: int
            var y = BinExpr(node)
            try:
                discard parseBin(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, Type(kind: Int64))
        of octExpr:
            var x: int
            var y = OctExpr(node)
            try:
                discard parseOct(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, Type(kind: Int64))
        of floatExpr:
            var x: float
            var y = FloatExpr(node)
            try:
                discard parseFloat(y.literal.lexeme, x)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(y, Type(kind: Float64))
        of awaitExpr:
            var y = AwaitExpr(node)
            self.expression(y.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")


proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as decimal
    ## and bitwise negation
    self.expression(node.a)  # Pushes the operand onto the stack
    # TODO: Find implementation of
    # the given operator and call it


proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expressions
    # These two lines prepare the stack by pushing the
    # opcode's operands onto it
    self.expression(node.a)
    self.expression(node.b)
    # TODO: Find implementation of
    # the given operator and call it
    # NOTE(review): the token kinds matched below look like
    # placeholders and both branches compile the operands a
    # second time -- this is unfinished code
    case node.operator.kind:
        of NoMatch:
            # a and b
            self.expression(node.a)
            var jump: int
            if self.enableOptimizations:
                jump = self.emitJump(JumpIfFalseOrPop)
            else:
                jump = self.emitJump(JumpIfFalse)
                self.emitByte(Pop)
            self.expression(node.b)
            self.patchJump(jump)
        of EndOfFile:
            # a or b
            self.expression(node.a)
            let jump = self.emitJump(JumpIfTrue)
            self.expression(node.b)
            self.patchJump(jump)
        else:
            self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug!)")


proc declareName(self: Compiler, node: Declaration) =
    ## Statically declares a name into the current scope.
    ## Variables emit a StoreVar instruction, functions only
    ## register their name and arguments
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            # The new entry's index must fit in a 3-byte operand
            if self.names.len() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777216 variables at a time")
            self.names.add(Name(depth: self.scopeDepth,
                                name: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: Type(kind: self.inferType(
                                        node.value).kind, node: node),
                                codePos: self.chunk.code.len(),
                                isLet: node.isLet))
            self.emitByte(StoreVar)
            self.emitBytes(self.names.high().toTriple())
        of NodeKind.funDecl:
            var node = FunDecl(node)
            # Declares the function's name in the
            # current scope but no StoreVar is emitted
            # because the name is only useful at compile time.
            # TODO: Maybe emit some optional debugging
            # metadata to let the VM know where a function's
            # code begins and ends (similar to what gcc does with
            # CFI in object files) to build stack traces
            self.names.add(Name(depth: self.scopeDepth,
                                isPrivate: node.isPrivate,
                                isConst: false,
                                owner: self.currentModule,
                                valueType: Type(kind: Function, node: node,
                                        returnType: self.inferType(
                                        node.returnType)),
                                codePos: self.chunk.code.len(),
                                name: node.name,
                                isLet: false))
            for argument in node.arguments:
                if self.names.len() > 16777215:
                    self.error("cannot declare more than 16777216 variables at a time")
                # Arguments live one scope deeper than the function itself
                self.names.add(Name(depth: self.scopeDepth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    name: argument.name,
                                    valueType: nil,
                                    codePos: self.chunk.code.len(),
                                    isLet: false))
                self.names[^1].valueType = self.inferType(argument.valueType)
                if self.names[^1].valueType == nil:
                    self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
                self.names[^1].valueType.node = argument.name
                self.emitByte(LoadVar)
                self.emitBytes(self.names.high().toTriple())
        else:
            discard  # Unreachable


proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers
    let s = self.resolve(node)
    if s == nil:
        self.error(&"reference to undeclared name '{node.token.lexeme}'")
    elif s.isConst:
        # Constants are emitted as, you guessed it, LoadConstant instructions
        # no matter the scope depth. If optimizations are enabled, the compiler
        # will reuse the same constant every time it is referenced instead of
        # allocating a new one each time
        self.emitConstant(node, self.inferType(node))
    else:
        self.detectClosureVariable(s.name)
        let t = self.getStackPos(node)
        let index = t.pos
        # We don't check if index is -1 because if it
        # were, self.resolve() would have returned nil
        if not t.closedOver:
            # Static name resolution, loads value at index in the stack. Very fast. Much wow.
            self.emitByte(LoadVar)
            self.emitBytes(index.toTriple())
        else:
            if self.closedOver.len() == 0:
                self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)")
            # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics.
            # This makes closures work as expected and is not comparatively slower than indexing our stack (since they're both
            # dynamic arrays at runtime anyway)
            self.emitByte(LoadHeap)
            # The operand is the variable's own slot in the closure array
            self.emitBytes(index.toTriple())


proc findImpl(self: Compiler, node: FunDecl): seq[Name] =
    ## Looks for functions matching the given declaration
    ## in the code that has been compiled so far.
    ## Returns a list of each matching name object
    for obj in reversed(self.names):
        # Scopes are indexed backwards!
        case obj.valueType.kind:
            of Function:
                if self.compareTypes(obj.valueType, self.inferType(node)):
                    result.add(obj)
            else:
                continue


proc findByName(self: Compiler, name: string): seq[Name] =
    ## Looks for objects that have been already declared
    ## with the given name. The most recent declarations
    ## come first
    for obj in reversed(self.names):
        if obj.name.token.lexeme == name:
            result.add(obj)


proc findByType(self: Compiler, name: string, kind: Type): seq[Name] =
    ## Looks for objects that have already been declared
    ## with the given name and type
    for obj in self.findByName(name):
        if self.compareTypes(obj.valueType, kind):
            result.add(obj)


proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions
    case node.kind:
        of assignExpr:
            let node = AssignExpr(node)
            let name = IdentExpr(node.name)
            let r = self.resolve(name)
            if r == nil:
                self.error(&"assignment to undeclared name '{name.token.lexeme}'")
            elif r.isConst:
                self.error(&"cannot assign to '{name.token.lexeme}' (constant)")
            elif r.isLet:
                self.error(&"cannot reassign '{name.token.lexeme}'")
            self.expression(node.value)
            let t = self.getStackPos(name)
            let index = t.pos
            if index != -1:
                if not t.closedOver:
                    self.emitByte(StoreVar)
                else:
                    self.emitByte(StoreHeap)
                self.emitBytes(index.toTriple())
            else:
                self.error(&"reference to undeclared name '{node.token.lexeme}'")
        of setItemExpr:
            let node = SetItemExpr(node)
            let typ = self.inferType(node)
            if typ == nil:
                self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")


proc beginScope(self: Compiler) =
    ## Begins a new local scope by incrementing the current
    ## scope's depth
    inc(self.scopeDepth)


proc endScope(self: Compiler) =
    ## Ends the current local scope: emits the instructions
    ## needed to pop the scope's variables off the stack,
    ## forgets their names and decrements the scope depth
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    var popped: int = 0
    for ident in reversed(self.names):
        if ident.depth > self.scopeDepth:
            inc(popped)
            if not self.enableOptimizations:
                # All variables with a scope depth larger than the current one
                # are now out of scope. Begone, you're now homeless!
                self.emitByte(Pop)
    if self.enableOptimizations and popped > 1:
        # If we're popping less than 65535 variables, then
        # we can emit a PopN instruction. This is true for
        # 99.99999% of the use cases of the language (who the
        # hell is going to use 65 THOUSAND local variables?), but
        # if you'll ever use more then Peon will emit a PopN instruction
        # for the first 65 thousand and change local variables and then
        # emit another batch of plain ol' Pop instructions for the rest
        if popped <= uint16.high().int():
            self.emitByte(PopN)
            self.emitBytes(popped.toDouble())
        else:
            self.emitByte(PopN)
            self.emitBytes(uint16.high().int.toDouble())
            # Only the values PopN could not cover still need a Pop
            for _ in countup(1, popped - uint16.high().int()):
                self.emitByte(Pop)
    elif self.enableOptimizations and popped == 1:
        # We only emit PopN if we're popping more than one value.
        # Without optimizations the Pop was already emitted above
        self.emitByte(Pop)
    for _ in countup(0, popped - 1):
        discard self.names.pop()
    dec(self.scopeDepth)


proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles block statements, which create a new
    ## local scope.
    self.beginScope()
    for decl in node.code:
        self.declaration(decl)
    self.endScope()


proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code
    self.expression(node.condition)
    var jumpCode: OpCode
    if self.enableOptimizations:
        jumpCode = JumpIfFalsePop
    else:
        jumpCode = JumpIfFalse
    let jump = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    if node.elseBranch == nil:
        self.patchJump(jump)
    else:
        # The jump that skips the else branch must be emitted *before*
        # the conditional jump is patched, so that a false condition
        # lands on the else branch and not on the jump skipping it
        var elseJump = self.emitJump(JumpForwards)
        let sizeBefore = self.chunk.code.len()
        self.patchJump(jump)
        # patchJump may have shrunk the conditional jump by one byte,
        # which moves everything after it (our placeholder included)
        elseJump -= sizeBefore - self.chunk.code.len()
        self.statement(node.elseBranch)
        self.patchJump(elseJump)


proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction with the correct
    ## jump offset (a LongJumpBackwards is used if the offset
    ## does not fit in 16 bits)
    # NOTE(review): begin is the position where the loop starts, so
    # begin + 1 is not obviously the loop's jump instruction -- confirm
    # the intent of this check and of the + 4 adjustment
    var offset: int
    case OpCode(self.chunk.code[begin + 1]):   # The jump instruction
        of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
           LongJumpIfFalsePop, LongJumpIfTrue:
            offset = self.chunk.code.len() - begin + 4
        else:
            offset = self.chunk.code.len() - begin
    if offset > uint16.high().int:
        if offset > 16777215:
            self.error("cannot jump more than 16777215 bytecode instructions")
        self.emitByte(LongJumpBackwards)
        self.emitBytes(offset.toTriple())
    else:
        self.emitByte(JumpBackwards)
        self.emitBytes(offset.toDouble())


proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops and
    ## desugared C-style for loops
    let start = self.chunk.code.len()
    self.expression(node.condition)
    var jump: int
    if self.enableOptimizations:
        jump = self.emitJump(JumpIfFalsePop)
    else:
        jump = self.emitJump(JumpIfFalse)
        self.emitByte(Pop)
    self.statement(node.body)
    self.patchJump(jump)
    self.emitLoop(start)


proc expression(self: Compiler, node: Expression) =
    ## Compiles all expressions. Expressions whose
    ## type can not be inferred are rejected
    if self.inferType(node) == nil:
        if node.kind != identExpr:
            # So we can raise a more appropriate
            # error in self.identifier()
            self.error("expression has no type")
    case node.kind:
        of callExpr:
            discard  # TODO
        of getItemExpr:
            discard  # TODO
        # Note that for setItem and assign we don't convert
        # the node to its true type because that type information
        # would be lost in the call anyway. The differentiation
        # happens in self.assignment()
        of setItemExpr, assignExpr:
            self.assignment(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # Since all of these AST nodes share the
            # same overall structure and the kind
            # field is enough to tell one from the
            # other, why bother with specialized
            # cases when one is enough?
            self.literal(node)
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")


proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements. An await statement
    ## is like an await expression, but parsed in the
    ## context of statements for usage outside expressions,
    ## meaning it can be used standalone. It's basically the
    ## same as an await expression followed by a semicolon.
    ## Await expressions are the only native construct to
    ## run coroutines from within an already asynchronous
    ## context (which should be orchestrated by an event loop).
    ## They block in the caller until the callee returns
    self.expression(node.expression)
    self.emitByte(OpCode.Await)


proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ## The expression's bytecode is moved out of the chunk
    ## and into self.deferred
    let current = self.chunk.code.len
    self.expression(node.expression)
    # Copy the freshly emitted bytes in order, then drop them all at
    # once: removing them one by one while iterating would reorder the
    # chunk and run past its end
    for i in countup(current, self.chunk.code.high()):
        self.deferred.add(self.chunk.code[i])
    self.chunk.code.setLen(current)
    # NOTE(review): any per-byte metadata the chunk keeps (e.g. line
    # information written by emitByte) is not trimmed here -- confirm
    # whether it needs to be


proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. An empty return
    ## implicitly returns nil. The returned value's
    ## type must match the return type of the function
    ## being compiled
    let returnType = self.inferType(node.value)
    let typ = self.inferType(self.currentFunction)
    if typ == nil:
        self.error("cannot determine the type of the function enclosing this return statement")
    ## Having the return type
    if typ.returnType == nil and returnType != nil:
        self.error("non-empty return statement is not allowed in functions without an explicit return type")
    elif returnType == nil and typ.returnType != nil:
        self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', but expression has no type")
    elif not self.compareTypes(returnType, typ.returnType):
        self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead")
    if node.value != nil:
        self.expression(node.value)
        self.emitByte(OpCode.ReturnPop)
    else:
        self.emitByte(OpCode.Return)


proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements
    self.expression(node.expression)
    self.emitByte(OpCode.Yield)


proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements
    self.expression(node.exception)
    self.emitByte(OpCode.Raise)


proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statements
    ## jumps to the next iteration in a loop
    if self.currentLoop.start <= 65535:
        self.emitByte(Jump)
        self.emitBytes(self.currentLoop.start.toDouble())
    else:
        if self.currentLoop.start > 16777215:
            self.error("too much code to jump over in continue statement")
        self.emitByte(LongJump)
        self.emitBytes(self.currentLoop.start.toTriple())


proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps out of the innermost loop

    # Emits dummy jump offset, this is
    # patched later. emitJump returns the
    # position of the placeholder's opcode,
    # which is what patchBreaks needs
    self.currentLoop.breakPos.add(self.emitJump(OpCode.Jump))
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()


proc patchBreaks(self: Compiler) =
    ## Patches "break" opcodes with
    ## actual jumps. This is needed
    ## because the size of code
    ## to skip is not known before
    ## the loop is fully compiled
    # Positions are patched from the last to the first one: patchJump
    # may remove a byte right after the jump it patches, which would
    # otherwise invalidate every position recorded after it
    for brk in reversed(self.currentLoop.breakPos):
        self.chunk.code[brk] = JumpForwards.uint8()
        self.patchJump(brk)


proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (raise
    ## AssertionError if the expression is falsey)
    self.expression(node.expression)
    self.emitByte(OpCode.Assert)


proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements
    case node.kind:
        of exprStmt:
            var expression = ExprStmt(node).expression
            self.expression(expression)
            self.emitByte(Pop)  # Expression statements discard their value. Their main use case is side effects in function calls
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt:
            discard
        of NodeKind.whileStmt, NodeKind.forStmt:
            ## Our parser already desugars for loops to
            ## while loops!
            # The enclosing loop (if any) is saved and restored
            # so that nested loops work
            let loop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = loop
        of NodeKind.forEachStmt:
            discard
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            self.expression(Expression(node))


proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations. If the declaration
    ## carries an explicit type, the value's inferred type
    ## must match it
    let kind = self.toIntrinsic(node.valueType)
    let typ = self.inferType(node.value)
    if kind == nil and typ == nil:
        self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
    elif kind != nil and not self.compareTypes(typ, kind):
        # Types are compared structurally: two distinct Type
        # objects may very well describe the same type
        self.error(&"expected value of type '{self.typeToStr(kind)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(typ)}'")
    self.expression(node.value)
    self.declareName(node)


proc funDecl(self: Compiler, node: FunDecl) =
    ## Compiles function declarations

    # A function's code is just compiled linearly
    # and then jumped over
    let jmp = self.emitJump(Jump)
    self.declareName(node)
    if node.body != nil:
        if BlockStmt(node.body).code.len() == 0:
            self.error("Cannot declare function with empty body")
        let fnType = self.inferType(node)
        let impl = self.findByType(node.name.token.lexeme, fnType)
        if impl.len() > 1:
            # Oh-oh! We found more than one implementation of
            # the same function! Error!
            var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n"
            for fn in reversed(impl):
                var node = FunDecl(fn.valueType.node)
                discard self.typeToStr(fn.valueType)
                msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n"
            self.error(msg)
        # We store the current function
        var function = self.currentFunction
        self.currentFunction = node

        # Since the deferred array is a linear
        # sequence of instructions and we want
        # to keep track to whose function's each
        # set of deferred instruction belongs,
        # we record the length of the deferred
        # array before compiling the function
        # and use this info later to compile
        # the try/finally block with the deferred
        # code
        var deferStart = self.deferred.len()

        self.blockStmt(BlockStmt(node.body))
        # Yup, we're done. That was easy, huh?
        # But after all functions are just named
        # scopes, and we compile them just like that:
        # we declare their name and arguments (before
        # their body so recursion works) and then just
        # handle them as a block statement (which takes
        # care of incrementing self.scopeDepth so locals
        # are resolved properly).
There's a need for a bit # of boilerplate code to make closures work, but # that's about it self.emitBytes(LoadNil, OpCode.Return) # Currently defer is not functional so we # just pop the instructions for i in countup(deferStart, self.deferred.len() - 1, 1): self.deferred.delete(i) self.patchJump(jmp) # This makes us compile nested functions correctly self.currentFunction = function proc declaration(self: Compiler, node: Declaration) = ## Compiles all declarations case node.kind: of NodeKind.varDecl: self.varDecl(VarDecl(node)) of NodeKind.funDecl: self.funDecl(FunDecl(node)) else: self.statement(Statement(node)) proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk = ## Compiles a sequence of AST nodes into a chunk ## object self.chunk = newChunk() self.ast = ast self.file = file self.names = @[] self.scopeDepth = 0 self.currentFunction = nil self.currentModule = self.file self.current = 0 while not self.done(): self.declaration(Declaration(self.step())) if self.ast.len() > 0: # *Technically* an empty program is a valid program self.endScope() self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope result = self.chunk if self.ast.len() > 0 and self.scopeDepth != -1: self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")