diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim deleted file mode 100644 index ce1565b..0000000 --- a/src/frontend/compiler.nim +++ /dev/null @@ -1,1280 +0,0 @@ -# Copyright 2022 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import meta/token -import meta/ast -import meta/errors -import ../config -import ../util/multibyte - - -import strformat -import algorithm -import parseutils -import strutils -import sequtils - - -export ast -export token -export multibyte - -type - TypeKind* = enum - ## An enumeration of compile-time - ## types - Int8, UInt8, Int16, UInt16, Int32, - UInt32, Int64, UInt64, Float32, Float64, - Char, Byte, String, Function, CustomType, - Nil, Nan, Bool, Inf - Type* = ref object - ## A wrapper around - ## compile-time types - node*: ASTNode - case kind*: TypeKind: - of Function: - returnType*: Type - else: - discard -# This way we don't have recursive dependency issues -import meta/bytecode -export bytecode - - -type - Name = ref object - ## A compile-time wrapper around - ## statically resolved names - - # Name of the identifier - name: IdentExpr - # Owner of the identifier (module) - owner: string - # Scope depth - depth: int - # Is this name private? - isPrivate: bool - # Is this a constant? - isConst: bool - # Can this name's value be mutated? - isLet: bool - # The name's type - valueType: Type - # For variables, the position in the bytecode - # where its StoreVar instruction was emitted. 
- # For functions, this marks where the function's - # code begins - codePos: int - Loop = object - ## A "loop object" used - ## by the compiler to emit - ## appropriate jump offsets - ## for continue and break - ## statements - - # Position in the bytecode where the loop starts - start: int - # Scope depth where the loop is located - depth: int - # Absolute jump offsets into our bytecode that we need to - # patch. Used for break statements - breakPos: seq[int] - - Compiler* = ref object - ## A wrapper around the Peon compiler's state - - # The bytecode chunk where we write code to - chunk: Chunk - # The output of our parser (AST) - ast: seq[Declaration] - # The current AST node we're looking at - current: int - # The current file being compiled (used only for - # error reporting) - file: string - # Compile-time "simulation" of the stack at - # runtime to load variables that have stack - # behavior more efficiently - names: seq[Name] - # The current scope depth. If > 0, we're - # in a local scope, otherwise it's global - scopeDepth: int - # The current function being compiled - currentFunction: FunDecl - # Are optimizations turned on? - enableOptimizations*: bool - # The current loop being compiled (used to - # keep track of where to jump) - currentLoop: Loop - # The current module being compiled - # (used to restrict access to statically - # defined variables at compile time) - currentModule: string - # Each time a defer statement is - # compiled, its code is emitted - # here. Later, if there is any code - # to defer in the current function, - # funDecl will wrap the function's code - # inside an implicit try/finally block - # and add this code in the finally branch. 
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Public getter used for nicer error formatting.
    ##
    ## Returns the most recently consumed AST node, i.e. the one at
    ## index `self.current - 1`. The index is clamped to the bounds
    ## of `self.ast`, so:
    ## - before anything has been consumed (`self.current == 0`) the
    ##   first node is returned instead of indexing at -1
    ## - once `self.current` is past the end, the last node is returned
    ##
    ## Returns nil if the AST is empty, since there is no node to
    ## report on
    if self.ast.len() == 0:
        return nil
    result = self.ast[clamp(self.current - 1, 0, self.ast.high())]
proc step(self: Compiler): ASTNode =
    ## Returns the AST node at the current position and,
    ## unless the whole tree has already been consumed,
    ## advances the compiler to the next one
    result = self.peek()
    # Same check done() performs: only move forward
    # while self.current is still a valid index
    if self.current <= self.ast.high():
        inc(self.current)
proc patchJump(self: Compiler, offset: int) =
    ## Patches a jump previously emitted with emitJump.
    ##
    ## `offset` is the absolute index in self.chunk.code of the jump's
    ## opcode (the value returned by emitJump). The jump distance is
    ## computed as the difference between the current end of the
    ## bytecode and `offset`, so this must be called right when the
    ## jump target has been reached.
    ##
    ## emitJump always reserves 3 operand bytes. If the distance fits
    ## in 2 bytes, the instruction is rewritten to its short variant
    ## and one operand byte is removed from the chunk; otherwise the
    ## opcode is switched to its Long* variant and all 3 operand
    ## bytes are filled in.
    ##
    ## Raises a CompileError (via self.error) if the distance exceeds
    ## 16777215
    # Distance from the jump opcode to the current end of the code
    var jump: int = self.chunk.code.len() - offset
    # NOTE(review): the check allows distances up to 16777215, while
    # the message below says 16777216 -- confirm which is intended
    if jump > 16777215:
        self.error("cannot jump more than 16777216 bytecode instructions")
    # NOTE(review): a distance of exactly 65535 also fits in 16 bits
    # but takes the long path because of the strict comparison
    if jump < uint16.high().int:
        # Short jump: make sure the opcode is the 16 bit variant
        case OpCode(self.chunk.code[offset]):
            of LongJumpForwards:
                self.chunk.code[offset] = JumpForwards.uint8()
            of LongJumpBackwards:
                self.chunk.code[offset] = JumpBackwards.uint8()
            of LongJumpIfFalse:
                self.chunk.code[offset] = JumpIfFalse.uint8()
            of LongJumpIfFalsePop:
                self.chunk.code[offset] = JumpIfFalsePop.uint8()
            of LongJumpIfFalseOrPop:
                self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
            else:
                discard
        # NOTE(review): removing a byte shifts every bytecode index
        # after offset + 1 down by one. Absolute positions recorded
        # earlier for later code (Name.codePos, Loop.breakPos, jump
        # offsets returned by emitJump but not yet patched) are not
        # adjusted here -- confirm callers account for this
        self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty)
        let offsetArray = (jump - 1).toDouble() # -1 since we got rid of 1 byte!
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
    else:
        # Long jump: make sure the opcode is the 24 bit variant
        case OpCode(self.chunk.code[offset]):
            of JumpForwards:
                self.chunk.code[offset] = LongJumpForwards.uint8()
            of JumpBackwards:
                self.chunk.code[offset] = LongJumpBackwards.uint8()
            of JumpIfFalse:
                self.chunk.code[offset] = LongJumpIfFalse.uint8()
            of JumpIfFalsePop:
                self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
            of JumpIfFalseOrPop:
                self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
            else:
                discard
        # Overwrites the 3 placeholder operand bytes written by emitJump
        let offsetArray = jump.toTriple()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
        self.chunk.code[offset + 3] = offsetArray[2]
proc getStackPos(self: Compiler, name: IdentExpr,
                 depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
    ## Walks the list of declared names backwards (innermost
    ## declaration first) and returns a tuple (closedOver, pos):
    ## - closedOver is false and pos is the index of the name in
    ##   self.names when the name lives at the given depth or is a
    ##   global (depth 0)
    ## - closedOver is true and pos is the index of the name in
    ##   self.closedOver when the name belongs to another local scope
    ##   and has been registered as a closure variable
    ## - (false, -1) is returned when the location of the name can not
    ##   be determined at compile time (this is an error!)
    ## Private names declared in other modules are never resolved
    # Iterating by index keeps the returned position in sync with the
    # entry being inspected even when an entry is skipped, and avoids
    # copying self.names on every lookup
    for i in countdown(self.names.high(), 0):
        let variable = self.names[i]
        if name.name.lexeme != variable.name.name.lexeme:
            continue
        if variable.isPrivate and variable.owner != self.currentModule:
            # Not visible from here: an outer declaration
            # with the same name may still match
            continue
        if variable.depth == depth or variable.depth == 0:
            # variable.depth == 0 for globals!
            return (false, i)
        elif variable.depth > 0:
            # The most recently closed-over entry wins. The index
            # returned is the real position inside self.closedOver
            for j in countdown(self.closedOver.high(), 0):
                if self.closedOver[j].name.lexeme == name.name.lexeme:
                    return (true, j)
    return (false, -1)
This function must be called - ## each time a name is referenced in order for closed-over variables - ## to be emitted properly, otherwise the runtime may behave - ## unpredictably or crash - let entry = self.resolve(name) - if entry == nil: - return - if entry.depth < depth: - # Ding! The given name is closed over: we need to - # change the StoreVar instruction that created this - # name entry into a StoreHeap. We don't need to change - # other pieces of code because self.identifier() already - # emits LoadHeap if it detects the variable is closed over, - # whether or not this function is called - self.closedOver.add(entry.name) - if self.closedOver.len() >= 16777216: - self.error("too many consecutive closed-over variables (max is 16777216)") - let idx = self.closedOver.high().toTriple() - self.chunk.code[entry.codePos] = StoreHeap.uint8 - self.chunk.code[entry.codePos + 1] = idx[0] - self.chunk.code[entry.codePos + 2] = idx[1] - self.chunk.code[entry.codePos + 3] = idx[2] - - -proc compareTypes(self: Compiler, a, b: Type): bool = - ## Compares two type objects - ## for equality (works with nil!) 
proc inferType(self: Compiler, node: LiteralExpr): Type =
    ## Infers the type of a given literal expression.
    ##
    ## Returns nil if node is nil or if its kind is not handled here.
    ## Numeric literals may carry a size specifier after a single
    ## quote in their lexeme (everything after the `'` is passed to
    ## toIntrinsic): without one, integers default to Int64 and floats
    ## to Float64. Raises a CompileError (via self.error) when the
    ## specifier does not name an intrinsic type
    if node == nil:
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                return Type(node: node, kind: Int64)
            let typ = size[1].toIntrinsic()
            if typ != nil:
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for int")
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                return Type(node: node, kind: Float64)
            let typ = size[1].toIntrinsic()
            if typ != nil:
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for float")
        of strExpr:
            # String literals are routed here by the Expression
            # overload of inferType and are compiled by literal(),
            # so they need a type too
            return Type(node: node, kind: TypeKind.String)
        of nilExpr:
            return Type(node: node, kind: Nil)
        of trueExpr:
            return Type(node: node, kind: Bool)
        of falseExpr:
            return Type(node: node, kind: Bool)
        of nanExpr:
            return Type(node: node, kind: TypeKind.Nan)
        of infExpr:
            return Type(node: node, kind: TypeKind.Inf)
        else:
            discard # TODO
proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a type object.
    ##
    ## A nil type (for example the return type of a function
    ## declared without one, or an argument whose type could not
    ## be inferred) is rendered as "nil" instead of being
    ## dereferenced. Kinds that are not handled yield an empty string
    if typ == nil:
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            return ($typ.kind).toLowerAscii()
        of Function:
            result = "function ("
            # The node is what carries the argument list: without
            # it only the return type can be printed
            if typ.node != nil:
                case typ.node.kind:
                    of funDecl:
                        let node = FunDecl(typ.node)
                        for i, argument in node.arguments:
                            result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}"
                            if i < node.arguments.len() - 1:
                                result &= ", "
                        result &= ")"
                    of lambdaExpr:
                        let node = LambdaExpr(typ.node)
                        for i, argument in node.arguments:
                            result &= &"{argument.name.token.lexeme}: {argument.valueType}"
                            if i < node.arguments.len() - 1:
                                result &= ", "
                        result &= ")"
                    else:
                        discard # Unreachable
            result &= &": {self.typeToStr(typ.returnType)}"
        else:
            discard
functions - - -proc literal(self: Compiler, node: ASTNode) = - ## Emits instructions for literals such - ## as singletons, strings, numbers and - ## collections - case node.kind: - of trueExpr: - self.emitByte(LoadTrue) - of falseExpr: - self.emitByte(LoadFalse) - of nilExpr: - self.emitByte(LoadNil) - of infExpr: - self.emitByte(LoadInf) - of nanExpr: - self.emitByte(LoadNan) - of strExpr: - self.emitConstant(LiteralExpr(node), Type(kind: String)) - # TODO: Take size specifier into account! - of intExpr: - var x: int - var y = IntExpr(node) - try: - discard parseInt(y.literal.lexeme, x) - except ValueError: - self.error("integer value out of range") - self.emitConstant(y, Type(kind: Int64)) - of hexExpr: - var x: int - var y = HexExpr(node) - try: - discard parseHex(y.literal.lexeme, x) - except ValueError: - self.error("integer value out of range") - let node = newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, - stop: y.token.pos.start + len($x)) - ) - ) - self.emitConstant(node, Type(kind: Int64)) - of binExpr: - var x: int - var y = BinExpr(node) - try: - discard parseBin(y.literal.lexeme, x) - except ValueError: - self.error("integer value out of range") - let node = newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, - stop: y.token.pos.start + len($x)) - ) - ) - self.emitConstant(node, Type(kind: Int64)) - of octExpr: - var x: int - var y = OctExpr(node) - try: - discard parseOct(y.literal.lexeme, x) - except ValueError: - self.error("integer value out of range") - let node = newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, - stop: y.token.pos.start + len($x)) - ) - ) - self.emitConstant(node, Type(kind: Int64)) - of floatExpr: - var x: float - var y = FloatExpr(node) - try: - discard parseFloat(y.literal.lexeme, x) - except ValueError: - self.error("floating point value out of range") - self.emitConstant(y, Type(kind: Float64)) - of awaitExpr: - var y = 
proc declareName(self: Compiler, node: Declaration) =
    ## Statically declares a name into the current scope.
    ##
    ## For variable declarations a Name entry is added and a StoreVar
    ## instruction carrying the entry's index in self.names is emitted.
    ## For function declarations the function's own name is added (no
    ## instruction is emitted for it), followed by one entry per
    ## argument, each declared one scope deeper and followed by a
    ## LoadVar instruction. Other declaration kinds are ignored.
    ##
    ## Raises a CompileError (via self.error) when the 24 bit name
    ## limit would be exceeded or when the type of a variable or of
    ## an argument can not be determined
    case node.kind:
        of NodeKind.varDecl:
            let node = VarDecl(node)
            # Name indexes are emitted as 3 bytes, so the highest
            # usable index is 16777215
            if self.names.len() >= 16777216:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777216 variables at a time")
            let inferred = self.inferType(node.value)
            if inferred == nil:
                # Without this check the .kind access below
                # would dereference nil
                self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            self.names.add(Name(depth: self.scopeDepth,
                                name: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: Type(kind: inferred.kind, node: node),
                                codePos: self.chunk.code.len(),
                                isLet: node.isLet))
            self.emitByte(StoreVar)
            self.emitBytes(self.names.high().toTriple())
        of NodeKind.funDecl:
            let node = FunDecl(node)
            if self.names.len() >= 16777216:
                self.error("cannot declare more than 16777216 variables at a time")
            # Declares the function's name in the
            # current scope but no StoreVar is emitted
            # because the name is only useful at compile time.
            # TODO: Maybe emit some optional debugging
            # metadata to let the VM know where a function's
            # code begins and ends (similar to what gcc does with
            # CFI in object files) to build stack traces
            self.names.add(Name(depth: self.scopeDepth,
                                isPrivate: node.isPrivate,
                                isConst: false,
                                owner: self.currentModule,
                                valueType: Type(kind: Function, node: node,
                                                returnType: self.inferType(node.returnType)),
                                codePos: self.chunk.code.len(),
                                name: node.name,
                                isLet: false))
            for argument in node.arguments:
                if self.names.len() >= 16777216:
                    self.error("cannot declare more than 16777216 variables at a time")
                let argType = self.inferType(argument.valueType)
                if argType == nil:
                    self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
                argType.node = argument.name
                # Arguments live in the function's own scope,
                # which is one level deeper than the current one
                self.names.add(Name(depth: self.scopeDepth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    name: argument.name,
                                    valueType: argType,
                                    codePos: self.chunk.code.len(),
                                    isLet: false))
                self.emitByte(LoadVar)
                self.emitBytes(self.names.high().toTriple())
        else:
            discard # Unreachable
If optimizations are enabled, the compiler - # will reuse the same constant every time it is referenced instead of - # allocating a new one each time - self.emitConstant(node, self.inferType(node)) - else: - self.detectClosureVariable(s.name) - let t = self.getStackPos(node) - let index = t.pos - # We don't check if index is -1 because if it - # were, self.resolve() would have returned nil - if not t.closedOver: - # Static name resolution, loads value at index in the stack. Very fast. Much wow. - self.emitByte(LoadVar) - self.emitBytes(index.toTriple()) - else: - if self.closedOver.len() == 0: - self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)") - # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics. - # This makes closures work as expected and is not comparatively slower than indexing our stack (since they're both - # dynamic arrays at runtime anyway) - self.emitByte(LoadHeap) - self.emitBytes(self.closedOver.high().toTriple()) - - -proc findImpl(self: Compiler, node: FunDecl): seq[Name] = - ## Looks for functions matching the given declaration - ## in the code that has been compiled so far. - ## Returns a list of each matching name object - for obj in reversed(self.names): - # Scopes are indexed backwards! 
proc endScope(self: Compiler) =
    ## Ends the current local scope.
    ##
    ## Counts the names whose depth is greater than the current scope
    ## depth, emits the instructions that pop them off the stack,
    ## removes that many entries from the end of self.names and
    ## finally decrements the scope depth.
    ##
    ## With optimizations disabled one Pop is emitted per name. With
    ## optimizations enabled a single Pop is emitted for one name,
    ## otherwise a PopN covering up to 65535 names followed by one
    ## Pop for each remaining name
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    # NOTE(review): self.scopeDepth is only decremented at the end of
    # this procedure, so names declared at exactly the depth being
    # closed do not satisfy the comparison below -- confirm that this
    # is intended
    var popped = 0
    for ident in self.names:
        if ident.depth > self.scopeDepth:
            # All variables with a scope depth larger than the current one
            # are now out of scope. Begone, you're now homeless!
            inc(popped)
    if not self.enableOptimizations or popped == 1:
        # We only emit PopN if we're popping more than one value
        for _ in 1..popped:
            self.emitByte(Pop)
    elif popped > 1:
        # PopN takes a 16 bit operand: anything beyond
        # that is popped one value at a time
        let batch = min(popped, uint16.high().int)
        self.emitByte(PopN)
        self.emitBytes(batch.toDouble())
        for _ in 1..(popped - batch):
            self.emitByte(Pop)
    # Out-of-scope names are assumed to be the
    # last `popped` entries of self.names
    self.names.setLen(self.names.len() - popped)
    dec(self.scopeDepth)
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code.
    ##
    ## Emits, in order: the condition, a conditional jump whose
    ## offset is a placeholder, the "then" branch and, if present,
    ## an unconditional forward jump followed by the "else" branch.
    ## Both placeholders are filled in with patchJump
    self.expression(node.condition)
    # With optimizations on, a single instruction is used for the
    # jump (JumpIfFalsePop); otherwise JumpIfFalse is emitted and is
    # followed by an explicit Pop
    var jumpCode: OpCode
    if self.enableOptimizations:
        jumpCode = JumpIfFalsePop
    else:
        jumpCode = JumpIfFalse
    let jump = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        # Only reached when the jump is not taken.
        # NOTE(review): presumably JumpIfFalse leaves the condition
        # on the stack when it does jump -- confirm against the VM
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    # patchJump targets the current end of the bytecode
    self.patchJump(jump)
    if node.elseBranch != nil:
        # NOTE(review): the conditional jump above was patched before
        # this JumpForwards was emitted, so it lands on this very
        # instruction, which then skips the else branch. It looks
        # like the else branch can never execute -- confirm, and note
        # that reordering the calls must account for patchJump
        # removing a byte from the chunk
        let jump = self.emitJump(JumpForwards)
        self.statement(node.elseBranch)
        self.patchJump(jump)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ##
    ## The deferred expression is compiled into the current chunk
    ## as usual, then the bytes it produced are moved, in order,
    ## to the end of self.deferred and removed from the chunk
    let start = self.chunk.code.len()
    self.expression(node.expression)
    # Copy first, truncate afterwards: removing elements while
    # iterating would reorder the bytes and run past the end
    for i in start ..< self.chunk.code.len():
        self.deferred.add(self.chunk.code[i])
    # NOTE(review): only the code bytes are rolled back here. If the
    # chunk keeps per-byte metadata (emitByte passes a line number to
    # chunk.write) it is left untouched -- confirm whether it needs
    # trimming too
    self.chunk.code.setLen(start)
A continue statements - ## jumps to the next iteration in a loop - if self.currentLoop.start <= 65535: - self.emitByte(Jump) - self.emitBytes(self.currentLoop.start.toDouble()) - else: - if self.currentLoop.start > 16777215: - self.error("too much code to jump over in continue statement") - self.emitByte(LongJump) - self.emitBytes(self.currentLoop.start.toTriple()) - - -proc breakStmt(self: Compiler, node: BreakStmt) = - ## Compiles break statements. A continue statement - ## jumps to the next iteration in a loop - - # Emits dummy jump offset, this is - # patched later - discard self.emitJump(OpCode.Jump) - self.currentLoop.breakPos.add(self.chunk.code.high() - 4) - if self.currentLoop.depth > self.scopeDepth: - # Breaking out of a loop closes its scope - self.endScope() - - -proc patchBreaks(self: Compiler) = - ## Patches "break" opcodes with - ## actual jumps. This is needed - ## because the size of code - ## to skip is not known before - ## the loop is fully compiled - for brk in self.currentLoop.breakPos: - self.chunk.code[brk] = JumpForwards.uint8() - self.patchJump(brk) - - -proc assertStmt(self: Compiler, node: AssertStmt) = - ## Compiles assert statements (raise - ## AssertionError if the expression is falsey) - self.expression(node.expression) - self.emitByte(OpCode.Assert) - - -proc statement(self: Compiler, node: Statement) = - ## Compiles all statements - case node.kind: - of exprStmt: - var expression = ExprStmt(node).expression - self.expression(expression) - self.emitByte(Pop) # Expression statements discard their value. 
Their main use case is side effects in function calls - of NodeKind.ifStmt: - self.ifStmt(IfStmt(node)) - of NodeKind.assertStmt: - self.assertStmt(AssertStmt(node)) - of NodeKind.raiseStmt: - self.raiseStmt(RaiseStmt(node)) - of NodeKind.breakStmt: - self.breakStmt(BreakStmt(node)) - of NodeKind.continueStmt: - self.continueStmt(ContinueStmt(node)) - of NodeKind.returnStmt: - self.returnStmt(ReturnStmt(node)) - of NodeKind.importStmt: - discard - of NodeKind.whileStmt, NodeKind.forStmt: - ## Our parser already desugars for loops to - ## while loops! - let loop = self.currentLoop - self.currentLoop = Loop(start: self.chunk.code.len(), - depth: self.scopeDepth, breakPos: @[]) - self.whileStmt(WhileStmt(node)) - self.patchBreaks() - self.currentLoop = loop - of NodeKind.forEachStmt: - discard - of NodeKind.blockStmt: - self.blockStmt(BlockStmt(node)) - of NodeKind.yieldStmt: - self.yieldStmt(YieldStmt(node)) - of NodeKind.awaitStmt: - self.awaitStmt(AwaitStmt(node)) - of NodeKind.deferStmt: - self.deferStmt(DeferStmt(node)) - of NodeKind.tryStmt: - discard - else: - self.expression(Expression(node)) - - -proc varDecl(self: Compiler, node: VarDecl) = - ## Compiles variable declarations - let kind = self.toIntrinsic(node.valueType) - let typ = self.inferType(node.value) - if kind == nil and typ == nil: - self.error(&"cannot determine the type of '{node.name.token.lexeme}'") - elif typ != kind and kind != nil: - self.error(&"expected value of type '{self.typeToStr(kind)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(typ)}'") - self.expression(node.value) - self.declareName(node) - - -proc funDecl(self: Compiler, node: FunDecl) = - ## Compiles function declarations - - # A function's code is just compiled linearly - # and then jumped over - let jmp = self.emitJump(Jump) - self.declareName(node) - if node.body != nil: - if BlockStmt(node.body).code.len() == 0: - self.error("Cannot declare function with empty body") - let fnType = self.inferType(node) - let 
impl = self.findByType(node.name.token.lexeme, fnType) - if impl.len() > 1: - # Oh-oh! We found more than one implementation of - # the same function! Error! - var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n" - for fn in reversed(impl): - var node = FunDecl(fn.valueType.node) - discard self.typeToStr(fn.valueType) - msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" - self.error(msg) - # We store the current function - var function = self.currentFunction - self.currentFunction = node - - # Since the deferred array is a linear - # sequence of instructions and we want - # to keep track to whose function's each - # set of deferred instruction belongs, - # we record the length of the deferred - # array before compiling the function - # and use this info later to compile - # the try/finally block with the deferred - # code - var deferStart = self.deferred.len() - - self.blockStmt(BlockStmt(node.body)) - # Yup, we're done. That was easy, huh? - # But after all functions are just named - # scopes, and we compile them just like that: - # we declare their name and arguments (before - # their body so recursion works) and then just - # handle them as a block statement (which takes - # care of incrementing self.scopeDepth so locals - # are resolved properly). 
There's a need for a bit - # of boilerplate code to make closures work, but - # that's about it - self.emitBytes(LoadNil, OpCode.Return) - - # Currently defer is not functional so we - # just pop the instructions - for i in countup(deferStart, self.deferred.len() - 1, 1): - self.deferred.delete(i) - - self.patchJump(jmp) - # This makes us compile nested functions correctly - self.currentFunction = function - - - -proc declaration(self: Compiler, node: Declaration) = - ## Compiles all declarations - case node.kind: - of NodeKind.varDecl: - self.varDecl(VarDecl(node)) - of NodeKind.funDecl: - self.funDecl(FunDecl(node)) - else: - self.statement(Statement(node)) - - -proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk = - ## Compiles a sequence of AST nodes into a chunk - ## object - self.chunk = newChunk() - self.ast = ast - self.file = file - self.names = @[] - self.scopeDepth = 0 - self.currentFunction = nil - self.currentModule = self.file - self.current = 0 - while not self.done(): - self.declaration(Declaration(self.step())) - if self.ast.len() > 0: - # *Technically* an empty program is a valid program - self.endScope() - self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope - result = self.chunk - if self.ast.len() > 0 and self.scopeDepth != -1: - self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")