import meta/token
import meta/ast
import meta/errors
import meta/bytecode
import ../config
import ../util/multibyte
import strformat
import algorithm
import parseutils
import sequtils
import strutils


export ast
export bytecode
export token
export multibyte


type
    Name = ref object
        ## Compile-time bookkeeping entry for a
        ## name that has been resolved statically.
        ## A depth of zero marks a global; any
        ## larger value is the local scope the
        ## name was declared in
        # The identifier as it appears in the source
        name: IdentExpr
        # The module that declared the name
        owner: string
        # Scope depth at declaration time
        depth: int
        # Visibility and mutability flags
        isPrivate, isConst: bool
        # The declared (or inferred) type of the name
        valueType: IdentExpr
    Loop = object
        ## Per-loop state kept by the compiler
        ## so that break and continue statements
        ## can be given the right jump offsets
        # start: bytecode position recorded when the loop begins
        # depth: scope depth recorded when the loop begins
        start, depth: int
        # Positions of the placeholder jumps emitted
        # for break statements, patched later
        breakPos: seq[int]
    Compiler* = ref object
        ## A wrapper around the compiler's state
        # The bytecode chunk where we write code to
        chunk: Chunk
        # The output of our parser (AST)
        ast: seq[ASTNode]
        # The current AST node we're looking at
        current: int
        # The current file being compiled (used only for
        # error reporting)
        file: string
        # Compile-time "simulation" of the stack at
        # runtime to load variables that have stack
        # behavior more efficiently
        names: seq[Name]
        # The current scope depth. If > 0, we're
        # in a local scope, otherwise it's global
        scopeDepth: int
        # The current function being compiled
        currentFunction: FunDecl
        # Are optimizations turned on?
        enableOptimizations*: bool
        # The current loop being compiled (used to
        # keep track of where to jump)
        currentLoop: Loop
        # The current module being compiled
        # (used to restrict access to statically
        # defined variables at compile time)
        currentModule: string
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
        # This sequence is emptied each time a
        # fun declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
        # List of closed-over variables
        closedOver: seq[IdentExpr]


proc newCompiler*(enableOptimizations: bool = true): Compiler =
    ## Initializes a new Compiler object. The
    ## enableOptimizations flag selects the more
    ## compact instruction forms (e.g. PopN and the
    ## jump-and-pop opcodes) where the compiler
    ## has a choice
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.scopeDepth = 0
    result.currentFunction = nil
    result.enableOptimizations = enableOptimizations
    result.currentModule = ""


# Forward declarations
proc expression(self: Compiler, node: ASTNode)
proc statement(self: Compiler, node: ASTNode)
proc declaration(self: Compiler, node: ASTNode)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
# End of forward declarations


proc getCurrentNode*(self: Compiler): ASTNode =
    ## Public getter for nicer error formatting.
    ## Returns the most recently consumed AST node
    ## (or the last node of the tree once everything
    ## has been consumed). Returns nil when the AST
    ## is empty
    if self.ast.len() == 0:
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    # When nothing has been consumed yet self.current is 0:
    # clamp the index so we never read self.ast[-1]
    result = self.ast[max(self.current - 1, 0)]


# Utility functions

proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance.
    ## If the distance is out of bounds, the last
    ## AST node in the tree is returned. A negative
    ## distance may be used to retrieve previously
    ## consumed AST nodes. Returns nil when the AST
    ## is empty
    if self.ast.len() == 0:
        return nil
    let index = self.current + distance
    if index > self.ast.high() or index < 0:
        result = self.ast[^1]
    else:
        result = self.ast[index]


proc done(self: Compiler): bool =
    ## Returns true if the compiler is done
    ## compiling, false otherwise
    result = self.current > self.ast.high()


proc error(self: Compiler, message: string) =
    ## Raises a formatted CompileError exception
    ## that mentions the file, module, line and
    ## lexeme of the node being compiled
    let node = self.getCurrentNode()
    if node == nil:
        # No AST node to take position information from
        raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' -> {message}")
    let tok = node.token
    raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {tok.line} at '{tok.lexeme}' -> {message}")


proc step(self: Compiler): ASTNode =
    ## Steps to the next node and returns
    ## the consumed one
    result = self.peek()
    if not self.done():
        self.current += 1


proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled. The
    ## line number attached to the byte is the
    ## one of the node returned by self.peek()
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    self.chunk.write(uint8 byt, self.peek().token.line)


proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Emits multiple bytes instead of a single one. This is useful
    ## to emit operators along with their operands or for multi-byte
    ## instructions that are longer than one byte
    self.emitByte(uint8 byt1)
    self.emitByte(uint8 byt2)


proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
    ## Handy helper method to write an array of 2 bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    self.emitBytes(bytarr[0], bytarr[1])


proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
    ## Handy helper method to write an array of 3 bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    self.emitBytes(bytarr[0], bytarr[1])
    self.emitByte(bytarr[2])


proc makeConstant(self: Compiler, val: ASTNode): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s
    result = self.chunk.addConstant(val)


proc emitConstant(self: Compiler, obj: ASTNode) =
    ## Emits a LoadConstant instruction along
    ## with its operand
    self.emitByte(LoadConstant)
    self.emitBytes(self.makeConstant(obj))


proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits a dummy jump offset to be patched later. Assumes
    ## the largest offset (emits 4 bytes, one for the given jump
    ## opcode, while the other 3 are for the jump offset which is set
    ## to the maximum unsigned 24 bit integer). If the shorter
    ## 16 bit alternative is later found to be better suited, patchJump
    ## will fix this. This function returns the absolute index into the
    ## chunk's bytecode array where the given placeholder instruction was written
    self.emitByte(opcode)
    self.emitBytes((0xffffff).toTriple())
    result = self.chunk.code.len() - 4


proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted jump
    ## using emitJump. Since emitJump assumes
    ## a long jump, this also shrinks the jump
    ## offset and changes the bytecode instruction if possible
    ## (i.e. jump is in 16 bit range), but the converse is also
    ## true (i.e. it might change a regular jump into a long one)
    # NOTE(review): the short-jump path removes one byte from the
    # chunk, so any bytecode position recorded after `offset` (for
    # example other pending jumps or break placeholders) shifts by
    # one - confirm callers account for this
    let jump: int = self.chunk.code.len() - offset
    if jump > 16777215:
        self.error("cannot jump more than 16777215 bytecode instructions")
    if jump < uint16.high().int:
        case OpCode(self.chunk.code[offset]):
            of LongJumpForwards:
                self.chunk.code[offset] = JumpForwards.uint8()
            of LongJumpBackwards:
                self.chunk.code[offset] = JumpBackwards.uint8()
            of LongJumpIfFalse:
                self.chunk.code[offset] = JumpIfFalse.uint8()
            of LongJumpIfFalsePop:
                self.chunk.code[offset] = JumpIfFalsePop.uint8()
            of LongJumpIfFalseOrPop:
                self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
            else:
                discard
        self.chunk.code.delete(offset + 1)   # Discards the 24 bit integer
        let offsetArray = jump.toDouble()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
    else:
        case OpCode(self.chunk.code[offset]):
            of JumpForwards:
                self.chunk.code[offset] = LongJumpForwards.uint8()
            of JumpBackwards:
                self.chunk.code[offset] = LongJumpBackwards.uint8()
            of JumpIfFalse:
                self.chunk.code[offset] = LongJumpIfFalse.uint8()
            of JumpIfFalsePop:
                self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
            of JumpIfFalseOrPop:
                self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
            else:
                discard
        let offsetArray = jump.toTriple()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
        self.chunk.code[offset + 3] = offsetArray[2]

# End of utility functions


proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such
    ## as singletons, strings, numbers and
    ## collections
    case node.kind:
        of trueExpr:
            self.emitByte(OpCode.True)
        of falseExpr:
            self.emitByte(OpCode.False)
        of nilExpr:
            self.emitByte(OpCode.Nil)
        of infExpr:
            self.emitByte(OpCode.Inf)
        of nanExpr:
            self.emitByte(OpCode.Nan)
        of strExpr:
            self.emitConstant(node)
        # The optimizer will emit warning
        # for overflowing numbers. Here, we
        # treat them as errors
        of intExpr:
            var x: int
            let y = IntExpr(node)
            try:
                discard parseInt(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(y)
        # Even though most likely the optimizer
        # will collapse all these other literals
        # to nodes of kind intExpr, that can be
        # disabled. This also allows us to catch
        # basic overflow errors before running any code
        of hexExpr:
            var x: int
            let y = HexExpr(node)
            try:
                discard parseHex(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)))))
        of binExpr:
            var x: int
            let y = BinExpr(node)
            try:
                discard parseBin(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)))))
        of octExpr:
            var x: int
            let y = OctExpr(node)
            try:
                discard parseOct(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)))))
        of floatExpr:
            var x: float
            let y = FloatExpr(node)
            try:
                discard parseFloat(y.literal.lexeme, x)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(y)
        # NOTE(review): the collection branches below accept a length of
        # exactly 16777216, which does not fit in the 24-bit operand
        # produced by toTriple() - confirm the intended limit
        of listExpr:
            let y = ListExpr(node)
            if y.members.len() > 16777216:
                self.error("collection literals can't have more than 16777216 elements")
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildList)
            self.emitBytes(y.members.len().toTriple())  # 24-bit integer, meaning collection literals can have up to 2^24 elements
        of tupleExpr:
            let y = TupleExpr(node)
            if y.members.len() > 16777216:
                self.error("collection literals can't have more than 16777216 elements")
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildTuple)
            self.emitBytes(y.members.len().toTriple())
        of setExpr:
            let y = SetExpr(node)
            if y.members.len() > 16777216:
                self.error("collection literals can't have more than 16777216 elements")
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildSet)
            self.emitBytes(y.members.len().toTriple())
        of dictExpr:
            let y = DictExpr(node)
            if y.keys.len() > 16777216:
                self.error("collection literals can't have more than 16777216 elements")
            for (key, value) in zip(y.keys, y.values):
                self.expression(key)
                self.expression(value)
            self.emitByte(BuildDict)
            self.emitBytes(y.keys.len().toTriple())
        of awaitExpr:
            let y = AwaitExpr(node)
            self.expression(y.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug)")


proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as decimal or
    ## bitwise negation
    self.expression(node.a)  # Pushes the operand onto the stack
    case node.operator.kind:
        of Minus:
            self.emitByte(UnaryNegate)
        of Plus:
            self.emitByte(UnaryPlus)
        of TokenType.LogicalNot:
            self.emitByte(OpCode.LogicalNot)
        of Tilde:
            self.emitByte(UnaryNot)
        else:
            self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug)")


proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expressions. Logical and/or
    ## are short-circuiting: their right operand is compiled
    ## behind a conditional jump, so they are handled before
    ## the eager operand push used by every other operator
    case node.operator.kind:
        of TokenType.LogicalAnd:
            self.expression(node.a)
            var jump: int
            if self.enableOptimizations:
                jump = self.emitJump(JumpIfFalseOrPop)
            else:
                jump = self.emitJump(JumpIfFalse)
                self.emitByte(Pop)
            self.expression(node.b)
            self.patchJump(jump)
            return
        of TokenType.LogicalOr:
            self.expression(node.a)
            let jump = self.emitJump(JumpIfTrue)
            self.expression(node.b)
            self.patchJump(jump)
            return
        else:
            discard
    # These two lines prepare the stack by pushing the
    # opcode's operands onto it
    self.expression(node.a)
    self.expression(node.b)
    case node.operator.kind:
        of Plus:
            self.emitByte(BinaryAdd)
        of Minus:
            self.emitByte(BinarySubtract)
        of Star:
            self.emitByte(BinaryMultiply)
        of DoubleStar:
            self.emitByte(BinaryPow)
        of Percentage:
            self.emitByte(BinaryMod)
        of FloorDiv:
            self.emitByte(BinaryFloorDiv)
        of Slash:
            self.emitByte(BinaryDivide)
        of Ampersand:
            self.emitByte(BinaryAnd)
        of Caret:
            self.emitByte(BinaryXor)
        of Pipe:
            self.emitByte(BinaryOr)
        of As:
            self.emitByte(BinaryAs)
        of Is:
            self.emitByte(BinaryIs)
        of IsNot:
            self.emitByte(BinaryIsNot)
        of Of:
            self.emitByte(BinaryOf)
        of RightShift:
            self.emitByte(BinaryShiftRight)
        of LeftShift:
            self.emitByte(BinaryShiftLeft)
        of TokenType.LessThan:
            self.emitByte(OpCode.LessThan)
        of TokenType.GreaterThan:
            self.emitByte(OpCode.GreaterThan)
        of TokenType.DoubleEqual:
            self.emitByte(EqualTo)
        of TokenType.LessOrEqual:
            self.emitByte(OpCode.LessOrEqual)
        of TokenType.GreaterOrEqual:
            self.emitByte(OpCode.GreaterOrEqual)
        # TODO: In-place operations
        else:
            self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug)")


proc declareName(self: Compiler, node: ASTNode, kind: IdentExpr) =
    ## Compiles all name declarations. The kind parameter
    ## is recorded as the value type of the declared name(s)
    case node.kind:
        of NodeKind.varDecl:
            let node = VarDecl(node)
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            if self.names.high() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777215 variables at a time")
            self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name),
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: kind))
            self.emitByte(StoreVar)
            self.emitBytes(self.names.high().toTriple())
        of funDecl:
            let node = FunDecl(node)
            # Declares the function's name in the
            # current (outer) scope...
            self.declareName(node.name, IdentExpr(node.returnType))
            # ... but its arguments in an inner one!
            self.scopeDepth += 1
            # (this ugly part is needed because
            # self.blockStmt() already increments
            # and decrements the scope depth)
            for argument in node.arguments:
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    name: IdentExpr(argument.name),
                                    valueType: kind))
                self.emitByte(LoadVar)
                self.emitBytes(self.names.high().toTriple())
            self.scopeDepth -= 1
            # TODO: Default arguments and unpacking
        else:
            discard  # TODO: Classes


proc resolveStatic(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): Name =
    ## Traverses self.names backwards and returns the
    ## first name object with the given name. Returns
    ## nil when the name can't be found. This function
    ## has no concept of scope depth (the depth parameter
    ## is not consulted), because getStaticIndex
    ## does that job. Note that private names declared in
    ## other modules will not be resolved!
    for obj in reversed(self.names):
        if obj.name.token.lexeme == name.token.lexeme:
            if obj.isPrivate and obj.owner != self.currentModule:
                return nil
            return obj
    return nil


proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
    ## Gets the predicted stack position of the given variable and
    ## returns a tuple (closedOver, pos) that tells the caller whether
    ## the variable is to be emitted as a closure as well as its predicted
    ## stack/closure array position. Returns (false, -1) if the variable's
    ## location can not be determined at compile time (this is an error!).
    ## Note that private names declared in other modules will not be resolved!
    for i in countdown(self.names.high(), 0):
        let variable = self.names[i]
        if name.name.lexeme != variable.name.name.lexeme:
            continue
        if variable.isPrivate and variable.owner != self.currentModule:
            return (false, -1)
        if variable.depth == depth or variable.depth == 0:
            # variable.depth == 0 for globals!
            return (false, i)
        elif variable.depth > 0:
            # Walk the closure array itself (most recent entry first)
            # so the returned position is a real index into it, not
            # an index into a reversed copy
            for j in countdown(self.closedOver.high(), 0):
                if self.closedOver[j].name.lexeme == name.name.lexeme:
                    return (true, j)
    return (false, -1)


proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers. Raises a
    ## CompileError if the name is not declared or
    ## its location can't be determined
    let s = self.resolveStatic(node)
    if s != nil:
        if s.isConst:
            # Constants are emitted as, you guessed it, constant instructions
            # no matter the scope depth. Also, name resolution specifiers do not
            # apply to them (because what would it mean for a constant to be dynamic
            # anyway?)
            self.emitConstant(node)
        else:
            let t = self.getStaticIndex(node)
            let index = t.pos
            if index != -1:
                if not t.closedOver:
                    self.emitByte(LoadVar)  # Static name resolution, loads value at index in the stack. Very fast. Much wow.
                    self.emitBytes(index.toTriple())
                else:
                    if self.closedOver.len() >= 16777216:
                        self.error("too many consecutive closure-over variables (max is 16777216)")
                    self.emitByte(LoadHeap)  # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics
                    self.emitBytes(index.toTriple())
            else:
                # The name exists but getStaticIndex could not place it
                self.error(&"reference to undeclared name '{node.token.lexeme}'")
    else:
        self.error(&"reference to undeclared name '{node.token.lexeme}'")


proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions
    case node.kind:
        of assignExpr:
            let node = AssignExpr(node)
            let name = IdentExpr(node.name)
            let r = self.resolveStatic(name)
            if r != nil and r.isConst:
                self.error("cannot assign to constant")
            self.expression(node.value)
            let t = self.getStaticIndex(name)
            let index = t.pos
            case node.token.kind:
                of InplaceAdd:
                    self.emitByte(BinaryAdd)
                of InplaceSub:
                    self.emitByte(BinarySubtract)
                of InplaceDiv:
                    self.emitByte(BinaryDivide)
                of InplaceMul:
                    self.emitByte(BinaryMultiply)
                of InplacePow:
                    self.emitByte(BinaryPow)
                of InplaceFloorDiv:
                    self.emitByte(BinaryFloorDiv)
                of InplaceMod:
                    self.emitByte(BinaryMod)
                of InplaceAnd:
                    self.emitByte(BinaryAnd)
                of InplaceXor:
                    self.emitByte(BinaryXor)
                of InplaceRightShift:
                    self.emitByte(BinaryShiftRight)
                of InplaceLeftShift:
                    self.emitByte(BinaryShiftLeft)
                else:
                    discard  # Unreachable
            # In-place operators just change
            # what values is set to a given
            # stack offset/name, so we only
            # need to perform the operation
            # as usual and then store it.
            # TODO: A better optimization would
            # be to have everything in one opcode,
            # but that requires variants for stack,
            # heap, and closure variables and I cba
            if index != -1:
                if not t.closedOver:
                    self.emitByte(StoreVar)
                else:
                    self.emitByte(StoreHeap)
                self.emitBytes(index.toTriple())
            else:
                self.error(&"reference to undeclared name '{node.token.lexeme}'")
        of setItemExpr:
            discard
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")


proc beginScope(self: Compiler) =
    ## Begins a new local scope by incrementing the current
    ## scope's depth
    inc(self.scopeDepth)


proc endScope(self: Compiler) =
    ## Ends the current local scope: emits the instructions
    ## that pop every name whose depth is larger than the
    ## current scope depth, forgets those names and then
    ## decrements the scope depth
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    var popped: int = 0
    for ident in reversed(self.names):
        if ident.depth > self.scopeDepth:
            inc(popped)
            if not self.enableOptimizations:
                # All variables with a scope depth larger than the current one
                # are now out of scope. Begone, you're now homeless!
                self.emitByte(Pop)
    if self.enableOptimizations and popped > 1:
        # If we're popping less than 65535 variables, then
        # we can emit a PopN instruction. This is true for
        # 99.99999% of the use cases of the language (who the
        # hell is going to use 65 THOUSAND local variables?), but
        # if you'll ever use more then JAPL will emit a PopN instruction
        # for the first 65 thousand and change local variables and then
        # emit another batch of plain ol' Pop instructions for the rest
        if popped <= uint16.high().int():
            self.emitByte(PopN)
            self.emitBytes(popped.toDouble())
        else:
            self.emitByte(PopN)
            self.emitBytes(uint16.high().int.toDouble())
            # NOTE(review): the bounds of this loop look unrelated to the
            # number of values still left to pop - confirm
            for i in countdown(self.names.high(), popped - uint16.high().int()):
                if self.names[i].depth > self.scopeDepth:
                    self.emitByte(Pop)
    elif self.enableOptimizations and popped == 1:
        # We only emit PopN if we're popping more than one value.
        # Without optimizations the loop above already emitted the
        # Pop, so emitting it here as well would pop twice
        self.emitByte(Pop)
    for _ in countup(0, popped - 1):
        discard self.names.pop()
    dec(self.scopeDepth)


proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles block statements, which create a new
    ## local scope.
    self.beginScope()
    for decl in node.code:
        self.declaration(decl)
    self.endScope()


proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code
    self.expression(node.condition)
    let jumpCode =
        if self.enableOptimizations: JumpIfFalsePop
        else: JumpIfFalse
    let jump = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        # JumpIfFalse leaves the condition on the stack
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    self.patchJump(jump)
    if node.elseBranch != nil:
        let jump = self.emitJump(JumpForwards)
        self.statement(node.elseBranch)
        self.patchJump(jump)


proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction with the correct
    ## jump offset (or its long variant when the offset
    ## does not fit in 16 bits)
    var offset: int
    case OpCode(self.chunk.code[begin + 1]):   # The jump instruction
        of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
           LongJumpIfFalsePop, LongJumpIfTrue:
            offset = self.chunk.code.len() - begin + 4
        else:
            offset = self.chunk.code.len() - begin
    if offset > uint16.high().int:
        if offset > 16777215:
            self.error("cannot jump more than 16777215 bytecode instructions")
        self.emitByte(LongJumpBackwards)
        self.emitBytes(offset.toTriple())
    else:
        self.emitByte(JumpBackwards)
        self.emitBytes(offset.toDouble())


proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops
    # NOTE(review): the exit jump is patched before the backwards
    # jump is emitted, so its target is the backwards jump itself
    # rather than the first instruction after the loop - confirm
    # against the VM's jump semantics
    let start = self.chunk.code.len()
    self.expression(node.condition)
    let jump = self.emitJump(JumpIfFalsePop)
    self.statement(node.body)
    self.patchJump(jump)
    self.emitLoop(start)


proc inferValueType(self: Compiler, node: ASTNode): ASTNode =
    ## Infers the type of a given literal expression.
    ## Returns nil for literal kinds that are not
    ## handled yet
    case node.kind:
        of listExpr:
            return ListExpr(node).valueType
        of dictExpr:
            # It's not important that we don't use
            # valueType here, we just need to return
            # a non-nil value so we don't error out
            return DictExpr(node).keyType
        of intExpr, floatExpr, binExpr, octExpr, hexExpr:
            let node = LiteralExpr(node)
            # A literal may carry a size specifier after a
            # single quote (e.g. 1'u8)
            var size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier")
            elif size.len() == 1:
                # NOTE(review): float literals without a specifier
                # are also reported as "int" here - confirm
                return newIdentExpr(Token(lexeme: "int"))
            # NOTE(review): "i32" and "u32" are listed twice below,
            # presumably in place of "i16"/"u16" - confirm
            elif size[1] in ["u64", "i64", "u32", "i32", "f64", "f32", "i32", "u32", "u8", "i8"]:
                if size[1].startsWith("u"):
                    size[1] = size[1].strip(true, false, {'u'})
                    size[1] = &"uint{size[1]}"
                elif size[1].startsWith("i"):
                    size[1] = size[1].strip(true, false, {'i'})
                    size[1] = &"int{size[1]}"
                elif size[1].startsWith("f"):
                    size[1] = size[1].strip(true, false, {'f'})
                    size[1] = &"float{size[1]}"
                return newIdentExpr(Token(lexeme: size[1]))
            else:
                self.error(&"invalid type specifier '{size[1]}' for '{size[0]}'")
            return newIdentExpr(Token(lexeme: "int"))
        else:
            discard  # TODO


proc inferExprType(self: Compiler, node: ASTNode): ASTNode =
    ## Infers the type of a given expression and
    ## returns it. Returns nil when the type can't
    ## be determined
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            let name = self.resolveStatic(node)
            if name == nil:
                return nil
            return name.valueType
        of unaryExpr:
            return self.inferValueType(UnaryExpr(node).a)
        of binaryExpr:
            let node = BinaryExpr(node)
            let a = self.inferExprType(node.a)
            let b = self.inferExprType(node.b)
            if a == nil or b == nil:
                return nil
            # The type of the left operand is used as the
            # type of the whole expression
            return a
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr, listExpr,
            dictExpr, setExpr, tupleExpr
           }:
            return self.inferValueType(node)
        else:
            discard  # Unreachable


proc inferDeclType(self: Compiler, node: Declaration): ASTNode =
    ## Infers the type of a given declaration if it's
    ## not already defined and returns it
    case node.kind:
        of funDecl:
            let node = FunDecl(node)
            if node.returnType != nil:
                return node.returnType
        of NodeKind.varDecl:
            let node = VarDecl(node)
            if node.valueType != nil:
                return node.valueType
            else:
                return self.inferExprType(node.value)
        else:
            return  # Unreachable


proc expression(self: Compiler, node: ASTNode) =
    ## Compiles all expressions. Raises a CompileError
    ## if the expression's type can't be inferred
    if self.inferExprType(node) == nil:
        self.error("expression has no type")
    case node.kind:
        of getItemExpr:
            discard  # TODO
        # Note that for setItem and assign we don't convert
        # the node to its true type because that type information
        # would be lost in the call anyway. The differentiation
        # happens in self.assignment
        of setItemExpr, assignExpr:
            self.assignment(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr, tupleExpr, setExpr, listExpr,
           dictExpr:
            # Since all of these AST nodes mostly share
            # the same overall structure, and the kind
            # discriminant is enough to tell one
            # from the other, why bother with
            # specialized cases when one is enough?
            self.literal(node)
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")


proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements. An await statement
    ## is like an await expression, but parsed in the
    ## context of statements for usage outside expressions,
    ## meaning it can be used standalone. It's basically the
    ## same as an await expression followed by a semicolon.
    ## Await expressions are the only native construct to
    ## run coroutines from within an already asynchronous
    ## context (which should be orchestrated by an event loop).
    ## They block in the caller until the callee returns
    self.expression(node.expression)
    self.emitByte(OpCode.Await)


proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ## The bytecode of the deferred expression is moved out
    ## of the chunk and into self.deferred
    let start = self.chunk.code.len()
    self.expression(node.expression)
    # Copy the freshly emitted bytes in order, then truncate the
    # chunk back to where it was: removing elements while walking
    # the same range would scramble them and run out of bounds
    for i in start ..< self.chunk.code.len():
        self.deferred.add(self.chunk.code[i])
    # NOTE(review): only the code array is truncated; any per-byte
    # metadata kept by the chunk (e.g. line info) is left as is
    self.chunk.code.setLen(start)


proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. The type of the
    ## returned value must match the return type of
    ## the function being compiled
    # NOTE(review): a return without a value (node.value == nil) and a
    # return outside any function (self.currentFunction == nil) are not
    # handled here - confirm the parser rules them out
    let returnType = self.inferExprType(node.value)
    if returnType == nil:
        self.error("expression has no type")
    elif returnType.token.lexeme != self.currentFunction.returnType.token.lexeme:
        self.error(&"expected value of type '{self.currentFunction.returnType.token.lexeme}', got '{returnType.token.lexeme}'")
    self.expression(node.value)
    self.emitByte(OpCode.Return)


proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements
    self.expression(node.expression)
    self.emitByte(OpCode.Yield)


proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements
    self.expression(node.exception)
    self.emitByte(OpCode.Raise)


proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the next iteration in a loop
    if self.currentLoop.start <= 65535:
        self.emitByte(Jump)
        self.emitBytes(self.currentLoop.start.toDouble())
    else:
        self.emitByte(LongJump)
        self.emitBytes(self.currentLoop.start.toTriple())


proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps out of the innermost enclosing loop
    # Emits dummy jump offset, this is
    # patched later. emitJump returns the
    # position of the placeholder opcode,
    # which is what patchBreaks rewrites
    self.currentLoop.breakPos.add(self.emitJump(OpCode.Break))
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()


proc patchBreaks(self: Compiler) =
    ## Patches "break" opcodes with
    ## actual jumps. This is needed
    ## because the size of code
    ## to skip is not known before
    ## the loop is fully compiled
    for brk in self.currentLoop.breakPos:
        self.chunk.code[brk] = JumpForwards.uint8()
        self.patchJump(brk)


proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (raise
    ## AssertionError if the expression is falsey)
    self.expression(node.expression)
    self.emitByte(OpCode.Assert)


proc statement(self: Compiler, node: ASTNode) =
    ## Compiles all statements
    case node.kind:
        of exprStmt:
            let inner = ExprStmt(node).expression
            self.expression(inner)
            self.emitByte(Pop)  # Expression statements discard their value. Their main use case is side effects in function calls
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt:
            discard
        of NodeKind.whileStmt, NodeKind.forStmt:
            # Our parser already desugars for loops to
            # while loops! The enclosing loop is saved
            # and restored so nested loops work
            let loop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = loop
        of NodeKind.forEachStmt:
            discard
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            self.expression(node)


proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations
    if self.inferDeclType(node) == nil:
        self.error(&"Cannot determine the type of '{node.name.token.lexeme}'")
    self.expression(node.value)
    self.declareName(node, IdentExpr(node.valueType))


proc funDecl(self: Compiler, node: FunDecl) =
    ## Compiles function declarations
    if self.inferDeclType(node) == nil:
        self.error(&"Cannot determine the return type of '{node.name.token.lexeme}'")
    # We store the current function
    let function = self.currentFunction
    self.currentFunction = node
    # A function's code is just compiled linearly
    # and then jumped over
    let jmp = self.emitJump(JumpForwards)
    self.declareName(node, IdentExpr(node.returnType))

    # Since the deferred array is a linear
    # sequence of instructions and we want
    # to keep track to whose function's each
    # set of deferred instruction belongs,
    # we record the length of the deferred
    # array before compiling the function
    # and use this info later to compile
    # the try/finally block with the deferred
    # code
    let deferStart = self.deferred.len()

    self.blockStmt(BlockStmt(node.body))
    # Yup, we're done. That was easy, huh?
    # But after all functions are just named
    # scopes, and we compile them just like that:
    # we declare their name and arguments (before
    # their body so recursion works) and then just
    # handle them as a block statement (which takes
    # care of incrementing self.scopeDepth so locals
    # are resolved properly). There's a need for a bit
    # of boilerplate code to make closures work, but
    # that's about it
    self.emitBytes(OpCode.Nil, OpCode.Return)

    # Currently defer is not functional so we
    # just drop the instructions this function
    # added to the deferred array
    self.deferred.setLen(deferStart)

    self.patchJump(jmp)
    # This makes us compile nested functions correctly
    self.currentFunction = function


proc declaration(self: Compiler, node: ASTNode) =
    ## Compiles all declarations
    case node.kind:
        of NodeKind.varDecl:
            self.varDecl(VarDecl(node))
        of NodeKind.funDecl:
            self.funDecl(FunDecl(node))
        else:
            self.statement(node)


proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object. The file name is used for error reporting
    ## and as the name of the module being compiled
    self.chunk = newChunk()
    self.ast = ast
    self.file = file
    self.names = @[]
    self.scopeDepth = 0
    self.currentFunction = nil
    self.currentModule = self.file
    self.current = 0
    while not self.done():
        self.declaration(self.step())
    if self.ast.len() > 0:
        # *Technically* an empty program is a valid program
        self.endScope()
        self.emitByte(OpCode.Return)   # Exits the VM's main loop when used at the global scope
    result = self.chunk
    # The endScope call above closes the global scope, which
    # leaves the depth at -1 when scopes were balanced
    if self.ast.len() > 0 and self.scopeDepth != -1:
        self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")