diff --git a/src/backend/vm.nim b/src/backend/vm.nim index c971eeb..7d8c071 100644 --- a/src/backend/vm.nim +++ b/src/backend/vm.nim @@ -55,7 +55,7 @@ type cache: array[6, uint64] # Singletons cache frames: seq[uint64] # Stores the bottom of stack frames closures: seq[uint64] # Stores closure offsets - closedOver: seq[uint64] # Stores variables that do not have stack semantics + envs: seq[uint64] # Stores variables that do not have stack semantics results: seq[uint64] # Stores function's results (return values) gc: PeonGC ObjectKind* = enum @@ -213,7 +213,7 @@ proc markRoots(self: PeonGC): seq[ptr HeapObject] = for obj in self.vm.operands: if obj in self.pointers: live.incl(obj) - for obj in self.vm.closedOver: + for obj in self.vm.envs: if obj in self.pointers: live.incl(obj) # We preallocate the space on the seq @@ -342,7 +342,7 @@ proc newPeonVM*: PeonVM = result.calls = @[] result.operands = @[] result.results = @[] - result.closedOver = @[] + result.envs = @[] result.gc.vm = result @@ -383,66 +383,90 @@ proc `!>=`[T](a, b: T): auto {.inline, used.} = # that go through the getc/setc wrappers is frame-relative, # meaning that the index is added to the current stack frame's # bottom to obtain an absolute stack index -proc push(self: PeonVM, obj: uint64) = +proc push(self: PeonVM, obj: uint64) = ## Pushes a value object onto the ## operand stack self.operands.add(obj) -proc pop(self: PeonVM): uint64 = - ## Pops a value off the - ## operand stack and - ## returns it +proc pop(self: PeonVM): uint64 = + ## Pops a value off the operand + ## stack and returns it return self.operands.pop() -proc peekb(self: PeonVM, distance: BackwardsIndex = ^1): uint64 = - ## Returns the value at the - ## given (backwards) distance from the top of - ## the operand stack without consuming it +proc peekb(self: PeonVM, distance: BackwardsIndex = ^1): uint64 = + ## Returns the value at the given (backwards) + ## distance from the top of the operand stack + ## without consuming it return self.operands[distance] -proc peek(self: PeonVM, distance: int = 0): uint64 = - ## Returns the value at the - ## given distance from the top of - ## the operand stack without consuming it +proc peek(self: PeonVM, distance: int = 0): uint64 = + ## Returns the value at the given + ## distance from the top of the + ## operand stack without consuming it if distance < 0: return self.peekb(^(-int(distance))) return self.operands[self.operands.high() + distance] -proc pushc(self: PeonVM, val: uint64) = - ## Pushes a value to the +proc pushc(self: PeonVM, val: uint64) = + ## Pushes a value onto the ## call stack self.calls.add(val) -proc popc(self: PeonVM): uint64 = +proc popc(self: PeonVM): uint64 = ## Pops a value off the call ## stack and returns it return self.calls.pop() proc peekc(self: PeonVM, distance: int = 0): uint64 {.used.} = - ## Returns the value at the - ## given distance from the top of - ## the call stack without consuming it + ## Returns the value at the given + ## distance from the top of the + ## call stack without consuming it return self.calls[self.calls.high() + distance] proc getc(self: PeonVM, idx: int): uint64 = - ## Accessor method that abstracts - ## indexing our call stack through stack - ## frames + ## Getter method that abstracts + ## indexing our call stack through + ## stack frames return self.calls[idx.uint64 + self.frames[^1]] -proc setc(self: PeonVM, idx: uint, val: uint64) = +proc setc(self: PeonVM, idx: int, val: uint64) = ## Setter method that abstracts - ## indexing our call stack through stack - ## frames - self.calls[idx + self.frames[^1]] = val + ## indexing our call stack through + ## stack frames + self.calls[idx.uint + self.frames[^1]] = val + + +proc getClosure(self: PeonVM, idx: int): uint64 = + ## Getter method that abstracts + ## indexing closure environments + return self.envs[idx.uint + self.closures[^1]] + + +proc setClosure(self: PeonVM, idx: int, val: uint64) = + ## Setter method that abstracts + ## indexing closure environments + if idx == self.envs.len(): + self.envs.add(val) + else: + self.envs[idx.uint + self.closures[^1]] = val + + +proc popClosure(self: PeonVM, idx: int): uint64 = + ## Pop method that abstracts + ## popping values off closure + ## environments + var idx = idx.uint + self.closures[^1] + result = self.envs[idx] + self.envs.delete(idx) + # Byte-level primitives to read and decode # bytecode @@ -613,22 +637,29 @@ when debugVM: # So nim shuts up styledEcho fgMagenta, "]" if self.frames.len() !> 0: stdout.styledWrite(fgCyan, "Current Frame: ", fgMagenta, "[") - for i, e in self.calls[self.frames[^1]..self.calls.high()]: + for i, e in self.calls[self.frames[^1]..^1]: stdout.styledWrite(fgYellow, $e) - if i < self.calls.high(): + if i < (self.calls.high() - self.frames[^1].int): stdout.styledWrite(fgYellow, ", ") - styledEcho fgMagenta, "]" + styledEcho fgMagenta, "]", fgCyan stdout.styledWrite(fgRed, "Live stack frames: ", fgMagenta, "[") for i, e in self.frames: stdout.styledWrite(fgYellow, $e) if i < self.frames.high(): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]" - if self.closedOver.len() !> 0: - stdout.styledWrite(fgGreen, "Closure Array: ", fgMagenta, "[") - for i, e in self.closedOver: + if self.envs.len() !> 0: + stdout.styledWrite(fgGreen, "Environments: ", fgMagenta, "[") + for i, e in self.envs: stdout.styledWrite(fgYellow, $e) - if i < self.closedOver.high(): + if i < self.envs.high(): + stdout.styledWrite(fgYellow, ", ") + styledEcho fgMagenta, "]" + if self.closures.len() !> 0: + stdout.styledWrite(fgGreen, "Environment offsets: ", fgMagenta, "[") + for i, e in self.closures: + stdout.styledWrite(fgYellow, $e) + if i < self.closures.high(): stdout.styledWrite(fgYellow, ", ") styledEcho fgMagenta, "]" if self.results.len() !> 0: @@ -695,13 +726,6 @@ proc dispatch*(self: PeonVM) = self.push(cast[uint64](self.constReadFloat32(int(self.readLong())))) of LoadFloat64: self.push(cast[uint64](self.constReadFloat64(int(self.readLong())))) - of LoadFunction: - # Loads a function address onto the operand stack - self.push(uint64(self.readLong())) - of LoadReturnAddress: - # Loads a 32-bit unsigned integer onto the operand stack. - # Used to load function return addresses - self.push(uint64(self.readUInt())) of Call: # Calls a peon function. The calling convention here # is pretty simple: the first value in the frame is @@ -722,7 +746,32 @@ proc dispatch*(self: PeonVM) = self.results.add(self.getNil()) # Creates a new call frame self.frames.add(uint64(self.calls.len() - 2)) - self.closures.add(self.closedOver.len().uint64) + # Loads the arguments onto the stack + for _ in 0.. self.closedOver.high(): - # Note: we *peek* the stack, but we - # don't pop! - self.closedOver.add(self.peek()) - else: - self.closedOver[idx] = self.peek() of LoadClosure: # Loads a closed-over variable onto the # stack - self.push(self.closedOver[self.readLong() + self.closures[^1] - 1]) + self.push(self.getClosure(self.readLong().int)) of PopClosure: - self.closedOver.delete(self.readLong()) - of LiftArgument: - # Lifts a function argument onto the stack - self.closedOver.add(self.getc(self.readLong().int)) + discard self.popClosure(self.readLong().int) + of StoreClosure: + # Stores/updates the value of a closed-over + # variable + let item = self.getc(self.readLong().int) + self.setClosure(self.readLong().int, item) of LoadVar: # Pushes a variable onto the operand # stack diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 2a75338..6d8f39b 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -63,8 +63,8 @@ type builtinOp: string fun: FunDecl isClosure: bool - closureBounds: tuple[start, stop: int] - childFunc: Type + envLen: int + children: seq[Type] of Reference, Pointer: value: Type of Generic: @@ -97,17 +97,16 @@ type # The name's type valueType: Type # For functions, this marks where the function's - # code begins. For variables, this stores where - # their StoreVar/StoreHeap instruction was emitted + # code begins codePos: int - # Is the name closed over (i.e. used in a closure)? - isClosedOver: bool # The function that owns this variable (may be nil!) belongsTo: Name # Is this a function argument? isFunctionArgument: bool # Where is this node declared in the file? line: int + # Has this name been closed over? + isClosedOver: bool # Is this a function declaration or a variable # with a function as value? (The distinction *is* # important! Check emitFunction()) @@ -176,9 +175,7 @@ type # be empty) deferred: seq[uint8] # List of closed-over variables - closedOver: seq[tuple[name: Name, count: int]] - # Keeps track of stack frames - frames: seq[int] + closedOver: seq[Name] # Compiler procedures called by pragmas compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)] # Stores line data @@ -187,8 +184,10 @@ type source: string # Currently imported modules modules: HashSet[string] - # TODO - scopes: seq[Type] + # Stores the position of all jumps + jumps: seq[tuple[patched: bool, offset: int]] + # List of CFI start offsets into our CFI data + cfiOffsets: seq[tuple[value, offset: int, fn: Name]] CompileError* = ref object of PeonException compiler*: Compiler node*: ASTNode @@ -196,7 +195,7 @@ type module*: string -## Forward declarations +# Forward declarations proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil, terminateScope: bool = true, incremental: bool = false): Chunk proc expression(self: Compiler, node: Expression) @@ -215,7 +214,8 @@ proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTnode) proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTnode) proc dispatchPragmas(self: Compiler, node: ASTnode) proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) -## End of forward declarations +proc compileModule(self: Compiler, filename: string) +# End of forward declarations proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler = @@ -226,8 +226,8 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Com result.file = "" result.names = @[] result.scopeDepth = 0 - result.frames = @[0] result.lines = @[] + result.jumps = @[] result.currentFunction = nil result.enableOptimizations = enableOptimizations result.replMode = replMode @@ -237,13 +237,10 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Com result.compilerProcs["pure"] = handlePurePragma result.source = "" -proc compileModule(self: Compiler, filename: string) - - -## Public getter for nicer error formatting +## Public getters for nicer error formatting proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) -proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = (if self.currentFunction.valueType.isNil(): nil else: self.currentFunction.valueType.fun) +proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = (if self.currentFunction.isNil(): nil else: self.currentFunction.valueType.fun) proc getFile*(self: Compiler): string {.inline.} = self.file proc getModule*(self: Compiler): string {.inline.} = self.currentModule proc getLines*(self: Compiler): seq[tuple[start, stop: int]] = self.lines @@ -252,6 +249,9 @@ proc getRelPos*(self: Compiler, line: int): tuple[start, stop: int] = self.lines ## Utility functions +proc `$`*(self: Name): string = $self[] + + proc peek(self: Compiler, distance: int = 0): ASTNode = ## Peeks at the AST node at the given distance. ## If the distance is out of bounds, the last @@ -283,18 +283,18 @@ proc step(self: Compiler): ASTNode {.inline.} = self.current += 1 -proc emitByte(self: Compiler, byt: OpCode | uint8) {.inline.} = +proc emitByte(self: Compiler, byt: OpCode | uint8, line: int) {.inline.} = ## Emits a single byte, writing it to ## the current chunk being compiled - self.chunk.write(uint8 byt, self.peek().token.line) + self.chunk.write(uint8 byt, line) -proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) {.inline.} = +proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8], line: int) {.inline.} = ## Handy helper method to write arbitrary bytes into ## the current chunk, calling emitByte on each of its ## elements for b in bytarr: - self.emitByte(b) + self.emitByte(b, line) proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] = @@ -339,56 +339,112 @@ proc emitConstant(self: Compiler, obj: Expression, kind: Type) = ## with its operand case kind.kind: of Int64: - self.emitByte(LoadInt64) + self.emitByte(LoadInt64, obj.token.line) of UInt64: - self.emitByte(LoadUInt64) + self.emitByte(LoadUInt64, obj.token.line) of Int32: - self.emitByte(LoadInt32) + self.emitByte(LoadInt32, obj.token.line) of UInt32: - self.emitByte(LoadUInt32) + self.emitByte(LoadUInt32, obj.token.line) of Int16: - self.emitByte(LoadInt16) + self.emitByte(LoadInt16, obj.token.line) of UInt16: - self.emitByte(LoadUInt16) + self.emitByte(LoadUInt16, obj.token.line) of Int8: - self.emitByte(LoadInt8) + self.emitByte(LoadInt8, obj.token.line) of UInt8: - self.emitByte(LoadUInt8) + self.emitByte(LoadUInt8, obj.token.line) of String: - self.emitByte(LoadString) + self.emitByte(LoadString, obj.token.line) let str = LiteralExpr(obj).literal.lexeme if str.len() >= 16777216: self.error("string constants cannot be larger than 16777215 bytes") - self.emitBytes((str.len() - 2).toTriple()) + self.emitBytes((str.len() - 2).toTriple(), obj.token.line) of Float32: - self.emitByte(LoadFloat32) + self.emitByte(LoadFloat32, obj.token.line) of Float64: - self.emitByte(LoadFloat64) + self.emitByte(LoadFloat64, obj.token.line) else: discard # TODO - self.emitBytes(self.makeConstant(obj, kind)) + self.emitBytes(self.makeConstant(obj, kind), obj.token.line) -proc emitJump(self: Compiler, opcode: OpCode): int = - ## Emits a dummy jump offset to be patched later - ## and returns the absolute index into the chunk's - ## bytecode array where the given placeholder - ## instruction was written - self.emitByte(opcode) - self.emitBytes(0.toTriple()) - result = self.chunk.code.len() - 4 +proc setJump(self: Compiler, offset: int, jmp: array[3, uint8]) = + ## Sets a jump at the given + ## offset to the given value + self.chunk.code[offset + 1] = jmp[0] + self.chunk.code[offset + 2] = jmp[1] + self.chunk.code[offset + 3] = jmp[2] + + +proc setJump(self: Compiler, offset: int, jmp: seq[uint8]) = + ## Sets a jump at the given + ## offset to the given value + self.chunk.code[offset + 1] = jmp[0] + self.chunk.code[offset + 2] = jmp[1] + self.chunk.code[offset + 3] = jmp[2] proc patchJump(self: Compiler, offset: int) = ## Patches a previously emitted relative ## jump using emitJump - var jump: int = self.chunk.code.len() - offset + var jump: int = self.chunk.code.len() - self.jumps[offset].offset + if jump < 0: + self.error("invalid jump size (< 0), did the bytecode size change without fixJumps being called?") if jump > 16777215: self.error("cannot jump more than 16777215 instructions") - let offsetArray = (jump - 4).toTriple() - self.chunk.code[offset + 1] = offsetArray[0] - self.chunk.code[offset + 2] = offsetArray[1] - self.chunk.code[offset + 3] = offsetArray[2] + self.setJump(self.jumps[offset].offset, (jump - 4).toTriple()) + self.jumps[offset].patched = true + + +proc emitJump(self: Compiler, opcode: OpCode, line: int): int = + ## Emits a dummy jump offset to be patched later + ## and returns a unique identifier for that jump + ## to be passed to patchJump + self.emitByte(opcode, line) + self.jumps.add((patched: false, offset: self.chunk.code.high())) + self.emitBytes(0.toTriple(), line) + result = self.jumps.high() + + +proc fixCFIOffsets(self: Compiler, oldLen: int, modifiedAt: int) = + ## Fixes CFI offsets after the size of our + ## bytecode has changed + if oldLen == self.chunk.code.len(): + return + let offset = self.chunk.code.len() - oldLen + var newCFI: array[3, uint8] + var tmp: int + for cfi in self.cfiOffsets: + if cfi.offset >= modifiedAt: + newCFI = (cfi.value + offset).toTriple() + self.chunk.cfi[cfi.offset] = newCFI[0] + self.chunk.cfi[cfi.offset + 1] = newCFI[1] + self.chunk.cfi[cfi.offset + 2] = newCFI[2] + tmp = [self.chunk.cfi[cfi.offset + 3], self.chunk.cfi[cfi.offset + 4], self.chunk.cfi[cfi.offset + 5]].fromTriple().int + newCFI = (tmp + offset).toTriple() + self.chunk.cfi[cfi.offset + 3] = newCFI[0] + self.chunk.cfi[cfi.offset + 4] = newCFI[1] + self.chunk.cfi[cfi.offset + 5] = newCFI[2] + cfi.fn.codePos += offset + + +proc fixJumps(self: Compiler, oldLen: int, modifiedAt: int) = + ## Fixes jump offsets after the size + ## of our bytecode has changed + if oldLen == self.chunk.code.len(): + return + let offset = self.chunk.code.len() - oldLen + for jump in self.jumps.mitems(): + if jump.offset >= modifiedAt: + # While all already-patched jumps need + # to have their jump offsets fixed, we + # also need to update our internal jumps + # list in cases where we shifted the jump + # instruction itself into the code! + jump.offset += offset + if jump.patched: + self.setJump(jump.offset, self.chunk.code[jump.offset..= 0: + if name == self.closedOver[i]: + return i + dec(result) + return -1 proc resolve(self: Compiler, name: string, @@ -457,41 +511,6 @@ proc resolve(self: Compiler, name: string, return nil -proc detectClosureVariable(self: Compiler, name: var Name, depth: int = self.scopeDepth) = - ## Detects if the given name is used in a local scope deeper - ## than the given one and modifies the code emitted for it - ## to store it as a closure variable if it is. Does nothing if the name - ## hasn't been declared yet or is unreachable (for example if it's - ## declared as private in another module). This function must be called - ## each time a name is referenced in order for closed-over variables - ## to be emitted properly, otherwise the runtime may behave - ## unpredictably or crash - if name.isNil() or name.depth == 0 or name.isClosedOver: - return - elif name.depth < depth and self.scopes[name.depth - 1] != self.scopes[depth - 1]: - # Ding! The given name is closed over in another function: - # we need to change the Jump instruction that self.declareName - # put in place for us into a StoreClosure. We also update - # the name's isClosedOver field so that self.identifier() - # can emit a LoadClosure instruction instead of a LoadVar - # once this name is referenced in the future - self.closedOver.add((name, 0)) - name.isClosedOver = true - if not self.currentFunction.valueType.isClosure: - self.currentFunction.valueType.isClosure = true - self.currentFunction.valueType.closureBounds.start = self.closedOver.high() - self.currentFunction.valueType.closureBounds.stop = self.closedOver.high() - if self.closedOver.len() >= 16777216: - self.error("too many consecutive closed-over variables (max is 16777215)") - if not name.isFunctionArgument: - self.chunk.code[name.codePos] = StoreClosure.uint8() - for i, b in self.closedOver.high().toTriple(): - self.chunk.code[name.codePos + i + 1] = b - else: - self.chunk.code[name.codePos] = LiftArgument.uint8() - for i, b in self.getStackPos(name).toTriple(): - self.chunk.code[name.codePos + i + 1] = b - proc compareTypes(self: Compiler, a, b: Type): bool = ## Compares two type objects @@ -772,15 +791,14 @@ proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[ if self.compareTypes(obj.valueType, kind) and depth == -1 or depth == obj.depth: result.add(obj) -#[ -proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] = + +proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] {.used.} = ## Looks for objects that have been already declared ## with the given name at the given scope depth. ## Returns all objects that apply for obj in self.findByName(name): if obj.depth == depth: result.add(obj) -]# proc matchImpl(self: Compiler, name: string, kind: Type): Name = @@ -834,41 +852,475 @@ proc check(self: Compiler, term: Expression, kind: Type) = self.error(&"expecting value of type '{self.typeToStr(kind)}', got '{self.typeToStr(k)}' instead", term) +proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) = + ## Emits instructions for builtin functions + ## such as addition or subtraction + if fn.valueType.builtinOp notin ["LogicalOr", "LogicalAnd"]: + if len(args) == 2: + self.expression(args[1]) + self.expression(args[0]) + elif len(args) == 1: + self.expression(args[0]) + const codes: Table[string, OpCode] = {"Negate": Negate, + "NegateFloat32": NegateFloat32, + "NegateFloat64": NegateFloat64, + "Add": Add, + "Subtract": Subtract, + "Divide": Divide, + "Multiply": Multiply, + "SignedDivide": SignedDivide, + "AddFloat64": AddFloat64, + "SubtractFloat64": SubtractFloat64, + "DivideFloat64": DivideFloat64, + "MultiplyFloat64": MultiplyFloat64, + "AddFloat32": AddFloat32, + "SubtractFloat32": SubtractFloat32, + "DivideFloat32": DivideFloat32, + "MultiplyFloat32": MultiplyFloat32, + "Pow": Pow, + "SignedPow": SignedPow, + "PowFloat32": PowFloat32, + "PowFloat64": PowFloat64, + "Mod": Mod, + "SignedMod": SignedMod, + "ModFloat32": ModFloat32, + "ModFloat64": ModFloat64, + "Or": Or, + "And": And, + "Xor": Xor, + "Not": Not, + "LShift": LShift, + "RShift": RShift, + "Equal": Equal, + "NotEqual": NotEqual, + "LessThan": LessThan, + "GreaterThan": GreaterThan, + "LessOrEqual": LessOrEqual, + "GreaterOrEqual": GreaterOrEqual, + "PrintInt64": PrintInt64, + "PrintUInt64": PrintUInt64, + "PrintInt32": PrintInt32, + "PrintUInt32": PrintUInt32, + "PrintInt16": PrintInt16, + "PrintUInt16": PrintUInt16, + "PrintInt8": PrintInt8, + "PrintUInt8": PrintUInt8, + "PrintFloat64": PrintFloat64, + "PrintFloat32": PrintFloat32, + "PrintHex": PrintHex, + "PrintBool": PrintBool, + "PrintNan": PrintNan, + "PrintInf": PrintInf, + "PrintString": PrintString, + "SysClock64": SysClock64 + }.to_table() + if fn.valueType.builtinOp in codes: + self.emitByte(codes[fn.valueType.builtinOp], fn.line) + return + # Some builtin operations are slightly more complex + # so we handle them separately + case fn.valueType.builtinOp: + of "LogicalOr": + self.expression(args[0]) + let jump = self.emitJump(JumpIfTrue, fn.line) + self.expression(args[1]) + self.patchJump(jump) + of "LogicalAnd": + self.expression(args[0]) + var jump = self.emitJump(JumpIfFalseOrPop, fn.line) + self.expression(args[1]) + self.patchJump(jump) + else: + self.error(&"unknown built-in: '{fn.valueType.builtinOp}'", fn.valueType.fun) + proc emitFunction(self: Compiler, name: Name) = ## Wrapper to emit LoadFunction instructions if name.isFunDecl: - self.emitByte(LoadFunction) - self.emitBytes(name.codePos.toTriple()) + self.emitByte(LoadInt64, name.line) + self.emitBytes(self.chunk.writeConstant(name.codePos.toLong()), name.line) # If we're not loading a statically declared # function, then it must be a function object # created by previous LoadFunction instructions # that is now bound to some variable, so we just # load it - elif not name.isClosedOver: - self.emitByte(LoadVar) - self.emitBytes(self.getStackPos(name).toTriple()) + elif self.scopeDepth > 0 and name.depth != self.scopeDepth: + self.emitByte(LoadVar, name.line) + self.emitBytes(self.getStackPos(name).toTriple(), name.line) else: - self.emitByte(LoadClosure) - self.emitBytes(self.getClosurePos(name).toTriple()) + self.emitByte(LoadClosure, name.line) + self.emitBytes(self.getClosurePos(name).toTriple(), name.line) + + +proc generateCall(self: Compiler, fn: Name, args: seq[Expression], onStack: bool = false) = + ## Small wrapper that abstracts emitting a call instruction + ## for a given function + if fn.valueType.isBuiltinFunction: + # Builtins map to individual instructions + # (usually 1, but some use more) so we handle + # them differently + self.handleBuiltinFunction(fn, args) + return + if not onStack: + # If we're not calling a function + # whose instruction pointer's is + # already on the stack, we emit it + self.emitFunction(fn) + # We initially emit a dummy return + # address. It is patched later + self.emitByte(LoadUInt32, fn.line) + self.emitBytes(self.chunk.writeConstant(0.toQuad()), fn.line) + let pos = self.chunk.consts.len() - 4 + for i, argument in reversed(args): + # We pass the arguments in reverse + # because of how stacks work. They'll + # be reversed again at runtime + if onStack: + self.check(argument, fn.valueType.args[^i].kind) + self.expression(argument) + # Creates a new call frame and jumps + # to the function's first instruction + # in the code + if not fn.valueType.isClosure: + self.emitByte(Call, fn.line) + else: + self.emitByte(CallClosure, fn.line) + self.emitBytes(fn.valueType.args.len().toTriple(), fn.line) + if fn.valueType.isClosure: + self.emitBytes(fn.valueType.envLen.toTriple(), fn.line) + self.patchReturnAddress(pos) + + +proc checkCallIsPure(self: Compiler, node: ASTnode): bool = + ## Checks if a call has any side effects. Returns + ## true if it doesn't and false otherwise + return true # TODO + + +proc beginScope(self: Compiler) = + ## Begins a new local scope by incrementing the current + ## scope's depth + inc(self.scopeDepth) + + +proc `$`(self: Type): string = $self[] + + +proc flattenImpl(self: Type, to: var seq[Type]) = + to.add(self) + for child in self.children: + flattenImpl(child, to) + + +proc flatten(self: Type): seq[Type] = flattenImpl(self, result) + + + +proc endScope(self: Compiler) = + ## Ends the current local scope + if self.scopeDepth < 0: + self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)") + dec(self.scopeDepth) + var names: seq[Name] = @[] + var popCount = 0 + for name in self.names: + if name.depth > self.scopeDepth: + names.add(name) + if name.valueType.kind notin {Generic, CustomType} and not name.isFunDecl: + # We don't increase the pop count for these kinds of objects + # because they're not stored the same way as regular variables + inc(popCount) + if name.isFunDecl and not name.valueType.isClosure and name.valueType.children.len() > 0 and name.depth == 0: + # If a function at the top level contains any closures, + # when it goes out of scope all of the environments that + # belong to its inner functions also go out of + # scope + var i = 0 + let f = name.valueType + for fn in flatten(f): + if fn.isClosure: + for y in 0..f.envLen: + self.closedOver.delete(y + i) + self.emitByte(PopClosure, self.peek().token.line) + self.emitBytes((y + i).toTriple(), self.peek().token.line) + inc(i) + if popCount > 1: + # If we're popping less than 65535 variables, then + # we can emit a PopN instruction. This is true for + # 99.99999% of the use cases of the language (who the + # hell is going to use 65 THOUSAND variables?), but + # if you'll ever use more then Peon will emit a PopN instruction + # for the first 65 thousand and change local variables and then + # emit another batch of plain ol' Pop instructions for the rest + self.emitByte(PopN, self.peek().token.line) + self.emitBytes(popCount.toDouble(), self.peek().token.line) + if popCount > uint16.high().int(): + for i in countdown(self.names.high(), popCount - uint16.high().int()): + if self.names[i].depth > self.scopeDepth: + self.emitByte(PopC, self.peek().token.line) + elif popCount == 1: + # We only emit PopN if we're popping more than one value + self.emitByte(PopC, self.peek().token.line) + # This seems *really* slow, but + # what else should I do? Nim doesn't + # allow the removal of items during + # seq iteration so ¯\_(ツ)_/¯ + var idx = 0 + while idx < self.names.len(): + for name in names: + if self.names[idx] == name: + self.names.delete(idx) + inc(idx) + + +proc declareName(self: Compiler, node: Declaration, mutable: bool = false) = + ## Statically declares a name into the current scope. + ## "Declaring" a name only means updating our internal + ## list of identifiers so that further calls to resolve() + ## correctly return them. There is no code to actually + ## declare a variable at runtime: the value is already + ## on the stack + case node.kind: + of NodeKind.varDecl: + var node = VarDecl(node) + # Creates a new Name entry so that self.identifier emits the proper stack offset + if self.names.high() > 16777215: + # If someone ever hits this limit in real-world scenarios, I swear I'll + # slap myself 100 times with a sign saying "I'm dumb". Mark my words + self.error("cannot declare more than 16777215 variables at a time") + for name in self.findByName(node.name.token.lexeme): + if name.depth == self.scopeDepth and not name.isFunctionArgument: + # Trying to redeclare a variable in the same scope/context is an error, but it's okay + # if it's a function argument (for example, if you want to copy a number to + # mutate it) + self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}") + self.names.add(Name(depth: self.scopeDepth, + name: node.name, + isPrivate: node.isPrivate, + owner: self.currentModule, + isConst: node.isConst, + valueType: self.inferType(node.value), + codePos: self.chunk.code.len(), + isLet: node.isLet, + line: node.token.line, + belongsTo: self.currentFunction + )) + if mutable: + self.names[^1].valueType.mutable = true + of NodeKind.funDecl: + var node = FunDecl(node) + # We declare the generics before the function so we + # can refer to them later + for gen in node.generics: + self.names.add(Name(depth: self.scopeDepth + 1, + isPrivate: true, + isConst: false, + owner: self.currentModule, + line: node.token.line, + valueType: Type(kind: Generic, mutable: false, node: gen.name), + name: gen.name)) + self.names.add(Name(depth: self.scopeDepth, + isPrivate: node.isPrivate, + isConst: false, + owner: self.currentModule, + valueType: Type(kind: Function, + name: node.name.token.lexeme, + returnType: self.inferType(node.returnType), + args: @[], + fun: node, + children: @[]), + codePos: self.chunk.code.len(), + name: node.name, + isLet: false, + line: node.token.line, + isFunDecl: true, + )) + let fn = self.names[^1] + var name: Name + for argument in node.arguments: + if self.names.high() > 16777215: + self.error("cannot declare more than 16777215 variables at a time") + # wait, no LoadVar? Yes! That's because when calling functions, + # arguments will already be on the stack, so there's no need to + # load them here + name = Name(depth: self.scopeDepth + 1, + isPrivate: true, + owner: self.currentModule, + isConst: false, + name: argument.name, + valueType: nil, + codePos: 0, + isLet: false, + line: argument.name.token.line, + isFunctionArgument: true, + belongsTo: fn + ) + self.names.add(name) + name.valueType = self.inferType(argument.valueType) + # If it's still nil, it's an error! + if name.valueType.isNil(): + self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'", argument.name) + fn.valueType.args.add((argument.name.token.lexeme, name.valueType)) + else: + discard # TODO: Types, enums + + +proc emitLoop(self: Compiler, begin: int, line: int) = + ## Emits a JumpBackwards instruction with the correct + ## jump offset + let offset = self.chunk.code.high() - begin + 4 + if offset > 16777215: + self.error("cannot jump more than 16777215 bytecode instructions") + self.emitByte(JumpBackwards, line) + self.emitBytes(offset.toTriple(), line) + + +proc patchBreaks(self: Compiler) = + ## Patches the jumps emitted by + ## breakStmt. This is needed + ## because the size of code + ## to skip is not known before + ## the loop is fully compiled + for brk in self.currentLoop.breakPos: + self.patchJump(brk) + + +proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) = + ## Handles the "magic" pragma. Assumes the given name is already + ## declared + if pragma.args.len() != 1: + self.error("'magic' pragma: wrong number of arguments") + elif pragma.args[0].kind != strExpr: + self.error("'magic' pragma: wrong type of argument (constant string expected)") + elif node.kind != NodeKind.funDecl: + self.error("'magic' pragma is not valid in this context") + var node = FunDecl(node) + var fn = self.resolve(node.name) + fn.valueType.isBuiltinFunction = true + fn.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2] + # The magic pragma ignores the function's body + node.body = nil + + +proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) = + ## Handles the "pure" pragma + case node.kind: + of NodeKind.funDecl: + FunDecl(node).isPure = true + of lambdaExpr: + LambdaExpr(node).isPure = true + else: + self.error("'pure' pragma is not valid in this context") + + +proc dispatchPragmas(self: Compiler, node: ASTnode) = + ## Dispatches pragmas bound to objects + var pragmas: seq[Pragma] = @[] + case node.kind: + of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl: + pragmas = Declaration(node).pragmas + of lambdaExpr: + pragmas = LambdaExpr(node).pragmas + else: + discard # Unreachable + for pragma in pragmas: + if pragma.name.token.lexeme notin self.compilerProcs: + self.error(&"unknown pragma '{pragma.name.token.lexeme}'") + self.compilerProcs[pragma.name.token.lexeme](self, pragma, node) + + +proc fixGenericFunc(self: Compiler, name: Name, args: seq[Expression]): Name = + ## Specializes generic arguments in functions + var fn = name.deepCopy() + result = fn + var typ: Type + for i in 0..args.high(): + if fn.valueType.args[i].kind.kind == Generic: + typ = self.inferType(args[i]) + fn.valueType.args[i].kind = typ + self.resolve(fn.valueType.args[i].name).valueType = typ + if fn.valueType.args[i].kind.isNil(): + self.error(&"cannot specialize generic function: argument {i + 1} has no type") + + +proc patchReturnAddress(self: Compiler, pos: int) = + ## Patches the return address of a function + ## call + let address = self.chunk.code.len().toQuad() + self.chunk.consts[pos] = address[0] + self.chunk.consts[pos + 1] = address[1] + self.chunk.consts[pos + 2] = address[2] + self.chunk.consts[pos + 3] = address[3] + + +proc terminateProgram(self: Compiler, pos: int, terminateScope: bool = true) = + ## Utility to terminate a peon program + if terminateScope: + self.endScope() + self.patchReturnAddress(pos + 3) + self.emitByte(OpCode.Return, self.peek().token.line) + self.emitByte(0, self.peek().token.line) # Entry point has no return value (TODO: Add easter eggs, cuz why not) + + +proc beginProgram(self: Compiler, incremental: bool = false): int = + ## Utility to begin a peon program + ## compiled. Returns the position of + ## a dummy return address of the program's + ## entry point to be patched by terminateProgram + + # Every peon program has a hidden entry point in + # which user code is wrapped. Think of it as if + # peon is implicitly writing the main() function + # of your program and putting all of your code in + # there. While we call our entry point just like + # any regular peon function, we can't use our handy + # helper generateCall() because we need to keep track + # of where our program ends (which we don't know yet). + # To fix this, we emit dummy offsets and patch them + # later, once we know the boundaries of our hidden main() + var main: Name + if incremental: + main = self.names[0] + else: + main = Name(depth: 0, + isPrivate: true, + isConst: false, + isLet: false, + owner: self.currentModule, + valueType: Type(kind: Function, + name: "", + returnType: nil, + args: @[], + ), + codePos: 12, # Jump address is hardcoded + name: newIdentExpr(Token(lexeme: "", kind: Identifier)), + isFunDecl: true, + line: -1) + self.names.add(main) + self.emitByte(LoadInt64, 1) + self.emitBytes(self.chunk.writeConstant(main.codePos.toLong()), 1) + self.emitByte(LoadUInt32, 1) + self.emitBytes(self.chunk.writeConstant(0.toQuad()), 1) + self.emitByte(Call, 1) + self.emitBytes(0.toTriple(), 1) + result = 5 ## End of utility functions - proc literal(self: Compiler, node: ASTNode) = ## Emits instructions for literals such ## as singletons, strings and numbers case node.kind: of trueExpr: - self.emitByte(LoadTrue) + self.emitByte(LoadTrue, node.token.line) of falseExpr: - self.emitByte(LoadFalse) + self.emitByte(LoadFalse, node.token.line) of nilExpr: - self.emitByte(LoadNil) + self.emitByte(LoadNil, node.token.line) of infExpr: - self.emitByte(LoadInf) + self.emitByte(LoadInf, node.token.line) of nanExpr: - self.emitByte(LoadNan) + self.emitByte(LoadNan, node.token.line) of strExpr: self.emitConstant(LiteralExpr(node), Type(kind: String)) of intExpr: @@ -937,129 +1389,11 @@ proc literal(self: Compiler, node: ASTNode) = of awaitExpr: var y = AwaitExpr(node) self.expression(y.expression) - self.emitByte(OpCode.Await) + self.emitByte(OpCode.Await, node.token.line) else: self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)") - -proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) = - ## Emits instructions for builtin functions - ## such as addition or subtraction - if fn.valueType.builtinOp notin ["LogicalOr", "LogicalAnd"]: - if len(args) == 2: - self.expression(args[1]) - self.expression(args[0]) - elif len(args) == 1: - self.expression(args[0]) - const codes: Table[string, OpCode] = {"Negate": Negate, - "NegateFloat32": NegateFloat32, - "NegateFloat64": NegateFloat64, - "Add": Add, - "Subtract": Subtract, - "Divide": Divide, - "Multiply": Multiply, - "SignedDivide": SignedDivide, - "AddFloat64": AddFloat64, - "SubtractFloat64": SubtractFloat64, - "DivideFloat64": DivideFloat64, - "MultiplyFloat64": MultiplyFloat64, - "AddFloat32": AddFloat32, - "SubtractFloat32": SubtractFloat32, - "DivideFloat32": DivideFloat32, - "MultiplyFloat32": MultiplyFloat32, - "Pow": Pow, - "SignedPow": SignedPow, - "PowFloat32": PowFloat32, - "PowFloat64": PowFloat64, - "Mod": Mod, - "SignedMod": SignedMod, - "ModFloat32": ModFloat32, - "ModFloat64": ModFloat64, - "Or": Or, - "And": And, - "Xor": Xor, - "Not": Not, - "LShift": LShift, - "RShift": RShift, - "Equal": Equal, - "NotEqual": NotEqual, - "LessThan": LessThan, - "GreaterThan": GreaterThan, - "LessOrEqual": LessOrEqual, - "GreaterOrEqual": GreaterOrEqual, - "PrintInt64": PrintInt64, - "PrintUInt64": PrintUInt64, - "PrintInt32": PrintInt32, - "PrintUInt32": PrintUInt32, - "PrintInt16": PrintInt16, - "PrintUInt16": PrintUInt16, - "PrintInt8": PrintInt8, - "PrintUInt8": PrintUInt8, - "PrintFloat64": PrintFloat64, - "PrintFloat32": PrintFloat32, - "PrintHex": PrintHex, - "PrintBool": PrintBool, - "PrintNan": PrintNan, - "PrintInf": PrintInf, - "PrintString": PrintString, - "SysClock64": SysClock64 - }.to_table() - if fn.valueType.builtinOp in codes: - self.emitByte(codes[fn.valueType.builtinOp]) - return - # Some builtin operations are slightly more complex - # so we handle them separately - case fn.valueType.builtinOp: - of "LogicalOr": - self.expression(args[0]) - let jump = self.emitJump(JumpIfTrue) - self.expression(args[1]) - self.patchJump(jump) - of "LogicalAnd": - self.expression(args[0]) - var jump = self.emitJump(JumpIfFalseOrPop) - self.expression(args[1]) - self.patchJump(jump) - else: - self.error(&"unknown built-in: '{fn.valueType.builtinOp}'", fn.valueType.fun) - - -proc generateCall(self: Compiler, fn: Name, args: seq[Expression], onStack: bool = false) = - ## Small wrapper that abstracts emitting a call instruction - ## for a given function - if fn.valueType.isBuiltinFunction: - # Builtins map to individual instructions - # (usually 1, but some use more) so we handle - # them differently - self.handleBuiltinFunction(fn, args) - return - if not onStack: - self.emitFunction(fn) - self.emitByte(LoadReturnAddress) - let pos = self.chunk.code.len() - # We initially emit a dummy return - # address. It is patched later - self.emitBytes(0.toQuad()) - for argument in reversed(args): - # We pass the arguments in reverse - # because of how stack semantics - # work. They'll be fixed at runtime - self.expression(argument) - var f = fn.valueType - while not f.isNil(): - if f.isClosure: - for i in f.closureBounds.start..f.closureBounds.stop: - self.closedOver[i].count += 1 - f = f.childFunc - # Creates a new call frame and jumps - # to the function's first instruction - # in the code - self.emitByte(Call) - self.emitBytes(fn.valueType.args.len().toTriple()) - self.patchReturnAddress(pos) - - proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) = ## Emits the code to call a unary operator self.generateCall(fn, @[op.a]) @@ -1086,114 +1420,6 @@ proc binary(self: Compiler, node: BinaryExpr) = self.callBinaryOp(funct, node) -proc declareName(self: Compiler, node: Declaration, mutable: bool = false) = - ## Statically declares a name into the current scope. - ## "Declaring" a name only means updating our internal - ## list of identifiers so that further calls to resolve() - ## correctly return them. There is no code to actually - ## declare a variable at runtime: the value is already - ## on the stack - case node.kind: - of NodeKind.varDecl: - var node = VarDecl(node) - # Creates a new Name entry so that self.identifier emits the proper stack offset - if self.names.high() > 16777215: - # If someone ever hits this limit in real-world scenarios, I swear I'll - # slap myself 100 times with a sign saying "I'm dumb". Mark my words - self.error("cannot declare more than 16777215 variables at a time") - for name in self.findByName(node.name.token.lexeme): - if name.depth == self.scopeDepth and not name.isFunctionArgument: - # Trying to redeclare a variable in the same scope/context is an error, but it's okay - # if it's a function argument (for example, if you want to copy a number to - # mutate it) - self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}") - self.names.add(Name(depth: self.scopeDepth, - name: node.name, - isPrivate: node.isPrivate, - owner: self.currentModule, - isConst: node.isConst, - valueType: self.inferType(node.value), - codePos: self.chunk.code.len(), - isLet: node.isLet, - isClosedOver: false, - line: node.token.line, - belongsTo: self.currentFunction - )) - if mutable: - self.names[^1].valueType.mutable = true - # We emit a jump of 0 because this may become a - # StoreHeap instruction. If they variable is - # not closed over, we'll sadly be wasting a - # VM cycle. The previous implementation used 4 no-op - # instructions, which wasted 4 times as many clock - # cycles. - # TODO: Optimize this. It's a bit tricky because - # deleting bytecode would render all of our - # jump offsets and other absolute indeces in the - # bytecode wrong - if self.scopeDepth > 0: - # Closure variables are only used in local - # scopes - self.emitByte(JumpForwards) - self.emitBytes(0.toTriple()) - of NodeKind.funDecl: - var node = FunDecl(node) - # We declare the generics before the function so we - # can refer to them - for gen in node.generics: - self.names.add(Name(depth: self.scopeDepth + 1, - isPrivate: true, - isConst: false, - owner: self.currentModule, - line: node.token.line, - valueType: Type(kind: Generic, mutable: false, node: gen.name), - name: gen.name)) - self.names.add(Name(depth: self.scopeDepth, - isPrivate: node.isPrivate, - isConst: false, - owner: self.currentModule, - valueType: Type(kind: Function, - name: node.name.token.lexeme, - returnType: self.inferType(node.returnType), - args: @[], - fun: node), - codePos: self.chunk.code.len(), - name: node.name, - isLet: false, - isClosedOver: false, - line: node.token.line, - isFunDecl: true)) - let fn = self.names[^1] - var name: Name - for argument in node.arguments: - if self.names.high() > 16777215: - self.error("cannot declare more than 16777215 variables at a time") - # wait, no LoadVar? Yes! That's because when calling functions, - # arguments will already be on the stack, so there's no need to - # load them here - name = Name(depth: self.scopeDepth + 1, - isPrivate: true, - owner: self.currentModule, - isConst: false, - name: argument.name, - valueType: nil, - codePos: 0, - isLet: false, - isClosedOver: false, - line: argument.name.token.line, - isFunctionArgument: true, - belongsTo: fn - ) - self.names.add(name) - name.valueType = self.inferType(argument.valueType) - # If it's still nil, it's an error! - if name.valueType.isNil(): - self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'", argument.name) - fn.valueType.args.add((argument.name.token.lexeme, name.valueType)) - else: - discard # TODO: Types, enums - - proc identifier(self: Compiler, node: IdentExpr) = ## Compiles access to identifiers var s = self.resolve(node) @@ -1204,24 +1430,40 @@ proc identifier(self: Compiler, node: IdentExpr) = # no matter the scope depth self.emitConstant(node, self.inferType(node)) else: - self.detectClosureVariable(s) if s.valueType.kind == Function and s.isFunDecl: # Functions have no runtime # representation, so we need # to create one on the fly - self.emitByte(LoadFunction) - self.emitBytes(s.codePos.toTriple()) - elif not s.isClosedOver: - # Static name resolution, loads value at index in the stack. Very fast. Much wow. - self.emitByte(LoadVar) - # No need to check for -1 here: we already did a nil-check above! - self.emitBytes(self.getStackPos(s).toTriple()) - else: + self.emitByte(LoadInt64, node.token.line) + self.emitBytes(self.chunk.writeConstant(s.codePos.toLong()), node.token.line) + elif self.scopeDepth > 0 and not self.currentFunction.isNil() and s.depth != self.scopeDepth: # Loads a closure variable. Stored in a separate "closure array" in the VM that does not # align its semantics with the call stack. This makes closures work as expected and is # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway) - self.emitByte(LoadClosure) - self.emitBytes((self.getClosurePos(s)).toTriple()) + if not s.isClosedOver: + self.currentFunction.valueType.isClosure = true + self.currentFunction.valueType.envLen += 1 + self.closedOver.add(s) + let stackIdx = self.getStackPos(s).toTriple() + let closeIdx = self.getClosurePos(s).toTriple() + let oldLen = self.chunk.code.len() + self.chunk.code.insert(StoreClosure.uint8, s.belongsTo.codePos) + self.chunk.code.insert(stackIdx[0], s.belongsTo.codePos + 1) + self.chunk.code.insert(stackIdx[1], s.belongsTo.codePos + 2) + self.chunk.code.insert(stackIdx[2], s.belongsTo.codePos + 3) + self.chunk.code.insert(closeIdx[0], s.belongsTo.codePos + 4) + self.chunk.code.insert(closeIdx[1], s.belongsTo.codePos + 5) + self.chunk.code.insert(closeIdx[2], s.belongsTo.codePos + 6) + self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(s.belongsTo.codePos)) + 1] += 7 + self.fixJumps(oldLen, s.belongsTo.codePos) + self.fixCFIOffsets(oldLen, s.belongsTo.codePos) + self.emitByte(LoadClosure, node.token.line) + self.emitBytes(self.getClosurePos(s).toTriple(), node.token.line) + else: + # Static name resolution, loads value at index in the stack. Very fast. Much wow. + self.emitByte(LoadVar, node.token.line) + # No need to check for -1 here: we already did a nil-check above! + self.emitBytes(self.getStackPos(s).toTriple(), node.token.line) proc assignment(self: Compiler, node: ASTNode) = @@ -1238,16 +1480,15 @@ proc assignment(self: Compiler, node: ASTNode) = elif r.isLet: self.error(&"cannot reassign '{name.token.lexeme}'", name) self.expression(node.value) - self.detectClosureVariable(r) - if not r.isClosedOver: - self.emitByte(StoreVar) - self.emitBytes(self.getStackPos(r).toTriple()) + if self.scopeDepth > 0 and r.depth != self.scopeDepth: + self.emitByte(StoreVar, node.token.line) + self.emitBytes(self.getStackPos(r).toTriple(), node.token.line) else: # Loads a closure variable. Stored in a separate "closure array" in the VM that does not # align its semantics with the call stack. This makes closures work as expected and is # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway) - self.emitByte(StoreClosure) - self.emitBytes(self.getClosurePos(r).toTriple()) + self.emitByte(StoreClosure, node.token.line) + self.emitBytes(self.getClosurePos(r).toTriple(), node.token.line) of setItemExpr: let node = SetItemExpr(node) let typ = self.inferType(node) @@ -1258,68 +1499,6 @@ proc assignment(self: Compiler, node: ASTNode) = self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") -proc beginScope(self: Compiler) = - ## Begins a new local scope by incrementing the current - ## scope's depth - inc(self.scopeDepth) - self.scopes.add(self.currentFunction.valueType) - - -proc endScope(self: Compiler) = - ## Ends the current local scope - if self.scopeDepth < 0: - self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)") - dec(self.scopeDepth) - if self.scopeDepth > 0: - discard self.scopes.pop() - var names: seq[Name] = @[] - var popCount = 0 - for name in self.names: - if name.depth > self.scopeDepth: - names.add(name) - if name.valueType.kind notin {Generic, CustomType} and not name.isFunDecl: - # We don't increase the pop count for these kinds of objects - # because they're not stored the same way as regular variables - inc(popCount) - if name.isFunDecl and not name.valueType.childFunc.isNil() and name.valueType.childFunc.isClosure: - var i = 0 - var closure: tuple[name: Name, count: int] - for y in name.valueType.childFunc.closureBounds.start..name.valueType.childFunc.closureBounds.stop: - closure = self.closedOver[y + i] - self.closedOver.delete(y + i) - for _ in 0.. 1: - # If we're popping less than 65535 variables, then - # we can emit a PopN instruction. This is true for - # 99.99999% of the use cases of the language (who the - # hell is going to use 65 THOUSAND variables?), but - # if you'll ever use more then Peon will emit a PopN instruction - # for the first 65 thousand and change local variables and then - # emit another batch of plain ol' Pop instructions for the rest - self.emitByte(PopN) - self.emitBytes(popCount.toDouble()) - if popCount > uint16.high().int(): - for i in countdown(self.names.high(), popCount - uint16.high().int()): - if self.names[i].depth > self.scopeDepth: - self.emitByte(PopC) - elif popCount == 1: - # We only emit PopN if we're popping more than one value - self.emitByte(PopC) - # This seems *really* slow, but - # what else should I do? Nim doesn't - # allow the removal of items during - # seq iteration so ¯\_(ツ)_/¯ - var idx = 0 - while idx < self.names.len(): - for name in names: - if self.names[idx] == name: - self.names.delete(idx) - inc(idx) - - proc blockStmt(self: Compiler, node: BlockStmt) = ## Compiles block statements, which create a new ## local scope @@ -1334,49 +1513,37 @@ proc ifStmt(self: Compiler, node: IfStmt) = ## execution of code self.check(node.condition, Type(kind: Bool)) self.expression(node.condition) - let jump = self.emitJump(JumpIfFalsePop) + let jump = self.emitJump(JumpIfFalsePop, node.token.line) self.statement(node.thenBranch) - let jump2 = self.emitJump(JumpForwards) + let jump2 = self.emitJump(JumpForwards, node.token.line) self.patchJump(jump) if not node.elseBranch.isNil(): self.statement(node.elseBranch) self.patchJump(jump2) -proc emitLoop(self: Compiler, begin: int) = - ## Emits a JumpBackwards instruction with the correct - ## jump offset - let offset = self.chunk.code.high() - begin + 4 - if offset > 16777215: - self.error("cannot jump more than 16777215 bytecode instructions") - self.emitByte(JumpBackwards) - self.emitBytes(offset.toTriple()) - - proc whileStmt(self: Compiler, node: WhileStmt) = ## Compiles C-style while loops and ## desugared C-style for loops self.check(node.condition, Type(kind: Bool)) let start = self.chunk.code.high() self.expression(node.condition) - let jump = self.emitJump(JumpIfFalsePop) + let jump = self.emitJump(JumpIfFalsePop, node.token.line) self.statement(node.body) - self.emitLoop(start) + self.emitLoop(start, node.token.line) self.patchJump(jump) -proc checkCallIsPure(self: Compiler, node: ASTnode): bool = - ## Checks if a call has any side effects. Returns - ## true if it doesn't and false otherwise - return true # TODO - - proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} = - ## Compiles code to call a function + ## Compiles code to call a chain of function calls var args: seq[tuple[name: string, kind: Type]] = @[] var argExpr: seq[Expression] = @[] var kind: Type - var onStack = false + if not self.checkCallIsPure(node.callee): + if self.currentFunction.valueType.name != "": + self.error(&"cannot make sure that calls to '{self.currentFunction.valueType.name}' are side-effect free") + else: + self.error(&"cannot make sure that call is side-effect free") # TODO: Keyword arguments for i, argument in node.arguments.positionals: kind = self.inferType(argument) @@ -1391,14 +1558,22 @@ proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} = discard if args.len() >= 16777216: self.error(&"cannot pass more than 16777215 arguments") - var funct: Name case node.callee.kind: of identExpr: - funct = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args)) + # Calls like hi() + result = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args)) + self.generateCall(result, argExpr) of NodeKind.callExpr: - funct = self.callExpr(CallExpr(node.callee)) - funct = Name(valueType: Type(kind: Function, returnType: Type(kind: Any), args: args)) - onStack = true + # Calling a call expression, like hello()() + var node: Expression = node + var all: seq[CallExpr] = @[] + while CallExpr(node).callee.kind == callExpr: + all.add(CallExpr(CallExpr(node).callee)) + node = CallExpr(node).callee + for exp in reversed(all): + self.callExpr(exp) + # TODO + self.generateCall(result, argExpr, onStack=true) # TODO: Calling lambdas on-the-fly (i.e. on the same line) else: let typ = self.inferType(node) @@ -1406,13 +1581,6 @@ proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} = self.error(&"expression has no type") else: self.error(&"object of type '{self.typeToStr(typ)}' is not callable") - result = funct - self.generateCall(funct, argExpr, onStack) - if not self.checkCallIsPure(node.callee): - if self.currentFunction.valueType.name != "": - self.error(&"cannot make sure that calls to '{self.currentFunction.valueType.name}' are side-effect free") - else: - self.error(&"cannot make sure that call is side-effect free") proc expression(self: Compiler, node: Expression) = @@ -1459,24 +1627,33 @@ proc awaitStmt(self: Compiler, node: AwaitStmt) = ## context of statements for usage outside expressions, ## meaning it can be used standalone. It's basically the ## same as an await expression followed by a semicolon. - ## Await expressions are the only native construct to - ## run coroutines from within an already asynchronous - ## context (which should be orchestrated by an event loop). - ## They block in the caller until the callee returns + ## Await expressions and statements are the only native + ## construct to run coroutines from within an already + ## asynchronous context (which should be orchestrated + ## by an event loop). They block in the caller until + ## the callee returns self.expression(node.expression) - self.emitByte(OpCode.Await) + self.emitByte(OpCode.Await, node.token.line) proc deferStmt(self: Compiler, node: DeferStmt) = ## Compiles defer statements. A defer statement ## is executed right before its containing function ## exits (either because of a return or an exception) - let current = self.chunk.code.len + var oldChunk = self.chunk + var chunk = newChunk() + chunk.consts = self.chunk.consts + chunk.lines = self.chunk.lines + chunk.cfi = self.chunk.cfi + self.chunk = chunk self.expression(node.expression) - for i in countup(current, self.chunk.code.high()): - self.deferred.add(self.chunk.code[i]) - self.chunk.code.delete(i) # TODO: Do not change bytecode size - + for b in chunk.code: + self.deferred.add(b) + self.chunk = oldChunk + self.chunk.code &= chunk.code + self.chunk.consts &= chunk.consts + self.chunk.lines &= chunk.lines + self.chunk.cfi &= chunk.cfi proc returnStmt(self: Compiler, node: ReturnStmt) = @@ -1485,60 +1662,49 @@ proc returnStmt(self: Compiler, node: ReturnStmt) = self.check(node.value, expected) if not node.value.isNil(): self.expression(node.value) - self.emitByte(OpCode.SetResult) - self.emitByte(OpCode.Return) + self.emitByte(OpCode.SetResult, node.token.line) + self.emitByte(OpCode.Return, node.token.line) if not node.value.isNil(): - self.emitByte(1) + self.emitByte(1, node.token.line) else: - self.emitByte(0) + self.emitByte(0, node.token.line) -# TODO: Implement this as a custom operator proc yieldStmt(self: Compiler, node: YieldStmt) = ## Compiles yield statements self.expression(node.expression) - self.emitByte(OpCode.Yield) + self.emitByte(OpCode.Yield, node.token.line) + -# TODO: Implement this as a custom operator proc raiseStmt(self: Compiler, node: RaiseStmt) = ## Compiles raise statements self.expression(node.exception) - self.emitByte(OpCode.Raise) + self.emitByte(OpCode.Raise, node.token.line) proc continueStmt(self: Compiler, node: ContinueStmt) = - ## Compiles continue statements. A continue statements + ## Compiles continue statements. A continue statement ## jumps to the next iteration in a loop if self.currentLoop.start > 16777215: self.error("too much code to jump over in continue statement") - self.emitByte(Jump) - self.emitBytes(self.currentLoop.start.toTriple()) + self.emitByte(Jump, node.token.line) + self.emitBytes(self.currentLoop.start.toTriple(), node.token.line) proc breakStmt(self: Compiler, node: BreakStmt) = - ## Compiles break statements. A continue statement - ## jumps to the next iteration in a loop - self.currentLoop.breakPos.add(self.emitJump(OpCode.JumpForwards)) + ## Compiles break statements. A break statement + ## jumps to the end of the loop + self.currentLoop.breakPos.add(self.emitJump(OpCode.JumpForwards, node.token.line)) if self.currentLoop.depth > self.scopeDepth: # Breaking out of a loop closes its scope self.endScope() -proc patchBreaks(self: Compiler) = - ## Patches the jumps emitted by - ## breakStmt. This is needed - ## because the size of code - ## to skip is not known before - ## the loop is fully compiled - for brk in self.currentLoop.breakPos: - self.patchJump(brk) - - proc assertStmt(self: Compiler, node: AssertStmt) = ## Compiles assert statements (raise ## AssertionError if the expression is falsey) self.expression(node.expression) - self.emitByte(OpCode.Assert) + self.emitByte(OpCode.Assert, node.token.line) proc forEachStmt(self: Compiler, node: ForEachStmt) = @@ -1572,37 +1738,37 @@ proc statement(self: Compiler, node: Statement) = elif self.replMode: case kind.kind: of Int64: - self.emitByte(PrintInt64) + self.emitByte(PrintInt64, node.token.line) of UInt64: - self.emitByte(PrintUInt64) + self.emitByte(PrintUInt64, node.token.line) of Int32: - self.emitByte(PrintInt32) + self.emitByte(PrintInt32, node.token.line) of UInt32: - self.emitByte(PrintInt32) + self.emitByte(PrintInt32, node.token.line) of Int16: - self.emitByte(PrintInt16) + self.emitByte(PrintInt16, node.token.line) of UInt16: - self.emitByte(PrintUInt16) + self.emitByte(PrintUInt16, node.token.line) of Int8: - self.emitByte(PrintInt8) + self.emitByte(PrintInt8, node.token.line) of UInt8: - self.emitByte(PrintUInt8) + self.emitByte(PrintUInt8, node.token.line) of Float64: - self.emitByte(PrintFloat64) + self.emitByte(PrintFloat64, node.token.line) of Float32: - self.emitByte(PrintFloat32) + self.emitByte(PrintFloat32, node.token.line) of Bool: - self.emitByte(PrintBool) + self.emitByte(PrintBool, node.token.line) of Nan: - self.emitByte(PrintNan) + self.emitByte(PrintNan, node.token.line) of Inf: - self.emitByte(PrintInf) + self.emitByte(PrintInf, node.token.line) of String: - self.emitByte(PrintString) + self.emitByte(PrintString, node.token.line) else: - self.emitByte(PrintHex) + self.emitByte(PrintHex, node.token.line) else: - self.emitByte(Pop) + self.emitByte(Pop, node.token.line) of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) of NodeKind.assertStmt: @@ -1660,8 +1826,8 @@ proc varDecl(self: Compiler, node: VarDecl) = self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'") self.expression(node.value) self.declareName(node, mutable=node.token.kind == TokenType.Var) - self.emitByte(StoreVar) - self.emitBytes(self.getStackPos(self.names[^1]).toTriple()) + self.emitByte(StoreVar, node.token.line) + self.emitBytes(self.getStackPos(self.names[^1]).toTriple(), node.token.line) proc typeDecl(self: Compiler, node: TypeDecl) = @@ -1669,78 +1835,26 @@ proc typeDecl(self: Compiler, node: TypeDecl) = # TODO -proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) = - ## Handles the "magic" pragma. Assumes the given name is already - ## declared - if pragma.args.len() != 1: - self.error("'magic' pragma: wrong number of arguments") - elif pragma.args[0].kind != strExpr: - self.error("'magic' pragma: wrong type of argument (constant string expected)") - elif node.kind != NodeKind.funDecl: - self.error("'magic' pragma is not valid in this context") - var node = FunDecl(node) - var fn = self.resolve(node.name) - fn.valueType.isBuiltinFunction = true - fn.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2] - # The magic pragma ignores the function's body - node.body = nil - - -proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) = - ## Handles the "pure" pragma - case node.kind: - of NodeKind.funDecl: - FunDecl(node).isPure = true - of lambdaExpr: - LambdaExpr(node).isPure = true - else: - self.error("'pure' pragma is not valid in this context") - - -proc dispatchPragmas(self: Compiler, node: ASTnode) = - ## Dispatches pragmas bound to objects - var pragmas: seq[Pragma] = @[] - case node.kind: - of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl: - pragmas = Declaration(node).pragmas - of lambdaExpr: - pragmas = LambdaExpr(node).pragmas - else: - discard # Unreachable - for pragma in pragmas: - if pragma.name.token.lexeme notin self.compilerProcs: - self.error(&"unknown pragma '{pragma.name.token.lexeme}'") - self.compilerProcs[pragma.name.token.lexeme](self, pragma, node) - - -proc fixGenericFunc(self: Compiler, name: Name, args: seq[Expression]): Name = - ## Specializes generic arguments in functions - var fn = name.deepCopy() - result = fn - var typ: Type - for i in 0..args.high(): - if fn.valueType.args[i].kind.kind == Generic: - typ = self.inferType(args[i]) - fn.valueType.args[i].kind = typ - self.resolve(fn.valueType.args[i].name).valueType = typ - if fn.valueType.args[i].kind.isNil(): - self.error(&"cannot specialize generic function: argument {i + 1} has no type") - - proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) = ## Compiles function declarations - #[if not node.isNil(): - if node.generics.len() > 0 and fn.isNil() and args.len() == 0: - # Generic function! We can't compile it right now - self.declareName(node) - self.dispatchPragmas(node) - return]# + # A function's code is just compiled linearly + # and then jumped over + if node.token.kind == Operator and node.token.lexeme in [".", ]: + self.error(&"The '{node.token.lexeme}' cannot be overridden", node) + var jmp: int self.declareName(node) self.dispatchPragmas(node) + # Function's code starts after the jump var node = node var fn = if fn.isNil(): self.names[^(node.arguments.len() + 1)] else: fn + # We store the current function + var function = self.currentFunction + if not self.currentFunction.isNil(): + self.currentFunction.valueType.children.add(fn.valueType) + self.currentFunction = fn var names = self.names[^(node.arguments.len())..^1] if fn.valueType.isBuiltinFunction: + fn.codePos = self.chunk.code.len() # We take the arguments off of our name list # because they become temporaries on the stack. # Builtin functions (usually) map to a single @@ -1749,42 +1863,25 @@ proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression # This also means that peon's fast builtins # can only be relatively simple self.names = self.names[0..^node.arguments.len() + 1] - else: - var function = self.currentFunction - var jmp: int - # A function's code is just compiled linearly - # and then jumped over - jmp = self.emitJump(JumpForwards) - # Function's code starts after the jump + elif not node.body.isNil(): + jmp = self.emitJump(JumpForwards, node.token.line) fn.codePos = self.chunk.code.len() - # We let our debugger know a function is starting - let start = self.chunk.code.high() - for name in names: - self.emitBytes([NoOp, NoOp, NoOp, NoOp]) - name.codePos = self.chunk.code.len() - 4 - # We store the current function - if not self.currentFunction.isNil(): - self.currentFunction.valueType.childFunc = fn.valueType - self.currentFunction = fn - if node.isNil(): - # We got called back with more specific type - # arguments: time to fix them! - self.currentFunction = self.fixGenericFunc(fn, args) - node = self.currentFunction.valueType.fun - elif not node.body.isNil(): - if BlockStmt(node.body).code.len() == 0: - self.error("cannot declare function with empty body") + # We let our debugger know this function's boundaries + self.chunk.cfi.add(self.chunk.code.high().toTriple()) + self.cfiOffsets.add((value: self.chunk.code.high(), offset: self.chunk.cfi.high() - 2, fn: fn)) + let idx = self.chunk.cfi.len() + self.chunk.cfi.add(0.toTriple()) # Patched it later + self.chunk.cfi.add(uint8(node.arguments.len())) + if not node.name.isNil(): + self.chunk.cfi.add(fn.name.token.lexeme.len().toDouble()) + var s = fn.name.token.lexeme + if s.len() >= uint16.high().int: + s = node.name.token.lexeme[0..uint16.high()] + self.chunk.cfi.add(s.toBytes()) else: - discard # TODO: Forward declarations - let impl = self.findByType(fn.name.token.lexeme, fn.valueType, self.scopeDepth) - if impl.len() > 1: - # We found more than one (public) implementation of - # the same function with the same name: this is an - # error, as it would raise ambiguity when calling them - var msg = &"multiple matching implementations of '{fn.name.token.lexeme}' found:\n" - for f in reversed(impl): - msg &= &"- in module '{f.owner}' at line {f.line} of type {self.typeToStr(f.valueType)}\n" - self.error(msg) + self.chunk.cfi.add(0.toDouble()) + if BlockStmt(node.body).code.len() == 0: + self.error("cannot declare function with empty body") # Since the deferred array is a linear # sequence of instructions and we want # to keep track to whose function's each @@ -1812,27 +1909,19 @@ proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression # body: while this is not a tremendously useful piece of information (since # the presence of at least one doesn't mean all control flow cases are # covered), it definitely is an error worth reporting - self.error("function has an explicit return type, but no return statement was found") + self.error("function has an explicit return type, but no return statement was found", node) hasVal = hasVal and not typ.isNil() self.endScope() # Terminates the function's context - self.emitByte(OpCode.Return) + self.emitByte(OpCode.Return, self.peek().token.line) if hasVal: - self.emitByte(1) + self.emitByte(1, self.peek().token.line) else: - self.emitByte(0) - # Some debugging info here - self.chunk.cfi.add(start.toTriple()) - self.chunk.cfi.add(self.chunk.code.high().toTriple()) - self.chunk.cfi.add(uint8(node.arguments.len())) - if not node.name.isNil(): - self.chunk.cfi.add(fn.name.token.lexeme.len().toDouble()) - var s = fn.name.token.lexeme - if s.len() >= uint16.high().int: - s = node.name.token.lexeme[0..uint16.high()] - self.chunk.cfi.add(s.toBytes()) - else: - self.chunk.cfi.add(0.toDouble()) + self.emitByte(0, self.peek().token.line) + let stop = self.chunk.code.len().toTriple() + self.chunk.cfi[idx] = stop[0] + self.chunk.cfi[idx + 1] = stop[1] + self.chunk.cfi[idx + 2] = stop[2] # Currently defer is not functional, so we # just pop the instructions for _ in deferStart..self.deferred.high(): @@ -1840,21 +1929,11 @@ proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression # Well, we've compiled everything: time to patch # the jump offset self.patchJump(jmp) - # Pops a call frame - # discard self.frames.pop() - # Restores the enclosing function (if any). - # Makes nested calls work (including recursion) - self.currentFunction = function - - -proc patchReturnAddress(self: Compiler, pos: int) = - ## Patches the return address of a function - ## call - let address = self.chunk.code.len().toQuad() - self.chunk.code[pos] = address[0] - self.chunk.code[pos + 1] = address[1] - self.chunk.code[pos + 2] = address[2] - self.chunk.code[pos + 3] = address[3] + else: + discard # TODO: Forward declarations + # Restores the enclosing function (if any). + # Makes nested calls work (including recursion) + self.currentFunction = function proc declaration(self: Compiler, node: Declaration) = @@ -1870,61 +1949,6 @@ proc declaration(self: Compiler, node: Declaration) = self.statement(Statement(node)) -proc terminateProgram(self: Compiler, pos: int, terminateScope: bool = true) = - ## Utility to terminate a peon program - if terminateScope: - self.endScope() - self.patchReturnAddress(pos) - self.emitByte(OpCode.Return) - self.emitByte(0) # Entry point has no return value (TODO: Add easter eggs, cuz why not) - - -proc beginProgram(self: Compiler, incremental: bool = false): int = - ## Utility to begin a peon program - ## compiled. - ## Returns a dummy return address of - ## the implicit main to be patched by - ## terminateProgram - - # Every peon program has a hidden entry point in - # which user code is wrapped. Think of it as if - # peon is implicitly writing the main() function - # of your program and putting all of your code in - # there. While we call our entry point just like - # any regular peon function, we can't use our handy - # helper generateCall() because we need to keep track - # of where our program ends (which we don't know yet). - # To fix this, we emit dummy offsets and patch them - # later, once we know the boundaries of our hidden main() - var main: Name - if incremental: - main = self.names[0] - else: - main = Name(depth: 0, - isPrivate: true, - isConst: false, - isLet: false, - isClosedOver: false, - owner: self.currentModule, - valueType: Type(kind: Function, - name: "", - returnType: nil, - args: @[], - ), - codePos: 13, # Jump address is hardcoded - name: newIdentExpr(Token(lexeme: "", kind: Identifier)), - isFunDecl: true, - line: -1) - self.names.add(main) - self.emitByte(LoadFunction) - self.emitBytes(main.codePos.toTriple()) - self.emitByte(LoadReturnAddress) - self.emitBytes(0.toQuad()) - self.emitByte(Call) - self.emitBytes(0.toTriple()) - result = 5 - - proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil, terminateScope: bool = true, incremental: bool = false): Chunk = ## Compiles a sequence of AST nodes into a chunk @@ -1942,20 +1966,20 @@ proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tu self.currentFunction = nil self.currentModule = self.file.extractFilename() self.current = 0 - self.frames = @[0] self.lines = lines self.source = source + self.jumps = @[] let pos = self.beginProgram(incremental) if incremental and self.replMode: - for i in countup(1, 2): - discard self.chunk.code.pop() + discard self.chunk.code.pop() + discard self.chunk.code.pop() while not self.done(): self.declaration(Declaration(self.step())) self.terminateProgram(pos, terminateScope) result = self.chunk if incremental and not self.replMode: - for i in countup(1, 2): - discard self.chunk.code.pop() + discard self.chunk.code.pop() + discard self.chunk.code.pop() proc compileModule(self: Compiler, filename: string) = @@ -1968,7 +1992,7 @@ proc compileModule(self: Compiler, filename: string) = var parser = newParser() var compiler = newCompiler() lexer.fillSymbolTable() - let source = readFile(joinPath(splitPath(self.file).head, filename)) + let source = readFile(path) let tokens = lexer.lex(source, filename) let ast = parser.parse(tokens, filename, lexer.getLines(), source) compiler.names.add(self.names[0]) diff --git a/src/frontend/meta/ast.nim b/src/frontend/meta/ast.nim index 26df8c8..9f0f276 100644 --- a/src/frontend/meta/ast.nim +++ b/src/frontend/meta/ast.nim @@ -182,7 +182,6 @@ type isPure*: bool returnType*: Expression hasExplicitReturn*: bool - freeVars*: seq[IdentExpr] depth*: int SliceExpr* = ref object of Expression @@ -264,7 +263,6 @@ type isPure*: bool returnType*: Expression hasExplicitReturn*: bool - freeVars*: seq[IdentExpr] depth*: int TypeDecl* = ref object of Declaration @@ -428,7 +426,6 @@ proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression] result.isPure = false result.pragmas = pragmas result.generics = generics - result.freeVars = freeVars result.depth = depth @@ -634,7 +631,6 @@ proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueTyp result.returnType = returnType result.isPure = false result.generics = generics - result.freeVars = freeVars result.depth = depth @@ -735,13 +731,13 @@ proc `$`*(self: ASTNode): string = result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType}, pragmas={self.pragmas})" of funDecl: var self = FunDecl(self) - result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate}, pragmas={self.pragmas}, vars=[{self.freeVars.join(", ")}])""" + result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate}, pragmas={self.pragmas})""" of typeDecl: var self = TypeDecl(self) result &= &"""TypeDecl(name={self.name}, fields={self.fields}, defaults={self.defaults}, private={self.isPrivate}, pragmas={self.pragmas}, generics={self.generics}, pragmas={self.pragmas}, type={self.valueType})""" of lambdaExpr: var self = LambdaExpr(self) - result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync}, pragmas={self.pragmas}, vars=[{self.freeVars.join(", ")}])""" + result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync}, pragmas={self.pragmas})""" of deferStmt: var self = DeferStmt(self) result &= &"Defer({self.expression})" diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim index 7c47d6a..354e53d 100644 --- a/src/frontend/meta/bytecode.nim +++ b/src/frontend/meta/bytecode.nim @@ -29,13 +29,10 @@ type ## Length Encoding. Instructions are encoded in groups whose structure ## follows the following schema: ## - The first integer represents the line number - ## - The second integer represents the count of whatever comes after it - ## (let's call it c) - ## - After c, a sequence of c integers follows - ## - ## A visual representation may be easier to understand: [1, 2, 3, 4] - ## This is to be interpreted as "there are 2 instructions at line 1 whose values - ## are 3 and 4" + ## - The second integer represents the number of + ## instructions on that line + ## For example, if lines equals [1, 5], it means that there are 5 instructions + ## at line 1, meaning that all instructions in code[0..4] belong to the same line. ## This is more efficient than using the naive approach, which would encode ## the same line number multiple times and waste considerable amounts of space. ## cfi represents Call Frame Information and encodes the following information: @@ -81,8 +78,6 @@ type LoadFloat64, LoadFloat32, LoadString, - LoadFunction, - LoadReturnAddress, ## Singleton opcodes (each of them pushes a constant singleton on the stack) LoadNil, LoadTrue, @@ -164,6 +159,7 @@ type JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and) ## Functions Call, # Calls a function and initiates a new stack frame + CallClosure, # Calls a closure Return, # Terminates the current function SetResult, # Sets the result of the current function ## Exception handling @@ -264,8 +260,6 @@ const stackDoubleInstructions* = {} # Argument double argument instructions take hardcoded arguments as 16 bit integers const argumentDoubleInstructions* = {PopN, } -# Argument double argument instructions take hardcoded arguments as 24 bit integers -const argumentTripleInstructions* = {StoreClosure} # Jump instructions jump at relative or absolute bytecode offsets const jumpInstructions* = {Jump, JumpIfFalse, JumpIfFalsePop, @@ -329,6 +323,15 @@ proc getLine*(self: Chunk, idx: int): int = raise newException(IndexDefect, "index out of range") +proc getIdx*(self: Chunk, line: int): int = + ## Gets the index into self.lines + ## where the line counter for the given + ## line is located + for i, v in self.lines: + if (i and 1) != 0 and v == line: + return i + + proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] = ## Writes a series of bytes to the chunk's constant ## table and returns the index of the first byte as diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim index 5011b46..e452dc0 100644 --- a/src/frontend/parser.nim +++ b/src/frontend/parser.nim @@ -86,6 +86,8 @@ type currentFunction: Declaration # Stores the current scope depth (0 = global, > 0 local) scopeDepth: int + # TODO + scopes: seq[Declaration] operators: OperatorTable # The AST node tree: seq[Declaration] @@ -170,7 +172,7 @@ proc getLines*(self: Parser): seq[tuple[start, stop: int]] = self.lines proc getSource*(self: Parser): string = self.source proc getRelPos*(self: Parser, line: int): tuple[start, stop: int] = self.lines[line - 1] template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1) -template endOfLine(msg: string) = self.expect(Semicolon, msg) +template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg, tok) @@ -265,19 +267,19 @@ proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = result = false -proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "") = +proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "", token: Token = nil) = ## Behaves like self.match(), except that ## when a token doesn't match, an error ## is raised. If no error message is ## given, a default one is used if not self.match(kind): if message.len() == 0: - self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead") + self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead", token) else: self.error(message) -proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "") {.used.} = +proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "", token: Token = nil) {.used.} = ## Behaves like self.expect(), except that ## an error is raised only if none of the ## given token kinds matches @@ -285,7 +287,7 @@ proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: s if self.match(kind): return if message.len() == 0: - self.error(&"""expecting any of the following tokens: {kind.join(", ")}, but got {self.peek().kind} instead""") + self.error(&"""expecting any of the following tokens: {kind.join(", ")}, but got {self.peek().kind} instead""", token) # Forward declarations @@ -320,16 +322,6 @@ proc primary(self: Parser): Expression = result = newIntExpr(self.step()) of Identifier: result = newIdentExpr(self.step(), self.scopeDepth) - if not self.currentFunction.isNil() and self.scopeDepth > 0: - case self.currentFunction.kind: - of NodeKind.funDecl: - if FunDecl(self.currentFunction).depth != self.scopeDepth: - FunDecl(self.currentFunction).freeVars.add(IdentExpr(result)) - of NodeKind.lambdaExpr: - if LambdaExpr(self.currentFunction).depth != self.scopeDepth: - LambdaExpr(self.currentFunction).freeVars.add(IdentExpr(result)) - else: - discard # Unreachable of LeftParen: let tok = self.step() result = newGroupingExpr(self.expression(), tok) @@ -1036,6 +1028,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, elif isLambda: self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=isGenerator, isAsync=isAsync, token=tok, returnType=nil, depth=self.scopeDepth) + self.scopes.add(FunDecl(self.currentFunction)) if self.match(":"): # Function has explicit return type if self.match([Function, Coroutine, Generator]): @@ -1101,11 +1094,12 @@ proc expression(self: Parser): Expression = result = self.parseArrow() # Highest-level expression + proc expressionStatement(self: Parser): Statement = ## Parses expression statements, which ## are expressions followed by a semicolon var expression = self.expression() - endOfLine("missing expression terminator") + endOfLine("missing expression terminator", expression.token) result = Statement(newExprStmt(expression, expression.token)) @@ -1254,6 +1248,7 @@ proc findOperators(self: Parser, tokens: seq[Token]) = self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token) + proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] = ## Parses a sequence of tokens into a sequence of AST nodes self.tokens = tokens diff --git a/src/util/debugger.nim b/src/util/debugger.nim index 46721de..c5bc7ee 100644 --- a/src/util/debugger.nim +++ b/src/util/debugger.nim @@ -124,7 +124,7 @@ proc argumentDoubleInstruction(self: Debugger, instruction: OpCode) = self.current += 3 -proc argumentTripleInstruction(self: Debugger, instruction: OpCode) = +proc argumentTripleInstruction(self: Debugger, instruction: OpCode) {.used.} = ## Debugs instructions that operate on a hardcoded value on the stack using a 24-bit operand var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple() printInstruction(instruction) @@ -132,6 +132,15 @@ proc argumentTripleInstruction(self: Debugger, instruction: OpCode) = self.current += 4 +proc storeClosureInstruction(self: Debugger, instruction: OpCode) = + ## Debugs instructions that operate on a hardcoded value on the stack using a 24-bit operand + var idx = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple() + var idx2 = [self.chunk.code[self.current + 4], self.chunk.code[self.current + 5], self.chunk.code[self.current + 6]].fromTriple() + printInstruction(instruction) + stdout.styledWriteLine(fgGreen, ", stores element at position ", fgYellow, $idx, fgGreen, " into position ", fgYellow, $idx2) + self.current += 7 + + proc callInstruction(self: Debugger, instruction: OpCode) = ## Debugs function calls var size = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple() @@ -141,23 +150,6 @@ proc callInstruction(self: Debugger, instruction: OpCode) = self.current += 1 -proc functionInstruction(self: Debugger, instruction: OpCode) = - ## Debugs function calls - var address = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple() - printInstruction(instruction) - styledEcho fgGreen, &", loads function at address ", fgYellow, $address - self.current += 4 - - -proc loadAddressInstruction(self: Debugger, instruction: OpCode) = - ## Debugs LoadReturnAddress instructions - var address = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3], - self.chunk.code[self.current + 4]].fromQuad() - printInstruction(instruction) - styledEcho fgGreen, &", loads address ", fgYellow, $address - self.current += 5 - - proc constantInstruction(self: Debugger, instruction: OpCode) = ## Debugs instructions that operate on the constant table var size: uint @@ -207,16 +199,12 @@ proc disassembleInstruction*(self: Debugger) = self.stackTripleInstruction(opcode) of argumentDoubleInstructions: self.argumentDoubleInstruction(opcode) - of argumentTripleInstructions: - self.argumentTripleInstruction(opcode) + of StoreClosure: + self.storeClosureInstruction(opcode) of Call: self.callInstruction(opcode) of jumpInstructions: self.jumpInstruction(opcode) - of LoadFunction: - self.functionInstruction(opcode) - of LoadReturnAddress: - self.loadAddressInstruction(opcode) else: echo &"DEBUG - Unknown opcode {opcode} at index {self.current}" self.current += 1 diff --git a/tests/closures.pn b/tests/closures.pn index 6ffb7d3..95dd1a1 100644 --- a/tests/closures.pn +++ b/tests/closures.pn @@ -2,16 +2,16 @@ import std; -fn makeClosure(n: int): fn: int { - fn inner: int { - return n; +fn makeClosure(n: int): fn: fn: int { + fn inner: fn: int { + fn deep: int { + return n; + } + return deep; } return inner; } -var closed = makeClosure(1)(); -print(closed); # 1 -print(makeClosure(2)()); # 2 -var closure = makeClosure(3); -print(closure()); # 3 -print(closure()); # 3 + +print(makeClosure(1)()()); # 1 +