# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../util/multibyte
import ../util/symbols
import lexer as l
import parser as p

import std/tables
import std/strformat
import std/algorithm
import std/parseutils
import std/strutils
import std/sequtils
import std/sets
import std/os

export ast
export token
export multibyte


type
  TypeKind = enum
    ## An enumeration of compile-time
    ## types
    Int8, UInt8, Int16, UInt16, Int32,
    UInt32, Int64, UInt64, Float32, Float64,
    Char, Byte, String, Function, CustomType,
    Nil, Nan, Bool, Inf, Typevar, Generic,
    Reference, Pointer
    Any  # Any is used internally in a few cases,
         # for example when looking for operators
         # when only the type of the arguments is of
         # interest

  Type = ref object
    ## A wrapper around
    ## compile-time types
    mutable: bool
    case kind: TypeKind:
      of Function:
        name: string
        isLambda: bool
        isGenerator: bool
        isCoroutine: bool
        args: seq[tuple[name: string, kind: Type]]
        returnType: Type
        isBuiltinFunction: bool
        builtinOp: string
        fun: FunDecl
        isClosure: bool
        envLen: int
        children: seq[Type]
      of Reference, Pointer:
        value: Type
      of Generic:
        node: IdentExpr
      else:
        discard

# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode


type
  Name = ref object
    ## A compile-time wrapper around
    ## statically resolved names

    # Name of the identifier
    name: IdentExpr
    # Owner of the identifier (module)
    owner: string
    # Scope depth
    depth: int
    # Is this name private?
    isPrivate: bool
    # Is this a constant?
    isConst: bool
    # Can this name's value be mutated?
    isLet: bool
    # The name's type
    valueType: Type
    # For functions, this marks where the function's
    # code begins
    codePos: int
    # The function that owns this variable (may be nil!)
    belongsTo: Name
    # Is this a function argument?
    isFunctionArgument: bool
    # Where is this node declared in the file?
    line: int
    # Has this name been closed over?
    isClosedOver: bool
    # Is this a function declaration or a variable
    # with a function as value? (The distinction *is*
    # important! Check emitFunction())
    isFunDecl: bool

  Loop = object
    ## A "loop object" used
    ## by the compiler to emit
    ## appropriate jump offsets
    ## for continue and break
    ## statements

    # Position in the bytecode where the loop starts
    start: int
    # Scope depth where the loop is located
    depth: int
    # Absolute jump offsets into our bytecode that we need to
    # patch. Used for break statements
    breakPos: seq[int]

  Compiler* = ref object
    ## A wrapper around the Peon compiler's state

    # The bytecode chunk where we write code to
    chunk: Chunk
    # The output of our parser (AST)
    ast: seq[Declaration]
    # The current AST node we're looking at
    current: int
    # The current file being compiled (used only for
    # error reporting)
    file: string
    # Compile-time "simulation" of the stack at
    # runtime to load variables that have stack
    # behavior more efficiently
    names: seq[Name]
    # The current scope depth. If > 0, we're
    # in a local scope, otherwise it's global
    scopeDepth: int
    # The current function being compiled
    currentFunction: Name
    # Are optimizations turned on?
    enableOptimizations: bool
    # The current loop being compiled (used to
    # keep track of where to jump)
    currentLoop: Loop
    # Are we in REPL mode? If so, Pop instructions
    # for expression statements at the top level are
    # swapped for a special PopRepl instruction that
    # prints the result of the expression once it is
    # evaluated
    replMode: bool
    # The current module being compiled
    # (used to restrict access to statically
    # defined variables at compile time)
    currentModule: string
    # Each time a defer statement is
    # compiled, its code is emitted
    # here. Later, if there is any code
    # to defer in the current function,
    # funDecl will wrap the function's code
    # inside an implicit try/finally block
    # and add this code in the finally branch.
    # This sequence is emptied each time a
    # function declaration is compiled and stores only
    # deferred code for the current function (may
    # be empty)
    deferred: seq[uint8]
    # List of closed-over variables
    closedOver: seq[Name]
    # Compiler procedures called by pragmas
    compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]
    # Stores line data
    lines: seq[tuple[start, stop: int]]
    # The source of the current module
    source: string
    # Currently imported modules
    modules: HashSet[string]
    # Stores the position of all jumps
    jumps: seq[tuple[patched: bool, offset: int]]
    # List of CFI start offsets into our CFI data
    cfiOffsets: seq[tuple[value, offset: int, fn: Name]]

  CompileError* = ref object of PeonException
    compiler*: Compiler
    node*: ASTNode
    file*: string
    module*: string


# Forward declarations
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              terminateScope: bool = true, incremental: bool = false): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name]
proc compareTypes(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, pos: int)
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc dispatchPragmas(self: Compiler, node: ASTnode)
proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[])
proc compileModule(self: Compiler, filename: string)
# End of forward declarations


proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler =
  ## Initializes a new Compiler object
  new(result)
  result.ast = @[]
  result.current = 0
  result.file = ""
  result.names = @[]
  result.scopeDepth = 0
  result.lines = @[]
  result.jumps = @[]
  result.currentFunction = nil
  result.enableOptimizations = enableOptimizations
  result.replMode = replMode
  result.currentModule = ""
  result.compilerProcs = newTable[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]()
  result.compilerProcs["magic"] = handleMagicPragma
  result.compilerProcs["pure"] = handlePurePragma
  result.source = ""


## Public getters for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
  ## Returns the most recently consumed AST node (the
  ## last node once the whole tree has been consumed).
  ## Returns nil when the AST is empty and the first
  ## node when nothing has been consumed yet, instead
  ## of indexing out of bounds
  if self.ast.len() == 0:
    return nil
  if self.current >= self.ast.len():
    result = self.ast[^1]
  else:
    # self.current may still be 0 here: clamp so we
    # never index at -1
    result = self.ast[max(self.current - 1, 0)]


proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = (if self.currentFunction.isNil(): nil else: self.currentFunction.valueType.fun)
proc getFile*(self: Compiler): string {.inline.} = self.file
proc getModule*(self: Compiler): string {.inline.} = self.currentModule
proc getLines*(self: Compiler): seq[tuple[start, stop: int]] = self.lines
proc getSource*(self: Compiler): string = self.source
proc getRelPos*(self: Compiler, line: int): tuple[start, stop: int] = self.lines[line - 1]


## Utility functions
proc `$`*(self: Name): string = $self[]


proc peek(self: Compiler, distance: int = 0): ASTNode =
  ## Peeks at the AST node at the given distance.
  ## If the distance is out of bounds, the last
  ## AST node in the tree is returned. A negative
  ## distance may be used to retrieve previously
  ## consumed AST nodes. Returns nil if the tree
  ## is empty (there is no "last node" to return)
  if self.ast.len() == 0:
    return nil
  let idx = self.current + distance
  if idx > self.ast.high() or idx < 0:
    result = self.ast[^1]
  else:
    result = self.ast[idx]


proc done(self: Compiler): bool {.inline.} =
  ## Returns true if the compiler is done
  ## compiling, false otherwise
  result = self.current > self.ast.high()


proc error(self: Compiler, message: string, node: ASTNode = nil) {.raises: [CompileError], inline.} =
  ## Raises a CompileError exception
  raise CompileError(msg: message, node: if node.isNil(): self.getCurrentNode() else: node, file: self.file, module: self.currentModule, compiler: self)


proc step(self: Compiler): ASTNode {.inline.} =
  ## Steps to the next node and returns
  ## the consumed one
  result = self.peek()
  if not self.done():
    self.current += 1


proc emitByte(self: Compiler, byt: OpCode | uint8, line: int) {.inline.} =
  ## Emits a single byte, writing it to
  ## the current chunk being compiled
  self.chunk.write(uint8 byt, line)


proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
  ## Handy helper method to write arbitrary bytes into
  ## the current chunk, calling emitByte on each of its
  ## elements
  for b in bytarr:
    self.emitByte(b, line)


proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
  ## Adds a constant to the current chunk's constant table
  ## and returns its index as a 3-byte array of uint8s
  var lit: string
  if typ.kind in [UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64]:
    lit = val.token.lexeme
    if "'" in lit:
      # Drop the size specifier: keep everything that
      # comes before the last apostrophe
      lit = lit[0 ..< lit.rfind('\'')]
  case typ.kind:
    of UInt8, Int8:
      result = self.chunk.writeConstant([uint8(parseInt(lit))])
    of Int16, UInt16:
      result = self.chunk.writeConstant(parseInt(lit).toDouble())
    of Int32, UInt32:
      result = self.chunk.writeConstant(parseInt(lit).toQuad())
    of Int64:
      result = self.chunk.writeConstant(parseInt(lit).toLong())
    of UInt64:
      result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
    of String:
      # The lexeme carries both the opening and the closing
      # quote: strip both, so that the stored bytes match the
      # length emitted by emitConstant (lexeme length - 2)
      result = self.chunk.writeConstant(val.token.lexeme[1..^2].toBytes())
    of Float32:
      var f: float = 0.0
      discard parseFloat(val.token.lexeme, f)
      result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
    of Float64:
      var f: float = 0.0
      discard parseFloat(val.token.lexeme, f)
      result = self.chunk.writeConstant(cast[array[8, uint8]](f))
    else:
      discard


proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
  ## Emits a constant instruction along
  ## with its operand
  case kind.kind:
    of Int64:
      self.emitByte(LoadInt64, obj.token.line)
    of UInt64:
      self.emitByte(LoadUInt64, obj.token.line)
    of Int32:
      self.emitByte(LoadInt32, obj.token.line)
    of UInt32:
      self.emitByte(LoadUInt32, obj.token.line)
    of Int16:
      self.emitByte(LoadInt16, obj.token.line)
    of UInt16:
      self.emitByte(LoadUInt16, obj.token.line)
    of Int8:
      self.emitByte(LoadInt8, obj.token.line)
    of UInt8:
      self.emitByte(LoadUInt8, obj.token.line)
    of String:
      self.emitByte(LoadString, obj.token.line)
      let str = LiteralExpr(obj).literal.lexeme
      if str.len() >= 16777216:
        self.error("string constants cannot be larger than 16777215 bytes")
      # The two quotes are not part of the string's payload
      self.emitBytes((str.len() - 2).toTriple(), obj.token.line)
    of Float32:
      self.emitByte(LoadFloat32, obj.token.line)
    of Float64:
      self.emitByte(LoadFloat64, obj.token.line)
    else:
      discard  # TODO
  self.emitBytes(self.makeConstant(obj, kind), obj.token.line)


proc setJump(self: Compiler, offset: int, jmp: array[3, uint8]) =
  ## Sets a jump at the given
  ## offset to the given value
  self.chunk.code[offset + 1] = jmp[0]
  self.chunk.code[offset + 2] = jmp[1]
  self.chunk.code[offset + 3] = jmp[2]


proc setJump(self: Compiler, offset: int, jmp: seq[uint8]) =
  ## Sets a jump at the given
  ## offset to the given value
  self.chunk.code[offset + 1] = jmp[0]
  self.chunk.code[offset + 2] = jmp[1]
  self.chunk.code[offset + 3] = jmp[2]


proc patchJump(self: Compiler, offset: int) =
  ## Patches a previously emitted relative
  ## jump using emitJump
  var jump: int = self.chunk.code.len() - self.jumps[offset].offset
  if jump < 0:
    self.error("invalid jump size (< 0), did the bytecode size change without fixJumps being called?")
  if jump > 16777215:
    self.error("cannot jump more than 16777215 instructions")
  self.setJump(self.jumps[offset].offset, (jump - 4).toTriple())
  self.jumps[offset].patched = true


proc emitJump(self: Compiler, opcode: OpCode, line: int): int =
  ## Emits a dummy jump offset to be patched later
  ## and returns a unique identifier for that jump
  ## to be passed to patchJump
  self.emitByte(opcode, line)
  self.jumps.add((patched: false, offset: self.chunk.code.high()))
  self.emitBytes(0.toTriple(), line)
  result = self.jumps.high()


proc fixCFIOffsets(self: Compiler, oldLen: int, modifiedAt: int) =
  ## Fixes CFI offsets after the size of our
  ## bytecode has changed
  if oldLen == self.chunk.code.len():
    return
  let offset = self.chunk.code.len() - oldLen
  var newCFI: array[3, uint8]
  var tmp: int
  for cfi in self.cfiOffsets:
    if cfi.offset >= modifiedAt:
      newCFI = (cfi.value + offset).toTriple()
      self.chunk.cfi[cfi.offset] = newCFI[0]
      self.chunk.cfi[cfi.offset + 1] = newCFI[1]
      self.chunk.cfi[cfi.offset + 2] = newCFI[2]
      tmp = [self.chunk.cfi[cfi.offset + 3], self.chunk.cfi[cfi.offset + 4], self.chunk.cfi[cfi.offset + 5]].fromTriple().int
      newCFI = (tmp + offset).toTriple()
      self.chunk.cfi[cfi.offset + 3] = newCFI[0]
      self.chunk.cfi[cfi.offset + 4] = newCFI[1]
      self.chunk.cfi[cfi.offset + 5] = newCFI[2]
      cfi.fn.codePos += offset


proc fixJumps(self: Compiler, oldLen: int, modifiedAt: int) =
  ## Fixes jump offsets after the size
  ## of our bytecode has changed
  if oldLen == self.chunk.code.len():
    return
  let offset = self.chunk.code.len() - oldLen
  for jump in self.jumps.mitems():
    if jump.offset >= modifiedAt:
      # While all already-patched jumps need
      # to have their jump offsets fixed, we
      # also need to update our internal jumps
      # list in cases where we shifted the jump
      # instruction itself into the code!
      jump.offset += offset
#[
NOTE(review): the source text is corrupted from this point: everything
between the end of fixJumps and the tail of a closure-position lookup
has been lost. Later code calls self.resolve() with an IdentExpr,
self.getStackPos() and self.getClosurePos(), none of which is defined
in what survives, so they presumably lived here. Restore this region
from version control; the surviving residue is kept verbatim below.

if jump.patched: self.setJump(jump.offset, self.chunk.code[jump.offset..= 0: if name == self.closedOver[i]: return i dec(result) return -1
]#


proc resolve(self: Compiler, name: string, depth: int = self.scopeDepth): Name =
  ## Traverses self.names backwards and returns the
  ## first name object with the given name. Returns
  ## nil when the name can't be found. This function
  ## has no concept of scope depth, because getStackPos
  ## does that job. Note that private names declared in
  ## other modules will not be resolved!
  for obj in reversed(self.names):
    if obj.name.token.lexeme == name:
      if obj.isPrivate and obj.owner != self.currentModule:
        continue  # There may be a name in the current module that
                  # matches, so we skip this
      return obj
  return nil


proc compareTypes(self: Compiler, a, b: Type): bool =
  ## Compares two type objects
  ## for equality (works with nil!)

  # The nil code here is for void functions (when
  # we compare their return types)
  if a.isNil():
    return b.isNil() or b.kind == Any
  elif b.isNil():
    return a.isNil() or a.kind == Any
  elif a.kind == Any or b.kind == Any:
    # This is needed internally: user code
    # cannot generate code for matching
    # arbitrary types, but we need it for
    # function calls and stuff like that
    # since peon doesn't have return type
    # inference
    return true
  elif a.kind == Generic or b.kind == Generic:
    # Matching generic argument types
    return true
  elif a.kind != b.kind:
    # Next, we see the type discriminant:
    # If they're different, then they can't
    # be the same type!
    return false
  case a.kind:
    # If all previous checks pass, it's time
    # to go through each possible type peon
    # supports and compare it
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, Nan, Bool, Inf:
      # A value type's type is always equal to
      # another one's
      return true
    of Reference, Pointer:
      # Here we already know that both
      # a and b are of either of the two
      # types in this branch, so we just need
      # to compare their values
      return self.compareTypes(a.value, b.value)
    of Function:
      # Functions are a bit trickier
      if a.args.len() != b.args.len():
        return false
      elif not self.compareTypes(a.returnType, b.returnType):
        return false
      for (argA, argB) in zip(a.args, b.args):
        if not self.compareTypes(argA.kind, argB.kind):
          return false
      return true
    else:
      # TODO: Custom types
      discard


proc toIntrinsic(name: string): Type =
  ## Converts a string to an intrinsic
  ## type if it is valid and returns nil
  ## otherwise
  if name in ["int", "int64", "i64"]:
    return Type(kind: Int64)
  elif name in ["uint64", "u64", "uint"]:
    return Type(kind: UInt64)
  elif name in ["int32", "i32"]:
    return Type(kind: Int32)
  elif name in ["uint32", "u32"]:
    return Type(kind: UInt32)
  elif name in ["int16", "i16", "short"]:
    return Type(kind: Int16)
  elif name in ["uint16", "u16"]:
    return Type(kind: UInt16)
  elif name in ["int8", "i8"]:
    return Type(kind: Int8)
  elif name in ["uint8", "u8"]:
    return Type(kind: UInt8)
  elif name in ["f64", "float", "float64"]:
    return Type(kind: Float64)
  elif name in ["f32", "float32"]:
    return Type(kind: Float32)
  elif name in ["byte", "b"]:
    return Type(kind: Byte)
  elif name in ["char", "c"]:
    return Type(kind: Char)
  elif name == "nan":
    return Type(kind: Nan)
  elif name == "nil":
    return Type(kind: Nil)
  elif name == "inf":
    return Type(kind: Inf)
  elif name == "bool":
    return Type(kind: Bool)
  elif name == "typevar":
    return Type(kind: Typevar)
  elif name == "string":
    return Type(kind: String)
  else:
    return nil


proc inferType(self: Compiler, node: LiteralExpr): Type =
  ## Infers the type of a given literal expression
  if node.isNil():
    return nil
  case node.kind:
    of intExpr, binExpr, octExpr, hexExpr:
      let size = node.token.lexeme.split("'")
      if len(size) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1:
        return Type(kind: Int64)
      let typ = size[1].toIntrinsic()
      if not self.compareTypes(typ, nil):
        return typ
      else:
        self.error(&"invalid type specifier '{size[1]}' for int")
    of floatExpr:
      let size = node.token.lexeme.split("'")
      if len(size) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1 or size[1] == "f64":
        return Type(kind: Float64)
      let typ = size[1].toIntrinsic()
      if not self.compareTypes(typ, nil):
        return typ
      else:
        self.error(&"invalid type specifier '{size[1]}' for float")
    of nilExpr:
      return Type(kind: Nil)
    of trueExpr:
      return Type(kind: Bool)
    of falseExpr:
      return Type(kind: Bool)
    of nanExpr:
      return Type(kind: TypeKind.Nan)
    of infExpr:
      return Type(kind: TypeKind.Inf)
    of strExpr:
      return Type(kind: String)
    else:
      discard  # TODO


proc matchImpl(self: Compiler, name: string, kind: Type): Name


proc inferType(self: Compiler, node: Expression): Type =
  ## Infers the type of a given expression and
  ## returns it
  if node.isNil():
    return nil
  case node.kind:
    of identExpr:
      let node = IdentExpr(node)
      let name = self.resolve(node)
      if not name.isNil():
        result = name.valueType
      else:
        result = node.name.lexeme.toIntrinsic()
    of unaryExpr:
      let node = UnaryExpr(node)
      return self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.inferType(node.a))])).valueType.returnType
    of binaryExpr:
      let node = BinaryExpr(node)
      return self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.inferType(node.a)), ("", self.inferType(node.b))])).valueType.returnType
    of {intExpr, hexExpr, binExpr, octExpr,
        strExpr, falseExpr, trueExpr, infExpr,
        nanExpr, floatExpr, nilExpr
       }:
      return self.inferType(LiteralExpr(node))
    of lambdaExpr:
      var node = LambdaExpr(node)
      result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
      if not node.returnType.isNil():
        result.returnType = self.inferType(node.returnType)
      for argument in node.arguments:
        result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType)))
    of callExpr:
      var node = CallExpr(node)
      case node.callee.kind:
        of identExpr:
          let resolved = self.resolve(IdentExpr(node.callee))
          if not resolved.isNil():
            result = resolved.valueType.returnType
          else:
            result = nil
        of lambdaExpr:
          result = self.inferType(LambdaExpr(node.callee).returnType)
        of callExpr:
          result = self.inferType(CallExpr(node.callee))
          if not result.isNil():
            result = result.returnType
        else:
          discard  # Unreachable
    of varExpr:
      result = self.inferType(Var(node).value)
      # The wrapped expression may have no type at all
      # (e.g. an undeclared name): don't dereference nil
      if not result.isNil():
        result.mutable = true
    of refExpr:
      result = Type(kind: Reference, value: self.inferType(Ref(node).value))
    of ptrExpr:
      result = Type(kind: Pointer, value: self.inferType(Ptr(node).value))
    of groupingExpr:
      result = self.inferType(GroupingExpr(node).expression)
    else:
      discard  # Unreachable


proc inferType(self: Compiler, node: Declaration, strictMutable: bool = true): Type =
  ## Infers the type of a given declaration
  ## and returns it
  if node.isNil():
    return nil
  case node.kind:
    of NodeKind.funDecl:
      var node = FunDecl(node)
      let resolved = self.resolve(node.name)
      if not resolved.isNil():
        return resolved.valueType
    of NodeKind.varDecl:
      var node = VarDecl(node)
      let resolved = self.resolve(node.name)
      if not resolved.isNil():
        return resolved.valueType
      else:
        # NOTE(review): no inferType overload taking an expression
        # plus a bool is visible in this part of the file; confirm
        # that this call resolves as intended
        return self.inferType(node.value, strictMutable)
    else:
      return  # Unreachable


proc typeToStr(self: Compiler, typ: Type): string =
  ## Returns the string representation of a
  ## type object
  if typ.isNil():
    return "nil"
  case typ.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, TypeKind.Nan, Bool,
       TypeKind.Inf:
      result &= ($typ.kind).toLowerAscii()
    of Pointer:
      result &= &"ptr {self.typeToStr(typ.value)}"
    of Reference:
      result &= &"ref {self.typeToStr(typ.value)}"
    of Function:
      result &= "fn ("
      for i, (argName, argType) in typ.args:
        result &= &"{argName}: "
        if argType.mutable:
          result &= "var "
        result &= self.typeToStr(argType)
        if i < typ.args.len() - 1:
          result &= ", "
      result &= ")"
      if not typ.returnType.isNil():
        result &= &": {self.typeToStr(typ.returnType)}"
    of Generic:
      result = typ.node.name.lexeme
    else:
      discard


proc findByName(self: Compiler, name: string): seq[Name] =
  ## Looks for objects that have been already declared
  ## with the given name. Returns all objects that apply
  for obj in reversed(self.names):
    if obj.name.token.lexeme == name:
      if obj.isPrivate and obj.owner != self.currentModule:
        continue
      result.add(obj)


proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name] =
  ## Looks for objects that have already been declared
  ## with the given name and type. If depth is not -1,
  ## it also compares the name's scope depth
  for obj in self.findByName(name):
    # `and` binds tighter than `or`: without the parentheses a
    # matching depth alone would be enough to accept a name whose
    # type does not match at all
    if self.compareTypes(obj.valueType, kind) and (depth == -1 or depth == obj.depth):
      result.add(obj)


proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] {.used.} =
  ## Looks for objects that have been already declared
  ## with the given name at the given scope depth.
  ## Returns all objects that apply
  for obj in self.findByName(name):
    if obj.depth == depth:
      result.add(obj)


proc matchImpl(self: Compiler, name: string, kind: Type): Name =
  ## Tries to find a matching function implementation
  ## compatible with the given type and returns its
  ## name object
  let impl = self.findByType(name, kind)
  if impl.len() == 0:
    var msg = &"cannot find a suitable implementation for '{name}'"
    let names = self.findByName(name)
    if names.len() > 0:
      msg &= &", found {len(names)} candidate"
      if names.len() > 1:
        msg &= "s"
      msg &= ": "
      for name in names:
        msg &= &"\n - in module '{name.owner}' at line {name.name.token.line} of type '{self.typeToStr(name.valueType)}'"
        if name.valueType.kind != Function:
          msg &= ", not a callable"
        elif kind.args.len() != name.valueType.args.len():
          msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})"
        else:
          for i, arg in kind.args:
            if name.valueType.args[i].kind.mutable and not arg.kind.mutable:
              msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'"
              break
            elif not self.compareTypes(arg.kind, name.valueType.args[i].kind):
              msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead"
              break
    self.error(msg)
  elif impl.len() > 1:
    var msg = &"multiple matching implementations of '{name}' found:\n"
    for fn in reversed(impl):
      msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
    self.error(msg)
  return impl[0]


proc check(self: Compiler, term: Expression, kind: Type) =
  ## Checks the type of term against a known type.
  ## Raises an error if appropriate and returns
  ## otherwise
  let k = self.inferType(term)
  if k.isNil():
    if term.kind == identExpr:
      self.error(&"reference to undeclared name '{term.token.lexeme}'", term)
    elif term.kind == callExpr and CallExpr(term).callee.kind == identExpr:
      self.error(&"call to undeclared function '{CallExpr(term).callee.token.lexeme}'", term)
    self.error(&"expecting value of type '{self.typeToStr(kind)}', but expression has no type", term)
  elif not self.compareTypes(k, kind):
    self.error(&"expecting value of type '{self.typeToStr(kind)}', got '{self.typeToStr(k)}' instead", term)


proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) =
  ## Emits instructions for builtin functions
  ## such as addition or subtraction
  if fn.valueType.builtinOp notin ["LogicalOr", "LogicalAnd"]:
    if len(args) == 2:
      self.expression(args[1])
      self.expression(args[0])
    elif len(args) == 1:
      self.expression(args[0])
  const codes: Table[string, OpCode] = {"Negate": Negate,
                                        "NegateFloat32": NegateFloat32,
                                        "NegateFloat64": NegateFloat64,
                                        "Add": Add,
                                        "Subtract": Subtract,
                                        "Divide": Divide,
                                        "Multiply": Multiply,
                                        "SignedDivide": SignedDivide,
                                        "AddFloat64": AddFloat64,
                                        "SubtractFloat64": SubtractFloat64,
                                        "DivideFloat64": DivideFloat64,
                                        "MultiplyFloat64": MultiplyFloat64,
                                        "AddFloat32": AddFloat32,
                                        "SubtractFloat32": SubtractFloat32,
                                        "DivideFloat32": DivideFloat32,
                                        "MultiplyFloat32": MultiplyFloat32,
                                        "Pow": Pow,
                                        "SignedPow": SignedPow,
                                        "PowFloat32": PowFloat32,
                                        "PowFloat64": PowFloat64,
                                        "Mod": Mod,
                                        "SignedMod": SignedMod,
                                        "ModFloat32": ModFloat32,
                                        "ModFloat64": ModFloat64,
                                        "Or": Or,
                                        "And": And,
                                        "Xor": Xor,
                                        "Not": Not,
                                        "LShift": LShift,
                                        "RShift": RShift,
                                        "Equal": Equal,
                                        "NotEqual": NotEqual,
                                        "LessThan": LessThan,
                                        "GreaterThan": GreaterThan,
                                        "LessOrEqual": LessOrEqual,
                                        "GreaterOrEqual": GreaterOrEqual,
                                        "PrintInt64": PrintInt64,
                                        "PrintUInt64": PrintUInt64,
                                        "PrintInt32": PrintInt32,
                                        "PrintUInt32": PrintUInt32,
                                        "PrintInt16": PrintInt16,
                                        "PrintUInt16": PrintUInt16,
                                        "PrintInt8": PrintInt8,
                                        "PrintUInt8": PrintUInt8,
                                        "PrintFloat64": PrintFloat64,
                                        "PrintFloat32": PrintFloat32,
                                        "PrintHex": PrintHex,
                                        "PrintBool": PrintBool,
                                        "PrintNan": PrintNan,
                                        "PrintInf": PrintInf,
                                        "PrintString": PrintString,
                                        "SysClock64": SysClock64
                                        }.to_table()
  if fn.valueType.builtinOp in codes:
    self.emitByte(codes[fn.valueType.builtinOp], fn.line)
    return
  # Some builtin operations are slightly more complex
  # so we handle them separately
  case fn.valueType.builtinOp:
    of "LogicalOr":
      self.expression(args[0])
      let jump = self.emitJump(JumpIfTrue, fn.line)
      self.expression(args[1])
      self.patchJump(jump)
    of "LogicalAnd":
      self.expression(args[0])
      var jump = self.emitJump(JumpIfFalseOrPop, fn.line)
      self.expression(args[1])
      self.patchJump(jump)
    else:
      self.error(&"unknown built-in: '{fn.valueType.builtinOp}'", fn.valueType.fun)


proc emitFunction(self: Compiler, name: Name) =
  ## Wrapper to emit LoadFunction instructions
  if name.isFunDecl:
    self.emitByte(LoadInt64, name.line)
    self.emitBytes(self.chunk.writeConstant(name.codePos.toLong()), name.line)
  # If we're not loading a statically declared
  # function, then it must be a function object
  # created by previous LoadFunction instructions
  # that is now bound to some variable, so we just
  # load it
  elif self.scopeDepth > 0 and name.depth != self.scopeDepth:
    self.emitByte(LoadVar, name.line)
    self.emitBytes(self.getStackPos(name).toTriple(), name.line)
  else:
    self.emitByte(LoadClosure, name.line)
    self.emitBytes(self.getClosurePos(name).toTriple(), name.line)


proc generateCall(self: Compiler, fn: Name, args: seq[Expression], onStack: bool = false) =
  ## Small wrapper that abstracts emitting a call instruction
  ## for a given function
  if fn.valueType.isBuiltinFunction:
    # Builtins map to individual instructions
    # (usually 1, but some use more) so we handle
    # them differently
    self.handleBuiltinFunction(fn, args)
    return
  if not onStack:
    # If we're not calling a function
    # whose instruction pointer's is
    # already on the stack, we emit it
    self.emitFunction(fn)
  # We initially emit a dummy return
  # address. It is patched later
  self.emitByte(LoadUInt32, fn.line)
  self.emitBytes(self.chunk.writeConstant(0.toQuad()), fn.line)
  let pos = self.chunk.consts.len() - 4
  for i, argument in reversed(args):
    # We pass the arguments in reverse
    # because of how stacks work. They'll
    # be reversed again at runtime
    if onStack:
      # i counts from 0 over the *reversed* arguments, so the
      # matching parameter is the (i + 1)-th one from the end.
      # A plain ^i would index one past the end when i is 0
      self.check(argument, fn.valueType.args[^(i + 1)].kind)
    self.expression(argument)
  # Creates a new call frame and jumps
  # to the function's first instruction
  # in the code
  if not fn.valueType.isClosure:
    self.emitByte(Call, fn.line)
  else:
    self.emitByte(CallClosure, fn.line)
  self.emitBytes(fn.valueType.args.len().toTriple(), fn.line)
  if fn.valueType.isClosure:
    self.emitBytes(fn.valueType.envLen.toTriple(), fn.line)
  self.patchReturnAddress(pos)


proc checkCallIsPure(self: Compiler, node: ASTnode): bool =
  ## Checks if a call has any side effects. Returns
  ## true if it doesn't and false otherwise
  return true  # TODO


proc beginScope(self: Compiler) =
  ## Begins a new local scope by incrementing the current
  ## scope's depth
  inc(self.scopeDepth)


proc `$`(self: Type): string = $self[]


proc flattenImpl(self: Type, to: var seq[Type]) =
  ## Appends self, followed by all of its children
  ## (recursively, depth first), to the given sequence
  to.add(self)
  for child in self.children:
    flattenImpl(child, to)


proc flatten(self: Type): seq[Type] = flattenImpl(self, result)


proc endScope(self: Compiler) =
  ## Ends the current local scope
  if self.scopeDepth < 0:
    self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
  dec(self.scopeDepth)
  var popCount = 0
  for name in self.names:
    if name.depth > self.scopeDepth:
      if name.valueType.kind notin {Generic, CustomType} and not name.isFunDecl:
        # We don't increase the pop count for these kinds of objects
        # because they're not stored the same way as regular variables
        inc(popCount)
      # NOTE(review): the nesting of the next block (inside the
      # "name is going out of scope" check) and the placement of
      # inc(i) were reconstructed from the comment below; confirm
      # against the original layout
      if name.isFunDecl and not name.valueType.isClosure and name.valueType.children.len() > 0 and name.depth == 0:
        # If a function at the top level contains any closures,
        # when it goes out of scope all of the environments that
        # belong to its inner functions also go out of
        # scope
        var i = 0
        let f = name.valueType
        for fn in flatten(f):
          if fn.isClosure:
            for y in 0..f.envLen:
              self.closedOver.delete(y + i)
              self.emitByte(PopClosure, self.peek().token.line)
              self.emitBytes((y + i).toTriple(), self.peek().token.line)
            inc(i)
  if popCount > 1:
    # If we're popping less than 65535 variables, then
    # we can emit a PopN instruction. This is true for
    # 99.99999% of the use cases of the language (who the
    # hell is going to use 65 THOUSAND variables?), but
    # if you'll ever use more then Peon will emit a PopN instruction
    # for the first 65 thousand and change local variables and then
    # emit another batch of plain ol' Pop instructions for the rest
    self.emitByte(PopN, self.peek().token.line)
    self.emitBytes(popCount.toDouble(), self.peek().token.line)
    if popCount > uint16.high().int():
      for i in countdown(self.names.high(), popCount - uint16.high().int()):
        if self.names[i].depth > self.scopeDepth:
          self.emitByte(PopC, self.peek().token.line)
  elif popCount == 1:
    # We only emit PopN if we're popping more than one value
    self.emitByte(PopC, self.peek().token.line)
  # Drop every name that just went out of scope. keepItIf
  # filters the sequence in place in a single pass, so there
  # is no need to delete items while scanning the list
  self.names.keepItIf(it.depth <= self.scopeDepth)


proc declareName(self: Compiler, node: Declaration, mutable: bool = false) =
  ## Statically declares a name into the current scope.
  ## "Declaring" a name only means updating our internal
  ## list of identifiers so that further calls to resolve()
  ## correctly return them. There is no code to actually
  ## declare a variable at runtime: the value is already
  ## on the stack
  case node.kind:
    of NodeKind.varDecl:
      var node = VarDecl(node)
      # Creates a new Name entry so that self.identifier emits the proper stack offset
      if self.names.high() > 16777215:
        # If someone ever hits this limit in real-world scenarios, I swear I'll
        # slap myself 100 times with a sign saying "I'm dumb". Mark my words
        self.error("cannot declare more than 16777215 variables at a time")
      for name in self.findByName(node.name.token.lexeme):
        if name.depth == self.scopeDepth and not name.isFunctionArgument:
          # Trying to redeclare a variable in the same scope/context is an error, but it's okay
          # if it's a function argument (for example, if you want to copy a number to
          # mutate it)
          self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
      self.names.add(Name(depth: self.scopeDepth,
                          name: node.name,
                          isPrivate: node.isPrivate,
                          owner: self.currentModule,
                          isConst: node.isConst,
                          valueType: self.inferType(node.value),
                          codePos: self.chunk.code.len(),
                          isLet: node.isLet,
                          line: node.token.line,
                          belongsTo: self.currentFunction
                          ))
      if mutable:
        self.names[^1].valueType.mutable = true
    of NodeKind.funDecl:
      var node = FunDecl(node)
      # We declare the generics before the function so we
      # can refer to them later
      for gen in node.generics:
        self.names.add(Name(depth: self.scopeDepth + 1,
                            isPrivate: true,
                            isConst: false,
                            owner: self.currentModule,
                            line: node.token.line,
                            valueType: Type(kind: Generic, mutable: false, node: gen.name),
                            name: gen.name))
      self.names.add(Name(depth: self.scopeDepth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function,
                                          name: node.name.token.lexeme,
                                          returnType: self.inferType(node.returnType),
                                          args: @[],
                                          fun: node,
                                          children: @[]),
                          codePos: self.chunk.code.len(),
                          name: node.name,
                          isLet: false,
                          line: node.token.line,
                          isFunDecl: true,
                          ))
      let fn = self.names[^1]
      var name: Name
      for argument in node.arguments:
        if self.names.high() > 16777215:
          self.error("cannot declare more than 16777215 variables at a time")
        # wait, no LoadVar? Yes! That's because when calling functions,
        # arguments will already be on the stack, so there's no need to
        # load them here
        name = Name(depth: self.scopeDepth + 1,
                    isPrivate: true,
                    owner: self.currentModule,
                    isConst: false,
                    name: argument.name,
                    valueType: nil,
                    codePos: 0,
                    isLet: false,
                    line: argument.name.token.line,
                    isFunctionArgument: true,
                    belongsTo: fn
                    )
        self.names.add(name)
        name.valueType = self.inferType(argument.valueType)
        # If it's still nil, it's an error!
        if name.valueType.isNil():
          self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'", argument.name)
        fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
    else:
      discard  # TODO: Types, enums


proc emitLoop(self: Compiler, begin: int, line: int) =
  ## Emits a JumpBackwards instruction with the correct
  ## jump offset
  let offset = self.chunk.code.high() - begin + 4
  if offset > 16777215:
    self.error("cannot jump more than 16777215 bytecode instructions")
  self.emitByte(JumpBackwards, line)
  self.emitBytes(offset.toTriple(), line)


proc patchBreaks(self: Compiler) =
  ## Patches the jumps emitted by
  ## breakStmt. This is needed
  ## because the size of code
  ## to skip is not known before
  ## the loop is fully compiled
  for brk in self.currentLoop.breakPos:
    self.patchJump(brk)


proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) =
  ## Handles the "magic" pragma. Assumes the given name is already
  ## declared
  if pragma.args.len() != 1:
    self.error("'magic' pragma: wrong number of arguments")
  elif pragma.args[0].kind != strExpr:
    self.error("'magic' pragma: wrong type of argument (constant string expected)")
  elif node.kind != NodeKind.funDecl:
    self.error("'magic' pragma is not valid in this context")
  var node = FunDecl(node)
  var fn = self.resolve(node.name)
  fn.valueType.isBuiltinFunction = true
  # Strip the surrounding quotes from the string literal
  fn.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2]
  # The magic pragma ignores the function's body
  node.body = nil


proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) =
  ## Handles the "pure" pragma
  case node.kind:
    of NodeKind.funDecl:
      FunDecl(node).isPure = true
    of lambdaExpr:
      LambdaExpr(node).isPure = true
    else:
      self.error("'pure' pragma is not valid in this context")


proc dispatchPragmas(self: Compiler, node: ASTnode) =
  ## Dispatches pragmas bound to objects
  var pragmas: seq[Pragma] = @[]
  case node.kind:
    of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
      pragmas = Declaration(node).pragmas
    of lambdaExpr:
      pragmas = LambdaExpr(node).pragmas
    else:
      discard  # Unreachable
  for pragma in pragmas:
    if pragma.name.token.lexeme notin self.compilerProcs:
      self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
    self.compilerProcs[pragma.name.token.lexeme](self, pragma, node)


proc fixGenericFunc(self: Compiler, name: Name, args: seq[Expression]): Name =
  ## Specializes generic arguments in functions
  var fn = name.deepCopy()
  result = fn
  var typ: Type
  for i in 0..args.high():
    if fn.valueType.args[i].kind.kind == Generic:
      typ = self.inferType(args[i])
      fn.valueType.args[i].kind = typ
      self.resolve(fn.valueType.args[i].name).valueType = typ
      if fn.valueType.args[i].kind.isNil():
        self.error(&"cannot specialize generic function: argument {i + 1} has no type")


proc patchReturnAddress(self: Compiler, pos: int) =
  ## Patches the return address of a function
  ## call
  let address = self.chunk.code.len().toQuad()
self.chunk.consts[pos] = address[0] self.chunk.consts[pos + 1] = address[1] self.chunk.consts[pos + 2] = address[2] self.chunk.consts[pos + 3] = address[3] proc terminateProgram(self: Compiler, pos: int, terminateScope: bool = true) = ## Utility to terminate a peon program if terminateScope: self.endScope() self.patchReturnAddress(pos + 3) self.emitByte(OpCode.Return, self.peek().token.line) self.emitByte(0, self.peek().token.line) # Entry point has no return value (TODO: Add easter eggs, cuz why not) proc beginProgram(self: Compiler, incremental: bool = false): int = ## Utility to begin a peon program ## compiled. Returns the position of ## a dummy return address of the program's ## entry point to be patched by terminateProgram # Every peon program has a hidden entry point in # which user code is wrapped. Think of it as if # peon is implicitly writing the main() function # of your program and putting all of your code in # there. While we call our entry point just like # any regular peon function, we can't use our handy # helper generateCall() because we need to keep track # of where our program ends (which we don't know yet). 
# To fix this, we emit dummy offsets and patch them # later, once we know the boundaries of our hidden main() var main: Name if incremental: main = self.names[0] else: main = Name(depth: 0, isPrivate: true, isConst: false, isLet: false, owner: self.currentModule, valueType: Type(kind: Function, name: "", returnType: nil, args: @[], ), codePos: 12, # Jump address is hardcoded name: newIdentExpr(Token(lexeme: "", kind: Identifier)), isFunDecl: true, line: -1) self.names.add(main) self.emitByte(LoadInt64, 1) self.emitBytes(self.chunk.writeConstant(main.codePos.toLong()), 1) self.emitByte(LoadUInt32, 1) self.emitBytes(self.chunk.writeConstant(0.toQuad()), 1) self.emitByte(Call, 1) self.emitBytes(0.toTriple(), 1) result = 5 ## End of utility functions proc literal(self: Compiler, node: ASTNode) = ## Emits instructions for literals such ## as singletons, strings and numbers case node.kind: of trueExpr: self.emitByte(LoadTrue, node.token.line) of falseExpr: self.emitByte(LoadFalse, node.token.line) of nilExpr: self.emitByte(LoadNil, node.token.line) of infExpr: self.emitByte(LoadInf, node.token.line) of nanExpr: self.emitByte(LoadNan, node.token.line) of strExpr: self.emitConstant(LiteralExpr(node), Type(kind: String)) of intExpr: let y = IntExpr(node) let kind = self.inferType(y) if kind.kind in [Int64, Int32, Int16, Int8]: var x: int try: discard parseInt(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") else: var x: uint64 try: discard parseBiggestUInt(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") self.emitConstant(y, kind) of hexExpr: var x: int var y = HexExpr(node) try: discard parseHex(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") let node = newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)) ) ) self.emitConstant(node, self.inferType(y)) of binExpr: var x: int var y = BinExpr(node) try: discard 
parseBin(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") let node = newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)) ) ) self.emitConstant(node, self.inferType(y)) of octExpr: var x: int var y = OctExpr(node) try: discard parseOct(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") let node = newIntExpr(Token(lexeme: $x, line: y.token.line, pos: (start: y.token.pos.start, stop: y.token.pos.start + len($x)) ) ) self.emitConstant(node, self.inferType(y)) of floatExpr: var x: float var y = FloatExpr(node) try: discard parseFloat(y.literal.lexeme, x) except ValueError: self.error("floating point value out of range") self.emitConstant(y, self.inferType(y)) of awaitExpr: var y = AwaitExpr(node) self.expression(y.expression) self.emitByte(OpCode.Await, node.token.line) else: self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)") proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) = ## Emits the code to call a unary operator self.generateCall(fn, @[op.a]) proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) = ## Emits the code to call a binary operator self.generateCall(fn, @[op.a, op.b]) proc unary(self: Compiler, node: UnaryExpr) = ## Compiles unary expressions such as decimal ## and bitwise negation let valueType = self.inferType(node.a) let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", valueType)])) self.callUnaryOp(funct, node) proc binary(self: Compiler, node: BinaryExpr) = ## Compiles all binary expressions let typeOfA = self.inferType(node.a) let typeOfB = self.inferType(node.b) let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", typeOfA), ("", typeOfB)])) self.callBinaryOp(funct, node) proc identifier(self: Compiler, node: IdentExpr) = ## 
Compiles access to identifiers var s = self.resolve(node) if s.isNil(): self.error(&"reference to undeclared name '{node.token.lexeme}'") elif s.isConst: # Constants are always emitted as Load* instructions # no matter the scope depth self.emitConstant(node, self.inferType(node)) else: if s.valueType.kind == Function and s.isFunDecl: # Functions have no runtime # representation, so we need # to create one on the fly self.emitByte(LoadInt64, node.token.line) self.emitBytes(self.chunk.writeConstant(s.codePos.toLong()), node.token.line) elif self.scopeDepth > 0 and not self.currentFunction.isNil() and s.depth != self.scopeDepth: # Loads a closure variable. Stored in a separate "closure array" in the VM that does not # align its semantics with the call stack. This makes closures work as expected and is # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway) if not s.isClosedOver: self.currentFunction.valueType.isClosure = true self.currentFunction.valueType.envLen += 1 self.closedOver.add(s) let stackIdx = self.getStackPos(s).toTriple() let closeIdx = self.getClosurePos(s).toTriple() let oldLen = self.chunk.code.len() self.chunk.code.insert(StoreClosure.uint8, s.belongsTo.codePos) self.chunk.code.insert(stackIdx[0], s.belongsTo.codePos + 1) self.chunk.code.insert(stackIdx[1], s.belongsTo.codePos + 2) self.chunk.code.insert(stackIdx[2], s.belongsTo.codePos + 3) self.chunk.code.insert(closeIdx[0], s.belongsTo.codePos + 4) self.chunk.code.insert(closeIdx[1], s.belongsTo.codePos + 5) self.chunk.code.insert(closeIdx[2], s.belongsTo.codePos + 6) self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(s.belongsTo.codePos)) + 1] += 7 self.fixJumps(oldLen, s.belongsTo.codePos) self.fixCFIOffsets(oldLen, s.belongsTo.codePos) self.emitByte(LoadClosure, node.token.line) self.emitBytes(self.getClosurePos(s).toTriple(), node.token.line) else: # Static name resolution, loads value at index in the stack. Very fast. Much wow. 
self.emitByte(LoadVar, node.token.line) # No need to check for -1 here: we already did a nil-check above! self.emitBytes(self.getStackPos(s).toTriple(), node.token.line) proc assignment(self: Compiler, node: ASTNode) = ## Compiles assignment expressions case node.kind: of assignExpr: let node = AssignExpr(node) let name = IdentExpr(node.name) var r = self.resolve(name) if r.isNil(): self.error(&"assignment to undeclared name '{name.token.lexeme}'", name) elif r.isConst: self.error(&"cannot assign to '{name.token.lexeme}' (constant)", name) elif r.isLet: self.error(&"cannot reassign '{name.token.lexeme}'", name) self.expression(node.value) if self.scopeDepth > 0 and r.depth != self.scopeDepth: self.emitByte(StoreVar, node.token.line) self.emitBytes(self.getStackPos(r).toTriple(), node.token.line) else: # Loads a closure variable. Stored in a separate "closure array" in the VM that does not # align its semantics with the call stack. This makes closures work as expected and is # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway) self.emitByte(StoreClosure, node.token.line) self.emitBytes(self.getClosurePos(r).toTriple(), node.token.line) of setItemExpr: let node = SetItemExpr(node) let typ = self.inferType(node) if typ.isNil(): self.error(&"cannot determine the type of '{node.name.token.lexeme}'") # TODO else: self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") proc blockStmt(self: Compiler, node: BlockStmt) = ## Compiles block statements, which create a new ## local scope self.beginScope() for decl in node.code: self.declaration(decl) self.endScope() proc ifStmt(self: Compiler, node: IfStmt) = ## Compiles if/else statements for conditional ## execution of code self.check(node.condition, Type(kind: Bool)) self.expression(node.condition) let jump = self.emitJump(JumpIfFalsePop, node.token.line) self.statement(node.thenBranch) let jump2 = 
self.emitJump(JumpForwards, node.token.line) self.patchJump(jump) if not node.elseBranch.isNil(): self.statement(node.elseBranch) self.patchJump(jump2) proc whileStmt(self: Compiler, node: WhileStmt) = ## Compiles C-style while loops and ## desugared C-style for loops self.check(node.condition, Type(kind: Bool)) let start = self.chunk.code.high() self.expression(node.condition) let jump = self.emitJump(JumpIfFalsePop, node.token.line) self.statement(node.body) self.emitLoop(start, node.token.line) self.patchJump(jump) proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} = ## Compiles code to call a chain of function calls var args: seq[tuple[name: string, kind: Type]] = @[] var argExpr: seq[Expression] = @[] var kind: Type if not self.checkCallIsPure(node.callee): if self.currentFunction.valueType.name != "": self.error(&"cannot make sure that calls to '{self.currentFunction.valueType.name}' are side-effect free") else: self.error(&"cannot make sure that call is side-effect free") # TODO: Keyword arguments for i, argument in node.arguments.positionals: kind = self.inferType(argument) if kind.isNil(): if argument.kind == identExpr: self.error(&"reference to undeclared name '{IdentExpr(argument).name.lexeme}'") self.error(&"cannot infer the type of argument {i + 1} in function call") args.add(("", kind)) argExpr.add(argument) for argument in node.arguments.keyword: # TODO discard if args.len() >= 16777216: self.error(&"cannot pass more than 16777215 arguments") case node.callee.kind: of identExpr: # Calls like hi() result = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args)) self.generateCall(result, argExpr) of NodeKind.callExpr: # Calling a call expression, like hello()() var node: Expression = node var all: seq[CallExpr] = @[] while CallExpr(node).callee.kind == callExpr: all.add(CallExpr(CallExpr(node).callee)) node = CallExpr(node).callee for exp in reversed(all): self.callExpr(exp) # 
TODO self.generateCall(result, argExpr, onStack=true) # TODO: Calling lambdas on-the-fly (i.e. on the same line) else: let typ = self.inferType(node) if typ.isNil(): self.error(&"expression has no type") else: self.error(&"object of type '{self.typeToStr(typ)}' is not callable") proc expression(self: Compiler, node: Expression) = ## Compiles all expressions case node.kind: of NodeKind.callExpr: self.callExpr(CallExpr(node)) # TODO of getItemExpr: discard # TODO: Get rid of this of pragmaExpr: discard # TODO # Note that for setItem and assign we don't convert # the node to its true type because that type information # would be lost in the call anyway. The differentiation # happens in self.assignment() of setItemExpr, assignExpr: # TODO: Get rid of this self.assignment(node) of identExpr: self.identifier(IdentExpr(node)) of unaryExpr: # Unary expressions such as ~5 and -3 self.unary(UnaryExpr(node)) of groupingExpr: # Grouping expressions like (2 + 1) self.expression(GroupingExpr(node).expression) of binaryExpr: # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 self.binary(BinaryExpr(node)) of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, infExpr, nanExpr, floatExpr, nilExpr: # Since all of these AST nodes share the # same overall structure and the kind # field is enough to tell one from the # other, why bother with specialized # cases when one is enough? self.literal(node) else: self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") proc awaitStmt(self: Compiler, node: AwaitStmt) = ## Compiles await statements. An await statement ## is like an await expression, but parsed in the ## context of statements for usage outside expressions, ## meaning it can be used standalone. It's basically the ## same as an await expression followed by a semicolon. 
## Await expressions and statements are the only native ## construct to run coroutines from within an already ## asynchronous context (which should be orchestrated ## by an event loop). They block in the caller until ## the callee returns self.expression(node.expression) self.emitByte(OpCode.Await, node.token.line) proc deferStmt(self: Compiler, node: DeferStmt) = ## Compiles defer statements. A defer statement ## is executed right before its containing function ## exits (either because of a return or an exception) var oldChunk = self.chunk var chunk = newChunk() chunk.consts = self.chunk.consts chunk.lines = self.chunk.lines chunk.cfi = self.chunk.cfi self.chunk = chunk self.expression(node.expression) for b in chunk.code: self.deferred.add(b) self.chunk = oldChunk self.chunk.code &= chunk.code self.chunk.consts &= chunk.consts self.chunk.lines &= chunk.lines self.chunk.cfi &= chunk.cfi proc returnStmt(self: Compiler, node: ReturnStmt) = ## Compiles return statements var expected = self.currentFunction.valueType.returnType self.check(node.value, expected) if not node.value.isNil(): self.expression(node.value) self.emitByte(OpCode.SetResult, node.token.line) self.emitByte(OpCode.Return, node.token.line) if not node.value.isNil(): self.emitByte(1, node.token.line) else: self.emitByte(0, node.token.line) proc yieldStmt(self: Compiler, node: YieldStmt) = ## Compiles yield statements self.expression(node.expression) self.emitByte(OpCode.Yield, node.token.line) proc raiseStmt(self: Compiler, node: RaiseStmt) = ## Compiles raise statements self.expression(node.exception) self.emitByte(OpCode.Raise, node.token.line) proc continueStmt(self: Compiler, node: ContinueStmt) = ## Compiles continue statements. 
A continue statement ## jumps to the next iteration in a loop if self.currentLoop.start > 16777215: self.error("too much code to jump over in continue statement") self.emitByte(Jump, node.token.line) self.emitBytes(self.currentLoop.start.toTriple(), node.token.line) proc breakStmt(self: Compiler, node: BreakStmt) = ## Compiles break statements. A break statement ## jumps to the end of the loop self.currentLoop.breakPos.add(self.emitJump(OpCode.JumpForwards, node.token.line)) if self.currentLoop.depth > self.scopeDepth: # Breaking out of a loop closes its scope self.endScope() proc assertStmt(self: Compiler, node: AssertStmt) = ## Compiles assert statements (raise ## AssertionError if the expression is falsey) self.expression(node.expression) self.emitByte(OpCode.Assert, node.token.line) proc forEachStmt(self: Compiler, node: ForEachStmt) = ## Compiles foreach loops # TODO proc importStmt(self: Compiler, node: ImportStmt) = ## Imports a module at compile time # TODO: This is obviously horrible. 
It's just a test let filename = node.moduleName.token.lexeme & ".pn" try: self.compileModule(filename) except IOError: self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""") except OSError: self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()} [errno {osLastError()}]""") proc statement(self: Compiler, node: Statement) = ## Compiles all statements case node.kind: of exprStmt: let expression = ExprStmt(node).expression let kind = self.inferType(expression) self.expression(expression) if kind.isNil(): # The expression has no type and produces no value, # so we don't have to pop anything discard elif self.replMode: case kind.kind: of Int64: self.emitByte(PrintInt64, node.token.line) of UInt64: self.emitByte(PrintUInt64, node.token.line) of Int32: self.emitByte(PrintInt32, node.token.line) of UInt32: self.emitByte(PrintInt32, node.token.line) of Int16: self.emitByte(PrintInt16, node.token.line) of UInt16: self.emitByte(PrintUInt16, node.token.line) of Int8: self.emitByte(PrintInt8, node.token.line) of UInt8: self.emitByte(PrintUInt8, node.token.line) of Float64: self.emitByte(PrintFloat64, node.token.line) of Float32: self.emitByte(PrintFloat32, node.token.line) of Bool: self.emitByte(PrintBool, node.token.line) of Nan: self.emitByte(PrintNan, node.token.line) of Inf: self.emitByte(PrintInf, node.token.line) of String: self.emitByte(PrintString, node.token.line) else: self.emitByte(PrintHex, node.token.line) else: self.emitByte(Pop, node.token.line) of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) of NodeKind.assertStmt: self.assertStmt(AssertStmt(node)) of NodeKind.raiseStmt: self.raiseStmt(RaiseStmt(node)) of NodeKind.breakStmt: self.breakStmt(BreakStmt(node)) of NodeKind.continueStmt: self.continueStmt(ContinueStmt(node)) of NodeKind.returnStmt: self.returnStmt(ReturnStmt(node)) of NodeKind.importStmt: self.importStmt(ImportStmt(node)) of NodeKind.whileStmt: # Note: Our parser already desugars # for loops to while loops let loop 
= self.currentLoop self.currentLoop = Loop(start: self.chunk.code.len(), depth: self.scopeDepth, breakPos: @[]) self.whileStmt(WhileStmt(node)) self.patchBreaks() self.currentLoop = loop of NodeKind.forEachStmt: self.forEachStmt(ForEachStmt(node)) of NodeKind.blockStmt: self.blockStmt(BlockStmt(node)) of NodeKind.yieldStmt: self.yieldStmt(YieldStmt(node)) of NodeKind.awaitStmt: self.awaitStmt(AwaitStmt(node)) of NodeKind.deferStmt: self.deferStmt(DeferStmt(node)) of NodeKind.tryStmt: discard else: self.expression(Expression(node)) proc varDecl(self: Compiler, node: VarDecl) = ## Compiles variable declarations let expected = self.inferType(node.valueType) let actual = self.inferType(node.value) if expected.isNil() and actual.isNil(): if node.value.kind == identExpr or node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr: var name = node.value.token.lexeme if node.value.kind == callExpr: name = CallExpr(node.value).callee.token.lexeme self.error(&"reference to undeclared name '{name}'") self.error(&"'{node.name.token.lexeme}' has no type") elif not expected.isNil() and expected.mutable: # I mean, variables *are* already mutable (some of them anyway) self.error(&"invalid type '{self.typeToStr(expected)}' for var") elif not self.compareTypes(expected, actual): if not expected.isNil(): self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'") self.expression(node.value) self.declareName(node, mutable=node.token.kind == TokenType.Var) self.emitByte(StoreVar, node.token.line) self.emitBytes(self.getStackPos(self.names[^1]).toTriple(), node.token.line) proc typeDecl(self: Compiler, node: TypeDecl) = ## Compiles type declarations # TODO proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) = ## Compiles function declarations # A function's code is just compiled linearly # and then jumped over if node.token.kind == Operator and 
node.token.lexeme in [".", ]: self.error(&"The '{node.token.lexeme}' cannot be overridden", node) var jmp: int self.declareName(node) self.dispatchPragmas(node) # Function's code starts after the jump var node = node var fn = if fn.isNil(): self.names[^(node.arguments.len() + 1)] else: fn # We store the current function var function = self.currentFunction if not self.currentFunction.isNil(): self.currentFunction.valueType.children.add(fn.valueType) self.currentFunction = fn var names = self.names[^(node.arguments.len())..^1] if fn.valueType.isBuiltinFunction: fn.codePos = self.chunk.code.len() # We take the arguments off of our name list # because they become temporaries on the stack. # Builtin functions (usually) map to a single # bytecode instruction to avoid unnecessary # overhead from peon's calling convention # This also means that peon's fast builtins # can only be relatively simple self.names = self.names[0..^node.arguments.len() + 1] elif not node.body.isNil(): jmp = self.emitJump(JumpForwards, node.token.line) fn.codePos = self.chunk.code.len() # We let our debugger know this function's boundaries self.chunk.cfi.add(self.chunk.code.high().toTriple()) self.cfiOffsets.add((value: self.chunk.code.high(), offset: self.chunk.cfi.high() - 2, fn: fn)) let idx = self.chunk.cfi.len() self.chunk.cfi.add(0.toTriple()) # Patched it later self.chunk.cfi.add(uint8(node.arguments.len())) if not node.name.isNil(): self.chunk.cfi.add(fn.name.token.lexeme.len().toDouble()) var s = fn.name.token.lexeme if s.len() >= uint16.high().int: s = node.name.token.lexeme[0..uint16.high()] self.chunk.cfi.add(s.toBytes()) else: self.chunk.cfi.add(0.toDouble()) if BlockStmt(node.body).code.len() == 0: self.error("cannot declare function with empty body") # Since the deferred array is a linear # sequence of instructions and we want # to keep track to whose function's each # set of deferred instruction belongs, # we record the length of the deferred # array before compiling the function # 
and use this info later to compile # the try/finally block with the deferred # code var deferStart = self.deferred.len() self.beginScope() for decl in BlockStmt(node.body).code: self.declaration(decl) let typ = self.currentFunction.valueType.returnType var hasVal: bool = false case self.currentFunction.valueType.fun.kind: of NodeKind.funDecl: hasVal = self.currentFunction.valueType.fun.hasExplicitReturn of NodeKind.lambdaExpr: hasVal = LambdaExpr(Declaration(self.currentFunction.valueType.fun)).hasExplicitReturn else: discard # Unreachable if not hasVal and not typ.isNil(): # There is no explicit return statement anywhere in the function's # body: while this is not a tremendously useful piece of information (since # the presence of at least one doesn't mean all control flow cases are # covered), it definitely is an error worth reporting self.error("function has an explicit return type, but no return statement was found", node) hasVal = hasVal and not typ.isNil() self.endScope() # Terminates the function's context self.emitByte(OpCode.Return, self.peek().token.line) if hasVal: self.emitByte(1, self.peek().token.line) else: self.emitByte(0, self.peek().token.line) let stop = self.chunk.code.len().toTriple() self.chunk.cfi[idx] = stop[0] self.chunk.cfi[idx + 1] = stop[1] self.chunk.cfi[idx + 2] = stop[2] # Currently defer is not functional, so we # just pop the instructions for _ in deferStart..self.deferred.high(): discard self.deferred.pop() # Well, we've compiled everything: time to patch # the jump offset self.patchJump(jmp) else: discard # TODO: Forward declarations # Restores the enclosing function (if any). 
# Makes nested calls work (including recursion) self.currentFunction = function proc declaration(self: Compiler, node: Declaration) = ## Compiles all declarations case node.kind: of NodeKind.varDecl: self.varDecl(VarDecl(node)) of NodeKind.funDecl: self.funDecl(FunDecl(node)) of NodeKind.typeDecl: self.typeDecl(TypeDecl(node)) else: self.statement(Statement(node)) proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil, terminateScope: bool = true, incremental: bool = false): Chunk = ## Compiles a sequence of AST nodes into a chunk ## object if chunk.isNil(): self.chunk = newChunk() else: self.chunk = chunk self.ast = ast self.file = file var terminateScope = terminateScope if incremental: terminateScope = false self.scopeDepth = 0 self.currentFunction = nil self.currentModule = self.file.extractFilename() self.current = 0 self.lines = lines self.source = source self.jumps = @[] let pos = self.beginProgram(incremental) if incremental and self.replMode: discard self.chunk.code.pop() discard self.chunk.code.pop() while not self.done(): self.declaration(Declaration(self.step())) self.terminateProgram(pos, terminateScope) result = self.chunk if incremental and not self.replMode: discard self.chunk.code.pop() discard self.chunk.code.pop() proc compileModule(self: Compiler, filename: string) = ## Compiles an imported module into an existing chunk. 
## A temporary compiler object is initialized internally let path = joinPath(splitPath(self.file).head, filename) if self.modules.contains(path): return var lexer = newLexer() var parser = newParser() var compiler = newCompiler() lexer.fillSymbolTable() let source = readFile(path) let tokens = lexer.lex(source, filename) let ast = parser.parse(tokens, filename, lexer.getLines(), source) compiler.names.add(self.names[0]) discard compiler.compile(ast, filename, lexer.getLines(), source, chunk=self.chunk, incremental=true) for name in compiler.names: if name.owner in self.modules: continue self.names.add(name) self.modules.incl(path) self.closedOver &= compiler.closedOver