# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../config
import ../util/multibyte
import lexer as l
import parser as p

import tables
import strformat
import algorithm
import parseutils
import strutils
import sequtils
import os


export ast
export token
export multibyte


type
    TypeKind = enum
        ## An enumeration of compile-time
        ## types
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf, Typevar, Generic,
        Reference, Pointer
        Any  # Any is used internally in a few cases,
             # for example when looking for operators
             # when only the type of the arguments is of
             # interest
    Type = ref object
        ## A wrapper around
        ## compile-time types
        mutable: bool
        case kind: TypeKind:
            of Function:
                name: string
                isLambda: bool
                isGenerator: bool
                isCoroutine: bool
                args: seq[tuple[name: string, kind: Type]]
                returnType: Type
                isBuiltinFunction: bool
                builtinOp: string
                fun: FunDecl
            of Reference, Pointer:
                value: Type
            of Generic:
                node: IdentExpr
            else:
                discard

# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode


type
    Name = ref object
        ## A compile-time wrapper around
        ## statically resolved names

        # Name of the identifier
        name: IdentExpr
        # Owner of the identifier (module)
        owner: string
        # Scope depth
        depth: int
        # Is this name private?
        isPrivate: bool
        # Is this a constant?
        isConst: bool
        # Can this name's value be mutated?
        isLet: bool
        # The name's type
        valueType: Type
        # For functions, this marks where the function's
        # code begins. For variables, this stores where
        # their StoreVar/StoreHeap instruction was emitted
        codePos: int
        # Is the name closed over (i.e. used in a closure)?
        isClosedOver: bool
        # Is this a function argument?
        isFunctionArgument: bool
        # Where is this node declared in the file?
        line: int
        # Is this a function declaration or a variable
        # with a function as value? (The distinction *is*
        # important! Check emitFunction())
        isFunDecl: bool

    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements

        # Position in the bytecode where the loop starts
        start: int
        # Scope depth where the loop is located
        depth: int
        # Absolute jump offsets into our bytecode that we need to
        # patch. Used for break statements
        breakPos: seq[int]

    Compiler* = ref object
        ## A wrapper around the Peon compiler's state

        # The bytecode chunk where we write code to
        chunk: Chunk
        # The output of our parser (AST)
        ast: seq[Declaration]
        # The current AST node we're looking at
        current: int
        # The current file being compiled (used only for
        # error reporting)
        file: string
        # Compile-time "simulation" of the stack at
        # runtime to load variables that have stack
        # behavior more efficiently
        names: seq[Name]
        # The current scope depth. If > 0, we're
        # in a local scope, otherwise it's global
        scopeDepth: int
        # The current function being compiled
        currentFunction: Type
        # Are optimizations turned on?
        enableOptimizations: bool
        # The current loop being compiled (used to
        # keep track of where to jump)
        currentLoop: Loop
        # Are we in REPL mode? If so, Pop instructions
        # for expression statements at the top level are
        # swapped for a special PopRepl instruction that
        # prints the result of the expression once it is
        # evaluated
        replMode: bool
        # The current module being compiled
        # (used to restrict access to statically
        # defined variables at compile time)
        currentModule: string
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
        # This sequence is emptied each time a
        # function declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
        # List of closed-over variables
        closedOver: seq[Name]
        # Keeps track of stack frames
        frames: seq[int]
        # Compiler procedures called by pragmas
        compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]


## Forward declarations
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type): seq[Name]
proc compareTypes(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, pos: int)
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc dispatchPragmas(self: Compiler, node: ASTnode)
proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[])
## End of forward declarations


proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler =
    ## Initializes a new Compiler object. The "magic" and
    ## "pure" pragma handlers are registered here; every
    ## other field starts out at its empty/zero value
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.scopeDepth = 0
    result.currentFunction = nil
    result.enableOptimizations = enableOptimizations
    result.replMode = replMode
    result.currentModule = ""
    result.compilerProcs = newTable[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]()
    result.compilerProcs["magic"] = handleMagicPragma
    result.compilerProcs["pure"] = handlePurePragma


## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the most recently consumed AST node, or
    ## the last node of the tree once compilation ran
    ## past the end. Returns nil when the AST is empty.
    ## The index is clamped so that calling this before
    ## any node has been consumed (current == 0) yields
    ## the first node instead of indexing at -1
    if self.ast.len() == 0:
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    return self.ast[max(self.current - 1, 0)]


proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the declaration of the function being
    ## compiled, or nil when there is none
    if self.currentFunction.isNil():
        return nil
    return self.currentFunction.fun


proc getFile*(self: Compiler): string {.inline.} = self.file
proc getModule*(self: Compiler): string {.inline.} = self.currentModule


## Utility functions

proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance.
    ## If the distance is out of bounds, the last
    ## AST node in the tree is returned. A negative
    ## distance may be used to retrieve previously
    ## consumed AST nodes
    # NOTE(review): with an empty AST the first branch still
    # evaluates self.ast[^1], which has no element to return
    if self.ast.high() == -1 or self.current + distance > self.ast.high() or self.current + distance < 0:
        result = self.ast[^1]
    else:
        result = self.ast[self.current + distance]


proc done(self: Compiler): bool {.inline.} =
    ## Returns true if the compiler is done
    ## compiling, false otherwise
    result = self.current > self.ast.high()


proc error(self: Compiler, message: string) {.raises: [CompileError], inline.} =
    ## Raises a CompileError exception carrying the
    ## current node, file and module
    raise CompileError(msg: message, node: self.getCurrentNode(), file: self.file, module: self.currentModule)


proc step(self: Compiler): ASTNode {.inline.} =
    ## Steps to the next node and returns
    ## the consumed one
    result = self.peek()
    if not self.done():
        self.current += 1


proc emitByte(self: Compiler, byt: OpCode | uint8) {.inline.} =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled. The
    ## byte is tagged with the line of the node
    ## returned by peek()
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    self.chunk.write(uint8 byt, self.peek().token.line)


proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) {.inline.} =
    ## Handy helper method to write arbitrary bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    for b in bytarr:
        self.emitByte(b)


proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s.
    ## Kinds not listed below add nothing and leave the
    ## result zeroed
    var v: int
    # The number of parsed characters is deliberately ignored:
    # only the value stored in v matters here
    discard parseInt(val.token.lexeme, v)
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(v)])
        of Int16, UInt16:
            result = self.chunk.writeConstant(v.toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(v.toQuad())
        of Int64, UInt64:
            result = self.chunk.writeConstant(v.toLong())
        of String:
            # NOTE(review): the bytes written here come from v (the
            # integer parsed above), not from the lexeme's text.
            # Confirm this is what the VM expects for strings
            result = self.chunk.writeConstant(v.toBytes())
        of Float32:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
        of Float64:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[8, uint8]](f))
        else:
            discard


proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits a constant instruction along
    ## with its operand. For strings, the
    ## length of the lexeme is emitted as a
    ## 3-byte value right after the opcode
    case kind.kind:
        of Int64:
            self.emitByte(LoadInt64)
        of UInt64:
            self.emitByte(LoadUInt64)
        of Int32:
            self.emitByte(LoadInt32)
        of UInt32:
            self.emitByte(LoadUInt32)
        of Int16:
            self.emitByte(LoadInt16)
        of UInt16:
            self.emitByte(LoadUInt16)
        of Int8:
            self.emitByte(LoadInt8)
        of UInt8:
            self.emitByte(LoadUInt8)
        of String:
            self.emitByte(LoadString)
            let str = LiteralExpr(obj).literal.lexeme
            if str.len() >= 16777216:
                self.error("string constants cannot be larger than 16777215 bytes")
            self.emitBytes(str.len().toTriple())
        of Float32:
            self.emitByte(LoadFloat32)
        of Float64:
            self.emitByte(LoadFloat64)
        else:
            discard  # TODO
    self.emitBytes(self.makeConstant(obj, kind))


proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits a dummy jump offset to be patched later
    ## and returns the absolute index into the chunk's
    ## bytecode array where the given placeholder
    ## instruction was written
    self.emitByte(opcode)
    self.emitBytes(0.toTriple())
    # One opcode byte plus a three-byte operand were written
    result = self.chunk.code.len() - 4


proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted relative
    ## jump using emitJump
    var jump: int = self.chunk.code.len() - offset
    if jump > 16777215:
        self.error("cannot jump more than 16777215 instructions")
    # The jump instruction itself (4 bytes) is not part of the distance
    let offsetArray = (jump - 4).toTriple()
    self.chunk.code[offset + 1] = offsetArray[0]
    self.chunk.code[offset + 2] = offsetArray[1]
    self.chunk.code[offset + 3] = offsetArray[2]


proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
    ## Traverses self.names backwards and returns the
    ## first name object with the given name. Returns
    ## nil when the name can't be found. This function
    ## has no concept of scope depth, because getStackPos
    ## does that job. Note that private names declared in
    ## other modules will not be resolved!
    for obj in reversed(self.names):
        if obj.name.token.lexeme == name.token.lexeme:
            if obj.isPrivate and obj.owner != self.currentModule:
                continue  # There may be a name in the current module that
                          # matches, so we skip this
            return obj
    return nil


proc getStackPos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int =
    ## Returns the predicted call stack position of a given name, relative
    ## to the current frame. Returns -1 if no visible name declared at the
    ## given depth matches
    var found = false
    result = 2
    for variable in self.names:
        # Names whose type is a function are not counted
        if variable.valueType.kind == Function:
            continue
        inc(result)
        if name.name.lexeme == variable.name.name.lexeme:
            if variable.isPrivate and variable.owner != self.currentModule:
                continue
            elif variable.depth == depth:
                found = true
                dec(result)
                break
    if not found:
        return -1


proc getClosurePos(self: Compiler, name: IdentExpr): int =
    ## Iterates the internal list of declared closure names backwards and
    ## returns the predicted closure array position of a given name.
    ## Returns -1 if the name can't be found (this includes names that
    ## are private in other modules)
    # Walking by index keeps the returned position correct even
    # when matching-but-inaccessible entries are skipped on the way
    for i in countdown(self.closedOver.high(), 0):
        let variable = self.closedOver[i]
        if name.name.lexeme != variable.name.name.lexeme:
            continue
        if variable.isPrivate and variable.owner != self.currentModule:
            continue
        return i
    return -1


proc resolve(self: Compiler, name: string,
             depth: int = self.scopeDepth): Name =
    ## Traverses self.names backwards and returns the
    ## first name object with the given name. Returns
    ## nil when the name can't be found. This function
    ## has no concept of scope depth, because getStackPos
    ## does that job. Note that private names declared in
    ## other modules will not be resolved!
    for obj in reversed(self.names):
        if obj.name.token.lexeme == name:
            if obj.isPrivate and obj.owner != self.currentModule:
                continue  # There may be a name in the current module that
                          # matches, so we skip this
            return obj
    return nil


proc detectClosureVariable(self: Compiler, name: var Name, depth: int = self.scopeDepth) =
    ## Detects if the given name is used in a local scope deeper
    ## than the given one and modifies the code emitted for it
    ## to store it as a closure variable if it is. Does nothing if the name
    ## hasn't been declared yet or is unreachable (for example if it's
    ## declared as private in another module). This function must be called
    ## each time a name is referenced in order for closed-over variables
    ## to be emitted properly, otherwise the runtime may behave
    ## unpredictably or crash
    if name.isNil() or name.depth == 0 or name.isClosedOver:
        return
    elif name.depth < depth:
        # Ding! The given name is closed over: we need to
        # change the dummy Jump instruction that self.declareName
        # put in place for us into a StoreClosure. We also update
        # the name's isClosedOver field so that self.identifier()
        # can emit a LoadClosure instruction instead of a LoadVar
        self.closedOver.add(name)
        if self.closedOver.len() >= 16777216:
            self.error("too many consecutive closed-over variables (max is 16777215)")
        name.isClosedOver = true
        self.chunk.code[name.codePos] = StoreClosure.uint8()
        for i, b in self.closedOver.high().toTriple():
            self.chunk.code[name.codePos + i + 1] = b


proc compareTypes(self: Compiler, a, b: Type): bool =
    ## Compares two type objects
    ## for equality (works with nil!)

    # The nil code here is for void functions (when
    # we compare their return types)
    if a.isNil():
        return b.isNil() or b.kind == Any
    elif b.isNil():
        return a.isNil() or a.kind == Any
    elif a.kind == Any or b.kind == Any:
        # This is needed internally: user code
        # cannot generate code for matching
        # arbitrary types, but we need it for
        # function calls and stuff like that
        # since peon doesn't have return type
        # inference
        return true
    elif a.kind == Generic or b.kind == Generic:
        # Matching generic argument types
        return true
    elif a.kind != b.kind:
        # Next, we see the type discriminant:
        # If they're different, then they can't
        # be the same type!
        return false
    case a.kind:
        # If all previous checks pass, it's time
        # to go through each possible type peon
        # supports and compare it
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, Nan, Bool, Inf:
            # A value type's type is always equal to
            # another one's
            return true
        of Reference, Pointer:
            # Here we already know that both
            # a and b are of either of the two
            # types in this branch, so we just need
            # to compare their values
            return self.compareTypes(a.value, b.value)
        of Function:
            # Functions are a bit trickier
            if a.args.len() != b.args.len():
                return false
            elif not self.compareTypes(a.returnType, b.returnType):
                return false
            for (argA, argB) in zip(a.args, b.args):
                if not self.compareTypes(argA.kind, argB.kind):
                    return false
            return true
        else:
            # TODO: Custom types
            discard


proc toIntrinsic(name: string): Type =
    ## Converts a string to an intrinsic
    ## type if it is valid and returns nil
    ## otherwise
    if name in ["int", "int64", "i64"]:
        return Type(kind: Int64)
    elif name in ["uint64", "u64"]:
        return Type(kind: UInt64)
    elif name in ["int32", "i32"]:
        return Type(kind: Int32)
    elif name in ["uint32", "u32"]:
        return Type(kind: UInt32)
    elif name in ["int16", "i16"]:
        return Type(kind: Int16)
    elif name in ["uint16", "u16"]:
        return Type(kind: UInt16)
    elif name in ["int8", "i8"]:
        return Type(kind: Int8)
    elif name in ["uint8", "u8"]:
        return Type(kind: UInt8)
    elif name in ["f64", "float", "float64"]:
        return Type(kind: Float64)
    elif name in ["f32", "float32"]:
        return Type(kind: Float32)
    elif name == "byte":
        return Type(kind: Byte)
    elif name == "char":
        return Type(kind: Char)
    elif name == "nan":
        return Type(kind: Nan)
    elif name == "nil":
        return Type(kind: Nil)
    elif name == "inf":
        return Type(kind: Inf)
    elif name == "bool":
        return Type(kind: Bool)
    elif name == "typevar":
        return Type(kind: Typevar)
    else:
        return nil


proc inferType(self: Compiler, node: LiteralExpr): Type =
    ## Infers the type of a given literal expression.
    ## Numeric literals may carry a size specifier after
    ## a single quote (e.g. 'i32): without one, integers
    ## are Int64 and floats are Float64
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                return Type(kind: Int64)
            let typ = size[1].toIntrinsic()
            # compareTypes(typ, nil) is only true when typ is nil
            # (toIntrinsic never returns Any), i.e. unknown specifier
            if not self.compareTypes(typ, nil):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for int")
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                return Type(kind: Float64)
            let typ = size[1].toIntrinsic()
            if not self.compareTypes(typ, nil):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for float")
        of strExpr:
            # String literals are dispatched here by the Expression
            # overload, so they need a type of their own
            return Type(kind: String)
        of nilExpr:
            return Type(kind: Nil)
        of trueExpr:
            return Type(kind: Bool)
        of falseExpr:
            return Type(kind: Bool)
        of nanExpr:
            return Type(kind: TypeKind.Nan)
        of infExpr:
            return Type(kind: TypeKind.Inf)
        else:
            discard  # TODO


proc inferType(self: Compiler, node: Expression): Type =
    ## Infers the type of a given expression and
    ## returns it. Returns nil when the type cannot
    ## be determined
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            let name = self.resolve(node)
            if not name.isNil():
                result = name.valueType
            else:
                # Not a declared name: maybe it names a built-in type
                result = node.name.lexeme.toIntrinsic()
        of unaryExpr:
            return self.inferType(UnaryExpr(node).a)
        of binaryExpr:
            let node = BinaryExpr(node)
            var a = self.inferType(node.a)
            var b = self.inferType(node.b)
            if not self.compareTypes(a, b):
                return nil
            return a
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr}:
            return self.inferType(LiteralExpr(node))
        of lambdaExpr:
            var node = LambdaExpr(node)
            result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
            if not node.returnType.isNil():
                result.returnType = self.inferType(node.returnType)
            for argument in node.arguments:
                result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType)))
        of callExpr:
            var node = CallExpr(node)
            case node.callee.kind:
                of identExpr:
                    let resolved = self.resolve(IdentExpr(node.callee))
                    if not resolved.isNil():
                        result = resolved.valueType.returnType
                        if result.isNil():
                            result = Type(kind: Any)
                    else:
                        result = nil
                of lambdaExpr:
                    result = self.inferType(LambdaExpr(node.callee).returnType)
                else:
                    discard  # Unreachable
        of varExpr:
            result = self.inferType(Var(node).value)
            # The wrapped type may be unknown: there is nothing
            # to flag as mutable in that case
            if not result.isNil():
                result.mutable = true
        of refExpr:
            result = Type(kind: Reference, value: self.inferType(Ref(node).value))
        of ptrExpr:
            result = Type(kind: Pointer, value: self.inferType(Ptr(node).value))
        else:
            discard  # Unreachable


proc inferType(self: Compiler, node: Declaration, strictMutable: bool = true): Type =
    ## Infers the type of a given declaration
    ## and returns it
    if node.isNil():
        return nil
    case node.kind:
        of NodeKind.funDecl:
            var node = FunDecl(node)
            let resolved = self.resolve(node.name)
            if not resolved.isNil():
                return resolved.valueType
        of NodeKind.varDecl:
            var node = VarDecl(node)
            let resolved = self.resolve(node.name)
            if not resolved.isNil():
                return resolved.valueType
            else:
                # NOTE(review): among the overloads visible in this file,
                # only this Declaration overload accepts strictMutable, so
                # this call presumably re-enters it rather than the
                # Expression overload. Confirm that is intended
                return self.inferType(node.value, strictMutable)
        else:
            return  # Unreachable


proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a
    ## type object. A nil type is rendered as
    ## "nil" so that error messages can be built
    ## even when a type could not be inferred
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            result &= ($typ.kind).toLowerAscii()
        of Pointer:
            result &= &"ptr {self.typeToStr(typ.value)}"
        of Reference:
            result &= &"ref {self.typeToStr(typ.value)}"
        of Function:
            result &= "fn ("
            for i, (argName, argType) in typ.args:
                result &= &"{argName}: "
                if not argType.isNil() and argType.mutable:
                    result &= "var "
                result &= self.typeToStr(argType)
                if i < typ.args.len() - 1:
                    result &= ", "
            result &= ")"
            if not typ.returnType.isNil():
                result &= &": {self.typeToStr(typ.returnType)}"
        of Generic:
            result = typ.node.name.lexeme
        else:
            discard


proc findByName(self: Compiler, name: string): seq[Name] =
    ## Looks for objects that have been already declared
    ## with the given name. Returns all objects that apply,
    ## most recently declared first
    for obj in reversed(self.names):
        if obj.name.token.lexeme == name:
            result.add(obj)


proc findByType(self: Compiler, name: string, kind: Type): seq[Name] =
    ## Looks for objects that have already been declared
    ## with the given name and type
    for obj in self.findByName(name):
        if self.compareTypes(obj.valueType, kind):
            result.add(obj)

#[
proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] =
    ## Looks for objects that have been already declared
    ## with the given name at the given scope depth.
    ## Returns all objects that apply
    for obj in self.findByName(name):
        if obj.depth == depth:
            result.add(obj)
]#


proc matchImpl(self: Compiler, name: string, kind: Type): Name =
    ## Tries to find a matching function implementation
    ## compatible with the given type and returns its
    ## name object. Raises a CompileError listing the
    ## candidates when there is no match, or when more
    ## than one implementation matches
    let impl = self.findByType(name, kind)
    if impl.len() == 0:
        var msg = &"cannot find a suitable implementation for '{name}'"
        let names = self.findByName(name)
        if names.len() > 0:
            msg &= &", found {len(names)} candidate"
            if names.len() > 1:
                msg &= "s"
            msg &= ": "
            for name in names:
                msg &= &"\n - '{name.name.token.lexeme}' of type '{self.typeToStr(name.valueType)}'"
                if name.valueType.kind != Function:
                    msg &= ", not a callable"
                elif kind.args.len() != name.valueType.args.len():
                    msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})"
                else:
                    # Only the first offending argument is reported
                    for i, arg in kind.args:
                        if name.valueType.args[i].kind.mutable and not arg.kind.mutable:
                            msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'"
                            break
                        elif not self.compareTypes(arg.kind, name.valueType.args[i].kind):
                            msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead"
                            break
        self.error(msg)
    elif impl.len() > 1:
        var msg = &"multiple matching implementations of '{name}' found:\n"
        for fn in reversed(impl):
            msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
        self.error(msg)
    return impl[0]


proc emitFunction(self: Compiler, name: Name) =
    ## Wrapper to emit LoadFunction instructions
    if name.isFunDecl:
        self.emitByte(LoadFunction)
        self.emitBytes(name.codePos.toTriple())
    # If we're not loading a statically declared
    # function, then it must be a function object
    # created by previous LoadFunction instructions
    # that is now bound to some variable, so we just
    # load it
    elif not name.isClosedOver:
        self.emitByte(LoadVar)
        self.emitBytes(self.getStackPos(name.name).toTriple())
    else:
        self.emitByte(LoadClosure)
        self.emitBytes(self.getClosurePos(name.name).toTriple())


## End of utility functions

proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such
    ## as singletons, strings and numbers.
    ## Hexadecimal, binary and octal literals are
    ## converted to a decimal integer node before
    ## their constant is emitted; the type is still
    ## inferred from the original literal
    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue)
        of falseExpr:
            self.emitByte(LoadFalse)
        of nilExpr:
            self.emitByte(LoadNil)
        of infExpr:
            self.emitByte(LoadInf)
        of nanExpr:
            self.emitByte(LoadNan)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        of intExpr:
            var x: int
            var y = IntExpr(node)
            # Parsed only to validate the range of the literal
            try:
                discard parseInt(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(y, self.inferType(y))
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            try:
                discard parseHex(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, self.inferType(y))
        of binExpr:
            var x: int
            var y = BinExpr(node)
            try:
                discard parseBin(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, self.inferType(y))
        of octExpr:
            var x: int
            var y = OctExpr(node)
            try:
                discard parseOct(y.literal.lexeme, x)
            except ValueError:
                self.error("integer value out of range")
            let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
                                        pos: (start: y.token.pos.start,
                                              stop: y.token.pos.start + len($x))
                                        )
                                  )
            self.emitConstant(node, self.inferType(y))
        of floatExpr:
            var x: float
            var y = FloatExpr(node)
            try:
                discard parseFloat(y.literal.lexeme, x)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(y, self.inferType(y))
        of awaitExpr:
            var y = AwaitExpr(node)
            self.expression(y.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")


proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) =
    ## Emits instructions for builtin functions
    ## such as addition or subtraction. Arithmetic
    ## builtins have both operands compiled up front
    ## (second operand first), followed by a single
    ## opcode. Short-circuiting logical operators
    ## compile their operands themselves, around a
    ## conditional jump
    const shortCircuitOps = ["LogicalOr", "LogicalAnd",
                             "GenericLogicalOr", "GenericLogicalAnd"]
    if fn.valueType.builtinOp notin shortCircuitOps:
        self.expression(args[1])
        self.expression(args[0])
    case fn.valueType.builtinOp:
        of "AddInt64": self.emitByte(AddInt64)
        of "SubInt64": self.emitByte(SubInt64)
        of "DivInt64": self.emitByte(DivInt64)
        of "MulInt64": self.emitByte(MulInt64)
        of "AddInt32": self.emitByte(AddInt32)
        of "SubInt32": self.emitByte(SubInt32)
        of "DivInt32": self.emitByte(DivInt32)
        of "MulInt32": self.emitByte(MulInt32)
        of "AddInt16": self.emitByte(AddInt16)
        of "SubInt16": self.emitByte(SubInt16)
        of "DivInt16": self.emitByte(DivInt16)
        of "MulInt16": self.emitByte(MulInt16)
        of "AddInt8": self.emitByte(AddInt8)
        of "SubInt8": self.emitByte(SubInt8)
        of "DivInt8": self.emitByte(DivInt8)
        of "MulInt8": self.emitByte(MulInt8)
        of "AddUInt64": self.emitByte(AddUInt64)
        of "SubUInt64": self.emitByte(SubUInt64)
        of "DivUInt64": self.emitByte(DivUInt64)
        of "MulUInt64": self.emitByte(MulUInt64)
        of "AddUInt32": self.emitByte(AddUInt32)
        of "SubUInt32": self.emitByte(SubUInt32)
        of "DivUInt32": self.emitByte(DivUInt32)
        of "MulUInt32": self.emitByte(MulUInt32)
        of "AddUInt16": self.emitByte(AddUInt16)
        of "SubUInt16": self.emitByte(SubUInt16)
        of "DivUInt16": self.emitByte(DivUInt16)
        of "MulUInt16": self.emitByte(MulUInt16)
        of "AddUInt8": self.emitByte(AddUInt8)
        of "SubUInt8": self.emitByte(SubUInt8)
        of "DivUInt8": self.emitByte(DivUInt8)
        of "MulUInt8": self.emitByte(MulUInt8)
        # Float64 builtins map to their own opcodes, like every
        # other width does
        of "AddFloat64": self.emitByte(AddFloat64)
        of "SubFloat64": self.emitByte(SubFloat64)
        of "DivFloat64": self.emitByte(DivFloat64)
        of "MulFloat64": self.emitByte(MulFloat64)
        of "AddFloat32": self.emitByte(AddFloat32)
        of "SubFloat32": self.emitByte(SubFloat32)
        of "DivFloat32": self.emitByte(DivFloat32)
        of "MulFloat32": self.emitByte(MulFloat32)
        of "LogicalOr", "GenericLogicalOr":
            # The second operand's code is jumped over when
            # the JumpIfTrue emitted here is taken
            self.expression(args[0])
            let jump = self.emitJump(JumpIfTrue)
            self.expression(args[1])
            self.patchJump(jump)
        of "LogicalAnd", "GenericLogicalAnd":
            self.expression(args[0])
            var jump = self.emitJump(JumpIfFalseOrPop)
            self.expression(args[1])
            self.patchJump(jump)
        else:
            self.error(&"unknown built-in: '{fn.valueType.builtinOp}'")


proc generateCall(self: Compiler, fn: Name, args: seq[Expression]) =
    ## Small wrapper that abstracts emitting a call instruction
    ## for a given function
    if fn.valueType.isBuiltinFunction:
        # Builtins map to individual instructions
        # (usually 1, but some use more) so we handle
        # them differently
        self.handleBuiltinFunction(fn, args)
        return
    if any(fn.valueType.args, proc (arg: tuple[name: string, kind: Type]): bool = arg[1].kind == Generic):
        # The function has generic arguments! We need to compile a version
        # of it with the right type data
        self.funDecl(nil, fn, args)
    self.emitFunction(fn)
    self.emitByte(LoadReturnAddress)
    let pos = self.chunk.code.len()
    # We initially emit a dummy return
    # address. It is patched later
    self.emitBytes(0.toQuad())
    for argument in reversed(args):
        # We pass the arguments in reverse
        # because of how stack semantics
        # work. They'll be fixed at runtime
        self.expression(argument)
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    self.emitByte(Call)
    self.emitBytes(fn.valueType.args.len().toTriple())
    self.patchReturnAddress(pos)


proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
    ## Emits the code to call a unary operator
    self.generateCall(fn, @[op.a])


proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
    ## Emits the code to call a binary operator
    self.generateCall(fn, @[op.a, op.b])


proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as decimal
    ## and bitwise negation
    let valueType = self.inferType(node.a)
    # The return type is Any: only the operand's type selects the operator
    let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", valueType)]))
    self.callUnaryOp(funct, node)


proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expressions
    let typeOfA = self.inferType(node.a)
    let typeOfB = self.inferType(node.b)
    let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", typeOfA), ("", typeOfB)]))
    self.callBinaryOp(funct, node)


proc declareName(self: Compiler, node: Declaration, mutable: bool = false) =
    ## Statically declares a name into the current scope.
    ## "Declaring" a name only means updating our internal
    ## list of identifiers so that further calls to resolve()
    ## correctly return them. There is no code to actually
    ## declare a variable at runtime: the value is already
    ## on the stack
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            if self.names.high() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777215 variables at a time")
            for name in self.findByName(node.name.token.lexeme):
                if name.depth == self.scopeDepth and not name.isFunctionArgument:
                    # Trying to redeclare a variable in the same scope/context is an error, but it's okay
                    # if it's a function argument (for example, if you want to copy a number to
                    # mutate it)
                    self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
            self.names.add(Name(depth: self.scopeDepth,
                                name: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: self.inferType(node.value),
                                codePos: self.chunk.code.len(),
                                isLet: node.isLet,
                                isClosedOver: false,
                                line: node.token.line))
            if mutable:
                self.names[^1].valueType.mutable = true
        of NodeKind.funDecl:
            var node = FunDecl(node)
            # We declare the generics before the function so we
            # can refer to them
            for gen in node.generics:
                self.names.add(Name(depth: self.scopeDepth + 1,
                                    isPrivate: true,
                                    isConst: false,
                                    owner: self.currentModule,
                                    line: node.token.line,
                                    valueType: Type(kind: Generic, mutable: false, node: gen.name),
                                    name: gen.name))
            self.names.add(Name(depth: self.scopeDepth,
                                isPrivate: node.isPrivate,
                                isConst: false,
                                owner: self.currentModule,
                                valueType: Type(kind: Function,
                                                name: node.name.token.lexeme,
                                                returnType: self.inferType(node.returnType),
                                                args: @[],
                                                fun: node),
                                codePos: self.chunk.code.len(),
                                name: node.name,
                                isLet: false,
                                isClosedOver: false,
                                line: node.token.line,
                                isFunDecl: true))
            let fn = self.names[^1]
            var name: Name
            for argument in node.arguments:
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                # wait, no LoadVar? Yes! That's because when calling functions,
                # arguments will already be on the stack, so there's no need to
                # load them here
                name = Name(depth: self.scopeDepth + 1,
                            isPrivate: true,
                            owner: self.currentModule,
                            isConst: false,
                            name: argument.name,
                            valueType: nil,
                            codePos: 0,
                            isLet: false,
                            isClosedOver: false,
                            line: argument.name.token.line,
                            isFunctionArgument: true)
                self.names.add(name)
                name.valueType = self.inferType(argument.valueType)
                # If it's still nil, it's an error!
                if name.valueType.isNil():
                    self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
                fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
        else:
            discard  # TODO: Types, enums


proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers
    var s = self.resolve(node)
    if s.isNil():
        self.error(&"reference to undeclared name '{node.token.lexeme}'")
    elif s.isConst:
        # Constants are always emitted as Load* instructions
        # no matter the scope depth
        self.emitConstant(node, self.inferType(node))
    else:
        self.emitByte(JumpForwards)
        self.emitBytes(0.toTriple())
        self.detectClosureVariable(s)
        if s.valueType.kind == Function:
            # Functions have no runtime
            # representation, so we need
            # to create one on the fly
            self.emitByte(LoadFunction)
            self.emitBytes(s.codePos.toTriple())
        elif not s.isClosedOver:
            # Static name resolution, loads value at index in the stack. Very fast. Much wow.
            self.emitByte(LoadVar)
            # No need to check for -1 here: we already did a nil-check above!
            self.emitBytes(self.getStackPos(s.name).toTriple())
        else:
            # Loads a closure variable. Stored in a separate "closure array" in the VM that does not
            # align its semantics with the call stack. This makes closures work as expected and is
            # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway)
            self.emitByte(LoadClosure)
            self.emitBytes(self.getClosurePos(s.name).toTriple())


proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions. Assigning to an
    ## undeclared name, a constant or a let binding is a
    ## compile error
    case node.kind:
        of assignExpr:
            let node = AssignExpr(node)
            let name = IdentExpr(node.name)
            var r = self.resolve(name)
            if r.isNil():
                self.error(&"assignment to undeclared name '{name.token.lexeme}'")
            elif r.isConst:
                self.error(&"cannot assign to '{name.token.lexeme}' (constant)")
            elif r.isLet:
                self.error(&"cannot reassign '{name.token.lexeme}'")
            self.expression(node.value)
            self.detectClosureVariable(r)
            if not r.isClosedOver:
                self.emitByte(StoreVar)
                self.emitBytes(self.getStackPos(name).toTriple())
            else:
                # Stores a closure variable. Stored in a separate "closure array" in the VM that does not
                # align its semantics with the call stack. This makes closures work as expected and is
                # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway)
                self.emitByte(StoreClosure)
                self.emitBytes(self.getClosurePos(name).toTriple())
        of setItemExpr:
            let node = SetItemExpr(node)
            let typ = self.inferType(node)
            if typ.isNil():
                self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")


proc beginScope(self: Compiler) =
    ## Begins a new local scope by incrementing the current
    ## scope's depth
    inc(self.scopeDepth)


proc endScope(self: Compiler) =
    ## Ends the current local scope
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    dec(self.scopeDepth)
    var names: seq[Name] = @[]
    for name in self.names:
        if name.depth > self.scopeDepth:
            names.add(name)
    if len(names) > 1:
        # If we're popping less than 65535 variables, then
        # we can emit a PopN
instruction. This is true for # 99.99999% of the use cases of the language (who the # hell is going to use 65 THOUSAND local variables?), but # if you'll ever use more then Peon will emit a PopN instruction # for the first 65 thousand and change local variables and then # emit another batch of plain ol' Pop instructions for the rest self.emitByte(PopN) self.emitBytes(len(names).toDouble()) if len(names) > uint16.high().int(): for i in countdown(self.names.high(), len(names) - uint16.high().int()): if self.names[i].depth > self.scopeDepth: self.emitByte(PopC) elif len(names) == 1: # We only emit PopN if we're popping more than one value self.emitByte(PopC) # This seems *really* slow, but # what else should I do? Nim doesn't # allow the removal of items during # seq iteration so ¯\_(ツ)_/¯ var idx = 0 while idx < self.names.len(): for name in names: if self.names[idx] == name: self.names.delete(idx) inc(idx) proc blockStmt(self: Compiler, node: BlockStmt) = ## Compiles block statements, which create a new ## local scope self.beginScope() for decl in node.code: self.declaration(decl) self.endScope() proc ifStmt(self: Compiler, node: IfStmt) = ## Compiles if/else statements for conditional ## execution of code var cond = self.inferType(node.condition) self.expression(node.condition) if not self.compareTypes(cond, Type(kind: Bool)): if cond.isNil(): self.error(&"expecting value of type 'bool', but expression has no type") else: self.error(&"expecting value of type 'bool', got '{self.typeToStr(cond)}' instead") let jump = self.emitJump(JumpIfFalsePop) self.statement(node.thenBranch) let jump2 = self.emitJump(JumpForwards) self.patchJump(jump) if not node.elseBranch.isNil(): self.statement(node.elseBranch) self.patchJump(jump2) proc emitLoop(self: Compiler, begin: int) = ## Emits a JumpBackwards instruction with the correct ## jump offset var offset = self.chunk.code.len() - begin + 4 if offset > 16777215: self.error("cannot jump more than 16777215 bytecode instructions") 
self.emitByte(JumpBackwards) self.emitBytes(offset.toTriple()) proc whileStmt(self: Compiler, node: WhileStmt) = ## Compiles C-style while loops and ## desugared C-style for loops var cond = self.inferType(node.condition) self.expression(node.condition) if not self.compareTypes(cond, Type(kind: Bool)): if cond.isNil(): self.error(&"expecting value of type 'bool', but expression has no type") else: self.error(&"expecting value of type 'bool', got '{self.typeToStr(cond)}' instead") let start = self.chunk.code.len() var jump = self.emitJump(JumpIfFalsePop) self.statement(node.body) self.patchJump(jump) self.emitLoop(start) proc checkCallIsPure(self: Compiler, node: ASTnode): bool = ## Checks if a call has any side effects return true # TODO proc callExpr(self: Compiler, node: CallExpr) = ## Compiles code to call a function var args: seq[tuple[name: string, kind: Type]] = @[] var argExpr: seq[Expression] = @[] var kind: Type # TODO: Keyword arguments for i, argument in node.arguments.positionals: kind = self.inferType(argument) if kind.isNil(): if argument.kind == identExpr: self.error(&"reference to undeclared identifier '{IdentExpr(argument).name.lexeme}'") self.error(&"cannot infer the type of argument {i + 1} in function call") args.add(("", kind)) argExpr.add(argument) for argument in node.arguments.keyword: discard if args.len() >= 16777216: self.error(&"cannot pass more than 16777215 arguments") var funct: Name case node.callee.kind: of identExpr: funct = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args)) of NodeKind.callExpr: var node = node.callee while node.kind == callExpr: self.callExpr(CallExpr(node)) node = CallExpr(node).callee # funct = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args)) # TODO: Calling lambdas else: let typ = self.inferType(node) if typ.isNil(): self.error(&"expression has no type") else: self.error(&"object of 
type '{self.typeToStr(typ)}' is not callable") if any(funct.valueType.args, proc (arg: tuple[name: string, kind: Type]): bool = arg[1].kind == Generic): # The function has generic arguments! We need to compile a version # of it with the right type data self.funDecl(nil, funct, argExpr) # TODO: What next? elif funct.valueType.isBuiltinFunction: self.handleBuiltinFunction(funct, argExpr) else: self.generateCall(funct, argExpr) if self.scopeDepth > 0 and not self.checkCallIsPure(node.callee): if self.currentFunction.name != "": self.error(&"cannot make sure that calls to '{self.currentFunction.name}' are side-effect free") else: self.error(&"cannot make sure that call is side-effect free") proc expression(self: Compiler, node: Expression) = ## Compiles all expressions case node.kind: of NodeKind.callExpr: self.callExpr(CallExpr(node)) # TODO of getItemExpr: discard # TODO: Get rid of this of pragmaExpr: discard # TODO # Note that for setItem and assign we don't convert # the node to its true type because that type information # would be lost in the call anyway. The differentiation # happens in self.assignment() of setItemExpr, assignExpr: # TODO: Get rid of this self.assignment(node) of identExpr: self.identifier(IdentExpr(node)) of unaryExpr: # Unary expressions such as ~5 and -3 self.unary(UnaryExpr(node)) of groupingExpr: # Grouping expressions like (2 + 1) self.expression(GroupingExpr(node).expression) of binaryExpr: # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 self.binary(BinaryExpr(node)) of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, infExpr, nanExpr, floatExpr, nilExpr: # Since all of these AST nodes share the # same overall structure and the kind # field is enough to tell one from the # other, why bother with specialized # cases when one is enough? 
self.literal(node) else: self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") proc awaitStmt(self: Compiler, node: AwaitStmt) = ## Compiles await statements. An await statement ## is like an await expression, but parsed in the ## context of statements for usage outside expressions, ## meaning it can be used standalone. It's basically the ## same as an await expression followed by a semicolon. ## Await expressions are the only native construct to ## run coroutines from within an already asynchronous ## context (which should be orchestrated by an event loop). ## They block in the caller until the callee returns self.expression(node.expression) self.emitByte(OpCode.Await) proc deferStmt(self: Compiler, node: DeferStmt) = ## Compiles defer statements. A defer statement ## is executed right before its containing function ## exits (either because of a return or an exception) let current = self.chunk.code.len self.expression(node.expression) for i in countup(current, self.chunk.code.high()): self.deferred.add(self.chunk.code[i]) self.chunk.code.delete(i) # TODO: Do not change bytecode size proc endFunctionBeforeReturn(self: Compiler) = ## Emits code to clear a function's ## stack frame right before executing ## its return instruction var popped = 0 for name in self.names: if name.depth == self.scopeDepth and name.valueType.kind notin {Function, Generic, CustomType} and not name.isClosedOver: inc(popped) if popped > 1: self.emitByte(PopN) self.emitBytes(popped.toDouble()) dec(popped, uint16.high().int) while popped > 0: self.emitByte(PopC) dec(popped) proc returnStmt(self: Compiler, node: ReturnStmt) = ## Compiles return statements let actual = self.inferType(node.value) var expected = self.currentFunction.returnType if not expected.isNil() and expected.kind == Generic: expected = actual if actual.isNil() and not expected.isNil(): if not node.value.isNil(): if node.value.kind == identExpr: 
self.error(&"reference to undeclared identifier '{node.value.token.lexeme}'") elif node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr: self.error(&"call to undeclared function '{CallExpr(node.value).callee.token.lexeme}'") self.error(&"expected return value of type '{self.typeToStr(expected)}', but expression has no type") elif expected.isNil() and not actual.isNil(): self.error("empty return statement is only allowed in void functions") elif not self.compareTypes(actual, expected): self.error(&"expected return value of type '{self.typeToStr(expected)}', got '{self.typeToStr(actual)}' instead") if not node.value.isNil(): self.expression(node.value) self.emitByte(OpCode.SetResult) self.endFunctionBeforeReturn() self.emitByte(OpCode.Return) if not node.value.isNil(): self.emitByte(1) else: self.emitByte(0) proc yieldStmt(self: Compiler, node: YieldStmt) = ## Compiles yield statements self.expression(node.expression) self.emitByte(OpCode.Yield) proc raiseStmt(self: Compiler, node: RaiseStmt) = ## Compiles raise statements self.expression(node.exception) self.emitByte(OpCode.Raise) proc continueStmt(self: Compiler, node: ContinueStmt) = ## Compiles continue statements. A continue statements ## jumps to the next iteration in a loop if self.currentLoop.start > 16777215: self.error("too much code to jump over in continue statement") self.emitByte(Jump) self.emitBytes(self.currentLoop.start.toTriple()) proc breakStmt(self: Compiler, node: BreakStmt) = ## Compiles break statements. A continue statement ## jumps to the next iteration in a loop self.currentLoop.breakPos.add(self.emitJump(OpCode.JumpForwards)) if self.currentLoop.depth > self.scopeDepth: # Breaking out of a loop closes its scope self.endScope() proc patchBreaks(self: Compiler) = ## Patches the jumps emitted by ## breakStmt. 
This is needed ## because the size of code ## to skip is not known before ## the loop is fully compiled for brk in self.currentLoop.breakPos: self.patchJump(brk) proc assertStmt(self: Compiler, node: AssertStmt) = ## Compiles assert statements (raise ## AssertionError if the expression is falsey) self.expression(node.expression) self.emitByte(OpCode.Assert) proc forEachStmt(self: Compiler, node: ForEachStmt) = ## Compiles foreach loops # TODO proc importStmt(self: Compiler, node: ImportStmt) = ## Imports a module at compile time if self.scopeDepth > 0: self.error("import statements are only allowed at the top level") var lexer = newLexer() var parser = newParser() var compiler = newCompiler() # TODO: Find module var result {.used.} = compiler.compile(parser.parse(lexer.lex("", node.moduleName.name.lexeme), node.moduleName.name.lexeme), node.moduleName.name.lexeme) proc statement(self: Compiler, node: Statement) = ## Compiles all statements case node.kind: of exprStmt: var expression = ExprStmt(node).expression self.expression(expression) if expression.kind == callExpr and self.inferType(CallExpr(expression).callee).returnType.isNil(): # The expression has no type, so we don't have to # pop anything discard else: if self.replMode: self.emitByte(PopRepl) else: self.emitByte(Pop) of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) of NodeKind.assertStmt: self.assertStmt(AssertStmt(node)) of NodeKind.raiseStmt: self.raiseStmt(RaiseStmt(node)) of NodeKind.breakStmt: self.breakStmt(BreakStmt(node)) of NodeKind.continueStmt: self.continueStmt(ContinueStmt(node)) of NodeKind.returnStmt: self.returnStmt(ReturnStmt(node)) of NodeKind.importStmt: self.importStmt(ImportStmt(node)) of NodeKind.whileStmt: # Note: Our parser already desugars # for loops to while loops! 
let loop = self.currentLoop self.currentLoop = Loop(start: self.chunk.code.len(), depth: self.scopeDepth, breakPos: @[]) self.whileStmt(WhileStmt(node)) self.patchBreaks() self.currentLoop = loop of NodeKind.forEachStmt: self.forEachStmt(ForEachStmt(node)) of NodeKind.blockStmt: self.blockStmt(BlockStmt(node)) of NodeKind.yieldStmt: self.yieldStmt(YieldStmt(node)) of NodeKind.awaitStmt: self.awaitStmt(AwaitStmt(node)) of NodeKind.deferStmt: self.deferStmt(DeferStmt(node)) of NodeKind.tryStmt: discard else: self.expression(Expression(node)) proc varDecl(self: Compiler, node: VarDecl) = ## Compiles variable declarations let expected = self.inferType(node.valueType) let actual = self.inferType(node.value) if expected.isNil() and actual.isNil(): if node.value.kind == identExpr or node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr: var name = node.value.token.lexeme if node.value.kind == callExpr: name = CallExpr(node.value).callee.token.lexeme self.error(&"reference to undeclared identifier '{name}'") self.error(&"'{node.name.token.lexeme}' has no type") elif not expected.isNil() and expected.mutable: # I mean, variables *are* already mutable (some of them anyway) self.error(&"invalid type '{self.typeToStr(expected)}' for var") elif not self.compareTypes(expected, actual): if not expected.isNil(): self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'") self.expression(node.value) self.declareName(node, mutable=node.token.kind == TokenType.Var) self.emitByte(StoreVar) self.emitBytes((self.getStackPos(self.names[^1].name) + 1).toTriple()) proc typeDecl(self: Compiler, node: TypeDecl) = ## Compiles type declarations # TODO proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) = ## Handles the "magic" pragma. 
Assumes the given name is already ## declared if pragma.args.len() != 1: self.error("'magic' pragma: wrong number of arguments") elif pragma.args[0].kind != strExpr: self.error("'magic' pragma: wrong type of argument (constant string expected)") elif node.kind != NodeKind.funDecl: self.error("'magic' pragma is not valid in this context") var node = FunDecl(node) var fn = self.resolve(node.name) fn.valueType.isBuiltinFunction = true fn.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2] # The magic pragma ignores the function's body node.body = nil proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) = ## Handles the "pure" pragma case node.kind: of NodeKind.funDecl: FunDecl(node).isPure = true of lambdaExpr: LambdaExpr(node).isPure = true else: self.error("'pure' pragma is not valid in this context") proc dispatchPragmas(self: Compiler, node: ASTnode) = ## Dispatches pragmas bound to objects var pragmas: seq[Pragma] = @[] case node.kind: of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl: pragmas = Declaration(node).pragmas of lambdaExpr: pragmas = LambdaExpr(node).pragmas else: discard # Unreachable for pragma in pragmas: if pragma.name.token.lexeme notin self.compilerProcs: self.error(&"unknown pragma '{pragma.name.token.lexeme}'") self.compilerProcs[pragma.name.token.lexeme](self, pragma, node) proc fixGenericFunc(self: Compiler, name: Name, args: seq[Expression]): Type = ## Specializes generic arguments in functions var fn = name.valueType.deepCopy() result = fn var typ: Type for i in 0..args.high(): if fn.args[i].kind.kind == Generic: typ = self.inferType(args[i]) fn.args[i].kind = typ self.resolve(fn.args[i].name).valueType = typ if fn.args[i].kind.isNil(): self.error(&"cannot specialize generic function: argument {i + 1} has no type") proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) = ## Compiles function declarations if not node.isNil(): if node.generics.len() > 0 and fn.isNil() and 
args.len() == 0: # Generic function! We can't compile it right now self.declareName(node) self.dispatchPragmas(node) return self.declareName(node) self.dispatchPragmas(node) var node = node var fn = if fn.isNil(): self.names[^(node.arguments.len() + 1)] else: fn if fn.valueType.isBuiltinFunction: # We take the arguments off of our name list # because they become temporaries on the stack for i in self.names.high() - node.arguments.high()..self.names.high(): self.names.delete(i) else: var function = self.currentFunction var jmp: int # Builtin functions (usually) map to a single # bytecode instruction to avoid unnecessary # overhead from peon's calling convention # This also means that peon's fast builtins # can only be relatively simple self.frames.add(self.names.high()) # A function's code is just compiled linearly # and then jumped over jmp = self.emitJump(JumpForwards) # Function's code starts after the jump fn.codePos = self.chunk.code.len() # We store the current function self.currentFunction = fn.valueType if node.isNil(): # We got called back with more specific type # arguments: time to fix them! 
self.currentFunction = self.fixGenericFunc(fn, args) node = self.currentFunction.fun elif not node.body.isNil(): if BlockStmt(node.body).code.len() == 0: self.error("cannot declare function with empty body") else: discard # TODO: Forward declarations let impl = self.findByType(fn.name.token.lexeme, fn.valueType) if impl.len() > 1: # We found more than one (public) implementation of # the same function with the same name: this is an # error, as it would raise ambiguity when calling them var msg = &"multiple matching implementations of '{fn.name.token.lexeme}' found:\n" for f in reversed(impl): msg &= &"- '{f.name.token.lexeme}' at line {f.line} of type {self.typeToStr(f.valueType)}\n" self.error(msg) # Since the deferred array is a linear # sequence of instructions and we want # to keep track to whose function's each # set of deferred instruction belongs, # we record the length of the deferred # array before compiling the function # and use this info later to compile # the try/finally block with the deferred # code var deferStart = self.deferred.len() # We let our debugger know a function is starting let start = self.chunk.code.high() self.beginScope() for decl in BlockStmt(node.body).code: self.declaration(decl) let typ = self.currentFunction.returnType var hasVal: bool = false case self.currentFunction.fun.kind: of NodeKind.funDecl: hasVal = self.currentFunction.fun.hasExplicitReturn of NodeKind.lambdaExpr: hasVal = LambdaExpr(Declaration(self.currentFunction.fun)).hasExplicitReturn else: discard # Unreachable if not hasVal and not typ.isNil(): # There is no explicit return statement anywhere in the function's # body: while this is not a tremendously useful piece of information (since # the presence of at least one doesn't mean all control flow cases are # covered), it definitely is an error worth reporting self.error("function has an explicit return type, but no return statement was found") hasVal = hasVal and not typ.isNil() self.endScope() # Terminates the 
function's context self.emitByte(OpCode.Return) if hasVal: self.emitByte(1) else: self.emitByte(0) # Some debugging info here self.chunk.cfi.add(start.toTriple()) self.chunk.cfi.add(self.chunk.code.high().toTriple()) self.chunk.cfi.add(self.frames[^1].toTriple()) self.chunk.cfi.add(uint8(node.arguments.len())) if not node.name.isNil(): self.chunk.cfi.add(fn.name.token.lexeme.len().toDouble()) var s = fn.name.token.lexeme if s.len() >= uint16.high().int: s = node.name.token.lexeme[0..uint16.high()] self.chunk.cfi.add(s.toBytes()) else: self.chunk.cfi.add(0.toDouble()) # Currently defer is not functional so we # just pop the instructions for _ in deferStart..self.deferred.high(): discard self.deferred.pop() # Well, we've compiled everything: time to patch # the jump offset self.patchJump(jmp) # Pops a call frame discard self.frames.pop() # Restores the enclosing function (if any). # Makes nested calls work (including recursion) self.currentFunction = function proc patchReturnAddress(self: Compiler, pos: int) = ## Patches the return address of a function ## call let address = self.chunk.code.len().toQuad() self.chunk.code[pos] = address[0] self.chunk.code[pos + 1] = address[1] self.chunk.code[pos + 2] = address[2] self.chunk.code[pos + 3] = address[3] proc declaration(self: Compiler, node: Declaration) = ## Compiles all declarations case node.kind: of NodeKind.varDecl: self.varDecl(VarDecl(node)) of NodeKind.funDecl: self.funDecl(FunDecl(node)) of NodeKind.typeDecl: self.typeDecl(TypeDecl(node)) else: self.statement(Statement(node)) proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk = ## Compiles a sequence of AST nodes into a chunk ## object self.chunk = newChunk() self.ast = ast self.file = file self.names = @[] self.scopeDepth = 0 self.currentFunction = nil self.currentModule = self.file.extractFilename() self.current = 0 # Every peon program has a hidden entry point in # which user code is wrapped. 
Think of it as if # peon is implicitly writing the main() function # of your program and putting all of your code in # there. While we call our entry point just like # any regular peon function, we can't use our handy # helper generateCall() because we need to keep track # of where our program ends (which we don't know yet). # To fix this, we emit dummy offsets and patch them # later, once we know the boundaries of our hidden main() var main = Name(depth: 0, isPrivate: true, isConst: false, isLet: false, isClosedOver: false, owner: self.currentModule, valueType: Type(kind: Function, name: "", returnType: nil, args: @[]), codePos: 13, # Jump address is hardcoded name: newIdentExpr(Token(lexeme: "", kind: Identifier)), line: -1) self.names.add(main) self.emitByte(LoadFunction) self.emitBytes(main.codePos.toTriple()) self.emitByte(LoadReturnAddress) let pos = self.chunk.code.len() self.emitBytes(0.toQuad()) self.emitByte(Call) self.emitBytes(0.toTriple()) while not self.done(): self.declaration(Declaration(self.step())) self.endFunctionBeforeReturn() self.patchReturnAddress(pos) self.emitByte(OpCode.Return) self.emitByte(0) # Entry point has no return value result = self.chunk