diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 5ba6d56..266a27a 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -53,10 +53,6 @@ type case kind: TypeKind: of Function: name: string - # Unfortunately we need to pollute - # the type system with AST nodes due - # to how we handle generics - funNode: FunDecl isLambda: bool isGenerator: bool isCoroutine: bool @@ -64,6 +60,7 @@ type returnType: Type isBuiltinFunction: bool builtinOp: string + fun: FunDecl of Reference, Pointer: value: Type of Generic: @@ -140,7 +137,7 @@ type # in a local scope, otherwise it's global scopeDepth: int # The current function being compiled - currentFunction: FunDecl + currentFunction: Type # Are optimizations turned on? enableOptimizations: bool # The current loop being compiled (used to @@ -193,6 +190,7 @@ proc patchReturnAddress(self: Compiler, pos: int) proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTnode) proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTnode) proc dispatchPragmas(self: Compiler, node: ASTnode) +proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) ## End of forward declarations @@ -216,7 +214,7 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Com ## Public getter for nicer error formatting proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) -proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = self.currentFunction +proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = (if self.currentFunction.isNil(): nil else: self.currentFunction.fun) proc getFile*(self: Compiler): string {.inline.} = self.file proc getModule*(self: Compiler): string {.inline.} = self.currentModule @@ -321,7 +319,7 @@ proc emitConstant(self: Compiler, obj: Expression, kind: Type) = self.emitByte(LoadString) let str = LiteralExpr(obj).literal.lexeme if str.len() >= 16777216: - self.error("string constants cannot be larger than 16777216 bytes") + self.error("string constants cannot be larger than 16777215 bytes") self.emitBytes(LiteralExpr(obj).literal.lexeme.len().toTriple()) of Float32: self.emitByte(LoadFloat32) @@ -347,7 +345,7 @@ proc patchJump(self: Compiler, offset: int) = ## jump using emitJump var jump: int = self.chunk.code.len() - offset if jump > 16777215: - self.error("cannot jump more than 16777216 bytecode instructions") + self.error("cannot jump more than 16777215 instructions") let offsetArray = (jump - 4).toTriple() self.chunk.code[offset + 1] = offsetArray[0] self.chunk.code[offset + 2] = offsetArray[1] @@ -408,6 +406,23 @@ proc getClosurePos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth return -1 +proc resolve(self: Compiler, name: string, + depth: int = self.scopeDepth): Name = + ## Traverses self.names backwards and returns the + ## first name object with the given name. Returns + ## nil when the name can't be found. This function + ## has no concept of scope depth, because getStackPos + ## does that job. Note that private names declared in + ## other modules will not be resolved! + for obj in reversed(self.names): + if obj.name.token.lexeme == name: + if obj.isPrivate and obj.owner != self.currentModule: + continue # There may be a name in the current module that + # matches, so we skip this + return obj + return nil + + proc detectClosureVariable(self: Compiler, name: Name, depth: int = self.scopeDepth) = ## Detects if the given name is used in a local scope deeper ## than the given one and modifies the code emitted for it @@ -428,7 +443,7 @@ proc detectClosureVariable(self: Compiler, name: Name, depth: int = self.scopeDe self.closedOver.add(name) let idx = self.closedOver.high().toTriple() if self.closedOver.len() >= 16777216: - self.error("too many consecutive closed-over variables (max is 16777216)") + self.error("too many consecutive closed-over variables (max is 16777215)") self.chunk.code[name.codePos] = StoreClosure.uint8 self.chunk.code[name.codePos + 1] = idx[0] self.chunk.code[name.codePos + 2] = idx[1] @@ -645,7 +660,7 @@ proc inferType(self: Compiler, node: Declaration, strictMutable: bool = true): T if node.isNil(): return nil case node.kind: - of funDecl: + of NodeKind.funDecl: var node = FunDecl(node) let resolved = self.resolve(node.name) if not resolved.isNil(): @@ -678,7 +693,6 @@ proc typeToStr(self: Compiler, typ: Type): string = result &= "fn (" for i, (argName, argType) in typ.args: result &= &"{argName}: " - echo argType[] if argType.mutable: result &= "var " result &= self.typeToStr(argType) @@ -693,13 +707,70 @@ proc typeToStr(self: Compiler, typ: Type): string = discard +proc findByName(self: Compiler, name: string): seq[Name] = + ## Looks for objects that have been already declared + ## with the given name. Returns all objects that apply + for obj in reversed(self.names): + if obj.name.token.lexeme == name: + result.add(obj) + + +proc findByType(self: Compiler, name: string, kind: Type, strictMutable: bool = true): seq[Name] = + ## Looks for objects that have already been declared + ## with the given name and type + for obj in self.findByName(name): + if self.compareTypes(obj.valueType, kind, strictMutable): + result.add(obj) + + +proc matchImpl(self: Compiler, name: string, kind: Type, strictMutable: bool = true): Name = + ## Tries to find a matching function implementation + ## compatible with the given type and returns its + ## name object + let impl = self.findByType(name, kind, strictMutable) + if impl.len() == 0: + var msg = &"cannot find a suitable implementation for '{name}'" + let names = self.findByName(name) + if names.len() > 0: + msg &= &", found {len(names)} candidate" + if names.len() > 1: + msg &= "s" + msg &= ": " + for name in names: + msg &= &"\n - '{name.name.token.lexeme}' of type '{self.typeToStr(name.valueType)}'" + if name.valueType.kind != Function: + msg &= ", not a callable" + elif kind.args.len() != name.valueType.args.len(): + msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})" + else: + for i, arg in kind.args: + if name.valueType.args[i].kind.mutable and not arg.kind.mutable: + msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'" + break + elif not self.compareTypes(arg.kind, name.valueType.args[i].kind): + msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead" + break + self.error(msg) + elif impl.len() > 1: + var msg = &"multiple matching implementations of '{name}' found:\n" + for fn in reversed(impl): + msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n" + self.error(msg) + return impl[0] + + +proc emitFunction(self: Compiler, name: Name) = + ## Wrapper to emit LoadFunction instructions + self.emitByte(LoadFunction) + self.emitBytes(name.codePos.toTriple()) + + ## End of utility functions proc literal(self: Compiler, node: ASTNode) = ## Emits instructions for literals such - ## as singletons, strings, numbers and - ## collections + ## as singletons, strings and numbers case node.kind: of trueExpr: self.emitByte(LoadTrue) @@ -713,7 +784,6 @@ proc literal(self: Compiler, node: ASTNode) = self.emitByte(LoadNan) of strExpr: self.emitConstant(LiteralExpr(node), Type(kind: String)) - # TODO: Take size specifier into account! of intExpr: var x: int var y = IntExpr(node) @@ -778,65 +848,6 @@ proc literal(self: Compiler, node: ASTNode) = self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)") -proc findByName(self: Compiler, name: string): seq[Name] = - ## Looks for objects that have been already declared - ## with the given name. Returns all objects that apply - for obj in reversed(self.names): - if obj.name.token.lexeme == name: - result.add(obj) - - -proc findByType(self: Compiler, name: string, kind: Type, strictMutable: bool = true): seq[Name] = - ## Looks for objects that have already been declared - ## with the given name and type - for obj in self.findByName(name): - if self.compareTypes(obj.valueType, kind, strictMutable): - result.add(obj) - - -proc matchImpl(self: Compiler, name: string, kind: Type, strictMutable: bool = true): Name = - ## Tries to find a matching function implementation - ## compatible with the given type and returns its - ## name object - let impl = self.findByType(name, kind, strictMutable) - if impl.len() == 0: - var msg = &"cannot find a suitable implementation for '{name}'" - let names = self.findByName(name) - if names.len() > 0: - msg &= &", found {len(names)} candidate" - if names.len() > 1: - msg &= "s" - msg &= ": " - for name in names: - msg &= &"\n - '{name.name.token.lexeme}' of type '{self.typeToStr(name.valueType)}'" - if name.valueType.kind != Function: - msg &= ", not a callable" - elif kind.args.len() != name.valueType.args.len(): - msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})" - else: - for i, arg in kind.args: - echo name.valueType.args[i].kind.mutable - echo arg.kind.mutable - if name.valueType.args[i].kind.mutable and not arg.kind.mutable: - msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'" - break - elif not self.compareTypes(arg.kind, name.valueType.args[i].kind): - msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead" - break - self.error(msg) - elif impl.len() > 1: - var msg = &"multiple matching implementations of '{name}' found:\n" - for fn in reversed(impl): - msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n" - self.error(msg) - return impl[0] - - -proc emitFunction(self: Compiler, name: Name) = - ## Wrapper to emit LoadFunction instructions - self.emitByte(LoadFunction) - self.emitBytes(name.codePos.toTriple()) - proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) = ## Emits single instructions for builtin functions @@ -953,12 +964,7 @@ proc generateCall(self: Compiler, fn: Name, args: seq[Expression]) = if any(fn.valueType.args, proc (arg: tuple[name: string, kind: Type]): bool = arg[1].kind == Generic): # The function has generic arguments! We need to compile a version # of it with the right type data - - # We don't want to cause *any* interference to - # other objects, so we just play it safe - var node = fn.valueType.funNode.deepCopy() - for argument in node.arguments: - + self.funDecl(nil, fn, args) self.emitFunction(fn) self.emitByte(LoadReturnAddress) let pos = self.chunk.code.len() @@ -1041,7 +1047,7 @@ proc declareName(self: Compiler, node: Declaration, mutable: bool = false) = if self.names.high() > 16777215: # If someone ever hits this limit in real-world scenarios, I swear I'll # slap myself 100 times with a sign saying "I'm dumb". Mark my words - self.error("cannot declare more than 16777216 variables at a time") + self.error("cannot declare more than 16777215 variables at a time") for name in self.findByName(node.name.token.lexeme): if name.depth == self.scopeDepth and name.valueType.kind notin {Function, CustomType} and not name.isFunctionArgument: # Trying to redeclare a variable in the same module is an error, but it's okay @@ -1095,7 +1101,7 @@ proc declareName(self: Compiler, node: Declaration, mutable: bool = false) = name: node.name.token.lexeme, returnType: self.inferType(node.returnType), args: @[], - funNode: node), + fun: node), codePos: self.chunk.code.len(), name: node.name, isLet: false, @@ -1105,7 +1111,7 @@ proc declareName(self: Compiler, node: Declaration, mutable: bool = false) = var name: Name for argument in node.arguments: if self.names.high() > 16777215: - self.error("cannot declare more than 16777216 variables at a time") + self.error("cannot declare more than 16777215 variables at a time") # wait, no LoadVar? Yes! That's because when calling functions, # arguments will already be on the stack so there's no need to # load them here @@ -1381,8 +1387,13 @@ proc callExpr(self: Compiler, node: CallExpr) = while node.kind == callExpr: self.callExpr(CallExpr(node)) node = CallExpr(node).callee + # TODO: Calling lambdas else: - discard # TODO: Calling expressions + let typ = self.inferType(node) + if typ.isNil(): + self.error(&"expression has no type") + else: + self.error(&"object of type '{self.typeToStr(typ)}' is not callable") if not funct.isNil(): if funct.valueType.isBuiltinFunction: self.handleBuiltinFunction(funct, argExpr) @@ -1391,8 +1402,8 @@ proc callExpr(self: Compiler, node: CallExpr) = else: self.generateObjCall(argExpr) if self.scopeDepth > 0 and not self.checkCallIsPure(node.callee): - if not self.currentFunction.name.isNil(): - self.error(&"cannot make sure that calls to '{self.currentFunction.name.token.lexeme}' are side-effect free") + if self.currentFunction.name != "": + self.error(&"cannot make sure that calls to '{self.currentFunction.name}' are side-effect free") else: self.error(&"cannot make sure that call is side-effect free") @@ -1466,7 +1477,7 @@ proc endFunctionBeforeReturn(self: Compiler) = ## its return instruction var popped = 0 for name in self.names: - if name.depth == self.scopeDepth and name.valueType.kind != Function: + if name.depth == self.scopeDepth and name.valueType.kind notin {Function, Generic}: inc(popped) if self.enableOptimizations and popped > 1: self.emitByte(PopN) @@ -1478,12 +1489,9 @@ proc endFunctionBeforeReturn(self: Compiler) = proc returnStmt(self: Compiler, node: ReturnStmt) = - ## Compiles return statements. An empty return - ## implicitly returns nil + ## Compiles return statements let actual = self.inferType(node.value) - let expected = self.inferType(self.currentFunction) - var comp: Type = actual - ## Having the return type + let expected = self.currentFunction if actual.isNil() and not expected.returnType.isNil(): if not node.value.isNil(): if node.value.kind == identExpr: @@ -1493,8 +1501,8 @@ proc returnStmt(self: Compiler, node: ReturnStmt) = self.error(&"expected return value of type '{self.typeToStr(expected.returnType)}', but expression has no type") elif expected.returnType.isNil() and not actual.isNil(): self.error("non-empty return statement is not allowed in void functions") - elif not self.compareTypes(actual, comp): - self.error(&"expected return value of type '{self.typeToStr(comp)}', got '{self.typeToStr(actual)}' instead") + elif not self.compareTypes(actual, expected.returnType): + self.error(&"expected return value of type '{self.typeToStr(expected)}', got '{self.typeToStr(actual)}' instead") if not node.value.isNil(): self.expression(node.value) self.emitByte(OpCode.SetResult) @@ -1667,12 +1675,14 @@ proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) = var fn = self.resolve(node.name) fn.valueType.isBuiltinFunction = true fn.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2] + # The magic pragma ignores the function's body + node.body = nil proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) = ## Handles the "pure" pragma case node.kind: - of funDecl: + of NodeKind.funDecl: FunDecl(node).isPure = true of lambdaExpr: LambdaExpr(node).isPure = true @@ -1684,7 +1694,7 @@ proc dispatchPragmas(self: Compiler, node: ASTnode) = ## Dispatches pragmas bound to objects var pragmas: seq[Pragma] = @[] case node.kind: - of funDecl, NodeKind.typeDecl, NodeKind.varDecl: + of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl: pragmas = Declaration(node).pragmas of lambdaExpr: pragmas = LambdaExpr(node).pragmas @@ -1696,125 +1706,133 @@ proc dispatchPragmas(self: Compiler, node: ASTnode) = self.compilerProcs[pragma.name.token.lexeme](self, pragma, node) -proc funDecl(self: Compiler, node: FunDecl) = +proc fixGenericFunc(self: Compiler, name: Name, args: seq[Expression]): Type = + ## Specializes generic arguments in functions + var fn = name.valueType + result = fn.deepCopy() + var node = fn.fun + for i in 0..args.high(): + if fn.args[i].kind.kind == Generic: + self.resolve(fn.args[i].name).valueType = self.inferType(args[i]) + + +proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) = ## Compiles function declarations - var function = self.currentFunction - self.declareName(node) - if node.generics.len() < 0: - # We can't know the type of - # generic arguments yet, so - # we wait for the function to - # be called to compile its code - # or dispatch any pragmas. We - # still declare its name so that - # it can be assigned to variables - # and passed to functions - return - self.dispatchPragmas(node) - let fn = self.names[^(node.arguments.len() + 1)] - var jmp: int + if not node.isNil(): + if node.generics.len() > 0 and fn.isNil() and args.len() == 0: + # Generic function! We can't compile it right now + self.declareName(node) + self.dispatchPragmas(node) + return + self.declareName(node) + self.dispatchPragmas(node) + var node = node + var fn = if fn.isNil(): self.names[^(node.arguments.len() + 1)] else: fn + if fn.valueType.returnType.isNil(): + self.error(&"cannot infer the type of '{node.returnType.token.lexeme}'") if not fn.valueType.isBuiltinFunction: + var function = self.currentFunction + var jmp: int + # Builtin functions map to a single + # bytecode instruction to avoid + # unnecessary overhead from peon's + # calling convention. This also means + # that peon's fast builtins can only + # be relatively simple self.frames.add(self.names.high()) # A function's code is just compiled linearly # and then jumped over jmp = self.emitJump(JumpForwards) # Function's code starts after the jump fn.codePos = self.chunk.code.len() - for argument in node.arguments: + # We store the current function + self.currentFunction = fn.valueType + let argLen = if node.isNil(): fn.valueType.args.len() else: node.arguments.len() + for _ in 0.. 1: - # Oh-oh! We found more than one implementation of - # the same function with the same name! Error! - var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n" - for fn in reversed(impl): - msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n" + # We found more than one (public) implementation of + # the same function with the same name: this is an + # error, as it would raise ambiguity when calling them + var msg = &"multiple matching implementations of '{fn.name.token.lexeme}' found:\n" + for f in reversed(impl): + msg &= &"- '{f.name.token.lexeme}' at line {f.line} of type {self.typeToStr(f.valueType)}\n" self.error(msg) - # We store the current function - self.currentFunction = node - - if not fn.valueType.isBuiltinFunction: - # Since the deferred array is a linear - # sequence of instructions and we want - # to keep track to whose function's each - # set of deferred instruction belongs, - # we record the length of the deferred - # array before compiling the function - # and use this info later to compile - # the try/finally block with the deferred - # code - var deferStart = self.deferred.len() - # We let our debugger know a function is starting - let start = self.chunk.code.high() - self.beginScope() - for decl in BlockStmt(node.body).code: - self.declaration(decl) - var typ: Type - var hasVal: bool = false - case self.currentFunction.kind: - of NodeKind.funDecl: - typ = self.inferType(self.currentFunction) - hasVal = self.currentFunction.hasExplicitReturn - of NodeKind.lambdaExpr: - typ = self.inferType(LambdaExpr(Declaration(self.currentFunction))) - hasVal = LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn - else: - discard # Unreachable - if hasVal and self.currentFunction.returnType.isNil() and not typ.returnType.isNil(): - self.error("non-empty return statement is not allowed in void functions") - elif not hasVal and not self.currentFunction.returnType.isNil(): - self.error("function has an explicit return type, but no return statement was found") - self.endFunctionBeforeReturn() - hasVal = hasVal and not typ.returnType.isNil() - self.endScope(deleteNames=true, fromFunc=true) - # Terminates the function's context - self.emitByte(OpCode.Return) - if hasVal: - self.emitByte(1) + # Since the deferred array is a linear + # sequence of instructions and we want + # to keep track to whose function's each + # set of deferred instruction belongs, + # we record the length of the deferred + # array before compiling the function + # and use this info later to compile + # the try/finally block with the deferred + # code + var deferStart = self.deferred.len() + # We let our debugger know a function is starting + let start = self.chunk.code.high() + self.beginScope() + for decl in BlockStmt(node.body).code: + self.declaration(decl) + let typ = self.currentFunction.returnType + var hasVal: bool = false + case self.currentFunction.fun.kind: + of NodeKind.funDecl: + hasVal = self.currentFunction.fun.hasExplicitReturn + of NodeKind.lambdaExpr: + hasVal = LambdaExpr(Declaration(self.currentFunction.fun)).hasExplicitReturn else: - self.emitByte(0) - # Function is ending! - self.chunk.cfi.add(start.toTriple()) - self.chunk.cfi.add(self.chunk.code.high().toTriple()) - self.chunk.cfi.add(self.frames[^1].toTriple()) - self.chunk.cfi.add(uint8(node.arguments.len())) - if not node.name.isNil(): - self.chunk.cfi.add(node.name.token.lexeme.len().toDouble()) - var s = node.name.token.lexeme - if node.name.token.lexeme.len() >= uint16.high().int: - s = node.name.token.lexeme[0..uint16.high()] - self.chunk.cfi.add(s.toBytes()) - else: - self.chunk.cfi.add(0.toDouble()) - # Currently defer is not functional so we - # just pop the instructions - for i in countup(deferStart, self.deferred.len() - 1, 1): - self.deferred.delete(i) - - self.patchJump(jmp) - # This makes us compile nested functions correctly - discard self.frames.pop() - self.currentFunction = function + discard # Unreachable + if hasVal and self.currentFunction.returnType.isNil() and not typ.returnType.isNil(): + self.error("non-empty return statement is not allowed in void functions") + elif not hasVal and not self.currentFunction.returnType.isNil(): + self.error("function has an explicit return type, but no return statement was found") + self.endFunctionBeforeReturn() + hasVal = hasVal and not typ.isNil() + self.endScope(deleteNames=true, fromFunc=true) + # Terminates the function's context + self.emitByte(OpCode.Return) + if hasVal: + self.emitByte(1) + else: + self.emitByte(0) + # Some debugging info here + self.chunk.cfi.add(start.toTriple()) + self.chunk.cfi.add(self.chunk.code.high().toTriple()) + self.chunk.cfi.add(self.frames[^1].toTriple()) + self.chunk.cfi.add(uint8(node.arguments.len())) + if not node.name.isNil(): + self.chunk.cfi.add(fn.name.token.lexeme.len().toDouble()) + var s = fn.name.token.lexeme + if s.len() >= uint16.high().int: + s = node.name.token.lexeme[0..uint16.high()] + self.chunk.cfi.add(s.toBytes()) + else: + self.chunk.cfi.add(0.toDouble()) + # Currently defer is not functional so we + # just pop the instructions + for _ in deferStart..self.deferred.high(): + discard self.deferred.pop() + # Well, we've compiled everything: time to patch + # the jump offset + self.patchJump(jmp) + # Pops a call frame + discard self.frames.pop() + # Restores the enclosing function (if any). + # Makes nested calls work (including recursion) + self.currentFunction = function proc patchReturnAddress(self: Compiler, pos: int) =