From e97653cbc7be52a4a520e0196fb315876faf8d70 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Thu, 5 Jan 2023 12:44:11 +0100 Subject: [PATCH] Initial ground work on typed AST, minor fixes to compiler, switch statement now looks for equality operator instead of emitting the Equal opcode --- src/frontend/compiler/compiler.nim | 16 ++-- .../compiler/targets/bytecode/target.nim | 88 ++++++++++--------- .../targets/nativeC/util/generators.nim | 6 +- src/main.nim | 2 +- 4 files changed, 64 insertions(+), 48 deletions(-) diff --git a/src/frontend/compiler/compiler.nim b/src/frontend/compiler/compiler.nim index e4e6648..a019de3 100644 --- a/src/frontend/compiler/compiler.nim +++ b/src/frontend/compiler/compiler.nim @@ -97,6 +97,7 @@ type types*: seq[tuple[match: bool, kind: Type]] else: discard + NameKind* {.pure.} = enum ## A name enumeration type None, Module, Argument, Var, Function, CustomType, Enum @@ -149,7 +150,6 @@ type # Is this name a builtin? isBuiltin*: bool - WarningKind* {.pure.} = enum ## A warning enumeration type UnreachableCode, UnusedName, ShadowOuterScope, @@ -177,10 +177,7 @@ type # The current scope depth. If > 0, we're # in a local scope, otherwise it's global depth*: int - # Are we in REPL mode? If so, Pop instructions - # for expression statements at the top level are - # swapped for a special instruction that prints - # the result of the expression once it is evaluated + # Are we in REPL mode? replMode*: bool # List of all compile-time names names*: seq[Name] @@ -197,7 +194,7 @@ type # List of disabled warnings disabledWarnings*: seq[WarningKind] # Whether to show detailed info about type - # mismatches when we dispatch with match() + # mismatches when we dispatch with self.match showMismatches*: bool # Are we compiling in debug mode? 
mode*: CompileMode @@ -210,6 +207,13 @@ type # Currently imported modules modules*: HashSet[Name] + TypedNode* = ref object + ## A wrapper for AST nodes + ## with attached type information + kind*: Type + node*: ASTNode + + ## Public getters for nicer error formatting proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) diff --git a/src/frontend/compiler/targets/bytecode/target.nim b/src/frontend/compiler/targets/bytecode/target.nim index ab6bd3c..29070b4 100644 --- a/src/frontend/compiler/targets/bytecode/target.nim +++ b/src/frontend/compiler/targets/bytecode/target.nim @@ -108,7 +108,7 @@ proc compile*(self: BytecodeCompiler, ast: seq[Declaration], file: string, lines mode: CompileMode = Debug): Chunk proc statement(self: BytecodeCompiler, node: Statement) proc declaration(self: BytecodeCompiler, node: Declaration) -proc varDecl(self: BytecodeCompiler, node: VarDecl) +proc varDecl(self: BytecodeCompiler, node: VarDecl, name: BytecodeName) proc specialize(self: BytecodeCompiler, typ: Type, args: seq[Expression]): Type {.discardable.} proc patchReturnAddress(self: BytecodeCompiler, pos: int) proc handleMagicPragma(self: BytecodeCompiler, pragma: Pragma, name: BytecodeName) @@ -393,7 +393,7 @@ proc fixNames(self: BytecodeCompiler, where, oldLen: int) = name.valueType.location += offset -proc insertAt(self: BytecodeCompiler, where: int, opcode: OpCode, data: openarray[uint8]): int = +proc insertAt(self: BytecodeCompiler, where: int, opcode: OpCode, data: openarray[uint8]): int {.used.} = ## Inserts the given instruction into the ## chunk's code segment and updates internal ## metadata to reflect this change. Returns @@ -1739,8 +1739,8 @@ proc returnStmt(self: BytecodeCompiler, node: ReturnStmt) = proc continueStmt(self: BytecodeCompiler, node: ContinueStmt, compile: bool = true) = - ## Compiles continue statements. 
A continue statement - ## jumps to the next iteration in a loop + ## Compiles continue statements. A continue statement can be + ## used to jump to the beginning of a loop or block if node.label.isNil(): if self.currentLoop.start > 16777215: self.error("too much code to jump over in continue statement") @@ -1763,7 +1763,9 @@ proc continueStmt(self: BytecodeCompiler, node: ContinueStmt, compile: bool = tr proc importStmt(self: BytecodeCompiler, node: ImportStmt, compile: bool = true) = - ## Imports a module at compile time + ## Imports a module. This creates a new "virtual" + ## (i.e simulated) module namespace and injects all + ## of the module's public names into the current module self.declare(node) var module = self.names[^1] try: @@ -1781,7 +1783,12 @@ proc importStmt(self: BytecodeCompiler, node: ImportStmt, compile: bool = true) proc exportStmt(self: BytecodeCompiler, node: ExportStmt, compile: bool = true) = ## Exports a name at compile time to - ## all modules importing us + ## all modules importing us. The user + ## needs to explicitly tell the compiler + ## which of the names it imported, if any, + ## should be made available to other modules + ## importing it in order to avoid namespace + ## pollution var name = self.resolveOrError(node.name) if name.isPrivate: self.error("cannot export private names") @@ -1802,8 +1809,9 @@ proc exportStmt(self: BytecodeCompiler, node: ExportStmt, compile: bool = true) proc breakStmt(self: BytecodeCompiler, node: BreakStmt) = - ## Compiles break statements. A break statement - ## jumps to the end of the loop + ## Compiles break statements. 
A break statement is used + ## to jump to the end of a loop or outside of a given + ## block if node.label.isNil(): self.currentLoop.breakJumps.add(self.emitJump(OpCode.JumpForwards, node.token.line)) if self.currentLoop.depth > self.depth: @@ -1825,6 +1833,10 @@ proc breakStmt(self: BytecodeCompiler, node: BreakStmt) = proc namedBlock(self: BytecodeCompiler, node: NamedBlockStmt) = ## Compiles named blocks + self.namedBlocks.add(NamedBlock(start: self.chunk.code.len(), # Creates a new block entry + depth: self.depth, + breakJumps: @[], + name: NamedBlockStmt(node).name.token.lexeme)) self.beginScope() var blk = self.namedBlocks[^1] var last: Declaration @@ -1841,17 +1853,24 @@ proc namedBlock(self: BytecodeCompiler, node: NamedBlockStmt) = last = decl self.patchBreaks() self.endScope() + discard self.namedBlocks.pop() proc switchStmt(self: BytecodeCompiler, node: SwitchStmt) = - ## Compiles switch statements + ## Compiles C-style switch statements self.expression(node.switch) + let typeOfA = self.inferOrError(node.switch) var ifJump: int = -1 var thenJumps: seq[int] = @[] + var fn: Type + var impl: BytecodeName + var default: Expression for branch in node.branches: self.emitByte(DupTop, branch.body.token.line) self.expression(branch.cond) - self.emitByte(Equal, branch.body.token.line) + fn = Type(kind: Function, returnType: Type(kind: Bool), args: @[("", typeOfA, default), ("", self.inferOrError(branch.cond), default)]) + impl = BytecodeName(self.match("==", fn, node)) + self.generateCall(impl, @[node.switch, branch.cond], impl.line) ifJump = self.emitJump(JumpIfFalsePop, branch.body.token.line) self.blockStmt(branch.body) thenJumps.add(self.emitJump(JumpForwards, branch.body.token.line)) @@ -1867,6 +1886,8 @@ proc statement(self: BytecodeCompiler, node: Statement) = ## Compiles all statements case node.kind: of exprStmt: + # An expression statement is just an expression + # followed by a statement terminator (semicolon) let expression = ExprStmt(node).expression 
let kind = self.infer(expression) self.expression(expression) @@ -1881,12 +1902,7 @@ proc statement(self: BytecodeCompiler, node: Statement) = of NodeKind.switchStmt: self.switchStmt(SwitchStmt(node)) of NodeKind.namedBlockStmt: - self.namedBlocks.add(NamedBlock(start: self.chunk.code.len(), - depth: self.depth, - breakJumps: @[], - name: NamedBlockStmt(node).name.token.lexeme)) - self.namedBlock(NamedBlockStmt(node),) - discard self.namedBlocks.pop() + self.namedBlock(NamedBlockStmt(node)) of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) of NodeKind.assertStmt: @@ -1904,8 +1920,6 @@ proc statement(self: BytecodeCompiler, node: Statement) = of NodeKind.exportStmt: self.exportStmt(ExportStmt(node)) of NodeKind.whileStmt: - # Note: Our parser already desugars - # for loops to while loops let loop = self.currentLoop self.currentLoop = Loop(start: self.chunk.code.len(), depth: self.depth, breakJumps: @[]) @@ -1928,12 +1942,11 @@ proc statement(self: BytecodeCompiler, node: Statement) = self.expression(Expression(node)) -proc varDecl(self: BytecodeCompiler, node: VarDecl) = +proc varDecl(self: BytecodeCompiler, node: VarDecl, name: BytecodeName) = ## Compiles variable declarations - + var typ: Type # Our parser guarantees that the variable declaration # will have a type declaration or a value (or both) - var typ: Type if node.value.isNil(): # Variable has no value: the type declaration # takes over @@ -1957,8 +1970,6 @@ proc varDecl(self: BytecodeCompiler, node: VarDecl) = typ = self.infer(node.value) self.expression(node.value) self.emitByte(AddVar, node.token.line) - self.declare(node) - var name = BytecodeName(self.names[^1]) inc(self.stackIndex) name.position = self.stackIndex name.valueType = typ @@ -1970,10 +1981,13 @@ proc funDecl(self: BytecodeCompiler, node: FunDecl, name: BytecodeName) = self.error(&"Due to compiler limitations, the '{node.name.token.lexeme}' operator cannot be currently overridden", node.name) var node = node var jmp: int - # We store the 
current function + # We store the current function to restore + # it later let function = self.currentFunction if node.body.isNil(): - # We ignore forward declarations + # When we stumble across a forward declaration, + # we record it for later so we can look it up at + # the end of the module self.forwarded.add((name, 0)) name.valueType.forwarded = true self.currentFunction = function @@ -2008,15 +2022,6 @@ proc funDecl(self: BytecodeCompiler, node: FunDecl, name: BytecodeName) = self.chunk.functions.add(0.toDouble()) if BlockStmt(node.body).code.len() == 0: self.error("cannot declare function with empty body") - # Since the deferred array is a linear - # sequence of instructions and we want - # to keep track to whose function's each - # set of deferred instruction belongs, - # we record the length of the deferred - # array before compiling the function - # and use this info later to compile - # the try/finally block with the deferred - # code var last: Declaration self.beginScope() for decl in BlockStmt(node.body).code: @@ -2071,11 +2076,15 @@ proc declaration(self: BytecodeCompiler, node: Declaration) = of NodeKind.funDecl: var name = self.declare(node) if not name.valueType.isAuto: + # We can't compile automatic functions right + # away because we need to know the type of the + # arguments in their signature, and this info is + # not available at declaration time self.funDecl(FunDecl(node), name) if name.isGeneric: # After we're done compiling a generic - # function, we pull a magic trick: since, - # from here on, the user will be able to + # function, we pull a magic trick: since + # from here on the user will be able to # call this with any of the types in the # generic constraint, we switch every generic # to a type union (which, conveniently, have an @@ -2092,12 +2101,11 @@ proc declaration(self: BytecodeCompiler, node: Declaration) = if not name.valueType.returnType.isNil() and name.valueType.returnType.kind == Generic: name.valueType.returnType.asUnion = 
true of NodeKind.typeDecl: + # Custom types don't do much other than + # declaring a name in the given scope self.declare(node) of NodeKind.varDecl: - # We compile this immediately because we - # need to keep the stack in the right state - # at runtime - self.varDecl(VarDecl(node)) + self.varDecl(VarDecl(node), self.declare(node)) else: self.statement(Statement(node)) diff --git a/src/frontend/compiler/targets/nativeC/util/generators.nim b/src/frontend/compiler/targets/nativeC/util/generators.nim index baa982b..9ffe1ef 100644 --- a/src/frontend/compiler/targets/nativeC/util/generators.nim +++ b/src/frontend/compiler/targets/nativeC/util/generators.nim @@ -17,12 +17,14 @@ type GeneratorKind* = enum ## A code generator enumeration - Literal, + Literal, Call CodeGenerator* = object ## A generic code generator case kind*: GeneratorKind of Literal: lit: string + else: + discard proc generate*(self: CodeGenerator): string = @@ -31,4 +33,6 @@ proc generate*(self: CodeGenerator): string = case self.kind: of Literal: return self.lit + else: + discard \ No newline at end of file diff --git a/src/main.nim b/src/main.nim index 2f11e1b..4fa4547 100644 --- a/src/main.nim +++ b/src/main.nim @@ -207,7 +207,7 @@ proc runFile(f: string, fromString: bool = false, dump: bool = true, breakpoints styledEcho fgGreen, "\t", $node echo "" case backend: - of PeonBackend.Bytecode: + of PeonBackend.Bytecode: compiled = compiler.compile(tree, f, tokenizer.getLines(), input, disabledWarnings=warnings, showMismatches=mismatches, mode=mode) when debugCompiler: styledEcho fgCyan, "Compilation step:\n"