From 78e169bd82f387d703b1f8a5480fbe86384bd56b Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Thu, 7 Apr 2022 13:02:23 +0200 Subject: [PATCH] Hooked up the compiler and debugger (VERY experimental) --- src/frontend/compiler.nim | 44 +++++-------------------------- src/frontend/meta/bytecode.nim | 18 ++++++------- src/frontend/optimizer.nim | 19 +++++++------- src/test.nim | 47 ++++++++++++++++++++++------------ 4 files changed, 56 insertions(+), 72 deletions(-) diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 57c9495..b0e677a 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -383,7 +383,7 @@ proc literal(self: Compiler, node: ASTNode) = self.emitBytes(y.keys.len().toTriple()) of awaitExpr: var y = AwaitExpr(node) - self.expression(y.awaitee) + self.expression(y.expression) self.emitByte(OpCode.Await) else: self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug)") @@ -417,9 +417,9 @@ proc binary(self: Compiler, node: BinaryExpr) = self.emitByte(BinaryAdd) of Minus: self.emitByte(BinarySubtract) - of Asterisk: + of Star: self.emitByte(BinaryMultiply) - of DoubleAsterisk: + of DoubleStar: self.emitByte(BinaryPow) of Percentage: self.emitByte(BinaryMod) @@ -497,7 +497,7 @@ proc declareName(self: Compiler, node: ASTNode) = self.error("cannot declare more than 16777215 static variables at a time") self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name), isPrivate: node.isPrivate, - owner: node.owner, + owner: "", isConst: node.isConst)) self.emitByte(StoreFast) self.emitBytes(self.names.high().toTriple()) @@ -514,7 +514,7 @@ proc declareName(self: Compiler, node: ASTNode) = for argument in node.arguments: if self.names.high() > 16777215: self.error("cannot declare more than 16777215 static variables at a time") - self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument))) + self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument.name))) self.emitByte(LoadFast) self.emitBytes(self.names.high().toTriple()) self.scopeDepth -= 1 @@ -542,14 +542,6 @@ proc resolveStatic(self: Compiler, name: IdentExpr, return nil -proc deleteStatic(self: Compiler, name: IdentExpr) = - ## Traverses self.staticNames backwards and deletes the - ## a name object with the given name. Does nothing when - ## the name can't be found - for i, obj in reversed(self.names): - if obj.name.token.lexeme == name.token.lexeme: - self.names.del(i) - proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int = ## Gets the predicted stack position of the given variable @@ -796,24 +788,6 @@ proc expression(self: Compiler, node: ASTNode) = self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") -proc delStmt(self: Compiler, node: ASTNode) = - ## Compiles del statements, which unbind - ## a name from the current scope - case node.kind: - of identExpr: - var node = IdentExpr(node) - let i = self.getStaticIndex(node) - if i != -1: - self.emitByte(DeleteFast) - self.emitBytes(i.toTriple()) - self.deleteStatic(node) - else: - self.emitByte(DeleteName) - self.emitBytes(self.identifierConstant(node)) - else: - discard # The parser already handles the other cases - - proc awaitStmt(self: Compiler, node: AwaitStmt) = ## Compiles await statements. An await statement ## is like an await expression, but parsed in the @@ -824,7 +798,7 @@ proc awaitStmt(self: Compiler, node: AwaitStmt) = ## run coroutines from within an already asynchronous ## loop (which should be orchestrated by an event loop). ## They block in the caller until the callee returns - self.expression(node.awaitee) + self.expression(node.expression) self.emitByte(OpCode.Await) @@ -833,7 +807,7 @@ proc deferStmt(self: Compiler, node: DeferStmt) = ## is executed right before the function exits ## (either because of a return or an exception) let current = self.chunk.code.len - self.expression(node.deferred) + self.expression(node.expression) for i in countup(current, self.chunk.code.high()): self.deferred.add(self.chunk.code[i]) self.chunk.code.del(i) @@ -908,8 +882,6 @@ proc statement(self: Compiler, node: ASTNode) = self.emitByte(Pop) # Expression statements discard their value. Their main use case is side effects in function calls of NodeKind.ifStmt: self.ifStmt(IfStmt(node)) - of NodeKind.delStmt: - self.delStmt(DelStmt(node).name) of NodeKind.assertStmt: self.assertStmt(AssertStmt(node)) of NodeKind.raiseStmt: @@ -922,8 +894,6 @@ proc statement(self: Compiler, node: ASTNode) = self.returnStmt(ReturnStmt(node)) of NodeKind.importStmt: discard - of NodeKind.fromImportStmt: - discard of NodeKind.whileStmt, NodeKind.forStmt: ## Our parser already desugars for loops to ## while loops! diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim index 917c0aa..e24622f 100644 --- a/src/frontend/meta/bytecode.nim +++ b/src/frontend/meta/bytecode.nim @@ -161,18 +161,18 @@ type # Simple instructions encompass: # - Instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.) # - Unary and binary operators -const simpleInstructions* = {Return, BinaryAdd, BinaryMultiply, +const simpleInstructions* = {OpCode.Return, BinaryAdd, BinaryMultiply, BinaryDivide, BinarySubtract, - BinaryMod, BinaryPow, Nil, - True, False, OpCode.Nan, OpCode.Inf, + BinaryMod, BinaryPow, OpCode.Nil, + OpCode.True, OpCode.False, OpCode.Nan, OpCode.Inf, BinaryShiftLeft, BinaryShiftRight, - BinaryXor, LogicalNot, EqualTo, - GreaterThan, LessThan, LoadAttribute, + BinaryXor, OpCode.LogicalNot, EqualTo, + OpCode.GreaterThan, OpCode.LessThan, LoadAttribute, BinarySlice, Pop, UnaryNegate, - BinaryIs, BinaryAs, GreaterOrEqual, - LessOrEqual, BinaryOr, BinaryAnd, - UnaryNot, BinaryFloorDiv, BinaryOf, Raise, - ReRaise, BeginTry, FinishTry, Yield, Await, + BinaryIs, BinaryAs, OpCode.GreaterOrEqual, + OpCode.LessOrEqual, BinaryOr, BinaryAnd, + UnaryNot, BinaryFloorDiv, BinaryOf, OpCode.Raise, + ReRaise, BeginTry, FinishTry, OpCode.Yield, OpCode.Await, MakeClass, ImplicitReturn} # Constant instructions are instructions that operate on the bytecode constant table diff --git a/src/frontend/optimizer.nim b/src/frontend/optimizer.nim index a2124f5..1a528dc 100644 --- a/src/frontend/optimizer.nim +++ b/src/frontend/optimizer.nim @@ -177,11 +177,11 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = z = x + y of Minus: z = x - y - of Asterisk: + of Star: z = x * y of FloorDiv: z = int(x / y) - of DoubleAsterisk: + of DoubleStar: if y >= 0: z = x ^ y else: @@ -232,11 +232,11 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = z = x + y of Minus: z = x - y - of Asterisk: + of Star: z = x * y of FloorDiv, Slash: z = x / y - of DoubleAsterisk: + of DoubleStar: z = pow(x, y) of Percentage: z = x mod y @@ -260,7 +260,7 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = var bb: int assert parseInt(b.literal.lexeme, bb) == b.literal.lexeme.len() case node.operator.kind: - of Asterisk: + of Star: result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'")) else: result = node @@ -270,7 +270,7 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = var aa: int assert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len() case node.operator.kind: - of Asterisk: + of Star: result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'")) else: result = node @@ -290,12 +290,11 @@ proc detectClosures(self: Optimizer, node: FunDecl) = names.add(VarDecl(line)) of funDecl: names.add(FunDecl(line)) - of classDecl: - names.add(ClassDecl(line)) else: discard for name in names: - + discard + proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode = ## Analyzes an AST node and attempts to perform @@ -327,7 +326,7 @@ proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode = var node = SliceExpr(node) for i, e in node.ends: node.ends[i] = self.optimizeNode(e) - node.slicee = self.optimizeNode(node.slicee) + node.expression = self.optimizeNode(node.expression) result = node of tryStmt: var node = TryStmt(node) diff --git a/src/test.nim b/src/test.nim index 56b9ec4..6760020 100644 --- a/src/test.nim +++ b/src/test.nim @@ -6,6 +6,9 @@ import strformat # Our stuff import frontend/lexer as l import frontend/parser as p +import frontend/compiler as c +import util/debugger + # Thanks art <3 import jale/editor as ed @@ -23,6 +26,7 @@ proc getLineEditor: LineEditor const debugLexer = false const debugParser = true +const debugCompiler = true when isMainModule: @@ -30,8 +34,10 @@ when isMainModule: var keep = true var tokens: seq[Token] = @[] var tree: seq[ASTNode] = @[] + var compiled: Chunk var tokenizer = newLexer() var parser = newParser() + var compiler = initCompiler() let editor = getLineEditor() var input: string editor.bindEvent(jeQuit): @@ -60,6 +66,15 @@ when isMainModule: echo "Parsing step:" for node in tree: echo "\t", node + echo "" + compiled = compiler.compile(tree, "") + when debugCompiler: + echo "Compilation step:" + stdout.write("\t") + echo &"""Raw byte stream: [{compiled.code.join(", ")}]""" + echo "\nBytecode disassembler output below:\n" + disassembleChunk(compiled, "") + echo "" except IOError: break # TODO: The code for error reporting completely @@ -101,8 +116,8 @@ proc fillSymbolTable(tokenizer: Lexer) = tokenizer.symbols.addSymbol("]", RightBracket) tokenizer.symbols.addSymbol(".", Dot) tokenizer.symbols.addSymbol(",", Comma) - tokenizer.symbols.addSymbol(">", GreaterThan) - tokenizer.symbols.addSymbol("<", LessThan) + tokenizer.symbols.addSymbol(">", TokenType.GreaterThan) + tokenizer.symbols.addSymbol("<", TokenType.LessThan) tokenizer.symbols.addSymbol(";", Semicolon) tokenizer.symbols.addSymbol("=", Equal) tokenizer.symbols.addSymbol("~", Tilde) @@ -114,8 +129,8 @@ proc fillSymbolTable(tokenizer: Lexer) = # 2-byte symbols tokenizer.symbols.addSymbol("+=", InplaceAdd) tokenizer.symbols.addSymbol("-=", InplaceSub) - tokenizer.symbols.addSymbol(">=", GreaterOrEqual) - tokenizer.symbols.addSymbol("<=", LessOrEqual) + tokenizer.symbols.addSymbol(">=", TokenType.GreaterOrEqual) + tokenizer.symbols.addSymbol("<=", TokenType.LessOrEqual) tokenizer.symbols.addSymbol("*=", InplaceMul) tokenizer.symbols.addSymbol("/=", InplaceDiv) tokenizer.symbols.addSymbol("&=", InplaceAnd) @@ -141,26 +156,26 @@ proc fillSymbolTable(tokenizer: Lexer) = tokenizer.symbols.addKeyword("generator", Generator) tokenizer.symbols.addKeyword("function", Function) tokenizer.symbols.addKeyword("coroutine", Coroutine) - tokenizer.symbols.addKeyword("break", Break) + tokenizer.symbols.addKeyword("break", TokenType.Break) tokenizer.symbols.addKeyword("continue", Continue) tokenizer.symbols.addKeyword("while", While) tokenizer.symbols.addKeyword("for", For) tokenizer.symbols.addKeyword("foreach", Foreach) tokenizer.symbols.addKeyword("if", If) tokenizer.symbols.addKeyword("else", Else) - tokenizer.symbols.addKeyword("await", Await) + tokenizer.symbols.addKeyword("await", TokenType.Await) tokenizer.symbols.addKeyword("defer", Defer) tokenizer.symbols.addKeyword("try", Try) tokenizer.symbols.addKeyword("except", Except) tokenizer.symbols.addKeyword("finally", Finally) - tokenizer.symbols.addKeyword("raise", Raise) - tokenizer.symbols.addKeyword("assert", Assert) + tokenizer.symbols.addKeyword("raise", TokenType.Raise) + tokenizer.symbols.addKeyword("assert", TokenType.Assert) tokenizer.symbols.addKeyword("const", Const) tokenizer.symbols.addKeyword("let", Let) tokenizer.symbols.addKeyword("var", Var) tokenizer.symbols.addKeyword("import", Import) - tokenizer.symbols.addKeyword("yield", Yield) - tokenizer.symbols.addKeyword("return", Return) + tokenizer.symbols.addKeyword("yield", TokenType.Yield) + tokenizer.symbols.addKeyword("return", TokenType.Return) # These are technically more like expressions # with a reserved name that produce a value of a # builtin type, but we don't need to care about @@ -168,9 +183,9 @@ proc fillSymbolTable(tokenizer: Lexer) = # steps so it's fine tokenizer.symbols.addKeyword("nan", NotANumber) tokenizer.symbols.addKeyword("inf", Infinity) - tokenizer.symbols.addKeyword("nil", Nil) - tokenizer.symbols.addKeyword("true", True) - tokenizer.symbols.addKeyword("false", False) + tokenizer.symbols.addKeyword("nil", TokenType.Nil) + tokenizer.symbols.addKeyword("true", TokenType.True) + tokenizer.symbols.addKeyword("false", TokenType.False) # These are technically operators, but since # they fit neatly into the definition for an # identifier/keyword we parse them as such @@ -179,9 +194,9 @@ proc fillSymbolTable(tokenizer: Lexer) = tokenizer.symbols.addKeyword("is", Is) tokenizer.symbols.addKeyword("as", As) tokenizer.symbols.addKeyword("of", Of) - tokenizer.symbols.addKeyword("and", LogicalAnd) - tokenizer.symbols.addKeyword("or", LogicalOr) - tokenizer.symbols.addKeyword("not", LogicalNot) + tokenizer.symbols.addKeyword("and", TokenType.LogicalAnd) + tokenizer.symbols.addKeyword("or", TokenType.LogicalOr) + tokenizer.symbols.addKeyword("not", TokenType.LogicalNot) # P.S.: There's no reason for the order of addition of # symbols to be ascending in length (the symbol table uses