From 4839c9a341c41e02b2aac53bc5c9d1142b3b45fe Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Tue, 26 Apr 2022 09:29:59 +0200 Subject: [PATCH] Turned off optimizer (breaks compiler) further work on closures and scope resolution --- src/frontend/compiler.nim | 142 +++++++++++++++++---------------- src/frontend/meta/bytecode.nim | 11 +-- src/frontend/optimizer.nim | 22 ++--- src/test.nim | 14 ++-- 4 files changed, 99 insertions(+), 90 deletions(-) diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 653ec08..9b5698f 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -44,6 +44,7 @@ type depth: int isPrivate: bool isConst: bool + valueType: IdentExpr Loop = object ## A "loop object" used @@ -124,7 +125,7 @@ proc identifier(self: Compiler, node: IdentExpr) proc varDecl(self: Compiler, node: VarDecl) ## End of forward declarations -## Public getters for nicer error formatting +## Public getter for nicer error formatting proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) @@ -173,7 +174,7 @@ proc emitByte(self: Compiler, byt: OpCode|uint8) = proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) = - ## Emits multiple bytes instead of a single one, this is useful + ## Emits multiple bytes instead of a single one. This is useful ## to emit operators along with their operands or for multi-byte ## instructions that are longer than one byte self.emitByte(uint8 byt1) @@ -208,16 +209,6 @@ proc emitConstant(self: Compiler, obj: ASTNode) = self.emitBytes(self.makeConstant(obj)) -proc identifierConstant(self: Compiler, identifier: IdentExpr): array[3, uint8] = - ## Emits an identifier name as a string in the current chunk's constant - ## table. This is used to load globals declared as dynamic that cannot - ## be resolved statically by the compiler - try: - result = self.makeConstant(identifier) - except CompileError: - self.error(getCurrentExceptionMsg()) - - proc emitJump(self: Compiler, opcode: OpCode): int = ## Emits a dummy jump offset to be patched later. Assumes ## the largest offset (emits 4 bytes, one for the given jump @@ -304,7 +295,7 @@ proc literal(self: Compiler, node: ASTNode) = var x: int var y = IntExpr(node) try: - assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseInt(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") self.emitConstant(y) @@ -317,7 +308,7 @@ proc literal(self: Compiler, node: ASTNode) = var x: int var y = HexExpr(node) try: - assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseHex(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, @@ -327,7 +318,7 @@ proc literal(self: Compiler, node: ASTNode) = var x: int var y = BinExpr(node) try: - assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseBin(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, @@ -337,7 +328,7 @@ proc literal(self: Compiler, node: ASTNode) = var x: int var y = OctExpr(node) try: - assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseOct(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, @@ -347,7 +338,7 @@ proc literal(self: Compiler, node: ASTNode) = var x: float var y = FloatExpr(node) try: - assert parseFloat(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseFloat(y.literal.lexeme, x) except ValueError: self.error("floating point value out of range") self.emitConstant(y) @@ -400,7 +391,7 @@ proc unary(self: Compiler, node: UnaryExpr) = of Minus: self.emitByte(UnaryNegate) of Plus: - discard # Unary + does nothing, but we allow it for consistency + self.emitByte(UnaryPlus) of TokenType.LogicalNot: self.emitByte(OpCode.LogicalNot) of Tilde: @@ -479,28 +470,28 @@ proc binary(self: Compiler, node: BinaryExpr) = -proc declareName(self: Compiler, node: ASTNode) = +proc declareName(self: Compiler, node: ASTNode, kind: IdentExpr) = ## Compiles all name declarations case node.kind: of NodeKind.varDecl: var node = VarDecl(node) - # Statically resolved variable here. Creates a new Name entry - # so that self.identifier emits the proper stack offset + # Creates a new Name entry so that self.identifier emits the proper stack offset if self.names.high() > 16777215: # If someone ever hits this limit in real-world scenarios, I swear I'll # slap myself 100 times with a sign saying "I'm dumb". Mark my words - self.error("cannot declare more than 16777215 static variables at a time") + self.error("cannot declare more than 16777215 variables at a time") self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name), - isPrivate: node.isPrivate, - owner: "", - isConst: node.isConst)) + isPrivate: node.isPrivate, + owner: self.currentModule, + isConst: node.isConst, + valueType: kind)) self.emitByte(StoreVar) self.emitBytes(self.names.high().toTriple()) of funDecl: var node = FunDecl(node) # Declares the function's name in the # current (outer) scope... - self.declareName(node.name) + self.declareName(node.name, IdentExpr(node.returnType)) # ... but its arguments in an inner one! self.scopeDepth += 1 # (this ugly part is needed because @@ -508,8 +499,8 @@ proc declareName(self: Compiler, node: ASTNode) = # and decrements the scope depth) for argument in node.arguments: if self.names.high() > 16777215: - self.error("cannot declare more than 16777215 static variables at a time") - self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument.name))) + self.error("cannot declare more than 16777215 variables at a time") + self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument.name), valueType: kind)) self.emitByte(LoadVar) self.emitBytes(self.names.high().toTriple()) self.scopeDepth -= 1 @@ -524,30 +515,37 @@ proc resolveStatic(self: Compiler, name: IdentExpr, ## first name object with the given name. Returns ## nil when the name can't be found. This function ## has no concept of scope depth, because getStaticIndex - ## does that job + ## does that job. Note that private names declared in + ## other modules will not be resolved! for obj in reversed(self.names): if obj.name.token.lexeme == name.token.lexeme: + if obj.isPrivate and obj.owner != self.currentModule: + return nil return obj return nil - -proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int = - ## Gets the predicted stack position of the given variable - ## if it is static, returns -1 if it is to be bound dynamically - ## or it does not exist at all and returns -2 if the variable - ## is outside of the current local scope and is to be emitted as a closure. +proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] = + ## Gets the predicted stack position of the given variable and + ## returns a tuple (closedOver, pos) that tells the caller whether + ## the variable is to be emitted as a closure as well as its predicted + ## stack/closure array position. Returns (false, -1) if the variable's + ## location can not be determined at compile time (this is an error!). + ## Note that private names declared in other modules will not be resolved! var i: int = self.names.high() for variable in reversed(self.names): if name.name.lexeme == variable.name.name.lexeme: - if variable.depth == depth: - return i - else: - # This tells self.identifier() that this is - # a closed-over variable - return -2 + if variable.isPrivate and variable.owner != self.currentModule: + return (false, -1) + if variable.depth == depth or variable.depth == 0: + # variable.depth == 0 for globals! + return (false, i) + elif variable.depth > 0: + for j, closure in reversed(self.closedOver): + if closure.name.lexeme == name.name.lexeme: + return (true, j) dec(i) - return -1 + return (false, -1) proc identifier(self: Compiler, node: IdentExpr) = @@ -561,16 +559,17 @@ proc identifier(self: Compiler, node: IdentExpr) = # anyway?) self.emitConstant(node) else: - let index = self.getStaticIndex(node) + let t = self.getStaticIndex(node) + let index = t.pos if index != -1: - if index >= 0: + if t.closedOver: self.emitByte(LoadVar) # Static name resolution, loads value at index in the stack. Very fast. Much wow. self.emitBytes(index.toTriple()) else: if self.closedOver.len() == 0: self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)") if self.closedOver.len() >= 16777216: - self.error("too many consecutive closed-over variables (max is 16777215)") + self.error("too many consecutive closure-over variables (max is 16777216)") self.emitByte(LoadHeap) # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics self.emitBytes(self.closedOver.high().toTriple()) else: @@ -587,7 +586,8 @@ proc assignment(self: Compiler, node: ASTNode) = if r != nil and r.isConst: self.error("cannot assign to constant") self.expression(node.value) - let index = self.getStaticIndex(name) + let t = self.getStaticIndex(name) + let index = t.pos case node.token.kind: of InplaceAdd: self.emitByte(BinaryAdd) @@ -623,7 +623,10 @@ proc assignment(self: Compiler, node: ASTNode) = # but that requires variants for stack, # heap, and closure variables and I cba if index != -1: - self.emitByte(StoreVar) + if not t.closedOver: + self.emitByte(StoreVar) + else: + self.emitByte(StoreHeap) self.emitBytes(index.toTriple()) else: self.error(&"reference to undeclared name '{node.token.lexeme}'") @@ -746,11 +749,11 @@ proc inferValueType(self: Compiler, node: ASTNode): ASTNode = # valueType here, we just need to return # a non-nil value so we don't error out return DictExpr(node).keyType - of intExpr: - var node = IntExpr(node) + of intExpr, floatExpr, binExpr, octExpr, hexExpr: + var node = LiteralExpr(node) var size = node.token.lexeme.split("'") if len(size) notin 1..2: - self.error("invalid state: inferValueType -> invalid size specifier for int") + self.error("invalid state: inferValueType -> invalid size specifier") elif size.len() == 1: return newIdentExpr(Token(lexeme: "int")) elif size[1] in ["u64", "i64", "u32", "i32", "f64", "f32", "i32", "u32", "u8", "i8"]: @@ -777,20 +780,18 @@ proc inferExprType(self: Compiler, node: ASTNode): ASTNode = case node.kind: of identExpr: var node = IdentExpr(node) - if self.getStaticIndex(IdentExpr(node)) == -1: - self.error(&"reference to undeclared name '{node.token.lexeme}'") - # TODO: Find type of identifier + var name = self.resolveStatic(node) + if name == nil: + return nil + return name.valueType of unaryExpr: return self.inferValueType(UnaryExpr(node).a) of binaryExpr: var node = BinaryExpr(node) - var a = self.inferValueType(node.a) - var b = self.inferValueType(node.b) - # This is obviously not correct, but - # this function is only useful as a - # first type checking step anyway - if a == nil: - return b + var a = self.inferExprType(node.a) + var b = self.inferExprType(node.b) + if a == nil or b == nil: + return nil return a of {intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, infExpr, @@ -845,8 +846,8 @@ proc expression(self: Compiler, node: ASTNode) = # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 self.binary(BinaryExpr(node)) of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, - infExpr, nanExpr, floatExpr, nilExpr, - tupleExpr, setExpr, listExpr, dictExpr: + infExpr, nanExpr, floatExpr, nilExpr, tupleExpr, setExpr, listExpr, + dictExpr: # Since all of these AST nodes mostly share # the same overall structure, and the kind # discriminant is enough to tell one @@ -865,7 +866,7 @@ proc awaitStmt(self: Compiler, node: AwaitStmt) = ## same as an await expression followed by a semicolon. ## Await expressions are the only native construct to ## run coroutines from within an already asynchronous - ## loop (which should be orchestrated by an event loop). + ## context (which should be orchestrated by an event loop). ## They block in the caller until the callee returns self.expression(node.expression) self.emitByte(OpCode.Await) @@ -873,8 +874,8 @@ proc awaitStmt(self: Compiler, node: AwaitStmt) = proc deferStmt(self: Compiler, node: DeferStmt) = ## Compiles defer statements. A defer statement - ## is executed right before the function exits - ## (either because of a return or an exception) + ## is executed right before its containing function + ## exits (either because of a return or an exception) let current = self.chunk.code.len self.expression(node.expression) for i in countup(current, self.chunk.code.high()): @@ -885,6 +886,11 @@ proc deferStmt(self: Compiler, node: DeferStmt) = proc returnStmt(self: Compiler, node: ReturnStmt) = ## Compiles return statements. An empty return ## implicitly returns nil + let returnType = self.inferExprType(node.value) + if returnType == nil: + self.error("expression has no type") + elif returnType.token.lexeme != self.currentFunction.returnType.token.lexeme: + self.error(&"expected value of type '{self.currentFunction.returnType.token.lexeme}', got '{returnType.token.lexeme}'") self.expression(node.value) self.emitByte(OpCode.Return) @@ -994,7 +1000,7 @@ proc varDecl(self: Compiler, node: VarDecl) = if self.inferDeclType(node) == nil: self.error(&"Cannot determine the type of '{node.name.token.lexeme}'") self.expression(node.value) - self.declareName(node) + self.declareName(node, IdentExpr(node.valueType)) proc funDecl(self: Compiler, node: FunDecl) = @@ -1007,7 +1013,7 @@ proc funDecl(self: Compiler, node: FunDecl) = # A function's code is just compiled linearly # and then jumped over let jmp = self.emitJump(JumpForwards) - self.declareName(node) + self.declareName(node, IdentExpr(node.returnType)) # Since the deferred array is a linear # sequence of instructions and we want @@ -1065,7 +1071,7 @@ proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk = self.names = @[] self.scopeDepth = 0 self.currentFunction = nil - self.currentModule = "
" + self.currentModule = self.file self.current = 0 while not self.done(): self.declaration(self.step()) diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim index 242790d..df18a05 100644 --- a/src/frontend/meta/bytecode.nim +++ b/src/frontend/meta/bytecode.nim @@ -63,10 +63,11 @@ type LoadConstant = 0u8, # Pushes constant at position x in the constant table onto the stack ## Binary operators UnaryNegate, # Pushes the result of -x onto the stack + UnaryPlus, # Pushes the result of +x onto the stack BinaryAdd, # Pushes the result of a + b onto the stack BinarySubtract, # Pushes the result of a - b onto the stack - BinaryDivide, # Pushes the result of a / b onto the stack (true division). The result is a float - BinaryFloorDiv, # Pushes the result of a // b onto the stack (integer division). The result is always an integer + BinaryDivide, # Pushes the result of a / b onto the stack (true division) + BinaryFloorDiv, # Pushes the result of a // b onto the stack (integer division) BinaryMultiply, # Pushes the result of a * b onto the stack BinaryPow, # Pushes the result of a ** b (a to the power of b) onto the stack BinaryMod, # Pushes the result of a % b onto the stack (modulo division) @@ -90,9 +91,9 @@ type GreaterOrEqual, # Pushes the result of a >= b onto the stack LessOrEqual, # Pushes the result of a <= b onto the stack ## Logical operators - LogicalNot, # Pushes true if - LogicalAnd, - LogicalOr, + LogicalNot, # Pushes true onto the stack if x is falsey + LogicalAnd, # Pushes true onto the stack if a and b are truthy and false otherwise + LogicalOr, # Pushes true onto the stack if either a or b are truthy and false otherwise ## Constant opcodes (each of them pushes a singleton on the stack) Nil, True, diff --git a/src/frontend/optimizer.nim b/src/frontend/optimizer.nim index 5362393..74b143c 100644 --- a/src/frontend/optimizer.nim +++ b/src/frontend/optimizer.nim @@ -69,7 +69,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode = var x: int var y = IntExpr(node) try: - assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseInt(y.literal.lexeme, x) except ValueError: self.newWarning(valueOverflow, node) result = node @@ -77,7 +77,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode = var x: int var y = HexExpr(node) try: - assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseHex(y.literal.lexeme, x) except ValueError: self.newWarning(valueOverflow, node) return node @@ -86,7 +86,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode = var x: int var y = BinExpr(node) try: - assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseBin(y.literal.lexeme, x) except ValueError: self.newWarning(valueOverflow, node) return node @@ -95,7 +95,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode = var x: int var y = OctExpr(node) try: - assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme) + discard parseOct(y.literal.lexeme, x) except ValueError: self.newWarning(valueOverflow, node) return node @@ -122,7 +122,7 @@ proc optimizeUnary(self: Optimizer, node: UnaryExpr): ASTNode = case a.kind: of intExpr: var x: int - assert parseInt(IntExpr(a).literal.lexeme, x) == len(IntExpr(a).literal.lexeme) + discard parseInt(IntExpr(a).literal.lexeme, x) case node.operator.kind: of Tilde: x = not x @@ -169,8 +169,8 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = if a.kind == intExpr and b.kind == intExpr: # Optimizes integer operations var x, y, z: int - assert parseInt(IntExpr(a).literal.lexeme, x) == IntExpr(a).literal.lexeme.len() - assert parseInt(IntExpr(b).literal.lexeme, y) == IntExpr(b).literal.lexeme.len() + discard parseInt(IntExpr(a).literal.lexeme, x) + discard parseInt(IntExpr(b).literal.lexeme, y) try: case node.operator.kind: of Plus: @@ -213,14 +213,14 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = var x, y, z: float if a.kind == intExpr: var temp: int - assert parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len() + discard parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len() x = float(temp) self.newWarning(implicitConversion, a) else: discard parseFloat(FloatExpr(a).literal.lexeme, x) if b.kind == intExpr: var temp: int - assert parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len() + discard parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len() y = float(temp) self.newWarning(implicitConversion, b) else: @@ -258,7 +258,7 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = var a = StrExpr(a) var b = IntExpr(b) var bb: int - assert parseInt(b.literal.lexeme, bb) == b.literal.lexeme.len() + discard parseInt(b.literal.lexeme, bb) case node.operator.kind: of Star: result = newStrExpr(Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'")) @@ -268,7 +268,7 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = var b = StrExpr(b) var a = IntExpr(a) var aa: int - assert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len() + discard parseInt(a.literal.lexeme, aa) case node.operator.kind: of Star: result = newStrExpr(Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'")) diff --git a/src/test.nim b/src/test.nim index b599c70..b8db085 100644 --- a/src/test.nim +++ b/src/test.nim @@ -26,10 +26,10 @@ proc fillSymbolTable(tokenizer: Lexer) proc getLineEditor: LineEditor # Handy dandy compile-time constants -const debugLexer = true -const debugParser = true +const debugLexer = false +const debugParser = false const debugCompiler = true -const debugOptimizer = true +const debugOptimizer = false const debugSerializer = true @@ -45,7 +45,7 @@ when isMainModule: serializedRaw: seq[byte] tokenizer = newLexer() parser = newParser() - optimizer = newOptimizer() + # optimizer = newOptimizer() compiler = newCompiler() serializer = newSerializer() editor = getLineEditor() @@ -78,7 +78,9 @@ when isMainModule: for node in tree: echo "\t", node echo "" - optimized = optimizer.optimize(tree) + # The optimizer needs work to function properly + # with the compiler + # optimized = optimizer.optimize(tree) when debugOptimizer: echo &"Optimization step (constant folding enabled: {optimizer.foldConstants}):" for node in optimized.tree: @@ -92,7 +94,7 @@ when isMainModule: else: stdout.write("No warnings produced\n") echo "" - compiled = compiler.compile(optimized.tree, "") + compiled = compiler.compile(tree, "") when debugCompiler: echo "Compilation step:" stdout.write("\t")