From 776968bffc62def3995c2646bd3de7dcba397ba2 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Fri, 29 Apr 2022 23:04:53 +0200 Subject: [PATCH] Name stropping, added NoOp, minor AST fixes, removed builtin opcodes for most operations --- src/backend/types.nim | 7 +- src/backend/vm.nim | 12 +--- src/frontend/compiler.nim | 116 +++++++++++++++++---------------- src/frontend/lexer.nim | 13 ++-- src/frontend/meta/ast.nim | 29 ++++----- src/frontend/meta/bytecode.nim | 103 +++++++++-------------------- src/frontend/meta/token.nim | 1 + src/frontend/parser.nim | 31 +++++---- 8 files changed, 140 insertions(+), 172 deletions(-) diff --git a/src/backend/types.nim b/src/backend/types.nim index 2cd4391..26e7b76 100644 --- a/src/backend/types.nim +++ b/src/backend/types.nim @@ -21,7 +21,7 @@ type objNil, objNan, objInf, objString PeonObject* = ref object of RootObj - + ## A generic Peon object kind*: ObjectKind Nil* = ref object of PeonObject Nan* = ref object of PeonObject @@ -74,3 +74,8 @@ proc newInf*(positive: bool): Inf = Inf(kind: objInf, positive: positive) proc newBool*(value: bool): Bool = Bool(kind: objBool, value: value) proc newInt*(value: int): Int = Int(kind: objInt, value: value) proc newFloat*(value: float): Float = Float(kind: objFloat, value: value) + + +proc `$`*(self: PeonObject): string = + ## Stringifies a peon object + \ No newline at end of file diff --git a/src/backend/vm.nim b/src/backend/vm.nim index a75a423..c71fa2f 100644 --- a/src/backend/vm.nim +++ b/src/backend/vm.nim @@ -89,14 +89,7 @@ proc dispatch*(self: PeonVM) = ## Main bytecode dispatch loop var instruction: OpCode while true: - stdout.write("[") - for i, e in self.stack: - stdout.write($(e[])) - if i < self.stack.high(): - stdout.write(", ") - echo "]" instruction = OpCode(self.readByte(self.chunk)) - echo instruction case instruction: of OpCode.True: self.push(self.getBool(true)) @@ -108,10 +101,11 @@ proc dispatch*(self: PeonVM) = self.push(self.getNil()) of OpCode.Inf: self.push(self.getInf(true)) - of UnaryPlus: - self.push(self.pop()) of OpCode.Return: + # TODO return + of OpCode.NoOp: + continue else: discard diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index a90839e..784f04c 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -44,6 +44,7 @@ type depth: int isPrivate: bool isConst: bool + isLet: bool valueType: IdentExpr codePos: int @@ -387,16 +388,16 @@ proc literal(self: Compiler, node: ASTNode) = proc unary(self: Compiler, node: UnaryExpr) = ## Compiles unary expressions such as decimal or ## bitwise negation - self.expression(node.a) # Pushes the operand onto the stack + self.expression(node.a) # Pushes the operand onto the stack case node.operator.kind: of Minus: - self.emitByte(UnaryNegate) + self.emitByte(NoOp) of Plus: - self.emitByte(UnaryPlus) + self.emitByte(NoOp) of TokenType.LogicalNot: - self.emitByte(OpCode.LogicalNot) + self.emitByte(NoOp) of Tilde: - self.emitByte(UnaryNot) + self.emitByte(NoOp) else: self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug)") @@ -409,48 +410,48 @@ proc binary(self: Compiler, node: BinaryExpr) = self.expression(node.b) case node.operator.kind: of Plus: - self.emitByte(BinaryAdd) + self.emitByte(NoOp) of Minus: - self.emitByte(BinarySubtract) + self.emitByte(NoOp) of Star: - self.emitByte(BinaryMultiply) + self.emitByte(NoOp) of DoubleStar: - self.emitByte(BinaryPow) + self.emitByte(NoOp) of Percentage: - self.emitByte(BinaryMod) + self.emitByte(NoOp) of FloorDiv: - self.emitByte(BinaryFloorDiv) + self.emitByte(NoOp) of Slash: - self.emitByte(BinaryDivide) + self.emitByte(NoOp) of Ampersand: - self.emitByte(BinaryAnd) + self.emitByte(NoOp) of Caret: - self.emitByte(BinaryXor) + self.emitByte(NoOp) of Pipe: - self.emitByte(BinaryOr) + self.emitByte(NoOp) of As: - self.emitByte(BinaryAs) + self.emitByte(NoOp) of Is: - self.emitByte(BinaryIs) + self.emitByte(NoOp) of IsNot: - self.emitByte(BinaryIsNot) + self.emitByte(NoOp) of Of: - self.emitByte(BinaryOf) + self.emitByte(NoOp) of RightShift: - self.emitByte(BinaryShiftRight) + self.emitByte(NoOp) of LeftShift: - self.emitByte(BinaryShiftLeft) - of TokenType.LessThan: - self.emitByte(OpCode.LessThan) - of TokenType.GreaterThan: - self.emitByte(OpCode.GreaterThan) - of TokenType.DoubleEqual: - self.emitByte(EqualTo) - of TokenType.LessOrEqual: - self.emitByte(OpCode.LessOrEqual) - of TokenType.GreaterOrEqual: - self.emitByte(OpCode.GreaterOrEqual) - of TokenType.LogicalAnd: + self.emitByte(NoOp) + of LessThan: + self.emitByte(NoOp) + of GreaterThan: + self.emitByte(NoOp) + of DoubleEqual: + self.emitByte(NoOp) + of LessOrEqual: + self.emitByte(NoOp) + of GreaterOrEqual: + self.emitByte(NoOp) + of LogicalAnd: self.expression(node.a) var jump: int if self.enableOptimizations: @@ -460,7 +461,7 @@ proc binary(self: Compiler, node: BinaryExpr) = self.emitByte(Pop) self.expression(node.b) self.patchJump(jump) - of TokenType.LogicalOr: + of LogicalOr: self.expression(node.a) let jump = self.emitJump(JumpIfTrue) self.expression(node.b) @@ -487,7 +488,8 @@ proc declareName(self: Compiler, node: Declaration, kind: IdentExpr) = owner: self.currentModule, isConst: node.isConst, valueType: kind, - codePos: self.chunk.code.len())) + codePos: self.chunk.code.len(), + isLet: node.isLet)) self.emitByte(StoreVar) self.emitBytes(self.names.high().toTriple()) of funDecl: @@ -499,7 +501,9 @@ proc declareName(self: Compiler, node: Declaration, kind: IdentExpr) = isConst: false, owner: self.currentModule, valueType: node.returnType, - codePos: self.chunk.code.len())) + codePos: self.chunk.code.len(), + name: node.name, + isLet: false)) self.emitByte(StoreVar) self.emitBytes(self.names.high().toTriple()) # ... but its arguments in an inner one! @@ -516,7 +520,8 @@ proc declareName(self: Compiler, node: Declaration, kind: IdentExpr) = isConst: false, name: argument.name, valueType: kind, - codePos: self.chunk.code.len())) + codePos: self.chunk.code.len(), + isLet: false)) self.emitByte(StoreVar) self.emitBytes(self.names.high().toTriple()) self.scopeDepth -= 1 @@ -608,7 +613,8 @@ proc identifier(self: Compiler, node: IdentExpr) = elif s.isConst: # Constants are emitted as, you guessed it, LoadConstant instructions # no matter the scope depth. If optimizations are enabled, the compiler - # will reuse the same constant every time it is referenced instead + # will reuse the same constant every time it is referenced instead of + # allocating a new one each time self.emitConstant(node) else: self.detectClosureVariable(s.name) @@ -638,34 +644,38 @@ proc assignment(self: Compiler, node: ASTNode) = # TODO: This will explode with slicing! var name = IdentExpr(node.name) let r = self.resolve(name) - if r != nil and r.isConst: - self.error("cannot assign to constant") + if r == nil: + self.error(&"assignment to undeclared name '{node.name}'") + elif r.isConst: + self.error(&"cannot assign to '{node.name}'") + elif r.isLet: + self.error(&"cannot reassign '{node.name}'") self.expression(node.value) let t = self.getStackPos(name) let index = t.pos case node.token.kind: of InplaceAdd: - self.emitByte(BinaryAdd) + self.emitByte(NoOp) of InplaceSub: - self.emitByte(BinarySubtract) + self.emitByte(NoOp) of InplaceDiv: - self.emitByte(BinaryDivide) + self.emitByte(NoOp) of InplaceMul: - self.emitByte(BinaryMultiply) + self.emitByte(NoOp) of InplacePow: - self.emitByte(BinaryPow) + self.emitByte(NoOp) of InplaceFloorDiv: - self.emitByte(BinaryFloorDiv) + self.emitByte(NoOp) of InplaceMod: - self.emitByte(BinaryMod) + self.emitByte(NoOp) of InplaceAnd: - self.emitByte(BinaryAnd) + self.emitByte(NoOp) of InplaceXor: - self.emitByte(BinaryXor) + self.emitByte(NoOp) of InplaceRightShift: - self.emitByte(BinaryShiftRight) + self.emitByte(NoOp) of InplaceLeftShift: - self.emitByte(BinaryShiftLeft) + self.emitByte(NoOp) else: discard # Unreachable # In-place operators just change @@ -677,10 +687,6 @@ proc assignment(self: Compiler, node: ASTNode) = # that would require variants of each # one for regular stack variables as # well as closed-over ones - - # Cannot be nil, we already resolved this! - if self.resolve(name).isConst: - self.error("cannot mutate constant") if index != -1: if not t.closedOver: self.emitByte(StoreVar) @@ -997,7 +1003,7 @@ proc breakStmt(self: Compiler, node: BreakStmt) = # Emits dummy jump offset, this is # patched later - discard self.emitJump(OpCode.Break) + discard self.emitJump(OpCode.Jump) self.currentLoop.breakPos.add(self.chunk.code.high() - 4) if self.currentLoop.depth > self.scopeDepth: # Breaking out of a loop closes its scope diff --git a/src/frontend/lexer.nim b/src/frontend/lexer.nim index 73fd6a3..c690019 100644 --- a/src/frontend/lexer.nim +++ b/src/frontend/lexer.nim @@ -82,7 +82,6 @@ proc removeKeyword*(self: SymbolTable, lexeme: string) = self.keywords.del(lexeme) - proc existsSymbol*(self: SymbolTable, lexeme: string): bool {.inline.} = ## Returns true if a given symbol exists ## in the symbol table already @@ -505,10 +504,14 @@ proc parseBackticks(self: Lexer) = ## by backticks. This may be used ## for name stropping as well as to ## reimplement existing operators - ## (e.g. +, -, etc.) - + ## (e.g. +, -, etc.) without the + ## parser complaining about syntax + ## errors while not self.match("`") and not self.done(): - discard self.step() + if self.peek().isAlphaNumeric() or self.symbols.existsSymbol(self.peek()): + discard self.step() + continue + self.error(&"unexpected character: '{self.peek()}'") self.createToken(Identifier) # Strips the backticks self.tokens[^1].lexeme = self.tokens[^1].lexeme[1..^2] @@ -522,7 +525,7 @@ proc parseIdentifier(self: Lexer) = while (self.peek().isAlphaNumeric() or self.check("_")) and not self.done(): discard self.step() let name: string = self.source[self.start..> b (a with bits shifted b times to the right) onto the stack - BinaryShiftLeft, # Pushes the result of a << b (a with bits shifted b times to the left) onto the stack - BinaryXor, # Pushes the result of a ^ b (bitwise exclusive or) onto the stack - BinaryOr, # Pushes the result of a | b (bitwise or) onto the stack - BinaryAnd, # Pushes the result of a & b (bitwise and) onto the stack - UnaryNot, # Pushes the result of ~x (bitwise not) onto the stack - BinaryAs, # Pushes the result of a as b onto the stack (converts a to the type of b. Explicit support from a is required) - BinaryIs, # Pushes the result of a is b onto the stack (true if a and b point to the same object, false otherwise) - BinaryIsNot, # Pushes the result of not (a is b). This could be implemented in terms of BinaryIs, but it's more efficient this way - BinaryOf, # Pushes the result of a of b onto the stack (true if a is a subclass of b, false otherwise) - BinarySlice, # Perform slicing on supported objects (like "hello"[0:2], which yields "he"). The result is pushed onto the stack - BinarySubscript, # Subscript operator, like "hello"[0] (which pushes 'h' onto the stack) - ## Binary comparison operators - GreaterThan, # Pushes the result of a > b onto the stack - LessThan, # Pushes the result of a < b onto the stack - EqualTo, # Pushes the result of a == b onto the stack - NotEqualTo, # Pushes the result of a != b onto the stack (optimization for not (a == b)) - GreaterOrEqual, # Pushes the result of a >= b onto the stack - LessOrEqual, # Pushes the result of a <= b onto the stack - ## Logical operators - LogicalNot, # Pushes true onto the stack if x is falsey - LogicalAnd, # Pushes true onto the stack if a and b are truthy and false otherwise - LogicalOr, # Pushes true onto the stack if either a or b are truthy and false otherwise - ## Constant opcodes (each of them pushes a singleton on the stack) + ## Constant opcodes (each of them pushes a constant singleton on the stack) Nil, True, False, @@ -104,19 +69,18 @@ type PopN, # Pops x elements off the stack (optimization for exiting scopes and returning from functions) ## Name resolution/handling LoadAttribute, - LoadVar, # Loads a variable from the stack - StoreVar, # Sets/updates a statically bound variable's value - LoadHeap, # Loads a closed-over variable - StoreHeap, # Stores a closed-over variable + LoadVar, # Pushes the object at position x in the stack + StoreVar, # Stores the value of b at position a in the stack + LoadHeap, # Pushes the object position x in the closure array + StoreHeap, # Stores the value of b at position a in the closure array ## Looping and jumping - Jump, # Absolute, unconditional jump into the bytecode - JumpIfFalse, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey - JumpIfTrue, # Jumps to an absolute index in the bytecode if the value at the top of the stack is truthy + Jump, # Absolute, unconditional jump into the bytecode at offset x + JumpIfFalse, # Jumps to an absolute index (x) in the bytecode if the value at the top of the stack is falsey + JumpIfTrue, # Jumps to an absolute index (x) in the bytecode if the value at the top of the stack is truthy JumpIfFalsePop, # Like JumpIfFalse, but it also pops off the stack (regardless of truthyness). Optimization for if statements - JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey and pops it otherwise - JumpForwards, # Relative, unconditional, positive jump in the bytecode - JumpBackwards, # Relative, unconditional, negative jump into the bytecode - Break, # Temporary opcode used to signal exiting out of loops + JumpIfFalseOrPop, # Jumps to an absolute index (x) in the bytecode if the value at the top of the stack is falsey and pops it otherwise + JumpForwards, # Relative, unconditional, positive jump of size x in the bytecode + JumpBackwards, # Relative, unconditional, negative jump of size x in the bytecode ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one) LongJump, LongJumpIfFalse, @@ -126,11 +90,10 @@ type LongJumpForwards, LongJumpBackwards, ## Functions - Call, # Calls a callable object + Call, # Calls a function Return # Returns from the current function ## Exception handling - Raise, # Raises exception x - ReRaise, # Re-raises active exception + Raise, # Raises exception x or re-raises active exception if x is nil BeginTry, # Initiates an exception handling context FinishTry, # Closes the current exception handling context ## Generators @@ -144,30 +107,20 @@ type BuildTuple, ## Misc Assert, # Raises an AssertionFailed exception if the value at the top of the stack is falsey - MakeClass, # Builds a class instance from the values at the top of the stack (class object, constructor arguments, etc.) Slice, # Slices an object (takes 3 arguments: start, stop, step). Pushes the result of a.subscript(b, c, d) onto the stack GetItem, # Pushes the result of a.getItem(b) onto the stack - ImplicitReturn, # Optimization for returning nil from functions (saves us a VM "clock cycle") + NoOp, # Just a no-op # We group instructions by their operation/operand types for easier handling when debugging -# Simple instructions encompass: -# - Instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.) -# - Unary and binary operators -const simpleInstructions* = {OpCode.Return, BinaryAdd, BinaryMultiply, - BinaryDivide, BinarySubtract, - BinaryMod, BinaryPow, OpCode.Nil, - OpCode.True, OpCode.False, OpCode.Nan, OpCode.Inf, - BinaryShiftLeft, BinaryShiftRight, - BinaryXor, OpCode.LogicalNot, EqualTo, - OpCode.GreaterThan, OpCode.LessThan, LoadAttribute, - BinarySlice, Pop, UnaryNegate, - BinaryIs, BinaryAs, OpCode.GreaterOrEqual, - OpCode.LessOrEqual, BinaryOr, BinaryAnd, - UnaryNot, BinaryFloorDiv, BinaryOf, OpCode.Raise, - ReRaise, BeginTry, FinishTry, OpCode.Yield, OpCode.Await, - MakeClass, ImplicitReturn, UnaryPlus} +# Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.) +const simpleInstructions* = {OpCode.Return, OpCode.Nil, + OpCode.True, OpCode.False, + OpCode.Nan, OpCode.Inf, + Pop, OpCode.Raise, + BeginTry, FinishTry, + OpCode.Yield, OpCode.Await} # Constant instructions are instructions that operate on the bytecode constant table const constantInstructions* = {LoadConstant, } @@ -184,9 +137,11 @@ const stackDoubleInstructions* = {} const argumentDoubleInstructions* = {PopN, } # Jump instructions jump at relative or absolute bytecode offsets -const jumpInstructions* = {JumpIfFalse, JumpIfFalsePop, JumpForwards, JumpBackwards, - LongJumpIfFalse, LongJumpIfFalsePop, LongJumpForwards, - LongJumpBackwards, JumpIfTrue, LongJumpIfTrue} +const jumpInstructions* = {JumpIfFalse, JumpIfFalsePop, + JumpForwards, JumpBackwards, + LongJumpIfFalse, LongJumpIfFalsePop, + LongJumpForwards, LongJumpBackwards, + JumpIfTrue, LongJumpIfTrue} # Collection instructions push a built-in collection type onto the stack const collectionInstructions* = {BuildList, BuildDict, BuildSet, BuildTuple} diff --git a/src/frontend/meta/token.nim b/src/frontend/meta/token.nim index a9e5407..127611b 100644 --- a/src/frontend/meta/token.nim +++ b/src/frontend/meta/token.nim @@ -39,6 +39,7 @@ type Foreach, Yield, Public, As, Of, Defer, Try, Except, Finally, Type, Operator, Case, Enum, From, + Emit # Literal types Integer, Float, String, Identifier, diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim index 2fe1e07..2d28626 100644 --- a/src/frontend/parser.nim +++ b/src/frontend/parser.nim @@ -375,10 +375,10 @@ proc call(self: Parser): Expression = elif self.match(LeftBracket): # Slicing such as a[1:2] let tok = self.peek(-1) - var ends: seq[ASTNode] = @[] + var ends: seq[Expression] = @[] while not self.check(RightBracket) and not self.done(): if self.check(Colon): - ends.add(newNilExpr(Token())) + ends.add(newNilExpr(Token(lexeme: "nil"))) discard self.step() else: ends.add(self.expression()) @@ -624,7 +624,7 @@ proc yieldStmt(self: Parser): Statement = if not self.check(Semicolon): result = newYieldStmt(self.expression(), tok) else: - result = newYieldStmt(newNilExpr(Token()), tok) + result = newYieldStmt(newNilExpr(Token(lexeme: "nil")), tok) endOfLine("missing semicolon after yield statement") @@ -777,7 +777,7 @@ proc forStmt(self: Parser): Statement = if condition == nil: ## An empty condition is functionally ## equivalent to "true" - condition = newTrueExpr(Token()) + condition = newTrueExpr(Token(lexeme: "true")) # We can use a while loop, which in this case works just as well body = newWhileStmt(condition, body, tok) if initializer != nil: @@ -846,15 +846,15 @@ proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declarat else: if tok.kind != Var: self.error(&"{tok.lexeme} declaration requires an initializer") - value = newNilExpr(Token()) + value = newNilExpr(Token(lexeme: "nil")) self.expect(Semicolon, &"expecting semicolon after declaration") case tok.kind: of Var: - result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, closedOver=false, valueType=valueType) + result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, valueType=valueType, pragmas=(@[])) of Const: - result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, isConst=true, closedOver=false, valueType=valueType) + result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, isConst=true, valueType=valueType, pragmas=(@[])) of Let: - result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, isLet=isLet, closedOver=false, valueType=valueType) + result = newVarDecl(name, value, isPrivate=isPrivate, token=tok, isLet=isLet, valueType=valueType, pragmas=(@[])) else: discard # Unreachable @@ -868,7 +868,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL var returnType: IdentExpr if not isLambda and self.check(Identifier): # We do this extra check because we might - # be called from a contexst where it's + # be called from a context where it's # ambiguous whether we're parsing a declaration # or an expression. Fortunately anonymous functions # are nameless, so we can sort the ambiguity by checking @@ -877,11 +877,11 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL self.checkDecl(not self.check(Star)) self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()), isAsync=isAsync, isGenerator=isGenerator, isPrivate=true, - token=tok, closedOver=false) + token=tok, pragmas=(@[])) FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1)) if self.match(Star): FunDecl(self.currentFunction).isPrivate = false - elif not isLambda and self.check([LeftBrace, Colon]): + elif not isLambda and self.check([LeftBrace, Colon, LeftParen]): # We do a bit of hacking to pretend we never # wanted to parse this as a declaration in # the first place and pass control over to @@ -904,7 +904,6 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL self.expect([Identifier, Nil], "expecting function return type after ':'") returnType = newIdentExpr(self.peek(-1)) if not self.match(LeftBrace): - # Argument-less function var parameter: tuple[name: IdentExpr, valueType: IdentExpr] self.expect(LeftParen) while not self.check(RightParen): @@ -1048,8 +1047,14 @@ proc declaration(self: Parser): Declaration = of Operator: discard self.step() result = self.funDecl(isOperator=true) - of Type, Comment, TokenType.Whitespace, TokenType.Tab: + of Type, TokenType.Whitespace, TokenType.Tab: discard self.step() # TODO + of Comment: + let tok = self.peek() + if tok.lexeme.startsWith("#pragma["): + discard # TODO: Pragmas + elif tok.lexeme.startsWith("##"): + discard # TODO: Docstrings else: result = Declaration(self.statement())