diff --git a/src/backend/compiler.nim b/src/backend/compiler.nim deleted file mode 100644 index 2b5f50a..0000000 --- a/src/backend/compiler.nim +++ /dev/null @@ -1,908 +0,0 @@ -# Copyright 2021 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import meta/token -import meta/ast -import meta/errors -import meta/bytecode -import ../config -import ../util/multibyte - - -import strformat -import algorithm -import parseutils -import sequtils - - -export ast -export bytecode -export token -export multibyte - - -type - Name = ref object - ## A compile-time wrapper around - ## statically resolved names. - ## Depth indicates to which scope - ## the variable belongs, zero meaning - ## the global one - name: IdentExpr - owner: string - depth: int - isPrivate: bool - isConst: bool - - Loop = object - ## A "loop object" used - ## by the compiler to emit - ## appropriate jump offsets - ## for continue and break - ## statements - start: int - depth: int - breakPos: seq[int] - - Compiler* = ref object - ## A wrapper around the compiler's state - chunk: Chunk - ast: seq[ASTNode] - current: int - file: string - names: seq[Name] - scopeDepth: int - currentFunction: FunDecl - enableOptimizations*: bool - currentLoop: Loop - # Each time a defer statement is - # compiled, its code is emitted - # here. 
Later, if there is any code - # to defer in the current function, - # funDecl will wrap the function's code - # inside an implicit try/finally block - # and add this code in the finally branch. - # This sequence is emptied each time a - # fun declaration is compiled and stores only - # deferred code for the current function (may - # be empty) - deferred: seq[uint8] - - - -proc initCompiler*(enableOptimizations: bool = true): Compiler = - ## Initializes a new Compiler object - new(result) - result.ast = @[] - result.current = 0 - result.file = "" - result.names = @[] - result.scopeDepth = 0 - result.currentFunction = nil - result.enableOptimizations = enableOptimizations - - - -## Forward declarations -proc expression(self: Compiler, node: ASTNode) -proc statement(self: Compiler, node: ASTNode) -proc declaration(self: Compiler, node: ASTNode) -proc peek(self: Compiler, distance: int = 0): ASTNode -## End of forward declarations - -## Public getters for nicer error formatting -proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= - self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) - - -## Utility functions - -proc peek(self: Compiler, distance: int = 0): ASTNode = - ## Peeks at the AST node at the given distance. - ## If the distance is out of bounds, the last - ## AST node in the tree is returned. 
A negative - ## distance may be used to retrieve previously - ## consumed AST nodes - if self.ast.high() == -1 or self.current + distance > self.ast.high() or - self.current + distance < 0: - result = self.ast[^1] - else: - result = self.ast[self.current + distance] - - -proc done(self: Compiler): bool = - ## Returns true if the compiler is done - ## compiling, false otherwise - result = self.current > self.ast.high() - - -proc error(self: Compiler, message: string) = - ## Raises a formatted CompileError exception - var tok = self.getCurrentNode().token - raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', line {tok.line} at '{tok.lexeme}' -> {message}") - - -proc step(self: Compiler): ASTNode = - ## Steps to the next node and returns - ## the consumed one - result = self.peek() - if not self.done(): - self.current += 1 - - -proc emitByte(self: Compiler, byt: OpCode|uint8) = - ## Emits a single byte, writing it to - ## the current chunk being compiled - when DEBUG_TRACE_COMPILER: - echo &"DEBUG - Compiler: Emitting {$byt}" - self.chunk.write(uint8 byt, self.peek().token.line) - - -proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) = - ## Emits multiple bytes instead of a single one, this is useful - ## to emit operators along with their operands or for multi-byte - ## instructions that are longer than one byte - self.emitByte(uint8 byt1) - self.emitByte(uint8 byt2) - - -proc emitBytes(self: Compiler, bytarr: array[2, uint8]) = - ## Handy helper method to write an array of 2 bytes into - ## the current chunk, calling emitByte on each of its - ## elements - self.emitBytes(bytarr[0], bytarr[1]) - - -proc emitBytes(self: Compiler, bytarr: array[3, uint8]) = - ## Handy helper method to write an array of 3 bytes into - ## the current chunk, calling emitByte on each of its - ## elements - self.emitBytes(bytarr[0], bytarr[1]) - self.emitByte(bytarr[2]) - - -proc makeConstant(self: Compiler, val: ASTNode): array[3, 
uint8] = - ## Adds a constant to the current chunk's constant table - ## and returns its index as a 3-byte array of uint8s - result = self.chunk.addConstant(val) - - -proc emitConstant(self: Compiler, obj: ASTNode) = - ## Emits a LoadConstant instruction along - ## with its operand - self.emitByte(LoadConstant) - self.emitBytes(self.makeConstant(obj)) - - -proc identifierConstant(self: Compiler, identifier: IdentExpr): array[3, uint8] = - ## Emits an identifier name as a string in the current chunk's constant - ## table. This is used to load globals declared as dynamic that cannot - ## be resolved statically by the compiler - try: - result = self.makeConstant(identifier) - except CompileError: - self.error(getCurrentExceptionMsg()) - - -proc emitJump(self: Compiler, opcode: OpCode): int = - ## Emits a dummy jump offset to be patched later. Assumes - ## the largest offset (emits 4 bytes, one for the given jump - ## opcode, while the other 3 are for the jump offset which is set - ## to the maximum unsigned 24 bit integer). If the shorter - ## 16 bit alternative is later found to be better suited, patchJump - ## will fix this. This function returns the absolute index into the - ## chunk's bytecode array where the given placeholder instruction was written - self.emitByte(opcode) - self.emitBytes((0xffffff).toTriple()) - result = self.chunk.code.len() - 4 - - -proc patchJump(self: Compiler, offset: int) = - ## Patches a previously emitted jump - ## using emitJump. Since emitJump assumes - ## a long jump, this also shrinks the jump - ## offset and changes the bytecode instruction if possible - ## (i.e. jump is in 16 bit range), but the converse is also - ## true (i.e. 
it might change a regular jump into a long one) - let jump: int = self.chunk.code.len() - offset - 4 - if jump > 16777215: - self.error("cannot jump more than 16777215 bytecode instructions") - if jump < uint16.high().int: - case OpCode(self.chunk.code[offset]): - of LongJumpForwards: - self.chunk.code[offset] = JumpForwards.uint8() - of LongJumpBackwards: - self.chunk.code[offset] = JumpBackwards.uint8() - of LongJumpIfFalse: - self.chunk.code[offset] = JumpIfFalse.uint8() - of LongJumpIfFalsePop: - self.chunk.code[offset] = JumpIfFalsePop.uint8() - else: - self.error(&"invalid opcode {self.chunk.code[offset]} in patchJump (This is an internal error and most likely a bug)") - self.chunk.code.delete(offset + 1) # Discards the 24 bit integer - let offsetArray = jump.toDouble() - self.chunk.code[offset + 1] = offsetArray[0] - self.chunk.code[offset + 2] = offsetArray[1] - else: - case OpCode(self.chunk.code[offset]): - of JumpForwards: - self.chunk.code[offset] = LongJumpForwards.uint8() - of JumpBackwards: - self.chunk.code[offset] = LongJumpBackwards.uint8() - of JumpIfFalse: - self.chunk.code[offset] = LongJumpIfFalse.uint8() - of JumpIfFalsePop: - self.chunk.code[offset] = LongJumpIfFalsePop.uint8() - else: - self.error(&"invalid opcode {self.chunk.code[offset]} in patchJump (This is an internal error and most likely a bug)") - let offsetArray = jump.toTriple() - self.chunk.code[offset + 1] = offsetArray[0] - self.chunk.code[offset + 2] = offsetArray[1] - self.chunk.code[offset + 3] = offsetArray[2] - -## End of utility functions - -proc literal(self: Compiler, node: ASTNode) = - ## Emits instructions for literals such - ## as singletons, strings, numbers and - ## collections - case node.kind: - of trueExpr: - self.emitByte(OpCode.True) - of falseExpr: - self.emitByte(OpCode.False) - of nilExpr: - self.emitByte(OpCode.Nil) - of infExpr: - self.emitByte(OpCode.Inf) - of nanExpr: - self.emitByte(OpCode.Nan) - of strExpr: - self.emitConstant(node) - # The optimizer 
will emit warning - # for overflowing numbers. Here, we - # treat them as errors - of intExpr: - var x: int - var y = IntExpr(node) - try: - assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.error("integer value out of range") - self.emitConstant(y) - # Even though most likely the optimizer - # will collapse all these other literals - # to nodes of kind intExpr, that can be - # disabled. This also allows us to catch - # basic overflow errors before running any code - of hexExpr: - var x: int - var y = HexExpr(node) - try: - assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.error("integer value out of range") - self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, stop: y.token.pos.start + - len($x))))) - of binExpr: - var x: int - var y = BinExpr(node) - try: - assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.error("integer value out of range") - self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, stop: y.token.pos.start + - len($x))))) - of octExpr: - var x: int - var y = OctExpr(node) - try: - assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.error("integer value out of range") - self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, stop: y.token.pos.start + - len($x))))) - of floatExpr: - var x: float - var y = FloatExpr(node) - try: - assert parseFloat(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.error("floating point value out of range") - self.emitConstant(y) - of listExpr: - var y = ListExpr(node) - for member in y.members: - self.expression(member) - self.emitByte(BuildList) - self.emitBytes(y.members.len().toTriple()) # 24-bit integer, meaning list literals can have up to 2^24 elements - of tupleExpr: - var y = TupleExpr(node) - 
for member in y.members: - self.expression(member) - self.emitByte(BuildTuple) - self.emitBytes(y.members.len().toTriple()) - of setExpr: - var y = SetExpr(node) - for member in y.members: - self.expression(member) - self.emitByte(BuildSet) - self.emitBytes(y.members.len().toTriple()) - of dictExpr: - var y = DictExpr(node) - for (key, value) in zip(y.keys, y.values): - self.expression(key) - self.expression(value) - self.emitByte(BuildDict) - self.emitBytes(y.keys.len().toTriple()) - of awaitExpr: - var y = AwaitExpr(node) - self.expression(y.awaitee) - self.emitByte(OpCode.Await) - else: - self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug)") - - -proc unary(self: Compiler, node: UnaryExpr) = - ## Compiles unary expressions such as negation or - ## bitwise inversion - self.expression(node.a) # Pushes the operand onto the stack - case node.operator.kind: - of Minus: - self.emitByte(UnaryNegate) - of Plus: - discard # Unary + does nothing - of TokenType.LogicalNot: - self.emitByte(OpCode.LogicalNot) - of Tilde: - self.emitByte(UnaryNot) - else: - self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug)") - - -proc binary(self: Compiler, node: BinaryExpr) = - ## Compiles all binary expressions - # These two lines prepare the stack by pushing the - # opcode's operands onto it - self.expression(node.a) - self.expression(node.b) - case node.operator.kind: - of Plus: - self.emitByte(BinaryAdd) - of Minus: - self.emitByte(BinarySubtract) - of Asterisk: - self.emitByte(BinaryMultiply) - of DoubleAsterisk: - self.emitByte(BinaryPow) - of Percentage: - self.emitByte(BinaryMod) - of FloorDiv: - self.emitByte(BinaryFloorDiv) - of Slash: - self.emitByte(BinaryDivide) - of Ampersand: - self.emitByte(BinaryAnd) - of Caret: - self.emitByte(BinaryXor) - of Pipe: - self.emitByte(BinaryOr) - of As: - self.emitByte(BinaryAs) - of Is: - 
self.emitByte(BinaryIs) - of IsNot: - self.emitByte(BinaryIsNot) - of Of: - self.emitByte(BinaryOf) - of RightShift: - self.emitByte(BinaryShiftRight) - of LeftShift: - self.emitByte(BinaryShiftLeft) - of TokenType.LessThan: - self.emitByte(OpCode.LessThan) - of TokenType.GreaterThan: - self.emitByte(OpCode.GreaterThan) - of TokenType.DoubleEqual: - self.emitByte(EqualTo) - of TokenType.LessOrEqual: - self.emitByte(OpCode.LessOrEqual) - of TokenType.GreaterOrEqual: - self.emitByte(OpCode.GreaterOrEqual) - of TokenType.LogicalAnd: - self.expression(node.a) - let jump = self.emitJump(JumpIfFalse) - self.emitByte(Pop) - self.expression(node.b) - self.patchJump(jump) - of TokenType.LogicalOr: - self.expression(node.a) - let jump = self.emitJump(JumpIfTrue) - self.expression(node.b) - self.patchJump(jump) - # TODO: In-place operations - else: - self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug)") - - -proc declareName(self: Compiler, node: ASTNode) = - ## Compiles all name declarations (constants, static, - ## and dynamic) - case node.kind: - of varDecl: - var node = VarDecl(node) - if not node.isStatic: - # This emits code for dynamically-resolved variables (i.e. globals declared as dynamic and unresolvable names) - self.emitByte(DeclareName) - self.emitBytes(self.identifierConstant(IdentExpr(node.name))) - else: - # Statically resolved variable here. Only creates a new Name entry - # so that self.identifier emits the proper stack offset - if self.names.high() > 16777215: - # If someone ever hits this limit in real-world scenarios, I swear I'll - # slap myself 100 times with a sign saying "I'm dumb". 
Mark my words - self.error("cannot declare more than 16777215 static variables at a time") - self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name), - isPrivate: node.isPrivate, - owner: node.owner, - isConst: node.isConst)) - else: - discard # TODO: Classes, functions - - -proc varDecl(self: Compiler, node: VarDecl) = - ## Compiles variable declarations - self.expression(node.value) - self.declareName(node) - - -proc resolveStatic(self: Compiler, name: IdentExpr, - depth: int = self.scopeDepth): Name = - ## Traverses self.staticNames backwards and returns the - ## first name object with the given name at the given - ## depth. The default depth is the current one. Returns - ## nil when the name can't be found - for obj in reversed(self.names): - if obj.name.token.lexeme == name.token.lexeme and obj.depth == depth: - return obj - return nil - - -proc deleteStatic(self: Compiler, name: IdentExpr, - depth: int = self.scopeDepth) = - ## Traverses self.staticNames backwards and returns the - ## deletes name object with the given name at the given - ## depth. The default depth is the current one. Does - ## nothing when the name can't be found - for i, obj in reversed(self.names): - if obj.name.token.lexeme == name.token.lexeme and obj.depth == depth: - self.names.del(i) - - -proc getStaticIndex(self: Compiler, name: IdentExpr): int = - ## Gets the predicted stack position of the given variable - ## if it is static, returns -1 if it is to be bound dynamically - ## or it does not exist at all - var i: int = self.names.high() - for variable in reversed(self.names): - if name.name.lexeme == variable.name.name.lexeme: - return i - dec(i) - return -1 - - -proc identifier(self: Compiler, node: IdentExpr) = - ## Compiles access to identifiers - let s = self.resolveStatic(node) - if s != nil and s.isConst: - # Constants are emitted as, you guessed it, constant instructions - # no matter the scope depth. 
Also, name resolution specifiers do not - # apply to them (because what would it mean for a constant to be dynamic - # anyway?) - self.emitConstant(node) - else: - let index = self.getStaticIndex(node) - if index != -1: - self.emitByte(LoadFast) # Static name resolution, loads value at index in the stack - self.emitBytes(index.toTriple()) - else: - self.emitByte(LoadName) # Resolves by name, at runtime, in a global hashmap - self.emitBytes(self.identifierConstant(node)) - - -proc assignment(self: Compiler, node: ASTNode) = - ## Compiles assignment expressions - case node.kind: - of assignExpr: - var node = AssignExpr(node) - var name = IdentExpr(node.name) - let r = self.resolveStatic(name) - if r != nil and r.isConst: - self.error("cannot assign to constant") - - self.expression(node.value) - let index = self.getStaticIndex(name) - case node.token.kind: - of InplaceAdd: - self.emitByte(BinaryAdd) - of InplaceSub: - self.emitByte(BinarySubtract) - of InplaceDiv: - self.emitByte(BinaryDivide) - of InplaceMul: - self.emitByte(BinaryMultiply) - of InplacePow: - self.emitByte(BinaryPow) - of InplaceFloorDiv: - self.emitByte(BinaryFloorDiv) - of InplaceMod: - self.emitByte(BinaryMod) - of InplaceAnd: - self.emitByte(BinaryAnd) - of InplaceXor: - self.emitByte(BinaryXor) - of InplaceRightShift: - self.emitByte(BinaryShiftRight) - of InplaceLeftShift: - self.emitByte(BinaryShiftLeft) - else: - discard # Unreachable - # In-place operators just change - # what values is set to a given - # stack offset/name, so we only - # need to perform the operation - # as usual and then store it. - # TODO: A better optimization would - # be to have everything in one opcode, - # but that requires variants for stack, - # heap, and closure variables - if index != -1: - self.emitByte(StoreFast) - self.emitBytes(index.toTriple()) - else: - # Assignment only encompasses variable assignments - # so we can ensure the name is a constant (i.e. 
an - # IdentExpr) instead of an object (which would be - # the case with setItemExpr) - self.emitByte(StoreName) - self.emitBytes(self.makeConstant(name)) - of setItemExpr: - discard - # TODO - else: - self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") - - -proc beginScope(self: Compiler) = - ## Begins a new local scope by incrementing the current - ## scope's depth - inc(self.scopeDepth) - - -proc endScope(self: Compiler) = - ## Ends the current local scope - if self.scopeDepth < 0: - self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)") - var popped: int = 0 - for ident in reversed(self.names): - if ident.depth > self.scopeDepth: - inc(popped) - if not self.enableOptimizations: - # All variables with a scope depth larger than the current one - # are now out of scope. Begone, you're now homeless! - self.emitByte(Pop) - if self.enableOptimizations and popped > 1: - # If we're popping less than 65535 variables, then - # we can emit a PopN instruction. 
This is true for - # 99.99999% of the use cases of the language (who the - # hell is going to use 65 THOUSAND local variables?), but - # if you'll ever use more then JAPL will emit a PopN instruction - # for the first 65 thousand and change local variables and then - # emit another batch of plain ol' Pop instructions for the rest - if popped <= uint16.high().int(): - self.emitByte(PopN) - self.emitBytes(popped.toTriple()) - else: - self.emitByte(PopN) - self.emitBytes(uint16.high().int.toTriple()) - for i in countdown(self.names.high(), popped - uint16.high().int()): - if self.names[i].depth > self.scopeDepth: - self.emitByte(Pop) - elif popped == 1: - # We only emit PopN if we're popping more than one value - self.emitByte(Pop) - for _ in countup(0, popped - 1): - discard self.names.pop() - dec(self.scopeDepth) - - -proc blockStmt(self: Compiler, node: BlockStmt) = - ## Compiles block statements, which create a new - ## local scope. - self.beginScope() - for decl in node.code: - self.declaration(decl) - self.endScope() - - -proc ifStmt(self: Compiler, node: IfStmt) = - ## Compiles if/else statements for conditional - ## execution of code - self.expression(node.condition) - var jumpCode: OpCode - if self.enableOptimizations: - jumpCode = JumpIfFalsePop - else: - jumpCode = JumpIfFalse - let jump = self.emitJump(jumpCode) - if not self.enableOptimizations: - self.emitByte(Pop) - self.statement(node.thenBranch) - self.patchJump(jump) - if node.elseBranch != nil: - let jump = self.emitJump(JumpForwards) - self.statement(node.elseBranch) - self.patchJump(jump) - - -proc emitLoop(self: Compiler, begin: int) = - ## Emits a JumpBackwards instruction with the correct - ## jump offset - var offset: int - case OpCode(self.chunk.code[begin + 1]): # The jump instruction - of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse, - LongJumpIfFalsePop, LongJumpIfTrue: - offset = self.chunk.code.len() - begin + 4 - else: - offset = self.chunk.code.len() - begin - if offset > 
uint16.high().int: - if offset > 16777215: - self.error("cannot jump more than 16777215 bytecode instructions") - self.emitByte(LongJumpBackwards) - self.emitBytes(offset.toTriple()) - else: - self.emitByte(JumpBackwards) - self.emitBytes(offset.toDouble()) - - -proc whileStmt(self: Compiler, node: WhileStmt) = - ## Compiles C-style while loops - let start = self.chunk.code.len() - self.expression(node.condition) - let jump = self.emitJump(JumpIfFalsePop) - self.statement(node.body) - self.patchJump(jump) - self.emitLoop(start) - - -proc expression(self: Compiler, node: ASTNode) = - ## Compiles all expressions - case node.kind: - of getItemExpr: - discard - # Note that for setItem and assign we don't convert - # the node to its true type because that type information - # would be lost in the call anyway. The differentiation - # happens in self.assignment - of setItemExpr, assignExpr: - self.assignment(node) - of identExpr: - self.identifier(IdentExpr(node)) - of unaryExpr: - # Unary expressions such as ~5 and -3 - self.unary(UnaryExpr(node)) - of groupingExpr: - # Grouping expressions like (2 + 1) - self.expression(GroupingExpr(node).expression) - of binaryExpr: - # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 - self.binary(BinaryExpr(node)) - of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, - infExpr, nanExpr, floatExpr, nilExpr, - tupleExpr, setExpr, listExpr, dictExpr: - # Since all of these AST nodes mostly share - # the same overall structure, and the kind - # discriminant is enough to tell one - # from the other, why bother with - # specialized cases when one is enough? 
- self.literal(node) - else: - self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") - - -proc delStmt(self: Compiler, node: ASTNode) = - ## Compiles del statements, which unbind - ## a name from the current scope - case node.kind: - of identExpr: - var node = IdentExpr(node) - let i = self.getStaticIndex(node) - if i != -1: - self.emitByte(DeleteFast) - self.emitBytes(i.toTriple()) - self.deleteStatic(node) - else: - self.emitByte(DeleteName) - self.emitBytes(self.identifierConstant(node)) - else: - discard # The parser already handles the other cases - - -proc awaitStmt(self: Compiler, node: AwaitStmt) = - ## Compiles await statements. An await statement - ## is like an await expression, but parsed in the - ## context of statements for usage outside expressions, - ## meaning it can be used standalone. It's basically the - ## same as an await expression followed by a semicolon. - ## Await expressions are the only native construct to - ## run coroutines from within an already asynchronous - ## loop (which should be orchestrated by an event loop). - ## They block in the caller until the callee returns - self.expression(node.awaitee) - self.emitByte(OpCode.Await) - - -proc deferStmt(self: Compiler, node: DeferStmt) = - ## Compiles defer statements. A defer statement - ## is executed right before the function exits - ## (either because of a return or an exception) - let current = self.chunk.code.len - self.expression(node.deferred) - for i in countup(current, self.chunk.code.high()): - self.deferred.add(self.chunk.code[i]) - self.chunk.code.del(i) - - -proc returnStmt(self: Compiler, node: ReturnStmt) = - ## Compiles return statements. 
An empty return - ## implicitly returns nil - self.expression(node.value) - self.emitByte(OpCode.Return) - - -proc yieldStmt(self: Compiler, node: YieldStmt) = - ## Compiles yield statements - self.expression(node.expression) - self.emitByte(OpCode.Yield) - - -proc raiseStmt(self: Compiler, node: RaiseStmt) = - ## Compiles yield statements - self.expression(node.exception) - self.emitByte(OpCode.Raise) - - -proc continueStmt(self: Compiler, node: ContinueStmt) = - ## Compiles continue statements. A continue statements - ## jumps to the next iteration in a loop - if self.currentLoop.start <= 65535: - self.emitByte(Jump) - self.emitBytes(self.currentLoop.start.toDouble()) - else: - self.emitByte(LongJump) - self.emitBytes(self.currentLoop.start.toTriple()) - - -proc breakStmt(self: Compiler, node: BreakStmt) = - ## Compiles break statements. A continue statement - ## jumps to the next iteration in a loop - - # Emits dummy jump offset, this is - # patched later - discard self.emitJump(OpCode.Break) - self.currentLoop.breakPos.add(self.chunk.code.high() - 4) - if self.currentLoop.depth > self.scopeDepth: - # Breaking out of a loop closes its scope - self.endScope() - - -proc patchBreaks(self: Compiler) = - ## Patches "break" opcodes with - ## actual jumps. This is needed - ## because the size of code - ## to skip is not known before - ## the loop is fully compiled - for brk in self.currentLoop.breakPos: - self.chunk.code[brk] = JumpForwards.uint8() - self.patchJump(brk) - - -proc assertStmt(self: Compiler, node: AssertStmt) = - ## Compiles assert statements (raise - ## AssertionError if the expression is falsey) - self.expression(node.expression) - self.emitByte(OpCode.Assert) - - -proc statement(self: Compiler, node: ASTNode) = - ## Compiles all statements - case node.kind: - of exprStmt: - self.expression(ExprStmt(node).expression) - self.emitByte(Pop) # Expression statements discard their value. 
Their main use case is side effects in function calls - of NodeKind.ifStmt: - self.ifStmt(IfStmt(node)) - of NodeKind.delStmt: - self.delStmt(DelStmt(node).name) - of NodeKind.assertStmt: - self.assertStmt(AssertStmt(node)) - of NodeKind.raiseStmt: - self.raiseStmt(RaiseStmt(node)) - of NodeKind.breakStmt: - self.breakStmt(BreakStmt(node)) - of NodeKind.continueStmt: - self.continueStmt(ContinueStmt(node)) - of NodeKind.returnStmt: - self.returnStmt(ReturnStmt(node)) - of NodeKind.importStmt: - discard - of NodeKind.fromImportStmt: - discard - of NodeKind.whileStmt, NodeKind.forStmt: - ## Our parser already desugars for loops to - ## while loops! - let loop = self.currentLoop - self.currentLoop = Loop(start: self.chunk.code.len(), - depth: self.scopeDepth, breakPos: @[]) - self.whileStmt(WhileStmt(node)) - self.patchBreaks() - self.currentLoop = loop - of NodeKind.forEachStmt: - discard - of NodeKind.blockStmt: - self.blockStmt(BlockStmt(node)) - of NodeKind.yieldStmt: - self.yieldStmt(YieldStmt(node)) - of NodeKind.awaitStmt: - self.awaitStmt(AwaitStmt(node)) - of NodeKind.deferStmt: - self.deferStmt(DeferStmt(node)) - of NodeKind.tryStmt: - discard - else: - self.expression(node) - - -proc declaration(self: Compiler, node: ASTNode) = - ## Compiles all declarations - case node.kind: - of NodeKind.varDecl: - self.varDecl(VarDecl(node)) - of funDecl, classDecl: - discard # TODO - else: - self.statement(node) - - -proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk = - ## Compiles a sequence of AST nodes into a chunk - ## object - self.chunk = newChunk() - self.ast = ast - self.file = file - self.names = @[] - self.scopeDepth = 0 - self.currentFunction = nil - self.current = 0 - while not self.done(): - self.declaration(self.step()) - if self.ast.len() > 0: - # *Technically* an empty program is a valid program - self.endScope() - self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope - result = self.chunk - if 
self.ast.len() > 0 and self.scopeDepth != -1: - self.error(&"internal error: invalid scopeDepth state (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?") diff --git a/src/backend/lexer.nim b/src/backend/lexer.nim deleted file mode 100644 index 1ceb187..0000000 --- a/src/backend/lexer.nim +++ /dev/null @@ -1,552 +0,0 @@ -# Copyright 2021 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -## A simple and modular tokenizer implementation with arbitrary lookahead - -import strutils -import parseutils -import strformat -import tables - -import meta/token -import meta/errors - - -export token # Makes Token available when importing the lexer module -export errors - - -# Tables of all character tokens that are not keywords - -# Table of all single-character tokens -const tokens = to_table({ - '(': LeftParen, ')': RightParen, - '{': LeftBrace, '}': RightBrace, - '.': Dot, ',': Comma, '-': Minus, - '+': Plus, '*': Asterisk, - '>': GreaterThan, '<': LessThan, '=': Equal, - '~': Tilde, '/': Slash, '%': Percentage, - '[': LeftBracket, ']': RightBracket, - ':': Colon, '^': Caret, '&': Ampersand, - '|': Pipe, ';': Semicolon}) - -# Table of all double-character tokens -const double = to_table({"**": DoubleAsterisk, - ">>": RightShift, - "<<": LeftShift, - "==": DoubleEqual, - "!=": NotEqual, - ">=": GreaterOrEqual, - "<=": LessOrEqual, - "//": FloorDiv, - "+=": InplaceAdd, - "-=": InplaceSub, - "/=": InplaceDiv, - 
"*=": InplaceMul, - "^=": InplaceXor, - "&=": InplaceAnd, - "|=": InplaceOr, - "%=": InplaceMod, - }) - -# Table of all triple-character tokens -const triple = to_table({"//=": InplaceFloorDiv, - "**=": InplacePow, - ">>=": InplaceRightShift, - "<<=": InplaceLeftShift - }) - - -# Constant table storing all the reserved keywords (which are parsed as identifiers) -const keywords = to_table({ - "fun": Fun, "raise": Raise, - "if": If, "else": Else, - "for": For, "while": While, - "var": Var, "nil": Nil, - "true": True, "false": False, - "return": Return, "break": Break, - "continue": Continue, "inf": Infinity, - "nan": NotANumber, "is": Is, - "lambda": Lambda, "class": Class, - "async": Async, "import": Import, - "isnot": IsNot, "from": From, - "const": Const, "not": LogicalNot, - "assert": Assert, "or": LogicalOr, - "and": LogicalAnd, "del": Del, - "async": Async, "await": Await, - "foreach": Foreach, "yield": Yield, - "private": Private, "public": Public, - "static": Static, "dynamic": Dynamic, - "as": As, "of": Of, "defer": Defer, - "except": Except, "finally": Finally, - "try": Try - }) - - -type - Lexer* = ref object - ## A lexer object - source: string - tokens: seq[Token] - line: int - start: int - current: int - file: string - lines: seq[tuple[start, stop: int]] - lastLine: int - - -# Simple public getters -proc getStart*(self: Lexer): int = self.start -proc getCurrent*(self: Lexer): int = self.current -proc getLine*(self: Lexer): int = self.line -proc getSource*(self: Lexer): string = self.source -proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = (if line > 1: self.lines[line - 2] else: (start: 0, stop: self.current)) - - -proc initLexer*(self: Lexer = nil): Lexer = - ## Initializes the lexer or resets - ## the state of an existing one - new(result) - if self != nil: - result = self - result.source = "" - result.tokens = @[] - result.line = 1 - result.start = 0 - result.current = 0 - result.file = "" - result.lines = @[] - result.lastLine = 0 
- - -proc done(self: Lexer): bool = - ## Returns true if we reached EOF - result = self.current >= self.source.len - - -proc incLine(self: Lexer) = - ## Increments the lexer's line - ## and updates internal line - ## metadata - self.lines.add((start: self.lastLine, stop: self.current)) - self.line += 1 - self.lastLine = self.current - - -proc step(self: Lexer, n: int = 1): char = - ## Steps n characters forward in the - ## source file (default = 1). A null - ## terminator is returned if the lexer - ## is at EOF. Note that only the first - ## consumed character token is returned, - ## the other ones are skipped over - if self.done(): - return '\0' - self.current = self.current + n - result = self.source[self.current - n] - - -proc peek(self: Lexer, distance: int = 0): char = - ## Returns the character in the source file at - ## the given distance without consuming it. - ## A null terminator is returned if the lexer - ## is at EOF. The distance parameter may be - ## negative to retrieve previously consumed - ## tokens, while the default distance is 0 - ## (retrieves the next token to be consumed). - ## If the given distance goes beyond EOF, a - ## null terminator is returned - if self.done() or self.current + distance > self.source.high(): - result = '\0' - else: - result = self.source[self.current + distance] - - -proc error(self: Lexer, message: string) = - ## Raises a lexing error with a formatted - ## error message - - raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}") - - -proc check(self: Lexer, what: char, distance: int = 0): bool = - ## Behaves like match, without consuming the - ## token. False is returned if we're at EOF - ## regardless of what the token to check is. 
- ## The distance is passed directly to self.peek() - if self.done(): - return false - return self.peek(distance) == what - - -proc check(self: Lexer, what: string): bool = - ## Calls self.check() in a loop with - ## each character from the given source - ## string. Useful to check multi-character - ## strings in one go - for i, chr in what: - # Why "i" you ask? Well, since check - # does not consume the tokens it checks - # against we need some way of keeping - # track where we are in the string the - # caller gave us, otherwise this will - # not behave as expected - if not self.check(chr, i): - return false - return true - - -proc check(self: Lexer, what: openarray[char]): bool = - ## Calls self.check() in a loop with - ## each character from the given seq of - ## char and returns at the first match. - ## Useful to check multiple tokens in a situation - ## where only one of them may match at one time - for chr in what: - if self.check(chr): - return true - return false - - -proc match(self: Lexer, what: char): bool = - ## Returns true if the next character matches - ## the given character, and consumes it. - ## Otherwise, false is returned - if self.done(): - self.error("unexpected EOF") - return false - elif not self.check(what): - self.error(&"expecting '{what}', got '{self.peek()}' instead") - return false - self.current += 1 - return true - - -proc match(self: Lexer, what: string): bool = - ## Calls self.match() in a loop with - ## each character from the given source - ## string. Useful to match multi-character - ## strings in one go - for chr in what: - if not self.match(chr): - return false - return true - - -proc createToken(self: Lexer, tokenType: TokenType) = - ## Creates a token object and adds it to the token - ## list - var tok: Token = new(Token) - tok.kind = tokenType - tok.lexeme = self.source[self.start.. 
uint8.high().int: - self.error("escape sequence value too large (> 255)") - self.source[self.current] = cast[char](value) - of 'u', 'U': - self.error("unicode escape sequences are not supported (yet)") - of 'x': - var code = "" - var value = 0 - var i = self.current - while i < self.source.high() and (let c = self.source[ - i].toLowerAscii(); c in 'a'..'f' or c in '0'..'9'): - code &= self.source[i] - i += 1 - assert parseHex(code, value) == code.len() - if value > uint8.high().int: - self.error("escape sequence value too large (> 255)") - self.source[self.current] = cast[char](value) - else: - self.error(&"invalid escape sequence '\\{self.peek()}'") - - -proc parseString(self: Lexer, delimiter: char, mode: string = "single") = - ## Parses string literals. They can be expressed using matching pairs - ## of either single or double quotes. Most C-style escape sequences are - ## supported, moreover, a specific prefix may be prepended - ## to the string to instruct the lexer on how to parse it: - ## - b -> declares a byte string, where each character is - ## interpreted as an integer instead of a character - ## - r -> declares a raw string literal, where escape sequences - ## are not parsed and stay as-is - ## - f -> declares a format string, where variables may be - ## interpolated using curly braces like f"Hello, {name}!". - ## Braces may be escaped using a pair of them, so to represent - ## a literal "{" in an f-string, one would use {{ instead - ## Multi-line strings can be declared using matching triplets of - ## either single or double quotes. 
They can span across multiple - ## lines and escape sequences in them are not parsed, like in raw - ## strings, so a multi-line string prefixed with the "r" modifier - ## is redundant, although multi-line byte/format strings are supported - while not self.check(delimiter) and not self.done(): - if self.check('\n'): - if mode == "multi": - self.incLine() - else: - self.error("unexpected EOL while parsing string literal") - if mode in ["raw", "multi"]: - discard self.step() - if self.check('\\'): - # This madness here serves to get rid of the slash, since \x is mapped - # to a one-byte sequence but the string '\x' actually 2 bytes (or more, - # depending on the specific escape sequence) - self.source = self.source[0..> b (a with bits shifted b times to the right) onto the stack - BinaryShiftLeft, # Pushes the result of a << b (a with bits shifted b times to the left) onto the stack - BinaryXor, # Pushes the result of a ^ b (bitwise exclusive or) onto the stack - BinaryOr, # Pushes the result of a | b (bitwise or) onto the stack - BinaryAnd, # Pushes the result of a & b (bitwise and) onto the stack - UnaryNot, # Pushes the result of ~x (bitwise not) onto the stack - BinaryAs, # Pushes the result of a as b onto the stack (converts a to the type of b. Explicit support from a is required) - BinaryIs, # Pushes the result of a is b onto the stack (true if a and b point to the same object, false otherwise) - BinaryIsNot, # Pushes the result of not (a is b). This could be implemented in terms of BinaryIs, but it's more efficient this way - BinaryOf, # Pushes the result of a of b onto the stack (true if a is a subclass of b, false otherwise) - BinarySlice, # Perform slicing on supported objects (like "hello"[0:2], which yields "he"). 
The result is pushed onto the stack - BinarySubscript, # Subscript operator, like "hello"[0] (which pushes 'h' onto the stack) - # Binary comparison operators - GreaterThan, # Pushes the result of a > b onto the stack - LessThan, # Pushes the result of a < b onto the stack - EqualTo, # Pushes the result of a == b onto the stack - NotEqualTo, # Pushes the result of a != b onto the stack (optimization for not (a == b)) - GreaterOrEqual, # Pushes the result of a >= b onto the stack - LessOrEqual, # Pushes the result of a <= b onto the stack - # Logical operators - LogicalNot, - LogicalAnd, - LogicalOr, - # Constants/singletons - Nil, - True, - False, - Nan, - Inf, - # Basic stack operations - Pop, - Push, - PopN, # Pops N elements off the stack (optimization for exiting scopes and returning from functions) - # Name resolution/handling - LoadAttribute, - DeclareName, # Declares a global dynamically bound name in the current scope - LoadName, # Loads a dynamically bound variable - LoadFast, # Loads a statically bound variable - StoreName, # Sets/updates a dynamically bound variable's value - StoreFast, # Sets/updates a statically bound variable's value - DeleteName, # Unbinds a dynamically bound variable's name from the current scope - DeleteFast, # Unbinds a statically bound variable's name from the current scope - # Looping and jumping - Jump, # Absolute and unconditional jump into the bytecode - JumpIfFalse, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey - JumpIfTrue, # Jumps to an absolute index in the bytecode if the value at the top of the stack is truthy - JumpIfFalsePop, # Like JumpIfFalse, but it also pops off the stack (regardless of truthyness). 
Optimization for if statements - JumpForwards, # Relative, unconditional, positive jump in the bytecode - JumpBackwards, # Relative, unconditional, negative jump into the bytecode - Break, # Temporary opcode used to signal exiting out of loop - ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one) - LongJump, - LongJumpIfFalse, - LongJumpIfTrue, - LongJumpIfFalsePop, - LongJumpForwards, - LongJumpBackwards, - # Functions - MakeFunction, - Call, - Return - # Exception handling - Raise, - ReRaise, # Re-raises active exception - BeginTry, - FinishTry, - # Generators - Yield, - # Coroutines - Await, - # Collection literals - BuildList, - BuildDict, - BuildSet, - BuildTuple, - # Misc - Assert, - - -# We group instructions by their operation/operand types for easier handling when debugging - -# Simple instructions encompass: -# - Instructions that push onto/pop off the stack unconditionally (True, False, PopN, Pop, etc.) -# - Unary and binary operators -const simpleInstructions* = {Return, BinaryAdd, BinaryMultiply, - BinaryDivide, BinarySubtract, - BinaryMod, BinaryPow, Nil, - True, False, OpCode.Nan, OpCode.Inf, - BinaryShiftLeft, BinaryShiftRight, - BinaryXor, LogicalNot, EqualTo, - GreaterThan, LessThan, LoadAttribute, - BinarySlice, Pop, UnaryNegate, - BinaryIs, BinaryAs, GreaterOrEqual, - LessOrEqual, BinaryOr, BinaryAnd, - UnaryNot, BinaryFloorDiv, BinaryOf, Raise, - ReRaise, BeginTry, FinishTry, Yield, Await} - -# Constant instructions are instructions that operate on the bytecode constant table -const constantInstructions* = {LoadConstant, DeclareName, LoadName, StoreName, DeleteName} - -# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form -# of 24 bit integers -const stackTripleInstructions* = {Call, StoreFast, DeleteFast, LoadFast} - -# Stack Double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form -# of 16 bit integers -const 
stackDoubleInstructions* = {} - -# Argument double argument instructions take hardcoded arguments on the stack as 16 bit integers -const argumentDoubleInstructions* = {PopN, } - -# Jump instructions jump at relative or absolute bytecode offsets -const jumpInstructions* = {JumpIfFalse, JumpIfFalsePop, JumpForwards, JumpBackwards, - LongJumpIfFalse, LongJumpIfFalsePop, - LongJumpForwards, - LongJumpBackwards, JumpIfTrue, LongJumpIfTrue} - -# Collection instructions push a built-in collection type onto the stack -const collectionInstructions* = {BuildList, BuildDict, BuildSet, BuildTuple} - - -proc newChunk*(reuseConsts: bool = true): Chunk = - ## Initializes a new, empty chunk - result = Chunk(consts: @[], code: @[], lines: @[], reuseConsts: reuseConsts) - - -proc `$`*(self: Chunk): string = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])""" - - -proc write*(self: Chunk, newByte: uint8, line: int) = - ## Adds the given instruction at the provided line number - ## to the given chunk object - assert line > 0, "line must be greater than zero" - if self.lines.high() >= 1 and self.lines[^2] == line: - self.lines[^1] += 1 - else: - self.lines.add(line) - self.lines.add(1) - self.code.add(newByte) - - -proc write*(self: Chunk, bytes: openarray[uint8], line: int) = - ## Calls write in a loop with all members of the given - ## array - for cByte in bytes: - self.write(cByte, line) - - -proc write*(self: Chunk, newByte: OpCode, line: int) = - ## Adds the given instruction at the provided line number - ## to the given chunk object - self.write(uint8(newByte), line) - - -proc write*(self: Chunk, bytes: openarray[OpCode], line: int) = - ## Calls write in a loop with all members of the given - ## array - for cByte in bytes: - self.write(uint8(cByte), line) - - -proc getLine*(self: Chunk, idx: int): int = - ## Returns the associated line of a given - ## instruction index - if self.lines.len < 2: - raise 
newException(IndexDefect, "the chunk object is empty") - var - count: int - current: int = 0 - for n in countup(0, self.lines.high(), 2): - count = self.lines[n + 1] - if idx in current - count.. < - LessOrEqual, GreaterOrEqual, # >= <= - NotEqual, RightShift, LeftShift, # != >> << - LogicalAnd, LogicalOr, LogicalNot, FloorDiv, # and or not // - InplaceAdd, InplaceSub, InplaceDiv, # += -= /= - InplaceMod, InplaceMul, InplaceXor, # %= *= ^= - InplaceAnd, InplaceOr, # &= |= - DoubleEqual, InplaceFloorDiv, InplacePow, # == //= **= - InplaceRightShift, InplaceLeftShift - - # Miscellaneous - - EndOfFile - - - Token* = ref object - ## A token object - kind*: TokenType - lexeme*: string - line*: int - pos*: tuple[start, stop: int] - - -proc `$`*(self: Token): string = - if self != nil: - result = &"Token(kind={self.kind}, lexeme={$(self.lexeme)}, line={self.line}, pos=({self.pos.start}, {self.pos.stop}))" - else: - result = "nil" diff --git a/src/backend/optimizer.nim b/src/backend/optimizer.nim deleted file mode 100644 index 8b58a06..0000000 --- a/src/backend/optimizer.nim +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright 2021 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import meta/ast -import meta/token - -import parseutils -import strformat -import strutils -import math - - -type - WarningKind* = enum - unreachableCode, - nameShadowing, - isWithALiteral, - equalityWithSingleton, - valueOverflow, - implicitConversion, - invalidOperation - - Warning* = ref object - kind*: WarningKind - node*: ASTNode - - Optimizer* = ref object - warnings: seq[Warning] - foldConstants*: bool - - -proc initOptimizer*(foldConstants: bool = true): Optimizer = - ## Initializes a new optimizer object - new(result) - result.foldConstants = foldConstants - result.warnings = @[] - - -proc newWarning(self: Optimizer, kind: WarningKind, node: ASTNode) = - self.warnings.add(Warning(kind: kind, node: node)) - - -proc `$`*(self: Warning): string = &"Warning(kind={self.kind}, node={self.node})" - - -# Forward declaration -proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode - - -proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode = - ## Performs some checks on constant AST nodes such as - ## integers. This method converts all of the different - ## integer forms (binary, octal and hexadecimal) to - ## decimal integers. 
Overflows are checked here too - if not self.foldConstants: - return node - case node.kind: - of intExpr: - var x: int - var y = IntExpr(node) - try: - assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.newWarning(valueOverflow, node) - result = node - of hexExpr: - var x: int - var y = HexExpr(node) - try: - assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.newWarning(valueOverflow, node) - return node - result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) - of binExpr: - var x: int - var y = BinExpr(node) - try: - assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.newWarning(valueOverflow, node) - return node - result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) - of octExpr: - var x: int - var y = OctExpr(node) - try: - assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme) - except ValueError: - self.newWarning(valueOverflow, node) - return node - result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) - of floatExpr: - var x: float - var y = FloatExpr(node) - try: - discard parseFloat(y.literal.lexeme, x) - except ValueError: - self.newWarning(valueOverflow, node) - return node - result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) - else: - result = node - - -proc optimizeUnary(self: Optimizer, node: UnaryExpr): ASTNode = - ## Attempts to optimize unary expressions - var a = self.optimizeNode(node.a) - if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a: - # We can't optimize further, the overflow will be caught in the compiler - return UnaryExpr(kind: unaryExpr, a: a, operator: node.operator) - case 
a.kind: - of intExpr: - var x: int - assert parseInt(IntExpr(a).literal.lexeme, x) == len(IntExpr(a).literal.lexeme) - case node.operator.kind: - of Tilde: - x = not x - of Minus: - x = -x - else: - discard # Unreachable - result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1))) - of floatExpr: - var x: float - discard parseFloat(FloatExpr(a).literal.lexeme, x) - case node.operator.kind: - of Minus: - x = -x - of Tilde: - self.newWarning(invalidOperation, node) - return node - else: - discard - result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1))) - else: - result = node - - -proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = - ## Attempts to optimize binary expressions - var a, b: ASTNode - a = self.optimizeNode(node.a) - b = self.optimizeNode(node.b) - if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and (self.warnings[^1].node == a or self.warnings[^1].node == b): - # We can't optimize further, the overflow will be caught in the compiler. 
We don't return the same node - # because optimizeNode might've been able to optimize one of the two operands and we don't know which - return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) - if node.operator.kind == DoubleEqual: - if a.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}: - self.newWarning(equalityWithSingleton, a) - elif b.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}: - self.newWarning(equalityWithSingleton, b) - elif node.operator.kind == Is: - if a.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}: - self.newWarning(isWithALiteral, a) - elif b.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}: - self.newWarning(isWithALiteral, b) - if a.kind == intExpr and b.kind == intExpr: - # Optimizes integer operations - var x, y, z: int - assert parseInt(IntExpr(a).literal.lexeme, x) == IntExpr(a).literal.lexeme.len() - assert parseInt(IntExpr(b).literal.lexeme, y) == IntExpr(b).literal.lexeme.len() - try: - case node.operator.kind: - of Plus: - z = x + y - of Minus: - z = x - y - of Asterisk: - z = x * y - of FloorDiv: - z = int(x / y) - of DoubleAsterisk: - if y >= 0: - z = x ^ y - else: - # Nim's builtin pow operator can't handle - # negative exponents, so we use math's - # pow and convert from/to floats instead - z = pow(x.float, y.float).int - of Percentage: - z = x mod y - of Caret: - z = x xor y - of Ampersand: - z = x and y - of Pipe: - z = x or y - of Slash: - # Special case, yields a float - return FloatExpr(kind: intExpr, literal: Token(kind: Float, lexeme: $(x / y), line: IntExpr(a).literal.line, pos: (start: -1, stop: -1))) - else: - result = BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) - except OverflowDefect: - self.newWarning(valueOverflow, node) - return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) - except RangeDefect: - # TODO: What warning do we raise here? 
- return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) - result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $z, line: IntExpr(a).literal.line, pos: (start: -1, stop: -1))) - elif a.kind == floatExpr or b.kind == floatExpr: - var x, y, z: float - if a.kind == intExpr: - var temp: int - assert parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len() - x = float(temp) - self.newWarning(implicitConversion, a) - else: - discard parseFloat(FloatExpr(a).literal.lexeme, x) - if b.kind == intExpr: - var temp: int - assert parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len() - y = float(temp) - self.newWarning(implicitConversion, b) - else: - discard parseFloat(FloatExpr(b).literal.lexeme, y) - # Optimizes float operations - try: - case node.operator.kind: - of Plus: - z = x + y - of Minus: - z = x - y - of Asterisk: - z = x * y - of FloorDiv: - z = x / y - of DoubleAsterisk: - z = pow(x, y) - of Percentage: - z = x mod y - of Slash: - z = x / y - else: - result = BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) - except OverflowDefect: - self.newWarning(valueOverflow, node) - return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) - result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $z, line: LiteralExpr(a).literal.line, pos: (start: -1, stop: -1))) - elif a.kind == strExpr and b.kind == strExpr: - var a = StrExpr(a) - var b = StrExpr(b) - case node.operator.kind: - of Plus: - result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)] & b.literal.lexeme[1..<(^1)] & "'", pos: (start: -1, stop: -1))) - else: - result = node - elif a.kind == strExpr and self.optimizeNode(b).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == b): - var a = StrExpr(a) - var b = IntExpr(b) - var bb: int - assert parseInt(b.literal.lexeme, bb) == 
b.literal.lexeme.len() - case node.operator.kind: - of Asterisk: - result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'")) - else: - result = node - elif b.kind == strExpr and self.optimizeNode(a).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a): - var b = StrExpr(b) - var a = IntExpr(a) - var aa: int - assert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len() - case node.operator.kind: - of Asterisk: - result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'")) - else: - result = node - else: - # There's no constant folding we can do! - result = node - - -proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode = - ## Analyzes an AST node and attempts to perform - ## optimizations on it. If no optimizations can be - ## applied or self.foldConstants is set to false, - ## then the same node is returned - if not self.foldConstants: - return node - case node.kind: - of exprStmt: - result = newExprStmt(self.optimizeNode(ExprStmt(node).expression), ExprStmt(node).token) - of intExpr, hexExpr, octExpr, binExpr, floatExpr, strExpr: - result = self.optimizeConstant(node) - of unaryExpr: - result = self.optimizeUnary(UnaryExpr(node)) - of binaryExpr: - result = self.optimizeBinary(BinaryExpr(node)) - of groupingExpr: - # Recursively unnests groups - result = self.optimizeNode(GroupingExpr(node).expression) - of callExpr: - var node = CallExpr(node) - for i, positional in node.arguments.positionals: - node.arguments.positionals[i] = self.optimizeNode(positional) - for i, (key, value) in node.arguments.keyword: - node.arguments.keyword[i].value = self.optimizeNode(value) - result = node - of sliceExpr: - var node = SliceExpr(node) - for i, e in node.ends: - node.ends[i] = self.optimizeNode(e) - node.slicee = self.optimizeNode(node.slicee) - result = node - of tryStmt: 
- var node = TryStmt(node) - node.body = self.optimizeNode(node.body) - if node.finallyClause != nil: - node.finallyClause = self.optimizeNode(node.finallyClause) - if node.elseClause != nil: - node.elseClause = self.optimizeNode(node.elseClause) - for i, handler in node.handlers: - node.handlers[i].body = self.optimizeNode(node.handlers[i].body) - result = node - of funDecl: - var decl = FunDecl(node) - for i, node in decl.defaults: - decl.defaults[i] = self.optimizeNode(node) - result = decl - of blockStmt: - var node = BlockStmt(node) - for i, n in node.code: - node.code[i] = self.optimizeNode(n) - result = node - of varDecl: - var decl = VarDecl(node) - decl.value = self.optimizeNode(decl.value) - result = decl - of assignExpr: - var asgn = AssignExpr(node) - asgn.value = self.optimizeNode(asgn.value) - result = asgn - of listExpr: - var l = ListExpr(node) - for i, e in l.members: - l.members[i] = self.optimizeNode(e) - result = node - of setExpr: - var s = SetExpr(node) - for i, e in s.members: - s.members[i] = self.optimizeNode(e) - result = node - of tupleExpr: - var t = TupleExpr(node) - for i, e in t.members: - t.members[i] = self.optimizeNode(e) - result = node - of dictExpr: - var d = DictExpr(node) - for i, e in d.keys: - d.keys[i] = self.optimizeNode(e) - for i, e in d.values: - d.values[i] = self.optimizeNode(e) - result = node - else: - result = node - - -proc optimize*(self: Optimizer, tree: seq[ASTNode]): tuple[tree: seq[ASTNode], warnings: seq[Warning]] = - ## Runs the optimizer on the given source - ## tree and returns a new optimized tree - ## as well as a list of warnings that may - ## be of interest. The input tree may be - ## identical to the output tree if no optimization - ## could be performed. 
Constant folding can be - ## turned off by setting foldConstants to false - ## when initializing the optimizer object - var newTree: seq[ASTNode] = @[] - for node in tree: - newTree.add(self.optimizeNode(node)) - result = (tree: newTree, warnings: self.warnings) diff --git a/src/backend/parser.nim b/src/backend/parser.nim deleted file mode 100644 index 8feabe3..0000000 --- a/src/backend/parser.nim +++ /dev/null @@ -1,1078 +0,0 @@ -# Copyright 2021 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -## A recursive-descent top-down parser implementation - -import strformat - - -import meta/token -import meta/ast -import meta/errors - - -export token, ast, errors - - -type - - LoopContext = enum - Loop, None - - Parser* = ref object - ## A recursive-descent top-down - ## parser implementation - # Index into self.tokens - current: int - # The name of the file being parsed. - # Only meaningful for parse errors - file: string - # The list of tokens representing - # the source code to be parsed. - # In most cases, those will come - # from the builtin lexer, but this - # behavior is not enforced and the - # tokenizer is entirely separate from - # the parser - tokens: seq[Token] - # Little internal attribute that tells - # us if we're inside a loop or not. This - # allows us to detect errors like break - # being used outside loops - currentLoop: LoopContext - # Stores the current function - # being parsed. 
# This is a reference to either a FunDecl or LambdaExpr
        # AST node and is mostly used to allow
        # implicit generators to work. What that
        # means is that there is no need for the
        # programmer to specify a function is a
        # generator like in nim, (which uses the
        # 'iterator' keyword): any function is
        # automatically a generator if it contains
        # any number of yield statement(s) or
        # yield expression(s). This attribute
        # is nil when the parser is at the top-level
        # code and is what allows the parser to detect
        # errors like return outside functions and attempts
        # to declare public names inside them before
        # compilation even begins
        currentFunction: ASTNode
        # Stores the current scope depth (0 = global, > 0 local)
        scopeDepth: int


proc initParser*(): Parser =
    ## Initializes a new Parser object with an
    ## empty token list, no enclosing function
    ## or loop and the global scope depth
    new(result)
    result.current = 0
    result.file = ""
    result.tokens = @[]
    result.currentFunction = nil
    result.currentLoop = None
    result.scopeDepth = 0


# Handy templates to make our life easier, thanks nim!

template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
template endOfLine(msg: string) = self.expect(Semicolon, msg)


# Public getters for improved error formatting
proc getCurrent*(self: Parser): int = self.current


proc getCurrentToken*(self: Parser): Token =
    ## Returns the most recently consumed token, or
    ## the last token in the list once the parser ran
    ## past the end. When nothing has been consumed yet
    ## the first token is returned, and a synthetic EOF
    ## token is returned for an empty token list, so this
    ## getter never indexes out of bounds (it is used by
    ## self.error(), which must not crash on its own)
    if self.tokens.len() == 0:
        result = endOfFile
    elif self.getCurrent() >= self.tokens.len():
        result = self.tokens[^1]
    else:
        result = self.tokens[max(self.current - 1, 0)]


proc peek(self: Parser, distance: int = 0): Token =
    ## Peeks at the token at the given distance.
    ## If the distance is out of bounds, an EOF
    ## token is returned. A negative distance may
    ## be used to retrieve previously consumed
    ## tokens
    if self.tokens.high() == -1 or self.current + distance > self.tokens.high() or self.current + distance < 0:
        result = endOfFile
    else:
        result = self.tokens[self.current + distance]


proc done(self: Parser): bool =
    ## Returns true if we're at the
    ## end of the file. Note that the
    ## parser expects an explicit
    ## EOF token to signal the end
    ## of the file (unless the token
    ## list is empty)
    result = self.tokens.len() == 0 or self.peek().kind == EndOfFile


proc step(self: Parser, n: int = 1): Token =
    ## Steps n tokens into the input,
    ## returning the last consumed one.
    ## Stepping stops early when the end
    ## of the file is reached; if the parser
    ## is already there, the EOF token is
    ## returned and nothing is consumed.
    ## At least one token is always consumed
    ## when available, even if n < 1
    if self.done():
        return self.peek()
    for _ in 0..<max(n, 1):
        if self.done():
            break
        result = self.tokens[self.current]
        self.current += 1


proc error(self: Parser, message: string) =
    ## Raises a formatted ParseError exception
    ## mentioning the file, the current line and
    ## the lexeme of the last consumed token
    var lexeme = self.getCurrentToken().lexeme
    var errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at '{lexeme}' -> {message}"
    raise newException(ParseError, errorMessage)


proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
    ## Checks if the given token at the given distance
    ## matches the expected kind and returns a boolean.
    ## The distance parameter is passed directly to
    ## self.peek()
    self.peek(distance).kind == kind


proc check(self: Parser, kind: openarray[TokenType]): bool =
    ## Calls self.check() in a loop with each entry of
    ## the given openarray of token kinds and returns
    ## at the first match. Note that this assumes
    ## that only one token may exist at a given
    ## position
    for k in kind:
        if self.check(k):
            return true
    return false


proc match(self: Parser, kind: TokenType, distance: int = 0): bool =
    ## Behaves like self.check(), except that when a token
    ## matches it is consumed. Note that exactly one token
    ## is consumed regardless of the distance that was checked
    if self.check(kind, distance):
        discard self.step()
        result = true
    else:
        result = false


proc match(self: Parser, kind: openarray[TokenType]): bool =
    ## Calls self.match() in a loop with each entry of
    ## the given openarray of token kinds and returns
    ## at the first match. Note that this assumes
    ## that only one token may exist at a given
    ## position
    for k in kind:
        if self.match(k):
            return true
    result = false


proc expect(self: Parser, kind: TokenType, message: string = "") =
    ## Behaves like self.match(), except that
    ## when a token doesn't match an error
    ## is raised. If no error message is
    ## given, a default one is used
    if not self.match(kind):
        if message.len() == 0:
            self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead")
        else:
            self.error(message)


proc unnest(self: Parser, node: ASTNode): ASTNode =
    ## Unpacks an arbitrarily nested grouping expression,
    ## stopping at the first node that is not a grouping
    ## (or at an empty grouping)
    var node = node
    while node.kind == groupingExpr and GroupingExpr(node).expression != nil:
        node = GroupingExpr(node).expression
    result = node


# Forward declarations
proc expression(self: Parser): ASTNode
proc expressionStatement(self: Parser): ASTNode
proc statement(self: Parser): ASTNode
proc varDecl(self: Parser, isStatic: bool = true, isPrivate: bool = true): ASTNode
proc funDecl(self: Parser, isAsync: bool = false, isStatic: bool = true, isPrivate: bool = true, isLambda: bool = false): ASTNode
proc declaration(self: Parser): ASTNode


proc primary(self: Parser): ASTNode =
    ## Parses primary expressions such
    ## as integer literals and keywords
    ## that map to builtin types (true, false, etc),
    ## as well as tuple, list, set and dict literals,
    ## groupings, lambdas and yield/await expressions
    case self.peek().kind:
        of True:
            result = newTrueExpr(self.step())
        of False:
            result = newFalseExpr(self.step())
        of TokenType.NotANumber:
            result = newNanExpr(self.step())
        of Nil:
            result = newNilExpr(self.step())
        of Float:
            result = newFloatExpr(self.step())
        of Integer:
            result = newIntExpr(self.step())
        of Identifier:
            result = newIdentExpr(self.step())
        of LeftParen:
            let tok = self.step()
            if self.match(RightParen):
                # This yields an empty tuple
                result = newTupleExpr(@[], tok)
            else:
                result = self.expression()
                if self.match(Comma):
                    # A comma after the first element is what
                    # tells a tuple apart from a grouping
                    var tupleObject = newTupleExpr(@[result], tok)
                    while not self.check(RightParen):
                        tupleObject.members.add(self.expression())
                        if not self.match(Comma):
                            break
                    result = tupleObject
                    self.expect(RightParen, "unterminated tuple literal")
                else:
                    self.expect(RightParen, "unterminated parenthesized expression")
                    result = newGroupingExpr(result, tok)
        of LeftBracket:
            let tok = self.step()
            if self.match(RightBracket):
                # This yields an empty list
                result = newListExpr(@[], tok)
            else:
                var listObject = newListExpr(@[], tok)
                while not self.check(RightBracket):
                    listObject.members.add(self.expression())
                    if not self.match(Comma):
                        break
                result = listObject
                self.expect(RightBracket, "unterminated list literal")
        of LeftBrace:
            let tok = self.step()
            if self.match(RightBrace):
                # This yields an empty dictionary, not an empty set!
                # For empty sets, there will be a builtin set() type
                # that can be instantiated with no arguments
                result = newDictExpr(@[], @[], tok)
            else:
                result = self.expression()
                if self.match(Comma) or self.check(RightBrace):
                    var setObject = newSetExpr(@[result], tok)
                    while not self.check(RightBrace):
                        setObject.members.add(self.expression())
                        if not self.match(Comma):
                            break
                    result = setObject
                    self.expect(RightBrace, "unterminated set literal")
                elif self.match(Colon):
                    var dictObject = newDictExpr(@[result], @[self.expression()], tok)
                    if self.match(RightBrace):
                        return dictObject
                    if self.match(Comma):
                        while not self.check(RightBrace):
                            dictObject.keys.add(self.expression())
                            self.expect(Colon)
                            dictObject.values.add(self.expression())
                            if not self.match(Comma):
                                break
                    # The closing brace is mandatory even when no
                    # comma follows the first key/value pair
                    self.expect(RightBrace, "unterminated dict literal")
                    result = dictObject
        of Yield:
            let tok = self.step()
            if self.currentFunction == nil:
                self.error("'yield' cannot be outside functions")
            # Any function containing a yield is implicitly a generator
            if self.currentFunction.kind == NodeKind.funDecl:
                FunDecl(self.currentFunction).isGenerator = true
            else:
                LambdaExpr(self.currentFunction).isGenerator = true
            if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]):
                result = newYieldExpr(self.expression(), tok)
            else:
                # A bare yield produces nil
                result = newYieldExpr(newNilExpr(Token()), tok)
        of Await:
            let tok = self.step()
            if self.currentFunction == nil:
                self.error("'await' cannot be used outside functions")
            if self.currentFunction.kind == lambdaExpr or not FunDecl(self.currentFunction).isAsync:
                self.error("'await' can only be used inside async functions")
            result = newAwaitExpr(self.expression(), tok)
        of Lambda:
            discard self.step()
            result = self.funDecl(isLambda=true)
        of RightParen, RightBracket, RightBrace:
            # This is *technically* unnecessary: the parser would
            # throw an error regardless, but it's a little bit nicer
            # when the error message is more specific
            self.error(&"unmatched '{self.peek().lexeme}'")
        of Hex:
            result = newHexExpr(self.step())
        of Octal:
            result = newOctExpr(self.step())
        of Binary:
            result = newBinExpr(self.step())
        of String:
            result = newStrExpr(self.step())
        of Infinity:
            result = newInfExpr(self.step())
        else:
            self.error("invalid syntax")


proc makeCall(self: Parser, callee: ASTNode): ASTNode =
    ## Utility function called iteratively by self.call()
    ## to parse a function-like call. The opening parenthesis
    ## has already been consumed by the caller. Arguments
    ## written as assignments (name=value) are treated as
    ## keyword arguments and must follow all positional ones
    let tok = self.peek(-1)
    var argNames: seq[ASTNode] = @[]
    var arguments: tuple[positionals: seq[ASTNode], keyword: seq[tuple[name: ASTNode, value: ASTNode]]] = (positionals: @[], keyword: @[])
    var argument: ASTNode = nil
    var argCount = 0
    if not self.check(RightParen):
        while true:
            if argCount >= 255:
                self.error("cannot store more than 255 arguments")
                break
            argument = self.expression()
            if argument.kind == assignExpr:
                # NOTE(review): 'in' relies on == for ASTNode; if that is
                # reference equality, duplicates are never detected here.
                # Confirm against meta/ast
                if AssignExpr(argument).name in argNames:
                    self.error("duplicate keyword argument in call")
                argNames.add(AssignExpr(argument).name)
                arguments.keyword.add((name: AssignExpr(argument).name, value: AssignExpr(argument).value))
            elif arguments.keyword.len() == 0:
                arguments.positionals.add(argument)
            else:
                self.error("positional arguments cannot follow keyword arguments in call")
            if not self.match(Comma):
                break
            argCount += 1
    self.expect(RightParen)
    result = newCallExpr(callee, arguments, tok)


proc call(self: Parser): ASTNode =
    ## Parses call expressions, object
    ## field accessing ("dot syntax") and
    ## slice expressions (a[b], a[b:c], a[b:c:d])
    result = self.primary()
    while true:
        if self.match(LeftParen):
            result = self.makeCall(result)
        elif self.match(Dot):
            self.expect(Identifier, "expecting attribute name after '.'")
            result = newGetItemExpr(result, newIdentExpr(self.peek(-1)), self.peek(-1))
        elif self.match(LeftBracket):
            let tok = self.peek(-1)
            var ends: seq[ASTNode] = @[]
            # A slice has at most three colon-separated components
            while not self.match(RightBracket) and ends.len() < 3:
                ends.add(self.expression())
                discard self.match(Colon)
            if ends.len() < 1:
                self.error("invalid syntax")
            result = newSliceExpr(result, ends, tok)
        else:
            break


proc unary(self: Parser): ASTNode =
    ## Parses unary expressions (right-associative)
    if self.match([Minus, Tilde, LogicalNot, Plus]):
        result = newUnaryExpr(self.peek(-1), self.unary())
    else:
        result = self.call()


proc pow(self: Parser): ASTNode =
    ## Parses exponentiation expressions
    result = self.unary()
    var operator: Token
    var right: ASTNode
    while self.match(DoubleAsterisk):
        operator = self.peek(-1)
        right = self.unary()
        result = newBinaryExpr(result, operator, right)


proc mul(self: Parser): ASTNode =
    ## Parses multiplication and division expressions
    result = self.pow()
    var operator: Token
    var right: ASTNode
    while self.match([Slash, Percentage, FloorDiv, Asterisk]):
        operator = self.peek(-1)
        right = self.pow()
        result = newBinaryExpr(result, operator, right)


proc add(self: Parser): ASTNode =
    ## Parses addition and subtraction expressions
    result = self.mul()
    var operator: Token
    var right: ASTNode
    while self.match([Plus, Minus]):
        operator = self.peek(-1)
        right = self.mul()
        result = newBinaryExpr(result, operator, right)


proc comparison(self: Parser): ASTNode =
    ## Parses comparison expressions, including
    ## the is/is not/as/of operators
    result = self.add()
    var operator: Token
    var right: ASTNode
    while self.match([LessThan, GreaterThan, LessOrEqual, GreaterOrEqual, Is, As, Of, IsNot]):
        operator = self.peek(-1)
        right = self.add()
        result = newBinaryExpr(result, operator, right)


proc equality(self: Parser): ASTNode =
    ## Parses equality expressions
    result = self.comparison()
    var operator: Token
    var right: ASTNode
    while self.match([DoubleEqual, NotEqual]):
        operator = self.peek(-1)
        right = self.comparison()
        result = newBinaryExpr(result, operator, right)


proc logicalAnd(self: Parser): ASTNode =
    ## Parses logical AND expressions
    result = self.equality()
    var operator: Token
    var right: ASTNode
    while self.match(LogicalAnd):
        operator = self.peek(-1)
        right = self.equality()
        result = newBinaryExpr(result, operator, right)


proc logicalOr(self: Parser): ASTNode =
    ## Parses logical OR expressions
    result = self.logicalAnd()
    var operator: Token
    var right: ASTNode
    while self.match(LogicalOr):
        operator = self.peek(-1)
        right = self.logicalAnd()
        result = newBinaryExpr(result, operator, right)


proc bitwiseAnd(self: Parser): ASTNode =
    ## Parses a & b expressions. This binds
    ## tighter than bitwise OR
    result = self.logicalOr()
    var operator: Token
    var right: ASTNode
    while self.match(Ampersand):
        operator = self.peek(-1)
        right = self.logicalOr()
        result = newBinaryExpr(result, operator, right)


proc bitwiseOr(self: Parser): ASTNode =
    ## Parses a | b expressions
    result = self.bitwiseAnd()
    var operator: Token
    var right: ASTNode
    while self.match(Pipe):
        operator = self.peek(-1)
        right = self.bitwiseAnd()
        result = newBinaryExpr(result, operator, right)


proc assignment(self: Parser): ASTNode =
    ## Parses assignment, the highest-level
    ## expression (including stuff like a.b = 1).
    ## Slice assignments are also parsed here.
    ## Only names, slices and attribute accesses
    ## are valid assignment targets
    result = self.bitwiseOr()
    if self.match([Equal, InplaceAdd, InplaceSub, InplaceDiv, InplaceMod,
                   InplacePow, InplaceMul, InplaceXor, InplaceAnd, InplaceOr,
                   InplaceFloorDiv, InplaceRightShift, InplaceLeftShift]):
        let tok = self.peek(-1)
        var value = self.expression()
        if result.kind in {identExpr, sliceExpr}:
            result = newAssignExpr(result, value, tok)
        elif result.kind == getItemExpr:
            result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
        else:
            self.error("invalid assignment target")


proc delStmt(self: Parser): ASTNode =
    ## Parses "del" statements,
    ## which unbind a name from its
    ## value in the current scope and
    ## calls its destructor. Literals,
    ## operators, calls and assignments
    ## are rejected as deletion targets
    let tok = self.peek(-1)
    var expression = self.expression()
    var temp = expression
    endOfLine("missing semicolon after del statement")
    if expression.kind == groupingExpr:
        # We unpack grouping expressions
        temp = self.unnest(temp)
    if temp.isLiteral():
        self.error("cannot delete a literal")
    elif temp.kind in {binaryExpr, unaryExpr}:
        self.error("cannot delete operator")
    elif temp.kind == callExpr:
        self.error("cannot delete function call")
    elif temp.kind == assignExpr:
        self.error("cannot delete assignment")
    else:
        result = newDelStmt(expression, tok)


proc assertStmt(self: Parser): ASTNode =
    ## Parses "assert" statements,
    ## raise an error if the expression
    ## fed into them is falsey
    let tok = self.peek(-1)
    var expression = self.expression()
    endOfLine("missing semicolon after assert statement")
    result = newAssertStmt(expression, tok)


proc beginScope(self: Parser) =
    ## Begins a new syntactical scope
    inc(self.scopeDepth)


proc endScope(self: Parser) =
    ## Ends the current syntactical scope
    dec(self.scopeDepth)


proc blockStmt(self: Parser): ASTNode =
    ## Parses block statements. A block
    ## statement simply opens a new local
    ## scope. The opening brace has already
    ## been consumed by the caller
    self.beginScope()
    let tok = self.peek(-1)
    var code: seq[ASTNode] = @[]
    while not self.check(RightBrace) and not self.done():
        code.add(self.declaration())
    self.expect(RightBrace, "unterminated block statement")
    result = newBlockStmt(code, tok)
    self.endScope()


proc breakStmt(self: Parser): ASTNode =
    ## Parses break statements
    let tok = self.peek(-1)
    if self.currentLoop != Loop:
        self.error("'break' cannot be used outside loops")
    endOfLine("missing semicolon after break statement")
    result = newBreakStmt(tok)


proc deferStmt(self: Parser): ASTNode =
    ## Parses defer statements
    let tok = self.peek(-1)
    if self.currentFunction == nil:
        self.error("'defer' cannot be used outside functions")
    result = newDeferStmt(self.expression(), tok)
    endOfLine("missing semicolon after defer statement")


proc continueStmt(self: Parser): ASTNode =
    ## Parses continue statements
    let tok = self.peek(-1)
    if self.currentLoop != Loop:
        self.error("'continue' cannot be used outside loops")
    endOfLine("missing semicolon after continue statement")
    result = newContinueStmt(tok)


proc returnStmt(self: Parser): ASTNode =
    ## Parses return statements
    let tok = self.peek(-1)
    if self.currentFunction == nil:
        self.error("'return' cannot be used outside functions")
    var value: ASTNode
    if not self.check(Semicolon):
        # Since return can be used on its own too
        # (in which case it implicitly returns nil),
        # we need to check if there's an actual value
        # to return or not
        value = self.expression()
    endOfLine("missing semicolon after return statement")
    result = newReturnStmt(value, tok)


proc yieldStmt(self: Parser): ASTNode =
    ## Parses yield statements and marks the
    ## enclosing function as a generator
    let tok = self.peek(-1)
    if self.currentFunction == nil:
        self.error("'yield' cannot be outside functions")
    if self.currentFunction.kind == NodeKind.funDecl:
        FunDecl(self.currentFunction).isGenerator = true
    else:
        LambdaExpr(self.currentFunction).isGenerator = true
    if not self.check(Semicolon):
        result = newYieldStmt(self.expression(), tok)
    else:
        # A bare yield produces nil
        result = newYieldStmt(newNilExpr(Token()), tok)
    endOfLine("missing semicolon after yield statement")


proc awaitStmt(self: Parser): ASTNode =
    ## Parses await statements, which are
    ## only allowed inside async functions
    let tok = self.peek(-1)
    if self.currentFunction == nil:
        self.error("'await' cannot be used outside functions")
    if self.currentFunction.kind == lambdaExpr or not FunDecl(self.currentFunction).isAsync:
        self.error("'await' can only be used inside async functions")
    result = newAwaitStmt(self.expression(), tok)
    endOfLine("missing semicolon after await statement")


proc raiseStmt(self: Parser): ASTNode =
    ## Parses raise statements
    var exception: ASTNode
    let tok = self.peek(-1)
    if not self.check(Semicolon):
        # Raise can be used on its own, in which
        # case it re-raises the last active exception
        exception = self.expression()
    endOfLine("missing semicolon after raise statement")
    result = newRaiseStmt(exception, tok)


proc forEachStmt(self: Parser): ASTNode =
    ## Parses C#-like foreach loops of the
    ## form foreach (name : iterable) body
    let tok = self.peek(-1)
    var enclosingLoop = self.currentLoop
    self.currentLoop = Loop
    self.expect(LeftParen, "expecting '(' after 'foreach'")
    self.expect(Identifier)
    var identifier = newIdentExpr(self.peek(-1))
    self.expect(Colon)
    var expression = self.expression()
    self.expect(RightParen)
    var body = self.statement()
    result = newForEachStmt(identifier, expression, body, tok)
    self.currentLoop = enclosingLoop


proc importStmt(self: Parser): ASTNode =
    ## Parses import statements of the
    ## form import name;
    let tok = self.peek(-1)
    self.expect(Identifier, "expecting module name(s) after import statement")
    # The module name is the identifier that expect() just
    # consumed: parsing another expression here would try
    # to read past it and choke on the semicolon
    result = newImportStmt(newIdentExpr(self.peek(-1)), tok)
    endOfLine("missing semicolon after import statement")


proc fromStmt(self: Parser): ASTNode =
    ## Parses from xx import yy statements
    let tok = self.peek(-1)
    self.expect(Identifier, "expecting module name(s) after import statement")
    result = newIdentExpr(self.peek(-1))
    var attributes: seq[ASTNode] = @[]
    var attribute: ASTNode
    self.expect(Import)
    self.expect(Identifier)
    attribute = newIdentExpr(self.peek(-1))
    attributes.add(attribute)
    while self.match(Comma):
        self.expect(Identifier)
        attribute = newIdentExpr(self.peek(-1))
        attributes.add(attribute)
    # from x import a [, b, c, ...];
    endOfLine("missing semicolon after import statement")
    result = newFromImportStmt(result, attributes, tok)


proc tryStmt(self: Parser): ASTNode =
    ## Parses try/except/finally/else blocks.
    ## Each handler is stored as a (body, exc, name)
    ## tuple: exc is nil for a bare 'except' (which
    ## must be followed directly by a block and must
    ## come last), name is the optional alias given
    ## with 'as'
    let tok = self.peek(-1)
    var body = self.statement()
    var handlers: seq[tuple[body, exc, name: ASTNode]] = @[]
    var finallyClause: ASTNode
    var elseClause: ASTNode
    var asName: ASTNode
    var excName: ASTNode
    var handlerBody: ASTNode
    while self.match(Except):
        # Reset per-handler state so that an alias never
        # leaks from one handler into the next
        asName = nil
        excName = nil
        if not self.check(LeftBrace):
            excName = self.expression()
            if excName.kind == binaryExpr and BinaryExpr(excName).operator.kind == As:
                asName = BinaryExpr(excName).b
                if asName.kind != identExpr:
                    self.error("expecting alias name after 'except ... as'")
                excName = BinaryExpr(excName).a
        handlerBody = self.statement()
        if excName != nil and excName.kind == tupleExpr:
            # This allows to do except (a, b, c) as SomeError {...}
            # TODO: Consider adding the ability to make exc a sequence
            # instead of adding the same body with different exception
            # types each time
            for element in TupleExpr(excName).members:
                handlers.add((body: handlerBody, exc: element, name: asName))
        else:
            handlers.add((body: handlerBody, exc: excName, name: asName))
    if self.match(Finally):
        finallyClause = self.statement()
    if self.match(Else):
        elseClause = self.statement()
    if handlers.len() == 0 and elseClause == nil and finallyClause == nil:
        self.error("expecting 'except', 'finally' or 'else' statements after 'try' block")
    for i, handler in handlers:
        if handler.exc == nil and i != handlers.high():
            self.error("catch-all exception handler with bare 'except' must come last in try statement")
    result = newTryStmt(body, handlers, finallyClause, elseClause, tok)


proc whileStmt(self: Parser): ASTNode =
    ## Parses a C-style while loop statement
    let tok = self.peek(-1)
    self.beginScope()
    var enclosingLoop = self.currentLoop
    self.currentLoop = Loop
    self.expect(LeftParen, "expecting '(' before while loop condition")
    var condition = self.expression()
    self.expect(RightParen, "unterminated while loop condition")
    result = newWhileStmt(condition, self.statement(), tok)
    self.currentLoop = enclosingLoop
    self.endScope()


proc forStmt(self: Parser): ASTNode =
    ## Parses a C-style for loop and desugars it
    ## into a while loop wrapped in block statements
    self.beginScope()
    let tok = self.peek(-1)
    var enclosingLoop = self.currentLoop
    self.currentLoop = Loop
    self.expect(LeftParen, "expecting '(' after 'for'")
    var initializer: ASTNode = nil
    var condition: ASTNode = nil
    var increment: ASTNode = nil
    # The code below is not really that illuminating, but
    # it's there to disallow weird things like a public for loop
    # increment variable which doesn't really make sense, but still
    # allow people that like verbosity (for *some* reason) to use
    # private static var declarations as well as just private var
    # and static var
    if self.match(Semicolon):
        discard
    elif self.match(Dynamic):
        self.error("dynamic declarations are not allowed in the for loop initializer")
    elif self.match(Public):
        self.error("public declarations are not allowed in the for loop initializer")
    elif self.match(Static):
        self.expect(Var, "expecting 'var' after 'static' in for loop initializer")
        initializer = self.varDecl(isStatic=true, isPrivate=true)
    elif self.match(Private):
        if self.match(Dynamic):
            self.error("dynamic declarations are not allowed in the for loop initializer")
        elif self.match(Static):
            self.expect(Var, "expecting 'var' after 'static' in for loop initializer")
            initializer = self.varDecl(isStatic=true, isPrivate=true)
        elif self.match(Var):
            initializer = self.varDecl(isStatic=true, isPrivate=true)
    elif self.match(Var):
        initializer = self.varDecl(isStatic=true, isPrivate=true)
    else:
        initializer = self.expressionStatement()
    if not self.check(Semicolon):
        condition = self.expression()
    self.expect(Semicolon, "expecting ';' after for loop condition")
    if not self.check(RightParen):
        increment = self.expression()
    self.expect(RightParen, "unterminated for loop increment")
    var body = self.statement()
    if increment != nil:
        # The increment runs after each iteration, so we
        # inject it into the block as the last statement
        body = newBlockStmt(@[body, newExprStmt(increment, increment.token)], tok)
    if condition == nil:
        ## An empty condition is functionally
        ## equivalent to "true"
        condition = newTrueExpr(Token())
    # We can use a while loop, which in this case works just as well
    body = newWhileStmt(condition, body, tok)
    if initializer != nil:
        # Nested blocks, so the initializer is
        # only executed once
        body = newBlockStmt(@[initializer, body], tok)
    result = body
    self.currentLoop = enclosingLoop
    self.endScope()


proc ifStmt(self: Parser): ASTNode =
    ## Parses if statements with an
    ## optional else branch
    let tok = self.peek(-1)
    self.expect(LeftParen, "expecting '(' before if condition")
    var condition = self.expression()
    self.expect(RightParen, "expecting ')' after if condition")
    var thenBranch = self.statement()
    var elseBranch: ASTNode = nil
    if self.match(Else):
        elseBranch = self.statement()
    result = newIfStmt(condition, thenBranch, elseBranch, tok)


proc checkDecl(self: Parser, isStatic, isPrivate: bool) =
    ## Handy utility function that avoids us from copy
    ## pasting the same checks to all declaration handlers:
    ## dynamic and public declarations are only allowed
    ## at the top level, outside functions and local scopes
    if not isStatic and self.currentFunction != nil:
        self.error("dynamic declarations are not allowed inside functions")
    if not isStatic and self.scopeDepth > 0:
        self.error("dynamic declarations are not allowed inside local scopes")
    if not isPrivate and self.currentFunction != nil:
        self.error("cannot bind public names inside functions")
    if not isPrivate and self.scopeDepth > 0:
        self.error("cannot bind public names inside local scopes")


proc varDecl(self: Parser, isStatic: bool = true, isPrivate: bool = true): ASTNode =
    ## Parses variable and constant declarations. The
    ## 'var'/'const' keyword has already been consumed
    ## by the caller and is retrieved with peek(-1)
    self.checkDecl(isStatic, isPrivate)
    var varKind = self.peek(-1)
    var keyword = ""
    var value: ASTNode
    case varKind.kind:
        of Const:
            # Note that isStatic being false is an error, because constants are replaced at compile-time
            if not isStatic:
                self.error("constant declarations cannot be dynamic")
            keyword = "constant"
        else:
            keyword = "variable"
    self.expect(Identifier, &"expecting {keyword} name after '{varKind.lexeme}'")
    var name = newIdentExpr(self.peek(-1))
    if self.match(Equal):
        value = self.expression()
        if varKind.kind == Const and not value.isConst():
            self.error("the initializer for constant declarations must be a primitive and constant type")
    else:
        if varKind.kind == Const:
            self.error("constant declaration requires an explicit initializer")
        # Variables without an initializer default to nil
        value = newNilExpr(Token())
    self.expect(Semicolon, &"expecting semicolon after {keyword} declaration")
    case varKind.kind:
        of Var:
            result = newVarDecl(name, value, isStatic=isStatic, isPrivate=isPrivate, token=varKind, owner=self.file)
        of Const:
            result = newVarDecl(name, value, isConst=true, isPrivate=isPrivate, isStatic=true, token=varKind, owner=self.file)
        else:
            discard  # Unreachable


proc funDecl(self: Parser, isAsync: bool = false, isStatic: bool = true, isPrivate: bool = true, isLambda: bool = false): ASTNode =
    ## Parses function and lambda declarations. Note that lambdas count as expressions!
    ## The 'fun'/'lambda' keyword has already been consumed by the caller. While the
    ## body is being parsed, self.currentFunction points to the node being built so that
    ## return/yield/await/defer can be validated; the enclosing function is restored
    ## afterwards
    self.checkDecl(isStatic, isPrivate)
    let tok = self.peek(-1)
    var enclosingFunction = self.currentFunction
    var arguments: seq[ASTNode] = @[]
    var defaults: seq[ASTNode] = @[]
    # NOTE(review): the node is created while 'arguments' and 'defaults' are
    # still empty. Nim seqs have value semantics, so unless the constructor or
    # later code stores them again, the parameters collected below may never
    # reach the AST node. Confirm against meta/ast
    if not isLambda:
        self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()), isAsync=isAsync, isGenerator=false, isStatic=isStatic, isPrivate=isPrivate, token=tok, owner=self.file)
    else:
        self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=false, token=tok)
    if not isLambda:
        self.expect(Identifier, "expecting function name after 'fun'")
        FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1))
    if self.match(LeftBrace):
        # Argument-less function
        discard
    else:
        var parameter: IdentExpr
        self.expect(LeftParen)
        while not self.check(RightParen):
            # Checked before adding, so that at most 255 parameters are accepted
            if arguments.len >= 255:
                self.error("cannot have more than 255 arguments in function declaration")
            self.expect(Identifier)
            parameter = newIdentExpr(self.peek(-1))
            # NOTE(review): 'in' relies on == for ASTNode; if that is
            # reference equality, duplicates are never detected here
            if parameter in arguments:
                self.error("duplicate parameter name in function declaration")
            arguments.add(parameter)
            if self.match(Equal):
                defaults.add(self.expression())
            elif defaults.len() > 0:
                self.error("positional argument(s) cannot follow default argument(s) in function declaration")
            if not self.match(Comma):
                break
        self.expect(RightParen)
        self.expect(LeftBrace)
    if not isLambda:
        FunDecl(self.currentFunction).body = self.blockStmt()
    else:
        LambdaExpr(self.currentFunction).body = self.blockStmt()
    result = self.currentFunction
    self.currentFunction = enclosingFunction


proc classDecl(self: Parser, isStatic: bool = true, isPrivate: bool = true): ASTNode =
    ## Parses class declarations, with an optional
    ## comma-separated list of parents after '<'
    self.checkDecl(isStatic, isPrivate)
    let tok = self.peek(-1)
    var parents: seq[ASTNode] = @[]
    self.expect(Identifier)
    var name = newIdentExpr(self.peek(-1))
    if self.match(LessThan):
        while true:
            self.expect(Identifier)
            parents.add(newIdentExpr(self.peek(-1)))
            if not self.match(Comma):
                break
    self.expect(LeftBrace)
    result = newClassDecl(name, self.blockStmt(), isPrivate=isPrivate, isStatic=isStatic, parents=parents, token=tok, owner=self.file)


proc expression(self: Parser): ASTNode =
    ## Parses expressions
    result = self.assignment()


proc expressionStatement(self: Parser): ASTNode =
    ## Parses expression statements, which
    ## are expressions followed by a semicolon
    var expression = self.expression()
    endOfLine("missing semicolon after expression")
    result = newExprStmt(expression, expression.token)


proc statement(self: Parser): ASTNode =
    ## Parses statements. The leading keyword
    ## is consumed here, so each handler can
    ## retrieve it with peek(-1). Anything that
    ## is not a known statement is parsed as an
    ## expression statement
    case self.peek().kind:
        of If:
            discard self.step()
            result = self.ifStmt()
        of Del:
            discard self.step()
            result = self.delStmt()
        of Assert:
            discard self.step()
            result = self.assertStmt()
        of Raise:
            discard self.step()
            result = self.raiseStmt()
        of Break:
            discard self.step()
            result = self.breakStmt()
        of Continue:
            discard self.step()
            result = self.continueStmt()
        of Return:
            discard self.step()
            result = self.returnStmt()
        of Import:
            discard self.step()
            result = self.importStmt()
        of From:
            discard self.step()
            result = self.fromStmt()
        of While:
            discard self.step()
            result = self.whileStmt()
        of For:
            discard self.step()
            result = self.forStmt()
        of Foreach:
            discard self.step()
            result = self.forEachStmt()
        of LeftBrace:
            discard self.step()
            result = self.blockStmt()
        of Yield:
            discard self.step()
            result = self.yieldStmt()
        of Await:
            discard self.step()
            result = self.awaitStmt()
        of Defer:
            discard self.step()
            result = self.deferStmt()
        of Try:
            discard self.step()
            result = self.tryStmt()
        else:
            result = self.expressionStatement()


proc declaration(self: Parser): ASTNode =
    ## Parses declarations (variables, constants,
    ## classes and functions) together with their
    ## optional private/public, static/dynamic and
    ## async modifiers. Anything else is parsed as
    ## a statement
    case self.peek().kind:
        of Var, Const:
            discard self.step()
            result = self.varDecl()
        of Class:
            discard self.step()
            result = self.classDecl()
        of Fun:
            discard self.step()
            result = self.funDecl()
        of Private, Public:
            discard self.step()
            var isStatic: bool = true
            let isPrivate = if self.peek(-1).kind == Private: true else: false
            if self.match(Dynamic):
                isStatic = false
            elif self.match(Static):
                discard   # This is just to allow an "explicit" static keyword
            if self.match(Async):
                # funDecl expects the 'fun' keyword to be consumed already
                self.expect(Fun)
                result = self.funDecl(isStatic=isStatic, isPrivate=isPrivate, isAsync=true)
            else:
                case self.peek().kind:
                    of Var, Const:
                        discard self.step()
                        result = self.varDecl(isStatic=isStatic, isPrivate=isPrivate)
                    of Class:
                        discard self.step()
                        result = self.classDecl(isStatic=isStatic, isPrivate=isPrivate)
                    of Fun:
                        discard self.step()
                        result = self.funDecl(isStatic=isStatic, isPrivate=isPrivate)
                    else:
                        self.error("expecting declaration")
        of Static, Dynamic:
            discard self.step()
            let isStatic: bool = if self.peek(-1).kind == Static: true else: false
            if self.match(Async):
                self.expect(Fun)
                result = self.funDecl(isStatic=isStatic, isPrivate=true, isAsync=true)
            else:
                case self.peek().kind:
                    of Var, Const:
                        discard self.step()
                        result = self.varDecl(isStatic=isStatic, isPrivate=true)
                    of Class:
                        discard self.step()
                        result = self.classDecl(isStatic=isStatic, isPrivate=true)
                    of Fun:
                        discard self.step()
                        result = self.funDecl(isStatic=isStatic, isPrivate=true)
                    else:
                        self.error("expecting declaration")
        of Async:
            discard self.step()
            self.expect(Fun)
            result = self.funDecl(isAsync=true)
        else:
            result = self.statement()


proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
    ## Parses a series of tokens into a list of AST nodes.
    ## The parser's state is reset first, so the same
    ## object can be reused across files. The file name
    ## is only used for error messages and as the owner
    ## of declarations
    self.tokens = tokens
    self.file = file
    self.current = 0
    self.currentLoop = None
    self.currentFunction = nil
    self.scopeDepth = 0
    while not self.done():
        result.add(self.declaration())

diff --git a/src/backend/serializer.nim b/src/backend/serializer.nim
deleted file mode 100644
index 34f5576..0000000
--- a/src/backend/serializer.nim
+++ /dev/null
@@ -1,273 +0,0 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/ast
import meta/errors
import meta/bytecode
import meta/token
import ../config
import ../util/multibyte

import strformat
import strutils
import nimSHA2
import times


export ast

type
  Serializer* = ref object
    ## State shared by the (de)serialization
    ## routines below: `filename` is used in
    ## error messages and `chunk` is the chunk
    ## whose constants are written or read
    file: string
    filename: string
    chunk: Chunk
  Serialized* = ref object
    ## Wrapper returned by
    ## the Serializer.read*
    ## procedures to store
    ## metadata
    fileHash*: string
    japlVer*: tuple[major, minor, patch: int]
    japlBranch*: string
    commitHash*: string
    compileDate*: int
    chunk*: Chunk


proc `$`*(self: Serialized): string =
  ## Returns a human-readable representation of
  ## the metadata wrapper. The closing parenthesis
  ## is placed after the last field so that the
  ## whole field list is enclosed by it
  result = &"Serialized(fileHash={self.fileHash}, version={self.japlVer.major}.{self.japlVer.minor}.{self.japlVer.patch}, branch={self.japlBranch}, commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]})"


proc error(self: Serializer, message: string) =
  ## Raises a formatted SerializationError exception
  raise newException(SerializationError, &"A fatal error occurred while (de)serializing '{self.filename}' -> {message}")


proc initSerializer*(self: Serializer = nil): Serializer =
  ## Returns a serializer in its initial state. If an
  ## existing serializer is given, that same object is
  ## reset and returned; a new one is only allocated
  ## when no serializer is passed
  if self != nil:
    result = self
  else:
    new(result)
  result.file = ""
  result.filename = ""
  result.chunk = nil


## Basic routines and helpers to convert various objects from and to their byte representation

proc toBytes(self: Serializer, s: string): seq[byte] =
  ## Converts a string into a sequence of bytes,
  ## one byte per character
  result = newSeqOfCap[byte](len(s))
  for c in s:
    result.add(byte(c))


proc toBytes(self: Serializer, s: int): array[8, uint8] =
  ## Reinterprets the in-memory representation of
  ## an integer as an array of 8 bytes (no byte
  ## order conversion is performed)
  result = cast[array[8, uint8]](s)


proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] =
  ## Copies the bytes of a SHA256 digest into a sequence
  for b in d:
    result.add(b)


proc bytesToString(self: Serializer, input: seq[byte]): string =
  ## Converts a sequence of bytes into a string,
  ## one character per byte
  result = newStringOfCap(len(input))
  for b in input:
    result.add(char(b))


proc bytesToInt(self: Serializer, input: array[8, byte]): int =
  ## Copies sizeof(int) bytes from the input array
  ## over the (zero-initialized) result
  copyMem(result.addr, input.unsafeAddr, sizeof(int))


proc bytesToInt(self: Serializer, input: array[3, byte]): int =
  ## Copies the 3 input bytes over the first 3 bytes
  ## of the (zero-initialized) result; the remaining
  ## bytes of the integer stay zero
  copyMem(result.addr, input.unsafeAddr, sizeof(byte) * 3)


proc extend[T](s: var seq[T], a: openarray[T]) =
  ## Extends s with the elements of a
  for e in a:
    s.add(e)


proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) =
  ## Writes the JAPL bytecode headers in-place into a byte stream.
  ## In order: the bytecode marker, the three version numbers (one
  ## byte each), the length of the branch name (one byte) followed
  ## by the branch name, the commit hash, the current unix timestamp
  ## (8 bytes) and the SHA256 digest of the given file contents.
  ## Raises a SerializationError if the commit hash is not exactly
  ## 40 characters long
  # Validate before touching the stream so that a failure
  # does not leave a half-written header behind
  if len(JAPL_COMMIT_HASH) != 40:
    self.error("the commit hash must be exactly 40 characters long")
  stream.extend(self.toBytes(BYTECODE_MARKER))
  stream.add(byte(JAPL_VERSION.major))
  stream.add(byte(JAPL_VERSION.minor))
  stream.add(byte(JAPL_VERSION.patch))
  stream.add(byte(len(JAPL_BRANCH)))
  stream.extend(self.toBytes(JAPL_BRANCH))
  stream.extend(self.toBytes(JAPL_COMMIT_HASH))
  stream.extend(self.toBytes(getTime().toUnixFloat().int()))
  stream.extend(self.toBytes(computeSHA256(file)))


proc writeConstants(self: Serializer, stream: var seq[byte]) =
  ## Writes the constants table in-place into the given stream.
  ## Every constant is written as a tag byte (0x1 for numbers,
  ## 0x2 for strings, 0x0 for identifiers), the size of its payload
  ## as produced by toTriple() and the payload itself. Strings carry
  ## one extra byte after the size which encodes their prefix (0x0
  ## for none, 0x1 for 'b', 0x2 for 'f'). The table is terminated
  ## by the byte 0x59. Raises a SerializationError when a constant
  ## of any other kind is found
  for constant in self.chunk.consts:
    case constant.kind:
      of intExpr, floatExpr:
        stream.add(0x1)
        stream.extend(len(constant.token.lexeme).toTriple())
        stream.extend(self.toBytes(constant.token.lexeme))
      of strExpr:
        stream.add(0x2)
        var temp: seq[byte] = @[]
        # Number of characters of the lexeme that are
        # not part of the payload (quotes and prefix)
        var strip: int = 2
        # Index of the first character after the opening quote
        var offset: int = 1
        case constant.token.lexeme[0]:
          of 'f':
            strip = 3
            inc(offset)
            temp.add(0x2)
          of 'b':
            strip = 3
            inc(offset)
            temp.add(0x1)
          else:
            strip = 2
            temp.add(0x0)
        stream.extend((len(constant.token.lexeme) - strip).toTriple())  # Removes the quotes from the length count as they're not written
        stream.extend(temp)
        stream.add(self.toBytes(constant.token.lexeme[offset..^2]))
      of identExpr:
        stream.add(0x0)
        stream.extend(len(constant.token.lexeme).toTriple())
        stream.add(self.toBytes(constant.token.lexeme))
      else:
        self.error(&"unknown constant kind in chunk table ({constant.kind})")
  stream.add(0x59)  # End marker


-proc readConstants(self: Serializer, stream: seq[byte]): int = - ## Reads the constant table from the given stream and - ## adds each constant to the chunk object 
(note: most compile-time - ## information such as the original token objects and line info is lost when - ## serializing the data, so those fields are set to nil or some default - ## value). Returns the number of bytes that were processed in the stream - var stream = stream - var count: int = 0 - while true: - case stream[0]: - of 0x59: - inc(count) - break - of 0x2: - stream = stream[1..^1] - let size = self.bytesToInt([stream[0], stream[1], stream[2]]) - stream = stream[3..^1] - var s = newStrExpr(Token(lexeme: "")) - case stream[0]: - of 0x0: - discard - of 0x1: - s.token.lexeme.add("b") - of 0x2: - s.token.lexeme.add("f") - else: - self.error(&"unknown string modifier in chunk table (0x{stream[0].toHex()}") - stream = stream[1..^1] - s.token.lexeme.add("\"") - for i in countup(0, size - 1): - s.token.lexeme.add(cast[char](stream[i])) - s.token.lexeme.add("\"") - stream = stream[size..^1] - self.chunk.consts.add(s) - inc(count, size + 5) - of 0x1: - stream = stream[1..^1] - inc(count) - let size = self.bytesToInt([stream[0], stream[1], stream[2]]) - stream = stream[3..^1] - inc(count, 3) - var tok: Token = new(Token) - tok.lexeme = self.bytesToString(stream[0.. 
0: - for i in 0..self.length - 1: - if self[i] == elem: - return true - return false - - -proc high*[T](self: ptr ArrayList[T]): int = - ## Returns the index of the last - ## element in the list, in constant time - if self.length == 0: - raise newException(IndexDefect, "ArrayList is empty") - result = self.length - 1 - - -proc len*[T](self: ptr ArrayList[T]): int = - ## Returns the length of the list - ## in constant time - result = self.length - - -iterator pairs*[T](self: ptr ArrayList[T]): tuple[key: int, val: T] = - ## Implements pairwise iteration (similar to python's enumerate) - for i in countup(0, self.length - 1): - yield (key: i, val: self[i]) - - -iterator items*[T](self: ptr ArrayList[T]): T = - ## Implements iteration - for i in countup(0, self.length - 1): - yield self[i] - - -proc reversed*[T](self: ptr ArrayList[T], first: int = -1, last: int = 0): ptr ArrayList[T] = - ## Returns a reversed version of the given list, from first to last. - ## First defaults to -1 (the end of the list) and last defaults to 0 (the - ## beginning of the list) - var first = first - if first == -1: - first = self.length - 1 - result = newArrayList[T]() - for i in countdown(first, last): - result.append(self[i]) - - -proc extend*[T](self: ptr ArrayList[T], other: seq[T]) = - ## Iteratively calls self.append() with the elements - ## from a nim sequence - for elem in other: - self.append(elem) - - -proc extend*[T](self: ptr ArrayList[T], other: ptr ArrayList[T]) = - ## Iteratively calls self.append() with the elements - ## from another ArrayList - for elem in other: - self.append(elem) - - -proc `$`*[T](self: ptr ArrayList[T]): string = - ## Returns a string representation - ## of self - result = "[" - if self.length > 0: - for i in 0..self.length - 1: - result = result & $self.container[i] - if i < self.length - 1: - result = result & ", " - result = result & "]" - - -proc getIter*[T](self: ptr ArrayList[T]): Iterator = - ## Returns the iterator object of the - ## arraylist 
- result = allocate(ArrayListIterator, ) # TODO \ No newline at end of file diff --git a/src/frontend/types/base.nim b/src/frontend/types/base.nim deleted file mode 100644 index 9bb9988..0000000 --- a/src/frontend/types/base.nim +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2021 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import ../../memory/allocator - - -type - ObjectType* {.pure.} = enum - ## All the possible object types - String, Exception, Function, - Class, Module, BaseObject, - Native, Integer, Float, - Bool, NotANumber, Infinity, - Nil, List, Dict, Set, Tuple - Obj* = object of RootObj - ## The base object for all - ## JAPL types. 
Every object - ## in JAPL implicitly inherits - ## from this base type - kind*: ObjectType - hashValue*: uint64 - - -## Object constructors and allocators - -proc allocateObject*(size: int, kind: ObjectType): ptr Obj = - ## Wrapper around memory.reallocate to create a new generic JAPL object - result = cast[ptr Obj](reallocate(nil, 0, size)) - result.kind = kind - - -template allocateObj*(kind: untyped, objType: ObjectType): untyped = - ## Wrapper around allocateObject to cast a generic object - ## to a more specific type - cast[ptr kind](allocateObject(sizeof kind, objType)) - - -proc newObj*(): ptr Obj = - ## Allocates a generic JAPL object - result = allocateObj(Obj, ObjectType.BaseObject) - - -proc asObj*(self: ptr Obj): ptr Obj = - ## Casts a specific JAPL object into a generic - ## pointer to Obj - result = cast[ptr Obj](self) - - diff --git a/src/frontend/types/hashmap.nim b/src/frontend/types/hashmap.nim deleted file mode 100644 index a34332c..0000000 --- a/src/frontend/types/hashmap.nim +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright 2021 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
import ../../memory/allocator
import ../../config
import base
import iterable


type
  Entry = object
    ## A single slot of the table. A slot whose key is
    ## nil has never been used; a slot whose tombstone
    ## flag is set held a pair that has been deleted
    ## (its key is kept so that probing can go past it)
    key: ptr Obj
    value: ptr Obj
    tombstone: bool
  HashMap* = object of Iterable
    ## An open-addressing hash table with linear probing.
    ## The inherited length field counts the slots that
    ## hold a key (tombstones included) and drives resizing,
    ## while actual_length counts the pairs that are really
    ## in the map and is what len() reports
    entries: ptr UncheckedArray[ptr Entry]
    actual_length: int


proc newHashMap*(): ptr HashMap =
  ## Allocates a new, empty hashmap. No slot is
  ## allocated until the first pair is inserted
  result = allocateObj(HashMap, ObjectType.Dict)
  result.actual_length = 0
  result.entries = nil
  result.capacity = 0
  result.length = 0


proc freeHashMap*(self: ptr HashMap) =
  ## Releases the slot array of the map and
  ## resets it to the empty state
  # NOTE(review): only the array of slot pointers is released here,
  # the Entry objects allocated one by one in adjustCapacity do not
  # seem to be freed anywhere -- confirm against memory/allocator
  discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity)
  self.length = 0
  self.actual_length = 0
  self.capacity = 0
  self.entries = nil


proc findEntry(self: ptr UncheckedArray[ptr Entry], key: ptr Obj, capacity: int): ptr Entry =
  ## Returns the slot that belongs to the given key: either the slot
  ## that already stores the key (it may be flagged as a tombstone)
  ## or the first never-used slot met while probing linearly from the
  ## key's hash. Returns nil if the table has no slots at all, since
  ## there is nothing to probe (and the modulo below would divide
  ## by zero)
  if capacity <= 0:
    return nil
  let slots = uint64(capacity)
  var idx = uint64(key.hash()) mod slots
  # Terminates as long as at least one slot was never used
  # (presumably guaranteed by MAP_LOAD_FACTOR being below 1)
  while true:
    result = self[idx]
    if isNil(result.key) or result.key == key:
      break
    idx = (idx + 1) mod slots


proc adjustCapacity(self: ptr HashMap) =
  ## Grows the slot array to growCapacity(self.capacity) slots and
  ## re-inserts the live pairs into it. Deleted pairs (tombstones)
  ## are dropped instead of being copied: copying them would bring
  ## them back as live pairs because the new slots start with the
  ## tombstone flag cleared
  let newCapacity = growCapacity(self.capacity)
  let entries = allocate(UncheckedArray[ptr Entry], Entry, newCapacity)
  self.length = 0
  for x in countup(0, newCapacity - 1):
    entries[x] = allocate(Entry, Entry, 1)
    entries[x].tombstone = false
    entries[x].key = nil
    entries[x].value = nil
  for x in countup(0, self.capacity - 1):
    let oldEntry = self.entries[x]
    if isNil(oldEntry.key) or oldEntry.tombstone:
      continue
    let newEntry = entries.findEntry(oldEntry.key, newCapacity)
    newEntry.key = oldEntry.key
    newEntry.value = oldEntry.value
    self.length += 1
  # NOTE(review): the old Entry objects themselves are not released
  # here, only the array that pointed to them -- looks like a leak,
  # confirm against memory/allocator
  discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity)
  self.entries = entries
  self.capacity = newCapacity


proc setEntry(self: ptr HashMap, key: ptr Obj, value: ptr Obj): bool =
  ## Associates value to key, growing the table first if the load
  ## factor would be exceeded. Returns true if the key was not in
  ## the map before the call (this includes a key that had been
  ## deleted earlier) and false if an existing value was replaced
  if float64(self.length + 1) >= float64(self.capacity) * MAP_LOAD_FACTOR:
    self.adjustCapacity()
  let entry = findEntry(self.entries, key, self.capacity)
  let unused = isNil(entry.key)
  result = unused or entry.tombstone
  if result:
    self.actual_length += 1
  if unused:
    # A revived tombstone already occupies a slot,
    # only never-used slots add to the load
    self.length += 1
  entry.key = key
  entry.value = value
  entry.tombstone = false


proc `[]`*(self: ptr HashMap, key: ptr Obj): ptr Obj =
  ## Returns the value associated to key. Raises a KeyError
  ## if the key is not in the map (also when the map is empty)
  let entry = findEntry(self.entries, key, self.capacity)
  if isNil(entry) or isNil(entry.key) or entry.tombstone:
    raise newException(KeyError, "Key not found: " & $key)
  result = entry.value


proc `[]=`*(self: ptr HashMap, key: ptr Obj, value: ptr Obj) =
  ## Associates value to key, replacing the
  ## previous value if there is one
  discard self.setEntry(key, value)


proc len*(self: ptr HashMap): int =
  ## Returns the number of pairs in the map
  result = self.actual_length


proc del*(self: ptr HashMap, key: ptr Obj) =
  ## Removes key and its value from the map by flagging the key's
  ## slot as a tombstone. Raises a KeyError if the map is empty or
  ## if the key is not in it (a key that was already deleted counts
  ## as missing, so it cannot be deleted twice)
  if self.len() == 0:
    raise newException(KeyError, "delete from empty hashmap")
  let entry = findEntry(self.entries, key, self.capacity)
  if isNil(entry) or isNil(entry.key) or entry.tombstone:
    raise newException(KeyError, "Key not found: " & $key)
  self.actual_length -= 1
  entry.tombstone = true


proc contains*(self: ptr HashMap, key: ptr Obj): bool =
  ## Returns true if key is in the map and false
  ## otherwise (always false for an empty map)
  let entry = findEntry(self.entries, key, self.capacity)
  result = not isNil(entry) and not isNil(entry.key) and not entry.tombstone


iterator keys*(self: ptr HashMap): ptr Obj =
  ## Yields the keys of the map in slot order,
  ## skipping unused and deleted slots
  for i in countup(0, self.capacity - 1):
    let entry = self.entries[i]
    if not isNil(entry.key) and not entry.tombstone:
      yield entry.key


iterator values*(self: ptr HashMap): ptr Obj =
  ## Yields the values of the map, in the
  ## same order as keys()
  for key in self.keys():
    yield self[key]


iterator pairs*(self: ptr HashMap): tuple[key: ptr Obj, val: ptr Obj] =
  ## Yields (key, value) tuples, in the
  ## same order as keys()
  for key in self.keys():
    yield (key: key, val: self[key])


iterator items*(self: ptr HashMap): ptr Obj =
  ## Iterating over a map yields its keys
  for k in self.keys():
    yield k


proc `$`*(self: ptr HashMap): string =
  ## Returns a string representation of the map, with
  ## pairs written as "key: value", separated by ", "
  ## and enclosed in curly braces
  var i = 0
  result &= "{"
  for key, value in self.pairs():
    result &= $key & ": " & $value
    if i < self.len() - 1:
      result &= ", "
    i += 1
  result &= "}"
\ No newline at end of file
diff --git 
a/src/frontend/types/iterable.nim b/src/frontend/types/iterable.nim
deleted file mode 100644
index c236428..0000000
--- a/src/frontend/types/iterable.nim
+++ /dev/null
@@ -1,44 +0,0 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Implementation of iterable types and iterators in JAPL

import base


type
  Iterable* = object of Obj
    ## Common interface shared by the
    ## iterable types of JAPL
    length*: int
    capacity*: int
  Iterator* = object of Iterable
    ## The object that drives iteration
    ## over every JAPL iterable, with the
    ## exception of generators
    iterable*: ptr Obj
    iterCount*: int


proc getIter*(self: Iterable): ptr Iterator =
  ## Meant to return the iterator object of
  ## an iterable (the one driving foreach
  ## loops). This base version always
  ## returns nil
  result = nil


proc next*(self: Iterator): ptr Obj =
  ## Meant to return the next element of the
  ## iterator, or nil once it has been
  ## consumed. This base version always
  ## returns nil
  result = nil
\ No newline at end of file