diff --git a/src/backend/vm.nim b/src/backend/vm.nim index e2a209e..31fab81 100644 --- a/src/backend/vm.nim +++ b/src/backend/vm.nim @@ -86,6 +86,13 @@ proc pop(self: PeonVM): PeonObject = return self.stack[self.sp] +proc peek(self: PeonVM): PeonObject = + ## Returns the element at the top + ## of the stack without consuming + ## it + return self.stack[self.sp] + + proc readByte(self: PeonVM): uint8 = ## Reads a single byte from the ## bytecode and returns it as an @@ -99,7 +106,7 @@ proc readShort(self: PeonVM): uint16 = ## bytecode and returns them ## as an unsigned 16 bit ## integer - var arr: array[2, uint8] + var arr: array[2, uint8] = [self.readByte(), self.readByte()] copyMem(result.addr, unsafeAddr(arr), sizeof(arr)) @@ -110,29 +117,28 @@ proc readLong(self: PeonVM): uint32 = ## integer. Note however that ## the boundary is capped at ## 24 bits instead of 32 - var arr: array[3, uint8] + var arr: array[3, uint8] = [self.readByte(), self.readByte(), self.readByte()] copyMem(result.addr, unsafeAddr(arr), sizeof(arr)) -proc readLongLong(self: PeonVM): uint64 = - ## Reads 4 bytes from the - ## bytecode and returns them - ## as an unsigned 64 bit - ## integer - var arr: array[4, uint8] - copyMem(result.addr, unsafeAddr(arr), sizeof(arr)) - - -proc readInt64(self: PeonVM): PeonObject = +proc readInt64(self: PeonVM, idx: int): PeonObject = ## Reads a constant from the ## chunk's constant table and ## returns a Peon object. Assumes - ## the constant's type is an Int64 - var arr = [self.readByte(), self.readByte(), self.readByte()] - var idx: int - copyMem(idx.addr, arr.addr, sizeof(arr)) - # TODO - # result = PeonObject() + ## the constant is an Int64 + var arr = [self.chunk.byteConsts[idx], self.chunk.byteConsts[idx + 1], self.chunk.byteConsts[idx + 2], self.chunk.byteConsts[idx + 3]] + result = PeonObject(kind: Int64) + copyMem(result.long.addr, arr.addr, sizeof(arr)) + + +proc readUInt64(self: PeonVM, idx: int): PeonObject = + ## Reads a constant from the + ## chunk's constant table and + ## returns a Peon object. Assumes + ## the constant is an UInt64 + var arr = [self.chunk.byteConsts[idx], self.chunk.byteConsts[idx + 1], self.chunk.byteConsts[idx + 2], self.chunk.byteConsts[idx + 3]] + result = PeonObject(kind: UInt64) + copyMem(result.uLong.addr, arr.addr, sizeof(arr)) proc dispatch*(self: PeonVM) = @@ -141,25 +147,66 @@ proc dispatch*(self: PeonVM) = while true: instruction = OpCode(self.readByte()) case instruction: - of OpCode.True: + of LoadTrue: self.push(self.getBool(true)) - of OpCode.False: + of LoadFalse: self.push(self.getBool(false)) - of OpCode.Nan: + of LoadNan: self.push(self.getNan()) - of OpCode.Nil: + of LoadNil: self.push(self.getNil()) - of OpCode.Inf: + of LoadInf: self.push(self.getInf(true)) + of LoadInt64: + self.push(self.readInt64(int(self.readLong()))) + of LoadUInt64: + self.push(self.readUInt64(int(self.readLong()))) of OpCode.Return: # TODO return - of OpCode.NoOp: + of NoOp: continue - of OpCode.Pop: + of Pop: discard self.pop() - of OpCode.Jump: + of Jump: + self.ip = int(self.readShort()) + of JumpForwards: self.ip += int(self.readShort()) + of JumpBackwards: + self.ip -= int(self.readShort()) + of JumpIfFalse: + if not self.peek().boolean: + self.ip += int(self.readShort()) + of JumpIfTrue: + if self.peek().boolean: + self.ip += int(self.readShort()) + of JumpIfFalsePop: + if not self.peek().boolean: + self.ip += int(self.readShort()) + discard self.pop() + of JumpIfFalseOrPop: + if not self.peek().boolean: + self.ip += int(self.readShort()) + else: + discard self.pop() + of LongJumpIfFalse: + if not self.peek().boolean: + self.ip += int(self.readLong()) + of LongJumpIfFalsePop: + if not self.peek().boolean: + self.ip += int(self.readLong()) + discard self.pop() + of LongJumpForwards: + self.ip += int(self.readLong()) + of LongJumpBackwards: + self.ip -= int(self.readLong()) + of LongJump: + self.ip = int(self.readLong()) + of LongJumpIfFalseOrPop: + if not self.peek().boolean: + self.ip += int(self.readLong()) + else: + discard self.pop() else: discard diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 49439d9..84d3ad1 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -14,8 +14,6 @@ import meta/token import meta/ast import meta/errors -import meta/bytecode -import meta/typing import ../config import ../util/multibyte @@ -24,41 +22,73 @@ import strformat import algorithm import parseutils import strutils +import sequtils export ast -export bytecode export token export multibyte +type + TypeKind* = enum + ## An enumeration of compile-time + ## types + Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Function, CustomType, + Nil, Nan, Bool, Inf + Type* = ref object + ## A wrapper around + ## compile-time types + node*: ASTNode + case kind*: TypeKind: + of Function: + returnType*: Type + else: + discard +# This way we don't have recursive dependency issues +import meta/bytecode +export bytecode + type Name = ref object ## A compile-time wrapper around ## statically resolved names - name: IdentExpr # Name of the identifier - owner: string # Owner of the identifier (module) - depth: int # Scope depth - isPrivate: bool # Is this name private? - isConst: bool # Is this a constant? - isLet: bool # Can this name's value be mutated? - valueType: Type # The name's type - codePos: int # The position in the bytecode - # where this name's StoreVar - # instruction was emitted. This - # is kept so that once we detect - # this name as a closed-over variable - # we can change the StoreVar into a StoreHeap + + # Name of the identifier + name: IdentExpr + # Owner of the identifier (module) + owner: string + # Scope depth + depth: int + # Is this name private? + isPrivate: bool + # Is this a constant? + isConst: bool + # Can this name's value be mutated? + isLet: bool + # The name's type + valueType: Type + # For variables, the position in the bytecode + # where its StoreVar instruction was emitted. + # For functions, this marks where the function's + # code begins + codePos: int Loop = object ## A "loop object" used ## by the compiler to emit ## appropriate jump offsets ## for continue and break ## statements - start: int # Position in the bytecode where the loop starts - depth: int # Scope depth where the loop is located - breakPos: seq[int] # List of positions into our bytecode where we need to - # patch jumps. Used for break statements + + # Position in the bytecode where the loop starts + start: int + # Scope depth where the loop is located + depth: int + # Absolute jump offsets into our bytecode that we need to + # patch. Used for break statements + breakPos: seq[int] Compiler* = ref object ## A wrapper around the Peon compiler's state @@ -192,22 +222,21 @@ proc emitBytes(self: Compiler, bytarr: array[2, uint8]) = self.emitBytes(bytarr[0], bytarr[1]) -proc emitBytes(self: Compiler, bytarr: array[3, uint8]) = +proc emitBytes(self: Compiler, bytarr: openarray[uint8]) = ## Handy helper method to write an array of 3 bytes into ## the current chunk, calling emitByte on each of its ## elements - self.emitBytes(bytarr[0], bytarr[1]) - self.emitByte(bytarr[2]) + for b in bytarr: + self.emitByte(b) - -proc makeConstant(self: Compiler, val: LiteralExpr): array[3, uint8] = +proc makeConstant(self: Compiler, val: Expression, kind: Type): array[3, uint8] = ## Adds a constant to the current chunk's constant table ## and returns its index as a 3-byte array of uint8s - result = self.chunk.addConstant(val) + result = self.chunk.addConstant(val, kind) -proc emitConstant(self: Compiler, obj: LiteralExpr) = +proc emitConstant(self: Compiler, obj: Expression, kind: Type) = ## Emits a LoadConstant instruction along ## with its operand case self.inferType(obj).kind: @@ -215,7 +244,7 @@ proc emitConstant(self: Compiler, obj: LiteralExpr) = self.emitByte(LoadInt64) else: discard # TODO - self.emitBytes(self.makeConstant(obj)) + self.emitBytes(self.makeConstant(obj, kind)) proc emitJump(self: Compiler, opcode: OpCode): int = @@ -348,6 +377,83 @@ proc detectClosureVariable(self: Compiler, name: IdentExpr, depth: int = self.sc self.chunk.code[entry.codePos + 3] = idx[2] +proc compareTypes(self: Compiler, a, b: Type): bool = + ## Compares two type objects + ## for equality (works with nil!) + if a == nil: + return b == nil + elif b == nil: + return a == nil + if a.kind != b.kind: + return false + case a.kind: + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, Nan, Bool, Inf: + return true + of Function: + let + a = FunDecl(a.node) + b = FunDecl(b.node) + if a.name.token.lexeme != b.name.token.lexeme: + return false + elif a.arguments.len() != b.arguments.len(): + return false + elif not self.compareTypes(self.inferType(a.returnType), self.inferType(b.returnType)): + return false + for (argA, argB) in zip(a.arguments, b.arguments): + if argA.mutable != argB.mutable: + return false + elif argA.isRef != argB.isRef: + return false + elif argA.isPtr != argB.isPtr: + return false + elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)): + return false + return true + else: + discard + + +proc toIntrinsic(name: string): Type = + ## Converts a string to an intrinsic + ## type if it is valid and returns nil + ## otherwise + if name in ["int", "int64", "i64"]: + return Type(kind: Int64) + elif name in ["uint64", "u64"]: + return Type(kind: UInt64) + elif name in ["int32", "i32"]: + return Type(kind: Int32) + elif name in ["uint32", "u32"]: + return Type(kind: UInt32) + elif name in ["int16", "i16"]: + return Type(kind: Int16) + elif name in ["uint16", "u16"]: + return Type(kind: UInt16) + elif name in ["int8", "i8"]: + return Type(kind: Int8) + elif name in ["uint8", "u8"]: + return Type(kind: UInt8) + elif name in ["f64", "float", "float64"]: + return Type(kind: Float64) + elif name in ["f32", "float32"]: + return Type(kind: Float32) + elif name == "byte": + return Type(kind: Byte) + elif name == "char": + return Type(kind: Char) + elif name == "nan": + return Type(kind: Nan) + elif name == "nil": + return Type(kind: Nil) + elif name == "inf": + return Type(kind: Inf) + elif name == "bool": + return Type(kind: Bool) + else: + return nil + proc inferType(self: Compiler, node: LiteralExpr): Type = ## Infers the type of a given literal expression @@ -357,9 +463,9 @@ proc inferType(self: Compiler, node: LiteralExpr): Type = if len(size) notin 1..2: self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") if size.len() == 1: - return Type(kind: Int64) + return Type(node: node, kind: Int64) let typ = size[1].toIntrinsic() - if typ != nil: + if not self.compareTypes(typ, nil): return typ else: self.error(&"invalid type specifier '{size[1]}' for int") @@ -368,41 +474,61 @@ proc inferType(self: Compiler, node: LiteralExpr): Type = if len(size) notin 1..2: self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") if size.len() == 1 or size[1] == "f64": - return Type(kind: Float64) + return Type(node: node, kind: Float64) let typ = size[1].toIntrinsic() - if typ != nil: + if not self.compareTypes(typ, nil): return typ else: self.error(&"invalid type specifier '{size[1]}' for float") of nilExpr: - return Type(kind: Nil) + return Type(node: node, kind: Nil) of trueExpr: - return Type(kind: Bool) + return Type(node: node, kind: Bool) of falseExpr: - return Type(kind: Bool) + return Type(node: node, kind: Bool) of nanExpr: - return Type(kind: TypeKind.Nan) + return Type(node: node, kind: TypeKind.Nan) of infExpr: - return Type(kind: TypeKind.Inf) + return Type(node: node, kind: TypeKind.Inf) else: discard # TODO +proc toIntrinsic(self: Compiler, typ: Expression): Type = + ## Gets an expression's + ## intrinsic type, if possible + if typ == nil: + return nil + case typ.kind: + of trueExpr, falseExpr, intExpr, floatExpr: + return typ.token.lexeme.toIntrinsic() + of identExpr: + let inferred = self.inferType(typ) + if inferred == nil: + return typ.token.lexeme.toIntrinsic() + return inferred + else: + discard + + proc inferType(self: Compiler, node: Expression): Type = ## Infers the type of a given expression and ## returns it case node.kind: of identExpr: - let name = self.resolve(IdentExpr(node)) + let node = IdentExpr(node) + let name = self.resolve(node) if name != nil: return name.valueType + else: + return node.name.lexeme.toIntrinsic() of unaryExpr: return self.inferType(UnaryExpr(node).a) of binaryExpr: let node = BinaryExpr(node) var a = self.inferType(node.a) var b = self.inferType(node.b) - if a != b: + if not self.compareTypes(a, b): return nil return a of {intExpr, hexExpr, binExpr, octExpr, @@ -412,26 +538,6 @@ proc inferType(self: Compiler, node: Expression): Type = return self.inferType(LiteralExpr(node)) else: discard # Unreachable - - -proc inferType(self: Compiler, node: Declaration): Type = - ## Infers the type of a given declaration if it's - ## not already defined and returns it - case node.kind: - of funDecl: - var node = FunDecl(node) - let resolved = self.resolve(node.name) - if resolved != nil: - return resolved.valueType - of NodeKind.varDecl: - var node = VarDecl(node) - let resolved = self.resolve(node.name) - if resolved != nil: - return resolved.valueType - else: - return self.inferType(node.value) - else: - return # Unreachable proc typeToStr(self: Compiler, typ: Type): string = @@ -450,58 +556,63 @@ proc typeToStr(self: Compiler, typ: Type): string = var node = FunDecl(typ.node) for i, argument in node.arguments: result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}" - if i < node.arguments.len(): + if i < node.arguments.len() - 1: result &= ", " result &= ")" of lambdaExpr: var node = LambdaExpr(typ.node) for i, argument in node.arguments: result &= &"{argument.name.token.lexeme}: {argument.valueType}" - if i < node.arguments.len(): + if i < node.arguments.len() - 1: result &= ", " result &= ")" else: discard # Unreachable + result &= &": {self.typeToStr(typ.returnType)}" else: discard + - -proc toIntrinsic(self: Compiler, typ: Expression): Type = - ## Gets an expression's - ## intrinsic type, if possible - if typ == nil: - return nil - case typ.kind: - of trueExpr, falseExpr, intExpr, floatExpr: - return typ.token.lexeme.toIntrinsic() - of identExpr: - let inferred = self.inferType(typ) - if inferred != nil: - return +proc inferType(self: Compiler, node: Declaration): Type = + ## Infers the type of a given declaration + ## and returns it + case node.kind: + of funDecl: + var node = FunDecl(node) + let resolved = self.resolve(node.name) + if resolved != nil: + return resolved.valueType + of NodeKind.varDecl: + var node = VarDecl(node) + let resolved = self.resolve(node.name) + if resolved != nil: + return resolved.valueType + else: + return self.inferType(node.value) else: - discard - - + return # Unreachable ## End of utility functions + proc literal(self: Compiler, node: ASTNode) = ## Emits instructions for literals such ## as singletons, strings, numbers and ## collections case node.kind: of trueExpr: - self.emitByte(OpCode.True) + self.emitByte(LoadTrue) of falseExpr: - self.emitByte(OpCode.False) + self.emitByte(LoadFalse) of nilExpr: - self.emitByte(OpCode.Nil) + self.emitByte(LoadNil) of infExpr: - self.emitByte(OpCode.Inf) + self.emitByte(LoadInf) of nanExpr: - self.emitByte(OpCode.Nan) + self.emitByte(LoadNan) of strExpr: - self.emitConstant(LiteralExpr(node)) + self.emitConstant(LiteralExpr(node), Type(kind: String)) + # TODO: Take size specifier into account! of intExpr: var x: int var y = IntExpr(node) @@ -509,7 +620,7 @@ proc literal(self: Compiler, node: ASTNode) = discard parseInt(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") - self.emitConstant(y) + self.emitConstant(y, Type(kind: Int64)) of hexExpr: var x: int var y = HexExpr(node) @@ -517,9 +628,12 @@ proc literal(self: Compiler, node: ASTNode) = discard parseHex(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") - self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, stop: y.token.pos.start + - len($x))))) + let node = newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, + stop: y.token.pos.start + len($x)) + ) + ) + self.emitConstant(node, Type(kind: Int64)) of binExpr: var x: int var y = BinExpr(node) @@ -527,9 +641,12 @@ proc literal(self: Compiler, node: ASTNode) = discard parseBin(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") - self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, stop: y.token.pos.start + - len($x))))) + let node = newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, + stop: y.token.pos.start + len($x)) + ) + ) + self.emitConstant(node, Type(kind: Int64)) of octExpr: var x: int var y = OctExpr(node) @@ -537,9 +654,12 @@ proc literal(self: Compiler, node: ASTNode) = discard parseOct(y.literal.lexeme, x) except ValueError: self.error("integer value out of range") - self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, - pos: (start: y.token.pos.start, stop: y.token.pos.start + - len($x))))) + let node = newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, + stop: y.token.pos.start + len($x)) + ) + ) + self.emitConstant(node, Type(kind: Int64)) of floatExpr: var x: float var y = FloatExpr(node) @@ -547,7 +667,7 @@ proc literal(self: Compiler, node: ASTNode) = discard parseFloat(y.literal.lexeme, x) except ValueError: self.error("floating point value out of range") - self.emitConstant(y) + self.emitConstant(y, Type(kind: Float64)) of awaitExpr: var y = AwaitExpr(node) self.expression(y.expression) @@ -557,20 +677,11 @@ proc literal(self: Compiler, node: ASTNode) = proc unary(self: Compiler, node: UnaryExpr) = - ## Compiles unary expressions such as decimal or - ## bitwise negation + ## Compiles unary expressions such as decimal + ## and bitwise negation self.expression(node.a) # Pushes the operand onto the stack - case node.operator.kind: - of Minus: - self.emitByte(NoOp) - of Plus: - self.emitByte(NoOp) - of TokenType.LogicalNot: - self.emitByte(NoOp) - of Tilde: - self.emitByte(NoOp) - else: - self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug!)") + # TODO: Find implementation of + # the given operator and call it proc binary(self: Compiler, node: BinaryExpr) = @@ -583,70 +694,7 @@ proc binary(self: Compiler, node: BinaryExpr) = # TODO: Find implementation of # the given operator and call it case node.operator.kind: - of Plus: - # a + b - self.emitByte(NoOp) - of Minus: - # a - b - self.emitByte(NoOp) - of Star: - # a * b - self.emitByte(NoOp) - of DoubleStar: - # a ** b - self.emitByte(NoOp) - of Percentage: - # a % b - self.emitByte(NoOp) - of FloorDiv: - # a // b - self.emitByte(NoOp) - of Slash: - # a / b - self.emitByte(NoOp) - of Ampersand: - # a & b - self.emitByte(NoOp) - of Caret: - # a ^ b - self.emitByte(NoOp) - of Pipe: - # a | b - self.emitByte(NoOp) - of Is: - # a is b - self.emitByte(NoOp) - of IsNot: - # a isnot b - self.emitByte(NoOp) - of Of: - # a of b - self.emitByte(NoOp) - of As: - # a as b - self.emitByte(NoOp) - of RightShift: - # a >> b - self.emitByte(NoOp) - of LeftShift: - # a << b - self.emitByte(NoOp) - of LessThan: - # a < b - self.emitByte(NoOp) - of GreaterThan: - # a > b - self.emitByte(NoOp) - of DoubleEqual: - # a == b - self.emitByte(NoOp) - of LessOrEqual: - # a <= b - self.emitByte(NoOp) - of GreaterOrEqual: - # a >= b - self.emitByte(NoOp) - of LogicalAnd: + of NoMatch: # a and b self.expression(node.a) var jump: int @@ -657,7 +705,7 @@ proc binary(self: Compiler, node: BinaryExpr) = self.emitByte(Pop) self.expression(node.b) self.patchJump(jump) - of LogicalOr: + of EndOfFile: # a or b self.expression(node.a) let jump = self.emitJump(JumpIfTrue) @@ -691,14 +739,17 @@ proc declareName(self: Compiler, node: Declaration) = var node = FunDecl(node) # Declares the function's name in the # current scope but no StoreVar is emitted - # because a function's name is only useful - # at compile time + # because the name is only useful at compile time. + # TODO: Maybe emit some optional debugging + # metadata to let the VM know where a function's + # code begins and ends (similar to what gcc does with + # CFI in object files) to build stack traces self.names.add(Name(depth: self.scopeDepth, isPrivate: node.isPrivate, isConst: false, owner: self.currentModule, - valueType: Type(kind: Function, node: node), - codePos: -1, + valueType: Type(kind: Function, node: node, returnType: self.inferType(node.returnType)), + codePos: self.chunk.code.len(), name: node.name, isLet: false)) for argument in node.arguments: @@ -709,12 +760,13 @@ proc declareName(self: Compiler, node: Declaration) = owner: self.currentModule, isConst: false, name: argument.name, - valueType: self.inferType(argument.name), + valueType: nil, codePos: self.chunk.code.len(), isLet: false)) + self.names[^1].valueType = self.inferType(argument.valueType) + self.names[^1].valueType.node = argument.name self.emitByte(StoreVar) self.emitBytes(self.names.high().toTriple()) - # TODO: Default arguments and unpacking else: discard # Unreachable @@ -729,7 +781,7 @@ proc identifier(self: Compiler, node: IdentExpr) = # no matter the scope depth. If optimizations are enabled, the compiler # will reuse the same constant every time it is referenced instead of # allocating a new one each time - self.emitConstant(node) + self.emitConstant(node, self.inferType(node)) else: self.detectClosureVariable(s.name) let t = self.getStackPos(node) @@ -750,6 +802,36 @@ proc identifier(self: Compiler, node: IdentExpr) = self.emitBytes(self.closedOver.high().toTriple()) +proc findImpl(self: Compiler, node: FunDecl): seq[Name] = + ## Looks for functions matching the given declaration + ## in the code that has been compiled so far. + ## Returns a list of each matching name object + for obj in reversed(self.names): + # Scopes are indexed backwards! + case obj.valueType.kind: + of Function: + if self.compareTypes(obj.valueType, self.inferType(node)): + result.add(obj) + else: + continue + + +proc findByName(self: Compiler, name: string): seq[Name] = + ## Looks for objects that have been already declared + ## with the given name + for obj in reversed(self.names): + if obj.name.token.lexeme == name: + result.add(obj) + + +proc findByType(self: Compiler, name: string, kind: Type): seq[Name] = + ## Looks for objects that have already been declared + ## with the given name and type + for obj in self.findByName(name): + if self.compareTypes(obj.valueType, kind): + result.add(obj) + + proc assignment(self: Compiler, node: ASTNode) = ## Compiles assignment expressions case node.kind: @@ -760,42 +842,12 @@ proc assignment(self: Compiler, node: ASTNode) = if r == nil: self.error(&"assignment to undeclared name '{name.token.lexeme}'") elif r.isConst: - self.error(&"cannot assign to '{name.token.lexeme}'") + self.error(&"cannot assign to '{name.token.lexeme}' (constant)") elif r.isLet: self.error(&"cannot reassign '{name.token.lexeme}'") self.expression(node.value) let t = self.getStackPos(name) let index = t.pos - case node.token.kind: - of InplaceAdd: - self.emitByte(NoOp) - of InplaceSub: - self.emitByte(NoOp) - of InplaceDiv: - self.emitByte(NoOp) - of InplaceMul: - self.emitByte(NoOp) - of InplacePow: - self.emitByte(NoOp) - of InplaceFloorDiv: - self.emitByte(NoOp) - of InplaceMod: - self.emitByte(NoOp) - of InplaceAnd: - self.emitByte(NoOp) - of InplaceXor: - self.emitByte(NoOp) - of InplaceRightShift: - self.emitByte(NoOp) - of InplaceLeftShift: - self.emitByte(NoOp) - else: - discard # Unreachable - # In-place operators just change - # what values is set to a given - # offset in a dynamic array, so we only - # need to perform the operation as usual - # and then store it again if index != -1: if not t.closedOver: self.emitByte(StoreVar) @@ -805,7 +857,10 @@ proc assignment(self: Compiler, node: ASTNode) = else: self.error(&"reference to undeclared name '{node.token.lexeme}'") of setItemExpr: - let typ = self.inferType(SetItemExpr(node)) + let node = SetItemExpr(node) + let typ = self.inferType(node) + if typ == nil: + self.error(&"cannot determine the type of '{node.name.token.lexeme}'") # TODO else: self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") @@ -904,10 +959,16 @@ proc emitLoop(self: Compiler, begin: int) = proc whileStmt(self: Compiler, node: WhileStmt) = - ## Compiles C-style while loops + ## Compiles C-style while loops and + ## desugared C-style for loops let start = self.chunk.code.len() self.expression(node.condition) - let jump = self.emitJump(JumpIfFalsePop) + var jump: int + if self.enableOptimizations: + jump = self.emitJump(JumpIfFalsePop) + else: + jump = self.emitJump(JumpIfFalse) + self.emitByte(Pop) self.statement(node.body) self.patchJump(jump) self.emitLoop(start) @@ -926,7 +987,7 @@ proc expression(self: Compiler, node: Expression) = # Note that for setItem and assign we don't convert # the node to its true type because that type information # would be lost in the call anyway. The differentiation - # happens in self.assignment + # happens in self.assignment() of setItemExpr, assignExpr: self.assignment(node) of identExpr: @@ -942,11 +1003,11 @@ proc expression(self: Compiler, node: Expression) = self.binary(BinaryExpr(node)) of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, infExpr, nanExpr, floatExpr, nilExpr: - # Since all of these AST nodes mostly share - # the same overall structure, and the kind - # discriminant is enough to tell one - # from the other, why bother with - # specialized cases when one is enough? + # Since all of these AST nodes share the + # same overall structure and the kind + # field is enough to tell one from the + # other, why bother with specialized + # cases when one is enough? self.literal(node) else: self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") @@ -984,12 +1045,10 @@ proc returnStmt(self: Compiler, node: ReturnStmt) = let typ = self.inferType(self.currentFunction) if returnType == nil and self.currentFunction.returnType != nil: self.error(&"expected return value of type '{self.currentFunction.returnType.token.lexeme}', but expression has no type") - elif self.currentFunction.returnType == nil: - if node.value.kind != nilExpr: - self.error("non-nil return value is not allowed in functions without an explicit return type") - else: - if returnType != typ: - self.error(&"expected return value of type '{self.typeToStr(typ)}', got '{self.typeToStr(returnType)}' instead") + elif self.currentFunction.returnType == nil and node.value.kind != nilExpr: + self.error("non-nil return value is not allowed in functions without an explicit return type") + elif not self.compareTypes(returnType, typ.returnType): + self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead") self.expression(node.value) self.emitByte(OpCode.Return) @@ -1013,6 +1072,8 @@ proc continueStmt(self: Compiler, node: ContinueStmt) = self.emitByte(Jump) self.emitBytes(self.currentLoop.start.toDouble()) else: + if self.currentLoop.start > 16777215: + self.error("too much code to jump over in continue statement") self.emitByte(LongJump) self.emitBytes(self.currentLoop.start.toTriple()) @@ -1074,7 +1135,7 @@ proc statement(self: Compiler, node: Statement) = ## while loops! let loop = self.currentLoop self.currentLoop = Loop(start: self.chunk.code.len(), - depth: self.scopeDepth, breakPos: @[]) + depth: self.scopeDepth, breakPos: @[]) self.whileStmt(WhileStmt(node)) self.patchBreaks() self.currentLoop = loop @@ -1108,14 +1169,25 @@ proc varDecl(self: Compiler, node: VarDecl) = proc funDecl(self: Compiler, node: FunDecl) = ## Compiles function declarations - + self.declareName(node) + if node.body != nil: + let fnType = self.inferType(node) + let impl = self.findByType(node.name.token.lexeme, fnType) + if impl.len() > 1: + # Oh-oh! We found more than one implementation of + # the same function! Error! + var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n" + for fn in reversed(impl): + var node = Declaration(fn.valueType.node) + discard self.typeToStr(fn.valueType) + msg &= &"- '{node.name.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" + self.error(msg) # We store the current function var function = self.currentFunction self.currentFunction = node # A function's code is just compiled linearly # and then jumped over let jmp = self.emitJump(JumpForwards) - self.declareName(node) # Since the deferred array is a linear # sequence of instructions and we want @@ -1139,11 +1211,11 @@ proc funDecl(self: Compiler, node: FunDecl) = # are resolved properly). There's a need for a bit # of boilerplate code to make closures work, but # that's about it - self.emitBytes(OpCode.Nil, OpCode.Return) + self.emitBytes(LoadNil, OpCode.Return) # Currently defer is not functional so we # just pop the instructions - for i in countup(deferStart, self.deferred.len(), 1): + for i in countup(deferStart, self.deferred.len() - 1, 1): self.deferred.delete(i) self.patchJump(jmp) @@ -1182,4 +1254,4 @@ proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk = self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope result = self.chunk if self.ast.len() > 0 and self.scopeDepth != -1: - self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?") + self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?") \ No newline at end of file diff --git a/src/frontend/lexer.nim b/src/frontend/lexer.nim index c690019..dd2f39f 100644 --- a/src/frontend/lexer.nim +++ b/src/frontend/lexer.nim @@ -583,7 +583,7 @@ proc next(self: Lexer) = # Keywords and identifiers self.parseIdentifier() elif self.match("#"): - # Inline comments + # Inline comments, pragmas, etc. while not (self.check("\n") or self.done()): discard self.step() self.createToken(Comment) @@ -606,10 +606,10 @@ proc next(self: Lexer) = self.tokens.add(self.getToken(symbol)) return dec(n) - # None of our conditions matched: we don't know - # what's sitting in front of us, but it definitely - # isn't something we can parse, so it's an error - self.error("invalid syntax") + # We just assume what we have in front of us + # is a symbol + discard self.step() + self.createToken(Symbol) proc lex*(self: Lexer, source, file: string): seq[Token] = diff --git a/src/frontend/meta/ast.nim b/src/frontend/meta/ast.nim index 6501b3e..edae12e 100644 --- a/src/frontend/meta/ast.nim +++ b/src/frontend/meta/ast.nim @@ -127,7 +127,7 @@ type NanExpr* = ref object of LiteralExpr InfExpr* = ref object of LiteralExpr - IdentExpr* = ref object of LiteralExpr + IdentExpr* = ref object of Expression name*: Token GroupingExpr* = ref object of Expression @@ -169,6 +169,7 @@ type defaults*: seq[Expression] isGenerator*: bool isAsync*: bool + isPure*: bool returnType*: Expression SliceExpr* = ref object of Expression @@ -207,7 +208,7 @@ type TryStmt* = ref object of Statement body*: Statement - handlers*: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]] + handlers*: seq[tuple[body: Statement, exc: IdentExpr]] finallyClause*: Statement elseClause*: Statement @@ -249,6 +250,7 @@ type isAsync*: bool isGenerator*: bool isPrivate*: bool + isPure*: bool returnType*: Expression @@ -333,7 +335,6 @@ proc newIdentExpr*(name: Token): IdentExpr = result = IdentExpr(kind: identExpr) result.name = name result.token = name - result.literal = name proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr = @@ -352,6 +353,7 @@ proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression, result.isAsync = isAsync result.token = token result.returnType = returnType + result.isPure = false proc newGetItemExpr*(obj: Expression, name: IdentExpr, token: Token): GetItemExpr = @@ -462,7 +464,7 @@ proc newRaiseStmt*(exception: Expression, token: Token): RaiseStmt = result.token = token -proc newTryStmt*(body: Statement, handlers: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]], +proc newTryStmt*(body: Statement, handlers: seq[tuple[body: Statement, exc: IdentExpr]], finallyClause: Statement, elseClause: Statement, token: Token): TryStmt = result = TryStmt(kind: tryStmt) @@ -549,6 +551,7 @@ proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueTyp result.token = token result.pragmas = pragmas result.returnType = returnType + result.isPure = false @@ -659,3 +662,6 @@ proc `$`*(self: ASTNode): string = result &= ")" else: discard + + +proc `==`*(self, other: IdentExpr): bool {.inline.} = self.token == other.token \ No newline at end of file diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim index 09d02dc..72e9b46 100644 --- a/src/frontend/meta/bytecode.nim +++ b/src/frontend/meta/bytecode.nim @@ -14,13 +14,13 @@ ## Low level bytecode implementation details import ast -import typing -import ../../util/multibyte import errors import strutils import strformat +import ../../util/multibyte +import ../compiler export ast @@ -45,7 +45,7 @@ type ## are 3 and 4" ## This is more efficient than using the naive approach, which would encode ## the same line number multiple times and waste considerable amounts of space. - consts*: seq[LiteralExpr] + consts*: seq[Expression] byteConsts*: seq[uint8] code*: seq[uint8] lines*: seq[int] @@ -79,11 +79,11 @@ type LoadFloat32, LoadString, ## Singleton opcodes (each of them pushes a constant singleton on the stack) - Nil, - True, - False, - Nan, - Inf, + LoadNil, + LoadTrue, + LoadFalse, + LoadNan, + LoadInf, ## Basic stack operations Pop, # Pops an element off the stack and discards it Push, # Pushes x onto the stack @@ -98,10 +98,10 @@ type Jump, # Absolute, unconditional jump into the bytecode JumpForwards, # Relative, unconditional, positive jump in the bytecode JumpBackwards, # Relative, unconditional, negative jump in the bytecode - JumpIfFalse, # Jumps to an absolute index in the bytecode if x is true - JumpIfTrue, # Jumps to an absolute index in the bytecode if x is false + JumpIfFalse, # Jumps to a relative index in the bytecode if x is false + JumpIfTrue, # Jumps to a relative index in the bytecode if x is true JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthyness). Optimization for if statements - JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops it otherwise (used for logical and) + JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and) ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one) LongJump, LongJumpIfFalse, @@ -129,9 +129,9 @@ type # We group instructions by their operation/operand types for easier handling when debugging # Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.) -const simpleInstructions* = {OpCode.Return, OpCode.Nil, - OpCode.True, OpCode.False, - OpCode.Nan, OpCode.Inf, +const simpleInstructions* = {OpCode.Return, LoadNil, + LoadTrue, LoadFalse, + LoadNan, LoadInf, Pop, OpCode.Raise, BeginTry, FinishTry, OpCode.Yield, OpCode.Await, @@ -220,7 +220,7 @@ proc getLine*(self: Chunk, idx: int): int = raise newException(IndexDefect, "index out of range") -proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int = +proc findOrAddConstant(self: Chunk, constant: Expression, kind: Type): int = ## Small optimization function that reuses the same constant ## if it's already been written before (only if self.reuseConsts ## equals true) @@ -232,15 +232,13 @@ proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int = if c.kind != constant.kind: continue if constant.isConst(): - if c.literal.lexeme == constant.literal.lexeme: + if LiteralExpr(c).literal.lexeme == LiteralExpr(constant).literal.lexeme: # This wouldn't work for stuff like 2e3 and 2000.0, but those # forms are collapsed in the compiler before being written # to the constants table return i elif constant.kind == identExpr: - var c = IdentExpr(c) - var constant = IdentExpr(constant) - if c.name.lexeme == constant.name.lexeme: + if IdentExpr(c).name.lexeme == IdentExpr(constant).name.lexeme: return i else: continue @@ -248,14 +246,15 @@ proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int = result = self.consts.high() -proc addConstant*(self: Chunk, constant: LiteralExpr): array[3, uint8] = - ## Writes a constant to a chunk. Returns its index casted to a 3-byte - ## sequence (array). Constant indexes are reused if a constant is used - ## more than once and self.reuseConsts equals true +proc addConstant*(self: Chunk, constant: Expression, kind: Type): array[3, uint8] = + ## Writes a constant of the given type in the chunk's constant + ## table. Returns its index as an array of 3 unsigned 8 bit integers. + ## Constant indexes are reused if a constant is used more than once + ## and self.reuseConsts equals true if self.consts.high() == 16777215: # The constant index is a 24 bit unsigned integer, so that's as far # as we can index into the constant table (the same applies # to our stack by the way). Not that anyone's ever gonna hit this # limit in the real world, but you know, just in case raise newException(CompileError, "cannot encode more than 16777216 constants") - result = self.findOrAddConstant(constant).toTriple() + result = self.findOrAddConstant(constant, kind).toTriple() diff --git a/src/frontend/meta/token.nim b/src/frontend/meta/token.nim index 56380df..064ffd2 100644 --- a/src/frontend/meta/token.nim +++ b/src/frontend/meta/token.nim @@ -16,79 +16,74 @@ import strformat type + TokenType* {.pure.} = enum - ## Token types enumeration + ## Token types enumeration - # Booleans - True, False, + # Booleans + True, False, - # Other singleton types - Infinity, NotANumber, Nil + # Other singleton types + Infinity, NotANumber, Nil - # Control flow statements - If, Else, + # Control flow statements + If, Else, - # Looping statements - While, For, + # Looping statements + While, For, - # Keywords - Function, Break, Continue, - Var, Let, Const, Is, Return, - Coroutine, Generator, Import, - IsNot, Raise, Assert, Await, - Foreach, Yield, Of, Defer, - Try, Except, Finally, Type, - Operator, Case, Enum, From, - Emit, As, Ptr, Ref + # Keywords + Function, Break, Continue, + Var, Let, Const, Return, + Coroutine, Generator, Import, + Raise, Assert, Await, Foreach, + Yield, Defer, Try, Except, + Finally, Type, Operator, Case, + Enum, From, Ptr, Ref - # Literal types - Integer, Float, String, Identifier, - Binary, Octal, Hex, Char + # Literal types + Integer, Float, String, Identifier, + Binary, Octal, Hex, Char - # Brackets, parentheses, - # operators and others + # Brackets, parentheses, + # operators and others - LeftParen, RightParen, # () - LeftBrace, RightBrace, # {} - LeftBracket, RightBracket, # [] - Dot, Semicolon, Colon, Comma, # . ; : , - Plus, Minus, Slash, Star, # + - / * - Percentage, DoubleStar, # % ** - Caret, Pipe, Ampersand, Tilde, # ^ | & ~ - Equal, GreaterThan, LessThan, # = > < - LessOrEqual, GreaterOrEqual, # >= <= - NotEqual, RightShift, LeftShift, # != >> << - LogicalAnd, LogicalOr, LogicalNot, # and or not - InplaceAdd, InplaceSub, InplaceDiv, # += -= /= - InplaceMod, InplaceMul, InplaceXor, # %= *= ^= - InplaceAnd, InplaceOr, FloorDiv, # &= |= // - DoubleEqual, InplaceFloorDiv, InplacePow, # == //= **= - InplaceRightShift, InplaceLeftShift, # >>= <<= + LeftParen, RightParen, # () + LeftBrace, RightBrace, # {} + LeftBracket, RightBracket, # [] + Dot, Semicolon, Comma, # . ; , - # Miscellaneous + # Miscellaneous - EndOfFile, # Marks the end of the token stream - NoMatch, # Used internally by the symbol table - Comment, # Useful for documentation comments, pragmas, etc. - # These are not used at the moment but may be - # employed to enforce indentation or other neat - # stuff I haven't thought about yet - Whitespace, - Tab, + EndOfFile, # Marks the end of the token stream + NoMatch, # Used internally by the symbol table + Comment, # Useful for documentation comments, pragmas, etc. + Symbol, # A generic symbol + # These are not used at the moment but may be + # employed to enforce indentation or other neat + # stuff I haven't thought about yet + Whitespace, + Tab, Token* = ref object - ## A token object - kind*: TokenType # Type of the token - lexeme*: string # The lexeme associated to the token - line*: int # The line where the token appears - pos*: tuple[start, stop: int] # The absolute position in the source file - # (0-indexed and inclusive at the beginning) + ## A token object + kind*: TokenType # Type of the token + lexeme*: string # The lexeme associated to the token + line*: int # The line where the token appears + pos*: tuple[start, stop: int] # The absolute position in the source file + # (0-indexed and inclusive at the beginning) proc `$`*(self: Token): string = - if self != nil: - result = &"Token(kind={self.kind}, lexeme='{$(self.lexeme)}', line={self.line}, pos=({self.pos.start}, {self.pos.stop}))" - else: - result = "nil" + ## Strinfifies + if self != nil: + result = &"Token(kind={self.kind}, lexeme='{$(self.lexeme)}', line={self.line}, pos=({self.pos.start}, {self.pos.stop}))" + else: + result = "nil" + + +proc `==`*(self, other: Token): bool = + ## Returns self == other + return self.kind == other.kind and self.lexeme == other.lexeme \ No newline at end of file diff --git a/src/frontend/meta/typing.nim b/src/frontend/meta/typing.nim deleted file mode 100644 index 44292b3..0000000 --- a/src/frontend/meta/typing.nim +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2022 Mattia Giambirtone & All Contributors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -## Peon's type system -import ast -export ast - - -type - TypeKind* = enum - ## An enumeration of compile-time - ## types - Int8, UInt8, Int16, UInt16, Int32, - UInt32, Int64, UInt64, Float32, Float64, - Char, Byte, String, Function, CustomType, - Nil, Nan, Bool, Inf - Type* = ref object - ## A wrapper around - ## compile-time types - node*: ASTNode - case kind*: TypeKind: - of Function: - returnType*: Type - else: - discard - - -proc `==`*(self, other: Type): bool = - ## Compares two type objects - ## for equality - if system.`==`(self, nil): - return system.`==`(other, nil) - elif system.`==`(other, nil): - return system.`==`(self, nil) - if self.kind != other.kind: - return false - case self.kind: - of {Int8, UInt8, Int16, UInt16, Int32, - UInt32, Int64, UInt64, Float32, Float64, - Char, Byte, String, Nil, Nan, Bool, Inf}: - return true - of Function: - discard # TODO - else: - discard - - -proc toIntrinsic*(name: string): Type = - ## Converts a string to an intrinsic - ## type if it is valid and returns nil - ## otherwise - if name in ["int", "int64", "i64"]: - return Type(kind: Int64) - elif name in ["uint64", "u64"]: - return Type(kind: UInt64) - elif name in ["int32", "i32"]: - return Type(kind: Int32) - elif name in ["uint32", "u32"]: - return Type(kind: UInt32) - elif name in ["int16", "i16"]: - return Type(kind: Int16) - elif name in ["uint16", "u16"]: - return Type(kind: UInt16) - elif name in ["int8", "i8"]: - return Type(kind: Int8) - elif name in ["uint8", "u8"]: - return Type(kind: UInt8) - elif name in ["f64", "float", "float64"]: - return Type(kind: Float64) - elif name in ["f32", "float32"]: - return Type(kind: Float32) - elif name == "byte": - return Type(kind: Byte) - elif name == "char": - return Type(kind: Char) - elif name == "nan": - return Type(kind: Nan) - elif name == "nil": - return Type(kind: Nil) - elif name == "inf": - return Type(kind: Inf) - elif name == "bool": - return Type(kind: Bool) - else: - return nil diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim index 0bf933a..9cb773e 100644 --- a/src/frontend/parser.nim +++ b/src/frontend/parser.nim @@ -16,7 +16,7 @@ import strformat import strutils - +import tables import meta/token import meta/ast @@ -28,9 +28,29 @@ export token, ast, errors type - LoopContext = enum + LoopContext {.pure.} = enum Loop, None - + Precedence {.pure.} = enum + ## Operator precedence + ## clearly stolen from + ## nim + Arrow = 0, + Assign, + Or, + And, + Compare, + Addition, + Multiplication, + Power, + None # Used for stuff that isn't an operator + + OperatorTable = ref object + ## A table for storing and + ## handling the precedence + ## of operators + tokens: seq[string] + precedence: TableRef[Precedence, seq[string]] + Parser* = ref object ## A recursive-descent top-down ## parser implementation @@ -63,27 +83,69 @@ type currentFunction: Declaration # Stores the current scope depth (0 = global, > 0 local) scopeDepth: int - # We store user-defined operators for later use - operators: seq[string] + operators: OperatorTable -proc newParser*(): Parser = +proc newOperatorTable: OperatorTable = + ## Initializes a new OperatorTable + ## object + new(result) + result.tokens = @[] + result.precedence = newTable[Precedence, seq[string]]() + for prec in Precedence: + result.precedence[prec] = @[] + + +proc addOperator(self: OperatorTable, lexeme: string) = + ## Adds an operator to the table. Its precedence + ## is inferred from the operator's lexeme (the + ## criteria are similar to Nim's) + if lexeme in self.tokens: + return # We've already added it! + var prec = Precedence.high() + if lexeme.len() >= 2 and lexeme[^3..^1] in ["->", "~>", "=>"]: + prec = Arrow + elif lexeme.endsWith("=") and lexeme[0] notin {'<', '>', '!', '?', '~', '='}: + prec = Assign + elif lexeme[0] in {'$', } or lexeme == "**": + prec = Power + elif lexeme[0] in {'*', '%', '/', '\\'}: + prec = Multiplication + elif lexeme[0] in {'+', '-', '|', '~'}: + prec = Addition + elif lexeme[0] in {'<', '>', '=', '!'}: + prec = Compare + elif lexeme == "and": + prec = Precedence.And + elif lexeme == "or": + prec = Precedence.Or + self.tokens.add(lexeme) + self.precedence[prec].add(lexeme) + + +proc getPrecedence(self: OperatorTable, lexeme: string): Precedence = + ## Gets the precedence of a given operator + for (prec, operators) in self.precedence.pairs(): + if lexeme in operators: + return prec + + +proc newParser*: Parser = ## Initializes a new Parser object new(result) result.current = 0 result.file = "" result.tokens = @[] result.currentFunction = nil - result.currentLoop = None + result.currentLoop = LoopContext.None result.scopeDepth = 0 + result.operators = newOperatorTable() + # Public getters for improved error formatting proc getCurrent*(self: Parser): int {.inline.} = self.current -proc getCurrentToken*(self: Parser): Token = - if self.getCurrent() >= self.tokens.high() or self.getCurrent() - 1 < 0: - return self.tokens[^1] - else: - return self.tokens[self.current - 1] +proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >= self.tokens.high() or + self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1]) # Handy templates to make our life easier, thanks nim! template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1) @@ -128,15 +190,26 @@ proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} = raise newException(ParseError, errorMessage) -proc check(self: Parser, kind: TokenType, distance: int = 0): bool = +# Why do we allow strings or enum members of TokenType? Well, it's simple: +# symbols like ":" and "=" are both valid operator names (therefore they are +# tokenized as symbols), but they are also used in a context where they are just +# separators (for example, the colon is used in type declarations). Since we can't +# tell at tokenization time which of the two contexts we're in, we just treat everything +# as a symbol and in the cases where we need a specific token we just match the string +# directly +proc check[T: TokenType or string](self: Parser, kind: T, distance: int = 0): bool = ## Checks if the given token at the given distance ## matches the expected kind and returns a boolean. ## The distance parameter is passed directly to ## self.peek() - self.peek(distance).kind == kind + when T is TokenType: + self.peek(distance).kind == kind + else: + when T is string: + self.peek(distance).lexeme == kind + - -proc check(self: Parser, kind: openarray[TokenType]): bool = +proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = ## Calls self.check() in a loop with each entry of ## the given openarray of token kinds and returns ## at the first match. Note that this assumes @@ -148,17 +221,17 @@ proc check(self: Parser, kind: openarray[TokenType]): bool = return false -proc match(self: Parser, kind: TokenType): bool = +proc match[T: TokenType or string](self: Parser, kind: T): bool = ## Behaves like self.check(), except that when a token ## matches it is also consumed - if self.check(kind,): + if self.check(kind): discard self.step() result = true else: result = false -proc match(self: Parser, kind: openarray[TokenType]): bool = +proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = ## Calls self.match() in a loop with each entry of ## the given openarray of token kinds and returns ## at the first match. Note that this assumes @@ -170,7 +243,7 @@ proc match(self: Parser, kind: openarray[TokenType]): bool = result = false -proc expect(self: Parser, kind: TokenType, message: string = "") = +proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "") = ## Behaves like self.match(), except that ## when a token doesn't match, an error ## is raised. If no error message is @@ -182,16 +255,16 @@ proc expect(self: Parser, kind: TokenType, message: string = "") = self.error(message) -proc expect(self: Parser, kinds: openarray[TokenType], message: string = "") = +proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "") = ## Behaves like self.expect(), except that ## an error is raised only if none of the ## given token kinds matches - for kind in kinds: + for k in kind: if self.match(kind): return if message.len() == 0: self.error(&"""expecting any of the following tokens: {kinds.join(", ")}, but got {self.peek().kind} instead""") - + # Forward declarations proc expression(self: Parser): Expression @@ -200,6 +273,7 @@ proc statement(self: Parser): Statement proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration proc declaration(self: Parser): Declaration +# End of forward declarations proc primary(self: Parser): Expression = @@ -316,168 +390,116 @@ proc call(self: Parser): Expression = self.expect(Identifier, "expecting attribute name after '.'") result = newGetItemExpr(result, newIdentExpr(self.peek(-1)), self.peek(-1)) elif self.match(LeftBracket): - # Slicing such as a[1:2] + # Slicing such as a[1:2], which is then + # translated to `[]`(a, 1, 2) let tok = self.peek(-1) var ends: seq[Expression] = @[] while not self.check(RightBracket) and not self.done(): - if self.check(Colon): + if self.check(":"): ends.add(newNilExpr(Token(lexeme: "nil"))) discard self.step() else: ends.add(self.expression()) - discard self.match(Colon) + discard self.match(":") self.expect(RightBracket, "expecting ']'") result = newSliceExpr(result, ends, tok) else: break +## Operator parsing handlers proc unary(self: Parser): Expression = - ## Parses unary expressions - if self.match([Minus, Tilde, LogicalNot, Plus]): - result = newUnaryExpr(self.peek(-1), self.unary()) + if self.peek().lexeme in self.operators.tokens: + result = newUnaryExpr(self.step(), self.unary()) else: result = self.call() -proc customUnaryOperator(self: Parser): Expression = - ## Parses user-defined unary expressions - if self.peek().lexeme in self.operators: - discard self.step() - result = newUnaryExpr(self.peek(-1), self.customUnaryOperator()) - else: - result = self.unary() - - -proc pow(self: Parser): Expression = - ## Parses exponentiation expressions - result = self.customUnaryOperator() +proc parsePow(self: Parser): Expression = + result = self.unary() var operator: Token var right: Expression - while self.match(DoubleStar): - operator = self.peek(-1) - right = self.customUnaryOperator() - result = newBinaryExpr(result, operator, right) - - -proc mul(self: Parser): Expression = - ## Parses multiplication and division expressions - result = self.pow() - var operator: Token - var right: Expression - while self.match([Slash, Percentage, FloorDiv, Star]): - operator = self.peek(-1) - right = self.pow() - result = newBinaryExpr(result, operator, right) - - -proc add(self: Parser): Expression = - ## Parses addition and subtraction expressions - result = self.mul() - var operator: Token - var right: Expression - while self.match([Plus, Minus]): - operator = self.peek(-1) - right = self.mul() - result = newBinaryExpr(result, operator, right) - - -proc comparison(self: Parser): Expression = - ## Parses other comparison expressions - ## and some other operators - result = self.add() - var operator: Token - var right: Expression - while self.match([LessThan, GreaterThan, LessOrEqual, GreaterOrEqual, Is, Of, IsNot]): - operator = self.peek(-1) - right = self.add() - result = newBinaryExpr(result, operator, right) - - -proc equality(self: Parser): Expression = - ## Parses equality expressions - result = self.comparison() - var operator: Token - var right: Expression - while self.match([DoubleEqual, NotEqual]): - operator = self.peek(-1) - right = self.comparison() - result = newBinaryExpr(result, operator, right) - - -proc logicalAnd(self: Parser): Expression = - ## Parses logical and expressions - ## (a and b) - result = self.equality() - var operator: Token - var right: Expression - while self.match(LogicalAnd): - operator = self.peek(-1) - right = self.equality() - result = newBinaryExpr(result, operator, right) - - -proc logicalOr(self: Parser): Expression = - ## Parses logical or expressions - ## (a or b) - result = self.logicalAnd() - var operator: Token - var right: Expression - while self.match(LogicalOr): - operator = self.peek(-1) - right = self.logicalAnd() - result = newBinaryExpr(result, operator, right) - - -proc bitwiseAnd(self: Parser): Expression = - ## Parses a & b expressions - result = self.logicalOr() - var operator: Token - var right: Expression - while self.match(Pipe): - operator = self.peek(-1) - right = self.logicalOr() - result = newBinaryExpr(result, operator, right) - - -proc bitwiseOr(self: Parser): Expression = - ## Parses a | b expressions - result = self.bitwiseAnd() - var operator: Token - var right: Expression - while self.match(Ampersand): - operator = self.peek(-1) - right = self.bitwiseAnd() - result = newBinaryExpr(result, operator, right) - - -proc customBinaryOperator(self: Parser): Expression = - ## Parses user-defined binary operators - result = self.bitwiseOr() - var operator: Token - var right: Expression - while self.peek().lexeme in self.operators: + while self.operators.getPrecedence(self.peek().lexeme) == Power: operator = self.step() - right = self.bitwiseOr() + right = self.unary() result = newBinaryExpr(result, operator, right) -proc assignment(self: Parser): Expression = - ## Parses assignment, the highest-level - ## expression (including stuff like a.b = 1). - ## Slice assignments are also parsed here - result = self.customBinaryOperator() - if self.match([Equal, InplaceAdd, InplaceSub, InplaceDiv, InplaceMod, - InplacePow, InplaceMul, InplaceXor, InplaceAnd, InplaceOr, - InplaceFloorDiv, InplaceRightShift, InplaceLeftShift]): - let tok = self.peek(-1) +proc parseMul(self: Parser): Expression = + result = self.parsePow() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Multiplication: + operator = self.step() + right = self.parsePow() + result = newBinaryExpr(result, operator, right) + + +proc parseAdd(self: Parser): Expression = + result = self.parseMul() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Addition: + operator = self.step() + right = self.parseMul() + result = newBinaryExpr(result, operator, right) + + +proc parseCmp(self: Parser): Expression = + result = self.parseAdd() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Compare: + operator = self.step() + right = self.parseAdd() + result = newBinaryExpr(result, operator, right) + + +proc parseAnd(self: Parser): Expression = + result = self.parseCmp() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Precedence.And: + operator = self.step() + right = self.parseCmp() + result = newBinaryExpr(result, operator, right) + + +proc parseOr(self: Parser): Expression = + result = self.parseAnd() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or: + operator = self.step() + right = self.parseAnd() + result = newBinaryExpr(result, operator, right) + + +proc parseAssign(self: Parser): Expression = + result = self.parseOr() + if self.operators.getPrecedence(self.peek().lexeme) == Assign: + let tok = self.step() var value = self.expression() - if result.kind in {identExpr, sliceExpr}: - result = newAssignExpr(result, value, tok) - elif result.kind == getItemExpr: - result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok) - else: - self.error("invalid assignment target") + case result.kind: + of identExpr, sliceExpr: + result = newAssignExpr(result, value, tok) + of getItemExpr: + result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok) + else: + self.error("invalid assignment target") + + +proc parseArrow(self: Parser): Expression = + result = self.parseAssign() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or: + operator = self.step() + right = self.parseAssign() + result = newBinaryExpr(result, operator, right) + + +## End of operator parsing handlers proc assertStmt(self: Parser): Statement = @@ -602,7 +624,7 @@ proc forEachStmt(self: Parser): Statement = self.expect(LeftParen, "expecting '(' after 'foreach'") self.expect(Identifier) var identifier = newIdentExpr(self.peek(-1)) - self.expect(Colon) + self.expect(":") var expression = self.expression() self.expect(RightParen) var body = self.statement() @@ -628,25 +650,16 @@ proc tryStmt(self: Parser): Statement = ## Parses try/except/else/finally blocks let tok = self.peek(-1) var body = self.statement() - var handlers: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]] = @[] + var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[] var finallyClause: Statement var elseClause: Statement - var asName: IdentExpr var excName: Expression var handlerBody: Statement while self.match(Except): excName = self.expression() if excName.kind == identExpr: handlerBody = self.statement() - handlers.add((body: handlerBody, exc: IdentExpr(excName), name: asName)) - asName = nil - elif excName.kind == binaryExpr and BinaryExpr(excName).operator.kind == As: - asName = IdentExpr(BinaryExpr(excName).b) - if BinaryExpr(excName).b.kind != identExpr: - self.error("expecting alias name after 'except ... as'") - elif BinaryExpr(excName).a.kind != identExpr: - self.error("expecting exception name") - excName = BinaryExpr(excName).a + handlers.add((body: handlerBody, exc: IdentExpr(excName))) else: excName = nil if self.match(Else): @@ -760,16 +773,16 @@ proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declarat var value: Expression self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'") var name = newIdentExpr(self.peek(-1)) - let isPrivate = not self.match(Star) + let isPrivate = not self.match("*") self.checkDecl(isPrivate) var valueType: IdentExpr - if self.match(Colon): + if self.match(":"): # We don't enforce it here because # the compiler may be able to infer # the type later! self.expect(Identifier, "expecting type name after ':'") valueType = newIdentExpr(self.peek(-1)) - if self.match(Equal): + if self.match("="): value = self.expression() if isConst and not value.isConst(): self.error("constant initializer is not a constant") @@ -792,13 +805,16 @@ proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declarat proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]], parameter: var tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool], defaults: var seq[Expression]) = + ## Helper to parse declaration arguments and avoid code duplication while not self.check(RightParen): if arguments.len > 255: self.error("cannot have more than 255 arguments in function declaration") self.expect(Identifier, "expecting parameter name") parameter.name = newIdentExpr(self.peek(-1)) - if self.match(Colon): + if self.match(":"): parameter.mutable = false + parameter.isPtr = false + parameter.isRef = false if self.match(Var): parameter.mutable = true elif self.match(Ptr): @@ -816,7 +832,7 @@ proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, if parameter in arguments: self.error("duplicate parameter name in function declaration") arguments.add(parameter) - if self.match(Equal): + if self.match("="): defaults.add(self.expression()) elif defaults.len() > 0: self.error("positional argument cannot follow default argument in function declaration") @@ -829,7 +845,7 @@ proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration = - ## Parses functions, coroutines, generators, anonymous functions and custom operators + ## Parses functions, coroutines, generators, anonymous functions and operators let tok = self.peek(-1) var enclosingFunction = self.currentFunction var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]] = @[] @@ -842,15 +858,15 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL # or an expression. Fortunately anonymous functions # are nameless, so we can sort the ambiguity by checking # if there's an identifier after the keyword - self.expect(Identifier, &"expecting function name after '{tok.lexeme}'") - self.checkDecl(not self.check(Star)) + self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'") + self.checkDecl(not self.check("*")) self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()), isAsync=isAsync, isGenerator=isGenerator, isPrivate=true, token=tok, pragmas=(@[]), returnType=nil) FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1)) - if self.match(Star): + if self.match("*"): FunDecl(self.currentFunction).isPrivate = false - elif not isLambda and self.check([LeftBrace, Colon, LeftParen]): + elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")): # We do a bit of hacking to pretend we never # wanted to parse this as a declaration in # the first place and pass control over to @@ -867,7 +883,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL returnType=nil) elif not isOperator: self.error("funDecl: invalid state") - if self.match(Colon): + if self.match(":"): # Function has explicit return type if self.match([Function, Coroutine, Generator]): # The function's return type is another @@ -883,7 +899,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool] if self.match(LeftParen): self.parseDeclArguments(arguments, parameter, defaults) - if self.match(Colon): + if self.match(":"): LambdaExpr(returnType).returnType = self.expression() else: returnType = self.expression() @@ -891,7 +907,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL self.expect(LeftParen) var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool] self.parseDeclArguments(arguments, parameter, defaults) - if self.match(Colon): + if self.match(":"): # Function's return type if self.match([Function, Coroutine, Generator]): var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]] = @[] @@ -902,7 +918,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool] if self.match(LeftParen): self.parseDeclArguments(arguments, parameter, defaults) - if self.match(Colon): + if self.match(":"): LambdaExpr(returnType).returnType = self.expression() else: returnType = self.expression() @@ -926,11 +942,9 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL result = self.currentFunction if isOperator: if arguments.len() == 0: - self.error("cannot declare argument-less operator") - elif arguments.len() > 2: - self.error("cannot declare operator with more than 2 arguments") + self.error("cannot declare operator without arguments") elif FunDecl(result).returnType == nil: - self.error("operator cannot have void return type") + self.error("operators must have a return type") for argument in arguments: if argument.valueType == nil: self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration") @@ -939,7 +953,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL proc expression(self: Parser): Expression = ## Parses expressions - result = self.assignment() # Highest-level expression + result = self.parseArrow() # Highest-level expression proc expressionStatement(self: Parser): Statement = @@ -1025,14 +1039,10 @@ proc declaration(self: Parser): Declaration = of Operator: discard self.step() result = self.funDecl(isOperator=true) - of Type, TokenType.Whitespace, TokenType.Tab: - discard self.step() # TODO - of Comment: - let tok = self.peek() - if tok.lexeme.startsWith("#pragma["): - discard # TODO: Pragmas - elif tok.lexeme.startsWith("##"): - discard # TODO: Docstrings + of Type, TokenType.Whitespace, TokenType.Tab, Comment: + # TODO: Comments, pragmas, docstrings + discard self.step() # TODO + return newNilExpr(Token(lexeme: "nil")) else: result = Declaration(self.statement()) @@ -1042,22 +1052,22 @@ proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] = self.tokens = tokens self.file = file self.current = 0 - self.currentLoop = None + self.currentLoop = LoopContext.None self.currentFunction = nil self.scopeDepth = 0 - self.operators = @[] + self.operators = newOperatorTable() for i, token in self.tokens: # We do a first pass over the tokens - # to find user-defined operators. - # Note that this relies on the lexer - # ending the input with an EOF token + # to find operators. Note that this + # relies on the lexer ending the input + # with an EOF token if token.kind == Operator: if i == self.tokens.high(): self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)") - self.operators.add(self.tokens[i + 1].lexeme) + self.operators.addOperator(self.tokens[i + 1].lexeme) if i == self.tokens.high() and token.kind != EndOfFile: # Since we're iterating this list anyway might as # well perform some extra checks self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)") while not self.done(): - result.add(self.declaration()) \ No newline at end of file + result.add(self.declaration()) diff --git a/src/test.nim b/src/test.nim index c831ff3..f862739 100644 --- a/src/test.nim +++ b/src/test.nim @@ -2,8 +2,6 @@ import sequtils import strformat import strutils -import nimSHA2 -import times import jale/editor as ed import jale/templates import jale/plugin/defaults @@ -18,7 +16,6 @@ import frontend/parser as p import frontend/compiler as c import backend/vm as v import util/serializer as s -import util/debugger # Forward declarations @@ -28,10 +25,18 @@ proc getLineEditor: LineEditor # Handy dandy compile-time constants const debugLexer = false const debugParser = false -const debugCompiler = true +const debugCompiler = false const debugSerializer = false +when debugSerializer: + import nimSHA2 + import times + +when debugCompiler: + import util/debugger + + when isMainModule: setControlCHook(proc () {.noconv.} = quit(0)) var @@ -61,7 +66,7 @@ when isMainModule: if input.len() == 0: continue # Currently the parser doesn't handle these tokens well - tokens = filter(tokenizer.lex(input, ""), proc (x: Token): bool = x.kind notin {TokenType.Whitespace, Tab}) + tokens = filter(tokenizer.lex(input, "stdin"), proc (x: Token): bool = x.kind notin {TokenType.Whitespace, Tab}) if tokens.len() == 0: continue when debugLexer: @@ -72,23 +77,25 @@ when isMainModule: break echo "\t", token echo "" - tree = parser.parse(tokens, "") + tree = parser.parse(tokens, "stdin") + if tree.len() == 0: + continue when debugParser: echo "Parsing step:" for node in tree: echo "\t", node echo "" - compiled = compiler.compile(tree, "") + compiled = compiler.compile(tree, "stdin") when debugCompiler: echo "Compilation step:" stdout.write("\t") echo &"""Raw byte stream: [{compiled.code.join(", ")}]""" echo "\nBytecode disassembler output below:\n" - disassembleChunk(compiled, "") + disassembleChunk(compiled, "stdin") echo "" - serializer.dumpToFile(compiled, input, "", "stdin.pbc") - serializedRaw = serializer.dumpBytes(compiled, input, "") + serializer.dumpToFile(compiled, input, "stdin", "stdin.pbc") + serializedRaw = serializer.dumpBytes(compiled, input, "stdin") serialized = serializer.loadFile("stdin.pbc") when debugSerializer: echo "Serialization step: " @@ -116,29 +123,29 @@ when isMainModule: vm.run(serialized.chunk) except IOError: break - # TODO: The code for error reporting completely - # breaks down with multiline input, fix it + # TODO: The code for error reporting completely + # breaks down with multiline input, fix it except LexingError: let lineNo = tokenizer.getLine() let relPos = tokenizer.getRelPos(lineNo) let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() echo getCurrentExceptionMsg() - echo &"Source line: {line}" - echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + # echo &"Source line: {line}" + # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) except ParseError: let lineNo = parser.getCurrentToken().line let relPos = tokenizer.getRelPos(lineNo) let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() echo getCurrentExceptionMsg() - echo &"Source line: {line}" - echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len()) + # echo &"Source line: {line}" + # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len()) except CompileError: let lineNo = compiler.getCurrentNode().token.line let relPos = tokenizer.getRelPos(lineNo) let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() echo getCurrentExceptionMsg() - echo &"Source line: {line}" - echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len()) + # echo &"Source line: {line}" + # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len()) except SerializationError: echo getCurrentExceptionMsg() quit(0) @@ -151,10 +158,6 @@ proc fillSymbolTable(tokenizer: Lexer) = ## and keywords # 1-byte symbols - tokenizer.symbols.addSymbol("+", Plus) - tokenizer.symbols.addSymbol("-", Minus) - tokenizer.symbols.addSymbol("*", Star) - tokenizer.symbols.addSymbol("/", Slash) tokenizer.symbols.addSymbol("{", LeftBrace) tokenizer.symbols.addSymbol("}", RightBrace) tokenizer.symbols.addSymbol("(", LeftParen) @@ -163,45 +166,14 @@ proc fillSymbolTable(tokenizer: Lexer) = tokenizer.symbols.addSymbol("]", RightBracket) tokenizer.symbols.addSymbol(".", Dot) tokenizer.symbols.addSymbol(",", Comma) - tokenizer.symbols.addSymbol(">", TokenType.GreaterThan) - tokenizer.symbols.addSymbol("<", TokenType.LessThan) tokenizer.symbols.addSymbol(";", Semicolon) - tokenizer.symbols.addSymbol("=", Equal) - tokenizer.symbols.addSymbol("~", Tilde) - tokenizer.symbols.addSymbol("%", Percentage) - tokenizer.symbols.addSymbol(":", Colon) - tokenizer.symbols.addSymbol("&", Ampersand) - tokenizer.symbols.addSymbol("^", Caret) - tokenizer.symbols.addSymbol("|", Pipe) - # 2-byte symbols - tokenizer.symbols.addSymbol("+=", InplaceAdd) - tokenizer.symbols.addSymbol("-=", InplaceSub) - tokenizer.symbols.addSymbol(">=", TokenType.GreaterOrEqual) - tokenizer.symbols.addSymbol("<=", TokenType.LessOrEqual) - tokenizer.symbols.addSymbol("*=", InplaceMul) - tokenizer.symbols.addSymbol("/=", InplaceDiv) - tokenizer.symbols.addSymbol("&=", InplaceAnd) - tokenizer.symbols.addSymbol("!=", NotEqual) - tokenizer.symbols.addSymbol("|=", InplaceOr) - tokenizer.symbols.addSymbol("^=", InplaceXor) - tokenizer.symbols.addSymbol("%=", InplaceMod) - tokenizer.symbols.addSymbol("//", FloorDiv) - tokenizer.symbols.addSymbol("==", DoubleEqual) - tokenizer.symbols.addSymbol("**", DoubleStar) - tokenizer.symbols.addSymbol(">>", RightShift) - tokenizer.symbols.addSymbol("<<", LeftShift) - # 3-byte symbols - tokenizer.symbols.addSymbol("//=", InplaceFloorDiv) - tokenizer.symbols.addSymbol("**=", InplacePow) - tokenizer.symbols.addSymbol(">>=", InplaceRightShift) - tokenizer.symbols.addSymbol("<<=", InplaceLeftShift) # Keywords - tokenizer.symbols.addKeyword("type", Type) + tokenizer.symbols.addKeyword("type", TokenType.Type) tokenizer.symbols.addKeyword("enum", Enum) tokenizer.symbols.addKeyword("case", Case) tokenizer.symbols.addKeyword("operator", Operator) tokenizer.symbols.addKeyword("generator", Generator) - tokenizer.symbols.addKeyword("function", Function) + tokenizer.symbols.addKeyword("function", TokenType.Function) tokenizer.symbols.addKeyword("coroutine", Coroutine) tokenizer.symbols.addKeyword("break", TokenType.Break) tokenizer.symbols.addKeyword("continue", Continue) @@ -231,26 +203,12 @@ proc fillSymbolTable(tokenizer: Lexer) = tokenizer.symbols.addKeyword("nan", NotANumber) tokenizer.symbols.addKeyword("inf", Infinity) tokenizer.symbols.addKeyword("nil", TokenType.Nil) - tokenizer.symbols.addKeyword("true", TokenType.True) - tokenizer.symbols.addKeyword("false", TokenType.False) - # These are technically operators, but since - # they fit neatly into the definition for an - # identifier/keyword we parse them as such - # and specialize them later - tokenizer.symbols.addKeyword("isnot", IsNot) - tokenizer.symbols.addKeyword("is", Is) - tokenizer.symbols.addKeyword("is", As) - tokenizer.symbols.addKeyword("of", Of) - tokenizer.symbols.addKeyword("and", TokenType.LogicalAnd) - tokenizer.symbols.addKeyword("or", TokenType.LogicalOr) - tokenizer.symbols.addKeyword("not", TokenType.LogicalNot) + tokenizer.symbols.addKeyword("true", True) + tokenizer.symbols.addKeyword("false", False) tokenizer.symbols.addKeyword("ref", Ref) tokenizer.symbols.addKeyword("ptr", Ptr) - - # P.S.: There's no reason for the order of addition of - # symbols to be ascending in length (the symbol table uses - # a hashmap internally). You can add/remove symbols (and - # keywords for that matter) as you like! + for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]: + tokenizer.symbols.addSymbol(sym, Symbol) proc getLineEditor: LineEditor = diff --git a/src/util/serializer.nim b/src/util/serializer.nim index aba4d8c..14b25be 100644 --- a/src/util/serializer.nim +++ b/src/util/serializer.nim @@ -17,6 +17,7 @@ import ../frontend/meta/bytecode import ../frontend/meta/token import ../config import multibyte +import ../frontend/compiler import strformat import strutils @@ -203,7 +204,9 @@ proc readConstants(self: Serializer, stream: seq[byte]): int = stream = stream[1..^1] let size = self.bytesToInt([stream[0], stream[1], stream[2]]) stream = stream[3..^1] - discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..