From b0515d3573759fce743277dbe10721489e675fe6 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Sun, 29 May 2022 15:54:01 +0200 Subject: [PATCH] Refactored the type system which no longer relies on AST node objects. Added types for ref, ptr and mutable types --- src/backend/vm.nim | 3 +- src/frontend/compiler.nim | 425 +++++++++++++++----------------------- 2 files changed, 174 insertions(+), 254 deletions(-) diff --git a/src/backend/vm.nim b/src/backend/vm.nim index f25fe09..4c291d0 100644 --- a/src/backend/vm.nim +++ b/src/backend/vm.nim @@ -14,8 +14,9 @@ ## The Peon runtime environment import types -import strformat import ../config +when DEBUG_TRACE_VM: + import strformat import ../frontend/meta/bytecode import ../util/multibyte diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index 948e8fc..b1c7f8a 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -32,27 +32,29 @@ export multibyte type - TypeKind* = enum + TypeKind = enum ## An enumeration of compile-time ## types Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32, Float64, Char, Byte, String, Function, CustomType, Nil, Nan, Bool, Inf, Typedesc, Generic, - + Mutable, Reference, Pointer Any # Any is used internally in a few cases, # for example when looking for operators # when only the type of the arguments is of # interest - Type* = ref object + Type = ref object ## A wrapper around ## compile-time types - node*: ASTNode - case kind*: TypeKind: + case kind: TypeKind: of Function: + name: string isLambda: bool - args*: seq[Type] - returnType*: Type + args: seq[tuple[name: string, kind: Type]] + returnType: Type + of Mutable, Reference, Pointer: + value: Type else: discard @@ -81,10 +83,13 @@ type # The name's type valueType: Type # For functions, this marks where the function's - # code begins. For variables, this stores their - # position in the stack (used for closures) + # code begins. For variables, this stores where + # their StoreVar/StoreHeap instruction was emitted codePos: int + # Is the name closed over (i.e. used in a closure)? isClosedOver: bool + # Where is this node declared in the file? + line: int Loop = object ## A "loop object" used ## by the compiler to emit @@ -124,7 +129,7 @@ type # The current function being compiled currentFunction: FunDecl # Are optimizations turned on? - enableOptimizations*: bool + enableOptimizations: bool # The current loop being compiled (used to # keep track of where to jump) currentLoop: Loop @@ -254,11 +259,15 @@ proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] = proc emitConstant(self: Compiler, obj: Expression, kind: Type) = - ## Emits a LoadConstant instruction along + ## Emits a constant instruction along ## with its operand case self.inferType(obj).kind: of Int64: self.emitByte(LoadInt64) + of UInt64: + self.emitByte(LoadUInt64) + of Int32: + self.emitByte(LoadInt32) else: discard # TODO self.emitBytes(self.makeConstant(obj, kind)) @@ -267,11 +276,11 @@ proc emitConstant(self: Compiler, obj: Expression, kind: Type) = proc emitJump(self: Compiler, opcode: OpCode): int = ## Emits a dummy jump offset to be patched later. Assumes ## the largest offset (emits 4 bytes, one for the given jump - ## opcode, while the other 3 are for the jump offset which is set - ## to the maximum unsigned 24 bit integer). If the shorter + ## opcode, while the other 3 are for the jump offset, which + ## is set to the maximum unsigned 24 bit integer). If the shorter ## 16 bit alternative is later found to be better suited, patchJump - ## will fix this. This function returns the absolute index into the - ## chunk's bytecode array where the given placeholder instruction was written + ## will fix this. Returns the absolute index into the chunk's + ## bytecode array where the given placeholder instruction was written self.emitByte(opcode) self.emitBytes((0xffffff).toTriple()) result = self.chunk.code.len() - 4 @@ -281,9 +290,10 @@ proc patchJump(self: Compiler, offset: int) = ## Patches a previously emitted relative ## jump using emitJump. Since emitJump assumes ## a long jump, this also shrinks the jump - ## offset and changes the bytecode instruction if possible - ## (i.e. jump is in 16 bit range), but the converse is also - ## true (i.e. it might change a regular jump into a long one) + ## offset and changes the bytecode instruction + ## if possible (i.e. jump is in 16 bit range), + ## but the converse is also true (i.e. it might + ## change a regular jump into a long one) var jump: int = self.chunk.code.len() - offset if jump > 16777215: self.error("cannot jump more than 16777216 bytecode instructions") @@ -291,6 +301,10 @@ proc patchJump(self: Compiler, offset: int) = case OpCode(self.chunk.code[offset]): of LongJumpForwards: self.chunk.code[offset] = JumpForwards.uint8() + # We do this because a relative jump + # does not take its argument into account + # because it is hardcoded in the bytecode + # itself jump -= 4 of LongJumpBackwards: self.chunk.code[offset] = JumpBackwards.uint8() @@ -302,14 +316,10 @@ proc patchJump(self: Compiler, offset: int) = of LongJumpIfFalseOrPop: self.chunk.code[offset] = JumpIfFalseOrPop.uint8() of JumpForwards, JumpBackwards: - # We do this because a relative jump - # does not normally take into account - # its argument, which is hardcoded in - # the bytecode itself jump -= 3 else: discard - self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty) + self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty) let offsetArray = (jump - 1).toDouble() # -1 since we got rid of 1 byte! self.chunk.code[offset + 1] = offsetArray[0] self.chunk.code[offset + 2] = offsetArray[1] @@ -328,10 +338,6 @@ proc patchJump(self: Compiler, offset: int) = of JumpIfFalseOrPop: self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8() of LongJumpForwards, LongJumpBackwards: - # We do this because a relative jump - # does not normally take into account - # its argument, which is hardcoded in - # the bytecode itself jump -= 4 else: discard @@ -414,133 +420,48 @@ proc detectClosureVariable(self: Compiler, name: Name, name.isClosedOver = true -proc compareTypesWithNullNode(self: Compiler, a, b: Type): bool = - ## Compares two types without using information from - ## AST nodes +proc compareTypes(self: Compiler, a, b: Type): bool = + ## Compares two type objects + ## for equality (works with nil!) + + # The nil code here is for void functions (when + # we compare their return types) if a == nil: return b == nil elif b == nil: return a == nil - if a.kind != b.kind: + elif a.kind != b.kind: + # Next, we see the type discriminant: + # If they're different, then they can't + # be the same type! return false case a.kind: + # If all previous checks pass, it's time + # to go through each possible type peon + # supports and compare it + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, Nan, Bool, Inf: + # A value type's type is always equal to + # another one's + return true + of Reference, Pointer, Mutable: + # Here we already know that both + # a and b are of either of the three + # types in this branch, so we just need + # to compare their values + return self.compareTypes(a.value, b.value) of Function: + # Functions are a bit trickier if a.args.len() != b.args.len(): return false elif not self.compareTypes(a.returnType, b.returnType): if a.returnType.kind != Any and b.returnType.kind != Any: return false for (argA, argB) in zip(a.args, b.args): - if not self.compareTypes(argA, argB): + if not self.compareTypes(argA.kind, argB.kind): return false - return true - else: - discard - - -proc compareTypes(self: Compiler, a, b: Type): bool = - ## Compares two type objects - ## for equality (works with nil!) - if a == nil: - return b == nil - elif b == nil: - return a == nil - if a.kind != b.kind: - return false - case a.kind: - of Int8, UInt8, Int16, UInt16, Int32, - UInt32, Int64, UInt64, Float32, Float64, - Char, Byte, String, Nil, Nan, Bool, Inf: - return true - of Function: - if a.node == nil or b.node == nil: - return self.compareTypesWithNullNode(a, b) - if not a.isLambda and not b.isLambda: - let - a = FunDecl(a.node) - b = FunDecl(b.node) - typeOfA = self.inferType(a.returnType) - typeOfB = self.inferType(b.returnType) - if a.name.token.lexeme != b.name.token.lexeme: - return false - elif a.arguments.len() != b.arguments.len(): - return false - elif not self.compareTypes(typeOfA, typeOfB): - if typeOfA.kind != Any and typeOfB.kind != Any: - return false - for (argA, argB) in zip(a.arguments, b.arguments): - if argA.mutable != argB.mutable: - return false - elif argA.isRef != argB.isRef: - return false - elif argA.isPtr != argB.isPtr: - return false - elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)): - return false - return true - elif a.isLambda and not b.isLambda: - let - a = LambdaExpr(a.node) - b = FunDecl(b.node) - typeOfA = self.inferType(a.returnType) - typeOfB = self.inferType(b.returnType) - if a.arguments.len() != b.arguments.len(): - return false - elif not self.compareTypes(typeOfA, typeOfB): - if typeOfA.kind != Any and typeOfB.kind != Any: - return false - for (argA, argB) in zip(a.arguments, b.arguments): - if argA.mutable != argB.mutable: - return false - elif argA.isRef != argB.isRef: - return false - elif argA.isPtr != argB.isPtr: - return false - elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)): - return false - return true - elif b.isLambda and not a.isLambda: - let - a = FunDecl(a.node) - b = LambdaExpr(b.node) - typeOfA = self.inferType(a.returnType) - typeOfB = self.inferType(b.returnType) - if a.arguments.len() != b.arguments.len(): - return false - elif not self.compareTypes(typeOfA, typeOfB): - if typeOfA.kind != Any and typeOfB.kind != Any: - return false - for (argA, argB) in zip(a.arguments, b.arguments): - if argA.mutable != argB.mutable: - return false - elif argA.isRef != argB.isRef: - return false - elif argA.isPtr != argB.isPtr: - return false - elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)): - return false - return true - else: - let - a = LambdaExpr(a.node) - b = LambdaExpr(b.node) - typeOfA = self.inferType(a.returnType) - typeOfB = self.inferType(b.returnType) - if a.arguments.len() != b.arguments.len(): - return false - elif not self.compareTypes(typeOfA, typeOfB): - if typeOfA.kind != Any and typeOfB.kind != Any: - return false - for (argA, argB) in zip(a.arguments, b.arguments): - if argA.mutable != argB.mutable: - return false - elif argA.isRef != argB.isRef: - return false - elif argA.isPtr != argB.isPtr: - return false - elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)): - return false - return true + return true else: discard @@ -587,47 +508,6 @@ proc toIntrinsic(name: string): Type = return nil -proc inferType(self: Compiler, node: LiteralExpr): Type = - ## Infers the type of a given literal expression - if node == nil: - return nil - case node.kind: - of intExpr, binExpr, octExpr, hexExpr: - let size = node.token.lexeme.split("'") - if len(size) notin 1..2: - self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") - if size.len() == 1: - return Type(node: node, kind: Int64) - let typ = size[1].toIntrinsic() - if not self.compareTypes(typ, nil): - return typ - else: - self.error(&"invalid type specifier '{size[1]}' for int") - of floatExpr: - let size = node.token.lexeme.split("'") - if len(size) notin 1..2: - self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") - if size.len() == 1 or size[1] == "f64": - return Type(node: node, kind: Float64) - let typ = size[1].toIntrinsic() - if not self.compareTypes(typ, nil): - return typ - else: - self.error(&"invalid type specifier '{size[1]}' for float") - of nilExpr: - return Type(node: node, kind: Nil) - of trueExpr: - return Type(node: node, kind: Bool) - of falseExpr: - return Type(node: node, kind: Bool) - of nanExpr: - return Type(node: node, kind: TypeKind.Nan) - of infExpr: - return Type(node: node, kind: TypeKind.Inf) - else: - discard # TODO - - proc toIntrinsic(self: Compiler, typ: Expression): Type = ## Gets an expression's intrinsic type, if ## possible @@ -645,6 +525,47 @@ proc toIntrinsic(self: Compiler, typ: Expression): Type = discard +proc inferType(self: Compiler, node: LiteralExpr): Type = + ## Infers the type of a given literal expression + if node == nil: + return nil + case node.kind: + of intExpr, binExpr, octExpr, hexExpr: + let size = node.token.lexeme.split("'") + if len(size) notin 1..2: + self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") + if size.len() == 1: + return Type(kind: Int64) + let typ = size[1].toIntrinsic() + if not self.compareTypes(typ, nil): + return typ + else: + self.error(&"invalid type specifier '{size[1]}' for int") + of floatExpr: + let size = node.token.lexeme.split("'") + if len(size) notin 1..2: + self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") + if size.len() == 1 or size[1] == "f64": + return Type(kind: Float64) + let typ = size[1].toIntrinsic() + if not self.compareTypes(typ, nil): + return typ + else: + self.error(&"invalid type specifier '{size[1]}' for float") + of nilExpr: + return Type(kind: Nil) + of trueExpr: + return Type(kind: Bool) + of falseExpr: + return Type(kind: Bool) + of nanExpr: + return Type(kind: TypeKind.Nan) + of infExpr: + return Type(kind: TypeKind.Inf) + else: + discard # TODO + + proc inferType(self: Compiler, node: Expression): Type = ## Infers the type of a given expression and ## returns it @@ -658,8 +579,6 @@ proc inferType(self: Compiler, node: Expression): Type = return name.valueType else: result = node.name.lexeme.toIntrinsic() - if result != nil: - result.node = node of unaryExpr: return self.inferType(UnaryExpr(node).a) of binaryExpr: @@ -676,49 +595,15 @@ proc inferType(self: Compiler, node: Expression): Type = return self.inferType(LiteralExpr(node)) of lambdaExpr: var node = LambdaExpr(node) - result = Type(kind: Function, returnType: nil, node: node, args: @[], isLambda: true) + result = Type(kind: Function, returnType: nil, args: @[], isLambda: true) if node.returnType != nil: result.returnType = self.inferType(node.returnType) for argument in node.arguments: - result.args.add(self.inferType(argument.valueType)) + result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType))) else: discard # Unreachable -proc typeToStr(self: Compiler, typ: Type): string = - ## Returns the string representation of a - ## type object - case typ.kind: - of Int8, UInt8, Int16, UInt16, Int32, - UInt32, Int64, UInt64, Float32, Float64, - Char, Byte, String, Nil, TypeKind.Nan, Bool, - TypeKind.Inf: - return ($typ.kind).toLowerAscii() - of Function: - result = "function (" - case typ.node.kind: - of funDecl: - var node = FunDecl(typ.node) - for i, argument in node.arguments: - result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.valueType))}" - if i < node.arguments.len() - 1: - result &= ", " - result &= ")" - of lambdaExpr: - var node = LambdaExpr(typ.node) - for i, argument in node.arguments: - result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}" - if i < node.arguments.len() - 1: - result &= ", " - result &= ")" - else: - discard # Unreachable - if typ.returnType != nil: - result &= &": {self.typeToStr(typ.returnType)}" - else: - discard - - proc inferType(self: Compiler, node: Declaration): Type = ## Infers the type of a given declaration ## and returns it @@ -740,6 +625,35 @@ proc inferType(self: Compiler, node: Declaration): Type = else: return # Unreachable + +proc typeToStr(self: Compiler, typ: Type): string = + ## Returns the string representation of a + ## type object + case typ.kind: + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, TypeKind.Nan, Bool, + TypeKind.Inf: + return ($typ.kind).toLowerAscii() + of Pointer: + return &"ptr {self.typeToStr(typ.value)}" + of Reference: + return &"ref {self.typeToStr(typ.value)}" + of Mutable: + return &"var {self.typeToStr(typ.value)}" + of Function: + result = "fn (" + for i, (argName, argType) in typ.args: + result &= &"{argName}: {self.typeToStr(argType)}" + if i < typ.args.len() - 1: + result &= ", " + result &= ")" + if typ.returnType != nil: + result &= &": {self.typeToStr(typ.returnType)}" + else: + discard + + ## End of utility functions @@ -845,14 +759,13 @@ proc matchImpl(self: Compiler, name: string, kind: Type): Name = msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})" else: for i, arg in kind.args: - if not self.compareTypes(arg, name.valueType.args[i]): - msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i])}', got '{self.typeToStr(arg)}' instead" + if not self.compareTypes(arg.kind, name.valueType.args[i].kind): + msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead" self.error(msg) elif impl.len() > 1: var msg = &"multiple matching implementations of '{name}' found:\n" for fn in reversed(impl): - var node = FunDecl(fn.valueType.node) - msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" + msg &= &"- '{fn.name}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n" self.error(msg) return impl[0] @@ -890,7 +803,7 @@ proc unary(self: Compiler, node: UnaryExpr) = ## Compiles unary expressions such as decimal ## and bitwise negation let valueType = self.inferType(node.a) - let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), node: nil, args: @[valueType])) + let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", valueType)])) self.callUnaryOp(funct, node) @@ -898,7 +811,7 @@ proc binary(self: Compiler, node: BinaryExpr) = ## Compiles all binary expressions let typeOfA = self.inferType(node.a) let typeOfB = self.inferType(node.b) - let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), node: nil, args: @[typeOfA, typeOfB])) + let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", typeOfA), ("", typeOfB)])) self.callBinaryOp(funct, node) # TODO: Get rid of old code @@ -942,67 +855,74 @@ proc declareName(self: Compiler, node: Declaration) = # slap myself 100 times with a sign saying "I'm dumb". Mark my words self.error("cannot declare more than 16777216 variables at a time") for name in self.findByName(node.name.token.lexeme): - if name.name.token.lexeme == node.name.token.lexeme and name.depth == self.scopeDepth and name.valueType.node.kind == varDecl: - self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.valueType.node.token.line}") + if name.depth == self.scopeDepth and name.valueType.kind notin {Function, CustomType}: + # Trying to redeclare a variable in the same module is an error! + self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}") self.names.add(Name(depth: self.scopeDepth, name: node.name, isPrivate: node.isPrivate, owner: self.currentModule, isConst: node.isConst, - valueType: Type(kind: self.inferType(node.value).kind, node: node), + valueType: Type(kind: self.inferType(node.value).kind), codePos: self.chunk.code.len(), isLet: node.isLet, - isClosedOver: false)) + isClosedOver: false, + line: node.token.line)) # We emit 4 No-Ops because they may become a # StoreHeap instruction. If not, they'll be # removed before the compiler is finished + # TODO: This may break CFI offsets self.emitBytes([NoOp, NoOp, NoOp, NoOp]) of NodeKind.funDecl: var node = FunDecl(node) - # TODO: Emit some optional debugging - # metadata to let the VM know where a function's - # code begins and ends (similar to what gcc does with - # CFI in object files) to build stack traces self.names.add(Name(depth: self.scopeDepth, isPrivate: node.isPrivate, isConst: false, owner: self.currentModule, - valueType: Type(kind: Function, node: node, + valueType: Type(kind: Function, returnType: self.inferType( node.returnType), args: @[]), codePos: self.chunk.code.high(), name: node.name, isLet: false, - isClosedOver: false)) + isClosedOver: false, + line: node.token.line)) let fn = self.names[^1] + var name: Name for argument in node.arguments: if self.names.high() > 16777215: self.error("cannot declare more than 16777216 variables at a time") # wait, no LoadVar?? Yes! That's because when calling functions, # arguments will already be on the stack so there's no need to # load them here - self.names.add(Name(depth: self.scopeDepth + 1, - isPrivate: true, - owner: self.currentModule, - isConst: false, - name: argument.name, - valueType: nil, - codePos: self.chunk.code.len(), - isLet: false, - isClosedOver: false)) - self.names[^1].valueType = self.inferType(argument.valueType) + name = Name(depth: self.scopeDepth + 1, + isPrivate: true, + owner: self.currentModule, + isConst: false, + name: argument.name, + valueType: nil, + codePos: self.chunk.code.len(), + isLet: false, + isClosedOver: false) + self.names.add(name) + name.valueType = self.inferType(argument.valueType) + if argument.mutable: + name.valueType = Type(kind: Mutable, value: name.valueType) + elif argument.isRef: + name.valueType = Type(kind: Reference, value: name.valueType) + elif argument.isPtr: + name.valueType = Type(kind: Pointer, value: name.valueType) # We check if the argument's type is a generic - if self.names[^1].valueType == nil and argument.valueType.kind == identExpr: + if name.valueType == nil and argument.valueType.kind == identExpr: for gen in node.generics: if gen.name == IdentExpr(argument.valueType): - self.names[^1].valueType = Type(kind: Generic) + name.valueType = Type(kind: Generic) break # If it's still nil, it's an error! - if self.names[^1].valueType == nil: - self.error(&"cannot determine the type of argument '{self.names[^1].name.token.lexeme}'") - self.names[^1].valueType.node = argument.name - fn.valueType.args.add(self.names[^1].valueType) + if name.valueType == nil: + self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'") + fn.valueType.args.add((argument.name.token.lexeme, name.valueType)) else: discard # TODO: Types, enums @@ -1418,8 +1338,7 @@ proc funDecl(self: Compiler, node: FunDecl) = # the same function with the same name! Error! var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n" for fn in reversed(impl): - var node = FunDecl(fn.valueType.node) - msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" + msg &= &"- '{fn.name}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n" self.error(msg) # We store the current function self.currentFunction = node