Refactored the type system, which no longer relies on AST node objects. Added type kinds for ref, ptr and mutable types.

This commit is contained in:
Mattia Giambirtone 2022-05-29 15:54:01 +02:00
parent a8345d065a
commit b0515d3573
2 changed files with 174 additions and 254 deletions

View File

@ -14,8 +14,9 @@
## The Peon runtime environment
import types
import strformat
import ../config
when DEBUG_TRACE_VM:
import strformat
import ../frontend/meta/bytecode
import ../util/multibyte

View File

@ -32,27 +32,29 @@ export multibyte
type
TypeKind* = enum
TypeKind = enum
## An enumeration of compile-time
## types
Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Function, CustomType,
Nil, Nan, Bool, Inf, Typedesc, Generic,
Mutable, Reference, Pointer
Any # Any is used internally in a few cases,
# for example when looking for operators
# when only the type of the arguments is of
# interest
Type* = ref object
Type = ref object
## A wrapper around
## compile-time types
node*: ASTNode
case kind*: TypeKind:
case kind: TypeKind:
of Function:
name: string
isLambda: bool
args*: seq[Type]
returnType*: Type
args: seq[tuple[name: string, kind: Type]]
returnType: Type
of Mutable, Reference, Pointer:
value: Type
else:
discard
@ -81,10 +83,13 @@ type
# The name's type
valueType: Type
# For functions, this marks where the function's
# code begins. For variables, this stores their
# position in the stack (used for closures)
# code begins. For variables, this stores where
# their StoreVar/StoreHeap instruction was emitted
codePos: int
# Is the name closed over (i.e. used in a closure)?
isClosedOver: bool
# Where is this node declared in the file?
line: int
Loop = object
## A "loop object" used
## by the compiler to emit
@ -124,7 +129,7 @@ type
# The current function being compiled
currentFunction: FunDecl
# Are optimizations turned on?
enableOptimizations*: bool
enableOptimizations: bool
# The current loop being compiled (used to
# keep track of where to jump)
currentLoop: Loop
@ -254,11 +259,15 @@ proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
## Emits the load instruction that matches the
## inferred type of obj, followed by the operand
## bytes produced by makeConstant
# Only Int64, UInt64 and Int32 have a dedicated
# load opcode so far. NOTE(review): for any other
# type no opcode is emitted, yet the operand bytes
# below are still written -- confirm this is intended
case self.inferType(obj).kind:
of Int64:
self.emitByte(LoadInt64)
of UInt64:
self.emitByte(LoadUInt64)
of Int32:
self.emitByte(LoadInt32)
else:
discard # TODO
# The operand is built from the caller-supplied
# kind, not from the type inferred above
self.emitBytes(self.makeConstant(obj, kind))
@ -267,11 +276,11 @@ proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
proc emitJump(self: Compiler, opcode: OpCode): int =
## Emits a dummy jump offset to be patched later. Assumes
## the largest offset (emits 4 bytes, one for the given jump
## opcode, while the other 3 are for the jump offset, which
## is set to the maximum unsigned 24 bit integer). If the shorter
## 16 bit alternative is later found to be better suited, patchJump
## will fix this. Returns the absolute index into the chunk's
## bytecode array where the given placeholder instruction was written
self.emitByte(opcode)
# 0xffffff is the placeholder operand that gets patched later
self.emitBytes((0xffffff).toTriple())
# Step back over the bytes just written so that the result
# points at the opcode itself -- assumes the two calls above
# appended exactly 4 bytes to chunk.code; TODO confirm
result = self.chunk.code.len() - 4
@ -281,9 +290,10 @@ proc patchJump(self: Compiler, offset: int) =
## Patches a previously emitted relative
## jump using emitJump. Since emitJump assumes
## a long jump, this also shrinks the jump
## offset and changes the bytecode instruction if possible
## (i.e. jump is in 16 bit range), but the converse is also
## true (i.e. it might change a regular jump into a long one)
## offset and changes the bytecode instruction
## if possible (i.e. jump is in 16 bit range),
## but the converse is also true (i.e. it might
## change a regular jump into a long one)
var jump: int = self.chunk.code.len() - offset
if jump > 16777215:
self.error("cannot jump more than 16777216 bytecode instructions")
@ -291,6 +301,10 @@ proc patchJump(self: Compiler, offset: int) =
case OpCode(self.chunk.code[offset]):
of LongJumpForwards:
self.chunk.code[offset] = JumpForwards.uint8()
# We do this because a relative jump
# does not take its argument into account
# because it is hardcoded in the bytecode
# itself
jump -= 4
of LongJumpBackwards:
self.chunk.code[offset] = JumpBackwards.uint8()
@ -302,14 +316,10 @@ proc patchJump(self: Compiler, offset: int) =
of LongJumpIfFalseOrPop:
self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
of JumpForwards, JumpBackwards:
# We do this because a relative jump
# does not normally take into account
# its argument, which is hardcoded in
# the bytecode itself
jump -= 3
else:
discard
self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty)
self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty)
let offsetArray = (jump - 1).toDouble() # -1 since we got rid of 1 byte!
self.chunk.code[offset + 1] = offsetArray[0]
self.chunk.code[offset + 2] = offsetArray[1]
@ -328,10 +338,6 @@ proc patchJump(self: Compiler, offset: int) =
of JumpIfFalseOrPop:
self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
of LongJumpForwards, LongJumpBackwards:
# We do this because a relative jump
# does not normally take into account
# its argument, which is hardcoded in
# the bytecode itself
jump -= 4
else:
discard
@ -414,133 +420,48 @@ proc detectClosureVariable(self: Compiler, name: Name,
name.isClosedOver = true
proc compareTypesWithNullNode(self: Compiler, a, b: Type): bool =
## Compares two types without using information from
## AST nodes
proc compareTypes(self: Compiler, a, b: Type): bool =
  ## Compares two type objects for structural
  ## equality (works with nil!). Returns true
  ## when both are nil or when both describe
  ## the same type, false otherwise
  # A nil type stands for "no type" (this is what
  # void functions use as their return type), so
  # nil only ever matches nil
  if a == nil or b == nil:
    return a == nil and b == nil
  # Next, we see the type discriminant:
  # If they're different, then they can't
  # be the same type!
  if a.kind != b.kind:
    return false
  # If all previous checks pass, it's time
  # to go through each possible type peon
  # supports and compare it
  case a.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, Nan, Bool, Inf:
      # A value type's type is always equal to
      # another one's
      return true
    of Reference, Pointer, Mutable:
      # Here we already know that both a and b
      # are of the same wrapper kind, so we just
      # need to compare the types they wrap
      return self.compareTypes(a.value, b.value)
    of Function:
      # Functions are a bit trickier: arity, return
      # type and every argument type have to match
      if a.args.len() != b.args.len():
        return false
      if not self.compareTypes(a.returnType, b.returnType):
        # A return type of kind Any acts as a wildcard.
        # Either return type may be nil at this point
        # (void function compared against a non-void
        # one), so we must check for nil before reading
        # the kind field
        let
          aIsAny = a.returnType != nil and a.returnType.kind == Any
          bIsAny = b.returnType != nil and b.returnType.kind == Any
        if not (aIsAny or bIsAny):
          return false
      # Argument names are irrelevant here: only
      # the argument types are compared
      for i, argA in a.args:
        if not self.compareTypes(argA.kind, b.args[i].kind):
          return false
      return true
    else:
      # The remaining kinds (CustomType, Typedesc,
      # Generic and Any) are not compared yet and
      # are never considered equal
      return false
proc compareTypes(self: Compiler, a, b: Type): bool =
## Compares two type objects
## for equality (works with nil!)
if a == nil:
return b == nil
elif b == nil:
return a == nil
if a.kind != b.kind:
return false
case a.kind:
of Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Nil, Nan, Bool, Inf:
return true
of Function:
if a.node == nil or b.node == nil:
return self.compareTypesWithNullNode(a, b)
if not a.isLambda and not b.isLambda:
let
a = FunDecl(a.node)
b = FunDecl(b.node)
typeOfA = self.inferType(a.returnType)
typeOfB = self.inferType(b.returnType)
if a.name.token.lexeme != b.name.token.lexeme:
return false
elif a.arguments.len() != b.arguments.len():
return false
elif not self.compareTypes(typeOfA, typeOfB):
if typeOfA.kind != Any and typeOfB.kind != Any:
return false
for (argA, argB) in zip(a.arguments, b.arguments):
if argA.mutable != argB.mutable:
return false
elif argA.isRef != argB.isRef:
return false
elif argA.isPtr != argB.isPtr:
return false
elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
return false
return true
elif a.isLambda and not b.isLambda:
let
a = LambdaExpr(a.node)
b = FunDecl(b.node)
typeOfA = self.inferType(a.returnType)
typeOfB = self.inferType(b.returnType)
if a.arguments.len() != b.arguments.len():
return false
elif not self.compareTypes(typeOfA, typeOfB):
if typeOfA.kind != Any and typeOfB.kind != Any:
return false
for (argA, argB) in zip(a.arguments, b.arguments):
if argA.mutable != argB.mutable:
return false
elif argA.isRef != argB.isRef:
return false
elif argA.isPtr != argB.isPtr:
return false
elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
return false
return true
elif b.isLambda and not a.isLambda:
let
a = FunDecl(a.node)
b = LambdaExpr(b.node)
typeOfA = self.inferType(a.returnType)
typeOfB = self.inferType(b.returnType)
if a.arguments.len() != b.arguments.len():
return false
elif not self.compareTypes(typeOfA, typeOfB):
if typeOfA.kind != Any and typeOfB.kind != Any:
return false
for (argA, argB) in zip(a.arguments, b.arguments):
if argA.mutable != argB.mutable:
return false
elif argA.isRef != argB.isRef:
return false
elif argA.isPtr != argB.isPtr:
return false
elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
return false
return true
else:
let
a = LambdaExpr(a.node)
b = LambdaExpr(b.node)
typeOfA = self.inferType(a.returnType)
typeOfB = self.inferType(b.returnType)
if a.arguments.len() != b.arguments.len():
return false
elif not self.compareTypes(typeOfA, typeOfB):
if typeOfA.kind != Any and typeOfB.kind != Any:
return false
for (argA, argB) in zip(a.arguments, b.arguments):
if argA.mutable != argB.mutable:
return false
elif argA.isRef != argB.isRef:
return false
elif argA.isPtr != argB.isPtr:
return false
elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
return false
return true
return true
else:
discard
@ -587,47 +508,6 @@ proc toIntrinsic(name: string): Type =
return nil
proc inferType(self: Compiler, node: LiteralExpr): Type =
## Infers the type of a given literal expression
if node == nil:
return nil
case node.kind:
of intExpr, binExpr, octExpr, hexExpr:
let size = node.token.lexeme.split("'")
if len(size) notin 1..2:
self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
if size.len() == 1:
return Type(node: node, kind: Int64)
let typ = size[1].toIntrinsic()
if not self.compareTypes(typ, nil):
return typ
else:
self.error(&"invalid type specifier '{size[1]}' for int")
of floatExpr:
let size = node.token.lexeme.split("'")
if len(size) notin 1..2:
self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
if size.len() == 1 or size[1] == "f64":
return Type(node: node, kind: Float64)
let typ = size[1].toIntrinsic()
if not self.compareTypes(typ, nil):
return typ
else:
self.error(&"invalid type specifier '{size[1]}' for float")
of nilExpr:
return Type(node: node, kind: Nil)
of trueExpr:
return Type(node: node, kind: Bool)
of falseExpr:
return Type(node: node, kind: Bool)
of nanExpr:
return Type(node: node, kind: TypeKind.Nan)
of infExpr:
return Type(node: node, kind: TypeKind.Inf)
else:
discard # TODO
proc toIntrinsic(self: Compiler, typ: Expression): Type =
## Gets an expression's intrinsic type, if
## possible
@ -645,6 +525,47 @@ proc toIntrinsic(self: Compiler, typ: Expression): Type =
discard
proc inferType(self: Compiler, node: LiteralExpr): Type =
  ## Works out the compile-time type of a
  ## literal expression. Returns nil when
  ## node is nil or when the literal kind
  ## is not handled yet
  if node == nil:
    return nil
  case node.kind:
    of intExpr, binExpr, octExpr, hexExpr:
      # The lexeme is split on the apostrophe: a
      # second piece, if present, is the type specifier
      let size = node.token.lexeme.split("'")
      if size.len() < 1 or size.len() > 2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1:
        # No specifier: integers default to Int64
        result = Type(kind: Int64)
      else:
        let typ = size[1].toIntrinsic()
        if self.compareTypes(typ, nil):
          self.error(&"invalid type specifier '{size[1]}' for int")
        else:
          result = typ
    of floatExpr:
      let size = node.token.lexeme.split("'")
      if size.len() < 1 or size.len() > 2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1 or size[1] == "f64":
        # No specifier (or an explicit f64): Float64
        result = Type(kind: Float64)
      else:
        let typ = size[1].toIntrinsic()
        if self.compareTypes(typ, nil):
          self.error(&"invalid type specifier '{size[1]}' for float")
        else:
          result = typ
    of nilExpr:
      result = Type(kind: Nil)
    of trueExpr, falseExpr:
      # Both boolean literals share the same type
      result = Type(kind: Bool)
    of nanExpr:
      result = Type(kind: TypeKind.Nan)
    of infExpr:
      result = Type(kind: TypeKind.Inf)
    else:
      discard # TODO
proc inferType(self: Compiler, node: Expression): Type =
## Infers the type of a given expression and
## returns it
@ -658,8 +579,6 @@ proc inferType(self: Compiler, node: Expression): Type =
return name.valueType
else:
result = node.name.lexeme.toIntrinsic()
if result != nil:
result.node = node
of unaryExpr:
return self.inferType(UnaryExpr(node).a)
of binaryExpr:
@ -676,49 +595,15 @@ proc inferType(self: Compiler, node: Expression): Type =
return self.inferType(LiteralExpr(node))
of lambdaExpr:
var node = LambdaExpr(node)
result = Type(kind: Function, returnType: nil, node: node, args: @[], isLambda: true)
result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
if node.returnType != nil:
result.returnType = self.inferType(node.returnType)
for argument in node.arguments:
result.args.add(self.inferType(argument.valueType))
result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType)))
else:
discard # Unreachable
proc typeToStr(self: Compiler, typ: Type): string =
## Returns the string representation of a
## type object
case typ.kind:
of Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Nil, TypeKind.Nan, Bool,
TypeKind.Inf:
return ($typ.kind).toLowerAscii()
of Function:
result = "function ("
case typ.node.kind:
of funDecl:
var node = FunDecl(typ.node)
for i, argument in node.arguments:
result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.valueType))}"
if i < node.arguments.len() - 1:
result &= ", "
result &= ")"
of lambdaExpr:
var node = LambdaExpr(typ.node)
for i, argument in node.arguments:
result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}"
if i < node.arguments.len() - 1:
result &= ", "
result &= ")"
else:
discard # Unreachable
if typ.returnType != nil:
result &= &": {self.typeToStr(typ.returnType)}"
else:
discard
proc inferType(self: Compiler, node: Declaration): Type =
## Infers the type of a given declaration
## and returns it
@ -740,6 +625,35 @@ proc inferType(self: Compiler, node: Declaration): Type =
else:
return # Unreachable
proc typeToStr(self: Compiler, typ: Type): string =
  ## Returns the string representation of a
  ## type object. Wrapper types are rendered as
  ## "ptr T", "ref T" and "var T", functions as
  ## "fn (name: T, ...): R" (the return part is
  ## omitted when there is no return type) and
  ## every other kind as its lowercased name
  if typ == nil:
    # A nil type means "no type" (it is what void
    # functions use as their return type). Without
    # this guard, reading typ.kind below would crash
    # while an error message is being built
    return "void"
  case typ.kind:
    of Pointer:
      result = &"ptr {self.typeToStr(typ.value)}"
    of Reference:
      result = &"ref {self.typeToStr(typ.value)}"
    of Mutable:
      result = &"var {self.typeToStr(typ.value)}"
    of Function:
      result = "fn ("
      for i, arg in typ.args:
        if i > 0:
          result.add(", ")
        result.add(&"{arg.name}: {self.typeToStr(arg.kind)}")
      result.add(")")
      if typ.returnType != nil:
        result.add(&": {self.typeToStr(typ.returnType)}")
    else:
      # Primitive kinds (int64, bool, nan, ...) and
      # the kinds with no dedicated rendering yet
      # (Any, Generic, Typedesc, CustomType) all fall
      # back to their lowercased kind name, so that a
      # diagnostic never shows an empty type name
      result = ($typ.kind).toLowerAscii()
## End of utility functions
@ -845,14 +759,13 @@ proc matchImpl(self: Compiler, name: string, kind: Type): Name =
msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})"
else:
for i, arg in kind.args:
if not self.compareTypes(arg, name.valueType.args[i]):
msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i])}', got '{self.typeToStr(arg)}' instead"
if not self.compareTypes(arg.kind, name.valueType.args[i].kind):
msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead"
self.error(msg)
elif impl.len() > 1:
var msg = &"multiple matching implementations of '{name}' found:\n"
for fn in reversed(impl):
var node = FunDecl(fn.valueType.node)
msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n"
msg &= &"- '{fn.name}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
self.error(msg)
return impl[0]
@ -890,7 +803,7 @@ proc unary(self: Compiler, node: UnaryExpr) =
## Compiles unary expressions such as decimal
## and bitwise negation
let valueType = self.inferType(node.a)
let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), node: nil, args: @[valueType]))
let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", valueType)]))
self.callUnaryOp(funct, node)
@ -898,7 +811,7 @@ proc binary(self: Compiler, node: BinaryExpr) =
## Compiles all binary expressions
let typeOfA = self.inferType(node.a)
let typeOfB = self.inferType(node.b)
let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), node: nil, args: @[typeOfA, typeOfB]))
let funct = self.matchImpl(node.token.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", typeOfA), ("", typeOfB)]))
self.callBinaryOp(funct, node)
# TODO: Get rid of old code
@ -942,67 +855,74 @@ proc declareName(self: Compiler, node: Declaration) =
# slap myself 100 times with a sign saying "I'm dumb". Mark my words
self.error("cannot declare more than 16777216 variables at a time")
for name in self.findByName(node.name.token.lexeme):
if name.name.token.lexeme == node.name.token.lexeme and name.depth == self.scopeDepth and name.valueType.node.kind == varDecl:
self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.valueType.node.token.line}")
if name.depth == self.scopeDepth and name.valueType.kind notin {Function, CustomType}:
# Trying to redeclare a variable in the same module is an error!
self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
self.names.add(Name(depth: self.scopeDepth,
name: node.name,
isPrivate: node.isPrivate,
owner: self.currentModule,
isConst: node.isConst,
valueType: Type(kind: self.inferType(node.value).kind, node: node),
valueType: Type(kind: self.inferType(node.value).kind),
codePos: self.chunk.code.len(),
isLet: node.isLet,
isClosedOver: false))
isClosedOver: false,
line: node.token.line))
# We emit 4 No-Ops because they may become a
# StoreHeap instruction. If not, they'll be
# removed before the compiler is finished
# TODO: This may break CFI offsets
self.emitBytes([NoOp, NoOp, NoOp, NoOp])
of NodeKind.funDecl:
var node = FunDecl(node)
# TODO: Emit some optional debugging
# metadata to let the VM know where a function's
# code begins and ends (similar to what gcc does with
# CFI in object files) to build stack traces
self.names.add(Name(depth: self.scopeDepth,
isPrivate: node.isPrivate,
isConst: false,
owner: self.currentModule,
valueType: Type(kind: Function, node: node,
valueType: Type(kind: Function,
returnType: self.inferType(
node.returnType),
args: @[]),
codePos: self.chunk.code.high(),
name: node.name,
isLet: false,
isClosedOver: false))
isClosedOver: false,
line: node.token.line))
let fn = self.names[^1]
var name: Name
for argument in node.arguments:
if self.names.high() > 16777215:
self.error("cannot declare more than 16777216 variables at a time")
# wait, no LoadVar?? Yes! That's because when calling functions,
# arguments will already be on the stack so there's no need to
# load them here
self.names.add(Name(depth: self.scopeDepth + 1,
isPrivate: true,
owner: self.currentModule,
isConst: false,
name: argument.name,
valueType: nil,
codePos: self.chunk.code.len(),
isLet: false,
isClosedOver: false))
self.names[^1].valueType = self.inferType(argument.valueType)
name = Name(depth: self.scopeDepth + 1,
isPrivate: true,
owner: self.currentModule,
isConst: false,
name: argument.name,
valueType: nil,
codePos: self.chunk.code.len(),
isLet: false,
isClosedOver: false)
self.names.add(name)
name.valueType = self.inferType(argument.valueType)
if argument.mutable:
name.valueType = Type(kind: Mutable, value: name.valueType)
elif argument.isRef:
name.valueType = Type(kind: Reference, value: name.valueType)
elif argument.isPtr:
name.valueType = Type(kind: Pointer, value: name.valueType)
# We check if the argument's type is a generic
if self.names[^1].valueType == nil and argument.valueType.kind == identExpr:
if name.valueType == nil and argument.valueType.kind == identExpr:
for gen in node.generics:
if gen.name == IdentExpr(argument.valueType):
self.names[^1].valueType = Type(kind: Generic)
name.valueType = Type(kind: Generic)
break
# If it's still nil, it's an error!
if self.names[^1].valueType == nil:
self.error(&"cannot determine the type of argument '{self.names[^1].name.token.lexeme}'")
self.names[^1].valueType.node = argument.name
fn.valueType.args.add(self.names[^1].valueType)
if name.valueType == nil:
self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
else:
discard # TODO: Types, enums
@ -1418,8 +1338,7 @@ proc funDecl(self: Compiler, node: FunDecl) =
# the same function with the same name! Error!
var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n"
for fn in reversed(impl):
var node = FunDecl(fn.valueType.node)
msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n"
msg &= &"- '{fn.name}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
self.error(msg)
# We store the current function
self.currentFunction = node