Completely reworked the type inference system. Initial work on specializing the constants table in the bytecode chunk

This commit is contained in:
Mattia Giambirtone 2022-05-02 17:26:38 +02:00
parent 0493181262
commit b4fffe18aa
6 changed files with 475 additions and 322 deletions

View File

@ -22,10 +22,21 @@ type
PeonVM* = ref object
## The Peon Virtual Machine
stack: seq[PeonObject]
ip: int # Instruction pointer
sp: int # Stack pointer
ip: int # Instruction pointer
sp: int # Stack pointer
cache: array[6, PeonObject] # Singletons cache
chunk: Chunk # Piece of bytecode to execute
chunk: Chunk # Piece of bytecode to execute
proc initCache*(self: PeonVM) =
## Initializes the VM's
## singletons cache
# Populates all six slots of self.cache, each with a newly
# constructed object. The slot order is fixed here: 0 is nil,
# 1 and 2 are the booleans, 3 and 4 are the two infinities
# and 5 is NaN
self.cache[0] = newNil()
self.cache[1] = newBool(true)
self.cache[2] = newBool(false)
# NOTE(review): the boolean passed to newInf presumably selects
# the sign of the infinity -- confirm against newInf's definition
self.cache[3] = newInf(true)
self.cache[4] = newInf(false)
self.cache[5] = newNan()
proc newPeonVM*: PeonVM =
@ -35,12 +46,7 @@ proc newPeonVM*: PeonVM =
result.ip = 0
result.sp = 0
result.stack = newSeqOfCap[PeonObject](INITIAL_STACK_SIZE)
result.cache[0] = newNil()
result.cache[1] = newBool(true)
result.cache[2] = newBool(false)
result.cache[3] = newInf(true)
result.cache[4] = newInf(false)
result.cache[5] = newNan()
result.initCache()
for _ in 0..<INITIAL_STACK_SIZE:
result.stack.add(result.cache[0])
@ -80,16 +86,49 @@ proc pop(self: PeonVM): PeonObject =
return self.stack[self.sp]
proc readByte(self: PeonVM, chunk: Chunk): uint8 =
proc readByte(self: PeonVM): uint8 =
## Reads a single byte from the
## bytecode and returns it as an
## unsigned 8 bit integer
inc(self.ip)
return chunk.code[self.ip - 1]
return self.chunk.code[self.ip - 1]
proc readShort(self: PeonVM): uint16 =
  ## Reads two bytes from the
  ## bytecode and returns them
  ## as an unsigned 16 bit
  ## integer. The instruction
  ## pointer is advanced by two
  # The operand has to be fetched from the chunk: copying from
  # a zero-initialized buffer would always yield 0 and leave the
  # instruction pointer on the operand bytes
  var arr: array[2, uint8] = [self.readByte(), self.readByte()]
  # The bytes are reinterpreted in memory order, exactly like
  # readConstant does with its 3-byte operand
  copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
proc readBytes(self: PeonVM): uint32 =
  ## Reads three bytes from the
  ## bytecode and returns them
  ## as an unsigned 32 bit
  ## integer. Note however that
  ## the boundary is capped at
  ## 24 bits instead of 32. The
  ## instruction pointer is
  ## advanced by three
  # The operand has to be fetched from the chunk: copying from
  # a zero-initialized buffer would always yield 0 and leave the
  # instruction pointer on the operand bytes
  var arr: array[3, uint8] = [self.readByte(), self.readByte(), self.readByte()]
  # Only sizeof(arr) == 3 bytes are copied, so the remaining byte
  # of the (zero-initialized) result is left untouched
  copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
proc readConstant(self: PeonVM): PeonObject =
## Reads a constant from the
## chunk's constant table
# Consumes the 3-byte operand that follows the instruction
# (the instruction pointer advances by three)
var arr = [self.readByte(), self.readByte(), self.readByte()]
# idx starts out as zero; only its first sizeof(arr) == 3 bytes
# are overwritten with the operand, in memory order
var idx: int
copyMem(idx.addr, arr.addr, sizeof(arr))
# The actual table lookup is not implemented yet, so result is
# never assigned and the procedure returns its default value
# TODO
# result = self.chunk.consts[idx]
proc dispatch*(self: PeonVM) =
## Main bytecode dispatch loop
var instruction: OpCode
while true:
instruction = OpCode(self.readByte(self.chunk))
instruction = OpCode(self.readByte())
case instruction:
of OpCode.True:
self.push(self.getBool(true))
@ -106,6 +145,10 @@ proc dispatch*(self: PeonVM) =
return
of OpCode.NoOp:
continue
of OpCode.Pop:
discard self.pop()
of OpCode.Jump:
self.ip += int(self.readShort())
else:
discard

View File

@ -33,29 +33,47 @@ export multibyte
type
NameKind = enum
Function, Type, Var
TypeKind = enum
## An enumeration of compile-time
## types
Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Function, CustomType,
Dict, List, Tuple, Set, Nil, Nan, Bool,
Inf
Type = ref object
## A wrapper around
## compile-time types
node: ASTNode
case kind: TypeKind:
of Function:
returnType: Type
of List, Tuple, Set:
memberType: Type
of Dict:
keyType: Type
valueType: Type
else:
discard
Name = ref object
## A compile-time wrapper around
## statically resolved names.
## Depth indicates to which scope
## the variable belongs, zero meaning
## the global one
kind: NameKind
name: IdentExpr # Name of the identifier
owner: string # Owner of the identifier
depth: int # Scope depth
isPrivate: bool # Is this name private?
isConst: bool # Is this a constant?
isLet: bool # Can this name's value be mutated?
valueType: Expression # Name's type
valueType: Type # The name's type
codePos: int # The position in the bytecode
# where this name's StoreVar
# instruction was emitted. This
# is kept so that once we detect
# this name as a closed-over variable
# we can change the StoreVar into a StoreHeap
Loop = object
## A "loop object" used
## by the compiler to emit
@ -112,7 +130,6 @@ type
closedOver: seq[IdentExpr]
proc newCompiler*(enableOptimizations: bool = true): Compiler =
## Initializes a new Compiler object
new(result)
@ -133,6 +150,8 @@ proc declaration(self: Compiler, node: ASTNode)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc inferValueType(self: Compiler, node: ASTNode): Type
proc inferExprType(self: Compiler, node: ASTNode): Type
## End of forward declarations
## Public getter for nicer error formatting
@ -205,16 +224,21 @@ proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
self.emitByte(bytarr[2])
proc makeConstant(self: Compiler, val: ASTNode): array[3, uint8] =
proc makeConstant(self: Compiler, val: LiteralExpr): array[3, uint8] =
## Adds a constant to the current chunk's constant table
## and returns its index as a 3-byte array of uint8s
result = self.chunk.addConstant(val)
proc emitConstant(self: Compiler, obj: ASTNode) =
proc emitConstant(self: Compiler, obj: LiteralExpr) =
## Emits a LoadConstant instruction along
## with its operand
self.emitByte(LoadConstant)
case self.inferExprType(obj).kind:
of Int64:
self.emitByte(LoadInt64)
else:
discard # TODO
self.emitBytes(self.makeConstant(obj))
@ -278,6 +302,285 @@ proc patchJump(self: Compiler, offset: int) =
self.chunk.code[offset + 2] = offsetArray[1]
self.chunk.code[offset + 3] = offsetArray[2]
proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
  ## Looks up the most recently declared name whose lexeme
  ## matches the given identifier, walking self.names from
  ## the end towards the start. Yields nil if nothing matches.
  ## Scope depth plays no role here (the depth argument is not
  ## consulted): that is getStackPos' job. Private names owned
  ## by a module other than the current one are never returned
  for idx in countdown(self.names.high(), 0):
    let candidate = self.names[idx]
    if candidate.name.token.lexeme != name.token.lexeme:
      continue
    # A private name from a foreign module is skipped rather
    # than treated as a failure, because an older entry from
    # the current module may still match
    if not candidate.isPrivate or candidate.owner == self.currentModule:
      return candidate
  return nil
proc getStackPos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
  ## Iterates the internal list of declared names backwards and
  ## returns a tuple (closedOver, pos) that tells the caller whether
  ## the name is to be emitted as a closure as well as its predicted
  ## stack/closure array position. Returns (false, -1) if the variable's
  ## location can not be determined at compile time (this is an error!).
  ## Note that private names declared in other modules will not be resolved!
  # Iterating by index keeps the reported position tied to the
  # entry being inspected even when an entry is skipped (a manual
  # counter would go out of sync on every `continue`)
  for i in countdown(self.names.high(), 0):
    let variable = self.names[i]
    if name.name.lexeme != variable.name.name.lexeme:
      continue
    if variable.isPrivate and variable.owner != self.currentModule:
      continue
    if variable.depth == depth or variable.depth == 0:
      # variable.depth == 0 for globals!
      return (false, i)
    elif variable.depth > 0:
      # The position must be an index into self.closedOver itself
      # (that is what detectClosureVariable writes into the
      # bytecode), not into a reversed copy of it
      for j in countdown(self.closedOver.high(), 0):
        if self.closedOver[j].name.lexeme == name.name.lexeme:
          return (true, j)
  return (false, -1)
proc detectClosureVariable(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth) =
## Detects if the given name is used in a local scope deeper
## than the given one and modifies the code emitted for it
## to store it as a closure variable if it is. Does nothing if the name
## hasn't been declared yet or is unreachable (for example if it's
## declared as private in another module), if the name itself is a
## global variable and if either the current or the outer scope are
## the global (outermost) one. This function must be called each
## time a name is referenced in order for closed-over variables
## to be emitted properly, otherwise the runtime may behave
## unpredictably or crash
# Scopes at depth 0 and 1 never produce closures
if depth == 0 or depth - 1 == 0:
return
# Note that the depth argument is not forwarded here: resolve
# is called with its own default depth
let entry = self.resolve(name)
if entry == nil:
return
if entry.depth == 0:
return
if entry.depth < depth:
# Ding! The given name is closed over: we need to
# change the StoreVar instruction that created this
# name entry into a StoreHeap. We don't need to change
# other pieces of code because self.identifier() already
# emits LoadHeap if it detects the variable is closed over,
# whether or not this function is called
# NOTE(review): the name is appended on every call that reaches
# this point; nothing here checks whether it is already present
# in self.closedOver -- confirm that duplicates are intended
self.closedOver.add(entry.name)
# The limit is checked after the append, so the check sees the
# length including the new entry
if self.closedOver.len() >= 16777216:
self.error("too many consecutive closure-over variables (max is 16777216)")
# The index of the entry just appended, encoded as 3 bytes
let idx = self.closedOver.high().toTriple()
# Overwrite the 4 bytes (opcode + 3-byte operand) located at
# entry.codePos with a StoreHeap instruction.
# NOTE(review): this assumes entry.codePos is a valid offset into
# the chunk; verify that entries created with a negative codePos
# can never reach this branch
self.chunk.code[entry.codePos] = StoreHeap.uint8
self.chunk.code[entry.codePos + 1] = idx[0]
self.chunk.code[entry.codePos + 2] = idx[1]
self.chunk.code[entry.codePos + 3] = idx[2]
proc toIntrinsic(name: string): Type =
  ## Maps the textual name of a built-in type
  ## (including its short aliases such as "i64"
  ## or "f32") to the matching compile-time Type.
  ## Returns nil when the string does not name
  ## an intrinsic type
  case name:
    of "int", "int64", "i64":
      return Type(kind: Int64)
    of "uint64", "u64":
      return Type(kind: UInt64)
    of "int32", "i32":
      return Type(kind: Int32)
    of "uint32", "u32":
      return Type(kind: UInt32)
    of "int16", "i16":
      return Type(kind: Int16)
    of "uint16", "u16":
      return Type(kind: UInt16)
    of "int8", "i8":
      return Type(kind: Int8)
    of "uint8", "u8":
      return Type(kind: UInt8)
    of "f64", "float", "float64":
      return Type(kind: Float64)
    of "f32", "float32":
      return Type(kind: Float32)
    of "byte":
      return Type(kind: Byte)
    of "char":
      return Type(kind: Char)
    of "nan":
      return Type(kind: Nan)
    of "nil":
      return Type(kind: Nil)
    of "inf":
      return Type(kind: Inf)
    of "bool":
      return Type(kind: Bool)
    else:
      # Not a built-in type name
      return nil
proc toIntrinsic(typ: Expression): Type =
  ## Returns the intrinsic type named by the
  ## given type expression. Only plain
  ## identifiers are understood: a nil
  ## expression, or one of any other kind,
  ## yields nil
  if typ != nil and typ.kind == identExpr:
    result = typ.token.lexeme.toIntrinsic()
proc inferValueType(self: Compiler, node: ASTNode): Type =
  ## Infers the type of a given literal expression.
  ## Collections yield a composite type built from
  ## their declared member types, numeric literals
  ## honor an optional size specifier following a
  ## single quote (e.g. 1'u8) and default to Int64
  ## and Float64 respectively. Returns nil for node
  ## kinds that are not handled here
  case node.kind:
    of listExpr:
      return Type(kind: List, memberType: self.inferExprType(ListExpr(node).valueType))
    of tupleExpr:
      return Type(kind: Tuple, memberType: self.inferExprType(TupleExpr(node).valueType))
    of setExpr:
      return Type(kind: Set, memberType: self.inferExprType(SetExpr(node).valueType))
    of dictExpr:
      let node = DictExpr(node)
      # Keys and values are typed independently: the key type
      # must come from keyType, not from valueType
      return Type(kind: Dict, keyType: self.inferExprType(node.keyType), valueType: self.inferExprType(node.valueType))
    of intExpr, binExpr, octExpr, hexExpr:
      let node = LiteralExpr(node)
      # Splits "<digits>'<specifier>" into its two halves
      let size = node.token.lexeme.split("'")
      if len(size) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1:
        # No explicit specifier: integers default to 64 bits
        return Type(kind: Int64)
      let typ = size[1].toIntrinsic()
      if typ != nil:
        return typ
      else:
        self.error(&"invalid type specifier '{size[1]}' for int")
    of floatExpr:
      let node = LiteralExpr(node)
      let size = node.token.lexeme.split("'")
      if len(size) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if size.len() == 1 or size[1] == "f64":
        return Type(kind: Float64)
      let typ = size[1].toIntrinsic()
      if typ != nil:
        return typ
      else:
        self.error(&"invalid type specifier '{size[1]}' for float")
    of strExpr:
      # inferExprType forwards string literals here too: without
      # this branch they would end up with no type at all
      return Type(kind: String)
    of nilExpr:
      return Type(kind: Nil)
    of trueExpr:
      return Type(kind: Bool)
    of falseExpr:
      return Type(kind: Bool)
    of nanExpr:
      return Type(kind: Nan)
    of infExpr:
      return Type(kind: Inf)
    else:
      discard # TODO
proc inferExprType(self: Compiler, node: ASTNode): Type =
  ## Infers the type of a given expression and
  ## returns it. Returns nil when no type can be
  ## determined: callers are expected to check
  ## for that and report the error themselves
  if node == nil:
    # Nothing to inspect (e.g. a missing value)
    return nil
  case node.kind:
    of identExpr:
      var node = IdentExpr(node)
      var name = self.resolve(node)
      if name == nil:
        # Undeclared (or unreachable) name: resolve returns
        # nil in that case and it must not be dereferenced
        return nil
      return name.valueType
    of unaryExpr:
      # The operand can be any expression, not only a literal,
      # so it goes through the general inference procedure
      return self.inferExprType(UnaryExpr(node).a)
    of binaryExpr:
      var node = BinaryExpr(node)
      var a = self.inferExprType(node.a)
      var b = self.inferExprType(node.b)
      if a == nil or b == nil:
        return nil
      # The type of the left operand is taken as the
      # type of the whole expression
      return a
    of {intExpr, hexExpr, binExpr, octExpr,
        strExpr, falseExpr, trueExpr, infExpr,
        nanExpr, floatExpr, nilExpr, listExpr,
        dictExpr, setExpr, tupleExpr
       }:
      return self.inferValueType(node)
    else:
      discard # Unreachable
proc inferDeclType(self: Compiler, node: Declaration): Type =
  ## Returns the type attached to a declaration.
  ## If the declared name has already been registered
  ## its recorded type is returned. Otherwise a variable
  ## declaration falls back to the inferred type of its
  ## value, while a function declaration (and any other
  ## kind of node) yields nil
  case node.kind:
    of funDecl:
      let known = self.resolve(FunDecl(node).name)
      if known != nil:
        result = known.valueType
    of NodeKind.varDecl:
      let decl = VarDecl(node)
      let known = self.resolve(decl.name)
      result =
        if known != nil:
          known.valueType
        else:
          self.inferExprType(decl.value)
    else:
      discard # Unreachable
proc typeToStr(self: Compiler, typ: Type): string =
  ## Returns a human-readable representation of
  ## a compile-time type, meant to be embedded
  ## in error messages. Simple types are rendered
  ## as their lowercase kind name, functions as
  ## "function (name: type, ...)" and collections
  ## as "kind[member]" or "dict[key, value]". A
  ## nil type is rendered as "<unknown>"
  if typ.isNil():
    # Callers format types that failed to be inferred too
    return "<unknown>"
  case typ.kind:
    of {Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Nil, Nan, Bool, Inf}:
      return ($typ.kind).toLowerAscii()
    of Function:
      result = "function ("
      case typ.node.kind:
        of funDecl:
          var node = FunDecl(typ.node)
          for i, argument in node.arguments:
            result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferExprType(argument.name))}"
            # Separator goes between arguments only, never
            # after the last one
            if i < node.arguments.high():
              result &= ", "
          result &= ")"
        of lambdaExpr:
          var node = LambdaExpr(typ.node)
          for i, argument in node.arguments:
            result &= &"{argument.name.token.lexeme}: {argument.valueType}"
            if i < node.arguments.high():
              result &= ", "
          result &= ")"
        else:
          discard # Unreachable
    of List, Tuple, Set:
      result &= &"{($typ.kind).toLowerAscii()}[{self.typeToStr(typ.memberType)}]"
    of Dict:
      result &= &"{($typ.kind).toLowerAscii()}[{self.typeToStr(typ.keyType)}, {self.typeToStr(typ.valueType)}]"
    else:
      discard
proc `==`(self, other: Type): bool =
  ## Structural equality between compile-time types.
  ## Two nil types are equal, a nil type never equals
  ## a non-nil one. Simple types are equal when their
  ## kinds match, collections additionally require
  ## equal member (or key and value) types. Comparing
  ## function types is not implemented and yields false,
  ## as does comparing custom types
  let
    selfMissing = self.isNil()
    otherMissing = other.isNil()
  if selfMissing or otherMissing:
    return selfMissing and otherMissing
  if self.kind != other.kind:
    return false
  case self.kind:
    of List, Tuple, Set:
      result = self.memberType == other.memberType
    of Dict:
      result = self.keyType == other.keyType and self.valueType == other.valueType
    of Function, CustomType:
      # TODO: not comparable yet
      result = false
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, Nan, Bool, Inf:
      # Same kind is all that matters here
      result = true
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
@ -296,7 +599,7 @@ proc literal(self: Compiler, node: ASTNode) =
of nanExpr:
self.emitByte(OpCode.Nan)
of strExpr:
self.emitConstant(node)
self.emitConstant(LiteralExpr(node))
of intExpr:
var x: int
var y = IntExpr(node)
@ -495,7 +798,7 @@ proc binary(self: Compiler, node: BinaryExpr) =
self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug!)")
proc declareName(self: Compiler, node: Declaration, kind: Expression) =
proc declareName(self: Compiler, node: Declaration) =
## Statically declares a name into the current scope
case node.kind:
of NodeKind.varDecl:
@ -510,10 +813,9 @@ proc declareName(self: Compiler, node: Declaration, kind: Expression) =
isPrivate: node.isPrivate,
owner: self.currentModule,
isConst: node.isConst,
valueType: kind,
valueType: Type(kind: self.inferExprType(node.value).kind, node: node),
codePos: self.chunk.code.len(),
isLet: node.isLet,
kind: Var))
isLet: node.isLet))
self.emitByte(StoreVar)
self.emitBytes(self.names.high().toTriple())
of NodeKind.funDecl:
@ -527,11 +829,10 @@ proc declareName(self: Compiler, node: Declaration, kind: Expression) =
isPrivate: node.isPrivate,
isConst: false,
owner: self.currentModule,
valueType: node.returnType,
valueType: Type(kind: Function, node: node),
codePos: -1,
name: node.name,
isLet: false,
kind: Function))
isLet: false))
for argument in node.arguments:
if self.names.high() > 16777215:
self.error("cannot declare more than 16777216 variables at a time")
@ -540,90 +841,14 @@ proc declareName(self: Compiler, node: Declaration, kind: Expression) =
owner: self.currentModule,
isConst: false,
name: argument.name,
valueType: kind,
valueType: self.inferExprType(argument.name),
codePos: self.chunk.code.len(),
isLet: false,
kind: Var))
isLet: false))
self.emitByte(StoreVar)
self.emitBytes(self.names.high().toTriple())
# TODO: Default arguments and unpacking
else:
discard # Unreachable
proc resolve(self: Compiler, name: IdentExpr,
depth: int = self.scopeDepth): Name =
## Traverses self.names backwards and returns the
## first name object with the given name. Returns
## nil when the name can't be found. This function
## has no concept of scope depth, because getStackPos
## does that job. Note that private names declared in
## other modules will not be resolved!
for obj in reversed(self.names):
if obj.name.token.lexeme == name.token.lexeme:
if obj.isPrivate and obj.owner != self.currentModule:
continue # There may be a name in the current module that
# matches, so we skip this
return obj
return nil
proc getStackPos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
## Iterates the internal list of declared names backwards and
## returns a tuple (closedOver, pos) that tells the caller whether the
## the name is to be emitted as a closure as well as its predicted
## stack/closure array position. Returns (false, -1) if the variable's
## location can not be determined at compile time (this is an error!).
## Note that private names declared in other modules will not be resolved!
var i: int = self.names.high()
for variable in reversed(self.names):
if name.name.lexeme == variable.name.name.lexeme:
if variable.isPrivate and variable.owner != self.currentModule:
continue
if variable.depth == depth or variable.depth == 0:
# variable.depth == 0 for globals!
return (false, i)
elif variable.depth > 0:
for j, closure in reversed(self.closedOver):
if closure.name.lexeme == name.name.lexeme:
return (true, j)
dec(i)
return (false, -1)
proc detectClosureVariable(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth) =
## Detects if the given name is used in a local scope deeper
## than the given one and modifies the code emitted for it
## to store it as a closure variable if it is. Does nothing if the name
## hasn't been declared yet or is unreachable (for example if it's
## declared as private in another module), if the name itself is a
## global variable and if either the current or the outer scope are
## the global (outermost) one. This function must be called each
## time a name is referenced in order for closed-over variables
## to be emitted properly, otherwise the runtime may behave
## unpredictably or crash
if depth == 0 or depth - 1 == 0:
return
var entry = self.resolve(name)
if entry == nil:
return
if entry.depth == 0:
return
if entry.depth < depth:
# Ding! The given name is closed over: we need to
# change the StoreVar instruction that created this
# name entry into a StoreHeap. We don't need to change
# other pieces of code because self.identifier() already
# emits LoadHeap if it detects the variable is closed over,
# whether or not this function is called
self.closedOver.add(entry.name)
if self.closedOver.len() >= 16777216:
self.error("too many consecutive closure-over variables (max is 16777216)")
let idx = self.closedOver.high().toTriple()
self.chunk.code[entry.codePos] = StoreHeap.uint8
self.chunk.code[entry.codePos + 1] = idx[0]
self.chunk.code[entry.codePos + 2] = idx[1]
self.chunk.code[entry.codePos + 3] = idx[2]
proc identifier(self: Compiler, node: IdentExpr) =
@ -824,98 +1049,6 @@ proc whileStmt(self: Compiler, node: WhileStmt) =
self.emitLoop(start)
proc inferValueType(self: Compiler, node: ASTNode): ASTNode =
## Infers the type of a given literal expression
case node.kind:
of listExpr:
return ListExpr(node).valueType
of dictExpr:
# It's not important that we don't use
# valueType here, we just need to return
# a non-nil value so we don't error out
return DictExpr(node).keyType
of intExpr, floatExpr, binExpr, octExpr, hexExpr:
var node = LiteralExpr(node)
var size = node.token.lexeme.split("'")
if len(size) notin 1..2:
self.error("invalid state: inferValueType -> invalid size specifier")
elif size.len() == 1:
return newIdentExpr(Token(lexeme: "int"))
elif size[1] in ["u64", "i64", "u32", "i32", "f64", "f32", "i32", "u32", "u8", "i8"]:
if size[1].startsWith("u"):
size[1] = size[1].strip(true, false, {'u'})
size[1] = &"uint{size[1]}"
elif size[1].startsWith("i"):
size[1] = size[1].strip(true, false, {'i'})
size[1] = &"int{size[1]}"
elif size[1].startsWith("f"):
size[1] = size[1].strip(true, false, {'f'})
size[1] = &"float{size[1]}"
return newIdentExpr(Token(lexeme: size[1]))
else:
self.error(&"invalid type specifier '{size[1]}' for '{size[0]}'")
return newIdentExpr(Token(lexeme: "int"))
of nilExpr:
return newIdentExpr(Token(lexeme: "nil"))
of trueExpr:
return newIdentExpr(Token(lexeme: "true"))
of falseExpr:
return newIdentExpr(Token(lexeme: "false"))
of nanExpr:
return newIdentExpr(Token(lexeme: "nan"))
of infExpr:
return newIdentExpr(Token(lexeme: "inf"))
else:
discard # TODO
proc inferExprType(self: Compiler, node: ASTNode): ASTNode =
## Infers the type of a given expression and
## returns it
case node.kind:
of identExpr:
var node = IdentExpr(node)
var name = self.resolve(node)
if name == nil:
return nil
return name.valueType
of unaryExpr:
return self.inferValueType(UnaryExpr(node).a)
of binaryExpr:
var node = BinaryExpr(node)
var a = self.inferExprType(node.a)
var b = self.inferExprType(node.b)
if a == nil or b == nil:
return nil
return a
of {intExpr, hexExpr, binExpr, octExpr,
strExpr, falseExpr, trueExpr, infExpr,
nanExpr, floatExpr, nilExpr, listExpr,
dictExpr, setExpr, tupleExpr
}:
return self.inferValueType(node)
else:
discard # Unreachable
proc inferDeclType(self: Compiler, node: Declaration): ASTNode =
## Infers the type of a given declaration if it's
## not already defined and returns it
case node.kind:
of funDecl:
var node = FunDecl(node)
if node.returnType != nil:
return node.returnType
of NodeKind.varDecl:
var node = VarDecl(node)
if node.valueType != nil:
return node.valueType
else:
return self.inferExprType(node.value)
else:
return # Unreachable
proc expression(self: Compiler, node: ASTNode) =
## Compiles all expressions
if self.inferExprType(node) == nil:
@ -985,14 +1118,15 @@ proc returnStmt(self: Compiler, node: ReturnStmt) =
## Compiles return statements. An empty return
## implicitly returns nil
let returnType = self.inferExprType(node.value)
let typ = self.inferDeclType(self.currentFunction)
if returnType == nil and self.currentFunction.returnType != nil:
self.error(&"expected return value of type '{self.currentFunction.returnType.token.lexeme}', but expression has no type")
elif self.currentFunction.returnType == nil:
if node.value.kind != nilExpr:
self.error("non-nil return value is not allowed in functions without an explicit return type")
else:
if returnType.token.lexeme != self.currentFunction.returnType.token.lexeme:
self.error(&"expected return value of type '{self.currentFunction.returnType.token.lexeme}', got '{returnType.token.lexeme}' instead")
if returnType != typ:
self.error(&"expected return value of type '{self.typeToStr(typ)}', got '{self.typeToStr(returnType)}' instead")
self.expression(node.value)
self.emitByte(OpCode.Return)
@ -1099,11 +1233,14 @@ proc statement(self: Compiler, node: ASTNode) =
proc varDecl(self: Compiler, node: VarDecl) =
## Compiles variable declarations
let kind = self.inferDeclType(node)
if kind == nil:
self.error(&"Cannot determine the type of '{node.name.token.lexeme}'")
let kind = node.valueType.toIntrinsic()
let typ = self.inferExprType(node.value)
if kind == nil and typ == nil:
self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
elif typ != kind and kind != nil:
self.error(&"expected value of type '{self.typeToStr(kind)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(typ)}'")
self.expression(node.value)
self.declareName(node, IdentExpr(kind))
self.declareName(node)
proc funDecl(self: Compiler, node: FunDecl) =
@ -1115,7 +1252,7 @@ proc funDecl(self: Compiler, node: FunDecl) =
# A function's code is just compiled linearly
# and then jumped over
let jmp = self.emitJump(JumpForwards)
self.declareName(node, node.returnType)
self.declareName(node)
# Since the deferred array is a linear
# sequence of instructions and we want

View File

@ -146,7 +146,7 @@ type
keyType*: IdentExpr
valueType*: IdentExpr
IdentExpr* = ref object of Expression
IdentExpr* = ref object of LiteralExpr
name*: Token
GroupingExpr* = ref object of Expression
@ -184,7 +184,7 @@ type
LambdaExpr* = ref object of Expression
body*: Statement
arguments*: seq[tuple[name: IdentExpr, valueType: Expression]]
arguments*: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]]
defaults*: seq[Expression]
isGenerator*: bool
isAsync*: bool
@ -263,7 +263,7 @@ type
FunDecl* = ref object of Declaration
name*: IdentExpr
body*: Statement
arguments*: seq[tuple[name: IdentExpr, valueType: Expression]]
arguments*: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]]
defaults*: seq[Expression]
isAsync*: bool
isGenerator*: bool
@ -271,20 +271,13 @@ type
returnType*: Expression
proc newASTNode*(kind: NodeKind, token: Token): ASTNode =
## Initializes a new generic ASTNode object
new(result)
result.kind = kind
result.token = token
proc isConst*(self: ASTNode): bool =
## Returns true if the given
## AST node represents a value
## of constant type. All integers,
## strings and singletons count as
## constants, as well as collections
## comprised only of those types.
## comprised only of those types
case self.kind:
of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, infExpr, nanExpr, floatExpr, nilExpr:
return true
@ -303,12 +296,20 @@ proc isConst*(self: ASTNode): bool =
proc isLiteral*(self: ASTNode): bool {.inline.} =
## Returns if the AST node represents a literal
self.kind in {intExpr, hexExpr, binExpr, octExpr,
strExpr, falseExpr, trueExpr, infExpr,
nanExpr, floatExpr, nilExpr, listExpr,
dictExpr, setExpr, tupleExpr
}
## AST node constructors
proc newASTNode*(kind: NodeKind, token: Token): ASTNode =
## Initializes a new generic ASTNode object
new(result)
result.kind = kind
result.token = token
proc newIntExpr*(literal: Token): IntExpr =
result = IntExpr(kind: intExpr)
@ -363,6 +364,7 @@ proc newIdentExpr*(name: Token): IdentExpr =
result = IdentExpr(kind: identExpr)
result.name = name
result.token = name
result.literal = name
proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr =
@ -371,7 +373,7 @@ proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr =
result.token = token
proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression]], defaults: seq[Expression], body: Statement,
proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]], defaults: seq[Expression], body: Statement,
isGenerator: bool, isAsync: bool, token: Token, returnType: Expression): LambdaExpr =
result = LambdaExpr(kind: lambdaExpr)
result.body = body
@ -591,7 +593,7 @@ proc newVarDecl*(name: IdentExpr, value: Expression, isConst: bool = false,
result.pragmas = pragmas
proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueType: Expression]], defaults: seq[Expression],
proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]], defaults: seq[Expression],
body: Statement, isAsync, isGenerator: bool,
isPrivate: bool, token: Token, pragmas: seq[Token],
returnType: Expression): FunDecl =

View File

@ -42,7 +42,7 @@ type
## are 3 and 4"
## This is more efficient than using the naive approach, which would encode
## the same line number multiple times and waste considerable amounts of space.
consts*: seq[ASTNode]
consts*: seq[LiteralExpr]
code*: seq[uint8]
lines*: seq[int]
reuseConsts*: bool
@ -62,8 +62,19 @@ type
# or 24 bit numbers that are defined statically
# at compilation time into the bytecode.
LoadConstant = 0u8, # Pushes constant at position x in the constant table onto the stack
## Constant opcodes (each of them pushes a constant singleton on the stack)
# These push a constant onto the stack
LoadInt64 = 0u8,
LoadUInt64,
LoadInt32,
LoadUInt32,
LoadInt16,
LoadUInt16,
LoadInt8,
LoadUInt8,
LoadFloat64,
LoadFloat32,
LoadString,
## Singleton opcodes (each of them pushes a constant singleton on the stack)
Nil,
True,
False,
@ -127,7 +138,12 @@ const simpleInstructions* = {OpCode.Return, OpCode.Nil,
OpCode.NoOp}
# Constant instructions are instructions that operate on the bytecode constant table
const constantInstructions* = {LoadConstant, }
const constantInstructions* = {LoadInt64, LoadUInt64,
LoadInt32, LoadUInt32,
LoadInt16, LoadUInt16,
LoadInt8, LoadUInt8,
LoadFloat64, LoadFloat32,
LoadString}
# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 24 bit integers
@ -207,7 +223,7 @@ proc getLine*(self: Chunk, idx: int): int =
raise newException(IndexDefect, "index out of range")
proc findOrAddConstant(self: Chunk, constant: ASTNode): int =
proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int =
## Small optimization function that reuses the same constant
## if it's already been written before (only if self.reuseConsts
## equals true)
@ -219,8 +235,7 @@ proc findOrAddConstant(self: Chunk, constant: ASTNode): int =
if c.kind != constant.kind:
continue
if constant.isConst():
var c = LiteralExpr(c)
var constant = LiteralExpr(constant)
var constant = constant
if c.literal.lexeme == constant.literal.lexeme:
# This wouldn't work for stuff like 2e3 and 2000.0, but those
# forms are collapsed in the compiler before being written
@ -237,7 +252,7 @@ proc findOrAddConstant(self: Chunk, constant: ASTNode): int =
result = self.consts.high()
proc addConstant*(self: Chunk, constant: ASTNode): array[3, uint8] =
proc addConstant*(self: Chunk, constant: LiteralExpr): array[3, uint8] =
## Writes a constant to a chunk. Returns its index casted to a 3-byte
## sequence (array). Constant indexes are reused if a constant is used
## more than once and self.reuseConsts equals true

View File

@ -859,11 +859,46 @@ proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declarat
discard # Unreachable
proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]],
                        parameter: var tuple[name: IdentExpr, valueType: Expression, mutable: bool],
                        defaults: var seq[Expression]) =
  ## Parses the parameter list of a function declaration,
  ## up to and including the closing parenthesis. Parsed
  ## parameters are appended to arguments and default values
  ## to defaults, while parameter is used as scratch space
  ## for the entry being parsed. A type annotation applies
  ## to its own parameter and to every untyped parameter
  ## directly preceding it (as in "a, b: int"). Errors out
  ## on duplicate names, on positional parameters following
  ## default ones, on parameters left without a type and
  ## when more than 255 parameters are declared
  while not self.check(RightParen):
    # Another parameter is about to be parsed, so the limit
    # is hit as soon as 255 of them are already stored
    if arguments.len >= 255:
      self.error("cannot have more than 255 arguments in function declaration")
    self.expect(Identifier, "expecting parameter name")
    parameter.name = newIdentExpr(self.peek(-1))
    if self.match(Colon):
      parameter.mutable = false
      if self.match(Var):
        parameter.mutable = true
      parameter.valueType = self.expression()
      # Propagate the type backwards to the untyped
      # parameters that immediately precede this one
      for i in countdown(arguments.high(), 0):
        if arguments[i].valueType != nil:
          break
        arguments[i].valueType = parameter.valueType
        arguments[i].mutable = parameter.mutable
    else:
      parameter.valueType = nil
    # Duplicates are detected by name: every parameter gets a
    # freshly allocated IdentExpr, so comparing the tuples (and
    # therefore the references) would never find a match
    for existing in arguments:
      if existing.name.token.lexeme == parameter.name.token.lexeme:
        self.error("duplicate parameter name in function declaration")
    arguments.add(parameter)
    if self.match(Equal):
      defaults.add(self.expression())
    elif defaults.len() > 0:
      self.error("positional argument cannot follow default argument in function declaration")
    if not self.match(Comma):
      break
  self.expect(RightParen)
  for argument in arguments:
    if argument.valueType == nil:
      self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration =
## Parses functions, coroutines, generators, anonymous functions and custom operators
let tok = self.peek(-1)
var enclosingFunction = self.currentFunction
var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]] = @[]
var defaults: seq[Expression] = @[]
var returnType: Expression
if not isLambda and self.check(Identifier):
@ -898,11 +933,6 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
returnType=nil)
elif not isOperator:
self.error("funDecl: invalid state")
# Beware: lots of code duplication ahead. I agree,
# it's disgusting, but each case of argument parsing
# is specialized for a given context and is hard to
# generalize elegantly into a single function that
# makes sense
if self.match(Colon):
# Function has explicit return type
if self.match([Function, Coroutine, Generator]):
@ -911,103 +941,33 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
# the type declaration for a function lacks
# the braces that would qualify it as an
# expression
var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]] = @[]
var defaults: seq[Expression] = @[]
returnType = newLambdaExpr(arguments, defaults, nil, isGenerator=self.peek(-1).kind == Generator,
isAsync=self.peek(-1).kind == Coroutine,
token=self.peek(-1), returnType=nil)
var parameter: tuple[name: IdentExpr, valueType: Expression]
var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool]
if self.match(LeftParen):
while not self.check(RightParen):
if arguments.len > 255:
self.error("cannot have more than 255 arguments in function declaration")
self.expect(Identifier, "expecting parameter name")
parameter.name = newIdentExpr(self.peek(-1))
if self.match(Colon):
parameter.valueType = self.expression()
for i in countdown(arguments.high(), 0):
if arguments[i].valueType != nil:
break
arguments[i].valueType = parameter.valueType
else:
parameter.valueType = nil
if parameter in arguments:
self.error("duplicate parameter name in function declaration")
arguments.add(parameter)
if self.match(Equal):
defaults.add(self.expression())
elif defaults.len() > 0:
self.error("positional argument cannot follow default argument in function declaration")
if not self.match(Comma):
break
self.expect(RightParen)
for argument in arguments:
if argument.valueType == nil:
self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
self.parseDeclArguments(arguments, parameter, defaults)
if self.match(Colon):
LambdaExpr(returnType).returnType = self.expression()
else:
returnType = self.expression()
if not self.match(LeftBrace):
self.expect(LeftParen)
var parameter: tuple[name: IdentExpr, valueType: Expression]
while not self.check(RightParen):
if arguments.len > 255:
self.error("cannot have more than 255 arguments in function declaration")
self.expect(Identifier, "expecting parameter name")
parameter.name = newIdentExpr(self.peek(-1))
if self.match(Colon):
parameter.valueType = self.expression()
for i in countdown(arguments.high(), 0):
if arguments[i].valueType != nil:
break
arguments[i].valueType = parameter.valueType
else:
parameter.valueType = nil
if parameter in arguments:
self.error("duplicate parameter name in function declaration")
arguments.add(parameter)
if self.match(Equal):
defaults.add(self.expression())
elif defaults.len() > 0:
self.error("positional argument cannot follow default argument in function declaration")
if not self.match(Comma):
break
self.expect(RightParen)
var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool]
self.parseDeclArguments(arguments, parameter, defaults)
if self.match(Colon):
# Function's return type
if self.match([Function, Coroutine, Generator]):
var arguments: seq[tuple[name: IdentExpr, valueType: Expression]] = @[]
var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool]] = @[]
var defaults: seq[Expression] = @[]
returnType = newLambdaExpr(arguments, defaults, nil, isGenerator=self.peek(-1).kind == Generator,
isAsync=self.peek(-1).kind == Coroutine,
token=self.peek(-1), returnType=nil)
var parameter: tuple[name: IdentExpr, valueType: Expression]
var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool]
if self.match(LeftParen):
while not self.check(RightParen):
if arguments.len > 255:
self.error("cannot have more than 255 arguments in function declaration")
self.expect(Identifier, "expecting parameter name")
parameter.name = newIdentExpr(self.peek(-1))
if self.match(Colon):
parameter.valueType = self.expression()
for i in countdown(arguments.high(), 0):
if arguments[i].valueType != nil:
break
arguments[i].valueType = parameter.valueType
else:
parameter.valueType = nil
if parameter in arguments:
self.error("duplicate parameter name in function declaration")
arguments.add(parameter)
if self.match(Equal):
defaults.add(self.expression())
elif defaults.len() > 0:
self.error("positional argument cannot follow default argument in function declaration")
if not self.match(Comma):
break
self.expect(RightParen)
for argument in arguments:
if argument.valueType == nil:
self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
self.parseDeclArguments(arguments, parameter, defaults)
if self.match(Colon):
LambdaExpr(returnType).returnType = self.expression()
else:

View File

@ -29,7 +29,7 @@ proc getLineEditor: LineEditor
const debugLexer = false
const debugParser = false
const debugCompiler = true
const debugSerializer = true
const debugSerializer = false
when isMainModule:
@ -86,16 +86,12 @@ when isMainModule:
echo "\nBytecode disassembler output below:\n"
disassembleChunk(compiled, "<stdin>")
echo ""
serializer.dumpToFile(compiled, input, "<stdin>", "stdin.pbc")
serializedRaw = serializer.dumpBytes(compiled, input, "<stdin>")
serialized = serializer.loadFile("stdin.pbc")
when debugSerializer:
echo "Serialization step: "
echo "Dumping bytecode to 'stdin.pbc'\n"
serializer.dumpToFile(compiled, input, "<stdin>", "stdin.pbc")
serializedRaw = serializer.dumpBytes(compiled, input, "<stdin>")
echo "Loading 'stdin.pbc'\n"
serialized = serializer.loadFile("stdin.pbc")
echo "Deserialized 'stdin.pbc':"
stdout.write("\t")
echo &"""Raw hex output: {serializedRaw.mapIt(toHex(it)).join("").toLowerAscii()}"""
echo ""