# peon/src/frontend/compiler.nim (1466 lines, 57 KiB, Nim)

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../config
import ../util/multibyte
import strformat
import algorithm
import parseutils
import strutils
import sequtils
import os
export ast
export token
export multibyte
type
  TypeKind = enum
    ## An enumeration of compile-time
    ## types
    Int8, UInt8, Int16, UInt16, Int32,
    UInt32, Int64, UInt64, Float32, Float64,
    Char, Byte, String, Function, CustomType,
    Nil, Nan, Bool, Inf, Typedesc, Generic,
    Mutable, Reference, Pointer,
    Any   # Any is used internally in a few cases,
          # for example when looking for operators
          # when only the type of the arguments is of
          # interest
  Type = ref object
    ## A wrapper around compile-time types. Only
    ## function types and the mutable/reference/pointer
    ## wrappers carry extra data: every other kind is
    ## fully described by its discriminant
    case kind: TypeKind
    of Function:
      # Name of the function
      name: string
      isLambda: bool
      isGenerator: bool
      isCoroutine: bool
      # Argument names paired with their types, in
      # declaration order
      args: seq[tuple[name: string, kind: Type]]
      # nil is used for functions that return nothing
      returnType: Type
    of Mutable, Reference, Pointer:
      # The type being wrapped
      value: Type
    else:
      discard
# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode
type
  Name = ref object
    ## A compile-time wrapper around
    ## statically resolved names
    # Name of the identifier
    name: IdentExpr
    # Owner of the identifier (module)
    owner: string
    # Scope depth
    depth: int
    # Is this name private?
    isPrivate: bool
    # Is this a constant?
    isConst: bool
    # Was this name declared with let? If so,
    # assignment() refuses to reassign it
    isLet: bool
    # The name's type
    valueType: Type
    # For functions, this marks where the function's
    # code begins. For variables, this stores where
    # their StoreVar/StoreHeap instruction was emitted
    codePos: int
    # Is the name closed over (i.e. used in a closure)?
    isClosedOver: bool
    # Where is this node declared in the file?
    line: int
  Loop = object
    ## A "loop object" used
    ## by the compiler to emit
    ## appropriate jump offsets
    ## for continue and break
    ## statements
    # Position in the bytecode where the loop starts
    start: int
    # Scope depth where the loop is located
    depth: int
    # Absolute jump offsets into our bytecode that we need to
    # patch. Used for break statements
    breakPos: seq[int]
  Compiler* = ref object
    ## A wrapper around the Peon compiler's state
    # The bytecode chunk where we write code to
    chunk: Chunk
    # The output of our parser (AST)
    ast: seq[Declaration]
    # The current AST node we're looking at
    current: int
    # The current file being compiled (used only for
    # error reporting)
    file: string
    # Compile-time "simulation" of the stack at
    # runtime to load variables that have stack
    # behavior more efficiently
    names: seq[Name]
    # Beginning of stack frames for function calls
    frames: seq[int]
    # The current scope depth. If > 0, we're
    # in a local scope, otherwise it's global
    scopeDepth: int
    # The current function being compiled
    currentFunction: FunDecl
    # Are optimizations turned on?
    enableOptimizations: bool
    # The current loop being compiled (used to
    # keep track of where to jump)
    currentLoop: Loop
    # The current module being compiled
    # (used to restrict access to statically
    # defined variables at compile time)
    currentModule: string
    # Each time a defer statement is
    # compiled, its code is emitted
    # here. Later, if there is any code
    # to defer in the current function,
    # funDecl will wrap the function's code
    # inside an implicit try/finally block
    # and add this code in the finally branch.
    # This sequence is emptied each time a
    # function declaration is compiled and stores only
    # deferred code for the current function (may
    # be empty)
    deferred: seq[uint8]
    # List of closed-over variables
    closedOver: seq[Name]
proc `$`(self: Name): string =
  ## Stringifies a name object. Every bookkeeping field
  ## except the value type is included in the output
  result = &"Name(name='{self.name}', depth={self.depth}, owner='{self.owner}', private={self.isPrivate}, let={self.isLet}, const={self.isConst}"
  result.add(&", pos={self.codePos}, closure={self.isClosedOver}, line={self.line})")
proc newCompiler*(enableOptimizations: bool = true): Compiler =
  ## Creates a fresh Compiler with an empty AST, no declared
  ## names, no stack frames and the scope depth set to zero
  ## (the global scope). Fields that are not listed here keep
  ## their default (zero) value
  result = Compiler(ast: @[],
                    current: 0,
                    file: "",
                    names: @[],
                    scopeDepth: 0,
                    currentFunction: nil,
                    enableOptimizations: enableOptimizations,
                    currentModule: "",
                    frames: @[])
## Forward declarations
# These procedures are used by code that appears before
# their implementation, so their signatures are announced
# here first
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type): seq[Name]
proc compareTypes(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, retAddr: int)
## End of forward declarations
## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
  ## Returns the most recently consumed AST node, or the
  ## last node of the tree once compilation has walked past
  ## its end. Before any node has been consumed the first
  ## node is returned, and nil is returned when there is no
  ## AST at all: this procedure is called while building
  ## error messages, so it must not fail on its own
  if self.ast.len() == 0:
    return nil
  # self.current points one past the node being compiled,
  # hence the - 1. The clamp covers both ends of the tree
  result = self.ast[clamp(self.current - 1, 0, self.ast.high())]
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
  ## Returns the function declaration being compiled
  ## (nil when there is none)
  result = self.currentFunction


proc getFile*(self: Compiler): string {.inline.} =
  ## Returns the file being compiled
  result = self.file


proc getModule*(self: Compiler): string {.inline.} =
  ## Returns the module being compiled
  result = self.currentModule
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
  ## Looks at the AST node located `distance` nodes away
  ## from the current one without consuming anything.
  ## Negative distances reach back to nodes that have
  ## already been consumed. Whenever the requested
  ## position falls outside the tree, the last AST node
  ## is returned instead
  if self.ast.high() != -1:
    let target = self.current + distance
    if target >= 0 and target <= self.ast.high():
      return self.ast[target]
  return self.ast[^1]
proc done(self: Compiler): bool =
  ## Tells whether every node of the AST has
  ## been consumed already
  return self.current >= self.ast.len()
proc error(self: Compiler, message: string) {.raises: [CompileError].} =
  ## Aborts compilation by raising a CompileError that carries
  ## the given message along with the current AST node, file
  ## and module
  let failure = CompileError(msg: message,
                             node: self.getCurrentNode(),
                             file: self.file,
                             module: self.currentModule)
  raise failure
proc step(self: Compiler): ASTNode =
  ## Consumes the current AST node and returns it. Once
  ## the whole tree has been consumed the position is
  ## no longer advanced
  let consumed = self.peek()
  if not self.done():
    inc(self.current)
  return consumed
proc emitByte(self: Compiler, byt: OpCode | uint8) =
  ## Writes one byte (either an opcode or a raw value) to
  ## the chunk being compiled, tagging it with the line
  ## of the current AST node
  when DEBUG_TRACE_COMPILER:
    echo &"DEBUG - Compiler: Emitting {$byt}"
  let rawByte = uint8 byt
  let line = self.peek().token.line
  self.chunk.write(rawByte, line)
proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) =
  ## Writes a run of bytes to the current chunk, in order,
  ## by handing each of them to emitByte
  for i in 0 ..< bytarr.len():
    self.emitByte(bytarr[i])
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
  ## Stores the integer found at the start of val's lexeme
  ## in the chunk's constant table, using as many bytes as
  ## the given integer type requires, and returns the 3-byte
  ## index that writeConstant hands back. Non-integer types
  ## store nothing and yield a zeroed index
  var parsed: int
  discard parseInt(val.token.lexeme, parsed)
  let width = typ.kind
  if width in {UInt8, Int8}:
    result = self.chunk.writeConstant([uint8(parsed)])
  elif width in {Int16, UInt16}:
    result = self.chunk.writeConstant(parsed.toDouble())
  elif width in {Int32, UInt32}:
    result = self.chunk.writeConstant(parsed.toQuad())
  elif width in {Int64, UInt64}:
    result = self.chunk.writeConstant(parsed.toLong())
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
  ## Emits a constant instruction along
  ## with its operand. The load opcode is picked from
  ## the type inferred for obj, while the operand is
  ## written by makeConstant using the given kind
  let inferred = self.inferType(obj)
  # inferType returns nil for expressions it can't type yet
  # (string literals, for example): in that case we rely on
  # the type provided by the caller instead of crashing on
  # a nil dereference
  let opType = if inferred != nil: inferred else: kind
  if opType != nil:
    case opType.kind:
      of Int64:
        self.emitByte(LoadInt64)
      of UInt64:
        self.emitByte(LoadUInt64)
      of Int32:
        self.emitByte(LoadInt32)
      else:
        discard # TODO
  self.emitBytes(self.makeConstant(obj, kind))
proc emitJump(self: Compiler, opcode: OpCode): int =
  ## Writes the given jump opcode followed by a 3-byte
  ## placeholder offset (the largest unsigned 24 bit integer)
  ## that patchJump is expected to overwrite later, possibly
  ## shrinking the instruction to its 16 bit variant. The
  ## return value is the absolute position of the placeholder
  ## instruction inside the chunk's bytecode
  const placeholder = 0xffffff
  self.emitByte(opcode)
  self.emitBytes(placeholder.toTriple())
  # 1 byte of opcode + 3 bytes of offset
  return self.chunk.code.len() - 4
proc patchJump(self: Compiler, offset: int) =
  ## Fills in the real offset of a jump that was emitted
  ## by emitJump at the given position, measuring the
  ## distance from there to the end of the bytecode written
  ## so far. Jumps that fit in 16 bits are converted to
  ## their short variant (which drops one byte from the
  ## bytecode), all others are encoded (or converted to)
  ## the long, 24 bit variant
  var distance: int = self.chunk.code.len() - offset
  if distance > 16777215:
    self.error("cannot jump more than 16777216 bytecode instructions")
  let opcode = OpCode(self.chunk.code[offset])
  if distance < uint16.high().int:
    case opcode:
      of LongJumpForwards:
        self.chunk.code[offset] = JumpForwards.uint8()
        # A relative jump does not take its own
        # argument into account, since that is
        # hardcoded in the bytecode itself
        distance -= 4
      of LongJumpBackwards:
        self.chunk.code[offset] = JumpBackwards.uint8()
        distance -= 4
      of LongJumpIfFalse:
        self.chunk.code[offset] = JumpIfFalse.uint8()
      of LongJumpIfFalsePop:
        self.chunk.code[offset] = JumpIfFalsePop.uint8()
      of LongJumpIfFalseOrPop:
        self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
      of JumpForwards, JumpBackwards:
        distance -= 3
      else:
        discard
    # The short form only needs 2 bytes of offset, so the
    # first (empty) byte of the placeholder goes away
    self.chunk.code.delete(offset + 1)
    # -1 accounts for the byte that was just removed
    let encoded = (distance - 1).toDouble()
    self.chunk.code[offset + 1] = encoded[0]
    self.chunk.code[offset + 2] = encoded[1]
  else:
    case opcode:
      of JumpForwards:
        self.chunk.code[offset] = LongJumpForwards.uint8()
        distance -= 3
      of JumpBackwards:
        self.chunk.code[offset] = LongJumpBackwards.uint8()
        distance -= 3
      of JumpIfFalse:
        self.chunk.code[offset] = LongJumpIfFalse.uint8()
      of JumpIfFalsePop:
        self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
      of JumpIfFalseOrPop:
        self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
      of LongJumpForwards, LongJumpBackwards:
        distance -= 4
      else:
        discard
    let encoded = distance.toTriple()
    self.chunk.code[offset + 1] = encoded[0]
    self.chunk.code[offset + 2] = encoded[1]
    self.chunk.code[offset + 3] = encoded[2]
proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
  ## Looks up the most recently declared name object whose
  ## identifier matches the given one, returning nil when
  ## there is none. Scope depth is not taken into account
  ## here (the depth argument is unused): that is
  ## getStackPos' job. Private names owned by a module other
  ## than the current one are never returned
  for i in countdown(self.names.high(), 0):
    let candidate = self.names[i]
    if candidate.name.token.lexeme != name.token.lexeme:
      continue
    if candidate.isPrivate and candidate.owner != self.currentModule:
      # Not visible from here, but an older declaration
      # in the current module may still match
      continue
    return candidate
  return nil
proc getStackPos(self: Compiler, name: IdentExpr,
                 depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
  ## Iterates the internal list of declared names backwards and
  ## returns a tuple (closedOver, pos) that tells the caller whether
  ## the name is to be emitted as a closure as well as its predicted
  ## stack/closure array position. Returns (false, -1) if the variable's
  ## location can not be determined at compile time (this is an error!).
  ## Note that private names declared in other modules will not be resolved!
  # Both loops walk by index so that the reported position
  # always matches the entry being inspected, even when
  # an entry is skipped
  for i in countdown(self.names.high(), 0):
    let variable = self.names[i]
    if name.name.lexeme != variable.name.name.lexeme:
      continue
    if variable.isPrivate and variable.owner != self.currentModule:
      continue
    if variable.depth == depth or variable.depth == 0:
      # variable.depth == 0 for globals!
      return (false, i)
    elif variable.depth > 0:
      # Declared in another local scope: it can only be
      # reached through the closure array
      for j in countdown(self.closedOver.high(), 0):
        if self.closedOver[j].name.token.lexeme == name.name.lexeme:
          return (true, j)
  return (false, -1)
proc detectClosureVariable(self: Compiler, name: Name,
                           depth: int = self.scopeDepth) =
  ## Checks whether the given name is being referenced from
  ## a scope deeper than the one it was declared in and, if
  ## so, registers it as a closed-over variable and rewrites
  ## the code emitted at its declaration so that the value is
  ## stored in the closure array. Nothing happens when name is
  ## nil (i.e. undeclared or unreachable, for example because
  ## it is private to another module). This must be called each
  ## time a name is referenced, otherwise closed-over variables
  ## are not emitted properly and the runtime may misbehave
  if name == nil or name.depth >= depth:
    return
  # The name is closed over: the NoOp instructions that
  # self.declareName left in place for us become a
  # StoreHeap. Nothing else needs to change, because
  # self.identifier() emits LoadHeap on its own when it
  # detects a closed-over variable
  self.closedOver.add(name)
  let slot = self.closedOver.high().toTriple()
  if self.closedOver.len() >= 16777216:
    self.error("too many consecutive closed-over variables (max is 16777216)")
  let at = name.codePos
  self.chunk.code[at] = StoreHeap.uint8
  for k in 0 .. 2:
    self.chunk.code[at + 1 + k] = slot[k]
  name.isClosedOver = true
proc compareTypes(self: Compiler, a, b: Type): bool =
  ## Compares two type objects
  ## for equality (works with nil!)
  # The nil code here is for void functions (when
  # we compare their return types): nil only ever
  # equals nil
  if a == nil or b == nil:
    return a == nil and b == nil
  if a.kind != b.kind:
    # Different discriminants can never
    # describe the same type
    return false
  case a.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, Nan, Bool, Inf:
      # A value type's type is always equal to
      # another one's
      return true
    of Reference, Pointer, Mutable:
      # Both sides are the same kind of wrapper, so
      # only the wrapped types are left to compare
      return self.compareTypes(a.value, b.value)
    of Function:
      # Functions are a bit trickier
      if a.args.len() != b.args.len():
        return false
      if not self.compareTypes(a.returnType, b.returnType):
        # Any acts as a wildcard for return types: it is
        # used when looking for operators, where only the
        # type of the arguments is of interest. A mismatch
        # is fatal only if neither side is Any
        let wildcard = (a.returnType != nil and a.returnType.kind == Any) or
                       (b.returnType != nil and b.returnType.kind == Any)
        if not wildcard:
          return false
      for (argA, argB) in zip(a.args, b.args):
        if not self.compareTypes(argA.kind, argB.kind):
          return false
      return true
    else:
      # Remaining kinds have no comparison rule yet
      return false
proc toIntrinsic(name: string): Type =
  ## Maps the name of a built-in type (or one of its
  ## aliases) to the matching type object. Names that
  ## don't denote a built-in type yield nil
  case name:
    of "int", "int64", "i64":
      result = Type(kind: Int64)
    of "uint64", "u64":
      result = Type(kind: UInt64)
    of "int32", "i32":
      result = Type(kind: Int32)
    of "uint32", "u32":
      result = Type(kind: UInt32)
    of "int16", "i16":
      result = Type(kind: Int16)
    of "uint16", "u16":
      result = Type(kind: UInt16)
    of "int8", "i8":
      result = Type(kind: Int8)
    of "uint8", "u8":
      result = Type(kind: UInt8)
    of "f64", "float", "float64":
      result = Type(kind: Float64)
    of "f32", "float32":
      result = Type(kind: Float32)
    of "byte":
      result = Type(kind: Byte)
    of "char":
      result = Type(kind: Char)
    of "nan":
      result = Type(kind: Nan)
    of "nil":
      result = Type(kind: Nil)
    of "inf":
      result = Type(kind: Inf)
    of "bool":
      result = Type(kind: Bool)
    of "type":
      result = Type(kind: Typedesc)
    else:
      result = nil
proc toIntrinsic(self: Compiler, typ: Expression): Type =
  ## Returns the intrinsic type an expression stands for,
  ## when there is one. Boolean, integer and float literals
  ## are looked up by their lexeme; identifiers go through
  ## type inference first and fall back to a lookup by
  ## lexeme. Anything else (nil included) yields nil
  if typ.isNil():
    return nil
  if typ.kind in {trueExpr, falseExpr, intExpr, floatExpr}:
    result = typ.token.lexeme.toIntrinsic()
  elif typ.kind == identExpr:
    result = self.inferType(typ)
    if result == nil:
      result = typ.token.lexeme.toIntrinsic()
proc inferType(self: Compiler, node: LiteralExpr): Type =
  ## Works out the type of a literal. Numeric literals may
  ## carry a size specifier after a single quote: without
  ## one, integers are Int64 and floats are Float64.
  ## Literal kinds that are not handled yet (as well
  ## as nil nodes) yield nil
  if node.isNil():
    return nil
  case node.kind:
    of intExpr, binExpr, octExpr, hexExpr:
      let parts = node.token.lexeme.split("'")
      if len(parts) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if parts.len() == 1:
        return Type(kind: Int64)
      let suffix = parts[1]
      let sized = suffix.toIntrinsic()
      if self.compareTypes(sized, nil):
        self.error(&"invalid type specifier '{suffix}' for int")
      else:
        return sized
    of floatExpr:
      let parts = node.token.lexeme.split("'")
      if len(parts) notin 1..2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if parts.len() == 1 or parts[1] == "f64":
        return Type(kind: Float64)
      let suffix = parts[1]
      let sized = suffix.toIntrinsic()
      if self.compareTypes(sized, nil):
        self.error(&"invalid type specifier '{suffix}' for float")
      else:
        return sized
    of nilExpr:
      return Type(kind: Nil)
    of trueExpr, falseExpr:
      return Type(kind: Bool)
    of nanExpr:
      return Type(kind: TypeKind.Nan)
    of infExpr:
      return Type(kind: TypeKind.Inf)
    else:
      discard # TODO
proc inferType(self: Compiler, node: Expression): Type =
  ## Infers the type of a given expression and
  ## returns it. nil is returned when the type
  ## can't be determined (or when node is nil)
  if node == nil:
    return nil
  case node.kind:
    of identExpr:
      let node = IdentExpr(node)
      let name = self.resolve(node)
      if name != nil:
        return name.valueType
      else:
        # Not a declared name: it may still be
        # the name of a built-in type
        result = node.name.lexeme.toIntrinsic()
    of unaryExpr:
      # The operand's type is used as the type
      # of the whole expression
      return self.inferType(UnaryExpr(node).a)
    of binaryExpr:
      let node = BinaryExpr(node)
      let a = self.inferType(node.a)
      let b = self.inferType(node.b)
      # Operands of different types make the
      # expression untypeable
      if not self.compareTypes(a, b):
        return nil
      return a
    of {intExpr, hexExpr, binExpr, octExpr,
        strExpr, falseExpr, trueExpr, infExpr,
        nanExpr, floatExpr, nilExpr
       }:
      return self.inferType(LiteralExpr(node))
    of lambdaExpr:
      let node = LambdaExpr(node)
      result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
      if node.returnType != nil:
        result.returnType = self.inferType(node.returnType)
      for argument in node.arguments:
        result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType)))
    else:
      discard # Unreachable
proc inferType(self: Compiler, node: Declaration): Type =
  ## Works out the type of a declaration. Functions and
  ## variables that have been declared already yield the
  ## type stored in their name object; a variable that
  ## hasn't been declared yet gets the type of its
  ## value. nil is returned in every other case
  if node.isNil():
    return nil
  if node.kind == funDecl:
    let known = self.resolve(FunDecl(node).name)
    if known != nil:
      result = known.valueType
  elif node.kind == NodeKind.varDecl:
    let variable = VarDecl(node)
    let known = self.resolve(variable.name)
    if known == nil:
      result = self.inferType(variable.value)
    else:
      result = known.valueType
proc typeToStr(self: Compiler, typ: Type): string =
  ## Renders a type object as text. Simple types become
  ## their lowercase kind name, wrappers are prefixed with
  ## ptr/ref/var and functions are rendered with their
  ## argument list and return type (if any). Kinds without
  ## a textual form yield an empty string
  case typ.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, TypeKind.Nan, Bool,
       TypeKind.Inf:
      result = ($typ.kind).toLowerAscii()
    of Pointer:
      result = "ptr " & self.typeToStr(typ.value)
    of Reference:
      result = "ref " & self.typeToStr(typ.value)
    of Mutable:
      result = "var " & self.typeToStr(typ.value)
    of Function:
      var rendered: seq[string] = @[]
      for arg in typ.args:
        rendered.add(arg.name & ": " & self.typeToStr(arg.kind))
      result = "fn (" & rendered.join(", ") & ")"
      if typ.returnType != nil:
        result.add(": " & self.typeToStr(typ.returnType))
    else:
      discard
## End of utility functions
proc emitInt64Literal(self: Compiler, value: int, origin: Token) =
  ## Wraps an already parsed integer into a synthetic decimal
  ## integer literal that starts where origin does and emits
  ## it as an Int64 constant
  let digits = $value
  let synthetic = newIntExpr(Token(lexeme: digits, line: origin.line,
                                   pos: (start: origin.pos.start,
                                         stop: origin.pos.start + len(digits))
                                  )
                            )
  self.emitConstant(synthetic, Type(kind: Int64))


proc literal(self: Compiler, node: ASTNode) =
  ## Emits the instructions that load a literal value:
  ## singletons get a dedicated opcode each, while strings
  ## and numbers are emitted as constants. Hexadecimal,
  ## binary and octal integers are converted to decimal
  ## first. Await expressions are compiled here as well
  # TODO: Take size specifier into account!
  case node.kind:
    of trueExpr:
      self.emitByte(LoadTrue)
    of falseExpr:
      self.emitByte(LoadFalse)
    of nilExpr:
      self.emitByte(LoadNil)
    of infExpr:
      self.emitByte(LoadInf)
    of nanExpr:
      self.emitByte(LoadNan)
    of strExpr:
      self.emitConstant(LiteralExpr(node), Type(kind: String))
    of intExpr:
      let lit = IntExpr(node)
      var parsed: int
      try:
        discard parseInt(lit.literal.lexeme, parsed)
      except ValueError:
        self.error("integer value out of range")
      self.emitConstant(lit, Type(kind: Int64))
    of hexExpr:
      let lit = HexExpr(node)
      var parsed: int
      try:
        discard parseHex(lit.literal.lexeme, parsed)
      except ValueError:
        self.error("integer value out of range")
      self.emitInt64Literal(parsed, lit.token)
    of binExpr:
      let lit = BinExpr(node)
      var parsed: int
      try:
        discard parseBin(lit.literal.lexeme, parsed)
      except ValueError:
        self.error("integer value out of range")
      self.emitInt64Literal(parsed, lit.token)
    of octExpr:
      let lit = OctExpr(node)
      var parsed: int
      try:
        discard parseOct(lit.literal.lexeme, parsed)
      except ValueError:
        self.error("integer value out of range")
      self.emitInt64Literal(parsed, lit.token)
    of floatExpr:
      let lit = FloatExpr(node)
      var parsed: float
      try:
        discard parseFloat(lit.literal.lexeme, parsed)
      except ValueError:
        self.error("floating point value out of range")
      self.emitConstant(lit, Type(kind: Float64))
    of awaitExpr:
      let awaited = AwaitExpr(node)
      self.expression(awaited.expression)
      self.emitByte(OpCode.Await)
    else:
      self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")
proc matchImpl(self: Compiler, name: string, kind: Type): Name =
  ## Tries to find a matching function implementation
  ## compatible with the given type and returns its
  ## name object. A CompileError is raised when no
  ## implementation matches (listing every candidate
  ## with the same name and why it was discarded) or
  ## when more than one does
  let impl = self.findByType(name, kind)
  if impl.len() == 0:
    var msg = &"cannot find a suitable implementation for '{name}'"
    let names = self.findByName(name)
    if names.len() > 0:
      msg &= &", found {len(names)} candidate"
      if names.len() > 1:
        msg &= "s"
      msg &= ": "
      for candidate in names:
        msg &= &"\n - '{candidate.name.token.lexeme}' of type '{self.typeToStr(candidate.valueType)}'"
        if candidate.valueType.kind != Function:
          msg &= ", not a callable"
        elif kind.args.len() != candidate.valueType.args.len():
          msg &= &", wrong number of arguments ({candidate.valueType.args.len()} expected, got {kind.args.len()})"
        else:
          for i, arg in kind.args:
            if not self.compareTypes(arg.kind, candidate.valueType.args[i].kind):
              msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(candidate.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead"
              # Only the first mismatch is reported, as
              # the message itself says
              break
    self.error(msg)
  elif impl.len() > 1:
    var msg = &"multiple matching implementations of '{name}' found:\n"
    for fn in reversed(impl):
      msg &= &"- '{fn.name}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
    self.error(msg)
  return impl[0]
proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
  ## Emits a call to the function implementing a unary
  ## operator. In order: a placeholder return address is
  ## loaded, the operand is compiled, a Call instruction
  ## targeting the function's code with one argument is
  ## emitted and the return address is patched
  self.emitByte(LoadUInt32)
  # Index of the placeholder constant, which
  # gets its real value at the end
  let returnSlot = self.chunk.consts.len()
  let placeholder = self.chunk.writeConstant((0xffffffff'u32).toQuad())
  self.emitBytes(placeholder)
  # The operand is the call's only argument
  self.expression(op.a)
  self.emitByte(Call)
  self.emitBytes(fn.codePos.toTriple())
  self.emitBytes(1.toTriple())
  self.patchReturnAddress(returnSlot)
proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
  ## Emits a call to the function implementing a binary
  ## operator. In order: a placeholder return address is
  ## loaded, both operands are compiled (left one first),
  ## a Call instruction targeting the function's code with
  ## two arguments is emitted and the return address is
  ## patched
  self.emitByte(LoadUInt32)
  # Index of the placeholder constant, which
  # gets its real value at the end
  let returnSlot = self.chunk.consts.len()
  let placeholder = self.chunk.writeConstant((0xffffffff'u32).toQuad())
  self.emitBytes(placeholder)
  # The operands are the call's arguments
  self.expression(op.a)
  self.expression(op.b)
  self.emitByte(Call)
  self.emitBytes(fn.codePos.toTriple())
  self.emitBytes(2.toTriple())
  self.patchReturnAddress(returnSlot)
proc unary(self: Compiler, node: UnaryExpr) =
  ## Compiles a unary expression as a call to the function
  ## named after the operator that accepts one argument
  ## of the operand's type (any return type will do)
  let operandType = self.inferType(node.a)
  let signature = Type(kind: Function,
                       returnType: Type(kind: Any),
                       args: @[("", operandType)])
  let implementation = self.matchImpl(node.token.lexeme, signature)
  self.callUnaryOp(implementation, node)
proc binary(self: Compiler, node: BinaryExpr) =
  ## Compiles a binary expression as a call to the function
  ## named after the operator that accepts two arguments
  ## of the operands' types (any return type will do)
  let leftType = self.inferType(node.a)
  let rightType = self.inferType(node.b)
  let signature = Type(kind: Function,
                       returnType: Type(kind: Any),
                       args: @[("", leftType), ("", rightType)])
  let implementation = self.matchImpl(node.token.lexeme, signature)
  self.callBinaryOp(implementation, node)
  # TODO: Get rid of old code
  #[
  case node.operator.kind:
      of NoMatch:
          # a and b
          self.expression(node.a)
          var jump: int
          if self.enableOptimizations:
              jump = self.emitJump(JumpIfFalseOrPop)
          else:
              jump = self.emitJump(JumpIfFalse)
              self.emitByte(Pop)
          self.expression(node.b)
          self.patchJump(jump)
      of EndOfFile:
          # a or b
          self.expression(node.a)
          let jump = self.emitJump(JumpIfTrue)
          self.expression(node.b)
          self.patchJump(jump)
      else:
          self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug!)")
  ]#
proc declareName(self: Compiler, node: Declaration) =
  ## Statically declares a name into the current scope.
  ## "Declaring" a name only means updating our internal
  ## list of identifiers so that further calls to resolve()
  ## correctly return them. There is no code to actually
  ## declare a variable at runtime: the value is already
  ## there on the stack. Variable declarations reserve 4
  ## NoOps in the bytecode; function declarations also
  ## declare each argument one scope deeper. Other kinds of
  ## declaration are ignored for now
  case node.kind:
    of NodeKind.varDecl:
      let node = VarDecl(node)
      # Creates a new Name entry so that self.identifier emits the proper stack offset
      if self.names.high() > 16777215:
        # If someone ever hits this limit in real-world scenarios, I swear I'll
        # slap myself 100 times with a sign saying "I'm dumb". Mark my words
        self.error("cannot declare more than 16777216 variables at a time")
      for name in self.findByName(node.name.token.lexeme):
        if name.depth == self.scopeDepth and name.valueType.kind notin {Function, CustomType}:
          # Trying to redeclare a variable in the same module is an error!
          self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
      # inferType returns nil when the value can't be typed:
      # that has to be reported instead of dereferenced
      let inferred = self.inferType(node.value)
      if inferred == nil:
        self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
      self.names.add(Name(depth: self.scopeDepth,
                          name: node.name,
                          isPrivate: node.isPrivate,
                          owner: self.currentModule,
                          isConst: node.isConst,
                          valueType: Type(kind: inferred.kind),
                          codePos: self.chunk.code.len(),
                          isLet: node.isLet,
                          isClosedOver: false,
                          line: node.token.line))
      # We emit 4 No-Ops because they may become a
      # StoreHeap instruction. If not, they'll be
      # removed before the compiler is finished
      # TODO: This may break CFI offsets
      self.emitBytes([NoOp, NoOp, NoOp, NoOp])
    of NodeKind.funDecl:
      let node = FunDecl(node)
      self.names.add(Name(depth: self.scopeDepth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function,
                                          name: node.name.token.lexeme,
                                          returnType: self.inferType(
                                              node.returnType),
                                          args: @[]),
                          codePos: self.chunk.code.high(),
                          name: node.name,
                          isLet: false,
                          isClosedOver: false,
                          line: node.token.line))
      let fn = self.names[^1]
      var name: Name
      for argument in node.arguments:
        if self.names.high() > 16777215:
          self.error("cannot declare more than 16777216 variables at a time")
        # wait, no LoadVar?? Yes! That's because when calling functions,
        # arguments will already be on the stack so there's no need to
        # load them here
        name = Name(depth: self.scopeDepth + 1,
                    isPrivate: true,
                    owner: self.currentModule,
                    isConst: false,
                    name: argument.name,
                    valueType: nil,
                    codePos: 0,
                    isLet: false,
                    isClosedOver: false,
                    line: argument.name.token.line)
        self.names.add(name)
        var argType = self.inferType(argument.valueType)
        # We check if the argument's type is a generic. This
        # happens before the type gets wrapped, otherwise an
        # unknown type would hide inside the wrapper
        if argType == nil and argument.valueType != nil and
            argument.valueType.kind == identExpr:
          for gen in node.generics:
            if gen.name == IdentExpr(argument.valueType):
              argType = Type(kind: Generic)
              break
        # If it's still nil, it's an error!
        if argType == nil:
          self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
        if argument.mutable:
          argType = Type(kind: Mutable, value: argType)
        elif argument.isRef:
          argType = Type(kind: Reference, value: argType)
        elif argument.isPtr:
          argType = Type(kind: Pointer, value: argType)
        name.valueType = argType
        fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
    else:
      discard # TODO: Types, enums
proc identifier(self: Compiler, node: IdentExpr) =
  ## Emits the code that loads the value bound to an
  ## identifier. Undeclared names are a compile error,
  ## constants are emitted as constant loads and everything
  ## else is loaded either from the stack or from the
  ## closure array
  let resolved = self.resolve(node)
  if resolved == nil:
    self.error(&"reference to undeclared name '{node.token.lexeme}'")
  elif resolved.isConst:
    # Constants become Load* instructions
    # regardless of the scope depth
    self.emitConstant(node, self.inferType(node))
  else:
    self.detectClosureVariable(resolved)
    # The position can't be -1 here: if it were,
    # self.resolve() would have returned nil
    let location = self.getStackPos(node)
    if location.closedOver:
      # Closure variables live on the heap, in a separate
      # "closure array" in the VM that has no stack
      # semantics. Indexing it is not much slower than
      # indexing the stack (both are dynamic arrays at
      # runtime anyway)
      self.emitByte(LoadHeap)
      self.emitBytes(self.closedOver.high().toTriple())
    else:
      # Statically resolved: the value is loaded from
      # the stack, relative to the current frame
      let relative = location.pos - self.frames[self.scopeDepth]
      self.emitByte(LoadVar)
      self.emitBytes(relative.toTriple())
proc findByName(self: Compiler, name: string): seq[Name] =
  ## Collects every declared object with the given name,
  ## most recent declaration first
  result = @[]
  for i in countdown(self.names.high(), 0):
    let candidate = self.names[i]
    if candidate.name.token.lexeme == name:
      result.add(candidate)
proc findByType(self: Compiler, name: string, kind: Type): seq[Name] =
  ## Collects every declared object with the given name
  ## whose type compares equal to the given one
  result = @[]
  let sameName = self.findByName(name)
  for i in 0 ..< sameName.len():
    if not self.compareTypes(sameName[i].valueType, kind):
      continue
    result.add(sameName[i])
proc assignment(self: Compiler, node: ASTNode) =
  ## Compiles assignment expressions. Assigning to an
  ## undeclared name, to a constant or to a name declared
  ## with let is a compile error. The value is compiled
  ## first, then stored either on the stack or in the
  ## closure array
  case node.kind:
    of assignExpr:
      let node = AssignExpr(node)
      let name = IdentExpr(node.name)
      let r = self.resolve(name)
      if r == nil:
        self.error(&"assignment to undeclared name '{name.token.lexeme}'")
      elif r.isConst:
        self.error(&"cannot assign to '{name.token.lexeme}' (constant)")
      elif r.isLet:
        self.error(&"cannot reassign '{name.token.lexeme}'")
      self.expression(node.value)
      let t = self.getStackPos(name)
      let index = t.pos
      if index != -1:
        if not t.closedOver:
          self.emitByte(StoreVar)
        else:
          self.emitByte(StoreHeap)
        self.emitBytes(index.toTriple())
      else:
        # The message must show the name being assigned
        # to, not the token of the assignment itself
        self.error(&"reference to undeclared name '{name.token.lexeme}'")
    of setItemExpr:
      let node = SetItemExpr(node)
      let typ = self.inferType(node)
      if typ == nil:
        self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
      # TODO
    else:
      self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
proc beginScope(self: Compiler) =
  ## Enters a new local scope, which is one
  ## level deeper than the current one
  self.scopeDepth += 1
proc endScope(self: Compiler) =
  ## Ends the current local scope. Every non-function name
  ## declared deeper than the scope we return to is removed
  ## from the list of names, the NoOps reserved by its
  ## declaration are deleted if they were never turned into
  ## something else and the code to pop its value off the
  ## stack is emitted
  if self.scopeDepth < 0:
    self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
  dec(self.scopeDepth)
  var popped: int = 0
  var indeces: seq[int] = @[]
  # Names are visited from the most recent to the oldest, so
  # indeces ends up in descending order and deleting a name
  # never shifts the position of one that is yet to be deleted
  for idx in countdown(self.names.high(), 0):
    let name = self.names[idx]
    if name.depth > self.scopeDepth and name.valueType.kind != TypeKind.Function:
      inc(popped)
      if OpCode(self.chunk.code[name.codePos]) == NoOp:
        for _ in countup(0, 3):
          # Since by deleting it the size of the
          # sequence decreases, we don't need to
          # increase the index
          self.chunk.code.delete(name.codePos)
      indeces.add(idx)
      if not self.enableOptimizations:
        # All variables with a scope depth larger than the current one
        # are now out of scope. Begone, you're now homeless!
        self.emitByte(Pop)
  # Without optimizations one Pop per variable has been emitted
  # already, so there is nothing left to do in that case
  if self.enableOptimizations:
    if popped > 1:
      # If we're popping less than 65535 variables, then
      # we can emit a PopN instruction. This is true for
      # 99.99999% of the use cases of the language (who the
      # hell is going to use 65 THOUSAND local variables?), but
      # if you'll ever use more then Peon will emit a PopN instruction
      # for the first 65 thousand and change local variables and then
      # emit another batch of plain ol' Pop instructions for the rest
      let batch = uint16.high().int()
      self.emitByte(PopN)
      self.emitBytes(min(popped, batch).toDouble())
      for _ in countup(1, popped - batch):
        self.emitByte(Pop)
    elif popped == 1:
      # We only emit PopN if we're popping more than one value
      self.emitByte(Pop)
  for index in indeces:
    self.names.delete(index)
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles a block statement: a fresh local scope
    ## is opened, every declaration in the block is
    ## compiled in order and the scope is closed again
    self.beginScope()
    for item in node.code:
        self.declaration(item)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code. The condition is compiled first,
    ## then a conditional jump is emitted in front of the
    ## then branch; if an else branch exists, a forward
    ## jump is emitted in front of it as well
    self.expression(node.condition)
    # With optimizations enabled JumpIfFalsePop is emitted,
    # otherwise JumpIfFalse followed by an explicit Pop
    var jumpCode: OpCode
    if self.enableOptimizations:
        jumpCode = JumpIfFalsePop
    else:
        jumpCode = JumpIfFalse
    let jump = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    # NOTE(review): the conditional jump is patched before the
    # else-skipping jump below is emitted. If patchJump targets
    # the current end of the bytecode, the false path lands on
    # that JumpForwards and the else branch is never executed.
    # Confirm against patchJump before reordering
    self.patchJump(jump)
    if node.elseBranch != nil:
        # This jump shadows the outer one and is meant to skip
        # the else branch once the then branch has run
        let jump = self.emitJump(JumpForwards)
        self.statement(node.elseBranch)
        self.patchJump(jump)
proc emitLoop(self: Compiler, begin: int) =
    ## Emits a backwards jump whose operand is the distance
    ## between the current end of the bytecode and the given
    ## position. A JumpBackwards with a 2 byte operand is used
    ## when the distance fits in 16 bits, a LongJumpBackwards
    ## with a 3 byte operand otherwise
    let distance = self.chunk.code.len() - begin
    let offset =
        case OpCode(self.chunk.code[begin + 1])   # The jump instruction
        of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
           LongJumpIfFalsePop, LongJumpIfTrue:
            distance + 4
        else:
            distance
    if offset <= uint16.high().int:
        self.emitByte(JumpBackwards)
        self.emitBytes(offset.toDouble())
    else:
        if offset > 16777215:
            self.error("cannot jump more than 16777215 bytecode instructions")
        self.emitByte(LongJumpBackwards)
        self.emitBytes(offset.toTriple())
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops and
    ## desugared C-style for loops. The position
    ## where the condition starts is recorded so
    ## that emitLoop can jump back to it after the
    ## loop's body
    let start = self.chunk.code.len()
    self.expression(node.condition)
    # With optimizations enabled JumpIfFalsePop is emitted,
    # otherwise JumpIfFalse followed by an explicit Pop
    var jump: int
    if self.enableOptimizations:
        jump = self.emitJump(JumpIfFalsePop)
    else:
        jump = self.emitJump(JumpIfFalse)
        self.emitByte(Pop)
    self.statement(node.body)
    # NOTE(review): the exit jump is patched before the backwards
    # jump is emitted. If patchJump targets the current end of the
    # bytecode, a false condition lands on the backwards jump
    # instead of skipping it. Confirm against patchJump
    self.patchJump(jump)
    self.emitLoop(start)
proc expression(self: Compiler, node: Expression) =
    ## Compiles all expressions by dispatching on the
    ## kind of the given node. Expressions whose type
    ## cannot be inferred are rejected up front
    # Identifiers are let through even without a type so
    # that self.identifier() can raise a more appropriate
    # error
    if self.inferType(node) == nil and node.kind != identExpr:
        self.error("expression has no type")
    case node.kind:
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # Literals all share the same overall structure
            # and their kind field is enough to tell them
            # apart, so a single handler takes care of them
            self.literal(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Things like ~5 and -3
            self.unary(UnaryExpr(node))
        of binaryExpr:
            # Things like 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of groupingExpr:
            # Parenthesized expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of setItemExpr, assignExpr:
            # The node is passed along as-is: converting it
            # to its true type here would be pointless since
            # that information would be lost in the call.
            # self.assignment() tells the two kinds apart
            self.assignment(node)
        of callExpr:
            discard   # TODO
        of getItemExpr:
            discard   # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements, i.e. await expressions
    ## used standalone in the context of statements (pretty
    ## much an await expression followed by a semicolon).
    ## Awaiting is the only native way to run a coroutine
    ## from within an already asynchronous context (which
    ## an event loop is expected to orchestrate) and blocks
    ## the caller until the callee returns.
    ## The awaited expression is compiled first, then an
    ## Await instruction is emitted
    let awaited = node.expression
    self.expression(awaited)
    self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ## The expression is compiled as usual, then the bytes
    ## it produced are moved (in order) from the end of the
    ## chunk's code to the compiler's list of deferred
    ## instructions
    let current = self.chunk.code.len()
    self.expression(node.expression)
    # Copy first and truncate afterwards: removing bytes while
    # walking the sequence by index would both reorder them and
    # run past the end of the (shrinking) sequence
    for i in current ..< self.chunk.code.len():
        self.deferred.add(self.chunk.code[i])
    self.chunk.code.setLen(current)
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. The type of the returned
    ## value is checked against the return type of the function
    ## being compiled: a mismatch (including returning a value
    ## from a function with no return type, or nothing from a
    ## function that has one) is an error.
    ## A ReturnValue instruction is emitted after the value when
    ## there is one, a plain Return instruction otherwise
    let returnType = self.inferType(node.value)
    let typ = self.inferType(self.currentFunction)
    if returnType == nil and typ.returnType != nil:
        self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', but expression has no type")
    elif typ.returnType == nil and returnType != nil:
        # A value is being returned from a function that declares
        # no return type (same wording used by funDecl)
        self.error("non-empty return statement is not allowed in void functions")
    elif not self.compareTypes(returnType, typ.returnType):
        self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead")
    if node.value != nil:
        self.expression(node.value)
        self.emitByte(OpCode.ReturnValue)
    else:
        self.emitByte(OpCode.Return)
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded expression
    ## is compiled and a Yield instruction follows it
    let yielded = node.expression
    self.expression(yielded)
    self.emitByte(OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception expression
    ## is compiled and a Raise instruction follows it
    let raised = node.exception
    self.expression(raised)
    self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements by emitting a jump
    ## to the start of the loop being compiled. A Jump
    ## with a 2 byte operand is used when the loop's start
    ## fits in 16 bits, a LongJump with a 3 byte operand
    ## otherwise
    let target = self.currentLoop.start
    if target > 16777215:
        self.error("too much code to jump over in continue statement")
    if target <= 65535:
        self.emitByte(Jump)
        self.emitBytes(target.toDouble())
    else:
        self.emitByte(LongJump)
        self.emitBytes(target.toTriple())
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A jump with a dummy
    ## offset is emitted and its position is recorded in
    ## the current loop, so that patchBreaks can fix it
    ## once the loop has been fully compiled
    let placeholder = self.emitJump(OpCode.Jump)
    self.currentLoop.breakPos.add(placeholder)
    if self.scopeDepth < self.currentLoop.depth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Turns the placeholder jumps recorded by the break
    ## statements of the current loop into actual forward
    ## jumps. This has to wait until the loop is fully
    ## compiled, because the amount of code to skip is
    ## not known before then
    for position in self.currentLoop.breakPos:
        self.chunk.code[position] = uint8(JumpForwards)
        self.patchJump(position)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (which raise an
    ## AssertionError if the expression is falsey): the
    ## asserted expression is compiled and an Assert
    ## instruction follows it
    let condition = node.expression
    self.expression(condition)
    self.emitByte(OpCode.Assert)
proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements by dispatching on the
    ## kind of the given node. Anything that is not a
    ## known statement is compiled as an expression
    case node.kind:
        of NodeKind.importStmt, NodeKind.forEachStmt, NodeKind.tryStmt:
            # Nothing is emitted for these
            discard
        of exprStmt:
            let inner = ExprStmt(node).expression
            self.expression(inner)
            # Expression statements discard their value. Their main
            # use case is side effects in function calls
            self.emitByte(Pop)
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.whileStmt, NodeKind.forStmt:
            # Our parser already desugars for loops to
            # while loops!
            let enclosing = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            # Back to the loop we were in before (if any)
            self.currentLoop = enclosing
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        else:
            self.expression(Expression(node))
2022-04-12 12:18:25 +02:00
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations. The declared type
    ## and the type of the initializer are inferred and
    ## checked against each other, then the initializer
    ## is compiled and the name is declared
    let actual = self.inferType(node.value)
    let expected = self.inferType(node.valueType)
    if expected == nil and actual == nil:
        self.error(&"'{node.name.token.lexeme}' has no type")
    elif expected != nil and expected.kind == Mutable:
        # I mean, variables *are* already mutable (some of them anyway)
        self.error(&"invalid type '{self.typeToStr(expected)}' for var")
    elif not self.compareTypes(expected, actual) and expected != nil:
        self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'")
    self.expression(node.value)
    self.declareName(node)
2022-04-12 12:18:25 +02:00
proc funDecl(self: Compiler, node: FunDecl) =
    ## Compiles function declarations. The function's name
    ## is declared, its body (if any) is compiled as a block
    ## statement and an entry describing it is appended to
    ## the chunk's cfi data. Declaring a function with an
    ## empty body, or one for which more than one matching
    ## implementation exists, is an error
    # A function's code is just compiled linearly
    # and then jumped over
    let jmp = self.emitJump(JumpForwards)
    # The function we were compiling before this one (if any)
    let function = self.currentFunction
    self.declareName(node)
    self.frames.add(self.names.high())
    # TODO: Forward declarations
    if node.body != nil:
        if BlockStmt(node.body).code.len() == 0:
            self.error("cannot declare function with empty body")
        let fnType = self.inferType(node)
        let impl = self.findByType(node.name.token.lexeme, fnType)
        if impl.len() > 1:
            # Oh-oh! We found more than one implementation of
            # the same function with the same name! Error!
            var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n"
            for fn in reversed(impl):
                msg &= &"- '{fn.name}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
            self.error(msg)
        # We store the current function
        self.currentFunction = node
        # Since the deferred array is a linear
        # sequence of instructions and we want
        # to keep track to whose function's each
        # set of deferred instruction belongs,
        # we record the length of the deferred
        # array before compiling the function
        # and use this info later to compile
        # the try/finally block with the deferred
        # code
        let deferStart = self.deferred.len()
        # We let our debugger know a function is starting
        let start = self.chunk.code.high()
        self.blockStmt(BlockStmt(node.body))
        # Yup, we're done. That was easy, huh?
        # But, after all, functions are just named
        # scopes, and we compile them just like that:
        # we declare their name and arguments (before
        # their body so recursion works) and then just
        # handle them as a block statement (which takes
        # care of incrementing self.scopeDepth so locals
        # are resolved properly). There's a need for a bit
        # of boilerplate code to make closures work, but
        # that's about it
        case self.currentFunction.kind:
            of NodeKind.funDecl:
                if not self.currentFunction.hasExplicitReturn:
                    let typ = self.inferType(self.currentFunction)
                    if self.currentFunction.returnType == nil and typ.returnType != nil:
                        self.error("non-empty return statement is not allowed in void functions")
                    if self.currentFunction.returnType != nil:
                        self.error("function has an explicit return type, but no return statement was found")
                    self.emitByte(OpCode.Return)
            of NodeKind.lambdaExpr:
                if not LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn:
                    self.emitByte(OpCode.Return)
            else:
                discard   # Unreachable
        # Function is ending! Its cfi entry is made of: where its
        # code starts and ends, the value pushed onto self.frames
        # for it, how many arguments it takes and its name
        # (prefixed by its length)
        self.chunk.cfi.add(start.toTriple())
        self.chunk.cfi.add(self.chunk.code.high().toTriple())
        self.chunk.cfi.add(self.frames[^1].toTriple())
        self.chunk.cfi.add(uint8(node.arguments.len()))
        if not system.`==`(node.name, nil):
            var s = node.name.token.lexeme
            # The length prefix is 16 bits wide: longer names are
            # truncated *before* their length is written, so that
            # the prefix always matches the bytes following it
            if s.len() > uint16.high().int:
                s = s[0 ..< uint16.high().int]
            self.chunk.cfi.add(s.len().toDouble())
            self.chunk.cfi.add(s.toBytes())
        else:
            self.chunk.cfi.add(0.toDouble())
        # Currently defer is not functional so we
        # just drop the instructions deferred by this
        # function (truncating is safe, unlike deleting
        # by index while walking the sequence)
        self.deferred.setLen(deferStart)
    self.patchJump(jmp)
    # This makes us compile nested functions correctly
    self.currentFunction = function
    discard self.frames.pop()
proc patchReturnAddress(self: Compiler, retAddr: int) =
    ## Patches the return address of a function call:
    ## the current length of the chunk's code is encoded
    ## with toQuad and its 4 bytes overwrite the constants
    ## found at retAddr and at the 3 positions after it.
    ## This is called at each iteration of the compiler's
    ## loop
    let encoded = self.chunk.code.len().toQuad()
    for i in 0 .. 3:
        self.chunk.consts[retAddr + i] = encoded[i]
proc declaration(self: Compiler, node: Declaration) =
    ## Compiles all declarations: variable and function
    ## declarations have their own handlers, everything
    ## else is compiled as a statement
    if node.kind == NodeKind.varDecl:
        self.varDecl(VarDecl(node))
    elif node.kind == NodeKind.funDecl:
        self.funDecl(FunDecl(node))
    else:
        self.statement(Statement(node))
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object. The compiler's state is reset first, so
    ## the same compiler can be used more than once
    self.chunk = newChunk()
    self.ast = ast
    self.file = file
    self.currentModule = extractFilename(self.file)
    self.currentFunction = nil
    self.names = @[]
    self.frames = @[0]
    self.scopeDepth = 0
    self.current = 0
    while not self.done():
        let next = Declaration(self.step())
        self.declaration(next)
    if self.ast.len() > 0:
        # *Technically* an empty program is a valid program.
        # The Return exits the VM's main loop when used at the
        # global scope
        self.emitByte(OpCode.Return)
    result = self.chunk
    if self.ast.len() > 0 and self.scopeDepth != 0:
        self.error(&"invalid state: invalid scopeDepth value (expected 0, got {self.scopeDepth}), did you forget to call endScope/beginScope?")
    self.endScope()