peon/src/frontend/compiler.nim

1879 lines
73 KiB
Nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../config
import ../util/multibyte
import lexer as l
import parser as p
import tables
import strformat
import algorithm
import parseutils
import strutils
import sequtils
import os
export ast
export token
export multibyte
type
    TypeKind = enum
        ## An enumeration of compile-time
        ## types
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf, Typevar, Generic,
        Reference, Pointer
        Any    # Any is used internally in a few cases,
               # for example when looking for operators
               # when only the type of the arguments is of
               # interest
    Type = ref object
        ## A wrapper around
        ## compile-time types
        # Set to true for 'var' types (see the varExpr
        # branch of inferType and declareName)
        mutable: bool
        case kind: TypeKind:
            of Function:
                # The function's name, as declared
                name: string
                # Set by inferType for lambda expressions
                isLambda: bool
                isGenerator: bool
                isCoroutine: bool
                # Argument names paired with their types,
                # in declaration order
                args: seq[tuple[name: string, kind: Type]]
                # nil stands for "no return value" (see
                # compareTypes)
                returnType: Type
                # When isBuiltinFunction is set, calls are
                # compiled by handleBuiltinFunction, which
                # selects what to emit based on builtinOp
                isBuiltinFunction: bool
                builtinOp: string
            of Reference, Pointer:
                # The type being referenced/pointed to
                value: Type
            of Generic:
                # The identifier naming the generic (its
                # lexeme is what typeToStr prints)
                node: IdentExpr
            else:
                discard
# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode
type
    Name = ref object
        ## A compile-time wrapper around
        ## statically resolved names
        # Name of the identifier
        name: IdentExpr
        # Owner of the identifier (module)
        owner: string
        # Scope depth
        depth: int
        # Is this name private?
        isPrivate: bool
        # Is this a constant?
        isConst: bool
        # Copied from the declaration's isLet flag
        # NOTE(review): presumably marks names declared with
        # 'let' (i.e. not reassignable) -- confirm in the parser
        isLet: bool
        # The name's type
        valueType: Type
        # For functions, this marks where the function's
        # code begins. For variables, this stores where
        # their StoreVar/StoreHeap instruction was emitted
        codePos: int
        # Is the name closed over (i.e. used in a closure)?
        isClosedOver: bool
        # Is this a function argument?
        isFunctionArgument: bool
        # Where is this node declared in the file?
        line: int
    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements
        # Position in the bytecode where the loop starts
        start: int
        # Scope depth where the loop is located
        depth: int
        # Absolute jump offsets into our bytecode that we need to
        # patch. Used for break statements
        breakPos: seq[int]
    Compiler* = ref object
        ## A wrapper around the Peon compiler's state
        # The bytecode chunk where we write code to
        chunk: Chunk
        # The output of our parser (AST)
        ast: seq[Declaration]
        # The current AST node we're looking at
        current: int
        # The current file being compiled (used only for
        # error reporting)
        file: string
        # Compile-time "simulation" of the stack at
        # runtime to load variables that have stack
        # behavior more efficiently
        names: seq[Name]
        # The current scope depth. If > 0, we're
        # in a local scope, otherwise it's global
        scopeDepth: int
        # The current function being compiled
        currentFunction: FunDecl
        # Are optimizations turned on?
        enableOptimizations: bool
        # The current loop being compiled (used to
        # keep track of where to jump)
        currentLoop: Loop
        # Are we in REPL mode? If so, Pop instructions
        # for expression statements at the top level are
        # swapped for a special PopRepl instruction that
        # prints the result of the expression once it is
        # evaluated
        replMode: bool
        # The current module being compiled
        # (used to restrict access to statically
        # defined variables at compile time)
        currentModule: string
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
        # This sequence is emptied each time a
        # function declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
        # List of closed-over variables. A name's index in
        # this list is the operand written next to its
        # StoreClosure instruction (see detectClosureVariable)
        closedOver: seq[Name]
        # Keeps track of stack frames
        frames: seq[int]
        # Compiler procedures called by pragmas, keyed by
        # pragma name ("magic" and "pure" are registered by
        # newCompiler)
        compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]
## Forward declarations
# These procedures call each other recursively (or are referenced
# before their bodies appear, e.g. the pragma handlers stored by
# newCompiler), so their signatures are announced up front
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
# Type inference and name lookup helpers
proc inferType(self: Compiler, node: LiteralExpr, strictMutable: bool = true): Type
proc inferType(self: Compiler, node: Expression, strictMutable: bool = true): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, strictMutable: bool = true): seq[Name]
proc compareTypes(self: Compiler, a, b: Type, strictMutable: bool = true): bool
proc patchReturnAddress(self: Compiler, pos: int)
# Pragma handling
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc dispatchPragmas(self: Compiler, node: ASTnode)
## End of forward declarations
proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler =
    ## Builds a Compiler with empty state: no AST, no
    ## declared names, global scope, no current function
    ## and no file/module set. The "magic" and "pure"
    ## pragma handlers are registered before returning
    result = Compiler(ast: @[],
                      current: 0,
                      file: "",
                      names: @[],
                      scopeDepth: 0,
                      currentFunction: nil,
                      enableOptimizations: enableOptimizations,
                      replMode: replMode,
                      currentModule: "")
    # Pragma name -> handler procedure
    let handlers = newTable[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]()
    handlers["magic"] = handleMagicPragma
    handlers["pure"] = handlePurePragma
    result.compilerProcs = handlers
## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the most recently consumed AST node, i.e.
    ## the one at index current - 1. The index is clamped
    ## to the bounds of the AST, so the first node is
    ## returned when nothing has been consumed yet and the
    ## last one is returned once compilation ran past the
    ## end. Returns nil if the AST is empty. This keeps
    ## error reporting (which calls this getter) from
    ## failing with an index error of its own
    if self.ast.len() == 0:
        return nil
    result = self.ast[clamp(self.current - 1, 0, self.ast.high())]
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the declaration of the function that is
    ## currently being compiled (nil when there is none)
    result = self.currentFunction


proc getFile*(self: Compiler): string {.inline.} =
    ## Returns the name of the file being compiled
    result = self.file


proc getModule*(self: Compiler): string {.inline.} =
    ## Returns the name of the module being compiled
    result = self.currentModule
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Returns the AST node located `distance` nodes away
    ## from the current one without consuming anything.
    ## Negative distances look back at nodes that were
    ## already consumed. Any position outside of the AST
    ## yields the last node instead.
    # NOTE(review): with an empty AST the fallback below
    # indexes an empty sequence and fails
    let last = self.ast.high()
    if last >= 0:
        let target = self.current + distance
        if target in 0..last:
            return self.ast[target]
    result = self.ast[^1]
proc done(self: Compiler): bool {.inline.} =
    ## Tells whether every node in the AST has
    ## been consumed already
    self.current >= self.ast.len()
proc error(self: Compiler, message: string) {.raises: [CompileError], inline.} =
    ## Aborts compilation by raising a CompileError that
    ## carries the given message along with the current
    ## node, file and module
    let failure = CompileError(msg: message, node: self.getCurrentNode(), file: self.file, module: self.currentModule)
    raise failure
proc step(self: Compiler): ASTNode {.inline.} =
    ## Consumes the current node and returns it. Once
    ## the compiler is done, the position is no longer
    ## advanced
    result = self.peek()
    if self.done():
        return
    inc(self.current)
proc emitByte(self: Compiler, byt: OpCode | uint8) {.inline.} =
    ## Writes one byte (a raw value or an opcode) to the
    ## chunk being compiled, tagging it with the line of
    ## the node the compiler is currently looking at
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    let line = self.peek().token.line
    self.chunk.write(uint8(byt), line)
proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) {.inline.} =
    ## Writes every element of bytarr, in order, to the
    ## current chunk by means of emitByte
    for i in 0..bytarr.high():
        self.emitByte(bytarr[i])
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Encodes the value of val's token according to typ,
    ## stores it in the current chunk's constant table and
    ## returns what writeConstant hands back (the constant's
    ## index as 3 bytes). Types that are not handled leave
    ## the result zeroed and write nothing
    let lexeme = val.token.lexeme
    # The integer value is parsed up front, regardless
    # of the requested type
    var asInt: int
    discard parseInt(lexeme, asInt)
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(asInt)])
        of Int16, UInt16:
            result = self.chunk.writeConstant(asInt.toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(asInt.toQuad())
        of Int64, UInt64:
            result = self.chunk.writeConstant(asInt.toLong())
        of String:
            # NOTE(review): this encodes the integer parsed
            # above, not the text of the string literal --
            # confirm this is what the VM expects
            result = self.chunk.writeConstant(asInt.toBytes())
        of Float32, Float64:
            var asFloat: float = 0.0
            discard parseFloat(lexeme, asFloat)
            if typ.kind == Float32:
                result = self.chunk.writeConstant(cast[array[4, uint8]](float32(asFloat)))
            else:
                result = self.chunk.writeConstant(cast[array[8, uint8]](asFloat))
        else:
            discard
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits the Load* instruction matching the given type
    ## followed by the operand produced by makeConstant.
    ## Strings additionally carry the length of their lexeme
    ## as a 3-byte value right after the opcode
    case kind.kind:
        of Int8: self.emitByte(LoadInt8)
        of UInt8: self.emitByte(LoadUInt8)
        of Int16: self.emitByte(LoadInt16)
        of UInt16: self.emitByte(LoadUInt16)
        of Int32: self.emitByte(LoadInt32)
        of UInt32: self.emitByte(LoadUInt32)
        of Int64: self.emitByte(LoadInt64)
        of UInt64: self.emitByte(LoadUInt64)
        of Float32: self.emitByte(LoadFloat32)
        of Float64: self.emitByte(LoadFloat64)
        of String:
            self.emitByte(LoadString)
            let text = LiteralExpr(obj).literal.lexeme
            if text.len() >= 16777216:
                self.error("string constants cannot be larger than 16777216 bytes")
            self.emitBytes(text.len().toTriple())
        else:
            discard  # TODO
    # The operand is emitted for every type, including
    # the ones that got no opcode above
    self.emitBytes(self.makeConstant(obj, kind))
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits the given jump instruction with an all-zero
    ## 3-byte operand that is meant to be filled in later
    ## by patchJump. The return value is the absolute index
    ## of the instruction inside the chunk's bytecode
    self.emitByte(opcode)
    let placeholder = 0.toTriple()
    self.emitBytes(placeholder)
    # Opcode + operand take 4 bytes, so the opcode
    # sits 3 bytes before the last one written
    result = self.chunk.code.high() - 3
proc patchJump(self: Compiler, offset: int) =
    ## Fills in the operand of the jump instruction located
    ## at the given offset (as returned by emitJump) so that
    ## it lands at the current end of the bytecode
    let distance = self.chunk.code.len() - offset
    if distance > 16777215:
        self.error("cannot jump more than 16777216 bytecode instructions")
    # The 4 bytes of the jump instruction itself
    # are not part of the distance to cover
    let encoded = (distance - 4).toTriple()
    for i in 0..2:
        self.chunk.code[offset + 1 + i] = encoded[i]
proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
    ## Looks up the most recently declared name whose lexeme
    ## matches the given identifier, skipping names that are
    ## private to a module other than the current one.
    ## Returns nil if nothing matches. The depth argument is
    ## not taken into account here
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.name.token.lexeme != name.token.lexeme:
            continue
        if candidate.isPrivate and candidate.owner != self.currentModule:
            # An accessible name declared earlier
            # may still match, so keep looking
            continue
        return candidate
    result = nil
proc getStackPos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int =
    ## Computes the predicted call stack position of the
    ## given name by walking the declared names from the
    ## most recent one backwards. Counting starts at 2 and
    ## grows by one for each name walked past. Returns -1
    ## when no accessible name matches
    var position = 2
    for i in countdown(self.names.high(), 0):
        let variable = self.names[i]
        if name.name.lexeme == variable.name.name.lexeme:
            if variable.isPrivate and variable.owner != self.currentModule:
                # Inaccessible names are skipped
                # without being counted
                continue
            if variable.depth == depth or variable.depth == 0:
                # variable.depth == 0 for globals!
                return position
        inc(position)
    result = -1
proc getClosurePos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int =
    ## Computes the predicted position of the given name
    ## inside the closure array by walking the list of
    ## closed-over names backwards. Returns -1 when the
    ## name is not in there at the requested depth or is
    ## private to another module
    var position = self.closedOver.high()
    for i in countdown(self.closedOver.high(), 0):
        let variable = self.closedOver[i]
        if name.name.lexeme == variable.name.name.lexeme:
            if variable.isPrivate and variable.owner != self.currentModule:
                # Inaccessible names do not move
                # the position
                continue
            elif variable.depth == depth:
                return position
        dec(position)
    result = -1
proc detectClosureVariable(self: Compiler, name: Name, depth: int = self.scopeDepth) =
    ## Checks whether the given name is being used from a
    ## scope deeper than the one it was declared in and, if
    ## so, turns it into a closure variable. Nil names,
    ## globals and names that are already closed over are
    ## left alone. This has to be called every time a name
    ## is referenced, otherwise closed-over variables are
    ## not emitted properly
    if name.isNil() or name.depth == 0:
        return
    if name.depth >= depth or name.isClosedOver:
        return
    # The name is closed over: the placeholder jump that
    # declareName emitted at name.codePos is overwritten
    # with a StoreClosure carrying the name's index in the
    # closedOver list. Flagging the name lets identifier()
    # pick the right load instruction later on
    self.closedOver.add(name)
    let idx = self.closedOver.high().toTriple()
    if self.closedOver.len() >= 16777216:
        self.error("too many consecutive closed-over variables (max is 16777216)")
    self.chunk.code[name.codePos] = uint8(StoreClosure)
    for i in 0..2:
        self.chunk.code[name.codePos + 1 + i] = idx[i]
    name.isClosedOver = true
proc compareTypes(self: Compiler, a, b: Type, strictMutable: bool = true): bool =
    ## Compares two type objects for equality (works
    ## with nil!). When strictMutable is true, two types
    ## that only differ in mutability are considered
    ## different. The flag applies to nested types too
    ## (function arguments, return types and the targets
    ## of references and pointers)
    # The nil code here is for void functions (when
    # we compare their return types)
    if a.isNil():
        return b.isNil() or b.kind == Any
    elif b.isNil():
        # a can't be nil here
        return a.kind == Any
    elif a.kind == Any or b.kind == Any:
        # This is needed internally: user code
        # cannot generate code for matching
        # arbitrary types, but we need it for
        # function calls and stuff like that
        # since peon doesn't have return type
        # inference
        return true
    elif a.kind == Generic or b.kind == Generic:
        # Matching generic argument types
        return true
    elif a.kind != b.kind:
        # Next, we see the type discriminant:
        # If they're different, then they can't
        # be the same type!
        return false
    elif a.mutable != b.mutable and strictMutable:
        # Are they both (im)mutable? If not,
        # they're different
        return false
    case a.kind:
        # If all previous checks pass, it's time
        # to go through each possible type peon
        # supports and compare it
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, Nan, Bool, Inf:
            # A value type's type is always equal to
            # another one's
            return true
        of Reference, Pointer:
            # Here we already know that both
            # a and b are of either of the two
            # types in this branch, so we just need
            # to compare their values (with the same
            # mutability rules the caller asked for)
            return self.compareTypes(a.value, b.value, strictMutable)
        of Function:
            # Functions are a bit trickier: the number
            # of arguments, the return type and the type
            # of each argument all have to match
            if a.args.len() != b.args.len():
                return false
            elif not self.compareTypes(a.returnType, b.returnType, strictMutable):
                return false
            for (argA, argB) in zip(a.args, b.args):
                if not self.compareTypes(argA.kind, argB.kind, strictMutable):
                    return false
            return true
        else:
            # Every other kind compares as different
            discard
proc toIntrinsic(name: string): Type =
    ## Maps the name of a built-in type to a freshly
    ## created type object. Names that do not belong
    ## to an intrinsic type yield nil
    case name:
        of "int", "int64", "i64":
            result = Type(kind: Int64)
        of "uint64", "u64":
            result = Type(kind: UInt64)
        of "int32", "i32":
            result = Type(kind: Int32)
        of "uint32", "u32":
            result = Type(kind: UInt32)
        of "int16", "i16":
            result = Type(kind: Int16)
        of "uint16", "u16":
            result = Type(kind: UInt16)
        of "int8", "i8":
            result = Type(kind: Int8)
        of "uint8", "u8":
            result = Type(kind: UInt8)
        of "f64", "float", "float64":
            result = Type(kind: Float64)
        of "f32", "float32":
            result = Type(kind: Float32)
        of "byte":
            result = Type(kind: Byte)
        of "char":
            result = Type(kind: Char)
        of "nan":
            result = Type(kind: Nan)
        of "nil":
            result = Type(kind: Nil)
        of "inf":
            result = Type(kind: Inf)
        of "bool":
            result = Type(kind: Bool)
        of "typevar":
            result = Type(kind: Typevar)
        else:
            result = nil
proc inferType(self: Compiler, node: LiteralExpr, strictMutable: bool = true): Type =
    ## Infers the type of a given literal expression.
    ## Returns nil for nil nodes and for literal kinds
    ## that are not handled here. Numeric literals may
    ## carry a type specifier after a single quote: an
    ## error is raised if it does not name an intrinsic
    ## type
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            # Splits the literal from its (optional)
            # type specifier
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                # Integers are 64 bits wide by default
                return Type(kind: Int64)
            let typ = size[1].toIntrinsic()
            if not self.compareTypes(typ, nil, strictMutable):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for int")
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                # Floats are 64 bits wide by default
                return Type(kind: Float64)
            let typ = size[1].toIntrinsic()
            if not self.compareTypes(typ, nil, strictMutable):
                return typ
            else:
                self.error(&"invalid type specifier '{size[1]}' for float")
        of strExpr:
            # String literals are dispatched here by the
            # overload working on generic expressions, so
            # they need a type too
            return Type(kind: String)
        of nilExpr:
            return Type(kind: Nil)
        of trueExpr, falseExpr:
            return Type(kind: Bool)
        of nanExpr:
            return Type(kind: TypeKind.Nan)
        of infExpr:
            return Type(kind: TypeKind.Inf)
        else:
            discard  # TODO
proc inferType(self: Compiler, node: Expression, strictMutable: bool = true): Type =
    ## Infers the type of a given expression and
    ## returns it. Returns nil when the node is nil
    ## or its type cannot be determined. strictMutable
    ## is handed down to every nested inference and
    ## comparison
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            let name = self.resolve(node)
            if not name.isNil():
                result = name.valueType
            else:
                # Not a declared name: maybe it's
                # a built-in type
                result = node.name.lexeme.toIntrinsic()
        of unaryExpr:
            return self.inferType(UnaryExpr(node).a, strictMutable)
        of binaryExpr:
            let node = BinaryExpr(node)
            let a = self.inferType(node.a, strictMutable)
            let b = self.inferType(node.b, strictMutable)
            # Both operands must agree on the type
            if not self.compareTypes(a, b, strictMutable):
                return nil
            return a
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr
           }:
            return self.inferType(LiteralExpr(node), strictMutable)
        of lambdaExpr:
            var node = LambdaExpr(node)
            result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
            if not node.returnType.isNil():
                result.returnType = self.inferType(node.returnType, strictMutable)
            for argument in node.arguments:
                result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType, strictMutable)))
        of callExpr:
            var node = CallExpr(node)
            case node.callee.kind:
                of identExpr:
                    let resolved = self.resolve(IdentExpr(node.callee))
                    # Only function types have a return type:
                    # calling anything else has no known type
                    if not resolved.isNil() and not resolved.valueType.isNil() and resolved.valueType.kind == Function:
                        result = resolved.valueType.returnType
                        if result.isNil():
                            result = Type(kind: Any)
                    else:
                        result = nil
                of lambdaExpr:
                    result = self.inferType(LambdaExpr(node.callee).returnType, strictMutable)
                else:
                    discard  # Unreachable
        of varExpr:
            let inner = self.inferType(Var(node).value, strictMutable)
            if inner.isNil():
                return nil
            # The inferred type may be shared with a declared
            # name (identifiers yield the name's own type
            # object), so the mutable flag is set on a copy
            # rather than on the original
            new(result)
            result[] = inner[]
            result.mutable = true
        of refExpr:
            result = Type(kind: Reference, value: self.inferType(Ref(node).value, strictMutable))
        of ptrExpr:
            result = Type(kind: Pointer, value: self.inferType(Ptr(node).value, strictMutable))
        else:
            discard  # Unreachable
proc inferType(self: Compiler, node: Declaration, strictMutable: bool = true): Type =
    ## Infers the type of a given declaration and returns
    ## it. Function declarations yield the type recorded
    ## for their name (nil if it hasn't been declared).
    ## Variable declarations do the same, falling back to
    ## the type of their value. Anything else yields nil
    if node.isNil():
        return nil
    case node.kind:
        of funDecl:
            let declared = self.resolve(FunDecl(node).name)
            if not declared.isNil():
                result = declared.valueType
        of NodeKind.varDecl:
            let variable = VarDecl(node)
            let declared = self.resolve(variable.name)
            if declared.isNil():
                result = self.inferType(variable.value, strictMutable)
            else:
                result = declared.valueType
        else:
            discard  # Unreachable
proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a
    ## type object. A nil type is rendered as
    ## "nil" so that this can safely be used to
    ## build error messages about types that
    ## could not be inferred. Kinds that are not
    ## handled yield an empty string
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            # The name of the enum member, in lowercase
            result &= ($typ.kind).toLowerAscii()
        of Pointer:
            result &= &"ptr {self.typeToStr(typ.value)}"
        of Reference:
            result &= &"ref {self.typeToStr(typ.value)}"
        of Function:
            # fn (name: [var ]type, ...)[: returnType]
            result &= "fn ("
            for i, (argName, argType) in typ.args:
                result &= &"{argName}: "
                if not argType.isNil() and argType.mutable:
                    result &= "var "
                result &= self.typeToStr(argType)
                if i < typ.args.len() - 1:
                    result &= ", "
            result &= ")"
            if not typ.returnType.isNil():
                result &= &": {self.typeToStr(typ.returnType)}"
        of Generic:
            result = typ.node.name.lexeme
        else:
            discard
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
    ## Compiles literals: singletons are emitted as their
    ## dedicated instruction while strings and numbers go
    ## through emitConstant. Await expressions are handled
    ## here as well. Any other node kind is an internal
    ## error
    template decimalNode(source: untyped, value: int): untyped =
        ## Builds an integer literal node holding the decimal
        ## form of value, located where source's token starts
        newIntExpr(Token(lexeme: $value, line: source.token.line,
                         pos: (start: source.token.pos.start,
                               stop: source.token.pos.start + len($value))
                         )
                   )

    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue)
        of falseExpr:
            self.emitByte(LoadFalse)
        of nilExpr:
            self.emitByte(LoadNil)
        of infExpr:
            self.emitByte(LoadInf)
        of nanExpr:
            self.emitByte(LoadNan)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        # TODO: Take size specifier into account!
        of intExpr:
            var parsed: int
            let integer = IntExpr(node)
            try:
                discard parseInt(integer.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(integer, self.inferType(integer))
        of hexExpr:
            # Hexadecimal, binary and octal literals are
            # converted to decimal before being emitted. The
            # type still comes from the original literal
            var parsed: int
            let hexadecimal = HexExpr(node)
            try:
                discard parseHex(hexadecimal.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(decimalNode(hexadecimal, parsed), self.inferType(hexadecimal))
        of binExpr:
            var parsed: int
            let binary = BinExpr(node)
            try:
                discard parseBin(binary.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(decimalNode(binary, parsed), self.inferType(binary))
        of octExpr:
            var parsed: int
            let octal = OctExpr(node)
            try:
                discard parseOct(octal.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(decimalNode(octal, parsed), self.inferType(octal))
        of floatExpr:
            var parsed: float
            let floating = FloatExpr(node)
            try:
                discard parseFloat(floating.literal.lexeme, parsed)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(floating, self.inferType(floating))
        of awaitExpr:
            let awaited = AwaitExpr(node)
            self.expression(awaited.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")
proc findByName(self: Compiler, name: string): seq[Name] =
    ## Collects every declared object bearing the given
    ## name, most recent declaration first
    for i in countdown(self.names.high(), 0):
        let declared = self.names[i]
        if declared.name.token.lexeme == name:
            result.add(declared)
proc findByType(self: Compiler, name: string, kind: Type, strictMutable: bool = true): seq[Name] =
    ## Collects every declared object bearing the given
    ## name whose type matches kind according to
    ## compareTypes, in the order given by findByName
    result = self.findByName(name).filterIt(self.compareTypes(it.valueType, kind, strictMutable))
proc matchImpl(self: Compiler, name: string, kind: Type, strictMutable: bool = true): Name =
    ## Tries to find a matching function implementation
    ## compatible with the given type and returns its
    ## name object. A CompileError is raised (through
    ## self.error) both when no implementation matches
    ## and when more than one does. In the first case
    ## the message lists every candidate with the given
    ## name along with the reason it was discarded
    let impl = self.findByType(name, kind, strictMutable)
    if impl.len() == 0:
        var msg = &"cannot find a suitable implementation for '{name}'"
        let names = self.findByName(name)
        if names.len() > 0:
            msg &= &", found {len(names)} candidate"
            if names.len() > 1:
                msg &= "s"
            msg &= ": "
            for candidate in names:
                # Types that could not be inferred are nil
                # and get printed as such
                let shown = if candidate.valueType.isNil(): "nil" else: self.typeToStr(candidate.valueType)
                msg &= &"\n - '{candidate.name.token.lexeme}' of type '{shown}'"
                if candidate.valueType.isNil() or candidate.valueType.kind != Function:
                    msg &= ", not a callable"
                elif kind.args.len() != candidate.valueType.args.len():
                    msg &= &", wrong number of arguments ({candidate.valueType.args.len()} expected, got {kind.args.len()})"
                else:
                    # Only the first mismatching argument
                    # is reported
                    for i, arg in kind.args:
                        let expected = candidate.valueType.args[i]
                        let expectsVar = not expected.kind.isNil() and expected.kind.mutable
                        let givenVar = not arg.kind.isNil() and arg.kind.mutable
                        if expectsVar and not givenVar:
                            msg &= &", first mismatch at position {i + 1}: {expected.name} is immutable, not 'var'"
                            break
                        elif not self.compareTypes(arg.kind, expected.kind):
                            let wanted = if expected.kind.isNil(): "nil" else: self.typeToStr(expected.kind)
                            let got = if arg.kind.isNil(): "nil" else: self.typeToStr(arg.kind)
                            msg &= &", first mismatch at position {i + 1}: expected argument of type '{wanted}', got '{got}' instead"
                            break
        self.error(msg)
    elif impl.len() > 1:
        var msg = &"multiple matching implementations of '{name}' found:\n"
        for fn in reversed(impl):
            msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
        self.error(msg)
    return impl[0]
proc emitFunction(self: Compiler, name: Name) =
    ## Emits a LoadFunction instruction whose operand
    ## is the given name's code position, encoded on
    ## 3 bytes
    let address = name.codePos.toTriple()
    self.emitByte(LoadFunction)
    self.emitBytes(address)
proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) =
    ## Emits single instructions for builtin functions
    ## such as addition or subtraction. The instruction
    ## is chosen by the function's builtinOp. Arguments
    ## are compiled first, in order, except for the
    ## logical operators, which compile their operands
    ## around a jump so that the second one is skipped
    ## when the first already decides the result. Unknown
    ## operations emit nothing besides their arguments
    if fn.valueType.builtinOp notin ["GenericLogicalOr", "GenericLogicalAnd"]:
        for argument in args:
            self.expression(argument)
    case fn.valueType.builtinOp:
        # Signed integers
        of "AddInt64": self.emitByte(AddInt64)
        of "SubInt64": self.emitByte(SubInt64)
        of "DivInt64": self.emitByte(DivInt64)
        of "MulInt64": self.emitByte(MulInt64)
        of "AddInt32": self.emitByte(AddInt32)
        of "SubInt32": self.emitByte(SubInt32)
        of "DivInt32": self.emitByte(DivInt32)
        of "MulInt32": self.emitByte(MulInt32)
        of "AddInt16": self.emitByte(AddInt16)
        of "SubInt16": self.emitByte(SubInt16)
        of "DivInt16": self.emitByte(DivInt16)
        of "MulInt16": self.emitByte(MulInt16)
        of "AddInt8": self.emitByte(AddInt8)
        of "SubInt8": self.emitByte(SubInt8)
        of "DivInt8": self.emitByte(DivInt8)
        of "MulInt8": self.emitByte(MulInt8)
        # Unsigned integers
        of "AddUInt64": self.emitByte(AddUInt64)
        of "SubUInt64": self.emitByte(SubUInt64)
        of "DivUInt64": self.emitByte(DivUInt64)
        of "MulUInt64": self.emitByte(MulUInt64)
        of "AddUInt32": self.emitByte(AddUInt32)
        of "SubUInt32": self.emitByte(SubUInt32)
        of "DivUInt32": self.emitByte(DivUInt32)
        of "MulUInt32": self.emitByte(MulUInt32)
        of "AddUInt16": self.emitByte(AddUInt16)
        of "SubUInt16": self.emitByte(SubUInt16)
        of "DivUInt16": self.emitByte(DivUInt16)
        of "MulUInt16": self.emitByte(MulUInt16)
        of "AddUInt8": self.emitByte(AddUInt8)
        of "SubUInt8": self.emitByte(SubUInt8)
        of "DivUInt8": self.emitByte(DivUInt8)
        of "MulUInt8": self.emitByte(MulUInt8)
        # Floats. The 64 bit operations must use their own
        # opcodes, not the 8 bit integer ones
        of "AddFloat64": self.emitByte(AddFloat64)
        of "SubFloat64": self.emitByte(SubFloat64)
        of "DivFloat64": self.emitByte(DivFloat64)
        of "MulFloat64": self.emitByte(MulFloat64)
        of "AddFloat32": self.emitByte(AddFloat32)
        of "SubFloat32": self.emitByte(SubFloat32)
        of "DivFloat32": self.emitByte(DivFloat32)
        of "MulFloat32": self.emitByte(MulFloat32)
        of "GenericLogicalOr":
            # The second operand is jumped over when
            # the first one is true
            self.expression(args[0])
            let jump = self.emitJump(JumpIfTrue)
            self.expression(args[1])
            self.patchJump(jump)
        of "GenericLogicalAnd":
            # The second operand is jumped over when
            # the first one is false
            self.expression(args[0])
            var jump: int
            if self.enableOptimizations:
                jump = self.emitJump(JumpIfFalseOrPop)
            else:
                jump = self.emitJump(JumpIfFalse)
                self.emitByte(Pop)
            self.expression(args[1])
            self.patchJump(jump)
        else:
            discard  # Unreachable
proc generateCall(self: Compiler, fn: Name, args: seq[Expression]) =
    ## Emits the code needed to call the given function
    ## with the given arguments. Builtin functions are
    ## delegated to handleBuiltinFunction and emit no
    ## actual call
    if fn.valueType.isBuiltinFunction:
        self.handleBuiltinFunction(fn, args)
    else:
        self.emitFunction(fn)
        self.emitByte(LoadReturnAddress)
        # The return address is not known yet: a 4-byte
        # placeholder is emitted and patched at the end
        let returnAddressPos = self.chunk.code.len()
        self.emitBytes(0.toQuad())
        for argument in args:
            self.expression(argument)
        self.emitByte(Call)  # Creates a new call frame
        # Each call frame contains at least 2 elements
        # (function object and return address), plus 1
        # for every name declared in the current scope
        let size = 2 + self.names.countIt(it.depth == self.scopeDepth)
        self.emitBytes(size.toTriple())
        self.patchReturnAddress(returnAddressPos)
proc generateObjCall(self: Compiler, args: seq[Expression]) =
    ## Emits the code needed to call, with the given
    ## arguments, a function that has already been
    ## loaded onto the operand stack
    self.emitByte(PushC)  # Pops the function off the operand stack onto the call stack
    self.emitByte(LoadReturnAddress)
    # The return address is not known yet: a 4-byte
    # placeholder is emitted and patched at the end
    let returnAddressPos = self.chunk.code.len()
    self.emitBytes(0.toQuad())
    for argument in args:
        self.expression(argument)
    self.emitByte(Call)  # Creates a new call frame
    # Each call frame contains at least 2 elements
    # (function object and return address), plus 1
    # for every name declared in the current scope
    let size = 2 + self.names.countIt(it.depth == self.scopeDepth)
    self.emitBytes(size.toTriple())
    self.patchReturnAddress(returnAddressPos)
proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
    ## Calls the function implementing a unary
    ## operator, passing it the operand
    let operands = @[op.a]
    self.generateCall(fn, operands)
proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
    ## Calls the function implementing a binary
    ## operator, passing it both operands (left
    ## one first)
    let operands = @[op.a, op.b]
    self.generateCall(fn, operands)
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions by looking up the
    ## function that implements the operator for the
    ## operand's type (ignoring mutability) and then
    ## calling it
    let operandType = self.inferType(node.a, strictMutable=false)
    # Any return type will do: only the argument matters
    let signature = Type(kind: Function, returnType: Type(kind: Any), args: @[("", operandType)])
    let implementation = self.matchImpl(node.token.lexeme, signature, strictMutable=false)
    self.callUnaryOp(implementation, node)
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles binary expressions by looking up the
    ## function that implements the operator for the
    ## types of both operands (ignoring mutability)
    ## and then calling it
    let leftType = self.inferType(node.a, strictMutable=false)
    let rightType = self.inferType(node.b, strictMutable=false)
    # Any return type will do: only the arguments matter
    let signature = Type(kind: Function, returnType: Type(kind: Any), args: @[("", leftType), ("", rightType)])
    let implementation = self.matchImpl(node.token.lexeme, signature, strictMutable=false)
    self.callBinaryOp(implementation, node)
proc declareName(self: Compiler, node: Declaration, mutable: bool = false) =
    ## Statically declares a name into the current scope.
    ## "Declaring" a name only means updating our internal
    ## list of identifiers so that further calls to resolve()
    ## correctly return them. There is no code to actually
    ## declare a variable at runtime: the value is already
    ## on the stack. When mutable is true, the type of the
    ## declared variable is marked as mutable. Function
    ## declarations also declare their generics and their
    ## arguments, one scope deeper than the function itself.
    ## Other kinds of declarations are ignored for now
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            if self.names.high() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777216 variables at a time")
            for name in self.findByName(node.name.token.lexeme):
                # A name whose type could not be inferred is not a function
                # or a custom type, so it counts as a variable here
                if name.depth == self.scopeDepth and (name.valueType.isNil() or name.valueType.kind notin {Function, CustomType}) and not name.isFunctionArgument:
                    # Trying to redeclare a variable in the same module is an error, but it's okay
                    # if it's a function argument (for example, if you want to copy a number to
                    # mutate it)
                    self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
            let declared = Name(depth: self.scopeDepth,
                                name: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: self.inferType(node.value),
                                codePos: self.chunk.code.len(),
                                isLet: node.isLet,
                                isClosedOver: false,
                                line: node.token.line)
            self.names.add(declared)
            if mutable and not declared.valueType.isNil():
                # The inferred type object may belong to something
                # else (another name's type, or the return type of
                # the function being called), so the flag is set on
                # a private copy instead of the shared object
                var owned: Type
                new(owned)
                owned[] = declared.valueType[]
                owned.mutable = true
                declared.valueType = owned
            # We emit a jump of 0 because this may become a
            # StoreHeap instruction. If the variable is
            # not closed over, we'll sadly be wasting a
            # VM cycle. The previous implementation used 4 no-op
            # instructions, which wasted 4 times as many clock
            # cycles.
            # TODO: Optimize this. It's a bit tricky because
            # deleting bytecode would render all of our
            # jump offsets and other absolute indices in the
            # bytecode wrong
            if self.scopeDepth > 0:
                # Closure variables are only used in local
                # scopes
                self.emitByte(JumpForwards)
                self.emitBytes(0.toTriple())
        of NodeKind.funDecl:
            var node = FunDecl(node)
            # We declare the generics before the function so we
            # can refer to them
            for gen in node.generics:
                self.names.add(Name(depth: self.scopeDepth + 1,
                                    isPrivate: true,
                                    isConst: false,
                                    owner: self.currentModule,
                                    line: node.token.line,
                                    valueType: Type(kind: Generic, mutable: false, node: gen.name),
                                    name: gen.name))
            self.names.add(Name(depth: self.scopeDepth,
                                isPrivate: node.isPrivate,
                                isConst: false,
                                owner: self.currentModule,
                                valueType: Type(kind: Function,
                                                name: node.name.token.lexeme,
                                                returnType: self.inferType(
                                                        node.returnType),
                                                args: @[]),
                                codePos: self.chunk.code.len(),
                                name: node.name,
                                isLet: false,
                                isClosedOver: false,
                                line: node.token.line))
            let fn = self.names[^1]
            var name: Name
            for argument in node.arguments:
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777216 variables at a time")
                # wait, no LoadVar? Yes! That's because when calling functions,
                # arguments will already be on the stack so there's no need to
                # load them here
                name = Name(depth: self.scopeDepth + 1,
                            isPrivate: true,
                            owner: self.currentModule,
                            isConst: false,
                            name: argument.name,
                            valueType: nil,
                            codePos: 0,
                            isLet: false,
                            isClosedOver: false,
                            line: argument.name.token.line,
                            isFunctionArgument: true)
                self.names.add(name)
                # The argument is declared before its type
                # is inferred
                name.valueType = self.inferType(argument.valueType)
                # If it's still nil, it's an error!
                if name.valueType.isNil():
                    self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
                fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
        else:
            discard  # TODO: Types, enums
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers.
    ##
    ## The name is resolved statically; an unresolved name is
    ## reported through self.error(). Otherwise the emitted load
    ## depends on how the name is stored: constants are inlined,
    ## functions load a function object (or nil for built-ins),
    ## plain variables are read from the stack and closed-over
    ## variables are read from the closure array
    let s = self.resolve(node)
    if s.isNil():
        self.error(&"reference to undeclared name '{node.token.lexeme}'")
    elif s.isConst:
        # Constants are always emitted as Load* instructions
        # no matter the scope depth
        self.emitConstant(node, self.inferType(node))
    else:
        self.detectClosureVariable(s)
        if s.valueType.kind == Function:
            if not s.valueType.isBuiltinFunction:
                self.emitByte(LoadFunctionObj)
                self.emitBytes(s.codePos.toTriple())
            else:
                self.emitByte(LoadNil)
        elif not s.isClosedOver:
            # Static name resolution, loads value at index in the stack. Very fast. Much wow.
            self.emitByte(LoadVar)
            # No need to check for -1 here: we already did a nil-check above!
            self.emitBytes(self.getStackPos(s.name).toTriple())
        else:
            # Loads a closure variable. Stored in a separate "closure array" in the VM that does not
            # align its semantics with the call stack. The slot index must be the one assignment()
            # stores to (getClosurePos), not simply the last entry of the closure array, otherwise
            # reads and writes of the same variable can hit different slots once more than one
            # variable has been closed over
            self.emitByte(LoadClosure)
            self.emitBytes(self.getClosurePos(s.name).toTriple())
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions.
    ##
    ## For plain assignments the target must resolve to a name
    ## that is neither a constant nor a let binding; the value is
    ## compiled first and then stored either on the stack or in
    ## the closure array. Item assignments are only type-checked
    ## for now. Any other node kind is reported as an internal error
    case node.kind:
        of assignExpr:
            let node = AssignExpr(node)
            let name = IdentExpr(node.name)
            let r = self.resolve(name)
            if r.isNil():
                self.error(&"assignment to undeclared name '{name.token.lexeme}'")
            elif r.isConst:
                self.error(&"cannot assign to '{name.token.lexeme}' (constant)")
            elif r.isLet:
                self.error(&"cannot reassign '{name.token.lexeme}'")
            # The right-hand side goes on the stack before the store
            self.expression(node.value)
            self.detectClosureVariable(r)
            if r.isClosedOver:
                # Stores into the separate "closure array" of the VM
                # rather than into a regular stack slot
                self.emitByte(StoreClosure)
                self.emitBytes(self.getClosurePos(name).toTriple())
            else:
                self.emitByte(StoreVar)
                self.emitBytes(self.getStackPos(name).toTriple())
        of setItemExpr:
            let node = SetItemExpr(node)
            if self.inferType(node).isNil():
                self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
proc beginScope(self: Compiler) =
    ## Enters a new local scope: the compiler's scope
    ## depth counter is bumped by one
    self.scopeDepth += 1
proc endScope(self: Compiler, deleteNames: bool = true, fromFunc: bool = false) =
## Ends the current local scope.
##
## Decrements the scope depth and collects every name whose
## depth is greater than the new depth. Pop instructions for
## those names are only emitted when fromFunc is false. When
## deleteNames is true the collected names are also removed
## from self.names and PopClosure is emitted while entries are
## removed from self.closedOver
# NOTE(review): this guard only rejects negative depths, so a call
# made at depth 0 passes it and leaves scopeDepth at -1
if self.scopeDepth < 0:
self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
dec(self.scopeDepth)
# Names that just went out of scope, in declaration order
var names: seq[Name] = @[]
for name in self.names:
if name.depth > self.scopeDepth:
names.add(name)
if not self.enableOptimizations and not fromFunc:
# All variables with a scope depth larger than the current one
# are now out of scope. Begone, you're now homeless!
self.emitByte(PopC)
# NOTE(review): when optimizations are off, the elif branch below can
# still emit a PopC for a single name in addition to the one emitted
# above - confirm this does not pop twice
if self.enableOptimizations and len(names) > 1 and not fromFunc:
# If we're popping less than 65535 variables, then
# we can emit a PopN instruction. This is true for
# 99.99999% of the use cases of the language (who the
# hell is going to use 65 THOUSAND local variables?), but
# if you'll ever use more then Peon will emit a PopN instruction
# for the first 65 thousand and change local variables and then
# emit another batch of plain ol' Pop instructions for the rest
self.emitByte(PopN)
# NOTE(review): presumably toDouble() encodes a 16-bit operand;
# verify what it does when len(names) exceeds uint16.high()
self.emitBytes(len(names).toDouble())
if len(names) > uint16.high().int():
for i in countdown(self.names.high(), len(names) - uint16.high().int()):
if self.names[i].depth > self.scopeDepth:
self.emitByte(PopC)
elif len(names) == 1 and not fromFunc:
# We only emit PopN if we're popping more than one value
self.emitByte(PopC)
# This seems *really* slow, but
# what else should I do? Nim doesn't
# allow the removal of items during
# seq iteration so ¯\_(ツ)_/¯
if deleteNames:
var idx = 0
# Removes from self.names every entry that was collected above.
# Entries are compared by reference
while idx < self.names.len():
for name in names:
if self.names[idx] == name:
self.names.delete(idx)
inc(idx)
idx = 0
# NOTE(review): this deletes self.closedOver[idx] once per closed-over
# name in `names`, without checking which variable lives at idx or
# whether idx is still in range after a delete - confirm against
# how closedOver is populated
while idx < self.closedOver.len():
for name in names:
if name.isClosedOver:
self.closedOver.delete(idx)
self.emitByte(PopClosure)
inc(idx)
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles a block statement: every declaration
    ## inside it is compiled within a fresh local scope,
    ## which is closed again once the block is done
    self.beginScope()
    for item in node.code:
        self.declaration(item)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code.
    ##
    ## The condition must be of type bool. A conditional jump
    ## skips the "then" branch when the condition is false and
    ## an unconditional jump at the end of the "then" branch
    ## skips the (optional) "else" branch
    let cond = self.inferType(node.condition)
    if not self.compareTypes(cond, Type(kind: Bool)):
        if not cond.isNil():
            self.error(&"expecting value of type 'bool', got '{self.typeToStr(cond)}' instead")
        elif node.condition.kind == identExpr:
            self.error(&"reference to undeclared identifier '{IdentExpr(node.condition).name.lexeme}'")
        elif node.condition.kind == callExpr and CallExpr(node.condition).callee.kind == identExpr:
            self.error(&"reference to undeclared identifier '{IdentExpr(CallExpr(node.condition).callee).name.lexeme}'")
        else:
            self.error(&"expecting value of type 'bool', but expression has no type")
    self.expression(node.condition)
    # The optimized opcode pops the condition by itself, the
    # plain one needs an explicit Pop right after it
    let jumpCode: OpCode =
        if self.enableOptimizations: JumpIfFalsePop
        else: JumpIfFalse
    let skipThen = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    let skipElse = self.emitJump(JumpForwards)
    self.patchJump(skipThen)
    if not node.elseBranch.isNil():
        self.statement(node.elseBranch)
    self.patchJump(skipElse)
proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction whose operand is
    ## the distance from the current end of the bytecode
    ## back to begin. Distances that do not fit the operand
    ## are reported as an error
    # NOTE(review): the extra 4 presumably accounts for the jump
    # opcode plus its 3-byte operand - confirm against the VM
    let distance = self.chunk.code.len() - begin + 4
    if distance > 16777215:
        self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(JumpBackwards)
    self.emitBytes(distance.toTriple())
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops and
    ## desugared C-style for loops.
    ##
    ## Layout of the emitted code: condition, conditional
    ## exit jump, body, backwards jump to the condition. The
    ## exit jump is patched to land *after* the backwards jump
    let start = self.chunk.code.len()
    self.expression(node.condition)
    var jump: int
    if self.enableOptimizations:
        jump = self.emitJump(JumpIfFalsePop)
    else:
        jump = self.emitJump(JumpIfFalse)
        # The plain opcode leaves the condition on the
        # stack, so it is popped before running the body
        self.emitByte(Pop)
    self.statement(node.body)
    # The backwards jump has to be emitted before the exit jump
    # is patched: patching first would make a false condition
    # land on the backwards jump itself and the loop would
    # never terminate
    self.emitLoop(start)
    self.patchJump(jump)
proc isPure(self: Compiler, node: ASTNode): bool =
    ## Returns true if the pragmas attached to the given
    ## node include one named "pure". Lambdas are read as
    ## LambdaExpr, every other node is read as a Declaration
    let pragmas =
        case node.kind:
            of lambdaExpr:
                LambdaExpr(node).pragmas
            else:
                Declaration(node).pragmas
    # An empty pragma list trivially yields false
    result = pragmas.anyIt(it.name.name.lexeme == "pure")
proc checkCallIsPure(self: Compiler, node: ASTnode): bool =
    ## Checks if a call has any side effects.
    ##
    ## Returns true right away when isPure() reports the node
    ## as not pure; otherwise the node's pragmas are scanned
    ## for one named "pure"
    # NOTE(review): the scan below repeats the test isPure() already
    # passed, so as written every path that returns yields true -
    # confirm whether the first condition was meant to be inverted
    if not self.isPure(node):
        return true
    let pragmas =
        case node.kind:
            of lambdaExpr:
                LambdaExpr(node).pragmas
            else:
                Declaration(node).pragmas
    result = pragmas.anyIt(it.name.name.lexeme == "pure")
proc callExpr(self: Compiler, node: CallExpr) =
    ## Compiles code to call a function.
    ##
    ## The types of the positional arguments are inferred and
    ## used to look up a matching implementation when the callee
    ## is an identifier. Built-in functions are handled by
    ## handleBuiltinFunction(), regular ones by generateCall()
    ## and anything that did not resolve to a name goes through
    ## generateObjCall(). Inside local scopes the call is also
    ## checked with checkCallIsPure()
    var args: seq[tuple[name: string, kind: Type]] = @[]
    var argExpr: seq[Expression] = @[]
    # Stays true only while no argument has a mutable type
    var strictMutable = true
    # TODO: Keyword arguments
    for i, argument in node.arguments.positionals:
        let kind = self.inferType(argument)
        if kind.isNil():
            if argument.kind == identExpr:
                self.error(&"reference to undeclared identifier '{IdentExpr(argument).name.lexeme}'")
            self.error(&"cannot infer the type of argument {i + 1} in function call")
        if kind.mutable:
            strictMutable = false
        args.add(("", kind))
        argExpr.add(argument)
    for argument in node.arguments.keyword:
        discard
    if args.len() >= 16777216:
        self.error(&"cannot pass more than 16777215 arguments")
    var funct: Name
    case node.callee.kind:
        of identExpr:
            let wanted = Type(kind: Function, returnType: Type(kind: Any), args: args)
            funct = self.matchImpl(IdentExpr(node.callee).name.lexeme, wanted, strictMutable)
        of NodeKind.callExpr:
            # Chained calls: compile each callee that is itself a call
            var callee = node.callee
            while callee.kind == NodeKind.callExpr:
                self.callExpr(CallExpr(callee))
                callee = CallExpr(callee).callee
        else:
            discard  # TODO: Calling expressions
    if funct.isNil():
        self.generateObjCall(argExpr)
    elif funct.valueType.isBuiltinFunction:
        self.handleBuiltinFunction(funct, argExpr)
    else:
        self.generateCall(funct, argExpr)
    if self.scopeDepth > 0 and not self.checkCallIsPure(node.callee):
        if self.currentFunction.name.isNil():
            self.error(&"cannot make sure that call is side-effect free")
        else:
            self.error(&"cannot make sure that calls to '{self.currentFunction.name.token.lexeme}' are side-effect free")
proc expression(self: Compiler, node: Expression) =
    ## Compiles all expressions by dispatching on the
    ## node's kind to the dedicated handler. Unknown kinds
    ## are reported as an internal error
    case node.kind:
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # All literal nodes share the same overall structure
            # and the kind field is enough to tell them apart,
            # so a single handler takes care of them
            self.literal(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of NodeKind.callExpr:
            self.callExpr(CallExpr(node))  # TODO
        of setItemExpr, assignExpr:  # TODO: Get rid of this
            # The node is passed along unconverted: telling the
            # two kinds apart is the job of self.assignment()
            self.assignment(node)
        of getItemExpr, pragmaExpr:
            discard  # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements, i.e. await expressions
    ## parsed in statement position so that they can be used
    ## standalone. The awaited expression is compiled first and
    ## an Await instruction is emitted right after it.
    ## Await is the native construct to run coroutines from
    ## within an already asynchronous context (which should be
    ## orchestrated by an event loop) and it blocks in the
    ## caller until the callee returns
    let awaited = node.expression
    self.expression(awaited)
    self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ##
    ## The deferred expression is compiled into the chunk
    ## like any other, then the bytes it produced are moved
    ## to self.deferred and cut off the end of the chunk
    let current = self.chunk.code.len()
    self.expression(node.expression)
    # Copy first, truncate afterwards: deleting entries while
    # counting up over the same sequence would skip every other
    # byte and eventually index past its end
    for i in current..self.chunk.code.high():
        self.deferred.add(self.chunk.code[i])
    self.chunk.code.setLen(current)  # TODO: Do not change bytecode size
proc endFunctionBeforeReturn(self: Compiler) =
    ## Emits code to clear a function's stack frame
    ## right before executing its return instruction.
    ##
    ## Every non-function name declared at the current scope
    ## depth accounts for one value to pop. With optimizations
    ## enabled a single PopN covers the bulk of them, whatever
    ## is left is popped one PopC at a time
    var remaining = self.names.countIt(it.depth == self.scopeDepth and it.valueType.kind != Function)
    if self.enableOptimizations and remaining > 1:
        self.emitByte(PopN)
        self.emitBytes(remaining.toDouble())
        remaining -= uint16.high().int
    # Does nothing when remaining is zero or negative
    for _ in 1..remaining:
        self.emitByte(PopC)
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. An empty return
    ## implicitly returns nil.
    ##
    ## The type of the returned value is checked against the
    ## return type of the function being compiled: a missing
    ## value in a non-void function, a value in a void function
    ## and a value of the wrong type are all reported as errors.
    ## The operand emitted after Return is 1 when a value is
    ## returned and 0 otherwise
    let actual = self.inferType(node.value)
    let expected = self.inferType(self.currentFunction)
    if actual.isNil() and not expected.returnType.isNil():
        if not node.value.isNil():
            if node.value.kind == identExpr:
                self.error(&"reference to undeclared identifier '{node.value.token.lexeme}'")
            elif node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr:
                self.error(&"call to undeclared function '{CallExpr(node.value).callee.token.lexeme}'")
        self.error(&"expected return value of type '{self.typeToStr(expected.returnType)}', but expression has no type")
    elif expected.returnType.isNil() and not actual.isNil():
        self.error("non-empty return statement is not allowed in void functions")
    elif not self.compareTypes(actual, expected.returnType):
        # The value has to be compared with the declared return type:
        # comparing it with itself would accept any type
        self.error(&"expected return value of type '{self.typeToStr(expected.returnType)}', got '{self.typeToStr(actual)}' instead")
    if not node.value.isNil():
        self.expression(node.value)
        self.emitByte(OpCode.SetResult)
    self.endFunctionBeforeReturn()
    self.emitByte(OpCode.Return)
    if not node.value.isNil():
        self.emitByte(1)
    else:
        self.emitByte(0)
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded expression
    ## is compiled and followed by a Yield instruction
    let yielded = node.expression
    self.expression(yielded)
    self.emitByte(OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception expression
    ## is compiled and followed by a Raise instruction
    let raised = node.exception
    self.expression(raised)
    self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the start of the current loop, as recorded in
    ## self.currentLoop. A start position that does not fit the
    ## jump operand is reported as an error
    let target = self.currentLoop.start
    if target > 16777215:
        self.error("too much code to jump over in continue statement")
    self.emitByte(Jump)
    self.emitBytes(target.toTriple())
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A jump with a dummy
    ## offset is emitted and its position is recorded in the
    ## current loop, so that patchBreaks() can fix it up once
    ## the loop has been compiled
    let jumpPos = self.emitJump(OpCode.Jump)
    self.currentLoop.breakPos.add(jumpPos)
    if self.scopeDepth < self.currentLoop.depth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Turns every placeholder recorded by "break"
    ## statements of the current loop into an actual
    ## forward jump. This has to wait until the loop is
    ## fully compiled, because the amount of code to skip
    ## is not known before that
    for position in self.currentLoop.breakPos:
        self.chunk.code[position] = uint8(JumpForwards)
        self.patchJump(position)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (raise
    ## AssertionError if the expression is falsey):
    ## the asserted expression is compiled and followed
    ## by an Assert instruction
    let asserted = node.expression
    self.expression(asserted)
    self.emitByte(OpCode.Assert)
proc forEachStmt(self: Compiler, node: ForEachStmt) =
    ## Compiles foreach loops. Not implemented
    ## yet: nothing is emitted
    discard  # TODO
proc importStmt(self: Compiler, node: ImportStmt) =
    ## Imports a module at compile time. Imports are only
    ## accepted at the top level. The module is lexed, parsed
    ## and compiled with fresh lexer, parser and compiler
    ## instances; the compiled chunk is currently unused
    if self.scopeDepth > 0:
        self.error("import statements are only allowed at the top level")
    var tokenizer = newLexer()
    var moduleParser = newParser()
    var moduleCompiler = newCompiler()
    let moduleName = node.moduleName.name.lexeme
    # TODO: Find module
    let tokens = tokenizer.lex("", moduleName)
    let tree = moduleParser.parse(tokens, moduleName)
    discard moduleCompiler.compile(tree, moduleName)
proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements by dispatching on the node's
    ## kind. Kinds without a dedicated branch are compiled
    ## as expressions
    case node.kind:
        of exprStmt:
            let expression = ExprStmt(node).expression
            self.expression(expression)
            # A call to a function without a return type leaves
            # nothing behind, anything else has to be popped
            let isVoidCall = expression.kind == callExpr and self.inferType(CallExpr(expression).callee).returnType.isNil()
            if not isVoidCall:
                let popOp = if self.replMode: PopRepl else: Pop
                self.emitByte(popOp)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.whileStmt, NodeKind.forStmt:
            # Our parser already desugars for loops to
            # while loops! The enclosing loop is saved and
            # restored so that nested loops work
            let enclosing = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = enclosing
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            self.expression(Expression(node))
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations.
    ##
    ## The declared type (if any) and the type of the initial
    ## value are inferred and checked against each other, then
    ## the value is compiled, the name is declared and a StoreVar
    ## instruction is emitted
    let expected = self.inferType(node.valueType)
    let actual = self.inferType(node.value)
    if expected.isNil() and actual.isNil():
        # Neither side has a type: try to give a more precise
        # diagnostic when the value is a name or a call to a name
        if node.value.kind == identExpr:
            let name = node.value.token.lexeme
            self.error(&"reference to undeclared identifier '{name}'")
        elif node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr:
            let name = CallExpr(node.value).callee.token.lexeme
            self.error(&"reference to undeclared identifier '{name}'")
        self.error(&"'{node.name.token.lexeme}' has no type")
    elif not expected.isNil() and expected.mutable:  # I mean, variables *are* already mutable (some of them anyway)
        self.error(&"invalid type '{self.typeToStr(expected)}' for var")
    elif not self.compareTypes(expected, actual) and not expected.isNil():
        self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'")
    self.expression(node.value)
    # Only names introduced with the var keyword are mutable
    let isMutable = node.token.kind == TokenType.Var
    self.declareName(node, mutable=isMutable)
    self.emitByte(StoreVar)
    self.emitBytes(self.names.len().toTriple())
proc typeDecl(self: Compiler, node: TypeDecl) =
    ## Compiles type declarations. Not implemented
    ## yet: nothing is emitted
    discard  # TODO
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) =
    ## Handles the "magic" pragma, which marks a function as
    ## built-in. The pragma takes exactly one string argument
    ## and is only valid on function declarations. Assumes the
    ## given name is already declared
    if pragma.args.len() != 1:
        self.error("'magic' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'magic' pragma: wrong type of argument (string expected)")
    elif node.kind != NodeKind.funDecl:
        self.error("'magic' pragma is not valid in this context")
    let declared = self.resolve(FunDecl(node).name)
    declared.valueType.isBuiltinFunction = true
    # The slice drops the first and last character of the
    # lexeme (presumably the quotes of the string literal)
    let literal = pragma.args[0].token.lexeme
    declared.valueType.builtinOp = literal[1..^2]
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) =
    ## Handles the "pure" pragma by flagging the function
    ## declaration or lambda it is attached to as pure. Any
    ## other kind of node is an error
    if node.kind == NodeKind.funDecl:
        FunDecl(node).isPure = true
    elif node.kind == NodeKind.lambdaExpr:
        LambdaExpr(node).isPure = true
    else:
        self.error("'pure' pragma: invalid usage")
proc dispatchPragmas(self: Compiler, node: ASTnode) =
    ## Dispatches pragmas bound to objects: each pragma
    ## attached to the node is looked up by name in
    ## self.compilerProcs and its handler is invoked. Unknown
    ## pragmas are reported as errors
    let pragmas =
        case node.kind:
            of funDecl, NodeKind.typeDecl, NodeKind.varDecl:
                Declaration(node).pragmas
            of lambdaExpr:
                LambdaExpr(node).pragmas
            else:
                newSeq[Pragma]()  # Unreachable
    for pragma in pragmas:
        let handlerName = pragma.name.token.lexeme
        if handlerName notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        self.compilerProcs[handlerName](self, pragma, node)
proc funDecl(self: Compiler, node: FunDecl) =
## Compiles function declarations.
##
## The function's name is declared first. Generic functions stop
## right there. For every other function the pragmas are dispatched
## and, unless the function turns out to be a built-in, its body is
## compiled in line behind a forward jump, followed by a Return
## instruction and an entry in the chunk's cfi data. The previously
## active function is restored at the end
# Saved so it can be restored once this declaration is compiled
var function = self.currentFunction
self.declareName(node)
if node.generics.len() > 0:
# We can't know the type of
# generic arguments yet, so
# we wait for the function to
# be called to compile its code
# or dispatch any pragmas. We
# still declare its name so that
# it can be assigned to variables
# and passed to functions
return
self.dispatchPragmas(node)
# Counting back from the end of self.names past one entry per
# argument yields the entry of the function itself
let fn = self.names[^(node.arguments.len() + 1)]
var jmp: int
if not fn.valueType.isBuiltinFunction:
self.frames.add(self.names.high())
# A function's code is just compiled linearly
# and then jumped over
jmp = self.emitJump(JumpForwards)
# Function's code starts after the jump
fn.codePos = self.chunk.code.len()
for argument in node.arguments:
# Pops off the operand stack onto the
# call stack
self.emitByte(LoadArgument)
if not node.returnType.isNil() and self.inferType(node.returnType).isNil():
# Are we returning a generic type?
var isGeneric = false
if node.returnType.kind == identExpr:
let name = IdentExpr(node.returnType)
for g in node.generics:
if name == g.name:
# Yep!
isGeneric = true
break
if not isGeneric:
# Nope
self.error(&"cannot infer the type of '{node.returnType.token.lexeme}'")
# TODO: Forward declarations
if not node.body.isNil():
if BlockStmt(node.body).code.len() == 0 and not fn.valueType.isBuiltinFunction:
self.error("cannot declare function with empty body")
let fnType = self.inferType(node)
let impl = self.findByType(node.name.token.lexeme, fnType)
if impl.len() > 1:
# Oh-oh! We found more than one implementation of
# the same function with the same name! Error!
var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n"
for fn in reversed(impl):
msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
self.error(msg)
# We store the current function
self.currentFunction = node
if not fn.valueType.isBuiltinFunction:
# Since the deferred array is a linear
# sequence of instructions and we want
# to keep track to whose function's each
# set of deferred instruction belongs,
# we record the length of the deferred
# array before compiling the function
# and use this info later to compile
# the try/finally block with the deferred
# code
var deferStart = self.deferred.len()
# We let our debugger know a function is starting
let start = self.chunk.code.high()
self.beginScope()
for decl in BlockStmt(node.body).code:
self.declaration(decl)
var typ: Type
var hasVal: bool = false
case self.currentFunction.kind:
of NodeKind.funDecl:
typ = self.inferType(self.currentFunction)
hasVal = self.currentFunction.hasExplicitReturn
of NodeKind.lambdaExpr:
typ = self.inferType(LambdaExpr(Declaration(self.currentFunction)))
hasVal = LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn
else:
discard # Unreachable
if hasVal and self.currentFunction.returnType.isNil() and not typ.returnType.isNil():
self.error("non-empty return statement is not allowed in void functions")
elif not hasVal and not self.currentFunction.returnType.isNil():
self.error("function has an explicit return type, but no return statement was found")
self.endFunctionBeforeReturn()
hasVal = hasVal and not typ.returnType.isNil()
self.endScope(deleteNames=true, fromFunc=true)
# Terminates the function's context
self.emitByte(OpCode.Return)
# The operand of Return tells whether a value is returned
if hasVal:
self.emitByte(1)
else:
self.emitByte(0)
# Function is ending!
# The cfi entry is written in this order: start, end, frame,
# argument count, name length and finally the name itself
self.chunk.cfi.add(start.toTriple())
self.chunk.cfi.add(self.chunk.code.high().toTriple())
self.chunk.cfi.add(self.frames[^1].toTriple())
self.chunk.cfi.add(uint8(node.arguments.len()))
if not node.name.isNil():
self.chunk.cfi.add(node.name.token.lexeme.len().toDouble())
var s = node.name.token.lexeme
# NOTE(review): the length above is written before the name is
# truncated, and the slice below keeps uint16.high() + 1 characters -
# confirm the reader of this data agrees with both
if node.name.token.lexeme.len() >= uint16.high().int:
s = node.name.token.lexeme[0..uint16.high()]
self.chunk.cfi.add(s.toBytes())
else:
self.chunk.cfi.add(0.toDouble())
# Currently defer is not functional so we
# just pop the instructions
# NOTE(review): entries are deleted while counting upwards over bounds
# computed before the loop, so with more than one deferred byte this
# looks like it skips entries and can index past the end - confirm
for i in countup(deferStart, self.deferred.len() - 1, 1):
self.deferred.delete(i)
self.patchJump(jmp)
# This makes us compile nested functions correctly
discard self.frames.pop()
self.currentFunction = function
proc patchReturnAddress(self: Compiler, pos: int) =
    ## Patches the return address of a function call:
    ## the four bytes starting at pos are overwritten with
    ## the encoding of the current length of the bytecode
    let address = self.chunk.code.len().toQuad()
    for offset in 0..3:
        self.chunk.code[pos + offset] = address[offset]
proc declaration(self: Compiler, node: Declaration) =
    ## Compiles all declarations. Variable, function and
    ## type declarations have their own handlers, anything
    ## else is compiled as a statement
    if node.kind == NodeKind.varDecl:
        self.varDecl(VarDecl(node))
    elif node.kind == NodeKind.funDecl:
        self.funDecl(FunDecl(node))
    elif node.kind == NodeKind.typeDecl:
        self.typeDecl(TypeDecl(node))
    else:
        self.statement(Statement(node))
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object.
    ##
    ## The compiler's state is reset, a hidden entry point is
    ## declared and called, then every declaration in ast is
    ## compiled in order. The chunk that was filled in the
    ## process is returned
    self.chunk = newChunk()
    self.ast = ast
    self.file = file
    self.currentModule = file.extractFilename()
    self.currentFunction = nil
    self.names = @[]
    self.scopeDepth = 0
    self.current = 0
    # Every peon program has a hidden entry point in
    # which user code is wrapped. Think of it as if
    # peon is implicitly writing the main() function
    # of your program and putting all of your code in
    # there. While we call our entry point just like
    # any regular peon function, we can't use our handy
    # helper generateCall() because we need to keep track
    # of where our program ends (which we don't know yet).
    # To fix this, we emit dummy offsets and patch them
    # later, once we know the boundaries of our hidden main()
    let entryType = Type(kind: Function,
                         name: "",
                         returnType: nil,
                         args: @[])
    # NOTE(review): 13 presumably equals the size of the call
    # sequence emitted below (1 + 3, 1 + 4 and 1 + 3 bytes), so
    # that main's code starts right after it - confirm
    let main = Name(depth: 0,
                    isPrivate: true,
                    isConst: false,
                    isLet: false,
                    isClosedOver: false,
                    owner: self.currentModule,
                    valueType: entryType,
                    codePos: 13,  # Jump address is hardcoded
                    name: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                    line: -1)
    self.names.add(main)
    self.emitByte(LoadFunction)
    self.emitBytes(main.codePos.toTriple())
    self.emitByte(LoadReturnAddress)
    # Position of the dummy return address, patched below
    let returnAddressPos = self.chunk.code.len()
    self.emitBytes(0.toQuad())
    self.emitByte(Call)
    self.emitBytes(2.toTriple())
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.endScope(fromFunc=true)
    self.patchReturnAddress(returnAddressPos)
    self.emitByte(OpCode.Return)
    self.emitByte(0)
    return self.chunk