# peon/src/frontend/compiler.nim
#
# 1982 lines
# 79 KiB
# Nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../util/multibyte
import ../util/symbols
import lexer as l
import parser as p
import std/tables
import std/strformat
import std/algorithm
import std/parseutils
import std/strutils
import std/sequtils
import std/sets
import std/os
export ast
export token
export multibyte
type
    TypeKind = enum
        ## Every kind of type the compiler
        ## distinguishes at compile time
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf, Typevar, Generic,
        Reference, Pointer
        Any    # Internal wildcard: compareTypes treats it as
               # matching every other type. It is used, for
               # instance, when looking up operators where only
               # the types of the arguments are of interest
    Type = ref object
        ## Compile-time description of a type
        # Set when the value was wrapped in a 'var' expression
        mutable: bool
        case kind: TypeKind:
            of Function:
                name: string
                isLambda: bool
                isGenerator: bool
                isCoroutine: bool
                # Argument names paired with their types
                args: seq[tuple[name: string, kind: Type]]
                # nil stands for "no return value"
                returnType: Type
                # Built-in functions are compiled to
                # instructions rather than to calls;
                # builtinOp names the operation
                isBuiltinFunction: bool
                builtinOp: string
                # The declaration this function type comes from
                fun: FunDecl
                # Closure bookkeeping: closureBounds holds
                # indexes into the compiler's closedOver list
                isClosure: bool
                closureBounds: tuple[start, stop: int]
                childFunc: Type
            of Reference, Pointer:
                # The referenced/pointed-to type
                value: Type
            of Generic:
                node: IdentExpr
            else:
                discard
# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode
type
    Name = ref object
        ## Compile-time record describing a
        ## statically resolved name
        # The identifier itself
        name: IdentExpr
        # The module owning the identifier
        owner: string
        # Scope depth of the declaration
        depth: int
        # Private names are only resolved from
        # within the module that owns them
        isPrivate: bool
        # Is this a constant?
        isConst: bool
        # Can this name's value be mutated?
        isLet: bool
        # The type of the name
        valueType: Type
        # Functions: where the function's code begins.
        # Variables: where their StoreVar/StoreHeap
        # instruction was emitted
        codePos: int
        # Has the name been captured by a closure?
        isClosedOver: bool
        # The function owning this variable (may be nil!)
        belongsTo: Name
        # Is this name an argument of a function?
        isFunctionArgument: bool
        # The line of the file where the name is declared
        line: int
        # Distinguishes an actual function declaration
        # from a variable whose value is a function (the
        # distinction *is* important! Check emitFunction())
        isFunDecl: bool
    Loop = object
        ## Bookkeeping for the loop being compiled,
        ## used to emit appropriate jump offsets
        ## for continue and break statements
        # Where the loop starts in the bytecode
        start: int
        # The scope depth the loop lives at
        depth: int
        # Absolute offsets into the bytecode that still
        # need patching. Used for break statements
        breakPos: seq[int]
    Compiler* = ref object
        ## The whole state of the Peon compiler
        # The chunk of bytecode being written to
        chunk: Chunk
        # The AST produced by the parser
        ast: seq[Declaration]
        # Index of the AST node being looked at
        current: int
        # The file being compiled (only used
        # when reporting errors)
        file: string
        # Compile-time "simulation" of the runtime
        # stack, which allows variables with stack
        # behavior to be loaded more efficiently
        names: seq[Name]
        # Current scope depth: 0 is the global
        # scope, anything above it is local
        scopeDepth: int
        # The function being compiled right now
        currentFunction: Name
        # Are optimizations turned on?
        enableOptimizations: bool
        # The loop being compiled right now (used
        # to keep track of where to jump)
        currentLoop: Loop
        # REPL mode flag. When set, the Pop instructions
        # of top-level expression statements are swapped
        # for a special PopRepl instruction that prints
        # the result of the expression once evaluated
        replMode: bool
        # The module being compiled (used to
        # restrict access to statically defined
        # variables at compile time)
        currentModule: string
        # Code produced by defer statements ends up
        # here. Later, if the current function has
        # any deferred code, funDecl wraps the body
        # of the function in an implicit try/finally
        # block and puts this code in the finally
        # branch. The sequence is emptied each time
        # a function declaration is compiled and only
        # ever holds the deferred code of the current
        # function (may be empty)
        deferred: seq[uint8]
        # The closed-over variables seen so far
        closedOver: seq[tuple[name: Name, count: int]]
        # Keeps track of stack frames
        frames: seq[int]
        # Procedures invoked by pragmas, keyed by pragma name
        compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]
        # Line information (start and stop of each line)
        lines: seq[tuple[start, stop: int]]
        # The source code of the current module
        source: string
        # The modules imported so far
        modules: HashSet[string]
        # TODO
        scopes: seq[Type]
    CompileError* = ref object of PeonException
        ## The exception raised when compilation
        ## fails. It carries the compiler, the
        ## offending node and the file/module names
        compiler*: Compiler
        node*: ASTNode
        file*: string
        module*: string
## Forward declarations
# The procedures below are referenced in this file before the
# point where their bodies appear (for instance, newCompiler
# registers handleMagicPragma and handlePurePragma), so their
# signatures are announced up front. compile is the only
# exported one
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
terminateScope: bool = true, incremental: bool = false): Chunk
# Code generation entry points for the three families of AST nodes
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
# Type inference and name lookup helpers
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name]
proc compareTypes(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, pos: int)
# Pragma handling
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTnode)
proc dispatchPragmas(self: Compiler, node: ASTnode)
proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[])
## End of forward declarations
proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler =
    ## Builds a Compiler with empty state. The two
    ## flags are stored unchanged in the new object
    result = Compiler(ast: @[],
                      current: 0,
                      file: "",
                      names: @[],
                      scopeDepth: 0,
                      frames: @[0],
                      lines: @[],
                      currentFunction: nil,
                      enableOptimizations: enableOptimizations,
                      replMode: replMode,
                      currentModule: "",
                      source: "")
    # Pragma handlers, looked up by the name of the pragma
    let handlers = newTable[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]()
    handlers["magic"] = handleMagicPragma
    handlers["pure"] = handlePurePragma
    result.compilerProcs = handlers
# Forward declaration only: the body of compileModule is not located here
proc compileModule(self: Compiler, filename: string)
## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the most recently consumed AST node, i.e.
    ## the one right before self.current, or the last node
    ## of the tree once self.current has moved past its end.
    ## Returns nil when there is no AST at all and the first
    ## node when nothing has been consumed yet (both cases
    ## used to fail with an out-of-bounds index)
    if self.ast.len() == 0:
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    # max() protects the self.current == 0 case,
    # where current - 1 would be a negative index
    return self.ast[max(self.current - 1, 0)]
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the declaration of the function being
    ## compiled, or nil when there is none. newCompiler
    ## initializes currentFunction to nil, so it has to
    ## be checked before its valueType is looked at
    if self.currentFunction.isNil() or self.currentFunction.valueType.isNil():
        return nil
    return self.currentFunction.valueType.fun
proc getFile*(self: Compiler): string {.inline.} =
    ## Returns the name of the file being compiled
    result = self.file


proc getModule*(self: Compiler): string {.inline.} =
    ## Returns the name of the module being compiled
    result = self.currentModule


proc getLines*(self: Compiler): seq[tuple[start, stop: int]] =
    ## Returns the line data stored in the compiler
    result = self.lines


proc getSource*(self: Compiler): string =
    ## Returns the source code of the current module
    result = self.source


proc getRelPos*(self: Compiler, line: int): tuple[start, stop: int] =
    ## Returns the (start, stop) pair stored for the
    ## given line. Lines are counted starting from 1
    result = self.lines[pred(line)]
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Returns the AST node located distance nodes
    ## away from the current one without consuming
    ## anything. Negative distances reach back to
    ## nodes that were already consumed. Whenever
    ## the target falls outside of the tree, the
    ## last node of the tree is returned instead
    let target = self.current + distance
    if target in 0..self.ast.high():
        result = self.ast[target]
    else:
        result = self.ast[^1]
proc done(self: Compiler): bool {.inline.} =
    ## Tells whether every node of the
    ## AST has already been consumed
    self.current >= self.ast.len()
proc error(self: Compiler, message: string, node: ASTNode = nil) {.raises: [CompileError], inline.} =
    ## Raises a CompileError carrying the given message.
    ## When no node is provided, the error is attached to
    ## the node returned by getCurrentNode()
    var culprit = node
    if culprit.isNil():
        culprit = self.getCurrentNode()
    raise CompileError(msg: message, node: culprit, file: self.file, module: self.currentModule, compiler: self)
proc step(self: Compiler): ASTNode {.inline.} =
    ## Consumes the current node and returns it.
    ## The position only advances while there are
    ## nodes left to consume
    result = self.peek()
    if self.current <= self.ast.high():
        inc(self.current)
proc emitByte(self: Compiler, byt: OpCode | uint8) {.inline.} =
    ## Writes one byte to the chunk being compiled,
    ## tagging it with the line of the current node
    let line = self.peek().token.line
    self.chunk.write(uint8(byt), line)
proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) {.inline.} =
    ## Writes a whole run of bytes to the current
    ## chunk by handing them to emitByte one by
    ## one, in order
    for i in 0..bytarr.high():
        self.emitByte(bytarr[i])
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Adds a constant to the constant table of the
    ## current chunk and returns what writeConstant
    ## reports back for it (a 3-byte array of uint8s).
    ## val provides the lexeme to encode and typ tells
    ## how to encode it. For types that are not handled
    ## here nothing is written and the result is zeroed
    var lit: string
    if typ.kind in [UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64]:
        lit = val.token.lexeme
        # Integer literals may carry a type specifier
        # after a single quote: it's not part of the
        # number, so everything from the quote onwards
        # is dropped
        let marker = lit.rfind('\'')
        if marker != -1:
            lit = lit[0..<marker]
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(parseInt(lit))])
        of Int16, UInt16:
            result = self.chunk.writeConstant(parseInt(lit).toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(parseInt(lit).toQuad())
        of Int64:
            result = self.chunk.writeConstant(parseInt(lit).toLong())
        of UInt64:
            result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
        of String:
            # emitConstant announces a size of len - 2 bytes (the
            # lexeme without its two delimiters), so both the opening
            # and the closing quote have to be stripped. The previous
            # slice, [1..^1], kept the closing one
            result = self.chunk.writeConstant(val.token.lexeme[1..^2].toBytes())
        of Float32:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
        of Float64:
            var f: float = 0.0
            discard parseFloat(val.token.lexeme, f)
            result = self.chunk.writeConstant(cast[array[8, uint8]](f))
        else:
            discard
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits the load instruction matching the given
    ## type, followed by the operand returned by
    ## makeConstant. Strings also get a 3-byte size
    ## operand right after the instruction. Raises
    ## a CompileError if a string is too large for
    ## its size to be encoded
    case kind.kind:
        of Int64:
            self.emitByte(LoadInt64)
        of UInt64:
            self.emitByte(LoadUInt64)
        of Int32:
            self.emitByte(LoadInt32)
        of UInt32:
            self.emitByte(LoadUInt32)
        of Int16:
            self.emitByte(LoadInt16)
        of UInt16:
            self.emitByte(LoadUInt16)
        of Int8:
            self.emitByte(LoadInt8)
        of UInt8:
            self.emitByte(LoadUInt8)
        of String:
            self.emitByte(LoadString)
            let str = LiteralExpr(obj).literal.lexeme
            # What gets encoded is the size of the string without
            # its two delimiters, so that is the value which must
            # fit in 3 bytes. Checking the length of the whole
            # lexeme rejected strings that are still within bounds
            let size = str.len() - 2
            if size > 16777215:
                self.error("string constants cannot be larger than 16777215 bytes")
            self.emitBytes(size.toTriple())
        of Float32:
            self.emitByte(LoadFloat32)
        of Float64:
            self.emitByte(LoadFloat64)
        else:
            discard  # TODO
    self.emitBytes(self.makeConstant(obj, kind))
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits the given jump instruction with a zeroed
    ## 3-byte offset as a placeholder. The return value
    ## is the absolute index of the instruction inside
    ## the chunk's bytecode, to be passed to patchJump
    ## once the real offset is known
    self.emitByte(opcode)
    self.emitBytes(0.toTriple())
    # The instruction sits 3 bytes before the last one written
    result = self.chunk.code.high() - 3
proc patchJump(self: Compiler, offset: int) =
    ## Fills in the offset of a relative jump that
    ## was previously emitted using emitJump so
    ## that it lands at the current end of the code.
    ## offset is the value emitJump returned
    let distance = self.chunk.code.len() - offset
    if distance > 16777215:
        self.error("cannot jump more than 16777215 instructions")
    # The 4 bytes taken by the jump itself don't count
    for i, b in (distance - 4).toTriple():
        self.chunk.code[offset + i + 1] = b
proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
    ## Walks self.names from the most recent entry to
    ## the oldest one and returns the first entry whose
    ## identifier matches the given one, or nil if there
    ## is none. Names that are private to a module other
    ## than the current one are never returned. The depth
    ## parameter is not taken into account: scope depth is
    ## not this function's business, getStackPos does that job
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.name.token.lexeme != name.token.lexeme:
            continue
        if candidate.isPrivate and candidate.owner != self.currentModule:
            # Keep looking: the current module may
            # declare a name that matches as well
            continue
        return candidate
    return nil
proc getStackPos(self: Compiler, name: Name): int =
    ## Computes the predicted position of the given name
    ## on the call stack, relative to the current frame.
    ## Positions start at 2; function declarations and
    ## names of kind CustomType or Generic are skipped
    ## and do not take up a position. Returns -1 when
    ## the name is not found
    var position = 2
    for variable in self.names:
        if variable.isFunDecl or variable.valueType.kind in {CustomType, Generic}:
            continue
        if name == variable:
            return position
        inc(position)
    return -1
proc getClosurePos(self: Compiler, name: Name): int =
    ## Searches the list of closed-over names from its
    ## end towards its start and returns the index of
    ## the first entry that refers to the given name,
    ## which is its predicted position in the closure
    ## array. Returns -1 when the name is not in the list
    for i in countdown(self.closedOver.high(), 0):
        if name == self.closedOver[i].name:
            return i
    return -1
proc resolve(self: Compiler, name: string,
             depth: int = self.scopeDepth): Name =
    ## Same lookup as the overload taking an identifier,
    ## except that the name is given as a plain string:
    ## self.names is walked from the most recent entry to
    ## the oldest one and the first match is returned, or
    ## nil if there is none. Names that are private to a
    ## module other than the current one are never returned.
    ## The depth parameter is not taken into account
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.name.token.lexeme != name:
            continue
        if candidate.isPrivate and candidate.owner != self.currentModule:
            # Keep looking: the current module may
            # declare a name that matches as well
            continue
        return candidate
    return nil
proc detectClosureVariable(self: Compiler, name: var Name, depth: int = self.scopeDepth) =
## Detects if the given name is used in a local scope deeper
## than the given one and modifies the code emitted for it
## to store it as a closure variable if it is. Does nothing if the name
## is nil (e.g. it hasn't been declared yet or is unreachable because
## it's declared as private in another module), if it is global
## (depth 0) or if it has already been marked as closed over.
## This function must be called each time a name is referenced in
## order for closed-over variables to be emitted properly, otherwise
## the runtime may behave unpredictably or crash
if name.isNil() or name.depth == 0 or name.isClosedOver:
return
elif name.depth < depth and self.scopes[name.depth - 1] != self.scopes[depth - 1]:
# Ding! The given name is declared at a shallower depth and
# the entries of self.scopes for the two depths differ, so it
# is closed over in another function: we need to overwrite the
# instruction located at name.codePos (put in place by
# self.declareName, according to the original note -- TODO
# confirm, its body is not fully visible here) with a
# StoreClosure. We also update the name's isClosedOver field so
# that self.identifier() can emit a LoadClosure instruction
# instead of a LoadVar once this name is referenced in the future
self.closedOver.add((name, 0))
name.isClosedOver = true
# closureBounds holds indexes into self.closedOver. NOTE(review):
# presumably start is only set the first time the current function
# becomes a closure while stop moves along with every new variable;
# verify against the original indentation
if not self.currentFunction.valueType.isClosure:
self.currentFunction.valueType.isClosure = true
self.currentFunction.valueType.closureBounds.start = self.closedOver.high()
self.currentFunction.valueType.closureBounds.stop = self.closedOver.high()
# Indexes into self.closedOver are encoded on 3 bytes
if self.closedOver.len() >= 16777216:
self.error("too many consecutive closed-over variables (max is 16777215)")
if not name.isFunctionArgument:
# Regular variables: the operand is the index of the
# entry that was just added to self.closedOver
self.chunk.code[name.codePos] = StoreClosure.uint8()
for i, b in self.closedOver.high().toTriple():
self.chunk.code[name.codePos + i + 1] = b
else:
# Function arguments: the operand is the stack
# position of the argument instead
self.chunk.code[name.codePos] = LiftArgument.uint8()
for i, b in self.getStackPos(name).toTriple():
self.chunk.code[name.codePos + i + 1] = b
proc compareTypes(self: Compiler, a, b: Type): bool =
    ## Tells whether two type objects describe the
    ## same type. Either of them may be nil, which
    ## is how the return type of void functions is
    ## represented
    if a.isNil():
        # nil only matches nil and the Any wildcard
        return b.isNil() or b.kind == Any
    if b.isNil():
        return a.kind == Any
    if a.kind in {Any, Generic} or b.kind in {Any, Generic}:
        # Any matches everything. User code can't
        # produce it, but it's needed internally for
        # function calls and the like, since peon has
        # no return type inference. Generic argument
        # types are matched the same way
        return true
    if a.kind != b.kind:
        # Different discriminants can never
        # describe the same type
        return false
    # From here on both types share the same kind
    case a.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, Nan, Bool, Inf:
            # For value types the kind is all there is
            result = true
        of Reference, Pointer:
            # Two references (or pointers) are equal
            # if what they refer to is equal
            result = self.compareTypes(a.value, b.value)
        of Function:
            # Functions need the same number of arguments,
            # the same return type and pairwise equal
            # argument types
            if a.args.len() != b.args.len():
                return false
            if not self.compareTypes(a.returnType, b.returnType):
                return false
            for i in 0..a.args.high():
                if not self.compareTypes(a.args[i].kind, b.args[i].kind):
                    return false
            result = true
        else:
            # TODO: Custom types
            result = false
proc toIntrinsic(name: string): Type =
    ## Maps the name of an intrinsic type (or one
    ## of its aliases) to a new type object of the
    ## matching kind. Returns nil when the name is
    ## not that of an intrinsic type
    case name:
        of "int", "int64", "i64":
            result = Type(kind: Int64)
        of "uint64", "u64", "uint":
            result = Type(kind: UInt64)
        of "int32", "i32":
            result = Type(kind: Int32)
        of "uint32", "u32":
            result = Type(kind: UInt32)
        of "int16", "i16", "short":
            result = Type(kind: Int16)
        of "uint16", "u16":
            result = Type(kind: UInt16)
        of "int8", "i8":
            result = Type(kind: Int8)
        of "uint8", "u8":
            result = Type(kind: UInt8)
        of "f64", "float", "float64":
            result = Type(kind: Float64)
        of "f32", "float32":
            result = Type(kind: Float32)
        of "byte", "b":
            result = Type(kind: Byte)
        of "char", "c":
            result = Type(kind: Char)
        of "nan":
            result = Type(kind: Nan)
        of "nil":
            result = Type(kind: Nil)
        of "inf":
            result = Type(kind: Inf)
        of "bool":
            result = Type(kind: Bool)
        of "typevar":
            result = Type(kind: Typevar)
        of "string":
            result = Type(kind: String)
        else:
            result = nil
proc inferType(self: Compiler, node: LiteralExpr): Type =
    ## Works out the type of a literal. Numbers may
    ## carry a type specifier after a single quote:
    ## without one, integers are Int64 and floats
    ## are Float64. Returns nil for a nil node and
    ## raises a CompileError for unknown specifiers
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr, floatExpr:
            let isFloat = node.kind == floatExpr
            let parts = node.token.lexeme.split("'")
            if len(parts) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if parts.len() == 1:
                # No specifier: fall back to the default size
                return if isFloat: Type(kind: Float64) else: Type(kind: Int64)
            if isFloat and parts[1] == "f64":
                return Type(kind: Float64)
            let typ = parts[1].toIntrinsic()
            if self.compareTypes(typ, nil):
                # The specifier doesn't name an intrinsic type
                let what = if isFloat: "float" else: "int"
                self.error(&"invalid type specifier '{parts[1]}' for {what}")
            return typ
        of nilExpr:
            return Type(kind: Nil)
        of trueExpr, falseExpr:
            return Type(kind: Bool)
        of nanExpr:
            return Type(kind: TypeKind.Nan)
        of infExpr:
            return Type(kind: TypeKind.Inf)
        of strExpr:
            return Type(kind: String)
        else:
            discard  # TODO
proc matchImpl(self: Compiler, name: string, kind: Type): Name


proc inferType(self: Compiler, node: Expression): Type =
    ## Infers the type of a given expression and
    ## returns it. Returns nil when the node is
    ## nil or when no type can be worked out for
    ## it (for example when it refers to a name
    ## that can't be resolved). Operators are
    ## looked up with matchImpl, which raises a
    ## CompileError if no implementation fits
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let ident = IdentExpr(node)
            let name = self.resolve(ident)
            if not name.isNil():
                result = name.valueType
            else:
                # Not a declared name: it may
                # still name an intrinsic type
                result = ident.name.lexeme.toIntrinsic()
        of unaryExpr:
            let unary = UnaryExpr(node)
            return self.matchImpl(unary.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.inferType(unary.a))])).valueType.returnType
        of binaryExpr:
            let binary = BinaryExpr(node)
            return self.matchImpl(binary.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.inferType(binary.a)), ("", self.inferType(binary.b))])).valueType.returnType
        of intExpr, hexExpr, binExpr, octExpr,
           strExpr, falseExpr, trueExpr, infExpr,
           nanExpr, floatExpr, nilExpr:
            return self.inferType(LiteralExpr(node))
        of lambdaExpr:
            let lambda = LambdaExpr(node)
            result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
            if not lambda.returnType.isNil():
                result.returnType = self.inferType(lambda.returnType)
            for argument in lambda.arguments:
                result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType)))
        of callExpr:
            let call = CallExpr(node)
            case call.callee.kind:
                of identExpr:
                    let resolved = self.resolve(IdentExpr(call.callee))
                    # returnType only exists on function types: anything
                    # else (including a name with no type) yields nil
                    # rather than reading a field of the wrong variant
                    if not resolved.isNil() and not resolved.valueType.isNil() and resolved.valueType.kind == Function:
                        result = resolved.valueType.returnType
                    else:
                        result = nil
                of lambdaExpr:
                    result = self.inferType(LambdaExpr(call.callee).returnType)
                of callExpr:
                    # Calling the result of another call: the type
                    # is the return type of whatever that call returns
                    result = self.inferType(CallExpr(call.callee))
                    if not result.isNil():
                        result = if result.kind == Function: result.returnType else: nil
                else:
                    discard  # Unreachable
        of varExpr:
            result = self.inferType(Var(node).value)
            # The inner expression may have no type at all (e.g.
            # an undeclared name), in which case there is nothing
            # to flag as mutable
            if not result.isNil():
                result.mutable = true
        of refExpr:
            result = Type(kind: Reference, value: self.inferType(Ref(node).value))
        of ptrExpr:
            result = Type(kind: Pointer, value: self.inferType(Ptr(node).value))
        of groupingExpr:
            result = self.inferType(GroupingExpr(node).expression)
        else:
            discard  # Unreachable
proc inferType(self: Compiler, node: Declaration, strictMutable: bool = true): Type =
    ## Works out the type of a declaration. For
    ## functions and variables whose name resolves,
    ## that is the type recorded for the name. Returns
    ## nil for a nil node, for functions that do not
    ## resolve and for every other kind of node
    if node.isNil():
        return nil
    case node.kind:
        of NodeKind.funDecl:
            let resolved = self.resolve(FunDecl(node).name)
            if not resolved.isNil():
                return resolved.valueType
        of NodeKind.varDecl:
            let variable = VarDecl(node)
            let resolved = self.resolve(variable.name)
            if resolved.isNil():
                # NOTE(review): because of the second argument this
                # call can only pick an overload accepting strictMutable,
                # which among the ones visible here is this very
                # procedure -- confirm that this is intended
                return self.inferType(variable.value, strictMutable)
            return resolved.valueType
        else:
            return  # Unreachable
proc typeToStr(self: Compiler, typ: Type): string =
    ## Renders a type object as a string.
    ## A nil type is rendered as "nil"; kinds that
    ## are not handled produce an empty string
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            # Simple types are named after their kind
            result = ($typ.kind).toLowerAscii()
        of Pointer:
            result = &"ptr {self.typeToStr(typ.value)}"
        of Reference:
            result = &"ref {self.typeToStr(typ.value)}"
        of Function:
            # fn (name: [var ]type, ...)[: returnType]
            var arguments: seq[string] = @[]
            for argument in typ.args:
                let modifier = if argument.kind.mutable: "var " else: ""
                arguments.add(&"{argument.name}: {modifier}{self.typeToStr(argument.kind)}")
            result = "fn (" & arguments.join(", ") & ")"
            if not typ.returnType.isNil():
                result &= &": {self.typeToStr(typ.returnType)}"
        of Generic:
            result = typ.node.name.lexeme
        else:
            discard
proc findByName(self: Compiler, name: string): seq[Name] =
    ## Collects every declared object going by the
    ## given name, most recent first. Names that are
    ## private to a module other than the current one
    ## are left out
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.name.token.lexeme != name:
            continue
        if candidate.isPrivate and candidate.owner != self.currentModule:
            continue
        result.add(candidate)
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name] =
    ## Looks for objects that have already been declared
    ## with the given name and type. If depth is not -1,
    ## the scope depth of the name must match it as well
    for obj in self.findByName(name):
        # 'and' binds tighter than 'or': without the parentheses the
        # condition read as (types match and depth == -1) or
        # (depth == obj.depth), which skipped the type comparison
        # altogether whenever a depth was given
        if self.compareTypes(obj.valueType, kind) and (depth == -1 or depth == obj.depth):
            result.add(obj)
#[
proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] =
## Looks for objects that have been already declared
## with the given name at the given scope depth.
## Returns all objects that apply
for obj in self.findByName(name):
if obj.depth == depth:
result.add(obj)
]#
proc matchImpl(self: Compiler, name: string, kind: Type): Name =
    ## Tries to find a matching function implementation
    ## compatible with the given type and returns its
    ## name object. Raises a CompileError when there is
    ## no implementation that fits (the message lists
    ## every candidate with the given name, along with
    ## the reason it was discarded) or when more than
    ## one fits
    let impl = self.findByType(name, kind)
    if impl.len() == 0:
        var msg = &"cannot find a suitable implementation for '{name}'"
        let names = self.findByName(name)
        if names.len() > 0:
            msg &= &", found {len(names)} candidate"
            if names.len() > 1:
                msg &= "s"
            msg &= ": "
            for candidate in names:
                msg &= &"\n - in module '{candidate.owner}' at line {candidate.name.token.line} of type '{self.typeToStr(candidate.valueType)}'"
                # A candidate may have no type at all: checking
                # for nil first avoids crashing while the error
                # message is still being put together
                if candidate.valueType.isNil() or candidate.valueType.kind != Function:
                    msg &= ", not a callable"
                elif kind.args.len() != candidate.valueType.args.len():
                    msg &= &", wrong number of arguments ({candidate.valueType.args.len()} expected, got {kind.args.len()})"
                else:
                    for i, arg in kind.args:
                        let expected = candidate.valueType.args[i]
                        # Argument types are nil when they could not be inferred
                        # (e.g. an undeclared name was passed): those are reported
                        # as a type mismatch below instead of being dereferenced
                        if not arg.kind.isNil() and not expected.kind.isNil() and expected.kind.mutable and not arg.kind.mutable:
                            msg &= &", first mismatch at position {i + 1}: {expected.name} is immutable, not 'var'"
                            break
                        elif not self.compareTypes(arg.kind, expected.kind):
                            msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(expected.kind)}', got '{self.typeToStr(arg.kind)}' instead"
                            break
        self.error(msg)
    elif impl.len() > 1:
        var msg = &"multiple matching implementations of '{name}' found:\n"
        for fn in reversed(impl):
            msg &= &"- '{fn.name.token.lexeme}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
        self.error(msg)
    return impl[0]
proc check(self: Compiler, term: Expression, kind: Type) =
    ## Makes sure that the type of term matches the
    ## given one, raising a CompileError if it doesn't
    ## or if term has no type to begin with
    let actual = self.inferType(term)
    if not actual.isNil():
        if not self.compareTypes(actual, kind):
            self.error(&"expecting value of type '{self.typeToStr(kind)}', got '{self.typeToStr(actual)}' instead", term)
        return
    # No type could be inferred: the most
    # specific explanation available is reported
    if term.kind == identExpr:
        self.error(&"reference to undeclared name '{term.token.lexeme}'", term)
    if term.kind == callExpr and CallExpr(term).callee.kind == identExpr:
        self.error(&"call to undeclared function '{CallExpr(term).callee.token.lexeme}'", term)
    self.error(&"expecting value of type '{self.typeToStr(kind)}', but expression has no type", term)
proc emitFunction(self: Compiler, name: Name) =
    ## Emits the instruction that loads the function
    ## the given name refers to, followed by its
    ## 3-byte operand
    var
        opcode: OpCode
        operand: int
    if name.isFunDecl:
        # A statically declared function is
        # loaded from where its code begins
        opcode = LoadFunction
        operand = name.codePos
    elif not name.isClosedOver:
        # Otherwise it must be a function object created
        # by a previous LoadFunction instruction that is
        # now bound to a variable, so the variable is loaded
        opcode = LoadVar
        operand = self.getStackPos(name)
    else:
        opcode = LoadClosure
        operand = self.getClosurePos(name)
    self.emitByte(opcode)
    self.emitBytes(operand.toTriple())
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
    ## Emits the instructions that load a literal:
    ## singletons, strings and numbers in any of the
    ## supported notations. Await expressions are
    ## compiled here as well. Numbers that do not fit
    ## cause a CompileError to be raised
    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue)
        of falseExpr:
            self.emitByte(LoadFalse)
        of nilExpr:
            self.emitByte(LoadNil)
        of infExpr:
            self.emitByte(LoadInf)
        of nanExpr:
            self.emitByte(LoadNan)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        of intExpr:
            let number = IntExpr(node)
            let kind = self.inferType(number)
            # The value is parsed only to find out whether
            # it's in range: the constant is emitted from
            # the lexeme
            try:
                if kind.kind in [Int64, Int32, Int16, Int8]:
                    var signed: int
                    discard parseInt(number.literal.lexeme, signed)
                else:
                    var unsigned: uint64
                    discard parseBiggestUInt(number.literal.lexeme, unsigned)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(number, kind)
        of hexExpr, binExpr, octExpr:
            # Numbers in other bases are converted to base 10 and
            # emitted as if they were a decimal integer literal
            # located at the same position in the source
            let original = LiteralExpr(node)
            var value: int
            try:
                case node.kind:
                    of hexExpr:
                        discard parseHex(original.literal.lexeme, value)
                    of binExpr:
                        discard parseBin(original.literal.lexeme, value)
                    else:
                        discard parseOct(original.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            let digits = $value
            let converted = newIntExpr(Token(lexeme: digits, line: original.token.line,
                                             pos: (start: original.token.pos.start,
                                                   stop: original.token.pos.start + len(digits))
                                             )
                                       )
            self.emitConstant(converted, self.inferType(original))
        of floatExpr:
            let number = FloatExpr(node)
            var value: float
            try:
                discard parseFloat(number.literal.lexeme, value)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(number, self.inferType(number))
        of awaitExpr:
            let awaited = AwaitExpr(node)
            self.expression(awaited.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")
proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) =
    ## Compiles a call to a built-in function, such as
    ## addition or subtraction. Most of them boil down
    ## to their operands followed by one instruction;
    ## the logical operators use a jump instead, so that
    ## the second operand can be skipped. Raises a
    ## CompileError for built-ins that are not known
    let operation = fn.valueType.builtinOp
    if operation notin ["LogicalOr", "LogicalAnd"]:
        # Operands are compiled up front, the second
        # one first when there are two of them
        case len(args):
            of 2:
                self.expression(args[1])
                self.expression(args[0])
            of 1:
                self.expression(args[0])
            else:
                discard
    const codes: Table[string, OpCode] = {
        "Negate": Negate,
        "NegateFloat32": NegateFloat32,
        "NegateFloat64": NegateFloat64,
        "Add": Add,
        "Subtract": Subtract,
        "Divide": Divide,
        "Multiply": Multiply,
        "SignedDivide": SignedDivide,
        "AddFloat64": AddFloat64,
        "SubtractFloat64": SubtractFloat64,
        "DivideFloat64": DivideFloat64,
        "MultiplyFloat64": MultiplyFloat64,
        "AddFloat32": AddFloat32,
        "SubtractFloat32": SubtractFloat32,
        "DivideFloat32": DivideFloat32,
        "MultiplyFloat32": MultiplyFloat32,
        "Pow": Pow,
        "SignedPow": SignedPow,
        "PowFloat32": PowFloat32,
        "PowFloat64": PowFloat64,
        "Mod": Mod,
        "SignedMod": SignedMod,
        "ModFloat32": ModFloat32,
        "ModFloat64": ModFloat64,
        "Or": Or,
        "And": And,
        "Xor": Xor,
        "Not": Not,
        "LShift": LShift,
        "RShift": RShift,
        "Equal": Equal,
        "NotEqual": NotEqual,
        "LessThan": LessThan,
        "GreaterThan": GreaterThan,
        "LessOrEqual": LessOrEqual,
        "GreaterOrEqual": GreaterOrEqual,
        "PrintInt64": PrintInt64,
        "PrintUInt64": PrintUInt64,
        "PrintInt32": PrintInt32,
        "PrintUInt32": PrintUInt32,
        "PrintInt16": PrintInt16,
        "PrintUInt16": PrintUInt16,
        "PrintInt8": PrintInt8,
        "PrintUInt8": PrintUInt8,
        "PrintFloat64": PrintFloat64,
        "PrintFloat32": PrintFloat32,
        "PrintHex": PrintHex,
        "PrintBool": PrintBool,
        "PrintNan": PrintNan,
        "PrintInf": PrintInf,
        "PrintString": PrintString,
        "SysClock64": SysClock64
    }.toTable()
    if operation in codes:
        self.emitByte(codes[operation])
        return
    # Some builtin operations are slightly more
    # complex, so they are dealt with separately
    case operation:
        of "LogicalOr", "LogicalAnd":
            # First operand, then a jump over the second
            # operand, which is patched once it's compiled
            let opcode = if operation == "LogicalOr": JumpIfTrue else: JumpIfFalseOrPop
            self.expression(args[0])
            let jump = self.emitJump(opcode)
            self.expression(args[1])
            self.patchJump(jump)
        else:
            self.error(&"unknown built-in: '{operation}'", fn.valueType.fun)
proc generateCall(self: Compiler, fn: Name, args: seq[Expression], onStack: bool = false) =
    ## Emits everything that is needed to call the given
    ## function with the given arguments. Calls to built-in
    ## functions are handed to handleBuiltinFunction. When
    ## onStack is true, no instruction to load the function
    ## is emitted
    if fn.valueType.isBuiltinFunction:
        # Builtins map to individual instructions
        # (usually 1, but some use more), so they
        # don't go through the regular call machinery
        self.handleBuiltinFunction(fn, args)
        return
    if not onStack:
        self.emitFunction(fn)
    self.emitByte(LoadReturnAddress)
    # The return address is not known yet: a dummy
    # one is emitted now and patched at the very end
    let pos = self.chunk.code.len()
    self.emitBytes(0.toQuad())
    # Arguments are passed in reverse because of how
    # stack semantics work. They'll be fixed at runtime
    for i in countdown(args.high(), 0):
        self.expression(args[i])
    # Every closed-over variable within the bounds of this
    # function and of its chain of child functions gets its
    # counter incremented
    var current = fn.valueType
    while not current.isNil():
        if current.isClosure:
            for i in current.closureBounds.start..current.closureBounds.stop:
                inc(self.closedOver[i].count)
        current = current.childFunc
    # Creates a new call frame and jumps to the
    # function's first instruction in the code
    self.emitByte(Call)
    self.emitBytes(fn.valueType.args.len().toTriple())
    self.patchReturnAddress(pos)
proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
    ## Generates a call to the function implementing
    ## a unary operator, passing the operator's only
    ## operand as its argument
    let operands = @[op.a]
    self.generateCall(fn, operands)
proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
    ## Generates a call to the function implementing
    ## a binary operator, passing the left and right
    ## operands (in this order) as its arguments
    let operands = @[op.a, op.b]
    self.generateCall(fn, operands)
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles a unary expression: the operator is looked
    ## up as a function named after the operator's token
    ## that takes one argument of the operand's type, and
    ## a call to it is generated
    let operandType = self.inferType(node.a)
    # The return type is irrelevant for the lookup, hence Any
    let signature = Type(kind: Function, returnType: Type(kind: Any), args: @[("", operandType)])
    let impl = self.matchImpl(node.token.lexeme, signature)
    self.callUnaryOp(impl, node)
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles a binary expression: the operator is looked
    ## up as a function named after the operator's token
    ## that takes two arguments of the operands' types, and
    ## a call to it is generated
    let leftType = self.inferType(node.a)
    let rightType = self.inferType(node.b)
    # The return type is irrelevant for the lookup, hence Any
    let signature = Type(kind: Function, returnType: Type(kind: Any), args: @[("", leftType), ("", rightType)])
    let impl = self.matchImpl(node.token.lexeme, signature)
    self.callBinaryOp(impl, node)
proc declareName(self: Compiler, node: Declaration, mutable: bool = false) =
    ## Records a declaration in the compiler's list of names
    ## so that it can be looked up later. A variable declaration
    ## yields a single entry; a function declaration yields one
    ## entry per generic, one for the function itself and one for
    ## each of its arguments. Other kinds of declarations are
    ## currently ignored. When mutable is true, the type of the
    ## declared variable is flagged as mutable
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            if self.names.high() > 16777215:
                # Stack offsets are emitted as 3-byte values, so there
                # is a hard cap on how many names can exist at once
                self.error("cannot declare more than 16777215 variables at a time")
            # A name cannot be declared twice at the same scope depth,
            # unless the existing entry is a function argument
            for name in self.findByName(node.name.token.lexeme):
                if name.isFunctionArgument or name.depth != self.scopeDepth:
                    continue
                self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
            let entry = Name(depth: self.scopeDepth,
                             name: node.name,
                             isPrivate: node.isPrivate,
                             owner: self.currentModule,
                             isConst: node.isConst,
                             valueType: self.inferType(node.value),
                             codePos: self.chunk.code.len(),
                             isLet: node.isLet,
                             isClosedOver: false,
                             line: node.token.line,
                             belongsTo: self.currentFunction
                             )
            self.names.add(entry)
            if mutable:
                entry.valueType.mutable = true
            if self.scopeDepth > 0:
                # Only local scopes can have closure variables. We emit a
                # forward jump of zero as a placeholder because this spot
                # may become a StoreHeap instruction later: if the variable
                # is never closed over, the jump just costs one VM cycle.
                # TODO: Optimize this. Removing bytecode is tricky because
                # it would invalidate every jump offset and absolute index
                # computed so far
                self.emitByte(JumpForwards)
                self.emitBytes(0.toTriple())
        of NodeKind.funDecl:
            var node = FunDecl(node)
            # Generics go first so that the function's
            # signature can refer to them
            for gen in node.generics:
                let generic = Name(depth: self.scopeDepth + 1,
                                   isPrivate: true,
                                   isConst: false,
                                   owner: self.currentModule,
                                   line: node.token.line,
                                   valueType: Type(kind: Generic, mutable: false, node: gen.name),
                                   name: gen.name)
                self.names.add(generic)
            let fn = Name(depth: self.scopeDepth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function,
                                          name: node.name.token.lexeme,
                                          returnType: self.inferType(node.returnType),
                                          args: @[],
                                          fun: node),
                          codePos: self.chunk.code.len(),
                          name: node.name,
                          isLet: false,
                          isClosedOver: false,
                          line: node.token.line,
                          isFunDecl: true)
            self.names.add(fn)
            for argument in node.arguments:
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                # No load instruction is emitted for arguments: when a
                # function is called they are already on the stack
                let name = Name(depth: self.scopeDepth + 1,
                                isPrivate: true,
                                owner: self.currentModule,
                                isConst: false,
                                name: argument.name,
                                valueType: nil,
                                codePos: 0,
                                isLet: false,
                                isClosedOver: false,
                                line: argument.name.token.line,
                                isFunctionArgument: true,
                                belongsTo: fn
                                )
                # The argument is registered before its type is inferred
                self.names.add(name)
                name.valueType = self.inferType(argument.valueType)
                if name.valueType.isNil():
                    # An argument without a type is an error
                    self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'", argument.name)
                fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
        else:
            discard  # TODO: Types, enums
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles a reference to a name. Depending on what the
    ## name resolves to, this emits a constant, a LoadFunction,
    ## a LoadVar or a LoadClosure instruction
    var resolved = self.resolve(node)
    if resolved.isNil():
        self.error(&"reference to undeclared name '{node.token.lexeme}'")
        return
    if resolved.isConst:
        # Constants are emitted inline, regardless
        # of the depth of the current scope
        self.emitConstant(node, self.inferType(node))
        return
    self.detectClosureVariable(resolved)
    if resolved.valueType.kind == Function and resolved.isFunDecl:
        # Declared functions have no runtime representation
        # of their own, so one is created on the fly from
        # the position of their code
        self.emitByte(LoadFunction)
        self.emitBytes(resolved.codePos.toTriple())
    elif resolved.isClosedOver:
        # Closed-over variables are addressed by their
        # position among the closure variables rather
        # than by their offset in the call stack
        self.emitByte(LoadClosure)
        self.emitBytes((self.getClosurePos(resolved)).toTriple())
    else:
        # Regular variable: statically resolved to an index in the
        # stack. The name was nil-checked above
        self.emitByte(LoadVar)
        self.emitBytes(self.getStackPos(resolved).toTriple())
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignments. Assigning to a plain name checks that
    ## the name exists and is neither a constant nor a let binding,
    ## compiles the value and stores it with StoreVar or StoreClosure.
    ## Item assignment is not implemented yet
    case node.kind:
        of assignExpr:
            let node = AssignExpr(node)
            let name = IdentExpr(node.name)
            var target = self.resolve(name)
            if target.isNil():
                self.error(&"assignment to undeclared name '{name.token.lexeme}'", name)
            elif target.isConst:
                self.error(&"cannot assign to '{name.token.lexeme}' (constant)", name)
            elif target.isLet:
                self.error(&"cannot reassign '{name.token.lexeme}'", name)
            # The new value is compiled before the store instruction
            self.expression(node.value)
            self.detectClosureVariable(target)
            if target.isClosedOver:
                # Stores into a closure variable, which is addressed
                # by its position among the closure variables rather
                # than by its offset in the call stack
                self.emitByte(StoreClosure)
                self.emitBytes(self.getClosurePos(target).toTriple())
            else:
                self.emitByte(StoreVar)
                self.emitBytes(self.getStackPos(target).toTriple())
        of setItemExpr:
            let node = SetItemExpr(node)
            let typ = self.inferType(node)
            if typ.isNil():
                self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
proc beginScope(self: Compiler) =
    ## Opens a new local scope: the scope depth is
    ## increased by one and the type of the current
    ## function is pushed onto the list of scopes
    self.scopeDepth += 1
    let owner = self.currentFunction.valueType
    self.scopes.add(owner)
proc endScope(self: Compiler) =
    ## Ends the current local scope. The scope depth is decreased,
    ## the instructions needed to pop the values of the names that
    ## just went out of scope are emitted (PopClosure for the closure
    ## variables of closures nested in functions going out of scope,
    ## PopN/PopC for regular variables) and those names are removed
    ## from the compiler's list of names
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    dec(self.scopeDepth)
    if self.scopeDepth > 0:
        discard self.scopes.pop()
    var popCount = 0
    for name in self.names:
        if name.depth > self.scopeDepth:
            if name.valueType.kind notin {Generic, CustomType} and not name.isFunDecl:
                # We don't increase the pop count for these kinds of objects
                # because they're not stored the same way as regular variables
                inc(popCount)
            if name.isFunDecl and not name.valueType.childFunc.isNil() and name.valueType.childFunc.isClosure:
                # NOTE(review): entries are deleted from closedOver while
                # the index (y + i) keeps growing; confirm that this
                # addresses the intended slots after each deletion
                var i = 0
                var closure: tuple[name: Name, count: int]
                for y in name.valueType.childFunc.closureBounds.start..name.valueType.childFunc.closureBounds.stop:
                    closure = self.closedOver[y + i]
                    self.closedOver.delete(y + i)
                    # One PopClosure per recorded use of the variable
                    for _ in 0..<closure.count:
                        self.emitByte(PopClosure)
                        self.emitBytes((y + i).toTriple())
                    inc(i)
    if popCount > 1:
        # If we're popping less than 65535 variables, then
        # we can emit a PopN instruction. This is true for
        # 99.99999% of the use cases of the language (who the
        # hell is going to use 65 THOUSAND variables?), but
        # if you'll ever use more then Peon will emit a PopN instruction
        # for the first 65 thousand and change local variables and then
        # emit another batch of plain ol' Pop instructions for the rest
        self.emitByte(PopN)
        self.emitBytes(popCount.toDouble())
        if popCount > uint16.high().int():
            # NOTE(review): the operand above is not clamped to 65535
            # and the bounds of this loop look unrelated to the number
            # of values left to pop; verify against the comment above
            for i in countdown(self.names.high(), popCount - uint16.high().int()):
                if self.names[i].depth > self.scopeDepth:
                    self.emitByte(PopC)
    elif popCount == 1:
        # We only emit PopN if we're popping more than one value
        self.emitByte(PopC)
    # Drops every name that went out of scope in a single in-place
    # pass. This removes exactly the names with a depth greater than
    # the new scope depth, i.e. the ones handled by the loop above
    self.names.keepItIf(it.depth <= self.scopeDepth)
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles a block statement: every declaration
    ## in the block is compiled inside a fresh local
    ## scope, which is closed once the block ends
    self.beginScope()
    for item in node.code:
        self.declaration(item)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements. The condition
    ## is checked against the Bool type before any
    ## code is emitted
    self.check(node.condition, Type(kind: Bool))
    self.expression(node.condition)
    # Taken when the condition is false: skips the "then" branch
    let skipThen = self.emitJump(JumpIfFalsePop)
    self.statement(node.thenBranch)
    # Taken at the end of the "then" branch:
    # skips the "else" branch (if there is one)
    let skipElse = self.emitJump(JumpForwards)
    self.patchJump(skipThen)
    let otherwise = node.elseBranch
    if not otherwise.isNil():
        self.statement(otherwise)
    self.patchJump(skipElse)
proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction whose operand
    ## is the distance between the current end of the
    ## bytecode and the given position, plus 4. The
    ## distance must fit in 3 bytes
    let distance = self.chunk.code.high() - begin + 4
    if distance > 16777215:
        self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(JumpBackwards)
    self.emitBytes(distance.toTriple())
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles while loops (which include for
    ## loops that were desugared into them). The
    ## condition is checked against the Bool type
    ## before any code is emitted
    self.check(node.condition, Type(kind: Bool))
    # Where the loop jumps back to after every iteration
    let loopStart = self.chunk.code.high()
    self.expression(node.condition)
    # Taken when the condition is false: leaves the loop
    let exitJump = self.emitJump(JumpIfFalsePop)
    self.statement(node.body)
    self.emitLoop(loopStart)
    self.patchJump(exitJump)
proc checkCallIsPure(self: Compiler, node: ASTnode): bool =
    ## Tells whether a call is free of side effects.
    ## The check is not implemented yet, so every
    ## call is currently reported as pure
    result = true  # TODO
proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} =
    ## Compiles a function call and returns the name that
    ## was called. The callee can be an identifier, which
    ## is matched against the types of the arguments, or
    ## another call expression. Only positional arguments
    ## are supported at the moment
    var signature: seq[tuple[name: string, kind: Type]] = @[]
    var operands: seq[Expression] = @[]
    # TODO: Keyword arguments
    for i, argument in node.arguments.positionals:
        let argType = self.inferType(argument)
        if argType.isNil():
            if argument.kind == identExpr:
                self.error(&"reference to undeclared name '{IdentExpr(argument).name.lexeme}'")
            self.error(&"cannot infer the type of argument {i + 1} in function call")
        signature.add(("", argType))
        operands.add(argument)
    for argument in node.arguments.keyword:
        # TODO
        discard
    if signature.len() >= 16777216:
        # The argument count is emitted as a 3-byte value
        self.error(&"cannot pass more than 16777215 arguments")
    var funct: Name
    var onStack = false
    case node.callee.kind:
        of identExpr:
            # The return type plays no role in the lookup, hence Any
            funct = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: signature))
        of NodeKind.callExpr:
            # The inner call is compiled first. Its result is then
            # called through a placeholder name built from the types
            # of our own arguments
            discard self.callExpr(CallExpr(node.callee))
            funct = Name(valueType: Type(kind: Function, returnType: Type(kind: Any), args: signature))
            onStack = true
        # TODO: Calling lambdas on-the-fly (i.e. on the same line)
        else:
            let typ = self.inferType(node)
            if typ.isNil():
                self.error(&"expression has no type")
            else:
                self.error(&"object of type '{self.typeToStr(typ)}' is not callable")
    result = funct
    self.generateCall(funct, operands, onStack)
    if not self.checkCallIsPure(node.callee):
        if self.currentFunction.valueType.name != "":
            self.error(&"cannot make sure that calls to '{self.currentFunction.valueType.name}' are side-effect free")
        else:
            self.error(&"cannot make sure that call is side-effect free")
proc expression(self: Compiler, node: Expression) =
    ## Compiles an expression by dispatching on the
    ## kind of its AST node. Node kinds that are not
    ## handled here are reported as internal errors
    case node.kind:
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # All literals are compiled by the same
            # procedure, which tells them apart
            # from the kind of the node
            self.literal(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # E.g. ~5 and -3
            self.unary(UnaryExpr(node))
        of binaryExpr:
            # E.g. 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of groupingExpr:
            # E.g. (2 + 1): we just compile
            # the expression being wrapped
            self.expression(GroupingExpr(node).expression)
        of NodeKind.callExpr:
            discard self.callExpr(CallExpr(node))  # TODO
        of setItemExpr, assignExpr:  # TODO: Get rid of this
            # The node is passed along as-is: the distinction
            # between the two kinds is made by self.assignment()
            self.assignment(node)
        of getItemExpr:
            discard  # TODO: Get rid of this
        of pragmaExpr:
            discard  # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements, i.e. await expressions
    ## used standalone as statements. The awaited
    ## expression is compiled first, followed by an
    ## Await instruction
    let awaited = node.expression
    self.expression(awaited)
    self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ## The deferred expression is compiled in place and
    ## its bytecode is then moved from the chunk into
    ## the compiler's list of deferred instructions
    let current = self.chunk.code.len
    self.expression(node.expression)
    # First copy everything the expression emitted, then truncate the
    # chunk in one go. Deleting bytes one at a time while indexing
    # over the original range shifts the remaining bytes, which skips
    # every other byte and ends up indexing past the end of the code
    for i in current..self.chunk.code.high():
        self.deferred.add(self.chunk.code[i])
    self.chunk.code.setLen(current)  # TODO: Do not change bytecode size
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. The returned value is
    ## checked against the return type of the current
    ## function. The Return instruction is followed by
    ## 1 if a value is being returned and by 0 otherwise
    let hasValue = not node.value.isNil()
    self.check(node.value, self.currentFunction.valueType.returnType)
    if hasValue:
        self.expression(node.value)
        self.emitByte(OpCode.SetResult)
    self.emitByte(OpCode.Return)
    if hasValue:
        self.emitByte(1)
    else:
        self.emitByte(0)
# TODO: Implement this as a custom operator
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded
    ## expression is compiled first, followed
    ## by a Yield instruction
    let yielded = node.expression
    self.expression(yielded)
    self.emitByte(OpCode.Yield)
# TODO: Implement this as a custom operator
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception
    ## expression is compiled first, followed by
    ## a Raise instruction
    let raised = node.exception
    self.expression(raised)
    self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements by emitting a
    ## Jump to the start of the current loop. The
    ## position of the loop's start must fit in 3
    ## bytes
    let target = self.currentLoop.start
    if target > 16777215:
        self.error("too much code to jump over in continue statement")
    self.emitByte(Jump)
    self.emitBytes(target.toTriple())
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A forward jump is
    ## emitted and its position is recorded in the
    ## current loop so that it can be patched later
    ## (see patchBreaks)
    let jump = self.emitJump(OpCode.JumpForwards)
    self.currentLoop.breakPos.add(jump)
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Patches every jump recorded by breakStmt
    ## for the current loop. This can only be done
    ## after the loop has been compiled, because
    ## the amount of code to skip is not known
    ## before then
    for position in self.currentLoop.breakPos:
        self.patchJump(position)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements: the asserted
    ## expression is compiled first, followed by
    ## an Assert instruction
    let asserted = node.expression
    self.expression(asserted)
    self.emitByte(OpCode.Assert)
proc forEachStmt(self: Compiler, node: ForEachStmt) =
    ## Compiles foreach loops. Not implemented
    ## yet: no code is emitted
    discard  # TODO
proc importStmt(self: Compiler, node: ImportStmt) =
    ## Compiles import statements. The module is compiled
    ## right away from the file named after it (with the
    ## ".pn" extension); I/O and OS errors raised in the
    ## process are reported as compile errors
    # TODO: This is obviously horrible. It's just a test
    let moduleName = node.moduleName.token.lexeme
    let filename = moduleName & ".pn"
    try:
        self.compileModule(filename)
    except IOError:
        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")
    except OSError:
        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()} [errno {osLastError()}]""")
proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements by dispatching on the kind
    ## of the AST node. Expression statements are compiled
    ## and their value (if any) is either printed with the
    ## instruction matching its type (in REPL mode) or popped
    ## off the stack. Try statements are not implemented yet.
    ## Any other kind of node is compiled as an expression
    case node.kind:
        of exprStmt:
            let expression = ExprStmt(node).expression
            let kind = self.inferType(expression)
            self.expression(expression)
            if kind.isNil():
                # The expression has no type and produces no value,
                # so we don't have to pop anything
                discard
            elif self.replMode:
                # In REPL mode the value is printed with the
                # instruction that matches its type
                case kind.kind:
                    of Int64:
                        self.emitByte(PrintInt64)
                    of UInt64:
                        self.emitByte(PrintUInt64)
                    of Int32:
                        self.emitByte(PrintInt32)
                    of UInt32:
                        # Unsigned values need the unsigned variant
                        # of the instruction, like all other widths
                        self.emitByte(PrintUInt32)
                    of Int16:
                        self.emitByte(PrintInt16)
                    of UInt16:
                        self.emitByte(PrintUInt16)
                    of Int8:
                        self.emitByte(PrintInt8)
                    of UInt8:
                        self.emitByte(PrintUInt8)
                    of Float64:
                        self.emitByte(PrintFloat64)
                    of Float32:
                        self.emitByte(PrintFloat32)
                    of Bool:
                        self.emitByte(PrintBool)
                    of Nan:
                        self.emitByte(PrintNan)
                    of Inf:
                        self.emitByte(PrintInf)
                    of String:
                        self.emitByte(PrintString)
                    else:
                        # Every other type is printed with PrintHex
                        self.emitByte(PrintHex)
            else:
                # Outside of the REPL the value is simply discarded
                self.emitByte(Pop)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.whileStmt:
            # Note: Our parser already desugars
            # for loops to while loops.
            # The enclosing loop is saved and restored around
            # the compilation of this one so that break and
            # continue always refer to the innermost loop
            let loop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = loop
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            self.expression(Expression(node))
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations. The declared type (if
    ## any) is checked against the type of the value, then the
    ## value is compiled, the name is declared and a StoreVar
    ## instruction targeting the new variable is emitted
    let expected = self.inferType(node.valueType)
    let actual = self.inferType(node.value)
    if expected.isNil() and actual.isNil():
        # Neither type is known. If the value is a name, or a call to
        # a name, the likely culprit is that the name doesn't exist
        let value = node.value
        let isIdent = value.kind == identExpr
        let isNamedCall = value.kind == NodeKind.callExpr and CallExpr(value).callee.kind == identExpr
        if isIdent or isNamedCall:
            let name = if isIdent: value.token.lexeme else: CallExpr(value).callee.token.lexeme
            self.error(&"reference to undeclared name '{name}'")
        self.error(&"'{node.name.token.lexeme}' has no type")
    elif not expected.isNil() and expected.mutable:
        # Types flagged as mutable are rejected in variable declarations
        self.error(&"invalid type '{self.typeToStr(expected)}' for var")
    elif not self.compareTypes(expected, actual):
        # A mismatch is only an error if a type was declared explicitly
        if not expected.isNil():
            self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'")
    self.expression(node.value)
    let declaredWithVar = node.token.kind == TokenType.Var
    self.declareName(node, mutable=declaredWithVar)
    # The name that was just declared is the last one in the list
    self.emitByte(StoreVar)
    self.emitBytes(self.getStackPos(self.names[^1]).toTriple())
proc typeDecl(self: Compiler, node: TypeDecl) =
    ## Compiles type declarations. Not implemented
    ## yet: no code is emitted
    discard  # TODO
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) =
    ## Handles the "magic" pragma, which turns a function
    ## into a built-in. The pragma takes exactly one constant
    ## string argument (the name of the built-in operation)
    ## and only applies to function declarations, which are
    ## assumed to have been declared already
    if pragma.args.len() != 1:
        self.error("'magic' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'magic' pragma: wrong type of argument (constant string expected)")
    elif node.kind != NodeKind.funDecl:
        self.error("'magic' pragma is not valid in this context")
    let decl = FunDecl(node)
    let target = self.resolve(decl.name)
    target.valueType.isBuiltinFunction = true
    # The first and last characters of the lexeme are dropped.
    # NOTE(review): presumably these are the quotes of the string
    # literal; confirm against the lexer
    let quoted = pragma.args[0].token.lexeme
    target.valueType.builtinOp = quoted[1..^2]
    # The body of a built-in function is ignored
    decl.body = nil
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) =
    ## Handles the "pure" pragma by flagging the function
    ## declaration or lambda it is bound to as pure. The
    ## pragma is an error on any other kind of node
    if node.kind == NodeKind.funDecl:
        FunDecl(node).isPure = true
    elif node.kind == lambdaExpr:
        LambdaExpr(node).isPure = true
    else:
        self.error("'pure' pragma is not valid in this context")
proc dispatchPragmas(self: Compiler, node: ASTnode) =
    ## Runs the handler of every pragma bound to the given
    ## node. Handlers are looked up by the pragma's name in
    ## self.compilerProcs; unknown pragmas are an error
    var bound: seq[Pragma] = @[]
    case node.kind:
        of lambdaExpr:
            bound = LambdaExpr(node).pragmas
        of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
            bound = Declaration(node).pragmas
        else:
            discard  # Unreachable
    for pragma in bound:
        let handlerName = pragma.name.token.lexeme
        if handlerName notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        self.compilerProcs[handlerName](self, pragma, node)
proc fixGenericFunc(self: Compiler, name: Name, args: seq[Expression]): Name =
    ## Returns a deep copy of the given function name in
    ## which every generic argument has been replaced with
    ## the type inferred from the matching expression in
    ## args. The name each specialized argument resolves
    ## to is updated with the inferred type as well
    result = name.deepCopy()
    let signature = result.valueType
    for i in 0..<args.len():
        if signature.args[i].kind.kind != Generic:
            # Only generic arguments need specializing
            continue
        let typ = self.inferType(args[i])
        signature.args[i].kind = typ
        self.resolve(signature.args[i].name).valueType = typ
        if signature.args[i].kind.isNil():
            self.error(&"cannot specialize generic function: argument {i + 1} has no type")
proc funDecl(self: Compiler, node: FunDecl, fn: Name = nil, args: seq[Expression] = @[]) =
    ## Compiles function declarations. The function's name, generics
    ## and arguments are declared first and its pragmas are dispatched.
    ## Then, unless the function turns out to be a built-in, its body
    ## is compiled in place behind a forward jump, a Return instruction
    ## is appended and a record describing the function is added to the
    ## chunk's cfi data. If fn is nil, the Name that was just declared
    ## for the function is used
    #[if not node.isNil():
        if node.generics.len() > 0 and fn.isNil() and args.len() == 0:
            # Generic function! We can't compile it right now
            self.declareName(node)
            self.dispatchPragmas(node)
            return]#
    self.declareName(node)
    self.dispatchPragmas(node)
    var node = node
    # declareName adds the function's Name right before the Names of
    # its arguments, so the function is found node.arguments.len() + 1
    # entries from the end of the list and its arguments come after it
    var fn = if fn.isNil(): self.names[^(node.arguments.len() + 1)] else: fn
    var names = self.names[^(node.arguments.len())..^1]
    if fn.valueType.isBuiltinFunction:
        # We take the arguments off of our name list
        # because they become temporaries on the stack.
        # Builtin functions (usually) map to a single
        # bytecode instruction to avoid unnecessary
        # overhead from peon's calling convention
        # This also means that peon's fast builtins
        # can only be relatively simple
        self.names = self.names[0..^node.arguments.len() + 1]
    else:
        # The enclosing function, restored once we're done
        var function = self.currentFunction
        var jmp: int
        # A function's code is just compiled linearly
        # and then jumped over
        jmp = self.emitJump(JumpForwards)
        # Function's code starts after the jump
        fn.codePos = self.chunk.code.len()
        # We let our debugger know a function is starting
        let start = self.chunk.code.high()
        # Four no-op bytes are emitted for each argument and the
        # argument's codePos is pointed at the start of its own four
        for name in names:
            self.emitBytes([NoOp, NoOp, NoOp, NoOp])
            name.codePos = self.chunk.code.len() - 4
        # We store the current function
        if not self.currentFunction.isNil():
            self.currentFunction.valueType.childFunc = fn.valueType
        self.currentFunction = fn
        # NOTE(review): node has already been dereferenced above (by
        # declareName and node.arguments), so the nil branch below looks
        # unreachable in practice; confirm before relying on it
        if node.isNil():
            # We got called back with more specific type
            # arguments: time to fix them!
            self.currentFunction = self.fixGenericFunc(fn, args)
            node = self.currentFunction.valueType.fun
        elif not node.body.isNil():
            if BlockStmt(node.body).code.len() == 0:
                self.error("cannot declare function with empty body")
        else:
            discard # TODO: Forward declarations
        let impl = self.findByType(fn.name.token.lexeme, fn.valueType, self.scopeDepth)
        if impl.len() > 1:
            # We found more than one (public) implementation of
            # the same function with the same name: this is an
            # error, as it would raise ambiguity when calling them
            var msg = &"multiple matching implementations of '{fn.name.token.lexeme}' found:\n"
            for f in reversed(impl):
                msg &= &"- in module '{f.owner}' at line {f.line} of type {self.typeToStr(f.valueType)}\n"
            self.error(msg)
        # Since the deferred array is a linear
        # sequence of instructions and we want
        # to keep track of which function each
        # set of deferred instructions belongs to,
        # we record the length of the deferred
        # array before compiling the function
        # and use this info later to compile
        # the try/finally block with the deferred
        # code
        var deferStart = self.deferred.len()
        self.beginScope()
        for decl in BlockStmt(node.body).code:
            self.declaration(decl)
        let typ = self.currentFunction.valueType.returnType
        var hasVal: bool = false
        case self.currentFunction.valueType.fun.kind:
            of NodeKind.funDecl:
                hasVal = self.currentFunction.valueType.fun.hasExplicitReturn
            of NodeKind.lambdaExpr:
                hasVal = LambdaExpr(Declaration(self.currentFunction.valueType.fun)).hasExplicitReturn
            else:
                discard # Unreachable
        if not hasVal and not typ.isNil():
            # There is no explicit return statement anywhere in the function's
            # body: while this is not a tremendously useful piece of information (since
            # the presence of at least one doesn't mean all control flow cases are
            # covered), it definitely is an error worth reporting
            self.error("function has an explicit return type, but no return statement was found")
        # A value is only returned if the function has
        # both a return type and an explicit return
        hasVal = hasVal and not typ.isNil()
        self.endScope()
        # Terminates the function's context
        self.emitByte(OpCode.Return)
        if hasVal:
            self.emitByte(1)
        else:
            self.emitByte(0)
        # Some debugging info here: where the function starts and ends,
        # how many arguments it takes and its name, prefixed by its length
        self.chunk.cfi.add(start.toTriple())
        self.chunk.cfi.add(self.chunk.code.high().toTriple())
        self.chunk.cfi.add(uint8(node.arguments.len()))
        if not node.name.isNil():
            # NOTE(review): the length is written before the name is
            # truncated, and the slice below spans uint16.high() + 1
            # characters; verify the behavior for names that long
            self.chunk.cfi.add(fn.name.token.lexeme.len().toDouble())
            var s = fn.name.token.lexeme
            if s.len() >= uint16.high().int:
                s = node.name.token.lexeme[0..uint16.high()]
            self.chunk.cfi.add(s.toBytes())
        else:
            self.chunk.cfi.add(0.toDouble())
        # Currently defer is not functional, so we
        # just pop the instructions
        for _ in deferStart..self.deferred.high():
            discard self.deferred.pop()
        # Well, we've compiled everything: time to patch
        # the jump offset
        self.patchJump(jmp)
        # Pops a call frame
        # discard self.frames.pop()
        # Restores the enclosing function (if any).
        # Makes nested calls work (including recursion)
        self.currentFunction = function
proc patchReturnAddress(self: Compiler, pos: int) =
    ## Overwrites the 4 bytes starting at the given
    ## position in the bytecode with the current length
    ## of the code, which is the return address of a
    ## call that has just been emitted
    let address = self.chunk.code.len().toQuad()
    for offset in 0..3:
        self.chunk.code[pos + offset] = address[offset]
proc declaration(self: Compiler, node: Declaration) =
    ## Compiles a declaration by dispatching on the
    ## kind of its AST node. Anything that is not a
    ## variable, function or type declaration is
    ## compiled as a statement
    let kind = node.kind
    if kind == NodeKind.varDecl:
        self.varDecl(VarDecl(node))
    elif kind == NodeKind.funDecl:
        self.funDecl(FunDecl(node))
    elif kind == NodeKind.typeDecl:
        self.typeDecl(TypeDecl(node))
    else:
        self.statement(Statement(node))
proc terminateProgram(self: Compiler, pos: int, terminateScope: bool = true) =
    ## Terminates a peon program: the current scope is
    ## closed (unless terminateScope is false), the return
    ## address at the given position is patched and a
    ## Return instruction is emitted
    if terminateScope:
        self.endScope()
    self.patchReturnAddress(pos)
    # The entry point has no return value, hence the 0
    # (TODO: Add easter eggs, cuz why not)
    self.emitByte(OpCode.Return)
    self.emitByte(0)
proc beginProgram(self: Compiler, incremental: bool = false): int =
    ## Begins a peon program by emitting the call to its hidden
    ## entry point. Returns the position of the dummy return
    ## address of that call, which is patched by terminateProgram
    # User code is wrapped in an implicit main function that
    # is called like any other peon function. We can't rely
    # on generateCall() here, though, because the end of the
    # program is not known yet: we emit dummy offsets instead
    # and patch them once the boundaries of the hidden main
    # are known
    var main: Name
    if not incremental:
        main = Name(depth: 0,
                    isPrivate: true,
                    isConst: false,
                    isLet: false,
                    isClosedOver: false,
                    owner: self.currentModule,
                    valueType: Type(kind: Function,
                                    name: "",
                                    returnType: nil,
                                    args: @[],
                                    ),
                    codePos: 13,  # Jump address is hardcoded
                    name: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                    isFunDecl: true,
                    line: -1)
        self.names.add(main)
    else:
        # When compiling incrementally, the first
        # name in the list is used as the entry point
        main = self.names[0]
    self.emitByte(LoadFunction)
    self.emitBytes(main.codePos.toTriple())
    self.emitByte(LoadReturnAddress)
    self.emitBytes(0.toQuad())
    # The entry point takes no arguments
    self.emitByte(Call)
    self.emitBytes(0.toTriple())
    # Position of the dummy return address
    return 5
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              terminateScope: bool = true, incremental: bool = false): Chunk =
    ## Compiles a sequence of AST nodes and returns the
    ## resulting chunk. If a chunk is given, code is added
    ## to it instead of a brand new one. The current scope
    ## is never terminated when compiling incrementally
    self.chunk = if chunk.isNil(): newChunk() else: chunk
    self.ast = ast
    self.file = file
    let closeScope = terminateScope and not incremental
    # Resets the state of the compiler
    self.scopeDepth = 0
    self.currentFunction = nil
    self.currentModule = self.file.extractFilename()
    self.current = 0
    self.frames = @[0]
    self.lines = lines
    self.source = source
    let pos = self.beginProgram(incremental)
    if incremental and self.replMode:
        # Drops the last two bytes of the chunk
        for _ in 1..2:
            discard self.chunk.code.pop()
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.terminateProgram(pos, closeScope)
    result = self.chunk
    if incremental and not self.replMode:
        # Drops the last two bytes of the chunk
        for _ in 1..2:
            discard self.chunk.code.pop()
proc compileModule(self: Compiler, filename: string) =
    ## Compiles an imported module into the chunk currently
    ## being compiled, using a temporary lexer, parser and
    ## compiler. The file is looked up in the directory of
    ## the file being compiled; modules that were already
    ## imported are skipped
    let path = joinPath(splitPath(self.file).head, filename)
    if path in self.modules:
        return
    var moduleLexer = newLexer()
    var moduleParser = newParser()
    var moduleCompiler = newCompiler()
    moduleLexer.fillSymbolTable()
    let source = readFile(path)
    let tokens = moduleLexer.lex(source, filename)
    let ast = moduleParser.parse(tokens, filename, moduleLexer.getLines(), source)
    # The temporary compiler starts off with our first name,
    # which it uses as its entry point in incremental mode
    moduleCompiler.names.add(self.names[0])
    discard moduleCompiler.compile(ast, filename, moduleLexer.getLines(), source, chunk=self.chunk, incremental=true)
    # Imports the module's names, except for those
    # whose owner is a module we already know about
    for name in moduleCompiler.names:
        if name.owner notin self.modules:
            self.names.add(name)
    self.modules.incl(path)
    self.closedOver &= moduleCompiler.closedOver