peon/src/frontend/compiler.nim

2536 lines
105 KiB
Nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import ../util/multibyte
import ../util/symbols
import lexer as l
import parser as p
import ../config
import std/tables
import std/strformat
import std/algorithm
import std/parseutils
import std/strutils
import std/sequtils
import std/sets
import std/os
import std/terminal
export ast
export token
export multibyte
type
TypeKind = enum
## An enumeration of compile-time
## types. This is the discriminant
## of the Type object variant below
Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Function, CustomType,
Nil, Nan, Bool, Inf, Typevar, Generic,
Reference, Pointer
Any # Any is used internally in a few cases,
# for example when looking for operators
# when only the type of the arguments is of
# interest. compare() treats Any as equal to
# every other type
Type = ref object
## A wrapper around
## compile-time types
# Set to true by infer() for `var` expressions;
# typeToStr() renders it as a "var " prefix on
# function arguments
mutable: bool
case kind: TypeKind:
of Function:
# Set by infer() when the type comes from a lambda expression
isLambda: bool
isGenerator: bool
# compare() requires this flag to match on both sides
isCoroutine: bool
# resolve() does not compile generic functions eagerly
isGeneric: bool
# Argument names paired with their types, in declaration order
args: seq[tuple[name: string, kind: Type]]
# nil means the function returns nothing
returnType: Type
# getStackPos() skips variables that belong to builtin functions
isBuiltinFunction: bool
builtinOp: string
# The declaration node (returned by getCurrentFunction)
fun: FunDecl
# getClosurePos() only searches the closure list when this is set
isClosure: bool
envLen: int
children: seq[Type]
parent: Type
retJumps: seq[int]
# True for forward declarations: matchImpl() drops these
# when more than one implementation matches
forwarded: bool
of CustomType:
fields: TableRef[string, Type]
of Reference, Pointer:
# The type being referenced/pointed to
value: Type
of Generic:
# cond represents a type constraint. For
# example, fn foo[T: int & !uint](...) {...}
# would map to [(true, int), (false, uint)]
cond: seq[tuple[match: bool, kind: Type]]
name: string
else:
discard
# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode
type
WarningKind* {.pure.} = enum
## A warning enumeration type. Individual kinds
## can be silenced via the compiler's list of
## disabled warnings
UnreachableCode, UnusedName, ShadowOuterScope
CompileMode* {.pure.} = enum
## A compilation mode enumeration
Debug, Release
NameKind {.pure.} = enum
## A name enumeration type: describes what sort of
## entity an identifier refers to. getStackPos()
## treats None, Module, Function, CustomType and Enum
## as having no runtime representation on the call stack
None, Module, Argument, Var, Function, CustomType, Enum
Name = ref object
## A compile-time wrapper around
## statically resolved names
# The name's identifier
ident: IdentExpr
# Type of the identifier (NOT of the value!)
kind: NameKind
# Owner of the identifier (module)
owner: string
# File where the name is declared
file: string
# Scope depth
depth: int
# Is this name private? Private names are never
# visible from modules other than their owner
isPrivate: bool
# Is this a constant?
isConst: bool
# Was this name declared with `let`?
# NOTE(review): the previous comment read "Can this name's
# value be mutated?", which looks inverted for a flag called
# isLet -- confirm against varDecl
isLet: bool
# The name's type
valueType: Type
# For functions, this marks where the function's
# code begins (shifted by fixCFIOffsets when the
# bytecode grows)
codePos: int
# The function that owns this variable (may be nil!)
belongsTo: Name
# Where is this node declared in the file?
line: int
# Has this name been closed over?
isClosedOver: bool
# Has this name been referenced at least once?
# Set by resolve()/findByName()/findByType()
resolved: bool
# The AST node associated with this node. This
# is needed because we compile declarations only
# if they're actually used
node: Declaration
# Is this name exported? (Only makes sense if isPrivate
# equals false). Other modules can only see names that
# are both public and exported
exported: bool
Loop = object
## A "loop object" used
## by the compiler to emit
## appropriate jump offsets
## for continue and break
## statements. The compiler keeps
## one of these for the loop that
## is currently being compiled
# Position in the bytecode where the loop starts
start: int
# Scope depth where the loop is located
depth: int
# Jump offsets into our bytecode that we need to
# patch. Used for break statements
breakJumps: seq[int]
Compiler* = ref object
## A wrapper around the Peon compiler's state
# The bytecode chunk where we write code to
chunk: Chunk
# The output of our parser (AST)
ast: seq[Declaration]
# Index into `ast` of the next node to compile
# (the node at current - 1 is the one most
# recently consumed by step())
current: int
# The current file being compiled (used only for
# error reporting)
file: string
# Compile-time "simulation" of the stack at
# runtime to load variables that have stack
# behavior more efficiently
names: seq[Name]
# The current scope depth. If > 0, we're
# in a local scope, otherwise it's global
depth: int
# The current function being compiled
# (nil when outside of any function)
currentFunction: Name
# The current loop being compiled (used to
# keep track of where to jump)
currentLoop: Loop
# Are we in REPL mode? If so, Pop instructions
# for expression statements at the top level are
# swapped for a special PopRepl instruction that
# prints the result of the expression once it is
# evaluated
replMode: bool
# The current module being compiled
# (used to restrict access to statically
# defined variables at compile time)
currentModule: string
# Each time a defer statement is
# compiled, its code is emitted
# here. Later, if there is any code
# to defer in the current function,
# funDecl will wrap the function's code
# inside an implicit try/finally block
# and add this code in the finally branch.
# This sequence is emptied each time a
# function declaration is compiled and stores only
# deferred code for the current function (may
# be empty)
deferred: seq[uint8]
# List of closed-over variables. The index of
# a name in this list is what getClosurePos()
# returns
closures: seq[Name]
# Compiler procedures called by pragmas, keyed
# by pragma name
compilerProcs: TableRef[string, CompilerFunc]
# Stores line data for error reporting
lines: seq[tuple[start, stop: int]]
# The source of the current module,
# used for error reporting
source: string
# Currently imported modules
modules: HashSet[string]
# Stores the position of all jumps. `offset` is the
# index of the jump opcode inside the chunk's code
jumps: seq[tuple[patched: bool, offset: int]]
# List of CFI start offsets into our CFI data
cfiOffsets: seq[tuple[start, stop, pos: int, fn: Name]]
# We store these objects to compile modules
lexer: Lexer
parser: Parser
# Are we compiling the main module?
isMainModule: bool
# Stores the call offsets for forward
# declarations so that we can patch them
# later
forwarded: seq[tuple[name: Name, pos: int]]
# List of disabled warnings
disabledWarnings: seq[WarningKind]
# Whether to show detailed info about type
# mismatches when we dispatch with matchImpl()
showMismatches: bool
# The compilation mode (Debug or Release)
mode: CompileMode
PragmaKind = enum
## An enumeration of pragma types. Each
## pragma handler registered by newCompiler()
## is tagged with one of these
Immediate,
Delayed
CompilerFunc = object
## An internal compiler function called
## by pragmas
kind: PragmaKind
# Callback invoked with the compiler, the pragma
# being processed and the name it is attached to
handler: proc (self: Compiler, pragma: Pragma, name: Name)
CompileError* = ref object of PeonException
## The exception raised by the compiler's error() procedure
# The compiler instance that raised the error
compiler*: Compiler
# The AST node the error refers to
node*: ASTNode
# The module that was being compiled
module*: string
# Forward declarations. These are needed because several
# procedures below reference each other before they are
# defined (for example resolve() calls funDecl()/typeDecl()
# and newCompiler() stores the pragma handlers)
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
incremental: bool = false, isMainModule: bool = true, disabledWarnings: seq[WarningKind] = @[], showMismatches: bool = false,
mode: CompileMode = Debug): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
proc specialize(self: Compiler, name: Name, args: seq[Expression]): Name
proc matchImpl(self: Compiler, name: string, kind: Type, node: ASTNode = nil, allowFwd: bool = true): Name
proc infer(self: Compiler, node: LiteralExpr, allowGeneric: bool = false): Type
proc infer(self: Compiler, node: Expression, allowGeneric: bool = false): Type
proc inferOrError[T: LiteralExpr | Expression](self: Compiler, node: T, allowGeneric: bool = false): Type
proc findByName(self: Compiler, name: string, resolve: bool = true): seq[Name]
proc findByModule(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name]
proc compare(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, pos: int)
# Pragma handlers (registered in newCompiler)
proc handleMagicPragma(self: Compiler, pragma: Pragma, name: Name)
proc handlePurePragma(self: Compiler, pragma: Pragma, name: Name)
proc handleErrorPragma(self: Compiler, pragma: Pragma, name: Name)
proc dispatchPragmas(self: Compiler, name: Name)
proc dispatchDelayedPragmas(self: Compiler, name: Name)
proc funDecl(self: Compiler, node: FunDecl, name: Name)
proc typeDecl(self: Compiler, node: TypeDecl, name: Name)
proc compileModule(self: Compiler, moduleName: string)
proc generateCall(self: Compiler, fn: Name, args: seq[Expression], line: int)
# End of forward declarations
proc newCompiler*(replMode: bool = false): Compiler =
  ## Builds a brand new Compiler with empty state. When
  ## replMode is true the compiler is flagged as running
  ## inside the REPL. The built-in pragma handlers ("magic",
  ## "pure" and "error") are registered here and the lexer's
  ## symbol table is filled in
  result = Compiler(
    ast: @[],
    current: 0,
    file: "",
    names: @[],
    depth: 0,
    lines: @[],
    jumps: @[],
    currentFunction: nil,
    replMode: replMode,
    currentModule: "",
    compilerProcs: newTable[string, CompilerFunc]()
  )
  # "magic" and "pure" are handled immediately, "error" is delayed
  result.compilerProcs["magic"] = CompilerFunc(kind: Immediate, handler: handleMagicPragma)
  result.compilerProcs["pure"] = CompilerFunc(kind: Immediate, handler: handlePurePragma)
  result.compilerProcs["error"] = CompilerFunc(kind: Delayed, handler: handleErrorPragma)
  result.source = ""
  # The lexer and parser are kept around so that
  # imported modules can be compiled later
  result.lexer = newLexer()
  result.lexer.fillSymbolTable()
  result.parser = newParser()
  result.isMainModule = false
  result.closures = @[]
  result.forwarded = @[]
  result.disabledWarnings = @[]
## Public getters for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
  ## Returns the AST node that was consumed most recently
  ## (i.e. the one right before the compiler's cursor). If
  ## the cursor is past the end of the AST, the last node is
  ## returned. If nothing has been consumed yet, the first
  ## node is returned instead of indexing position -1.
  ## Raises an IndexDefect if the AST is empty
  if self.current >= self.ast.len():
    return self.ast[^1]
  # Clamp so that current == 0 maps to the first
  # node rather than to the invalid index -1
  result = self.ast[max(self.current - 1, 0)]
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
  ## Returns the declaration of the function that is
  ## currently being compiled, or nil when the compiler
  ## is not inside any function
  if self.currentFunction.isNil():
    return nil
  result = self.currentFunction.valueType.fun
proc getFile*(self: Compiler): string {.inline.} =
  ## Returns the file the compiler is working on
  result = self.file
proc getModule*(self: Compiler): string {.inline.} =
  ## Returns the module the compiler is working on
  result = self.currentModule
proc getLines*(self: Compiler): seq[tuple[start, stop: int]] =
  ## Returns the line boundaries stored for error reporting
  result = self.lines
proc getSource*(self: Compiler): string =
  ## Returns the source code of the current module
  result = self.source
proc getRelPos*(self: Compiler, line: int): tuple[start, stop: int] =
  ## Returns the stored boundaries of the given
  ## line. Line numbers are 1-based
  let index = line - 1
  result = self.lines[index]
proc getCurrentToken*(self: Compiler): Token =
  ## Returns the token of the node yielded by getCurrentNode()
  let node = self.getCurrentNode()
  result = node.token
## Utility functions
proc `$`*(self: Name): string =
  ## Stringifies a Name by stringifying the object it
  ## points to. A nil name is rendered as "nil" instead
  ## of dereferencing a nil reference
  if self.isNil():
    return "nil"
  result = $self[]
proc `$`(self: Type): string =
  ## Stringifies a Type by stringifying the object it
  ## points to. A nil type is rendered as "nil" instead
  ## of dereferencing a nil reference
  if self.isNil():
    return "nil"
  result = $self[]
proc peek(self: Compiler, distance: int = 0): ASTNode =
  ## Peeks at the AST node at the given distance.
  ## If the distance is out of bounds, the last
  ## AST node in the tree is returned. A negative
  ## distance may be used to retrieve previously
  ## consumed AST nodes. If the AST is empty there
  ## is no node to return, so nil is returned
  if self.ast.len() == 0:
    # The previous code detected this case but then
    # indexed the empty sequence anyway, which raised
    # an IndexDefect
    return nil
  let target = self.current + distance
  if target < 0 or target > self.ast.high():
    result = self.ast[^1]
  else:
    result = self.ast[target]
proc done(self: Compiler): bool {.inline.} =
  ## Tells whether every node of the AST has
  ## been consumed (true) or not (false)
  return self.ast.high() < self.current
proc error(self: Compiler, message: string, node: ASTNode = nil) {.raises: [CompileError], inline.} =
  ## Raises a CompileError carrying the given message.
  ## When no node is given, the error is attributed to
  ## the node returned by getCurrentNode()
  var culprit = node
  if culprit.isNil():
    culprit = self.getCurrentNode()
  raise CompileError(msg: message, node: culprit, line: culprit.token.line, file: self.file, module: self.currentModule, compiler: self)
proc warning(self: Compiler, kind: WarningKind, message: string, name: Name = nil) =
  ## Prints a warning to standard error, unless the
  ## given warning kind has been disabled. If a name
  ## is given, the warning points at its declaration
  ## (and at the function owning it, if any), otherwise
  ## it points at the current node and current function
  var
    where: ASTNode
    owner: Declaration
  if not name.isNil():
    where = name.node
    if where.isNil():
      # The name has no node attached: fall back to the current one
      where = self.getCurrentNode()
    if not name.belongsTo.isNil():
      owner = name.belongsTo.node
  else:
    where = self.getCurrentNode()
    owner = self.getCurrentFunction()
  var path = self.file
  let bounds = where.getRelativeBoundaries()
  if path notin ["<string>", ""]:
    # Only real files get their path made relative
    path = relativePath(path, getCurrentDir())
  if kind in self.disabledWarnings:
    return
  stderr.styledWrite(fgYellow, styleBright, "Warning in ", fgRed, &"{path}:{where.token.line}:{bounds.start}")
  if not owner.isNil() and owner.kind == funDecl:
    stderr.styledWrite(fgYellow, styleBright, " in function ", fgRed, FunDecl(owner).name.token.lexeme)
  stderr.styledWriteLine(styleBright, fgDefault, ": ", message)
proc step(self: Compiler): ASTNode {.inline.} =
  ## Consumes the node under the cursor and returns
  ## it. The cursor is only advanced if the compiler
  ## is not done yet
  result = self.peek()
  if self.done():
    return
  inc(self.current)
proc emitByte(self: Compiler, byt: OpCode | uint8, line: int) {.inline.} =
  ## Writes one byte (either a raw byte or an
  ## opcode) to the chunk being compiled, tagging
  ## it with the given line number
  let raw = uint8(byt)
  self.chunk.write(raw, line)
proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
  ## Writes a run of bytes to the current chunk, in
  ## order, by calling emitByte on every element
  for idx in 0 ..< bytarr.len():
    self.emitByte(bytarr[idx], line)
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
  ## Stores the literal held by val in the current chunk's
  ## constant table, encoded according to typ, and returns
  ## the value produced by writeConstant (the constant's index
  ## as 3 bytes). Types without a constant encoding produce an
  ## all-zero result
  const integerKinds = {UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64}
  var digits = ""
  if typ.kind in integerKinds:
    digits = val.token.lexeme
    # Integer literals may carry a size specifier after
    # an apostrophe (e.g. 1'u8): drop everything from the
    # last apostrophe onwards
    let tick = digits.rfind('\'')
    if tick != -1:
      digits = digits[0 ..< tick]
  case typ.kind:
    of UInt8, Int8:
      result = self.chunk.writeConstant([uint8(parseInt(digits))])
    of Int16, UInt16:
      result = self.chunk.writeConstant(parseInt(digits).toDouble())
    of Int32, UInt32:
      result = self.chunk.writeConstant(parseInt(digits).toQuad())
    of Int64:
      result = self.chunk.writeConstant(parseInt(digits).toLong())
    of UInt64:
      result = self.chunk.writeConstant(parseBiggestUInt(digits).toLong())
    of String:
      # NOTE(review): [1..^1] drops the first character only, so the
      # last character of the lexeme (presumably the closing quote) is
      # written too, while emitConstant emits a size of len - 2 -- confirm
      # whether [1..^2] was intended
      result = self.chunk.writeConstant(val.token.lexeme[1..^1].toBytes())
    of Float32:
      var parsed: float = 0.0
      discard parseFloat(val.token.lexeme, parsed)
      result = self.chunk.writeConstant(cast[array[4, uint8]](float32(parsed)))
    of Float64:
      var parsed: float = 0.0
      discard parseFloat(val.token.lexeme, parsed)
      result = self.chunk.writeConstant(cast[array[8, uint8]](parsed))
    else:
      discard
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
  ## Emits the load instruction matching the given
  ## type, followed by its operand (the constant's
  ## index as produced by makeConstant). Strings also
  ## get their size emitted as 3 bytes before the index
  let line = obj.token.line
  case kind.kind:
    of Int64: self.emitByte(LoadInt64, line)
    of UInt64: self.emitByte(LoadUInt64, line)
    of Int32: self.emitByte(LoadInt32, line)
    of UInt32: self.emitByte(LoadUInt32, line)
    of Int16: self.emitByte(LoadInt16, line)
    of UInt16: self.emitByte(LoadUInt16, line)
    of Int8: self.emitByte(LoadInt8, line)
    of UInt8: self.emitByte(LoadUInt8, line)
    of String:
      self.emitByte(LoadString, line)
      let lexeme = LiteralExpr(obj).literal.lexeme
      if lexeme.len() >= 16777216:
        self.error("string constants cannot be larger than 16777215 bytes")
      # The emitted size is 2 less than the lexeme's length
      self.emitBytes((lexeme.len() - 2).toTriple(), line)
    of Float32: self.emitByte(LoadFloat32, line)
    of Float64: self.emitByte(LoadFloat64, line)
    else:
      discard # TODO
  let operand = self.makeConstant(obj, kind)
  self.emitBytes(operand, line)
proc setJump(self: Compiler, offset: int, jmp: array[3, uint8]) =
  ## Overwrites the 3 bytes that follow the
  ## position `offset` in the bytecode with
  ## the given value
  for k in 0..2:
    self.chunk.code[offset + 1 + k] = jmp[k]
proc setJump(self: Compiler, offset: int, jmp: seq[uint8]) =
  ## Overwrites the 3 bytes that follow the
  ## position `offset` in the bytecode with
  ## the first 3 elements of the given sequence
  for k in 0..2:
    self.chunk.code[offset + 1 + k] = jmp[k]
proc patchJump(self: Compiler, offset: int) =
  ## Fills in the operand of a jump created with
  ## emitJump so that it lands at the current end of
  ## the bytecode. `offset` is the identifier returned
  ## by emitJump. The jump is then marked as patched
  let opcodePos = self.jumps[offset].offset
  let distance = self.chunk.code.len() - opcodePos
  if distance < 0:
    self.error("invalid jump size (< 0), did the bytecode size change without fixJumps being called?")
  if distance > 16777215:
    # TODO: Emit consecutive jumps?
    self.error("cannot jump more than 16777215 instructions")
  # 4 is subtracted to account for the jump instruction
  # itself (opcode + 3 operand bytes)
  self.setJump(opcodePos, (distance - 4).toTriple())
  self.jumps[offset].patched = true
proc emitJump(self: Compiler, opcode: OpCode, line: int): int =
  ## Emits the given jump opcode followed by a zeroed
  ## 3-byte placeholder operand. The returned value
  ## identifies the jump and must be handed to patchJump
  ## once the destination is known
  self.emitByte(opcode, line)
  # Remember where the opcode we just wrote lives
  let opcodePos = self.chunk.code.len() - 1
  self.jumps.add((patched: false, offset: opcodePos))
  self.emitBytes(0.toTriple(), line)
  return self.jumps.len() - 1
proc fixCFIOffsets(self: Compiler, oldLen: int, modifiedAt: int) =
  ## Shifts the CFI entries that start at or after
  ## modifiedAt by the amount the bytecode has grown
  ## (or shrunk) with respect to oldLen. Both the
  ## encoded CFI data in the chunk and the compiler's
  ## own bookkeeping are updated
  let delta = self.chunk.code.len() - oldLen
  if delta == 0:
    return
  for entry in self.cfiOffsets.mitems():
    if entry.start < modifiedAt:
      continue
    # First triple: the start offset
    let shiftedStart = (entry.start + delta).toTriple()
    for k in 0..2:
      self.chunk.cfi[entry.pos + k] = shiftedStart[k]
    # Second triple: decoded from the CFI data, shifted, written back
    let encodedStop = [self.chunk.cfi[entry.pos + 3], self.chunk.cfi[entry.pos + 4], self.chunk.cfi[entry.pos + 5]].fromTriple().int
    let shiftedStop = (encodedStop + delta).toTriple()
    for k in 0..2:
      self.chunk.cfi[entry.pos + 3 + k] = shiftedStop[k]
    entry.fn.codePos += delta
    entry.start += delta
    entry.stop += delta
proc fixJumps(self: Compiler, oldLen: int, modifiedAt: int) =
  ## Updates the recorded position of every jump
  ## located at or after modifiedAt once the size
  ## of the bytecode has changed with respect to oldLen
  let delta = self.chunk.code.len() - oldLen
  if delta == 0:
    return
  for entry in self.jumps.mitems():
    if entry.offset < modifiedAt:
      continue
    # The jump instruction itself got shifted inside
    # the code, so our internal list must follow it
    entry.offset += delta
    # NOTE(review): this writes back the very same 3 bytes
    # that were just read from the new position
    let operand = self.chunk.code[entry.offset + 1 .. entry.offset + 3]
    self.setJump(entry.offset, operand)
proc resolve(self: Compiler, name: string): Name =
  ## Looks the given name up, starting from the most
  ## recently declared names, and returns the first
  ## visible match (nil if there is none). The first
  ## time a type or (non-generic) function is resolved,
  ## its declaration is compiled on the spot
  for idx in countdown(self.names.high(), 0):
    let candidate = self.names[idx]
    if candidate.ident.token.lexeme != name:
      continue
    if candidate.owner != self.currentModule and (candidate.isPrivate or not candidate.exported):
      # We don't own this name. Private names are never
      # visible to us, and public names that were not
      # explicitly exported are treated as private too:
      # this avoids namespace pollution from imports
      # (i.e. if module A imports modules C and D and
      # module B imports module A, then B might not want
      # to also have access to C's and D's names as they
      # might clash with its own stuff)
      continue
    result = candidate
    break
  if result.isNil() or result.resolved:
    return
  # Declarations are compiled lazily, upon first use: unused
  # names cost nothing, generics become easier to implement
  # and unused-name warnings can be emitted when a name goes
  # out of scope
  result.resolved = true
  case result.kind:
    of NameKind.CustomType:
      self.typeDecl(TypeDecl(result.node), result)
    of NameKind.Function:
      # Generic functions are compiled at the call site
      # since the argument types are needed; regular ones
      # can be compiled right away
      if not result.valueType.isGeneric:
        self.funDecl(FunDecl(result.node), result)
    else:
      discard
proc resolve(self: Compiler, name: IdentExpr): Name =
  ## Convenience overload of resolve that looks up
  ## the lexeme of an identifier AST node
  let lexeme = name.token.lexeme
  result = self.resolve(lexeme)
proc resolveOrError[T: IdentExpr | string](self: Compiler, name: T): Name =
  ## Like resolve(), but raises a compile error
  ## instead of returning nil when the name does
  ## not exist
  result = self.resolve(name)
  if not result.isNil():
    return
  when T is IdentExpr:
    self.error(&"reference to undefined name '{name.token.lexeme}'", name)
  else:
    self.error(&"reference to undefined name '{name}'")
proc getStackPos(self: Compiler, name: Name): int =
## Returns the predicted call stack position of a
## given name, relative to the current frame, or
## -1 if the name is not found among the names that
## are counted. The position is computed by walking
## self.names in declaration order and counting every
## name that is not skipped by the checks below
var found = false
# Counting starts at 2
# NOTE(review): presumably the first two slots of a frame
# are reserved by the VM -- confirm against the VM's frame layout
result = 2
for variable in self.names:
if variable.kind in [NameKind.Module, NameKind.CustomType, NameKind.Enum, NameKind.Function, NameKind.None]:
# These names don't have a runtime representation on the call stack, so we skip them
continue
elif variable.kind == NameKind.Argument and variable.depth > self.depth:
# Argument of a function we haven't compiled yet (or one that we're
# not in). Ignore it, as it won't exist at runtime
continue
elif not variable.belongsTo.isNil():
if variable.belongsTo.valueType.isBuiltinFunction:
# Builtin functions don't exist at runtime either, so variables belonging to them
# are not present in the stack
continue
elif variable.valueType.kind == Generic:
# Generics are also a purely compile-time construct and are therefore
# ignored as far as stack positioning goes
continue
elif variable.belongsTo != name.belongsTo:
# Since referencing a function immediately compiles it, this means
# that if there's a function A with an argument x that calls another
# function B with an argument also named x, that second "x" would
# shadow the first one, leading to an incorrect stack offset
continue
elif variable.owner != self.currentModule:
# We don't own this variable, so we check
# if the owner exported it to us. If not,
# we skip it and pretend it doesn't exist
if variable.isPrivate or not variable.exported:
continue
if name == variable:
# After all of these checks, we can
# finally check whether the two names
# match (note: this also includes scope
# depth). Names are references, so this
# compares identity
found = true
break
# This name is counted: move on to the next position
inc(result)
if not found:
result = -1
proc getClosurePos(self: Compiler, name: Name): int =
  ## Returns the index of the given name in the
  ## list of closed-over variables, or -1 if the
  ## current function is not a closure or the name
  ## is not in the list
  if not self.currentFunction.valueType.isClosure:
    return -1
  # find() yields -1 when the name is missing
  result = self.closures.find(name)
proc compare(self: Compiler, a, b: Type): bool =
  ## Tells whether two type objects are considered
  ## equal. Either argument may be nil: nil stands for
  ## "no type" (e.g. the return type of a function that
  ## returns nothing) and only matches nil or Any
  if a.isNil():
    return b.isNil() or b.kind == Any
  if b.isNil():
    # a is known not to be nil here
    return a.kind == Any
  if a.kind == Any or b.kind == Any:
    # Any matches everything. User code cannot produce
    # it: it is needed internally (e.g. for function
    # calls, as peon has no return type inference)
    return true
  let
    genericA = a.kind == Generic
    genericB = b.kind == Generic
  if not (genericA or genericB):
    # With no generics involved, different
    # discriminants mean different types
    if a.kind != b.kind:
      return false
    case a.kind:
      of Int8, UInt8, Int16, UInt16, Int32,
         UInt32, Int64, UInt64, Float32, Float64,
         Char, Byte, String, Nil, Nan, Bool, Inf:
        # Value types carry no extra information
        return true
      of Reference, Pointer:
        # Both sides are of the same kind: compare
        # what they refer to
        return self.compare(a.value, b.value)
      of Function:
        # Functions match when arity, coroutine-ness,
        # return type and every argument type match
        if a.args.len() != b.args.len():
          return false
        if a.isCoroutine != b.isCoroutine:
          return false
        if not self.compare(a.returnType, b.returnType):
          return false
        for i in 0 ..< a.args.len():
          if not self.compare(a.args[i].kind, b.args[i].kind):
            return false
        return true
      else:
        discard # TODO: Custom types
    return false
  if genericA and genericB:
    # The first pair of constraints naming the same
    # type decides the outcome
    for c1 in a.cond:
      for c2 in b.cond:
        if self.compare(c1.kind, c2.kind):
          return c1.match == c2.match
    return false
  if genericA:
    # Generic against concrete type: the first constraint
    # naming b decides the outcome
    for constraint in a.cond:
      if self.compare(constraint.kind, b):
        return constraint.match
    return false
  # Only b is generic here
  for constraint in b.cond:
    if self.compare(constraint.kind, a):
      return constraint.match
  return false
proc toIntrinsic(name: string): Type =
  ## Maps the name of a built-in type (or one of its
  ## aliases) to a fresh type object. Unknown names
  ## yield nil
  case name:
    of "all":
      result = Type(kind: Any)
    of "int", "int64", "i64":
      result = Type(kind: Int64)
    of "uint64", "u64", "uint":
      result = Type(kind: UInt64)
    of "int32", "i32":
      result = Type(kind: Int32)
    of "uint32", "u32":
      result = Type(kind: UInt32)
    of "int16", "i16", "short":
      result = Type(kind: Int16)
    of "uint16", "u16":
      result = Type(kind: UInt16)
    of "int8", "i8":
      result = Type(kind: Int8)
    of "uint8", "u8":
      result = Type(kind: UInt8)
    of "f64", "float", "float64":
      result = Type(kind: Float64)
    of "f32", "float32":
      result = Type(kind: Float32)
    of "byte", "b":
      result = Type(kind: Byte)
    of "char", "c":
      result = Type(kind: Char)
    of "nan":
      result = Type(kind: Nan)
    of "nil":
      result = Type(kind: Nil)
    of "inf":
      result = Type(kind: Inf)
    of "bool":
      result = Type(kind: Bool)
    of "typevar":
      result = Type(kind: Typevar)
    of "string":
      result = Type(kind: String)
    else:
      result = nil
proc infer(self: Compiler, node: LiteralExpr, allowGeneric: bool = false): Type =
  ## Works out the type of a literal. A nil node
  ## yields nil. Numeric literals may carry a type
  ## specifier after an apostrophe: without one,
  ## integers are Int64 and floats are Float64
  if node.isNil():
    return nil
  case node.kind:
    of intExpr, binExpr, octExpr, hexExpr:
      let pieces = node.token.lexeme.split("'")
      if pieces.len() < 1 or pieces.len() > 2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if pieces.len() == 1:
        return Type(kind: Int64)
      let suffixType = pieces[1].toIntrinsic()
      if self.compare(suffixType, nil):
        self.error(&"invalid type specifier '{pieces[1]}' for int")
      return suffixType
    of floatExpr:
      let pieces = node.token.lexeme.split("'")
      if pieces.len() < 1 or pieces.len() > 2:
        self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
      if pieces.len() == 1 or pieces[1] == "f64":
        return Type(kind: Float64)
      let suffixType = pieces[1].toIntrinsic()
      if suffixType.isNil():
        self.error(&"invalid type specifier '{pieces[1]}' for float")
      return suffixType
    of nilExpr:
      return Type(kind: Nil)
    of trueExpr, falseExpr:
      return Type(kind: Bool)
    of nanExpr:
      return Type(kind: TypeKind.Nan)
    of infExpr:
      return Type(kind: TypeKind.Inf)
    of strExpr:
      return Type(kind: String)
    else:
      discard # TODO
proc infer(self: Compiler, node: Expression, allowGeneric: bool = false): Type =
  ## Infers the type of a given expression and
  ## returns it (if the node is nil, nil is
  ## returned). Always returns a concrete type
  ## unless allowGeneric is set to true. nil is
  ## also returned when no type can be determined
  ## (e.g. an undefined name, or an operator or
  ## function that returns nothing)
  if node.isNil():
    return nil
  case node.kind:
    of identExpr:
      let node = IdentExpr(node)
      var name = self.resolve(node)
      if not name.isNil():
        result = name.valueType
        if not result.isNil() and result.kind == Generic and not allowGeneric:
          if name.belongsTo.isNil():
            # Not owned by a function: look the generic's own name up
            name = self.resolve(result.name)
            if not name.isNil():
              result = name.valueType
          else:
            # Owned by a function: take the type from the
            # matching argument of the owner
            for arg in name.belongsTo.valueType.args:
              if node.token.lexeme == arg.name:
                result = arg.kind
      else:
        # Not a declared name: it may still be a built-in type
        result = node.name.lexeme.toIntrinsic()
    of unaryExpr:
      let node = UnaryExpr(node)
      let impl = self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.infer(node.a))]), node)
      result = impl.valueType.returnType
      # The return type is nil for operators that return
      # nothing, so it must be checked before reading its kind
      if not result.isNil() and result.kind == Generic and not allowGeneric:
        result = self.specialize(impl, @[node.a]).valueType.returnType
    of binaryExpr:
      let node = BinaryExpr(node)
      let impl = self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.infer(node.a)), ("", self.infer(node.b))]), node)
      result = impl.valueType.returnType
      # Same nil guard as for unary operators
      if not result.isNil() and result.kind == Generic and not allowGeneric:
        result = self.specialize(impl, @[node.a, node.b]).valueType.returnType
    of {intExpr, hexExpr, binExpr, octExpr,
        strExpr, falseExpr, trueExpr, infExpr,
        nanExpr, floatExpr, nilExpr
       }:
      result = self.infer(LiteralExpr(node))
    of lambdaExpr:
      let node = LambdaExpr(node)
      result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
      if not node.returnType.isNil():
        result.returnType = self.infer(node.returnType)
      for argument in node.arguments:
        result.args.add((argument.name.token.lexeme, self.infer(argument.valueType)))
    of callExpr:
      let node = CallExpr(node)
      case node.callee.kind:
        of identExpr:
          let resolved = self.resolve(IdentExpr(node.callee))
          # A name without a type yields nil rather
          # than a nil dereference
          if not resolved.isNil() and not resolved.valueType.isNil():
            case resolved.valueType.kind:
              of Function:
                result = resolved.valueType.returnType
              else:
                result = resolved.valueType
          else:
            result = nil
        of lambdaExpr:
          result = self.infer(LambdaExpr(node.callee).returnType)
        of callExpr:
          result = self.infer(CallExpr(node.callee))
          if not result.isNil():
            result = result.returnType
        else:
          discard # Unreachable
    of varExpr:
      result = self.infer(Var(node).value)
      # The inner expression may have no type
      # NOTE(review): this flags the very type object returned by
      # the inner infer() call, which may be shared with a declared
      # name -- confirm whether a copy is wanted here
      if not result.isNil():
        result.mutable = true
    of refExpr:
      result = Type(kind: Reference, value: self.infer(Ref(node).value))
    of ptrExpr:
      result = Type(kind: Pointer, value: self.infer(Ptr(node).value))
    of groupingExpr:
      result = self.infer(GroupingExpr(node).expression)
    else:
      discard # Unreachable
proc inferOrError[T: LiteralExpr | Expression](self: Compiler, node: T, allowGeneric: bool = false): Type =
  ## Same as infer(), except that a compile error
  ## is raised when no type can be inferred. The
  ## message depends on the kind of expression
  result = self.infer(node, allowGeneric)
  if not result.isNil():
    return
  if node.kind == identExpr:
    self.error(&"reference to undefined name '{IdentExpr(node).token.lexeme}'", node)
  elif node.kind == callExpr and CallExpr(node).callee.kind == identExpr:
    let call = CallExpr(node)
    self.error(&"call to undefined function '{IdentExpr(call.callee).token.lexeme}'", call)
  else:
    self.error("expression has no type", node)
proc typeToStr(self: Compiler, typ: Type): string =
  ## Renders a type object as a human readable
  ## string for use in diagnostics. nil is rendered
  ## as "nil"; kinds that are not handled below are
  ## rendered as an empty string
  if typ.isNil():
    return "nil"
  case typ.kind:
    of Int8, UInt8, Int16, UInt16, Int32,
       UInt32, Int64, UInt64, Float32, Float64,
       Char, Byte, String, Nil, TypeKind.Nan, Bool,
       TypeKind.Inf:
      # Simple types are just their lowercased kind name
      result = toLowerAscii($typ.kind)
    of Pointer:
      result = "ptr " & self.typeToStr(typ.value)
    of Reference:
      result = "ref " & self.typeToStr(typ.value)
    of Function:
      var params: seq[string] = @[]
      for arg in typ.args:
        var rendered = arg.name & ": "
        if arg.kind.mutable:
          rendered &= "var "
        rendered &= self.typeToStr(arg.kind)
        params.add(rendered)
      result = "fn (" & params.join(", ") & ")"
      if not typ.returnType.isNil():
        result &= ": " & self.typeToStr(typ.returnType)
    of Generic:
      # Constraints are separated by " | "; negated
      # ones are prefixed with "~"
      var constraints: seq[string] = @[]
      for condition in typ.cond:
        let prefix = if condition.match: "" else: "~"
        constraints.add(prefix & self.typeToStr(condition.kind))
      result = constraints.join(" | ")
    else:
      discard
proc findByName(self: Compiler, name: string, resolve: bool = true): seq[Name] =
  ## Collects every visible name with the given
  ## identifier, most recently declared first. Names
  ## owned by other modules are only included if they
  ## are public and exported. When resolve is true,
  ## matches that were never resolved before are marked
  ## as resolved and their type or (non-generic) function
  ## declarations are compiled on-the-fly, as resolve() does
  for idx in countdown(self.names.high(), 0):
    let candidate = self.names[idx]
    if candidate.ident.token.lexeme != name:
      continue
    let foreign = candidate.owner != self.currentModule
    if foreign and (candidate.isPrivate or not candidate.exported):
      continue
    result.add(candidate)
  if not resolve:
    return
  for match in result:
    if match.resolved:
      continue
    match.resolved = true
    case match.kind:
      of NameKind.CustomType:
        self.typeDecl(TypeDecl(match.node), match)
      of NameKind.Function:
        if not match.valueType.isGeneric:
          self.funDecl(FunDecl(match.node), match)
      else:
        discard
proc findByModule(self: Compiler, name: string): seq[Name] =
  ## Collects every public (non-private) name owned
  ## by the given module, most recently declared first
  for idx in countdown(self.names.high(), 0):
    let candidate = self.names[idx]
    if candidate.isPrivate or candidate.owner != name:
      continue
    result.add(candidate)
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name] =
  ## Collects the visible names with the given identifier
  ## whose type compares equal to kind. A depth other than
  ## -1 additionally restricts matches to that scope depth.
  ## Matches that were never resolved before are marked as
  ## resolved and their type or (non-generic) function
  ## declarations are compiled on-the-fly
  for candidate in self.findByName(name, resolve=false):
    if not self.compare(candidate.valueType, kind):
      continue
    if depth != -1 and depth != candidate.depth:
      continue
    result.add(candidate)
    if candidate.resolved:
      continue
    candidate.resolved = true
    case candidate.kind:
      of NameKind.CustomType:
        self.typeDecl(TypeDecl(candidate.node), candidate)
      of NameKind.Function:
        if not candidate.valueType.isGeneric:
          self.funDecl(FunDecl(candidate.node), candidate)
      else:
        discard
proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] {.used.} =
  ## Collects the visible names with the given
  ## identifier that were declared at exactly the
  ## given scope depth
  let matches = self.findByName(name)
  result = matches.filterIt(it.depth == depth)
proc matchImpl(self: Compiler, name: string, kind: Type, node: ASTNode = nil, allowFwd: bool = true): Name =
  ## Tries to find a matching function implementation
  ## compatible with the given type and returns its
  ## name object. A compile error is raised (pointing at
  ## node, if given) when there is no match, when more than
  ## one non-forwarded implementation matches, or when the
  ## only match is a forward declaration and allowFwd is false
  var impl = self.findByType(name, kind)
  if impl.len() == 0:
    var msg = &"cannot find a suitable implementation for '{name}'"
    let candidates = self.findByName(name, resolve=false)
    if candidates.len() > 0:
      msg &= &", found {len(candidates)} potential candidate"
      if candidates.len() > 1:
        msg &= "s"
      if self.showMismatches:
        msg &= ": "
        for candidate in candidates:
          msg &= &"\n - in '{relativePath(candidate.file, getCurrentDir())}', line {candidate.ident.token.line}: '{self.typeToStr(candidate.valueType)}'"
          # A candidate without a type can't be a callable either
          if candidate.valueType.isNil() or candidate.valueType.kind != Function:
            msg &= ", not a callable"
          elif kind.args.len() != candidate.valueType.args.len():
            msg &= &", wrong number of arguments ({candidate.valueType.args.len()} expected, got {kind.args.len()})"
          else:
            for i, arg in kind.args:
              let expected = candidate.valueType.args[i].kind
              # Argument types may be nil when inference failed,
              # so they are checked before reading their fields
              if not expected.isNil() and expected.mutable and (arg.kind.isNil() or not arg.kind.mutable):
                msg &= &", first mismatch at position {i + 1}: {candidate.valueType.args[i].name} is immutable, not 'var'"
                break
              elif not self.compare(arg.kind, expected):
                msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(expected)}', got '{self.typeToStr(arg.kind)}' instead"
                break
      else:
        msg &= " (compile with --showMismatches for more details)"
    self.error(msg, node)
  if impl.len() > 1:
    # Forward declarations don't count when looking for a function.
    # If every match is a forward declaration we keep them all, so
    # that the ambiguity is reported below instead of indexing an
    # empty sequence
    let concrete = impl.filterIt(not it.valueType.forwarded)
    if concrete.len() > 0:
      impl = concrete
    if impl.len() > 1:
      # If it's *still* more than one match, then it's an error
      var msg = &"multiple matching implementations of '{name}' found"
      if self.showMismatches:
        msg &= ":"
        for fn in reversed(impl):
          msg &= &"\n - in module '{fn.owner}' at line {fn.line} of type {self.typeToStr(fn.valueType)}"
      else:
        msg &= " (compile with --showMismatches for more details)"
      self.error(msg, node)
  if impl[0].valueType.forwarded and not allowFwd:
    self.error(&"expecting an implementation for function '{impl[0].ident.token.lexeme}' declared in module '{impl[0].owner}' at line {impl[0].ident.token.line} of type '{self.typeToStr(impl[0].valueType)}'", node)
  result = impl[0]
proc check(self: Compiler, term: Expression, kind: Type, allowAny: bool = false) =
    ## Infers the type of term and verifies it against the
    ## expected type `kind`. An error is reported if the
    ## inferred type is Any while allowAny is false, or if
    ## the two types do not compare equal; otherwise the
    ## procedure simply returns
    let k = self.inferOrError(term)
    # Any is reserved for internal use unless the caller opts in
    let anyRejected = k.kind == Any and not allowAny
    if anyRejected:
        self.error("'all' is not a valid type in this context", term)
    elif not self.compare(k, kind):
        self.error(&"expecting value of type '{self.typeToStr(kind)}', got '{self.typeToStr(k)}' instead", term)
proc handleBuiltinFunction(self: Compiler, fn: Type, args: seq[Expression], line: int) =
    ## Emits the instructions implementing a builtin function
    ## (for example addition or subtraction). The opcode is
    ## selected through fn.builtinOp; an unknown builtin is
    ## reported as an error
    const shortCircuiting = ["LogicalOr", "LogicalAnd"]
    if fn.builtinOp notin shortCircuiting:
        # Every other builtin has its operands compiled up front.
        # With two operands, the second one is compiled first
        case len(args):
            of 2:
                self.expression(args[1])
                self.expression(args[0])
            of 1:
                self.expression(args[0])
            else:
                discard
    # Builtins that map directly onto a single opcode
    const codes: Table[string, OpCode] = {
        "Negate": Negate,
        "NegateFloat32": NegateFloat32,
        "NegateFloat64": NegateFloat64,
        "Add": Add,
        "Subtract": Subtract,
        "Divide": Divide,
        "Multiply": Multiply,
        "SignedDivide": SignedDivide,
        "AddFloat64": AddFloat64,
        "SubtractFloat64": SubtractFloat64,
        "DivideFloat64": DivideFloat64,
        "MultiplyFloat64": MultiplyFloat64,
        "AddFloat32": AddFloat32,
        "SubtractFloat32": SubtractFloat32,
        "DivideFloat32": DivideFloat32,
        "MultiplyFloat32": MultiplyFloat32,
        "Pow": Pow,
        "SignedPow": SignedPow,
        "PowFloat32": PowFloat32,
        "PowFloat64": PowFloat64,
        "Mod": Mod,
        "SignedMod": SignedMod,
        "ModFloat32": ModFloat32,
        "ModFloat64": ModFloat64,
        "Or": Or,
        "And": And,
        "Xor": Xor,
        "Not": Not,
        "LShift": LShift,
        "RShift": RShift,
        "Equal": Equal,
        "NotEqual": NotEqual,
        "LessThan": LessThan,
        "GreaterThan": GreaterThan,
        "LessOrEqual": LessOrEqual,
        "GreaterOrEqual": GreaterOrEqual,
        "PrintInt64": PrintInt64,
        "PrintUInt64": PrintUInt64,
        "PrintInt32": PrintInt32,
        "PrintUInt32": PrintUInt32,
        "PrintInt16": PrintInt16,
        "PrintUInt16": PrintUInt16,
        "PrintInt8": PrintInt8,
        "PrintUInt8": PrintUInt8,
        "PrintFloat64": PrintFloat64,
        "PrintFloat32": PrintFloat32,
        "PrintHex": PrintHex,
        "PrintBool": PrintBool,
        "PrintNan": PrintNan,
        "PrintInf": PrintInf,
        "PrintString": PrintString,
        "SysClock64": SysClock64,
        "LogicalNot": LogicalNot,
        "NegInf": LoadNInf
    }.to_table()
    if fn.builtinOp in codes:
        self.emitByte(codes[fn.builtinOp], line)
    elif fn.builtinOp == "LogicalOr":
        # The right operand is compiled after a conditional jump
        # so it can be skipped at runtime
        self.expression(args[0])
        let skipRight = self.emitJump(JumpIfTrue, line)
        self.expression(args[1])
        self.patchJump(skipRight)
    elif fn.builtinOp == "LogicalAnd":
        self.expression(args[0])
        let skipRight = self.emitJump(JumpIfFalseOrPop, line)
        self.expression(args[1])
        self.patchJump(skipRight)
    else:
        self.error(&"unknown built-in: '{fn.builtinOp}'", fn.fun)
proc beginScope(self: Compiler) =
    ## Opens a new local scope by bumping the
    ## compiler's current scope depth by one
    self.depth += 1
# Turns our function tree into a linear list: a
# function comes first, followed by its children
# (and their children) in declaration order
proc flattenImpl(self: Type, to: var seq[Type]) =
    ## Appends self and all of its descendants to
    ## the given sequence, in depth-first pre-order
    var pending = @[self]
    while pending.len() > 0:
        let current = pending.pop()
        to.add(current)
        # Children are pushed back-to-front so that the
        # first child is the next one to be popped
        for i in countdown(current.children.high(), 0):
            pending.add(current.children[i])
proc flatten(self: Type): seq[Type] =
    ## Returns self followed by all of its
    ## descendants as a flat list
    result = @[]
    flattenImpl(self, result)
proc patchForwardDeclarations(self: Compiler) =
    ## Goes through every recorded forward declaration,
    ## looks up its actual implementation (it is an error
    ## if there is none) and overwrites the 8-byte address
    ## stored in the constants table so that calls to the
    ## forward declaration reach the real function
    for (declared, slot) in self.forwarded:
        let target = self.matchImpl(declared.ident.token.lexeme, declared.valueType, allowFwd=false)
        if slot != 0:
            let address = target.codePos.toLong()
            for i in 0..7:
                self.chunk.consts[slot + i] = address[i]
proc endScope(self: Compiler) =
    ## Ends the current local scope: decrements the scope
    ## depth, emits the instructions that pop the variables
    ## and arguments going out of scope, warns about unused
    ## names (outside of REPL mode) and finally removes the
    ## names that went out of scope from the name list
    if self.depth < 0:
        self.error("cannot call endScope with depth < 0 (This is an internal error and most likely a bug)")
    dec(self.depth)
    # We keep track both of which names are going out of scope
    # and how many actually need to be popped off the call stack
    # at runtime (since only variables and function arguments
    # actually materialize at runtime)
    var names: seq[Name] = @[]
    var popCount = 0
    for name in self.names:
        # We only pop names in scopes deeper than ours
        if name.depth > self.depth:
            if name.depth == 0 and not self.isMainModule:
                # Global names coming from other modules only go out of scope
                # when the global scope of the main module is closed (i.e. at
                # the end of the whole program)
                continue
            names.add(name)
            if name.kind == NameKind.Function and name.valueType.children.len() > 0 and name.depth == 0:
                # When a closure goes out of scope, its environment is reclaimed.
                # This includes the environments of every other closure that may
                # have been contained within it, too
                var i = 0
                var envLen = 0
                var lastEnvLen = 0
                # Why this? Well, it's simple: if a function returns
                # a closure, that function becomes a closure too. The
                # environments of closures are aligned one after the
                # other, so if a and b are both closures, but only b
                # closes over a value, both a and b will have an envLen
                # of 1, which would cause us to emit one more PopClosure
                # instruction than what's actually needed. We account
                # for this by only counting a function's environment
                # when it is larger than the one seen just before it
                for fn in flatten(name.valueType):
                    if fn.isClosure and fn.envLen > lastEnvLen:
                        envLen += fn.envLen
                    lastEnvLen = fn.envLen
                for y in 0..<envLen:
                    self.closures.delete(y + i)
                    self.emitByte(PopClosure, self.peek().token.line)
                    self.emitBytes((y + i).toTriple(), self.peek().token.line)
                    inc(i)
            # Now we have to actually emit the pop instructions. First
            # off, we skip the names that will not exist at runtime,
            # because there's no need to emit any instructions to pop them
            # (we still remove them from the name list later so they can't
            # be referenced anymore, of course)
            if name.kind notin [NameKind.Var, NameKind.Argument]:
                continue
            elif name.kind == NameKind.Argument:
                if name.belongsTo.valueType.isBuiltinFunction:
                    # Arguments to builtin functions become temporaries on the
                    # stack and are popped automatically
                    continue
                if not name.belongsTo.resolved:
                    # Function hasn't been compiled yet,
                    # so we can't get rid of its arguments
                    # (it may need them later)
                    names.delete(names.high())
                    continue
            if not name.resolved and not self.replMode:
                # We don't emit this warning in replMode because
                # a variable might be declared on one line and then
                # used on the next
                case name.kind:
                    of NameKind.Var:
                        if not name.ident.token.lexeme.startsWith("_"):
                            self.warning(UnusedName, &"'{name.ident.token.lexeme}' is declared but not used (add '_' prefix to silence warning)", name)
                    of NameKind.Argument:
                        if not name.ident.token.lexeme.startsWith("_"):
                            if not name.belongsTo.valueType.isBuiltinFunction:
                                # Builtin functions never use their arguments
                                self.warning(UnusedName, &"argument '{name.ident.token.lexeme}' is unused (add '_' prefix to silence warning)", name)
                    else:
                        discard
            inc(popCount)
    if popCount > 1:
        # If we're popping more than one variable,
        # we keep emitting PopN instructions until
        # the pop count drops to zero (one PopN can
        # only encode as many pops as fit in its
        # operand)
        while popCount > 0:
            self.emitByte(PopN, self.peek().token.line)
            self.emitBytes(popCount.toDouble(), self.peek().token.line)
            popCount -= popCount.toDouble().fromDouble().int
    elif popCount == 1:
        # We only emit PopN if we're popping more than one value
        self.emitByte(PopC, self.peek().token.line)
    # Finally, drop every name that went out of scope. Building a
    # filtered copy avoids deleting from the list while indexing it
    self.names = self.names.filterIt(it notin names)
proc unpackGenerics(self: Compiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) =
    ## Recursively walks a generic type constraint and appends
    ## one (match, type) entry to list for every identifier in
    ## it. Only identifiers, the binary "|" operator and the
    ## unary "~" operator (whose operand is unpacked with
    ## accept=false) are allowed: anything else is an error
    const invalid = "invalid type constraint in generic declaration"
    case condition.kind:
        of identExpr:
            list.add((accept, self.inferOrError(condition)))
        of binaryExpr:
            let binary = BinaryExpr(condition)
            if binary.operator.lexeme == "|":
                self.unpackGenerics(binary.a, list)
                self.unpackGenerics(binary.b, list)
            else:
                self.error(invalid, binary)
        of unaryExpr:
            let unary = UnaryExpr(condition)
            if unary.operator.lexeme == "~":
                self.unpackGenerics(unary.a, list, accept=false)
            else:
                self.error(invalid, unary)
        else:
            self.error(invalid, condition)
proc declareName(self: Compiler, node: ASTNode, mutable: bool = false) =
    ## Statically declares a name into the current scope.
    ## "Declaring" a name only means updating our internal
    ## list of identifiers so that further calls to resolve()
    ## correctly return them. There is no code to actually
    ## declare a variable at runtime: the value is already
    ## on the stack.
    ##
    ## Handles variable declarations, function declarations
    ## (together with their generics and arguments) and import
    ## statements; any other node kind declares nothing. Pragmas
    ## attached to the declaration are dispatched right away and
    ## a warning is emitted when the new name shadows a variable,
    ## module, type or enum from an outer scope
    var declaredName: string = ""
    var n: Name
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            if self.names.high() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777215 variables at a time")
            declaredName = node.name.token.lexeme
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            self.names.add(Name(depth: self.depth,
                                ident: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                file: self.file,
                                isConst: node.isConst,
                                valueType: nil, # Done later
                                isLet: node.isLet,
                                line: node.token.line,
                                belongsTo: self.currentFunction,
                                kind: NameKind.Var,
                                node: node
                                ))
            n = self.names[^1]
            # NOTE(review): valueType was just set to nil above, so this
            # looks like it dereferences nil when mutable is true. Confirm
            # against the callers
            if mutable:
                self.names[^1].valueType.mutable = true
        of NodeKind.funDecl:
            var node = FunDecl(node)
            declaredName = node.name.token.lexeme
            # A function without a body is recorded as a forward declaration
            var fn = Name(depth: self.depth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          file: self.file,
                          valueType: Type(kind: Function,
                                          returnType: nil, # We check it later
                                          args: @[],
                                          fun: node,
                                          children: @[],
                                          forwarded: node.body.isNil()),
                          ident: node.name,
                          node: node,
                          isLet: false,
                          line: node.token.line,
                          kind: NameKind.Function,
                          belongsTo: self.currentFunction)
            n = fn
            # First we declare the function's generics, if it has any.
            # This is because the function's return type may in itself
            # be a generic, so it needs to exist first
            var constraints: seq[tuple[match: bool, kind: Type]] = @[]
            for gen in node.generics:
                self.unpackGenerics(gen.cond, constraints)
                # Generics live one scope deeper than the function itself
                self.names.add(Name(depth: fn.depth + 1,
                                    isPrivate: true,
                                    valueType: Type(kind: Generic, name: gen.name.token.lexeme, mutable: false, cond: constraints),
                                    codePos: 0,
                                    isLet: false,
                                    line: fn.node.token.line,
                                    belongsTo: fn,
                                    ident: gen.name,
                                    owner: self.currentModule,
                                    file: self.file))
                # Each generic gets its own, fresh list of constraints
                constraints = @[]
            if not node.returnType.isNil():
                fn.valueType.returnType = self.inferOrError(node.returnType, allowGeneric=true)
            self.names.add(fn)
            # We now declare and typecheck the function's
            # arguments
            for argument in FunDecl(fn.node).arguments:
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                self.names.add(Name(depth: fn.depth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    file: self.file,
                                    isConst: false,
                                    ident: argument.name,
                                    valueType: self.inferOrError(argument.valueType, allowGeneric=true),
                                    codePos: 0,
                                    isLet: false,
                                    line: argument.name.token.line,
                                    belongsTo: fn,
                                    kind: NameKind.Argument,
                                    node: argument.name
                                    ))
                # The function's signature is built up one argument at a time
                fn.valueType.args.add((self.names[^1].ident.token.lexeme, self.names[^1].valueType))
            if node.generics.len() > 0:
                fn.valueType.isGeneric = true
        of NodeKind.importStmt:
            var node = ImportStmt(node)
            # The module's name is its file name with ".pn" removed
            var name = node.moduleName.token.lexeme.extractFilename().replace(".pn", "")
            declaredName = name
            self.names.add(Name(depth: self.depth,
                                owner: self.currentModule,
                                file: self.file,
                                ident: newIdentExpr(Token(kind: Identifier, lexeme: name, line: node.moduleName.token.line)),
                                line: node.moduleName.token.line,
                                kind: NameKind.Module,
                                isPrivate: false
                                ))
            n = self.names[^1]
        else:
            discard # TODO: Types, enums
    # NOTE(review): n is still nil if the branch above declared
    # nothing. Confirm dispatchPragmas is never reached that way
    self.dispatchPragmas(n)
    for name in self.findByName(declaredName, resolve=false):
        if name == n:
            continue
        elif name.kind in [NameKind.Var, NameKind.Module, NameKind.CustomType, NameKind.Enum] and name.depth < self.depth:
            # We don't check for clashing functions here: self.matchImpl() takes care of that
            self.warning(WarningKind.ShadowOuterScope, &"'{name.ident.token.lexeme}' shadows a name from an outer scope")
proc emitLoop(self: Compiler, begin: int, line: int) =
    ## Emits a JumpBackwards instruction whose operand is the
    ## distance from the current end of the bytecode back to
    ## `begin`. The distance has to fit in a 3-byte operand,
    ## otherwise an error is reported
    const maxJump = 16777215
    let distance = 4 + self.chunk.code.high() - begin
    if distance > maxJump:
        # TODO: Emit consecutive jumps?
        self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(JumpBackwards, line)
    self.emitBytes(distance.toTriple(), line)
proc patchBreaks(self: Compiler) =
    ## Patches every jump recorded in the current loop's
    ## breakJumps list. This has to happen after the loop
    ## is fully compiled, because only then is the amount
    ## of code a break has to skip known
    for pendingJump in self.currentLoop.breakJumps:
        self.patchJump(pendingJump)
proc handleMagicPragma(self: Compiler, pragma: Pragma, name: Name) =
    ## Handles the "magic" pragma, which turns a function
    ## declaration into a builtin. The pragma takes exactly
    ## one constant string argument: the name of the builtin
    ## operation. The given name is assumed to be declared
    ## already
    if pragma.args.len() != 1:
        self.error("'magic' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'magic' pragma: wrong type of argument (constant string expected)")
    elif name.node.kind != NodeKind.funDecl:
        self.error("'magic' pragma is not valid in this context")
    # The slice drops the first and last character of the
    # string literal's lexeme (i.e. its quotes)
    let operation = pragma.args[0].token.lexeme[1..^2]
    name.valueType.builtinOp = operation
    name.valueType.isBuiltinFunction = true
    # A magic function has no use for its body, so we drop it
    FunDecl(name.node).body = nil
proc handleErrorPragma(self: Compiler, pragma: Pragma, name: Name) =
    ## Handles the "error" pragma: after validating the
    ## pragma itself, reports its (single, constant string)
    ## argument as a compile error. When a name is given, it
    ## must refer to a function declaration
    if pragma.args.len() != 1:
        self.error("'error' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'error' pragma: wrong type of argument (constant string expected)")
    elif not (name.isNil() or name.node.kind == NodeKind.funDecl):
        self.error("'error' pragma is not valid in this context")
    # The message is the string literal's lexeme minus
    # its first and last character
    let message = pragma.args[0].token.lexeme[1..^2]
    self.error(message)
proc handlePurePragma(self: Compiler, pragma: Pragma, name: Name) =
    ## Handles the "pure" pragma by setting the isPure
    ## flag of the function declaration or lambda that
    ## the name refers to. Any other node is an error
    let target = name.node
    if target.kind == NodeKind.funDecl:
        FunDecl(target).isPure = true
    elif target.kind == lambdaExpr:
        LambdaExpr(target).isPure = true
    else:
        self.error("'pure' pragma is not valid in this context")
proc dispatchPragmas(self: Compiler, name: Name) =
    ## Runs the handlers of all pragmas of kind Immediate
    ## attached to the node the given name was declared
    ## with. Names without a node are ignored; an unknown
    ## pragma is an error
    if name.node.isNil():
        return
    let pragmas =
        case name.node.kind
        of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
            Declaration(name.node).pragmas
        of lambdaExpr:
            LambdaExpr(name.node).pragmas
        else:
            newSeq[Pragma]() # Unreachable
    for pragma in pragmas:
        if pragma.name.token.lexeme notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        let entry = self.compilerProcs[pragma.name.token.lexeme]
        # Everything that isn't Immediate is left to dispatchDelayedPragmas
        if entry.kind == Immediate:
            entry.handler(self, pragma, name)
proc dispatchDelayedPragmas(self: Compiler, name: Name) =
    ## Runs the handlers of all pragmas attached to the
    ## name's declaration that are not of kind Immediate.
    ## This is meant to happen when the object is called
    ## and only applies to functions. Names without a node
    ## are ignored; an unknown pragma is an error
    if name.node.isNil():
        return
    let pragmas = Declaration(name.node).pragmas
    for pragma in pragmas:
        if pragma.name.token.lexeme notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        let entry = self.compilerProcs[pragma.name.token.lexeme]
        # Immediate pragmas are handled by dispatchPragmas
        if entry.kind != Immediate:
            entry.handler(self, pragma, name)
proc patchReturnAddress(self: Compiler, pos: int) =
    ## Overwrites the 8 bytes starting at index pos in the
    ## constants table with the current length of the
    ## bytecode, which serves as the return address of a
    ## function call
    let address = self.chunk.code.len().toLong()
    for i in 0..7:
        self.chunk.consts[pos + i] = address[i]
proc terminateProgram(self: Compiler, pos: int) =
    ## Wraps up a peon program: forward declarations are
    ## patched, the current scope is closed, a Return
    ## instruction is emitted and the return address at
    ## index pos in the constants table is patched
    self.patchForwardDeclarations()
    self.endScope()
    let line = self.peek().token.line
    self.emitByte(OpCode.Return, line)
    # Entry point has no return value (TODO: Add easter eggs, cuz why not)
    self.emitByte(0, line)
    self.patchReturnAddress(pos)
proc beginProgram(self: Compiler): int =
    ## Emits the prologue of a peon program and returns
    ## the position, inside the constants table, of the
    ## placeholder return address of the program's entry
    ## point (terminateProgram patches it later)

    # All user code is wrapped in a hidden entry point, as if
    # peon wrote a main() function on the user's behalf. It is
    # called like any other peon function, but generateCall()
    # can't be used for that: we don't know where the program
    # ends yet. So we emit a dummy return address now and fix
    # it up once the boundaries of the hidden main() are known.
    # The entry point's code position is set 12 bytes past the
    # current end of the bytecode (presumably the size of the
    # call sequence emitted below)
    let main = Name(kind: NameKind.Function,
                    ident: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                    valueType: Type(kind: Function,
                                    returnType: nil,
                                    args: @[],
                                    ),
                    codePos: self.chunk.code.len() + 12,
                    depth: 0,
                    line: -1,
                    owner: self.currentModule,
                    file: self.file,
                    isPrivate: true,
                    isConst: false,
                    isLet: false,
                    resolved: true)
    self.names.add(main)
    # Address of the entry point
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(main.codePos.toLong()), 1)
    # Placeholder return address: its position is what we hand back
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), 1)
    result = self.chunk.consts.len() - 8
    # The entry point takes no arguments
    self.emitByte(Call, 1)
    self.emitBytes(0.toTriple(), 1)
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
    ## Emits the instructions that load a literal: the
    ## singletons (true, false, nil, inf, nan), strings and
    ## numbers. Hexadecimal, binary and octal literals are
    ## re-emitted as decimal integer constants. Await
    ## expressions are compiled here as well

    proc decimalToken(value: int, original: Token): Token =
        ## Builds the token of a decimal integer literal equal
        ## to value, located where the original token starts
        let digits = $value
        Token(lexeme: digits,
              line: original.line,
              pos: (start: original.pos.start,
                    stop: original.pos.start + len(digits)),
              relPos: (start: original.relPos.start,
                       stop: original.relPos.start + len(digits)))

    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue, node.token.line)
        of falseExpr:
            self.emitByte(LoadFalse, node.token.line)
        of nilExpr:
            self.emitByte(LoadNil, node.token.line)
        of infExpr:
            self.emitByte(LoadInf, node.token.line)
        of nanExpr:
            self.emitByte(LoadNan, node.token.line)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        of intExpr:
            let integer = IntExpr(node)
            let kind = self.infer(integer)
            # The literal is parsed only to validate its range:
            # the parsed value itself is not used
            if kind.kind in [Int64, Int32, Int16, Int8]:
                var signed: int
                try:
                    discard parseInt(integer.literal.lexeme, signed)
                except ValueError:
                    self.error("integer value out of range")
            else:
                var unsigned: uint64
                try:
                    discard parseBiggestUInt(integer.literal.lexeme, unsigned)
                except ValueError:
                    self.error("integer value out of range")
            self.emitConstant(integer, kind)
        of hexExpr:
            let hex = HexExpr(node)
            var value: int
            try:
                discard parseHex(hex.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            let decimal = newIntExpr(decimalToken(value, hex.token))
            self.emitConstant(decimal, self.infer(hex))
        of binExpr:
            let binary = BinExpr(node)
            var value: int
            try:
                discard parseBin(binary.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            let decimal = newIntExpr(decimalToken(value, binary.token))
            self.emitConstant(decimal, self.infer(binary))
        of octExpr:
            let octal = OctExpr(node)
            var value: int
            try:
                discard parseOct(octal.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            let decimal = newIntExpr(decimalToken(value, octal.token))
            self.emitConstant(decimal, self.infer(octal))
        of floatExpr:
            let floating = FloatExpr(node)
            var value: float
            try:
                discard parseFloat(floating.literal.lexeme, value)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(floating, self.infer(floating))
        of awaitExpr:
            let awaited = AwaitExpr(node)
            self.expression(awaited.expression)
            self.emitByte(OpCode.Await, node.token.line)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")
proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) {.inline.} =
    ## Emits the code to call the unary operator fn on the
    ## operand of op. The generated instructions are attributed
    ## to the line of the operator expression itself (the call
    ## site) rather than to the line where fn was declared
    self.generateCall(fn, @[op.a], op.token.line)
proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) {.inline.} =
    ## Emits the code to call the binary operator fn on the
    ## two operands of op. The generated instructions are
    ## attributed to the line of the operator expression itself
    ## (the call site) rather than to the line where fn was
    ## declared
    self.generateCall(fn, @[op.a, op.b], op.token.line)
proc unary(self: Compiler, node: UnaryExpr) {.inline.} =
    ## Compiles unary expressions such as decimal and
    ## bitwise negation: the operator is looked up as a
    ## function taking the operand's type (its return type
    ## is not constrained) and then called
    let operand = self.inferOrError(node.a)
    let signature = Type(kind: Function, returnType: Type(kind: Any), args: @[("", operand)])
    let impl = self.matchImpl(node.token.lexeme, signature, node)
    self.callUnaryOp(impl, node)
proc binary(self: Compiler, node: BinaryExpr) {.inline.} =
    ## Compiles all binary expressions: the operator is
    ## looked up as a function taking the types of the two
    ## operands (its return type is not constrained) and
    ## then called
    let left = self.inferOrError(node.a)
    let right = self.inferOrError(node.b)
    let signature = Type(kind: Function, returnType: Type(kind: Any), args: @[("", left), ("", right)])
    let impl = self.matchImpl(node.token.lexeme, signature, node)
    self.callBinaryOp(impl, node)
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers. Depending on what the
    ## identifier resolves to, this emits a constant load (for
    ## constants), the address of a function, a closure variable
    ## load or a plain stack variable load. The first time a
    ## variable is accessed as a closure variable, a StoreClosure
    ## instruction is also inserted into the code that has already
    ## been emitted, at the code position of the function the
    ## variable belongs to
    var s = self.resolveOrError(node)
    if s.isConst:
        # Constants are always emitted as Load* instructions
        # no matter the scope depth
        self.emitConstant(node, self.infer(node))
    else:
        if s.kind == NameKind.Function:
            # Functions have no runtime representation, they're just
            # a location to jump to, but we pretend they aren't and
            # resolve them to their address into our bytecode when
            # they're referenced
            self.emitByte(LoadUInt64, node.token.line)
            self.emitBytes(self.chunk.writeConstant(s.codePos.toLong()), node.token.line)
        elif s.depth > 0 and self.depth > 1 and not self.currentFunction.isNil():
            # Loads a closure variable. Stored in a separate "closure array" in the VM that does not
            # align its semantics with the call stack. This makes closures work as expected and is
            # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway)
            if not s.isClosedOver:
                # First time this variable is closed over: the current
                # function and all of its parents become closures and
                # their environments grow by one
                var fn = self.currentFunction.valueType
                while true:
                    fn.isClosure = true
                    fn.envLen += 1
                    if fn.parent.isNil():
                        break
                    fn = fn.parent
                s.isClosedOver = true
                self.closures.add(s)
                let stackIdx = self.getStackPos(s).toTriple()
                let closeIdx = self.closures.high().toTriple()
                let oldLen = self.chunk.code.len()
                # This madness makes it so that we can insert bytecode
                # at arbitrary offsets into our already compiled code and
                # have our metadata be up to date
                self.chunk.code.insert(StoreClosure.uint8, s.belongsTo.codePos)
                self.chunk.code.insert(stackIdx[0], s.belongsTo.codePos + 1)
                self.chunk.code.insert(stackIdx[1], s.belongsTo.codePos + 2)
                self.chunk.code.insert(stackIdx[2], s.belongsTo.codePos + 3)
                self.chunk.code.insert(closeIdx[0], s.belongsTo.codePos + 4)
                self.chunk.code.insert(closeIdx[1], s.belongsTo.codePos + 5)
                self.chunk.code.insert(closeIdx[2], s.belongsTo.codePos + 6)
                # 7 bytes were inserted (1 opcode + two 3-byte operands), so the
                # line metadata, the jumps and the CFI offsets are adjusted to match
                self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(s.belongsTo.codePos)) + 1] += 7
                self.fixJumps(oldLen, s.belongsTo.codePos)
                self.fixCFIOffsets(oldLen, s.belongsTo.codePos)
            let pos = self.getClosurePos(s)
            if pos == -1:
                self.error(&"cannot compute closure offset for '{s.ident.token.lexeme}'", s.ident)
            self.emitByte(LoadClosure, node.token.line)
            self.emitBytes(pos.toTriple(), node.token.line)
        else:
            # Static name resolution, loads value at index in the stack. Very fast. Much wow.
            self.emitByte(LoadVar, node.token.line)
            # No need to check for -1 here: we already did a nil check above!
            self.emitBytes(self.getStackPos(s).toTriple(), node.token.line)
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions. Assigning to a
    ## constant or to an immutable (let) name is an error;
    ## the assigned value must match the type of the name.
    ## Item assignment is not implemented yet
    case node.kind:
        of assignExpr:
            let assign = AssignExpr(node)
            let name = IdentExpr(assign.name)
            let target = self.resolveOrError(name)
            if target.isConst:
                self.error(&"cannot assign to '{name.token.lexeme}' (value is a constant)", name)
            elif target.isLet:
                self.error(&"cannot reassign '{name.token.lexeme}' (value is immutable)", name)
            self.check(assign.value, target.valueType)
            self.expression(assign.value)
            if target.isClosedOver:
                # Stores into a closure variable. These live in a
                # separate "closure array" in the VM, which is
                # indexed independently of the call stack
                self.emitByte(StoreClosure, assign.token.line)
                self.emitBytes(self.getClosurePos(target).toTriple(), assign.token.line)
            else:
                # Regular variable: store at its stack position
                self.emitByte(StoreVar, assign.token.line)
                self.emitBytes(self.getStackPos(target).toTriple(), assign.token.line)
        of setItemExpr:
            let node = SetItemExpr(node)
            let itemType = self.infer(node)
            if itemType.isNil():
                self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles block statements: the declarations inside
    ## the block are compiled in a new local scope, which
    ## is closed again at the end of the block. A warning
    ## is emitted for code following a break or continue
    ## statement
    self.beginScope()
    var last: Declaration
    for decl in node.code:
        if not last.isNil() and last.kind in [breakStmt, continueStmt]:
            self.warning(UnreachableCode, &"code after '{last.token.lexeme}' statement is unreachable")
        self.declaration(decl)
        last = decl
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements. The condition must be
    ## a boolean. Two jumps are emitted: one that skips the
    ## "then" branch when the condition is false and one, at
    ## the end of the "then" branch, that skips the "else"
    ## branch (emitted even when there is no "else" branch)
    self.check(node.condition, Type(kind: Bool))
    self.expression(node.condition)
    let skipThen = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.thenBranch)
    let skipElse = self.emitJump(JumpForwards, node.token.line)
    self.patchJump(skipThen)
    if not isNil(node.elseBranch):
        self.statement(node.elseBranch)
    self.patchJump(skipElse)
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops (and C-style for loops,
    ## which are desugared into them). The condition must be
    ## a boolean: when it is false the loop body is skipped,
    ## otherwise the body runs and execution jumps back to
    ## the condition
    self.check(node.condition, Type(kind: Bool))
    let loopStart = self.chunk.code.high()
    self.expression(node.condition)
    let exitJump = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.body)
    self.emitLoop(loopStart, node.token.line)
    self.patchJump(exitJump)
proc generateCall(self: Compiler, fn: Type, args: seq[Expression], line: int) =
    ## Version of generateCall that works on Type objects
    ## rather than Name objects. The function to call is
    ## assumed to be on the stack already. Every argument
    ## is type checked against the function's signature
    ## before being compiled

    # Placeholder for the return address, patched at the end
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    let returnSlot = self.chunk.consts.len() - 8
    # Arguments are passed last-to-first because of how
    # stacks work: they'll be reversed again at runtime.
    # Each one is matched with the parameter found at the
    # same distance from the end of the signature
    for i in countdown(args.high(), 0):
        self.check(args[i], fn.args[^(args.len() - i)].kind)
        self.expression(args[i])
    # The call instruction creates a new call frame and
    # jumps to the function's first instruction. Closures
    # carry the size of their environment as an additional
    # operand
    if fn.isClosure:
        self.emitByte(CallClosure, line)
        self.emitBytes(args.len().toTriple(), line)
        self.emitBytes(fn.envLen.toTriple(), line)
    else:
        self.emitByte(Call, line)
        self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(returnSlot)
proc generateCall(self: Compiler, fn: Name, args: seq[Expression], line: int) =
    ## Small wrapper that abstracts emitting a call to the
    ## given function. Delayed pragmas are dispatched first;
    ## builtin functions are handed to handleBuiltinFunction
    ## and produce no actual call. Calls to forward
    ## declarations are recorded so they can be patched later
    self.dispatchDelayedPragmas(fn)
    if fn.valueType.isBuiltinFunction:
        self.handleBuiltinFunction(fn.valueType, args, line)
        return
    # First, the function to call is loaded
    if fn.kind == NameKind.Var:
        self.identifier(VarDecl(fn.node).name)
    elif fn.kind == NameKind.Function:
        self.emitByte(LoadUInt64, line)
        self.emitBytes(self.chunk.writeConstant(fn.codePos.toLong()), line)
    if fn.valueType.forwarded:
        # Remember where the last 8 bytes of the constants
        # table start, for patchForwardDeclarations
        self.forwarded.add((fn, self.chunk.consts.high() - 7))
    # Placeholder for the return address, patched at the end
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    let returnSlot = self.chunk.consts.len() - 8
    # Arguments are compiled last-to-first
    for i in countdown(args.high(), 0):
        self.expression(args[i])
    # The call instruction creates a new call frame and
    # jumps to the function's first instruction. Closures
    # carry the size of their environment as an additional
    # operand
    if fn.valueType.isClosure:
        self.emitByte(CallClosure, line)
        self.emitBytes(args.len().toTriple(), line)
        self.emitBytes(fn.valueType.envLen.toTriple(), line)
    else:
        self.emitByte(Call, line)
        self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(returnSlot)
proc specialize(self: Compiler, name: Name, args: seq[Expression]): Name =
    ## Specializes a generic type by instantiating a
    ## concrete version of it. Returns a deep copy of name
    ## in which every generic argument type (and the return
    ## type, if it is generic) is replaced with the type
    ## inferred from the matching expression in args. The
    ## arguments of the specialized function are declared
    ## as new names. Only functions are handled for now
    var mapping: TableRef[string, Type] = newTable[string, Type]()
    var kind: Type
    result = deepCopy(name)
    result.valueType.isGeneric = false
    case name.kind:
        of NameKind.Function:
            # This first loop binds each generic to the type of the
            # argument it is used for, and checks if a user tries to
            # reassign a generic's name to a different type
            for i, (label, typ) in result.valueType.args:
                if typ.kind != Generic:
                    continue
                kind = self.infer(args[i])
                if typ.name in mapping and not self.compare(kind, mapping[typ.name]):
                    self.error(&"expected generic argument '{typ.name}' to be of type {self.typeToStr(mapping[typ.name])}, got {self.typeToStr(kind)} instead")
                mapping[typ.name] = kind
                result.valueType.args[i].kind = kind
            for (argExpr, argName) in zip(args, result.valueType.args):
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                self.names.add(Name(depth: name.depth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    file: self.file,
                                    isConst: false,
                                    ident: newIdentExpr(Token(lexeme: argName.name)),
                                    valueType: argName.kind,
                                    codePos: 0,
                                    isLet: false,
                                    line: name.line,
                                    belongsTo: result,
                                    kind: NameKind.Argument
                                    ))
            # A function may have no return type at all, in which
            # case there is nothing to substitute
            let returnType = result.valueType.returnType
            if not returnType.isNil() and returnType.kind == Generic:
                if returnType.name notin mapping:
                    # No argument uses this generic, so it has no concrete type
                    self.error(&"cannot specialize '{name.ident.token.lexeme}': generic return type '{returnType.name}' is not bound by any argument")
                result.valueType.returnType = mapping[returnType.name]
        else:
            discard # TODO: Custom user-defined types
proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} =
    ## Compiles code to call a chain of function calls.
    ## For calls to a plain identifier, the matching
    ## implementation is looked up using the types of the
    ## arguments (generic functions are specialized and
    ## compiled on the spot) and returned. It is an error
    ## if the type of an argument cannot be inferred or if
    ## the callee is not callable
    var args: seq[tuple[name: string, kind: Type]] = @[]
    var argExpr: seq[Expression] = @[]
    var kind: Type
    # TODO: Keyword arguments
    for i, argument in node.arguments.positionals:
        kind = self.infer(argument)
        if kind.isNil():
            # Positions in error messages are 1-based and
            # count from the first argument
            if node.callee.kind != identExpr:
                self.error(&"cannot infer the type of argument {i + 1} in call")
            else:
                self.error(&"cannot infer the type of argument {i + 1} in call to '{node.callee.token.lexeme}'")
        args.add(("", kind))
        argExpr.add(argument)
    case node.callee.kind:
        of identExpr:
            # Calls like hi()
            result = self.matchImpl(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: args), node)
            if result.valueType.isGeneric:
                # We can't instantiate a concrete version
                # of a generic function without the types
                # of its arguments, so we wait until the
                # very last moment to compile it, once
                # that info is available to us
                result = self.specialize(result, argExpr)
                self.funDecl(FunDecl(result.node), result)
            # Now we call it
            self.generateCall(result, argExpr, node.token.line)
        of NodeKind.callExpr:
            # Calling a call expression, like hello()()
            var node: Expression = node
            var all: seq[CallExpr] = @[]
            # Collects every nested call, outermost first...
            while CallExpr(node).callee.kind == callExpr:
                all.add(CallExpr(CallExpr(node).callee))
                node = CallExpr(node).callee
            # ...and compiles them starting from the innermost one
            for exp in reversed(all):
                self.callExpr(exp)
            case all[^1].callee.kind:
                of identExpr:
                    let fn = self.resolve(IdentExpr(all[^1].callee))
                    self.generateCall(fn.valueType.returnType, argExpr, fn.line)
                else:
                    discard # TODO: Lambdas
        # TODO: Calling lambdas on-the-fly (i.e. on the same line)
        else:
            let typ = self.infer(node)
            if typ.isNil():
                self.error(&"expression has no type")
            else:
                self.error(&"object of type '{self.typeToStr(typ)}' is not callable")
proc expression(self: Compiler, node: Expression) =
    ## Dispatches an expression node to the
    ## routine in charge of compiling it. Node
    ## kinds that are not handled here are
    ## reported as an internal error
    case node.kind:
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # Literals all look alike and their kind
            # field alone tells them apart, so a single
            # routine takes care of every one of them
            self.literal(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of groupingExpr:
            # Parenthesized expressions such as (2 + 1):
            # we just compile what's inside of them
            self.expression(GroupingExpr(node).expression)
        of unaryExpr:
            # Things like ~5 and -3
            self.unary(UnaryExpr(node))
        of binaryExpr:
            # Things like 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of NodeKind.callExpr:
            self.callExpr(CallExpr(node))  # TODO
        of setItemExpr, assignExpr:  # TODO: Get rid of this
            # The node is passed along as-is because its
            # true type would be lost in the call anyway:
            # self.assignment() tells the two kinds apart
            self.assignment(node)
        of getItemExpr:
            discard  # TODO: Get rid of this
        of pragmaExpr:
            discard  # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements, which are
    ## await expressions parsed in the context
    ## of statements so that they can be used
    ## standalone (basically an await expression
    ## followed by a semicolon). Await expressions
    ## and statements are the only native construct
    ## to run coroutines from within an already
    ## asynchronous context (which should be
    ## orchestrated by an event loop) and they block
    ## in the caller until the callee returns
    let line = node.token.line
    # The awaited expression is compiled
    # first, then the Await opcode follows
    self.expression(node.expression)
    self.emitByte(OpCode.Await, line)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ## The deferred expression is compiled into a scratch
    ## chunk and its bytecode is appended to self.deferred
    ## rather than to the current chunk
    let enclosing = self.chunk
    let scratch = newChunk()
    # The scratch chunk starts out with the
    # metadata of the chunk being compiled
    scratch.consts = enclosing.consts
    scratch.lines = enclosing.lines
    scratch.cfi = enclosing.cfi
    self.chunk = scratch
    self.expression(node.expression)
    for instruction in scratch.code:
        self.deferred.add(instruction)
    # Back to the real chunk, which takes
    # in the metadata of the scratch one
    # NOTE(review): the scratch metadata starts out as a copy
    # of the enclosing chunk's, so appending all of it looks
    # like it duplicates existing entries - confirm
    self.chunk = enclosing
    self.chunk.consts &= scratch.consts
    self.chunk.lines &= scratch.lines
    self.chunk.cfi &= scratch.cfi
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. The returned
    ## value (if any) is compiled and stored with
    ## SetResult, then a forward jump is emitted
    ## and recorded in the current function's
    ## retJumps so that it can be patched later
    let line = node.token.line
    if not node.value.isNil():
        self.expression(node.value)
        self.emitByte(OpCode.SetResult, line)
    # Setting the result and exiting the function are two
    # separate opcodes: here we only do the former and then
    # jump to the function's last return instruction, which
    # funDecl() always emits at the end of the function. This
    # greatly simplifies the design, as there's a single exit
    # point to jump to instead of many potential ones. Note
    # that, depending on whether the function has any local
    # variables, the jump might be patched to land on the
    # function's PopN/PopC instruction(s) rather than straight
    # on the return instruction
    let exitJump = self.emitJump(JumpForwards, line)
    self.currentFunction.valueType.retJumps.add(exitJump)
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded
    ## expression is compiled first, followed
    ## by the Yield opcode
    let line = node.token.line
    self.expression(node.expression)
    self.emitByte(OpCode.Yield, line)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception
    ## expression is compiled first, followed
    ## by the Raise opcode
    let line = node.token.line
    self.expression(node.exception)
    self.emitByte(OpCode.Raise, line)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue
    ## statement jumps to the next iteration in a
    ## loop, which is done by emitting a Jump to
    ## the start of the current loop
    let target = self.currentLoop.start
    # The jump's operand is encoded as a triple
    if target > 16777215:
        self.error("too much code to jump over in continue statement")
    let line = node.token.line
    self.emitByte(Jump, line)
    self.emitBytes(target.toTriple(), line)
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps to the end of the loop: the forward jump
    ## emitted here is recorded in the current loop's
    ## breakJumps
    let exitJump = self.emitJump(OpCode.JumpForwards, node.token.line)
    self.currentLoop.breakJumps.add(exitJump)
    # NOTE(review): the scope is closed only when the loop's
    # depth is greater than the current one - confirm that
    # this is the intended comparison
    if self.depth < self.currentLoop.depth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (which raise
    ## AssertionError if the expression is falsey):
    ## the asserted expression is compiled first,
    ## followed by the Assert opcode
    let line = node.token.line
    self.expression(node.expression)
    self.emitByte(OpCode.Assert, line)
proc forEachStmt(self: Compiler, node: ForEachStmt) =
    ## Compiles foreach loops. This is currently
    ## a placeholder: it has no body, so nothing
    ## is compiled for the node
    # TODO
proc importStmt(self: Compiler, node: ImportStmt) =
    ## Imports a module at compile time. The module
    ## is compiled via compileModule() and then the
    ## import is declared as a name. I/O and OS errors
    ## that occur in the process are reported as
    ## compile errors
    let modulePath = node.moduleName.token.lexeme
    # Only the last component of the
    # path is shown in error messages
    let filename = splitPath(modulePath).tail
    try:
        self.compileModule(modulePath)
        self.declareName(node)
    except IOError:
        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")
    except OSError:
        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()} [errno {osLastError()}]""")
proc exportStmt(self: Compiler, node: ExportStmt) =
    ## Exports a name at compile time to
    ## all modules importing us. Private
    ## names cannot be exported
    let target = self.resolveOrError(node.name)
    if target.isPrivate:
        self.error("cannot export private names")
    target.exported = true
    case target.kind:
        of NameKind.Module:
            # Exporting a module means exporting
            # everything that it defines as well
            for member in self.findByModule(target.ident.token.lexeme):
                member.exported = true
        of NameKind.Function:
            # Every function sharing this
            # name is exported along with it
            for candidate in self.findByName(target.ident.token.lexeme):
                if candidate.kind == NameKind.Function:
                    candidate.exported = true
        else:
            discard
proc printRepl(self: Compiler, typ: Type, node: Expression) =
    ## Emits instruction to print
    ## peon types in REPL mode. The
    ## opcode is chosen according to
    ## the kind of the given type and
    ## is emitted at the line of the
    ## given node. Types without a
    ## dedicated opcode are printed
    ## using PrintHex
    case typ.kind:
        of Int64:
            self.emitByte(PrintInt64, node.token.line)
        of UInt64:
            self.emitByte(PrintUInt64, node.token.line)
        of Int32:
            self.emitByte(PrintInt32, node.token.line)
        of UInt32:
            # Unsigned values need their own opcode, just
            # like all the other unsigned types do
            self.emitByte(PrintUInt32, node.token.line)
        of Int16:
            self.emitByte(PrintInt16, node.token.line)
        of UInt16:
            self.emitByte(PrintUInt16, node.token.line)
        of Int8:
            self.emitByte(PrintInt8, node.token.line)
        of UInt8:
            self.emitByte(PrintUInt8, node.token.line)
        of Float64:
            self.emitByte(PrintFloat64, node.token.line)
        of Float32:
            self.emitByte(PrintFloat32, node.token.line)
        of Bool:
            self.emitByte(PrintBool, node.token.line)
        of Nan:
            self.emitByte(PrintNan, node.token.line)
        of Inf:
            self.emitByte(PrintInf, node.token.line)
        of String:
            self.emitByte(PrintString, node.token.line)
        else:
            self.emitByte(PrintHex, node.token.line)
proc statement(self: Compiler, node: Statement) =
    ## Routes a statement node to the routine
    ## in charge of compiling it. Nodes that
    ## are not statements are compiled as
    ## expressions
    case node.kind:
        of exprStmt:
            let inner = ExprStmt(node).expression
            # The type is inferred before the
            # expression itself is compiled
            let innerType = self.infer(inner)
            self.expression(inner)
            # An expression without a type produces no
            # value, so in that case nothing is left to
            # print (in REPL mode) or pop (otherwise)
            if not innerType.isNil():
                if self.replMode:
                    self.printRepl(innerType, inner)
                else:
                    self.emitByte(Pop, node.token.line)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.whileStmt:
            # Note: for loops are desugared to
            # while loops by our parser already
            let enclosingLoop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.depth,
                                    breakJumps: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            # The enclosing loop (if any) takes over again
            self.currentLoop = enclosingLoop
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.exportStmt:
            self.exportStmt(ExportStmt(node))
        of NodeKind.tryStmt:
            # Nothing is compiled for try statements
            discard
        else:
            self.expression(Expression(node))
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations. The type of the
    ## variable is worked out first, then its value is
    ## compiled and a StoreVar instruction is emitted,
    ## whose operand is the stack position of the newly
    ## declared name
    # Our parser guarantees that at least one between the
    # type declaration and the value is always there
    let typ =
        if not node.value.isNil() and not node.valueType.isNil:
            # Both are there: the type of the value
            # has to match the declared one. If the
            # check doesn't fail, then we're good
            let expected = self.inferOrError(node.valueType)
            self.check(node.value, expected)
            expected
        elif node.value.isNil():
            # No value: the type declaration takes over
            self.inferOrError(node.valueType)
        else:
            # No type declaration: the type
            # of the value takes over
            self.inferOrError(node.value)
    # NOTE(review): node.value is nil here when the variable is
    # declared without a value - confirm self.expression() is
    # able to cope with that
    self.expression(node.value)
    let line = node.token.line
    self.emitByte(StoreVar, line)
    self.declareName(node)
    # The name we have just declared is the last one
    let declared = self.names[^1]
    declared.valueType = typ
    self.emitBytes(self.getStackPos(declared).toTriple(), line)
proc typeDecl(self: Compiler, node: TypeDecl, name: Name) =
    ## Compiles type declarations. This is
    ## currently a placeholder: it has no body,
    ## so nothing is compiled for the node
    # TODO
proc funDecl(self: Compiler, node: FunDecl, name: Name) =
    ## Compiles function declarations. The body of the
    ## function is emitted inline, preceded by a forward
    ## jump that skips over it, and its boundaries, arity
    ## and name are recorded in the chunk's CFI data.
    ## Built-in functions produce no code at all, while
    ## forward declarations (i.e. the ones without a body)
    ## are only recorded in self.forwarded. The function
    ## that was being compiled when funDecl() was called
    ## (if any) is restored upon exit
    if node.token.kind == Operator and node.name.token.lexeme in [".", ]:
        self.error(&"Due to current compiler limitations, the '{node.name.token.lexeme}' operator cannot be overridden", node.name)
    if name.valueType.isBuiltinFunction:
        return
    var jmp: int
    # We store the current function
    let function = self.currentFunction
    if not node.body.isNil():  # We ignore forward declarations
        if not self.currentFunction.isNil():
            # We're nested inside another function
            self.currentFunction.valueType.children.add(name.valueType)
            name.valueType.parent = function.valueType
        self.currentFunction = name
        # A function's code is just compiled linearly
        # and then jumped over
        jmp = self.emitJump(JumpForwards, node.token.line)
        name.codePos = self.chunk.code.len()
        # We let our debugger know this function's boundaries
        self.chunk.cfi.add(self.chunk.code.high().toTriple())
        self.cfiOffsets.add((start: self.chunk.code.high(), stop: 0, pos: self.chunk.cfi.len() - 3, fn: name))
        # The entries of cfiOffsets are tuples, which are copied
        # upon assignment, so we remember where ours is in order
        # to update it in place later on (compiling the body may
        # add more entries after it)
        let cfiIndex = self.cfiOffsets.high()
        let idx = self.chunk.cfi.len()
        self.chunk.cfi.add(0.toTriple())  # Patched later
        self.chunk.cfi.add(uint8(node.arguments.len()))
        if not node.name.isNil():
            var s = name.ident.token.lexeme
            if s.len() > uint16.high().int:
                # The length of the name is encoded using
                # two bytes, so longer names are truncated
                s = s[0..<uint16.high().int]
            # The length is computed after truncating, so
            # that it always agrees with the bytes we write
            self.chunk.cfi.add(s.len().toDouble())
            self.chunk.cfi.add(s.toBytes())
        else:
            self.chunk.cfi.add(0.toDouble())
        if BlockStmt(node.body).code.len() == 0:
            self.error("cannot declare function with empty body")
        # Since the deferred array is a linear
        # sequence of instructions and we want
        # to keep track to whose function's each
        # set of deferred instruction belongs,
        # we record the length of the deferred
        # array before compiling the function
        # and use this info later to compile
        # the try/finally block with the deferred
        # code
        let deferStart = self.deferred.len()
        var last: Declaration
        self.beginScope()
        for decl in BlockStmt(node.body).code:
            if not last.isNil():
                if last.kind == NodeKind.returnStmt:
                    self.warning(UnreachableCode, "code after 'return' statement is unreachable")
            self.declaration(decl)
            last = decl
        let typ = self.currentFunction.valueType.returnType
        var hasVal: bool = false
        case self.currentFunction.valueType.fun.kind:
            of NodeKind.funDecl:
                hasVal = self.currentFunction.valueType.fun.hasExplicitReturn
            of NodeKind.lambdaExpr:
                hasVal = LambdaExpr(self.currentFunction.node).hasExplicitReturn
            else:
                discard  # Unreachable
        if not hasVal and not typ.isNil():
            # There is no explicit return statement anywhere in the function's
            # body: while this is not a tremendously useful piece of information
            # (since the presence of at least one doesn't mean all control flow
            # cases are covered), it definitely is an error worth reporting
            self.error("function has an explicit return type, but no return statement was found", node)
        hasVal = hasVal and not typ.isNil()
        # All return statements land here
        for jump in self.currentFunction.valueType.retJumps:
            self.patchJump(jump)
        self.endScope()
        # Terminates the function's context. The operand of
        # Return tells whether the function returns a value
        self.emitByte(OpCode.Return, self.peek().token.line)
        if hasVal:
            self.emitByte(1, self.peek().token.line)
        else:
            self.emitByte(0, self.peek().token.line)
        # Now we know where the function ends, so
        # the placeholder in the CFI data is patched
        let stop = self.chunk.code.len().toTriple()
        self.chunk.cfi[idx] = stop[0]
        self.chunk.cfi[idx + 1] = stop[1]
        self.chunk.cfi[idx + 2] = stop[2]
        self.cfiOffsets[cfiIndex].stop = self.chunk.code.len()
        # Currently defer is not functional, so we
        # just pop the instructions
        for _ in deferStart..self.deferred.high():
            discard self.deferred.pop()
        # Well, we've compiled everything: time to patch
        # the jump offset
        self.patchJump(jmp)
    else:
        self.forwarded.add((name, 0))
    # Restores the enclosing function (if any).
    # Makes nested calls work (including recursion)
    self.currentFunction = function
proc declaration(self: Compiler, node: Declaration) =
    ## Handles all declarations. Functions and types
    ## are not compiled right away, but rather only
    ## when they're referenced the first time: here
    ## they are merely declared. Anything that is not
    ## a declaration is compiled as a statement
    case node.kind:
        of NodeKind.varDecl:
            # Variables can't wait: the stack must be
            # in the right state at runtime, so they
            # are compiled immediately
            self.varDecl(VarDecl(node))
        of NodeKind.funDecl, NodeKind.typeDecl:
            self.declareName(node)
        else:
            self.statement(Statement(node))
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              incremental: bool = false, isMainModule: bool = true, disabledWarnings: seq[WarningKind] = @[], showMismatches: bool = false,
              mode: CompileMode = Debug): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object, which is returned. Code is emitted into
    ## the given chunk if there is one and into a brand
    ## new chunk otherwise. Unless incremental is true,
    ## the jumps recorded by the compiler are reset
    ## before compiling
    self.chunk = if chunk.isNil(): newChunk() else: chunk
    # What we're compiling and where it comes from
    self.ast = ast
    self.source = source
    self.lines = lines
    self.file = file
    self.currentModule = extractFilename(file).replace(".pn", "")
    self.isMainModule = isMainModule
    # How we're compiling it
    self.mode = mode
    self.disabledWarnings = disabledWarnings
    self.showMismatches = showMismatches
    # We start from the first node, at the top
    # level and outside of any function
    self.current = 0
    self.depth = 0
    self.currentFunction = nil
    if not incremental:
        self.jumps = @[]
    let programStart = self.beginProgram()
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.terminateProgram(programStart)
    # TODO: REPL is broken, we need a new way to make
    # incremental compilation resume from where it stopped!
    result = self.chunk
proc compileModule(self: Compiler, moduleName: string) =
    ## Compiles an imported module into an existing chunk
    ## using the compiler's internal parser and lexer objects.
    ## The module is searched for in each of the directories
    ## listed in moduleLookupPaths, where an empty entry stands
    ## for the directory of the file being compiled: a compile
    ## error is raised if it can't be found in any of them.
    ## Modules that have been compiled already are skipped.
    ## The state of the module being compiled is saved before
    ## compiling the imported one and restored afterwards
    var path = ""
    for i, searchPath in moduleLookupPaths:
        if searchPath == "":
            path = joinPath(getCurrentDir(), joinPath(splitPath(self.file).head, moduleName))
        else:
            path = joinPath(getCurrentDir(), joinPath(searchPath, moduleName))
        if fileExists(path):
            break
        elif i == moduleLookupPaths.high():
            # We have run out of places to look into. Note
            # that the index must be compared with the last
            # index of the list of lookup paths and not with
            # that of the path string itself
            self.error(&"""could not import '{path}': module not found""")
    if self.modules.contains(path):
        return
    let source = readFile(path)
    # Compiling the module overwrites the
    # state below, so we save it first
    let current = self.current
    let ast = self.ast
    let file = self.file
    let module = self.currentModule
    let lines = self.lines
    let src = self.source
    # NOTE(review): isMainModule is not restored after
    # the module is compiled - confirm that's intended
    self.isMainModule = false
    discard self.compile(self.parser.parse(self.lexer.lex(source, path),
                                           path, self.lexer.getLines(),
                                           source, persist=true),
                         path, self.lexer.getLines(), source, chunk=self.chunk, incremental=true,
                         isMainModule=false, disabledWarnings=self.disabledWarnings,
                         showMismatches=self.showMismatches, mode=self.mode)
    # Back to the module that requested the import
    self.depth = 0
    self.current = current
    self.ast = ast
    self.file = file
    self.currentModule = module
    self.lines = lines
    self.source = src
    self.modules.incl(path)