2196 lines
93 KiB
Nim
2196 lines
93 KiB
Nim
# Copyright 2022 Mattia Giambirtone & All Contributors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
## The code generator for Peon bytecode
|
|
import std/tables
|
|
import std/strformat
|
|
import std/algorithm
|
|
import std/parseutils
|
|
import std/strutils
|
|
import std/sequtils
|
|
import std/sets
|
|
import std/os
|
|
|
|
|
|
import opcodes
|
|
import frontend/compiler/compiler
|
|
import frontend/parsing/lexer
|
|
import frontend/parsing/parser
|
|
import frontend/parsing/ast
|
|
import util/multibyte
|
|
|
|
|
|
export opcodes
|
|
|
|
|
|
type

  CompilerFunc = object
    ## Describes a compiler-internal routine
    ## that is invoked on behalf of a pragma
    kind: PragmaKind
    handler: proc (self: BytecodeCompiler, pragma: Pragma, name: Name)

  Loop = object
    ## Bookkeeping record for a loop. The compiler
    ## uses it to compute the jump offsets needed
    ## by continue and break statements
    # start: bytecode position where the loop begins
    # depth: scope depth at which the loop lives
    start, depth: int
    # Identifiers of the jumps emitted by break
    # statements, which still have to be patched
    breakJumps: seq[int]

  NamedBlock = ref object
    ## Bookkeeping record for a named block. Plays
    ## the same role as a Loop object when emitting
    ## jump offsets
    start, depth: int
    breakJumps: seq[int]
    name: string
    broken: bool

  BytecodeCompiler* = ref object of Compiler
    ## Holds the state of the Peon bytecode compiler

    # Chunk of bytecode that code is written into
    chunk: Chunk
    # Loop currently being compiled (tells us
    # where jumps have to go)
    currentLoop: Loop
    # Named blocks, kept as a stack
    namedBlocks: seq[NamedBlock]
    # Pragma name -> compiler procedure to run
    compilerProcs: TableRef[string, CompilerFunc]
    # Position of every jump emitted so far
    jumps: seq[tuple[patched: bool, offset: int]]
    # Metadata describing where functions are located
    functions: seq[tuple[start, stop, pos: int, fn: Name]]
    forwarded: seq[tuple[name: Name, pos: int]]
    # Topmost occupied stack slot of the
    # current frame (counted from zero)
    stackIndex: int
    lambdas: seq[LambdaExpr]
|
|
|
|
|
|
# Forward declarations: these routines are referenced
# before the point where their bodies are defined
proc compile*(self: BytecodeCompiler, ast: seq[Declaration], file: string,
              lines: seq[tuple[start, stop: int]], source: string,
              chunk: Chunk = nil, incremental: bool = false,
              isMainModule: bool = true,
              disabledWarnings: seq[WarningKind] = @[],
              showMismatches: bool = false,
              mode: CompileMode = Debug): Chunk
proc statement(self: BytecodeCompiler, node: Statement)
proc declaration(self: BytecodeCompiler, node: Declaration)
proc varDecl(self: BytecodeCompiler, node: VarDecl)
proc specialize(self: BytecodeCompiler, typ: Type,
                args: seq[Expression]): Type {.discardable.}
proc patchReturnAddress(self: BytecodeCompiler, pos: int)
proc handleMagicPragma(self: BytecodeCompiler, pragma: Pragma, name: Name)
proc handlePurePragma(self: BytecodeCompiler, pragma: Pragma, name: Name)
proc handleErrorPragma(self: BytecodeCompiler, pragma: Pragma, name: Name)
method dispatchPragmas(self: BytecodeCompiler, name: Name)
method dispatchDelayedPragmas(self: BytecodeCompiler, name: Name)
proc funDecl(self: BytecodeCompiler, node: FunDecl, name: Name)
proc compileModule(self: BytecodeCompiler, module: Name)
proc generateCall(self: BytecodeCompiler, fn: Name, args: seq[Expression],
                  line: int)
# End of forward declarations
|
|
|
|
|
|
proc newBytecodeCompiler*(replMode: bool = false): BytecodeCompiler =
  ## Creates a fresh BytecodeCompiler with empty state and the
  ## built-in pragma handlers ("magic", "pure" and "error")
  ## registered. The replMode flag is stored as-is on the object
  result = BytecodeCompiler(
    ast: @[],
    current: 0,
    file: "",
    names: @[],
    depth: 0,
    lines: @[],
    jumps: @[],
    modules: newTable[string, Name](),
    lambdas: @[],
    currentFunction: nil,
    replMode: replMode,
    currentModule: nil,
    compilerProcs: {
      "magic": CompilerFunc(kind: Immediate, handler: handleMagicPragma),
      "pure": CompilerFunc(kind: Immediate, handler: handlePurePragma),
      "error": CompilerFunc(kind: Delayed, handler: handleErrorPragma)
    }.newTable(),
    source: "",
    lexer: newLexer(),
    isMainModule: false,
    forwarded: @[],
    disabledWarnings: @[],
    functions: @[],
    stackIndex: 1
  )
  # The lexer's symbol table is filled before the parser is
  # created, mirroring the order of the field-by-field setup
  result.lexer.fillSymbolTable()
  result.parser = newParser()
|
|
|
|
## Low-level code generation helpers
|
|
|
|
proc emitByte(self: BytecodeCompiler, byt: OpCode | uint8, line: int) {.inline.} =
  ## Writes one byte (an opcode or a raw value) to
  ## the chunk under construction, tagging it with
  ## the given source line
  write(self.chunk, uint8(byt), line)
|
|
|
|
|
|
proc emitBytes(self: BytecodeCompiler, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
  ## Writes a run of bytes to the current chunk by
  ## forwarding each element, in order, to emitByte
  for position in 0 .. bytarr.high:
    emitByte(self, bytarr[position], line)
|
|
|
|
|
|
proc printRepl(self: BytecodeCompiler, typ: Type, node: Expression) =
  ## Emits the print instruction matching the given
  ## peon type (used in REPL mode). Kinds without a
  ## dedicated print opcode fall back to PrintHex. The
  ## instruction is attributed to the line of node's token
  let opcode =
    case typ.kind
    of Int64: PrintInt64
    of UInt64: PrintUInt64
    of Int32: PrintInt32
    # Unsigned 32-bit values need the unsigned opcode (the
    # 'print' builtin already uses PrintUInt32 for them)
    of UInt32: PrintUInt32
    of Int16: PrintInt16
    of UInt16: PrintUInt16
    of Int8: PrintInt8
    of UInt8: PrintUInt8
    of Float64: PrintFloat64
    of Float32: PrintFloat32
    of Bool: PrintBool
    of TypeKind.Nan: PrintNan
    of TypeKind.Inf: PrintInf
    of TypeKind.String: PrintString
    else: PrintHex
  self.emitByte(opcode, node.token.line)
|
|
|
|
|
|
proc makeConstant(self: BytecodeCompiler, val: Expression, typ: Type): array[3, uint8] =
  ## Stores the literal held by val in the chunk's constant
  ## table, encoded according to typ, and returns the 3-byte
  ## value produced by writeConstant. Kinds that are not
  ## handled here leave the result zero-initialized
  var digits: string
  if typ.kind in [UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64]:
    digits = val.token.lexeme
    # Integer literals may carry a type suffix introduced by
    # a single quote: drop the last quote and what follows it
    let tick = digits.rfind('\'')
    if tick >= 0:
      digits.setLen(tick)
  case typ.kind
  of UInt8, Int8:
    result = self.chunk.writeConstant([uint8(parseInt(digits))])
  of Int16, UInt16:
    result = self.chunk.writeConstant(parseInt(digits).toDouble())
  of Int32, UInt32:
    result = self.chunk.writeConstant(parseInt(digits).toQuad())
  of Int64:
    result = self.chunk.writeConstant(parseInt(digits).toLong())
  of UInt64:
    result = self.chunk.writeConstant(parseBiggestUInt(digits).toLong())
  of String:
    # Only the first character of the lexeme is skipped here
    let payload = val.token.lexeme[1..^1]
    result = self.chunk.writeConstant(payload.toBytes())
  of Float32, Float64:
    var parsed: float = 0.0
    discard parseFloat(val.token.lexeme, parsed)
    if typ.kind == Float32:
      result = self.chunk.writeConstant(cast[array[4, uint8]](float32(parsed)))
    else:
      result = self.chunk.writeConstant(cast[array[8, uint8]](parsed))
  else:
    discard
|
|
|
|
|
|
proc emitConstant(self: BytecodeCompiler, obj: Expression, kind: Type) =
  ## Emits the load instruction for a constant of the given
  ## type, followed by the operand returned by makeConstant.
  ## String constants additionally carry a 3-byte length
  ## (lexeme length minus 2) right after the opcode
  let line = obj.token.line
  case kind.kind
  of Int64: self.emitByte(LoadInt64, line)
  of UInt64: self.emitByte(LoadUInt64, line)
  of Int32: self.emitByte(LoadInt32, line)
  of UInt32: self.emitByte(LoadUInt32, line)
  of Int16: self.emitByte(LoadInt16, line)
  of UInt16: self.emitByte(LoadUInt16, line)
  of Int8: self.emitByte(LoadInt8, line)
  of UInt8: self.emitByte(LoadUInt8, line)
  of String:
    self.emitByte(LoadString, line)
    let text = LiteralExpr(obj).literal.lexeme
    if text.len() >= 16777216:
      self.error("string constants cannot be larger than 16777215 bytes")
    self.emitBytes((text.len() - 2).toTriple(), line)
  of Float32: self.emitByte(LoadFloat32, line)
  of Float64: self.emitByte(LoadFloat64, line)
  else:
    discard # TODO
  # The constant's operand is emitted for every kind,
  # including the ones that got no opcode above
  let operand = self.makeConstant(obj, kind)
  self.emitBytes(operand, line)
|
|
|
|
|
|
proc setJump(self: BytecodeCompiler, offset: int, jmp: array[3, uint8]) =
  ## Overwrites the three bytes that follow the
  ## byte at the given offset with the contents
  ## of jmp
  for k in 0 .. 2:
    self.chunk.code[offset + 1 + k] = jmp[k]
|
|
|
|
|
|
proc setJump(self: BytecodeCompiler, offset: int, jmp: seq[uint8]) =
  ## Overwrites the three bytes that follow the
  ## byte at the given offset with the first three
  ## elements of jmp
  for k in 0 .. 2:
    self.chunk.code[offset + 1 + k] = jmp[k]
|
|
|
|
|
|
proc emitJump(self: BytecodeCompiler, opcode: OpCode, line: int): int =
  ## Writes the given jump opcode followed by a zeroed
  ## 3-byte placeholder operand and records the jump as
  ## unpatched. The returned value is the jump's index in
  ## the internal jump list, to be handed to patchJump
  self.emitByte(opcode, line)
  # The opcode we just wrote is the last byte of the code
  let opcodePos = self.chunk.code.len() - 1
  self.jumps.add((patched: false, offset: opcodePos))
  self.emitBytes(0.toTriple(), line)
  result = self.jumps.len() - 1
|
|
|
|
|
|
proc fixFunctionOffsets(self: BytecodeCompiler, where, oldLen: int) =
  ## Shifts the recorded boundaries of every function that
  ## starts at or after `where` by the amount the code grew
  ## relative to `oldLen`. Both the compiler's own function
  ## list and the chunk's encoded function table are updated
  let delta = self.chunk.code.len() - oldLen
  if delta == 0:
    return
  for entry in self.functions.mitems():
    if entry.start < where:
      continue
    # First triple of the table entry: the start offset
    let startBytes = (entry.start + delta).toTriple()
    for k in 0 .. 2:
      self.chunk.functions[entry.pos + k] = startBytes[k]
    # Second triple: decoded, shifted and written back
    let previous = [self.chunk.functions[entry.pos + 3],
                    self.chunk.functions[entry.pos + 4],
                    self.chunk.functions[entry.pos + 5]].fromTriple().int
    let shifted = (previous + delta).toTriple()
    for k in 0 .. 2:
      self.chunk.functions[entry.pos + 3 + k] = shifted[k]
    entry.start += delta
    entry.stop += delta
|
|
|
|
|
|
proc fixJumps(self: BytecodeCompiler, where, oldLen: int) =
  ## Updates the recorded position of every jump located
  ## at or after `where` once the code segment has changed
  ## size relative to `oldLen`
  let growth = self.chunk.code.len() - oldLen
  if growth == 0:
    return
  for jump in self.jumps.mitems():
    if jump.offset < where:
      continue
    # The jump instruction itself was moved, so the
    # compiler's jump list has to follow it
    jump.offset += growth
    # The operand found at the new position is written
    # back through setJump
    let operand = self.chunk.code[jump.offset + 1..jump.offset + 3]
    self.setJump(jump.offset, operand)
|
|
|
|
|
|
proc fixLines(self: BytecodeCompiler, where, count: int, added: bool = true) =
  ## Adjusts the chunk's line metadata after the code segment
  ## changed size. `where` is the position of the change and
  ## `count` the number of bytes involved: they were injected
  ## when `added` is true (the default) and removed otherwise
  let slot = self.chunk.getIdx(self.chunk.getLine(where)) + 1
  if added:
    # No bounds checking: a single line is not expected to
    # ever hold anywhere near int.high() instructions
    inc(self.chunk.lines[slot], count)
  elif self.chunk.lines[slot] > 0:
    dec(self.chunk.lines[slot], count)
|
|
|
|
|
|
proc fixNames(self: BytecodeCompiler, where, oldLen: int) =
  ## Shifts the codePos of every name located past `where`
  ## by the amount the bytecode grew relative to `oldLen`
  let delta = self.chunk.code.len() - oldLen
  for entry in self.names:
    if entry.codePos <= where:
      continue
    entry.codePos += delta
    # NOTE(review): the function location is assumed to be
    # shifted only for names that moved; confirm against
    # the upstream indentation of this block
    if entry.valueType.kind == Function:
      entry.valueType.location += delta
|
|
|
|
|
|
proc insertAt(self: BytecodeCompiler, where: int, opcode: OpCode, data: openarray[uint8]): int {.used.} =
  ## Inserts the given opcode, followed by its operand bytes,
  ## at position `where` of the chunk's code segment and then
  ## refreshes all metadata that refers to code positions.
  ## Returns the position right after the inserted bytes
  ## (handy for consecutive calls)
  result = where
  let oldLen = self.chunk.code.len()
  self.chunk.code.insert(uint8(opcode), where)
  inc(result)
  for i, item in data:
    self.chunk.code.insert(item, where + i + 1)
    inc(result)
  # Changing the size of the code segment forces us to
  # update every piece of metadata pointing into it
  self.fixJumps(where, oldLen)
  self.fixLines(where, self.chunk.code.len() - oldLen, true)
  self.fixNames(where, oldLen)
  # fixFunctionOffsets takes (where, oldLen), like the other
  # fix* helpers: passing them the other way round made it
  # compute a bogus offset and compare against the wrong bound
  self.fixFunctionOffsets(where, oldLen)
|
|
|
|
|
|
|
|
proc patchJump(self: BytecodeCompiler, offset: int) =
  ## Fills in the operand of a relative jump created by
  ## emitJump so that it lands on the current end of the
  ## code. `offset` is the identifier emitJump returned
  let origin = self.jumps[offset].offset
  let distance: int = self.chunk.code.len() - origin
  if distance < 0:
    self.error("jump size cannot be negative (This is an internal error and most likely a bug)")
  if distance > 16777215:
    # TODO: Emit consecutive jumps using insertAt
    self.error("cannot jump more than 16777215 instructions")
  if distance > 0:
    # The distance is measured from the opcode, so the four
    # bytes of the jump instruction itself are discounted
    self.setJump(origin, (distance - 4).toTriple())
    self.jumps[offset].patched = true
|
|
|
|
|
|
proc handleBuiltinFunction(self: BytecodeCompiler, fn: Type, args: seq[Expression], line: int) =
  ## Generates the code for a call to a builtin function
  ## (addition, subtraction, print and so on). Most builtins
  ## map directly to a single opcode; print, the logical
  ## operators and cast get dedicated treatment
  if fn.builtinOp notin ["LogicalOr", "LogicalAnd"]:
    # The logical operators short-circuit, so they compile
    # their own operands further below
    case args.len()
    of 2:
      self.expression(args[1])
      self.expression(args[0])
    of 1:
      self.expression(args[0])
    else:
      discard
  const simpleOps: Table[string, OpCode] = {
    "Negate": Negate,
    "NegateFloat32": NegateFloat32,
    "NegateFloat64": NegateFloat64,
    "Add": Add,
    "Subtract": Subtract,
    "Divide": Divide,
    "Multiply": Multiply,
    "SignedDivide": SignedDivide,
    "AddFloat64": AddFloat64,
    "SubtractFloat64": SubtractFloat64,
    "DivideFloat64": DivideFloat64,
    "MultiplyFloat64": MultiplyFloat64,
    "AddFloat32": AddFloat32,
    "SubtractFloat32": SubtractFloat32,
    "DivideFloat32": DivideFloat32,
    "MultiplyFloat32": MultiplyFloat32,
    "Pow": Pow,
    "SignedPow": SignedPow,
    "PowFloat32": PowFloat32,
    "PowFloat64": PowFloat64,
    "Mod": Mod,
    "SignedMod": SignedMod,
    "ModFloat32": ModFloat32,
    "ModFloat64": ModFloat64,
    "Or": Or,
    "And": And,
    "Xor": Xor,
    "Not": Not,
    "LShift": LShift,
    "RShift": RShift,
    "Equal": Equal,
    "NotEqual": NotEqual,
    "LessThan": LessThan,
    "GreaterThan": GreaterThan,
    "LessOrEqual": LessOrEqual,
    "GreaterOrEqual": GreaterOrEqual,
    "SignedLessThan": SignedLessThan,
    "SignedGreaterThan": SignedGreaterThan,
    "SignedLessOrEqual": SignedLessOrEqual,
    "SignedGreaterOrEqual": SignedGreaterOrEqual,
    "Float32LessThan": Float32LessThan,
    "Float32GreaterThan": Float32GreaterThan,
    "Float32LessOrEqual": Float32LessOrEqual,
    "Float32GreaterOrEqual": Float32GreaterOrEqual,
    "Float64LessThan": Float64LessThan,
    "Float64GreaterThan": Float64GreaterThan,
    "Float64LessOrEqual": Float64LessOrEqual,
    "Float64GreaterOrEqual": Float64GreaterOrEqual,
    "PrintString": PrintString,
    "SysClock64": SysClock64,
    "LogicalNot": LogicalNot,
    "NegInf": LoadNInf,
    "Identity": Identity
  }.toTable()
  if fn.builtinOp == "print":
    # The print opcode depends on the type of the argument
    let typ = self.inferOrError(args[0]).unwrap()
    case typ.kind
    of Int64: self.emitByte(PrintInt64, line)
    of Int32: self.emitByte(PrintInt32, line)
    of Int16: self.emitByte(PrintInt16, line)
    of Int8: self.emitByte(PrintInt8, line)
    of UInt64: self.emitByte(PrintUInt64, line)
    of UInt32: self.emitByte(PrintUInt32, line)
    of UInt16: self.emitByte(PrintUInt16, line)
    of UInt8: self.emitByte(PrintUInt8, line)
    of Float64: self.emitByte(PrintFloat64, line)
    of Float32: self.emitByte(PrintFloat32, line)
    of String: self.emitByte(PrintString, line)
    of Bool: self.emitByte(PrintBool, line)
    of TypeKind.Nan: self.emitByte(PrintNan, line)
    of TypeKind.Inf: self.emitByte(PrintInf, line)
    of Function:
      # Functions are printed as a descriptive string that
      # is built here and loaded as a string constant
      self.emitByte(LoadString, line)
      let hexLoc = typ.location.toHex()
      # Leading zeroes are dropped, keeping at least one digit
      var firstDigit = 0
      while firstDigit < hexLoc.high and hexLoc[firstDigit] == '0':
        inc(firstDigit)
      let loc = hexLoc[firstDigit..^1]
      let str =
        if typ.isLambda:
          &"anonymous function at 0x{loc}"
        else:
          &"function '{FunDecl(typ.fun).name.token.lexeme}' at 0x{loc}"
      self.emitBytes(str.len().toTriple(), line)
      self.emitBytes(self.chunk.writeConstant(str.toBytes()), line)
      self.emitByte(PrintString, line)
    else:
      self.error(&"invalid type {self.stringify(typ)} for built-in 'print'", args[0])
    return
  if fn.builtinOp in simpleOps:
    self.emitByte(simpleOps[fn.builtinOp], line)
    return
  # What is left are the builtins that need more
  # than a single instruction (or none at all)
  case fn.builtinOp
  of "LogicalOr", "LogicalAnd":
    # Short-circuit evaluation: the right operand is
    # skipped depending on the value of the left one
    let jumpKind =
      if fn.builtinOp == "LogicalOr": JumpIfTrue
      else: JumpIfFalseOrPop
    self.expression(args[0])
    let skip = self.emitJump(jumpKind, line)
    self.expression(args[1])
    self.patchJump(skip)
  of "cast":
    # Type casts only exist at compile time: the underlying
    # data representation does not change, so no code is
    # produced. The "cast" pragma is only there so that the
    # peon stub can have no body
    discard
  else:
    self.error(&"unknown built-in: '{fn.builtinOp}'", fn.fun)
|
|
|
|
|
|
proc patchForwardDeclarations(self: BytecodeCompiler) =
  ## Resolves every recorded forward declaration to its
  ## implementation and writes the implementation's code
  ## position into the constant slot reserved for it, so
  ## that calls to forward-declared functions work
  for (forwarded, position) in self.forwarded:
    let impl = self.match(forwarded.ident.token.lexeme, forwarded.valueType, allowFwd=false)
    if forwarded.isPrivate != impl.isPrivate:
      self.error(&"implementation of '{impl.ident.token.lexeme}' has a mismatching visibility modifier from its forward declaration", impl.ident)
    if position == 0:
      # Entry created by funDecl: it only exists to make
      # sure no forward declaration is left unimplemented,
      # so there is nothing to patch
      continue
    let address: array[8, uint8] = impl.codePos.toLong()
    for k in 0 .. 7:
      self.chunk.consts[position + k] = address[k]
|
|
|
|
|
|
proc endScope(self: BytecodeCompiler) =
  ## Closes the current local scope: names declared in deeper
  ## scopes are removed from the name list, the values that
  ## exist at runtime are popped off the stack and warnings
  ## are produced for names that were declared but never used
  if self.depth < 0:
    self.error("cannot call endScope with depth < 0 (This is an internal error and most likely a bug)")
  dec(self.depth)
  # We track both the names going out of scope and how many
  # of them actually need to be popped at runtime (only
  # variables and function arguments materialize there)
  var names: seq[Name] = @[]
  var popCount = 0
  for name in self.names:
    if self.replMode and name.depth == 0:
      continue
    # Only names in scopes deeper than ours go away
    if name.depth > self.depth:
      if name.depth == 0 and not self.isMainModule:
        # Global names coming from other modules only go out
        # of scope when the global scope of the main module
        # is closed (i.e. at the end of the whole program)
        continue
      names.add(name)
      # Names that do not exist at runtime need no pop
      # instruction (they are still removed from the name
      # list below so they can't be referenced anymore)
      if name.kind notin [NameKind.Var, NameKind.Argument]:
        continue
      elif name.kind == NameKind.Argument and not name.belongsTo.isNil():
        if name.belongsTo.valueType.isBuiltin:
          # Arguments to builtin functions become temporaries
          # on the stack and are popped automatically
          continue
        if name.belongsTo.valueType.isAuto:
          # Automatic functions do not materialize at
          # runtime, so their arguments don't either
          continue
      # Names generated internally by the compiler are copies
      # of existing ones: only the "real" one gets popped
      if not name.isReal:
        continue
      inc(popCount)
      if not name.resolved:
        # Warn about names that are declared but never used
        case name.kind:
          of NameKind.Var:
            if not name.ident.token.lexeme.startsWith("_") and name.isPrivate:
              self.warning(UnusedName, &"'{name.ident.token.lexeme}' is declared but not used (add '_' prefix to silence warning)", name)
          of NameKind.Argument:
            if not name.ident.token.lexeme.startsWith("_") and name.isPrivate:
              if not name.belongsTo.isNil() and not name.belongsTo.valueType.isBuiltin and name.belongsTo.isReal and name.belongsTo.resolved:
                # Builtin functions never use their arguments.
                # The warning is also skipped for functions
                # generated by the compiler (e.g. by generic
                # specialization), which do not exist in the
                # user's code and are likely duplicated anyway
                self.warning(UnusedName, &"argument '{name.ident.token.lexeme}' is unused (add '_' prefix to silence warning)", name)
          else:
            discard
  dec(self.stackIndex, popCount)
  if popCount > 1:
    # PopN takes a 2-byte operand, so large counts are split
    # into batches of at most 65535. Clamping each batch keeps
    # the loop making progress even when the low 16 bits of
    # the remaining count are zero
    while popCount > 0:
      let batch = min(popCount, 65535)
      self.emitByte(PopN, self.peek().token.line)
      self.emitBytes(batch.toDouble(), self.peek().token.line)
      popCount -= batch
  elif popCount == 1:
    # A single value is popped with the plain pop instruction
    self.emitByte(PopC, self.peek().token.line)
  # Finally, forget every name that went out of scope. The
  # filter runs over the list in a single pass instead of
  # deleting entries while indexing into it
  self.names.keepItIf(it notin names)
|
|
|
|
|
|
proc emitLoop(self: BytecodeCompiler, begin: int, line: int) =
  ## Emits a JumpBackwards instruction whose 3-byte operand
  ## is the distance from the current end of the code back
  ## to `begin`
  let lastByte = self.chunk.code.len() - 1
  let distance = lastByte - begin + 4
  if distance > 16777215:
    # TODO: Emit consecutive jumps?
    self.error("cannot jump more than 16777215 bytecode instructions")
  self.emitByte(JumpBackwards, line)
  self.emitBytes(distance.toTriple(), line)
|
|
|
|
|
|
proc patchBreaks(self: BytecodeCompiler) =
  ## Patches the jumps recorded for break statements, both
  ## for the current loop and for every named block. This
  ## happens late because the amount of code to skip is
  ## only known once the loop has been fully compiled
  for pending in items(self.currentLoop.breakJumps):
    self.patchJump(pending)
  for namedBlock in items(self.namedBlocks):
    for pending in items(namedBlock.breakJumps):
      self.patchJump(pending)
|
|
|
|
|
|
proc handleMagicPragma(self: BytecodeCompiler, pragma: Pragma, name: Name) =
  ## Handles the "magic" pragma, which takes exactly one
  ## constant string argument. On a function declaration it
  ## marks the function as a compiled builtin implementing
  ## the named operation; on a type declaration it replaces
  ## the name's type with the named intrinsic. Assumes the
  ## given name is already declared
  if pragma.args.len() != 1:
    self.error(&"'magic' pragma: wrong number of arguments (expected 1, got {len(pragma.args)})")
  elif pragma.args[0].kind != strExpr:
    self.error(&"'magic' pragma: wrong argument type (constant string expected, got {self.stringify(self.inferOrError(pragma.args[0]))})")
  elif name.node.kind == NodeKind.funDecl:
    name.valueType.isBuiltin = true
    # [1..^2] strips the quotes around the string literal
    name.valueType.builtinOp = pragma.args[0].token.lexeme[1..^2]
    name.valueType.compiled = true
  elif name.node.kind == NodeKind.typeDecl:
    name.valueType = pragma.args[0].token.lexeme[1..^2].toIntrinsic()
    # The nil check has to come first: reading the kind of a
    # nil type would crash before the error could be reported
    if name.valueType.isNil():
      self.error("'magic' pragma: wrong argument value", pragma.args[0])
    if name.valueType.kind == All:
      self.error("don't even think about it (compiler-chan is angry at you :/)", pragma)
    name.valueType.isBuiltin = true
  else:
    self.error("'magic' pragma is not valid in this context")
|
|
|
|
|
|
proc handleErrorPragma(self: BytecodeCompiler, pragma: Pragma, name: Name) =
  ## Handles the "error" pragma: after validating its single
  ## constant string argument, reports that string (without
  ## the surrounding quotes) as a compilation error
  let arguments = pragma.args
  if arguments.len() != 1:
    self.error("'error' pragma: wrong number of arguments")
  elif arguments[0].kind != strExpr:
    self.error("'error' pragma: wrong type of argument (constant string expected)")
  elif not name.isNil() and name.node.kind != NodeKind.funDecl:
    self.error("'error' pragma is not valid in this context")
  let message = arguments[0].token.lexeme[1..^2]
  self.error(message)
|
|
|
|
|
|
proc handlePurePragma(self: BytecodeCompiler, pragma: Pragma, name: Name) =
  ## Handles the "pure" pragma by flagging the function
  ## declaration or lambda it is attached to as pure. Any
  ## other kind of node is rejected with an error
  let nodeKind = name.node.kind
  if nodeKind == NodeKind.funDecl:
    FunDecl(name.node).isPure = true
  elif nodeKind == NodeKind.lambdaExpr:
    LambdaExpr(name.node).isPure = true
  else:
    self.error("'pure' pragma is not valid in this context")
|
|
|
|
|
|
method dispatchPragmas(self: BytecodeCompiler, name: Name) =
  ## Runs the handlers of the immediate pragmas attached
  ## to the node of the given name. Names without a node
  ## are ignored and unknown pragmas are reported as errors
  if name.node.isNil():
    return
  var attached: seq[Pragma] = @[]
  case name.node.kind:
    of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
      attached = Declaration(name.node).pragmas
    of NodeKind.lambdaExpr:
      attached = LambdaExpr(name.node).pragmas
    else:
      discard # Unreachable
  for pragma in attached:
    let pragmaName = pragma.name.token.lexeme
    if pragmaName notin self.compilerProcs:
      self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
    let procedure = self.compilerProcs[pragmaName]
    # Pragmas of any other kind are left to
    # dispatchDelayedPragmas
    if procedure.kind == Immediate:
      procedure.handler(self, pragma, name)
|
|
|
|
|
|
method dispatchDelayedPragmas(self: BytecodeCompiler, name: Name) =
  ## Runs the handlers of the non-immediate pragmas attached
  ## to the node of the given name. These are dispatched once
  ## the object is called, so this only applies to functions
  if name.node.isNil():
    return
  let attached: seq[Pragma] = Declaration(name.node).pragmas
  for pragma in attached:
    let pragmaName = pragma.name.token.lexeme
    if pragmaName notin self.compilerProcs:
      self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
    let procedure = self.compilerProcs[pragmaName]
    # Immediate pragmas were already handled by dispatchPragmas
    if procedure.kind != Immediate:
      procedure.handler(self, pragma, name)
|
|
|
|
|
|
proc patchReturnAddress(self: BytecodeCompiler, pos: int)  =
  ## Patches the return address of a function call: the
  ## current length of the code is written as 8 bytes into
  ## the constant table, starting at index `pos`
  let encoded = self.chunk.code.len().toLong()
  for k in 0 .. 7:
    self.chunk.consts[pos + k] = encoded[k]
|
|
|
|
|
|
proc generateCall(self: BytecodeCompiler, fn: Type, args: seq[Expression], line: int) {.used.} =
  ## Variant of generateCall working on Type objects rather
  ## than Name objects (used for lambdas and consequent
  ## calls). The address of the function is assumed to be
  ## on the stack already
  if fn.isBuiltin:
    self.handleBuiltinFunction(fn, args, line)
    return
  # Placeholder for the return address: its slot in the
  # constant table is patched once the call is emitted
  self.emitByte(LoadUInt64, line)
  self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
  let returnSlot = self.chunk.consts.len() - 8
  # Arguments are passed last-to-first because of how
  # stacks work: they get reversed again at runtime
  for k in 0 .. args.high:
    let argument = args[args.high - k]
    self.check(argument, fn.args[^(k + 1)].kind)
    self.expression(argument)
  # Call creates a new call frame and jumps to
  # the first instruction of the function
  self.emitByte(Call, line)
  self.emitBytes(args.len().toTriple(), line)
  self.patchReturnAddress(returnSlot)
|
|
|
|
|
|
method prepareFunction(self: BytecodeCompiler, fn: Name) =
  ## "Prepares" a function declaration: its generics and
  ## arguments are declared as names, the argument types are
  ## resolved and recorded in the function's type and the
  ## return type (when present) is inferred. The compiler's
  ## stack index is restored before returning

  # First we declare the function's generics, if it has any
  var constraints: seq[tuple[match: bool, kind: Type]] = @[]
  for gen in fn.node.generics:
    self.unpackTypes(gen.cond, constraints)
    self.names.add(Name(depth: fn.depth + 1,
                        isPrivate: true,
                        valueType: Type(kind: Generic, name: gen.name.token.lexeme, cond: constraints),
                        codePos: 0,
                        isLet: false,
                        line: fn.node.token.line,
                        belongsTo: fn,
                        ident: gen.name,
                        owner: self.currentModule,
                        file: self.file))
    constraints = @[]
  # We now declare and typecheck the function's arguments
  let idx = self.stackIndex
  self.stackIndex = 1
  let node = FunDecl(fn.node)
  # Index of the first argument that has a default value:
  # the defaults line up with the *last* arguments. Indexing
  # them from the first argument (as was done before) would
  # attach defaults to the wrong arguments and run past the
  # end of the list whenever only some arguments have one
  # NOTE(review): assumes node.defaults holds one entry per
  # trailing defaulted argument; the result is unchanged when
  # it is empty or as long as node.arguments
  let firstDefault = node.arguments.len() - node.defaults.len()
  var typ: Type
  for i, argument in node.arguments:
    if self.names.high() > 16777215:
      self.error("cannot declare more than 16777215 variables at a time")
    inc(self.stackIndex)
    typ = self.inferOrError(argument.valueType)
    # We can't use self.compare(), because it would
    # always just return true
    if typ.kind == Auto:
      fn.valueType.isAuto = true
      # Magic trick! We turn auto into any, just
      # to make our lives easier
      typ = "any".toIntrinsic()
    self.names.add(Name(depth: fn.depth + 1,
                        isPrivate: true,
                        owner: fn.owner,
                        file: fn.file,
                        isConst: false,
                        ident: argument.name,
                        valueType: typ,
                        codePos: 0,
                        isLet: false,
                        line: argument.name.token.line,
                        belongsTo: fn,
                        kind: NameKind.Argument,
                        node: argument.name,
                        position: self.stackIndex,
                        isReal: true
                        ))
    # Arguments without a default are recorded with nil
    let default: Expression =
      if i >= firstDefault: node.defaults[i - firstDefault]
      else: nil
    fn.valueType.args.add((self.names[^1].ident.token.lexeme, typ, default))
  # The function needs a return type too!
  if not node.returnType.isNil():
    fn.valueType.returnType = self.inferOrError(node.returnType)
    if fn.valueType.returnType.kind == Auto:
      fn.valueType.isAuto = true
      # Here we don't bother changing the return type
      # to any because returnStmt() will see the auto
      # type and change it accordingly once we know what
      # we're trying to return for the first time
  fn.position = self.stackIndex
  self.stackIndex = idx
|
|
|
|
|
|
proc prepareAutoFunction(self: BytecodeCompiler, fn: Name, args: seq[tuple[name: string, kind: Type, default: Expression]]): Name =
  ## "Prepares" an automatic function declaration: a deep
  ## copy of the function is declared as a concrete (non
  ## automatic, not yet compiled) function whose arguments
  ## take their types from `args`. The copy is returned

  let savedIndex = self.stackIndex
  self.stackIndex = 1
  let declaration = FunDecl(fn.node)
  var fn = deepCopy(fn)
  fn.valueType.isAuto = false
  fn.valueType.compiled = false
  self.names.add(fn)
  # Each declared argument is paired with the concrete
  # type information supplied by the caller
  for (argument, val) in zip(declaration.arguments, args):
    if self.names.high() > 16777215:
      self.error("cannot declare more than 16777215 variables at a time")
    inc(self.stackIndex)
    let argumentName = Name(depth: fn.depth + 1,
                            isPrivate: true,
                            owner: fn.owner,
                            file: fn.file,
                            isConst: false,
                            ident: argument.name,
                            valueType: val.kind,
                            codePos: 0,
                            isLet: false,
                            line: argument.name.token.line,
                            belongsTo: fn,
                            kind: NameKind.Argument,
                            node: argument.name,
                            position: self.stackIndex,
                            isReal: true)
    self.names.add(argumentName)
  fn.valueType.args = args
  fn.position = self.stackIndex
  self.stackIndex = savedIndex
  result = fn
|
|
|
|
|
|
proc generateCall(self: BytecodeCompiler, fn: Name, args: seq[Expression], line: int) =
    ## Emits the instruction sequence that calls the function
    ## bound to fn with the given argument expressions. Builtins
    ## are handed off to handleBuiltinFunction() and nothing else
    ## is emitted for them
    if fn.valueType.isBuiltin:
        self.handleBuiltinFunction(fn.valueType, args, line)
        return
    # The callee comes first
    if fn.kind == NameKind.Var:
        # The callee is stored in a variable: load it
        # like any other identifier
        self.identifier(VarDecl(fn.node).name)
    elif fn.kind == NameKind.Function:
        self.emitByte(LoadUInt64, line)
        self.emitBytes(self.chunk.writeConstant(fn.codePos.toLong()), line)
    # Any other kind of name is not expected to get here
    if fn.valueType.forwarded:
        # NOTE(review): presumably this records where the 8-byte
        # constant written above starts so that it can be patched
        # once the forward declaration is resolved -- confirm
        self.forwarded.add((fn, self.chunk.consts.high() - 7))
    # Placeholder return address, patched at the end
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    let returnAddress = self.chunk.consts.len() - 8
    # Arguments are compiled from last to first
    for i in countdown(args.high(), 0):
        self.expression(args[i])
    # The Call instruction carries the number of arguments
    # as its operand
    self.emitByte(Call, line)
    self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(returnAddress)
|
|
|
|
|
|
proc specialize(self: BytecodeCompiler, typ: Type, args: seq[Expression]): Type {.discardable.} =
    ## Instantiates a generic type. Works on a deep copy of typ:
    ## for function types, every parameter whose type is generic
    ## is replaced with the type inferred from the corresponding
    ## entry of args, and a generic return type is replaced with
    ## whatever its name got bound to. Any other kind of type is
    ## returned as a plain copy
    result = deepCopy(typ)
    if result.kind != TypeKind.Function:
        # TODO: Custom user-defined types
        return
    let bound = newTable[string, Type]()
    # Binds each generic name to a concrete type, rejecting
    # attempts to bind the same name to two different types
    for i, parameter in result.args:
        let declared = parameter.kind
        if declared.kind != Generic:
            continue
        let actual = self.inferOrError(args[i])
        if declared.name in bound and not self.compare(actual, bound[declared.name]):
            self.error(&"expecting generic argument '{declared.name}' to be of type {self.stringify(bound[declared.name])}, got {self.stringify(actual)}", args[i])
        bound[declared.name] = actual
        result.args[i].kind = actual
    if result.returnType.isNil() or result.returnType.kind != Generic:
        return
    let wanted = result.returnType.name
    if wanted in bound:
        result.returnType = bound[wanted]
    elif bound.len() == 0:
        # The function has no generic arguments,
        # just a generic return type
        var resolved: Type
        for i, gen in result.fun.generics:
            if gen.name.token.lexeme == wanted:
                resolved = result.args[i].kind
                break
        if resolved.isNil():
            self.error(&"unknown generic argument name '{wanted}'", result.fun)
        result.returnType = resolved
    else:
        self.error(&"unknown generic argument name '{wanted}'", result.fun)
|
|
|
|
|
|
proc terminateProgram(self: BytecodeCompiler, pos: int) =
    ## Closes a peon program: patches forward declarations,
    ## ends the current scope, emits the entry point's return
    ## instruction and patches the return address located at pos
    self.patchForwardDeclarations()
    self.endScope()
    let line = self.peek().token.line
    self.emitByte(OpCode.Return, line)
    # The operand is 0 because the entry point has no return value
    self.emitByte(0, line)
    self.patchReturnAddress(pos)
|
|
|
|
|
|
proc beginProgram(self: BytecodeCompiler): int =
    ## Emits the prologue of a peon program and returns the
    ## position of the dummy return address of the program's
    ## entry point, to be patched later by terminateProgram
    if self.currentModule.isNil():
        # No module is being compiled yet, so the
        # program's main module is declared here
        let mainModule = Name(kind: NameKind.Module,
                              ident: newIdentExpr(Token(lexeme: self.file, kind: Identifier)),
                              file: self.file,
                              path: self.file,
                              owner: nil,
                              depth: 0,
                              codePos: 0,
                              line: 1,
                              isPrivate: true,
                              isConst: false,
                              isLet: false,
                              resolved: true)
        self.names.add(mainModule)
        self.currentModule = mainModule
    # All user code is wrapped in a hidden entry point, as if
    # peon implicitly wrote the program's main() function. It is
    # called like any regular peon function, but generateCall()
    # can't be used because the end of the program is not known
    # yet: a dummy return address is emitted instead and patched
    # once the boundaries of the hidden main() are known
    let entryPoint = Name(kind: NameKind.Function,
                          ident: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                          valueType: Type(kind: Function,
                                          returnType: nil,
                                          args: @[],
                                          ),
                          file: self.file,
                          owner: self.currentModule,
                          depth: 0,
                          # NOTE(review): 12 is presumably the size of the call
                          # sequence emitted right below -- confirm
                          codePos: self.chunk.code.len() + 12,
                          line: 1,
                          isPrivate: true,
                          isConst: false,
                          isLet: false,
                          resolved: true)
    self.names.add(entryPoint)
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(entryPoint.codePos.toLong()), 1)
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), 1)
    # Taken right after the dummy constant is written
    result = self.chunk.consts.len() - 8
    self.emitByte(Call, 1)
    self.emitBytes(0.toTriple(), 1)
|
|
|
|
|
|
method literal(self: BytecodeCompiler, node: ASTNode, compile: bool = true): Type {.discardable.} =
    ## Handles literals such as singletons, strings and
    ## numbers. Returns the literal's type and, if compile
    ## is true, emits the instructions that load its value

    proc emitAsDecimal(value: int, origin: Token, kind: Type) =
        ## Rewrites an integer literal written in a non-decimal
        ## base as a decimal integer expression that starts where
        ## the original token does and, when compiling, emits it
        ## as a constant of the given type
        let digits = $value
        let decimal = newIntExpr(Token(lexeme: digits, line: origin.line,
                                       pos: (start: origin.pos.start,
                                             stop: origin.pos.start + len(digits)),
                                       relPos: (start: origin.relPos.start, stop: origin.relPos.start + len(digits))
                                       )
                                 )
        if compile:
            self.emitConstant(decimal, kind)

    case node.kind:
        of trueExpr:
            result = "bool".toIntrinsic()
            if compile:
                self.emitByte(LoadTrue, node.token.line)
        of falseExpr:
            result = "bool".toIntrinsic()
            if compile:
                self.emitByte(LoadFalse, node.token.line)
        of strExpr:
            result = "string".toIntrinsic()
            if compile:
                self.emitConstant(LiteralExpr(node), result)
        of intExpr:
            let integer = IntExpr(node)
            result = self.infer(integer)
            # The parsed value is thrown away: parsing only
            # serves to reject literals that do not fit
            try:
                if result.kind in [Int64, Int32, Int16, Int8]:
                    var signed: int
                    discard parseInt(integer.literal.lexeme, signed)
                else:
                    var unsigned: uint64
                    discard parseBiggestUInt(integer.literal.lexeme, unsigned)
            except ValueError:
                self.error("integer value out of range")
            if compile:
                self.emitConstant(integer, result)
        of hexExpr:
            let hex = HexExpr(node)
            var value: int
            result = self.infer(hex)
            try:
                discard parseHex(hex.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            emitAsDecimal(value, hex.token, result)
        of binExpr:
            let bin = BinExpr(node)
            var value: int
            result = self.infer(bin)
            try:
                discard parseBin(bin.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            emitAsDecimal(value, bin.token, result)
        of octExpr:
            let oct = OctExpr(node)
            var value: int
            result = self.infer(oct)
            try:
                discard parseOct(oct.literal.lexeme, value)
            except ValueError:
                self.error("integer value out of range")
            emitAsDecimal(value, oct.token, result)
        of floatExpr:
            let floating = FloatExpr(node)
            var value: float
            result = self.infer(floating)
            try:
                discard parseFloat(floating.literal.lexeme, value)
            except ValueError:
                self.error("floating point value out of range")
            if compile:
                self.emitConstant(floating, result)
        of awaitExpr:
            discard  # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")
|
|
|
|
|
|
method unary(self: BytecodeCompiler, node: UnaryExpr, compile: bool = true): Type {.discardable.} =
    ## Compiles all unary expressions. The operator is looked
    ## up as a one-argument function named after the operator's
    ## lexeme; generic implementations are specialized and
    ## automatic ones are made concrete first. Returns the return
    ## type of the chosen implementation and, if compile is true,
    ## emits a call to it
    var default: Expression  # Stays nil: the signature carries no default value
    let fn = Type(kind: Function,
                  returnType: Type(kind: Any),
                  args: @[("", self.inferOrError(node.a), default)])
    var impl = self.match(node.token.lexeme, fn, node)
    result = impl.valueType
    if impl.isGeneric:
        result = self.specialize(impl.valueType, @[node.a])
    elif impl.valueType.isAuto:
        impl = self.prepareAutoFunction(impl, fn.args)
        result = impl.valueType
    result = result.returnType
    if compile:
        # The call is attributed to the line of the expression being
        # compiled (like call() does), not to the line where the
        # operator's implementation was declared
        self.generateCall(impl, @[node.a], node.token.line)
|
|
|
|
|
|
method binary(self: BytecodeCompiler, node: BinaryExpr, compile: bool = true): Type {.discardable.} =
    ## Compiles all binary expressions. The operator is looked
    ## up as a two-argument function named after the operator's
    ## lexeme; generic implementations are specialized and
    ## automatic ones are made concrete first. Returns the return
    ## type of the chosen implementation and, if compile is true,
    ## emits a call to it
    var default: Expression  # Stays nil: the signature carries no default values
    let fn = Type(kind: Function, returnType: "any".toIntrinsic(), args: @[("", self.inferOrError(node.a), default), ("", self.inferOrError(node.b), default)])
    var impl = self.match(node.token.lexeme, fn, node)
    result = impl.valueType
    if impl.isGeneric:
        result = self.specialize(impl.valueType, @[node.a, node.b])
    elif impl.valueType.isAuto:
        impl = self.prepareAutoFunction(impl, fn.args)
        result = impl.valueType
    result = result.returnType
    if compile:
        # The call is attributed to the line of the expression being
        # compiled (like call() does), not to the line where the
        # operator's implementation was declared
        self.generateCall(impl, @[node.a, node.b], node.token.line)
|
|
|
|
|
|
method identifier(self: BytecodeCompiler, node: IdentExpr, name: Name = nil, compile: bool = true, strict: bool = true): Type {.discardable.} =
    ## Compiles access to identifiers and returns the type of
    ## the name being accessed. If name is given it is used as
    ## is, otherwise node is resolved: in strict mode a failed
    ## lookup is an error, in non-strict mode nil is returned.
    ## No code is emitted when compile is false
    var resolved = name
    if resolved.isNil():
        if strict:
            resolved = self.resolveOrError(node)
        else:
            resolved = self.resolve(node)
        if resolved.isNil() and not strict:
            return nil
    result = resolved.valueType
    if resolved.kind == NameKind.CustomType:
        # The type of a type comes out as "typevar"
        result = result.wrap()
    if not compile:
        return result
    # From here on the identifier stored in the resolved
    # name is used instead of the node that was passed in
    let ident = resolved.ident
    if resolved.isConst:
        # Constants are always emitted as Load* instructions,
        # no matter the scope depth
        let value = VarDecl(resolved.node).value
        if strict:
            self.emitConstant(value, self.inferOrError(ident))
        else:
            self.emitConstant(value, self.infer(ident))
    elif resolved.kind == NameKind.Function:
        # Functions have no runtime representation: they are just
        # a location to jump to, so a reference to one resolves to
        # its address in the bytecode
        self.emitByte(LoadUInt64, ident.token.line)
        self.emitBytes(self.chunk.writeConstant(resolved.codePos.toLong()), ident.token.line)
    elif resolved.kind == NameKind.CustomType:
        # Types have no runtime representation either, but something
        # has to be on the stack to pop off, so a placeholder is pushed
        self.emitByte(LoadNil, ident.token.line)
    else:
        # Names declared at depth 0 are loaded as globals,
        # everything else is loaded as a regular variable
        let opcode = if resolved.depth > 0: LoadVar else: LoadGlobal
        self.emitByte(opcode, ident.token.line)
        # No need to check for -1 here: we already did a nil check above!
        self.emitBytes(resolved.position.toTriple(), ident.token.line)
|
|
|
|
|
|
method assignment(self: BytecodeCompiler, node: ASTNode, compile: bool = true): Type {.discardable.} =
    ## Compiles assignment expressions. Plain assignments are
    ## validated (constants and immutable names are rejected and
    ## the value is type checked), then the value is compiled and,
    ## if compile is true, stored with StoreVar; the type of the
    ## target is returned. Item assignments are only validated
    case node.kind:
        of assignExpr:
            let assign = AssignExpr(node)
            let target = IdentExpr(assign.name)
            let resolved = self.resolveOrError(target)
            if resolved.isConst:
                self.error(&"cannot assign to '{target.token.lexeme}' (value is a constant)", target)
            elif resolved.isLet:
                self.error(&"cannot reassign '{target.token.lexeme}' (value is immutable)", target)
            self.check(assign.value, resolved.valueType)
            self.expression(assign.value, compile)
            let slot = resolved.position
            # Warn when the name comes from an outer scope
            # and is owned by a different function
            if resolved.depth < self.depth and resolved.belongsTo != self.currentFunction:
                self.warning(WarningKind.MutateOuterScope, &"mutation of '{resolved.ident.token.lexeme}' declared in outer scope ({resolved.owner.file}.pn:{resolved.ident.token.line}:{resolved.ident.token.relPos.start})", nil, assign)
            result = resolved.valueType
            if compile:
                self.emitByte(StoreVar, assign.token.line)
                self.emitBytes(slot.toTriple(), assign.token.line)
        of setItemExpr:
            let setItem = SetItemExpr(node)
            let target = IdentExpr(setItem.name)
            let resolved = self.resolveOrError(target)
            if resolved.isConst:
                self.error(&"cannot assign to '{target.token.lexeme}' (value is a constant)", target)
            elif resolved.isLet:
                self.error(&"cannot reassign '{target.token.lexeme}' (value is immutable)", target)
            if resolved.valueType.kind != CustomType:
                self.error("only types have fields", setItem)
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
|
|
|
|
|
|
method makeConcrete(self: BytecodeCompiler, node: GenericExpr, compile: bool = true): Type =
    ## Builds a concrete type from the given generic
    ## instantiation. The name being instantiated must be
    ## generic and must be given exactly as many types as
    ## it declares generics, each of which must be a type
    ## name. Returns a deep copy of the generic's type where
    ## generic parameters and a generic return type have been
    ## replaced with the supplied types. If compile is true,
    ## a placeholder nil is pushed onto the stack
    var name = self.resolveOrError(node.ident)
    if not name.isGeneric:
        self.error(&"cannot instantiate concrete type from {self.stringify(name.valueType)}: a generic is required")
    var fun = FunDecl(name.node)
    if fun.generics.len() != node.args.len():
        self.error(&"wrong number of types supplied for generic instantiation (expected {fun.generics.len()}, got {node.args.len()} instead)")
    var concrete = deepCopy(name.valueType)
    var types: seq[Type] = @[]
    var map = newTable[string, Type]()
    for arg in node.args:
        types.add(self.inferOrError(arg))
        if types[^1].kind != Typevar:
            self.error(&"expecting type name during generic instantiation, got {self.stringify(types[^1])} instead", arg)
    # The types inferred above are reused rather than inferring
    # every argument a second time
    for (gen, supplied) in zip(fun.generics, types):
        map[gen.name.token.lexeme] = supplied
    for i, argument in concrete.args:
        if argument.kind.kind != Generic:
            continue
        # The map is keyed by the names of the generics, so the lookup
        # has to use the name of the parameter's generic type rather
        # than the name of the parameter itself
        let generic = argument.kind.name
        if generic in map:
            concrete.args[i].kind = map[generic]
        else:
            self.error(&"unknown generic argument name '{generic}'", concrete.fun)
    if not concrete.returnType.isNil() and concrete.returnType.kind == Generic:
        if concrete.returnType.name in map:
            concrete.returnType = map[concrete.returnType.name]
        else:
            self.error(&"unknown generic argument name '{concrete.returnType.name}'", concrete.fun)
    if compile:
        # Types don't exist at runtime, but if you want to
        # assign them to variables then you need *something*
        # to pop off the stack, so we just push a nil
        self.emitByte(LoadNil, node.token.line)
    result = concrete
|
|
|
|
|
|
method call(self: BytecodeCompiler, node: CallExpr, compile: bool = true): Type {.discardable.} =
    ## Compiles function calls. The types of all positional and
    ## keyword arguments are inferred first, then the callee is
    ## handled according to its kind (identifier, nested call,
    ## module attribute, lambda or generic instantiation). Returns
    ## the return type of the function being called and, if
    ## compile is true, emits the call itself
    var args: seq[tuple[name: string, kind: Type, default: Expression]] = @[]
    var argExpr: seq[Expression] = @[]
    var default: Expression  # Stays nil: call signatures carry no default values
    var kind: Type
    for i, argument in node.arguments.positionals:
        kind = self.infer(argument)  # We don't use inferOrError so that we can raise a more appropriate error message later
        if kind.isNil():
            if argument.kind == NodeKind.identExpr:
                self.error(&"reference to undefined name '{argument.token.lexeme}'", argument)
            self.error(&"positional argument {i + 1} in function call has no type", argument)
        args.add(("", kind, default))
        argExpr.add(argument)
    for argument in node.arguments.keyword:
        kind = self.infer(argument.value)
        if kind.isNil():
            if argument.value.kind == NodeKind.identExpr:
                self.error(&"reference to undefined name '{argument.value.token.lexeme}'", argument.value)
            self.error(&"keyword argument '{argument.name.token.lexeme}' in function call has no type", argument.value)
        args.add((argument.name.token.lexeme, kind, default))
        argExpr.add(argument.value)
    case node.callee.kind:
        of NodeKind.identExpr:
            # Calls like hi()
            var impl = self.match(IdentExpr(node.callee).name.lexeme, Type(kind: Function, returnType: "all".toIntrinsic(), args: args), node)
            result = impl.valueType
            if impl.isGeneric:
                result = self.specialize(impl.valueType, argExpr)
            elif impl.valueType.isAuto:
                impl = self.prepareAutoFunction(impl, args)
                result = impl.valueType
            if result.fun.kind == NodeKind.lambdaExpr:
                self.lambdaExpr(LambdaExpr(result.fun), compile=compile)
            if not impl.valueType.compiled:
                # The implementation has not been compiled yet
                self.funDecl(FunDecl(result.fun), impl)
            result = result.returnType
            self.dispatchDelayedPragmas(impl)
            if compile:
                self.generateCall(impl, argExpr, node.token.line)
        of NodeKind.callExpr:
            # Calling a call expression, like hello()()
            var node: Expression = node
            var all: seq[CallExpr] = @[]
            # Since there can be as many consecutive calls as
            # the user wants, we need to "extract" all of them
            while CallExpr(node).callee.kind == callExpr:
                all.add(CallExpr(CallExpr(node).callee))
                node = CallExpr(node).callee
            # Now that we know how many call expressions we
            # need to compile, we go through them in the
            # order in which they were extracted
            for exp in all:
                result = self.call(exp, compile)
            if compile and result.kind == Function:
                self.generateCall(result, argExpr, node.token.line)
            result = result.returnType
        of NodeKind.getItemExpr:
            let node = GetItemExpr(node.callee)
            result = self.getItemExpr(node, compile=false, matching=Type(kind: Function, args: args, returnType: Type(kind: All)))
            var fn: Name
            # getItemExpr returns a Type object, but
            # we need a Name object!
            for name in self.names:
                if name.valueType == result:
                    fn = name
                    break
            if fn.isNil():
                # Report an error instead of dereferencing nil below
                self.error("could not find the name of the function being called (This is an internal error and most likely a bug)", node)
            if fn.isGeneric:
                result = self.specialize(result, argExpr)
            elif result.isAuto:
                fn = self.prepareAutoFunction(fn, args)
                result = fn.valueType
            result = result.returnType
            if compile:
                self.generateCall(fn, argExpr, node.token.line)
        of NodeKind.lambdaExpr:
            # Calling a lambda
            var node = LambdaExpr(node.callee)
            let impl = self.lambdaExpr(node, compile=compile)
            result = impl.returnType
            if compile:
                self.generateCall(impl, argExpr, node.token.line)
        of NodeKind.genericExpr:
            # Instantiating a generic type
            let node = GenericExpr(node.callee)
            # The compile flag is forwarded so that nothing is
            # emitted when we're only asked for the call's type
            let concrete = self.makeConcrete(node, compile)
            var impl = self.resolve(node.ident).deepCopy()
            impl.valueType = concrete
            result = impl.valueType.returnType
            if compile:
                self.generateCall(impl, argExpr, node.token.line)
        else:
            let typ = self.infer(node)
            if typ.isNil():
                self.error(&"expression has no type", node)
            else:
                self.error(&"object of type '{self.stringify(typ)}' is not callable", node)
|
|
|
|
|
|
method getItemExpr(self: BytecodeCompiler, node: GetItemExpr, compile: bool = true, matching: Type = nil): Type {.discardable.} =
    ## Compiles access to the fields of a type or of a module
    ## namespace and returns the type of the object that is
    ## resolved. Only attributes of modules are handled here.
    ## When matching is given, the first candidate whose type
    ## compares equal to it is returned right away, without
    ## generating any code. Otherwise code is generated only
    ## if compile is true
    if node.obj.kind != NodeKind.identExpr:
        self.error("invalid syntax", node)
    else:
        let owner = self.resolveOrError(IdentExpr(node.obj))
        if owner.kind != NameKind.Module:
            self.error("invalid syntax", node.obj)
        else:
            let candidates = self.findInModule(node.name.token.lexeme, owner)
            if candidates.len() == 0:
                self.error(&"reference to undefined name '{node.name.token.lexeme}' in module '{owner.ident.token.lexeme}'")
            elif candidates.len() > 1 and matching.isNil():
                self.error(&"ambiguous reference for '{node.name.token.lexeme}' in module '{owner.ident.token.lexeme}'")
            if not matching.isNil():
                # The requested type picks one of the candidates
                for candidate in candidates:
                    if self.compare(candidate.valueType, matching):
                        return candidate.valueType
            if candidates.len() == 1:
                result = candidates[0].valueType
            else:
                self.error(&"ambiguous reference for '{node.name.token.lexeme}' in module '{owner.ident.token.lexeme}'")
            if compile:
                self.identifier(nil, candidates[0])
|
|
|
|
|
|
proc blockStmt(self: BytecodeCompiler, node: BlockStmt, compile: bool = true) =
    ## Compiles block statements, which create a new local
    ## scope. A warning is issued for every declaration that
    ## directly follows a break or continue statement
    self.beginScope()
    var previous: Declaration
    for current in node.code:
        if not previous.isNil():
            if previous.kind == NodeKind.breakStmt or previous.kind == NodeKind.continueStmt:
                self.warning(UnreachableCode, &"code after '{previous.token.lexeme}' statement is unreachable", nil, current)
        self.declaration(current)
        previous = current
    self.endScope()
|
|
|
|
|
|
method lambdaExpr(self: BytecodeCompiler, node: LambdaExpr, compile: bool = true): Type {.discardable.} =
    ## Compiles lambda functions as expressions. Returns the
    ## function type of the lambda, built from its declared
    ## arguments and return type. If compile is true and the
    ## lambda has a body that has not been compiled yet, the body
    ## is emitted behind a forward jump and the lambda's address
    ## is loaded afterwards. The current function and stack index
    ## are restored before returning
    result = Type(kind: Function, isLambda: true, fun: node, location: 0, compiled: true)
    let function = self.currentFunction
    var default: Expression  # Stays nil: used for arguments with no default value
    var name: Name
    let stackIdx = self.stackIndex
    # Position of the first argument that has a default value.
    # NOTE(review): assumes node.defaults holds the default values
    # of the trailing arguments, in order -- confirm against the parser
    let firstDefault = node.arguments.len() - node.defaults.len()
    self.stackIndex = 2
    for idx, argument in node.arguments:
        if self.names.high() > 16777215:
            self.error("cannot declare more than 16777215 variables at a time")
        name = Name(depth: self.depth + 1,
                    isPrivate: true,
                    owner: self.currentModule,
                    file: self.currentModule.file,
                    isConst: false,
                    ident: argument.name,
                    valueType: self.inferOrError(argument.valueType),
                    codePos: 0,
                    isLet: false,
                    line: argument.name.token.line,
                    belongsTo: nil,  # TODO
                    kind: NameKind.Argument,
                    node: argument.name,
                    position: self.stackIndex
                    )
        if name.valueType.kind == Auto:
            self.error("due to current compiler limitations, automatic types cannot be used in lambdas", name.ident)
        if compile:
            self.names.add(name)
        inc(self.stackIndex)
        # The check is made against the position of the current
        # argument, so that each default value is bound to the
        # argument it belongs to and node.defaults is never
        # indexed out of bounds
        if idx >= firstDefault:
            # There's a default argument!
            result.args.add((name.ident.token.lexeme, name.valueType, node.defaults[idx - firstDefault]))
        else:
            # This argument has no default
            result.args.add((name.ident.token.lexeme, name.valueType, default))
    # The function needs a return type too!
    if not node.returnType.isNil():
        result.returnType = self.inferOrError(node.returnType)
    self.currentFunction = Name(depth: self.depth,
                                isPrivate: true,
                                isConst: false,
                                owner: self.currentModule,
                                file: self.file,
                                valueType: result,
                                ident: nil,
                                node: node,
                                isLet: false,
                                line: node.token.line,
                                kind: NameKind.Function,
                                belongsTo: function,
                                isReal: true,
                                )
    if compile and node notin self.lambdas and not node.body.isNil():
        self.lambdas.add(node)
        # The body is jumped over when the lambda expression
        # itself is evaluated
        let jmp = self.emitJump(JumpForwards, node.token.line)
        if BlockStmt(node.body).code.len() == 0:
            self.error("cannot construct lambda with empty body")
        var last: Declaration
        self.beginScope()
        result.location = self.chunk.code.len()
        for decl in BlockStmt(node.body).code:
            if not last.isNil():
                if last.kind == returnStmt:
                    self.warning(UnreachableCode, "code after 'return' statement is unreachable", nil, decl)
            self.declaration(decl)
            last = decl
        let typ = self.currentFunction.valueType.returnType
        var hasVal: bool = false
        case self.currentFunction.valueType.fun.kind:
            of NodeKind.funDecl:
                hasVal = FunDecl(self.currentFunction.valueType.fun).hasExplicitReturn
            of NodeKind.lambdaExpr:
                hasVal = LambdaExpr(self.currentFunction.valueType.fun).hasExplicitReturn
            else:
                discard  # Unreachable
        if not hasVal and not typ.isNil():
            # There is no explicit return statement anywhere in the function's
            # body: while this is not a tremendously useful piece of information
            # (since the presence of at least one doesn't mean all control flow
            # cases are covered), it definitely is an error worth reporting
            self.error("function has an explicit return type, but no return statement was found", node)
        hasVal = hasVal and not typ.isNil()
        for jump in self.currentFunction.valueType.retJumps:
            self.patchJump(jump)
        # Terminates the function's context
        self.emitByte(OpCode.Return, self.peek().token.line)
        if hasVal:
            self.emitByte(1, self.peek().token.line)
        else:
            self.emitByte(0, self.peek().token.line)
        # Well, we've compiled everything: time to patch
        # the jump offset
        self.patchJump(jmp)
        self.emitByte(LoadUInt64, node.token.line)
        self.emitBytes(self.chunk.writeConstant(result.location.toLong()), node.token.line)
        self.endScope()
    # Restores the enclosing function (if any).
    # Makes nested calls work (including recursion)
    self.currentFunction = function
    self.stackIndex = stackIdx
|
|
|
|
|
|
method expression(self: BytecodeCompiler, node: Expression, compile: bool = true): Type {.discardable.} =
    ## Compiles all expressions by dispatching on the kind of
    ## the given node and returns the type of the expression.
    ## The compile flag is forwarded to every handler, so no
    ## code is generated when it is false
    case node.kind:
        of NodeKind.genericExpr:
            return self.makeConcrete(GenericExpr(node), compile)
        of NodeKind.callExpr:
            return self.call(CallExpr(node), compile)
        of NodeKind.getItemExpr:
            return self.getItemExpr(GetItemExpr(node), compile)
        of NodeKind.pragmaExpr:
            discard  # TODO
        # Note that for setItem and assign we don't convert
        # the node to its true type because that type information
        # would be lost in the call anyway. The differentiation
        # happens in self.assignment()
        of NodeKind.setItemExpr, NodeKind.assignExpr:
            return self.assignment(node, compile)
        of NodeKind.identExpr:
            return self.identifier(IdentExpr(node), compile=compile)
        of NodeKind.unaryExpr:
            # Unary expressions such as ~5 and -3
            return self.unary(UnaryExpr(node), compile)
        of NodeKind.groupingExpr:
            # Grouping expressions like (2 + 1)
            return self.expression(GroupingExpr(node).expression, compile)
        of NodeKind.binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            return self.binary(BinaryExpr(node), compile)
        of NodeKind.intExpr, NodeKind.hexExpr, NodeKind.binExpr, NodeKind.octExpr,
           NodeKind.strExpr, NodeKind.falseExpr, NodeKind.trueExpr, NodeKind.floatExpr:
            # Since all of these AST nodes share the
            # same overall structure and the kind
            # field is enough to tell one from the
            # other, why bother with specialized
            # cases when one is enough?
            return self.literal(node, compile)
        of NodeKind.lambdaExpr:
            return self.lambdaExpr(LambdaExpr(node), compile)
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
|
|
|
|
|
|
proc ifStmt(self: BytecodeCompiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code. The condition must be a bool
    self.check(node.condition, "bool".toIntrinsic())
    self.expression(node.condition)
    # Taken when the condition is false: lands right
    # after the then branch
    let skipThen = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.thenBranch)
    # Taken at the end of the then branch: lands after
    # the else branch, if there is one
    let skipElse = self.emitJump(JumpForwards, node.token.line)
    self.patchJump(skipThen)
    if not node.elseBranch.isNil():
        self.statement(node.elseBranch)
    self.patchJump(skipElse)
|
|
|
|
|
|
proc whileStmt(self: BytecodeCompiler, node: WhileStmt) =
    ## Compiles C-style while loops and desugared
    ## C-style for loops. The condition must be a bool
    self.check(node.condition, "bool".toIntrinsic())
    # Recorded before the condition is compiled, so that
    # the condition runs again on every iteration
    let loopStart = self.chunk.code.high()
    self.expression(node.condition)
    let exitJump = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.body)
    self.emitLoop(loopStart, node.token.line)
    self.patchJump(exitJump)
|
|
|
|
# TODO: Not implemented yet
proc awaitStmt(self: BytecodeCompiler, node: AwaitStmt) =
    ## Compiles await statements. Currently
    ## a stub that generates no code
    discard
|
|
|
|
|
|
# TODO: Not implemented yet
proc deferStmt(self: BytecodeCompiler, node: DeferStmt) =
    ## Compiles defer statements. Currently
    ## a stub that generates no code
    discard
|
|
|
|
|
|
# TODO: Not implemented yet
proc yieldStmt(self: BytecodeCompiler, node: YieldStmt) =
    ## Compiles yield statements. Currently
    ## a stub that generates no code
    discard
|
|
|
|
|
|
# TODO: Not implemented yet
proc raiseStmt(self: BytecodeCompiler, node: RaiseStmt) =
    ## Compiles raise statements. Currently
    ## a stub that generates no code
    discard
|
|
|
|
|
|
# TODO: Not implemented yet
proc assertStmt(self: BytecodeCompiler, node: AssertStmt) =
    ## Compiles assert statements. Currently
    ## a stub that generates no code
    discard  # TODO
|
|
|
|
|
|
# TODO: Not implemented yet
proc forEachStmt(self: BytecodeCompiler, node: ForEachStmt) =
    ## Compiles foreach loops. Currently
    ## a stub that generates no code
    discard
|
|
|
|
|
|
proc returnStmt(self: BytecodeCompiler, node: ReturnStmt) =
    ## Compiles return statements. Returning a value from a
    ## function with no return type and a bare return in a
    ## function that has one are both errors. A return type
    ## that compares equal to auto is replaced with the type
    ## of the value being returned
    let fnType = self.currentFunction.valueType
    let hasValue = not node.value.isNil()
    if fnType.returnType.isNil() and hasValue:
        self.error("cannot return a value from a void function", node.value)
    elif not fnType.returnType.isNil() and not hasValue:
        self.error("bare return statement is only allowed in void functions", node)
    if hasValue:
        if self.compare(fnType.returnType, "auto".toIntrinsic()):
            fnType.returnType = self.inferOrError(node.value)
        self.check(node.value, fnType.returnType)
        self.expression(node.value)
        self.emitByte(OpCode.SetResult, node.token.line)
    # Setting the result and leaving the function are two separate
    # opcodes: once the former is done, a forward jump is emitted
    # and recorded in the function's retJumps so that it can be
    # patched to land on the single return sequence emitted at the
    # end of the function's body. This way there is just one place
    # where the function returns from instead of many
    self.currentFunction.valueType.retJumps.add(self.emitJump(JumpForwards, node.token.line))
|
|
|
|
|
|
proc continueStmt(self: BytecodeCompiler, node: ContinueStmt, compile: bool = true) =
    ## Compiles continue statements. A continue statement can be
    ## used to jump to the beginning of a loop or, when a label
    ## is given, to the beginning of the innermost named block
    ## with that name. No code is emitted if compile is false
    var destination: int
    if node.label.isNil():
        destination = self.currentLoop.start
    else:
        # Named blocks are searched from the innermost outwards
        var target: NamedBlock
        var found = false
        for i in countdown(self.namedBlocks.high(), 0):
            target = self.namedBlocks[i]
            if target.name == node.label.token.lexeme:
                found = true
                break
        if not found:
            self.error(&"unknown block name '{node.label.token.lexeme}'", node.label)
        destination = target.start
    # The operand of the jump is encoded as a triple,
    # which limits how far it can reach
    if destination > 16777215:
        self.error("too much code to jump over in continue statement")
    if compile:
        self.emitByte(Jump, node.token.line)
        self.emitBytes(destination.toTriple(), node.token.line)
|
|
|
|
|
|
proc importStmt(self: BytecodeCompiler, node: ImportStmt, compile: bool = true) =
    ## Imports a module. This creates a new "virtual"
    ## (i.e. simulated) module namespace and injects all
    ## of the module's public names into the current module.
    ## The module is only compiled if compile is true. I/O
    ## and OS errors are reported as import errors
    self.declare(node)
    # The declaration above leaves the module's name
    # at the end of the list of names
    let imported = self.names[^1]
    try:
        if compile:
            self.compileModule(imported)
        # Importing a module automatically exports
        # its public names to us
        for exported in self.findInModule("", imported):
            exported.exportedTo.add(self.currentModule)
        # Public names from other modules that were explicitly
        # exported to us have to be exported as well, because
        # imports are compiled only once
        for other in self.modules.values():
            if self.currentModule notin other.exportedTo:
                continue
            for exported in self.findInModule("", other):
                exported.exportedTo.add(self.currentModule)
    except IOError:
        self.error(&"could not import '{imported.ident.token.lexeme}': {getCurrentExceptionMsg()}")
    except OSError:
        self.error(&"could not import '{imported.ident.token.lexeme}': {getCurrentExceptionMsg()} [errno {osLastError()}]")
|
|
|
|
|
|
proc exportStmt(self: BytecodeCompiler, node: ExportStmt, compile: bool = true) =
    ## Compiles export statements. The resolved name is marked,
    ## at compile time, as exported to the parent module (the
    ## one importing us). Names are never re-exported implicitly:
    ## the user has to list them, which avoids namespace pollution.
    ## Only modules and functions can be exported; private names
    ## and any other kind of name are rejected with an error
    let exported = self.resolveOrError(node.name)
    if exported.isPrivate:
        self.error("cannot export private names")
    exported.exportedTo.add(self.parentModule)
    case exported.kind:
        of NameKind.Module:
            # Exporting a module exports everything
            # that module defines
            for member in self.findInModule("", exported):
                member.exportedTo.add(self.parentModule)
        of NameKind.Function:
            # Exporting a function exports every
            # implementation sharing its name
            for candidate in self.findByName(exported.ident.token.lexeme):
                if candidate.kind == NameKind.Function:
                    candidate.exportedTo.add(self.parentModule)
        else:
            self.error("unsupported export type")
|
|
|
|
|
|
proc breakStmt(self: BytecodeCompiler, node: BreakStmt) =
    ## Compiles break statements. An unlabelled break emits a
    ## forward jump that is recorded in the current loop so it
    ## can be patched later. A labelled break marks the target
    ## named block, and every named block nested inside it, as
    ## broken; an unknown label is reported as an error
    if not node.label.isNil():
        # Jumping out of a block: collect the blocks we cross,
        # innermost first, until the label is found
        var crossed: seq[NamedBlock] = @[]
        for blk in reversed(self.namedBlocks):
            crossed.add(blk)
            if blk.name == node.label.token.lexeme:
                for entry in crossed:
                    entry.broken = true
                return
        self.error(&"unknown block name '{node.label.token.lexeme}'", node.label)
        return
    # Jumping out of a loop
    let jump = self.emitJump(OpCode.JumpForwards, node.token.line)
    self.currentLoop.breakJumps.add(jump)
    if self.currentLoop.depth > self.depth:
        # Breaking out of a loop closes its scope
        self.endScope()
|
|
|
|
|
|
proc namedBlock(self: BytecodeCompiler, node: NamedBlockStmt) =
    ## Compiles named blocks. A new block entry is pushed for
    ## the duration of the body, which is compiled in its own
    ## scope; code following a break or continue statement is
    ## reported as unreachable
    let blk = NamedBlock(start: self.chunk.code.len(),
                         depth: self.depth,
                         breakJumps: @[],
                         name: node.name.token.lexeme)
    self.namedBlocks.add(blk)
    self.beginScope()
    var last: Declaration
    for decl in node.code:
        if not last.isNil() and (last.kind == NodeKind.breakStmt or last.kind == NodeKind.continueStmt):
            self.warning(UnreachableCode, &"code after '{last.token.lexeme}' statement is unreachable", nil, decl)
        if blk.broken:
            # The block has been flagged by a break statement:
            # record a forward jump to be patched later
            blk.breakJumps.add(self.emitJump(OpCode.JumpForwards, node.token.line))
        self.declaration(decl)
        last = decl
    self.patchBreaks()
    self.endScope()
    discard self.namedBlocks.pop()
|
|
|
|
|
|
proc switchStmt(self: BytecodeCompiler, node: SwitchStmt) =
    ## Compiles switch statements. The switched expression is
    ## compiled once and compared against each branch condition
    ## through a matching "==" implementation. Unlike C switch
    ## statements, branches never cascade: once a branch body
    ## has run, control jumps past the whole statement
    self.expression(node.switch)
    let typeOfA = self.inferOrError(node.switch)
    # Placeholder for the (absent) default value of the
    # arguments in the signature we look up
    let noDefault: Expression = nil
    var exits: seq[int] = @[]
    for branch in node.branches:
        let line = branch.body.token.line
        # We duplicate the top of the stack so that the
        # comparison can consume it without losing the
        # value for the branches that follow
        self.emitByte(DupTop, line)
        self.expression(branch.cond)
        # We look for a matching equality implementation
        let signature = Type(kind: Function,
                             returnType: "bool".toIntrinsic(),
                             args: @[("", typeOfA, noDefault),
                                     ("", self.inferOrError(branch.cond), noDefault)])
        let impl = self.match("==", signature, node)
        self.generateCall(impl, @[node.switch, branch.cond], impl.line)
        let skip = self.emitJump(JumpIfFalsePop, line)
        self.blockStmt(branch.body)
        exits.add(self.emitJump(JumpForwards, line))
        self.patchJump(skip)
    if not node.default.isNil():
        self.blockStmt(node.default)
    for pending in exits:
        self.patchJump(pending)
    # Discards the switched value itself
    self.emitByte(OpCode.Pop, node.token.line)
|
|
|
|
|
|
proc statement(self: BytecodeCompiler, node: Statement) =
    ## Compiles all statements by dispatching on the kind of
    ## the node to the matching handler. Try statements are
    ## currently ignored and any other kind of node is compiled
    ## as an expression
    case node.kind:
        of exprStmt:
            # An expression statement is just an expression
            # followed by a statement terminator (semicolon)
            let inner = ExprStmt(node).expression
            let valueType = self.infer(inner)
            self.expression(inner)
            # An expression with no type produces no value,
            # so in that case there is nothing to print or pop
            if not valueType.isNil():
                if self.replMode:
                    self.printRepl(valueType, inner)
                else:
                    self.emitByte(Pop, node.token.line)
        of NodeKind.whileStmt:
            # The enclosing loop is saved and restored so
            # that nested loops patch their own breaks
            let enclosing = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.depth,
                                    breakJumps: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = enclosing
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.namedBlockStmt:
            self.namedBlock(NamedBlockStmt(node))
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.switchStmt:
            self.switchStmt(SwitchStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.exportStmt:
            self.exportStmt(ExportStmt(node))
        of NodeKind.tryStmt:
            # Nothing is emitted for try statements
            discard
        else:
            self.expression(Expression(node))
|
|
|
|
|
|
proc varDecl(self: BytecodeCompiler, node: VarDecl) =
    ## Compiles variable declarations. The type of the variable
    ## comes from its type declaration, from its value, or from
    ## both (in which case they must agree unless the declared
    ## type is auto). The value is then compiled and the name
    ## is declared at the next stack slot
    var typ: Type
    # Our parser guarantees that the variable declaration
    # will have a type declaration or a value (or both)
    if node.value.isNil():
        # Variable has no value: the type declaration
        # takes over
        # TODO: Implement T.default()!
        # The type has to be inferred *before* it is compared
        # with auto: comparing the still unset (nil) type would
        # make the check below dead code
        typ = self.inferOrError(node.valueType)
        if self.compare(typ, "auto".toIntrinsic()):
            self.error("automatic types require initialization", node)
        # One of the few exceptions where we actually don't want to use
        # self.compare() is this one, because that will implicitly unwrap
        # the typevar and compare the wrapped type, which is not what we want
        if typ.kind != Typevar:
            self.error(&"expecting type name, got value of type {self.stringify(typ)} instead", node.name)
    elif node.valueType.isNil():
        # Variable has no type declaration: the type
        # of its value takes over
        typ = self.inferOrError(node.value)
    else:
        # Variable has both a type declaration and
        # a value: the value's type must match the
        # type declaration
        let expected = self.inferOrError(node.valueType)
        if not self.compare(expected, "auto".toIntrinsic()):
            self.check(node.value, expected)
            # If this doesn't fail, then we're good
            typ = expected
        else:
            # Let the compiler infer the type (this
            # is the default behavior already, but
            # some users may prefer to be explicit!)
            typ = self.inferOrError(node.value)
    self.expression(node.value)
    self.emitByte(AddVar, node.token.line)
    inc(self.stackIndex)
    # We declare the name only now in order to make
    # sure that stuff like var n = n; works as expected.
    # If we declared it early, we'd have a duplicate with
    # no type that would shadow the original value, which
    # is no good
    let name = self.declare(node)
    name.position = self.stackIndex
    name.valueType = typ
|
|
|
|
|
|
proc funDecl(self: BytecodeCompiler, node: FunDecl, name: Name) =
    ## Compiles function declarations. Forward declarations
    ## (no body) are only recorded and builtins are skipped,
    ## as they are handled at call time. For every other
    ## function the body is compiled in place, preceded by a
    ## jump that skips over it, and an entry describing the
    ## function (start, stop, argument count and name) is
    ## appended to the chunk's function table
    if node.token.kind == Operator and node.name.token.lexeme in [".", "="]:
        self.error(&"Due to compiler limitations, the '{node.name.token.lexeme}' operator cannot be currently overridden", node.name)
    # We store the current function to restore
    # it later
    let function = self.currentFunction
    if node.body.isNil():
        # When we stumble across a forward declaration,
        # we record it for later so we can look it up at
        # the end of the module
        self.forwarded.add((name, 0))
        name.valueType.forwarded = true
        return
    if name.valueType.isBuiltin:
        # Builtins are handled at call time
        return
    self.currentFunction = name
    let stackIdx = self.stackIndex
    self.stackIndex = name.position
    # A function's code is just compiled linearly
    # and then jumped over
    name.valueType.compiled = true
    let jmp = self.emitJump(JumpForwards, node.token.line)
    name.codePos = self.chunk.code.len()
    name.valueType.location = name.codePos
    # We let our debugger know this function's boundaries
    self.chunk.functions.add(self.chunk.code.len().toTriple())
    self.functions.add((start: self.chunk.code.len(), stop: 0, pos: self.chunk.functions.len() - 3, fn: name))
    # We remember the index of our entry rather than a copy of
    # it: tuples are value types, so writing to a copy would
    # never update the stored entry. The index also stays valid
    # if nested functions append their own entries meanwhile
    let entry = self.functions.high()
    let idx = self.chunk.functions.len()
    self.chunk.functions.add(0.toTriple()) # Patched later
    self.chunk.functions.add(uint8(node.arguments.len()))
    if not node.name.isNil():
        var s = name.ident.token.lexeme
        # The length prefix is emitted via toDouble(), so the
        # name is capped at uint16.high() bytes and the prefix
        # always describes exactly the bytes that follow it
        if s.len() > uint16.high().int:
            s.setLen(uint16.high().int)
        self.chunk.functions.add(s.len().toDouble())
        self.chunk.functions.add(s.toBytes())
    else:
        self.chunk.functions.add(0.toDouble())
    if BlockStmt(node.body).code.len() == 0:
        self.error("cannot declare function with empty body")
    var last: Declaration
    self.beginScope()
    for decl in BlockStmt(node.body).code:
        if not last.isNil() and last.kind == returnStmt:
            self.warning(UnreachableCode, "code after 'return' statement is unreachable", nil, decl)
        self.declaration(decl)
        last = decl
    let typ = self.currentFunction.valueType.returnType
    var hasVal: bool = false
    case self.currentFunction.valueType.fun.kind:
        of NodeKind.funDecl:
            hasVal = FunDecl(self.currentFunction.valueType.fun).hasExplicitReturn
        of NodeKind.lambdaExpr:
            hasVal = LambdaExpr(self.currentFunction.valueType.fun).hasExplicitReturn
        else:
            discard  # Unreachable
    if not hasVal and not typ.isNil():
        # There is no explicit return statement anywhere in the function's
        # body: while this is not a tremendously useful piece of information
        # (since the presence of at least one doesn't mean all control flow
        # cases are covered), it definitely is an error worth reporting
        self.error("function has an explicit return type, but no return statement was found", node)
    hasVal = hasVal and not typ.isNil()
    # Every return statement in the body jumps here
    for jump in self.currentFunction.valueType.retJumps:
        self.patchJump(jump)
    self.endScope()
    # Terminates the function's context
    let stop = self.chunk.code.len().toTriple()
    self.emitByte(OpCode.Return, self.peek().token.line)
    # The operand of Return tells whether a value is returned
    if hasVal:
        self.emitByte(1, self.peek().token.line)
    else:
        self.emitByte(0, self.peek().token.line)
    self.chunk.functions[idx] = stop[0]
    self.chunk.functions[idx + 1] = stop[1]
    self.chunk.functions[idx + 2] = stop[2]
    self.functions[entry].stop = self.chunk.code.len()
    # Well, we've compiled everything: time to patch
    # the jump offset
    self.patchJump(jmp)
    # Restores the enclosing function (if any).
    # Makes nested calls work (including recursion)
    self.currentFunction = function
    self.stackIndex = stackIdx
|
|
|
|
|
|
proc typeDecl(self: BytecodeCompiler, node: TypeDecl, name: Name) =
    ## Compiles type declarations. The type of every field is
    ## inferred and a non-ref type containing a field of its
    ## own type is rejected
    for field in node.fields:
        let fieldType = self.inferOrError(field.valueType)
        let recursive = self.compare(fieldType, name.valueType)
        if recursive and not node.isRef:
            self.error(&"illegal type recursion for non-ref type '{name.ident.token.lexeme}'")
|
|
|
|
|
|
proc declaration(self: BytecodeCompiler, node: Declaration) =
    ## Compiles declarations, statements and expressions
    ## recursively. Function, type and variable declarations
    ## have dedicated handlers, anything else is compiled as
    ## a statement
    case node.kind:
        of NodeKind.varDecl:
            self.varDecl(VarDecl(node))
        of NodeKind.typeDecl:
            self.typeDecl(TypeDecl(node), self.declare(node))
        of NodeKind.funDecl:
            let name = self.declare(node)
            # Automatic functions can't be compiled right away:
            # the types of the arguments in their signature are
            # not available at declaration time
            if not name.valueType.isAuto:
                self.funDecl(FunDecl(node), name)
            if name.isGeneric:
                # After we're done compiling a generic function,
                # we pull a magic trick: since from here on the
                # user will be able to call it with any of the
                # types in the generic constraint, we switch every
                # generic to a type union (which, conveniently, has
                # an identical layout) so that the function is
                # typechecked as if its arguments were all types of
                # the constraint at once, while still allowing calls
                # with any type in said constraint
                for argument in name.valueType.args:
                    if argument.kind.kind == Generic:
                        argument.kind.asUnion = true
                let returnType = name.valueType.returnType
                if not returnType.isNil() and returnType.kind == Generic:
                    returnType.asUnion = true
        else:
            self.statement(Statement(node))
|
|
|
|
|
|
proc compile*(self: BytecodeCompiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              incremental: bool = false, isMainModule: bool = true, disabledWarnings: seq[WarningKind] = @[], showMismatches: bool = false,
              mode: CompileMode = Debug): Chunk =
    ## Compiles a sequence of AST nodes into a chunk object
    ## and returns it. A new chunk is created when none is
    ## given. In REPL mode the AST, source and line data are
    ## appended to what the compiler already holds instead of
    ## replacing it. Unless incremental is true, the recorded
    ## jumps, the table of imported modules and the stack index
    ## are reset before compiling
    self.chunk = if chunk.isNil(): newChunk() else: chunk
    self.file = file
    self.depth = 0
    self.currentFunction = nil
    if self.replMode:
        self.ast &= ast
        self.source &= "\n" & source
        self.lines &= lines
    else:
        self.ast = ast
        self.current = 0
        self.lines = lines
        self.source = source
    self.isMainModule = isMainModule
    self.disabledWarnings = disabledWarnings
    self.showMismatches = showMismatches
    self.mode = mode
    if not incremental:
        self.jumps = @[]
        self.modules = newTable[string, Name]()
        self.stackIndex = 1
    let pos = self.beginProgram()
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.terminateProgram(pos)
    result = self.chunk
|
|
|
|
|
|
proc compileModule(self: BytecodeCompiler, module: Name) =
    ## Compiles an imported module into the existing chunk
    ## using the compiler's internal parser and lexer objects.
    ## The module's file is looked up in every entry of
    ## moduleLookupPaths, in order, and an error is reported if
    ## none of them contains it. A module whose absolute path
    ## is already registered in self.modules is not compiled
    ## again. The state of the module being compiled is saved
    ## beforehand and restored once the import is done
    var path = ""
    let moduleName = module.path & ".pn"
    # We take the absolute path of the module so that we
    # know that if it's in self.modules, then we already
    # imported it
    for i, searchPath in moduleLookupPaths:
        if searchPath == "":
            # The empty search path stands for the directory
            # of the file we are currently compiling
            path = absolutePath(joinPath(splitPath(self.file).head, moduleName))
        else:
            path = absolutePath(joinPath(searchPath, moduleName))
        if fileExists(path):
            break
        elif i == moduleLookupPaths.high():
            # We ran out of places to look in. Note that the
            # index has to be compared with the last index of
            # the list of search paths, not with that of the
            # search path string itself
            self.error(&"""could not import '{path}': module not found""")
    module.absPath = path
    if self.modules.hasKey(path):
        # Module is already imported: we have
        # already compiled it
        return
    let source = readFile(path)
    # Preserve the current state so we can
    # resume compiling the current module
    # later
    let current = self.current
    let ast = self.ast
    let file = self.file
    let lines = self.lines
    let src = self.source
    let currentModule = self.currentModule
    let mainModule = self.isMainModule
    let parentModule = self.parentModule
    let replMode = self.replMode
    self.replMode = false
    # Set the current module to the new module
    # and the current module as the parent module:
    # this is needed for export statements
    self.parentModule = currentModule
    self.currentModule = module
    # We remember where the new module starts, but
    # we don't emit the bytes into the chunk right
    # away because we may call this function again
    # from within this call and it would break all
    # sorts of things
    let start = self.chunk.code.len()
    discard self.compile(self.parser.parse(self.lexer.lex(source, path),
                                           path, self.lexer.getLines(),
                                           self.lexer.getSource(), persist=true),
                         path, self.lexer.getLines(), self.lexer.getSource(), chunk=self.chunk, incremental=true,
                         isMainModule=false, self.disabledWarnings, self.showMismatches, self.mode)
    # Mark the boundaries of the module
    self.chunk.modules.extend(start.toTriple())
    self.chunk.modules.extend(self.chunk.code.high().toTriple())
    # I swear to god if someone ever creates a peon module with a name that's
    # longer than 2^16 bytes I will hit them with a metal pipe. Mark my words
    self.chunk.modules.extend(self.currentModule.ident.token.lexeme.len().toDouble())
    self.chunk.modules.extend(self.currentModule.ident.token.lexeme.toBytes())
    module.file = path
    # No need to save the old scope depth: import statements are
    # only allowed at the top level!
    self.depth = 0
    self.current = current
    self.ast = ast
    self.file = file
    self.currentModule = currentModule
    self.isMainModule = mainModule
    self.parentModule = parentModule
    self.replMode = replMode
    self.lines = lines
    self.source = src
    self.modules[module.absPath] = module
|