# peon-rewrite/src/backend/bytecode/codegen/generator.nim
#
# 458 lines, 18 KiB, Nim

import frontend/compiler/typechecker
import backend/bytecode/opcodes
import backend/bytecode/tooling/multibyte
import errors
import std/strutils
import std/parseutils
import std/tables
import std/strformat
type
  FunctionWrapper = ref object
    ## A wrapper around a typed function
    ## declaration. This is necessary to
    ## carry bytecode-specific information
    ## regarding this function along with
    ## the typed declaration itself
    # The typed declaration being wrapped
    decl: TypedFunDecl
    # NOTE(review): presumably the position of the function inside the
    # generated bytecode (it is rendered as "at 0x..." when a function
    # is printed) - confirm
    location: int
  BytecodeGenerator* = ref object
    ## A bytecode generator
    # The piece of code we compile into
    chunk: Chunk
    # The current size of the call
    # stack (which is always known
    # statically)
    stackSize: int
    # Stores the position of all jumps: `offset` is the index of the
    # jump's opcode inside the chunk's code, `patched` is set once the
    # jump's operand has been filled in
    jumps: seq[tuple[patched: bool, offset: int]]
    # Metadata regarding function locations (used to construct
    # the debugging fields in the resulting bytecode)
    functions: seq[tuple[start, stop, pos: int, fn: Name]]
    # Used for error reporting: the file and the typed node currently
    # being processed
    currentFile: string
    currentNode: TypedNode
    # The typechecker used to validate the peon code we're generating
    # bytecode for
    typeChecker: TypeChecker
proc newBytecodeGenerator*: BytecodeGenerator =
  ## Creates a fresh bytecode generator with
  ## every field left at its default value
  new(result)
# Forward declaration: handleBuiltinFunction needs to call
# generateExpression before its implementation appears further
# down in this file
proc generateExpression(self: BytecodeGenerator, expression: TypedExpr)
proc error(self: BytecodeGenerator, msg: string, typedNode: TypedNode = nil) =
  ## Raises a CodeGenError carrying the given message.
  ##
  ## The line number is taken from typedNode, or from the node
  ## currently being processed when typedNode is nil. The file
  ## name is the generator's current file; when that is empty
  ## and the node is a declaration, the name of the declaration's
  ## owner is used instead
  let culprit = if typedNode.isNil(): self.currentNode else: typedNode
  var file = self.currentFile
  if file.len() == 0 and culprit.node.isDecl():
    file = TypedDecl(culprit).name.owner.ident.token.lexeme
  raise CodeGenError(msg: msg, line: culprit.node.token.line, file: file)
proc emitByte(self: BytecodeGenerator, byt: OpCode | uint8, line: int) {.inline.} =
  ## Writes one byte (an opcode or a raw value) to the chunk
  ## being compiled, associating it with the given line
  let raw = uint8(byt)
  self.chunk.write(raw, line)
proc emitBytes(self: BytecodeGenerator, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
  ## Writes every element of bytarr, in order, to the chunk
  ## being compiled by handing each one to emitByte
  for idx in 0 .. bytarr.high():
    self.emitByte(bytarr[idx], line)
proc makeConstant(self: BytecodeGenerator, value: TypedExpr): array[3, uint8] =
  ## Adds a constant to the current chunk's constant table
  ## and returns its index as a 3-byte array of uint8s.
  ##
  ## Integer literals have their type suffix (everything from
  ## the last `'` onwards) removed before being parsed. Kinds
  ## other than Integer, String and Float write nothing and
  ## leave the result zero-initialized
  var lit: string
  if value.kind.kind == Integer:
    lit = value.node.token.lexeme
    # Drop the type suffix, if any (e.g. 10'u8 -> 10)
    let suffixStart = lit.rfind('\'')
    if suffixStart >= 0:
      lit.setLen(suffixStart)
  case value.kind.kind:
  of Integer:
    case value.kind.size:
    of Tiny:
      result = self.chunk.writeConstant([uint8(parseInt(lit))])
    of Short:
      result = self.chunk.writeConstant(parseInt(lit).toDouble())
    of Long:
      result = self.chunk.writeConstant(parseInt(lit).toQuad())
    of LongLong:
      # Only the unsigned parser can represent values above
      # high(int64), so it must be the one used for unsigned
      # 64-bit literals (the two branches used to be swapped)
      if value.kind.signed:
        result = self.chunk.writeConstant(parseInt(lit).toLong())
      else:
        result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
  of String:
    # NOTE(review): [1..^1] drops the first character of the lexeme
    # but keeps the last one, while emitConstant emits a size of
    # len - 2: confirm whether the trailing byte is meant to be stored
    result = self.chunk.writeConstant(value.node.token.lexeme[1..^1].toBytes())
  of Float:
    var f: float = 0.0
    # parseutils.parseFloat stores the parsed value in f and returns
    # the number of characters it consumed, which we don't need
    discard parseFloat(value.node.token.lexeme, f)
    case value.kind.width:
    of Half:
      result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
    of Full:
      result = self.chunk.writeConstant(cast[array[8, uint8]](f))
  else:
    discard
proc emitConstant(self: BytecodeGenerator, expression: TypedExpr) =
  ## Emits the load instruction matching the type of the given
  ## constant, followed by the 3-byte index makeConstant returns
  ## for it. String constants also carry a 3-byte size operand
  ## between the opcode and the index. For any other kind only
  ## the index bytes are emitted
  let
    constType = expression.kind
    line = expression.node.token.line
  case constType.kind:
  of Integer:
    # One load opcode per (size, signedness) pair
    let opcode =
      case constType.size
      of LongLong:
        if constType.signed: LoadInt64 else: LoadUInt64
      of Long:
        if constType.signed: LoadInt32 else: LoadUInt32
      of Short:
        if constType.signed: LoadInt16 else: LoadUInt16
      of Tiny:
        if constType.signed: LoadInt8 else: LoadUInt8
    self.emitByte(opcode, line)
  of String:
    self.emitByte(LoadString, line)
    let lexeme = LiteralExpr(expression.node).literal.lexeme
    # NOTE(review): the limit is checked against the whole lexeme
    # while the emitted size is len - 2
    if lexeme.len() >= 16777216:
      self.error("string constants cannot be larger than 16777215 bytes", expression)
    self.emitBytes((lexeme.len() - 2).toTriple(), line)
  of Float:
    let opcode =
      case constType.width
      of Half: LoadFloat32
      of Full: LoadFloat64
    self.emitByte(opcode, line)
  else:
    discard # TODO
  self.emitBytes(self.makeConstant(expression), line)
proc setJump(self: BytecodeGenerator, offset: int, jmp: array[3, uint8]) =
  ## Overwrites the three bytes that follow the
  ## given offset in the chunk's code with jmp
  for i in 0 .. 2:
    self.chunk.code[offset + 1 + i] = jmp[i]
proc setJump(self: BytecodeGenerator, offset: int, jmp: seq[uint8]) =
  ## Overwrites the three bytes that follow the given
  ## offset in the chunk's code with the first three
  ## elements of jmp
  for i in 0 .. 2:
    self.chunk.code[offset + 1 + i] = jmp[i]
proc emitJump(self: BytecodeGenerator, opcode: OpCode, line: int): int =
  ## Emits the given jump opcode followed by a zeroed 3-byte
  ## operand that is meant to be filled in later. Returns the
  ## index of the new entry in the generator's jump list, which
  ## is what patchJump expects
  self.emitByte(opcode, line)
  # The opcode we just wrote is the last byte of the code
  let opcodePos = self.chunk.code.high()
  self.jumps.add((patched: false, offset: opcodePos))
  self.emitBytes(0.toTriple(), line)
  result = self.jumps.high()
proc patchJump(self: BytecodeGenerator, offset: int) =
  ## Fills in the operand of a jump previously created by
  ## emitJump so that it lands at the current end of the code.
  ## offset is the identifier emitJump returned
  let
    start = self.jumps[offset].offset
    distance = self.chunk.code.len() - start
  if distance < 0:
    self.error("jump size cannot be negative (This is an internal error and most likely a bug)")
  if distance > 16777215:
    # TODO: Emit consecutive jumps using insertAt
    self.error("cannot jump more than 16777215 instructions")
  if distance > 0:
    # distance is measured from the jump's opcode: the 4 bytes of
    # the jump instruction itself (opcode + operand) are left out
    self.setJump(start, (distance - 4).toTriple())
    self.jumps[offset].patched = true
proc handleBuiltinFunction(self: BytecodeGenerator, fn: FunctionWrapper, args: seq[TypedExpr], line: int) =
  ## Emits the instructions implementing a call to a builtin
  ## function such as addition or subtraction.
  ##
  ## The builtin is identified by the first argument of the
  ## function's "magic" pragma. Except for LogicalOr and
  ## LogicalAnd, whose operands are generated around a jump,
  ## the operands are generated up front (with two operands,
  ## the second one is generated before the first)
  # Builtins that map directly onto a single opcode
  const codes: Table[string, OpCode] = {"Negate": Negate,
                                        "NegateFloat32": NegateFloat32,
                                        "NegateFloat64": NegateFloat64,
                                        "Add": Add,
                                        "Subtract": Subtract,
                                        "Divide": Divide,
                                        "Multiply": Multiply,
                                        "SignedDivide": SignedDivide,
                                        "AddFloat64": AddFloat64,
                                        "SubtractFloat64": SubtractFloat64,
                                        "DivideFloat64": DivideFloat64,
                                        "MultiplyFloat64": MultiplyFloat64,
                                        "AddFloat32": AddFloat32,
                                        "SubtractFloat32": SubtractFloat32,
                                        "DivideFloat32": DivideFloat32,
                                        "MultiplyFloat32": MultiplyFloat32,
                                        "Pow": Pow,
                                        "SignedPow": SignedPow,
                                        "PowFloat32": PowFloat32,
                                        "PowFloat64": PowFloat64,
                                        "Mod": Mod,
                                        "SignedMod": SignedMod,
                                        "ModFloat32": ModFloat32,
                                        "ModFloat64": ModFloat64,
                                        "Or": Or,
                                        "And": And,
                                        "Xor": Xor,
                                        "Not": Not,
                                        "LShift": LShift,
                                        "RShift": RShift,
                                        "Equal": Equal,
                                        "NotEqual": NotEqual,
                                        "LessThan": LessThan,
                                        "GreaterThan": GreaterThan,
                                        "LessOrEqual": LessOrEqual,
                                        "GreaterOrEqual": GreaterOrEqual,
                                        "SignedLessThan": SignedLessThan,
                                        "SignedGreaterThan": SignedGreaterThan,
                                        "SignedLessOrEqual": SignedLessOrEqual,
                                        "SignedGreaterOrEqual": SignedGreaterOrEqual,
                                        "Float32LessThan": Float32LessThan,
                                        "Float32GreaterThan": Float32GreaterThan,
                                        "Float32LessOrEqual": Float32LessOrEqual,
                                        "Float32GreaterOrEqual": Float32GreaterOrEqual,
                                        "Float64LessThan": Float64LessThan,
                                        "Float64GreaterThan": Float64GreaterThan,
                                        "Float64LessOrEqual": Float64LessOrEqual,
                                        "Float64GreaterOrEqual": Float64GreaterOrEqual,
                                        "PrintString": PrintString,
                                        "SysClock64": SysClock64,
                                        "LogicalNot": LogicalNot,
                                        "NegInf": LoadNInf,
                                        "Identity": Identity
                                        }.toTable()
  # The name of the builtin is the first argument of the magic
  # pragma (if there are several, the last one wins)
  var builtinOp: string
  for pragma in FunDecl(fn.decl.node).pragmas:
    if pragma.name.token.lexeme == "magic":
      builtinOp = pragma.args[0].token.lexeme
  let shortCircuits = builtinOp == "LogicalOr" or builtinOp == "LogicalAnd"
  if not shortCircuits:
    case args.len()
    of 2:
      self.generateExpression(args[1])
      self.generateExpression(args[0])
    of 1:
      self.generateExpression(args[0])
    else:
      discard
  if builtinOp == "print":
    # The print opcode depends on the type of the value
    let typ = args[0].kind
    case typ.kind:
    of Integer:
      let opcode =
        case typ.size
        of LongLong:
          if typ.signed: PrintInt64 else: PrintUInt64
        of Long:
          if typ.signed: PrintInt32 else: PrintUInt32
        of Short:
          if typ.signed: PrintInt16 else: PrintUInt16
        of Tiny:
          if typ.signed: PrintInt8 else: PrintUInt8
      self.emitByte(opcode, line)
    of Float:
      let opcode =
        case typ.width
        of Full: PrintFloat64
        of Half: PrintFloat32
      self.emitByte(opcode, line)
    of String:
      self.emitByte(PrintString, line)
    of Boolean:
      self.emitByte(PrintBool, line)
    of TypeKind.Nan:
      self.emitByte(PrintNan, line)
    of TypeKind.Infinity:
      self.emitByte(PrintInf, line)
    of Function:
      # Functions are printed as a string built at compile time.
      # NOTE(review): the location and the name come from fn (the
      # builtin being handled), not from args[0] - confirm this is
      # intended
      self.emitByte(LoadString, line)
      let hexLocation = fn.location.toHex()
      # Skip the leading zeroes, always keeping at least one digit
      var firstDigit = 0
      while firstDigit < hexLocation.high() and hexLocation[firstDigit] == '0':
        inc(firstDigit)
      let loc = hexLocation[firstDigit .. ^1]
      let str =
        if typ.isLambda:
          &"anonymous function at 0x{loc}"
        else:
          &"function '{FunDecl(fn.decl.node).name.token.lexeme}' at 0x{loc}"
      self.emitBytes(str.len().toTriple(), line)
      self.emitBytes(self.chunk.writeConstant(str.toBytes()), line)
      self.emitByte(PrintString, line)
    else:
      self.error(&"invalid type {self.typechecker.stringify(typ)} for built-in 'print'", args[0])
  elif builtinOp in codes:
    self.emitByte(codes[builtinOp], line)
  else:
    # Some builtin operations are slightly more complex
    # so we handle them separately
    case builtinOp:
    of "LogicalOr", "LogicalAnd":
      # The right operand is only generated after the jump that
      # is taken depending on the value of the left one
      let jumpOp = if builtinOp == "LogicalOr": JumpIfTrue else: JumpIfFalseOrPop
      self.generateExpression(args[0])
      let jump = self.emitJump(jumpOp, line)
      self.generateExpression(args[1])
      self.patchJump(jump)
    of "cast":
      # Type casts are a merely compile-time construct:
      # they don't produce any code at runtime because
      # the underlying data representation does not change!
      # The only reason why there's a "cast" pragma is to
      # make it so that the peon stub can have no body
      discard
    else:
      self.error(&"unknown built-in: '{builtinOp}'")
proc patchReturnAddress(self: BytecodeGenerator, pos: int) =
  ## Overwrites the 8 bytes starting at pos in the chunk's
  ## constant table with the current length of the code,
  ## which is the return address of a function call
  let address = self.chunk.code.len().toLong()
  for i in 0 .. 7:
    self.chunk.consts[pos + i] = address[i]
proc generateLiteral(self: BytecodeGenerator, literal: TypedExpr) =
  ## Emits the code that loads a literal value
  # NOTE(review): emitConstant also knows how to load String
  # constants, but they are not routed to it from here and end
  # up in the error branch - confirm whether that is intended
  let node = literal.node
  case literal.kind.kind:
  of Integer, Float:
    # No need to do any input validation here: the typechecker
    # has graciously done all the work for us! :)
    self.emitConstant(literal)
  of Infinity:
    let opcode = if literal.kind.positive: LoadInf else: LoadNInf
    self.emitByte(opcode, node.token.line)
  of NaN:
    self.emitByte(LoadNaN, node.token.line)
  else:
    self.error(&"Unknown typed node of type {node.kind} at generateLiteral()")
proc generateUnary(self: BytecodeGenerator, expression: TypedExpr) =
  ## Emits code for unary expressions
  # NOTE(review): no bytecode is emitted yet. This only prints the
  # dereferenced expression to standard output, which looks like a
  # debugging placeholder
  echo expression[]
proc generateExpression(self: BytecodeGenerator, expression: TypedExpr) =
  ## Emits code for expressions: constants are handed to
  ## generateLiteral, anything else is dispatched on the
  ## kind of its node
  if expression.node.isConst():
    self.generateLiteral(expression)
    return
  let node = expression.node
  case node.kind:
  of binaryExpr:
    # NOTE(review): binary expressions are handed to generateUnary
    # while unary ones emit nothing - the two branches look swapped
    # or unfinished, confirm
    self.generateUnary(expression)
  of unaryExpr:
    discard
  else:
    self.error(&"Unknown typed node of type {node.kind} at generateExpression()")
proc beginProgram(self: BytecodeGenerator): int =
  ## Emits the boilerplate that sets up a peon program: two
  ## 64-bit constant loads (the initial jump address and a
  ## placeholder return address) followed by a call. Returns
  ## the position of the placeholder in the chunk's constant
  ## table so that it can be patched later
  const setupLine = 1
  self.emitByte(LoadUInt64, setupLine)
  # The initial jump address is always the same
  let entryIndex = self.chunk.writeConstant(12.toLong())
  self.emitBytes(entryIndex, setupLine)
  self.emitByte(LoadUInt64, setupLine)
  # We emit a dummy return address which is patched later
  let placeholderIndex = self.chunk.writeConstant(0.toLong())
  self.emitBytes(placeholderIndex, setupLine)
  # The placeholder is 8 bytes long and was just written to the
  # constant table, so it starts 8 bytes before its end
  result = self.chunk.consts.len() - 8
  self.emitByte(Call, setupLine)
  self.emitBytes(0.toTriple(), setupLine)
proc endProgram(self: BytecodeGenerator, pos: int) =
  ## Emits boilerplate code to tear down
  ## a peon program and patches the return
  ## address stored at position pos of the
  ## chunk's constant table
  # currentNode is only assigned while generate() walks the compiled
  # nodes, so it is still nil when the program is empty: in that case
  # we fall back to line 1, the same line beginProgram uses for its
  # own boilerplate, instead of dereferencing nil
  let line =
    if self.currentNode.isNil():
      1
    else:
      self.currentNode.node.token.line
  self.emitByte(OpCode.Return, line)
  # Entry point has no return value
  self.emitByte(0, line)
  # Patch the return address now that we know the boundaries
  # of the function
  self.patchReturnAddress(pos)
proc generate*(self: BytecodeGenerator, compiled: seq[TypedNode], typeChecker: TypeChecker): Chunk =
  ## Compiles the given typed nodes into a brand new bytecode
  ## chunk and returns it. Only expression statements are
  ## supported: any other node raises an error
  self.chunk = newChunk()
  self.typeChecker = typeChecker
  let returnAddressPos = self.beginProgram()
  for typedNode in compiled:
    self.currentNode = typedNode
    # While a declaration is being processed, errors are reported
    # against the file of its module: the previous file is restored
    # once the node has been handled
    let previousFile = self.currentFile
    if typedNode.node.isDecl():
      self.currentFile = TypedDecl(typedNode).name.module.ident.token.lexeme
    if typedNode.node.kind == exprStmt:
      self.generateExpression(TypedExprStmt(typedNode).expression)
      # Emitted after every expression statement
      self.emitByte(Pop, typedNode.node.token.line)
    else:
      self.error(&"Unknown typed node of type {typedNode.node.kind} at generate()")
    self.currentFile = previousFile
  self.endProgram(returnAddressPos)
  result = self.chunk