# peon/src/frontend/meta/bytecode.nim (342 lines, 13 KiB, Nim)

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Low level bytecode implementation details
import std/strutils
import std/strformat
import ../../util/multibyte
type
  Chunk* = ref object
    ## A piece of bytecode together with its metadata.
    ##
    ## consts is the constants table of the code.
    ##
    ## code is the linear sequence of compiled bytecode instructions.
    ##
    ## lines maps bytecode instructions to line numbers using Run
    ## Length Encoding. Its entries come in pairs:
    ## - The first integer of a pair is the line number
    ## - The second integer of a pair is the number of
    ##   instructions on that line
    ## For example, if lines equals [1, 5], there are 5 instructions
    ## at line 1, i.e. all instructions in code[0..4] belong to the
    ## same line. Storing every line number once per instruction would
    ## repeat the same value many times and waste considerable space.
    ##
    ## cfi is the Call Frame Information. For a function it encodes:
    ## - Function name
    ## - Argument count
    ## - Function boundaries
    ## The encoding of CFI data is the following:
    ## - First, the position into the bytecode where the function
    ##   begins (as a 3 byte integer)
    ## - Second, the position into the bytecode where the function
    ##   ends (as a 3 byte integer)
    ## - Then the argument count, as a 1 byte integer
    ## - Lastly the function's name (optional), encoded in ASCII and
    ##   prepended with its size as a 2 byte integer
    consts*, code*: seq[uint8]
    lines*: seq[int]
    cfi*: seq[uint8]
type
  OpCode* {.pure.} = enum
    ## Enum of Peon's bytecode opcodes
    # Notation used in the comments below: x is the argument
    # of a unary opcode, a and b are the arguments of a binary
    # opcode and further names (c, d, ...) may show up for more
    # complex opcodes. Arguments that an opcode takes at runtime
    # come from either the stack or the VM's closure array. Some
    # other opcodes (e.g. jumps) take arguments in the form of
    # 16 or 24 bit numbers that are written statically into the
    # bytecode at compilation time

    # Constant loads: these push the constant at position x in
    # the constant table onto the stack
    LoadInt64 = 0u8, LoadUInt64,
    LoadInt32, LoadUInt32,
    LoadInt16, LoadUInt16,
    LoadInt8, LoadUInt8,
    LoadFloat64, LoadFloat32,
    LoadString,
    # Singleton opcodes (each of them pushes a constant singleton on the stack)
    LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf,
    # Operations on primitive types
    Negate, NegateFloat64, NegateFloat32,
    Add, Subtract, Multiply, Divide, SignedDivide,
    AddFloat64, SubtractFloat64, MultiplyFloat64, DivideFloat64,
    AddFloat32, SubtractFloat32, MultiplyFloat32, DivideFloat32,
    Pow, SignedPow, Mod, SignedMod,
    PowFloat64, PowFloat32, ModFloat64, ModFloat32,
    LShift, RSHift, Xor, Or, And, Not,
    Equal, NotEqual, GreaterThan, LessThan, GreaterOrEqual, LessOrEqual,
    # Print opcodes
    PrintInt64, PrintUInt64, PrintInt32, PrintUInt32,
    PrintInt16, PrintUint16, PrintInt8, PrintUInt8,
    PrintFloat64, PrintFloat32,
    PrintHex, PrintBool, PrintNan, PrintInf, PrintString,
    # Basic stack operations
    Pop,               # Pops an element off the stack and discards it
    PopRepl,           # Same as Pop, but also prints the value of what's popped (used in REPL mode)
    PopN,              # Pops x elements off the call stack (optimization for exiting local scopes which usually pop many elements)
    # Name resolution/handling
    LoadAttribute,     # Pushes the attribute b of object a onto the stack
    LoadVar,           # Pushes the object at position x in the stack onto the stack
    StoreVar,          # Stores the value of b at position a in the stack
    LoadClosure,       # Pushes the object at position x in the closure array onto the stack
    StoreClosure,      # Stores the value of b at position a in the closure array
    LiftArgument,      # Closes over a function argument
    PopClosure,
    # Looping and jumping
    Jump,              # Absolute, unconditional jump into the bytecode
    JumpForwards,      # Relative, unconditional, positive jump in the bytecode
    JumpBackwards,     # Relative, unconditional, negative jump in the bytecode
    JumpIfFalse,       # Jumps to a relative index in the bytecode if x is false
    JumpIfTrue,        # Jumps to a relative index in the bytecode if x is true
    JumpIfFalsePop,    # Like JumpIfFalse, but also pops off the stack (regardless of truthiness). Optimization for if statements
    JumpIfFalseOrPop,  # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
    # Functions
    Call,              # Calls a function and initiates a new stack frame
    CallClosure,       # Calls a closure
    Return,            # Terminates the current function
    SetResult,         # Sets the result of the current function
    # Exception handling
    Raise,             # Raises exception x or re-raises active exception if x is nil
    BeginTry,          # Initiates an exception handling context
    FinishTry,         # Closes the current exception handling context
    # Generators
    Yield,             # Yields control from a generator back to the caller
    # Coroutines
    Await,             # Calls an asynchronous function
    # Misc
    Assert,            # Raises an AssertionFailed exception if x is false
    NoOp,              # Just a no-op
    PopC,              # Pop off the call stack onto the operand stack
    PushC,             # Pop off the operand stack onto the call stack
    SysClock64         # Pushes the output of a monotonic clock on the stack
# Instructions are grouped by their operation/operand types for easier handling when debugging

# Simple instructions encompass instructions that push onto/pop off the stack
# unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {
  # Singletons
  LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf,
  # Stack handling, control flow and miscellanea
  Pop, PopC, PushC,
  Return, SetResult,
  Raise, BeginTry, FinishTry,
  Yield, Await,
  NoOp, SysClock64,
  # Operations on primitive types
  Negate, NegateFloat64, NegateFloat32,
  Add, Subtract, Multiply, Divide, SignedDivide,
  AddFloat64, SubtractFloat64, MultiplyFloat64, DivideFloat64,
  AddFloat32, SubtractFloat32, MultiplyFloat32, DivideFloat32,
  Pow, SignedPow, Mod, SignedMod,
  PowFloat64, PowFloat32, ModFloat64, ModFloat32,
  LShift, RSHift, Xor, Or, And, Not,
  Equal, NotEqual, GreaterThan, LessThan, GreaterOrEqual, LessOrEqual,
  # Print opcodes
  PrintInt64, PrintUInt64, PrintInt32, PrintUInt32,
  PrintInt16, PrintUint16, PrintInt8, PrintUInt8,
  PrintFloat64, PrintFloat32,
  PrintHex, PrintBool, PrintNan, PrintInf, PrintString
}
# Constant instructions are the ones that operate on the bytecode's constant table
const constantInstructions* = {
  LoadInt64, LoadInt32, LoadInt16, LoadInt8,
  LoadUInt64, LoadUInt32, LoadUInt16, LoadUInt8,
  LoadFloat64, LoadFloat32,
  LoadString
}
# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 24 bit integers
# NOTE(review): StoreClosure is not part of this set although LoadClosure is -- confirm that this is intentional
const stackTripleInstructions* = {StoreVar, LoadVar, LoadClosure, LiftArgument, PopClosure}
# Stack double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 16 bit integers. The set is currently empty
const stackDoubleInstructions* = {}
# Argument double instructions take hardcoded arguments in the form of 16 bit integers
const argumentDoubleInstructions* = {PopN}
# Jump instructions jump to relative or absolute offsets in the bytecode
# NOTE(review): JumpIfFalseOrPop is not part of this set -- confirm that this is intentional
const jumpInstructions* = {
  Jump, JumpForwards, JumpBackwards,
  JumpIfTrue, JumpIfFalse, JumpIfFalsePop
}
proc newChunk*(): Chunk =
  ## Creates a new chunk whose constant table, code,
  ## line table and CFI data are all empty
  Chunk(
    consts: newSeq[uint8](),
    code: newSeq[uint8](),
    lines: newSeq[int](),
    cfi: newSeq[uint8]()
  )
proc `$`*(self: Chunk): string =
  ## Renders the chunk's constant table, code and line table
  ## as a string, each as a comma-separated list of numbers.
  ## The cfi field is not part of the output
  let
    constants = self.consts.join(", ")
    instructions = self.code.join(", ")
    lineTable = self.lines.join(", ")
  result = &"Chunk(consts=[{constants}], code=[{instructions}], lines=[{lineTable}])"
proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Appends the given byte to the chunk's code and records
  ## it in the line table as belonging to the given line
  ## number, which must be greater than zero
  assert line > 0, "line must be greater than zero"
  if self.lines.len < 2 or self.lines[^2] != line:
    # The line differs from the one of the last run (or there
    # is no run yet): open a new (line, count) pair
    self.lines.add([line, 1])
  else:
    # Same line as the last run: just bump its counter
    inc(self.lines[^1])
  self.code.add(newByte)
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array to the chunk,
  ## in order, all at the same line number
  for pos in 0 ..< bytes.len:
    self.write(bytes[pos], line)
proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Converts the given opcode to its byte value and
  ## writes it to the chunk at the provided line number
  let raw = newByte.uint8
  self.write(raw, line)
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array to the chunk,
  ## in order, all at the same line number
  for pos in 0 ..< bytes.len:
    let raw = uint8(bytes[pos])
    self.write(raw, line)
proc getLine*(self: Chunk, idx: int): int =
  ## Returns the line number associated with the instruction
  ## at the given index into the chunk's code.
  ##
  ## Raises IndexDefect if the line table is empty or if idx
  ## is not covered by any run of the table (this includes
  ## negative indices)
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  # Index of the first instruction covered by the run being examined
  var first = 0
  for n in countup(0, self.lines.high(), 2):
    # self.lines[n] is a line number and self.lines[n + 1]
    # is the number of instructions found on that line
    let count = self.lines[n + 1]
    # A run covers exactly first..<first + count. Starting the
    # range any lower would make negative indices resolve to
    # the first line instead of being rejected
    if idx in first ..< first + count:
      return self.lines[n]
    first += count
  raise newException(IndexDefect, "index out of range")
proc getIdx*(self: Chunk, line: int): int =
  ## Gets the index into self.lines
  ## where the line counter for the given
  ## line is located.
  ##
  ## As implemented, only the odd positions of self.lines are
  ## scanned and the first one whose value equals line is
  ## returned; 0 is returned if there is no such position
  # NOTE(review): per the Chunk documentation the odd positions of
  # self.lines hold instruction counts while the even ones hold line
  # numbers, so this compares a count against a line number. Confirm
  # against the callers which index they expect before changing it
  for pos in countup(1, self.lines.high(), 2):
    if self.lines[pos] == line:
      return pos
proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
  ## Appends the given bytes to the chunk's constant table
  ## and returns the position at which the first of them was
  ## written, converted to an array of 3 bytes via toTriple
  let start = self.consts.len()
  self.consts.add(data)
  result = start.toTriple()