
241 lines
9.9 KiB

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Low level bytecode implementation details
import strutils
import strformat
import ../../util/multibyte
type
  Chunk* = ref object
    ## A piece of bytecode.
    ##
    ## - `consts` is used when serializing to/from a bytecode stream.
    ## - `code` is the linear sequence of compiled bytecode instructions.
    ## - `lines` maps bytecode instructions to line numbers using Run
    ##   Length Encoding. As read and written by `write` and `getLine`,
    ##   it is a flat sequence of (line number, instruction count) pairs:
    ##   `[1, 2, 3, 4]` is to be interpreted as "2 instructions at line 1,
    ##   followed by 4 instructions at line 3". This is more efficient
    ##   than the naive approach, which would encode the same line number
    ##   once per instruction and waste considerable amounts of space.
    ## - `cfi` represents Call Frame Information and encodes the
    ##   following information:
    ##   - Function name
    ##   - Stack bottom
    ##   - Argument count
    ##
    ## The encoding for CFI data is the following:
    ## - First, the position into the bytecode where the function begins
    ##   is encoded (as a 3 byte integer)
    ## - Second, the position into the bytecode where the function ends
    ##   is encoded (as a 3 byte integer)
    ## - Then, the frame's stack bottom is encoded as a 3 byte integer
    ## - After the frame's stack bottom follows the argument count as a
    ##   1 byte integer
    ## - Lastly, the function's name (optional) is encoded in ASCII,
    ##   prepended with its size as a 2-byte integer
    consts*: seq[uint8]  ## Constant table bytes
    code*: seq[uint8]    ## Compiled bytecode instructions
    lines*: seq[int]     ## Run-length encoded (line, count) pairs
    cfi*: seq[uint8]     ## Encoded call frame information
type
  OpCode* {.pure.} = enum
    ## Enum of Peon's bytecode opcodes

    # Note: x represents the argument
    # to unary opcodes, while a and b
    # represent arguments to binary
    # opcodes. Other variable names (c, d, ...)
    # may be used for more complex opcodes. If
    # an opcode takes any arguments at runtime,
    # they come from either the stack or the VM's
    # closure array. Some other opcodes (e.g.
    # jumps), take arguments in the form of 16
    # or 24 bit numbers that are defined statically
    # at compilation time into the bytecode

    # NOTE(review): the member order below is kept exactly as found, because
    # each member's ordinal is the byte written into the bytecode. The
    # instruction sets declared after this enum reference members that are
    # not declared here (e.g. LoadNil, LoadTrue, LoadUInt64, LongJump), and
    # the "Singleton opcodes" and "Long variants of jumps" sections are
    # empty -- presumably some members are missing; confirm against the
    # canonical opcode list before adding them, since insertion shifts ordinals.

    # These push a constant onto the stack
    LoadInt64 = 0u8,
    ## Singleton opcodes (each of them pushes a constant singleton on the stack)
    ## Basic stack operations
    Pop,              # Pops an element off the stack and discards it
    Push,             # Pushes x onto the stack
    PopN,             # Pops x elements off the stack (optimization for exiting local scopes which usually pop many elements)
    ## Name resolution/handling
    LoadAttribute,    # Pushes the attribute b of object a onto the stack
    LoadVar,          # Pushes the object at position x in the stack onto the stack
    StoreVar,         # Stores the value of b at position a in the stack
    LoadHeap,         # Pushes the object at position x in the closure array onto the stack
    StoreHeap,        # Stores the value of b at position a in the closure array
    ## Looping and jumping
    Jump,             # Absolute, unconditional jump into the bytecode
    JumpForwards,     # Relative, unconditional, positive jump in the bytecode
    JumpBackwards,    # Relative, unconditional, negative jump in the bytecode
    JumpIfFalse,      # Jumps to a relative index in the bytecode if x is false
    JumpIfTrue,       # Jumps to a relative index in the bytecode if x is true
    JumpIfFalsePop,   # Like JumpIfFalse, but also pops off the stack (regardless of truthiness). Optimization for if statements
    JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
    ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
    ## Functions
    Call,             # Calls a function and initiates a new stack frame
    Return,           # Terminates the current function without popping off the stack
    ReturnValue,      # Pops a return value off the stack and terminates the current function
    ## Exception handling
    Raise,            # Raises exception x or re-raises active exception if x is nil
    BeginTry,         # Initiates an exception handling context
    FinishTry,        # Closes the current exception handling context
    ## Generators
    Yield,            # Yields control from a generator back to the caller
    ## Coroutines
    Await,            # Calls an asynchronous function
    ## Misc
    Assert,           # Raises an AssertionFailed exception if x is false
    NoOp,             # Just a no-op
# We group instructions by their operation/operand types for easier handling when debugging
# Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {OpCode.Return, LoadNil,
                             LoadTrue, LoadFalse,
                             LoadNan, LoadInf,
                             Pop, OpCode.Raise,
                             BeginTry, FinishTry,
                             OpCode.Yield, OpCode.Await,
                             OpCode.NoOp}
  ## Opcodes that take no inline operand. The set literal is now closed,
  ## and the second `OpCode.Return` entry was dropped: repeating a member
  ## in a set literal does not change the resulting set.
  # NOTE(review): the literal ended on a trailing comma with no closing
  # brace, so a final member may have been lost -- confirm.
# Constant instructions are instructions that operate on the bytecode constant table
const constantInstructions* = {LoadInt64, LoadUInt64,
                               LoadInt32, LoadUInt32,
                               LoadInt16, LoadUInt16,
                               LoadInt8, LoadUInt8,
                               LoadFloat64, LoadFloat32}
  ## Opcodes that operate on the bytecode constant table. The set
  ## literal is now closed.
  # NOTE(review): the literal ended on a trailing comma with no closing
  # brace, so a final member may have been lost -- confirm.
# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 24 bit integers
const stackTripleInstructions* = {
  LoadVar, StoreVar,    # stack slots
  LoadHeap, StoreHeap   # closure array slots
}
# Stack double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 16 bit integers
const stackDoubleInstructions*: set[OpCode] = {}
  ## Currently empty. The explicit `set[OpCode]` type gives the constant
  ## the same element type as the other instruction sets, where a bare
  ## `{}` literal would leave it without one.
# Argument double instructions take hardcoded arguments as 16 bit integers
const argumentDoubleInstructions* = {PopN}
# Argument triple instructions take hardcoded arguments as 24 bit integers
const argumentTripleInstructions*: set[OpCode] = {}
  ## Currently empty. The explicit `set[OpCode]` type gives the constant
  ## the same element type as the other instruction sets, where a bare
  ## `{}` literal would leave it without one.
# Instructions that call functions
const callInstructions* = {Call}
# Jump instructions jump at relative or absolute bytecode offsets
const jumpInstructions* = {
  # Jumps listed next to their Long counterparts
  Jump, LongJump,
  JumpForwards, LongJumpForwards,
  JumpBackwards, LongJumpBackwards,
  JumpIfFalse, LongJumpIfFalse,
  JumpIfFalsePop, LongJumpIfFalsePop,
  JumpIfTrue, LongJumpIfTrue
}
proc newChunk*: Chunk =
  ## Allocates a fresh chunk whose constant table, code,
  ## line table and CFI data all start out empty
  new(result)
  result.consts = @[]
  result.code = @[]
  result.lines = @[]
  result.cfi = @[]
proc `$`*(self: Chunk): string =
  ## Renders the chunk's consts, code and lines sequences as
  ## comma-separated lists (the cfi field is not included)
  result = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])"""
proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Adds the given instruction at the provided line number
  ## to the given chunk object.
  ##
  ## `self.lines` is kept as a flat list of (line, count) pairs:
  ## when the most recent pair already refers to `line` its count
  ## is bumped, otherwise a new pair with a count of 1 is started.
  ## The byte itself is always appended to `self.code`.
  assert line > 0, "line must be greater than zero"
  if self.lines.high() >= 1 and self.lines[^2] == line:
    # Same line as the previous instruction: extend the current run
    self.lines[^1] += 1
  else:
    # First instruction, or first instruction on a new line: open a new run
    self.lines.add(line)
    self.lines.add(1)
  self.code.add(newByte)
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array, in order, by
  ## delegating to the single-byte write with the same line
  for i in 0 ..< bytes.len:
    write(self, bytes[i], line)
proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Converts the opcode to its byte value and writes it
  ## to the chunk at the provided line number
  let raw = newByte.uint8
  write(self, raw, line)
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array, in order,
  ## as its byte value at the provided line number
  for op in bytes:
    let raw = op.uint8
    write(self, raw, line)
proc getLine*(self: Chunk, idx: int): int =
  ## Returns the associated line of a given
  ## instruction index.
  ##
  ## `self.lines` is walked as (line, count) pairs while `current`
  ## tracks the index of the first instruction covered by the pair
  ## being examined. Raises IndexDefect if the chunk has no line
  ## information or if `idx` is not covered by any pair (negative
  ## indices included).
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  var current = 0
  # Stop at high() - 1 so that self.lines[n + 1] always exists, even
  # if the sequence were to hold a dangling, unpaired element
  for n in countup(0, self.lines.high() - 1, 2):
    let count = self.lines[n + 1]
    # This run covers instructions current ..< current + count. The lower
    # bound must be `current`, not `current - count`: the latter lets
    # negative indices match the first run instead of being rejected
    if idx in current ..< current + count:
      return self.lines[n]
    current += count
  raise newException(IndexDefect, "index out of range")
proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
## Writes a series of bytes to the chunk's constant
## table and returns the index of the first byte as
## an array of 3 bytes
result = self.consts.len().toTriple()
for b in data: