352 lines
13 KiB
Nim
352 lines
13 KiB
Nim
# Copyright 2022 Mattia Giambirtone & All Contributors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
## Low level bytecode implementation details
|
|
|
|
import strutils
|
|
import strformat
|
|
|
|
import ../../util/multibyte
|
|
|
|
|
|
type
  Chunk* = ref object
    ## A piece of bytecode.
    ##
    ## - consts is the code's constants table.
    ## - code is the linear sequence of compiled bytecode instructions.
    ## - lines maps bytecode instructions to line numbers using Run
    ##   Length Encoding. It is a flat sequence of (line, count) pairs:
    ##   - The first integer of a pair is the line number
    ##   - The second integer is the count of consecutive bytes of
    ##     bytecode that were written for that line
    ##
    ##   A visual representation may be easier to understand:
    ##   [1, 2, 3, 4] is to be interpreted as "the first 2 bytes of code
    ##   belong to line 1 and the following 4 bytes belong to line 3".
    ##   This is more efficient than using the naive approach, which would
    ##   encode the same line number once per byte and waste considerable
    ##   amounts of space.
    ## - cfi represents Call Frame Information and encodes the following
    ##   information:
    ##   - Function name
    ##   - Argument count
    ##   - Function boundaries
    ##
    ##   The encoding for CFI data is the following:
    ##   - First, the position into the bytecode where the function begins
    ##     is encoded (as a 3 byte integer)
    ##   - Second, the position into the bytecode where the function ends
    ##     is encoded (as a 3 byte integer)
    ##   - After that follows the argument count as a 1 byte integer
    ##   - Lastly, the function's name (optional) is encoded in ASCII,
    ##     prepended with its size as a 2-byte integer
    consts*: seq[uint8]
    code*: seq[uint8]
    lines*: seq[int]
    cfi*: seq[uint8]

  OpCode* {.pure.} = enum
    ## Enum of Peon's bytecode opcodes

    # Note: x represents the argument
    # to unary opcodes, while a and b
    # represent arguments to binary
    # opcodes. Other variable names (c, d, ...)
    # may be used for more complex opcodes. If
    # an opcode takes any arguments at runtime,
    # they come from either the stack or the VM's
    # closure array. Some other opcodes (e.g.
    # jumps), take arguments in the form of 16
    # or 24 bit numbers that are defined statically
    # at compilation time into the bytecode

    # These push a constant at position x in the
    # constant table onto the stack
    LoadInt64 = 0u8,
    LoadUInt64,
    LoadInt32,
    LoadUInt32,
    LoadInt16,
    LoadUInt16,
    LoadInt8,
    LoadUInt8,
    LoadFloat64,
    LoadFloat32,
    LoadString,
    LoadFunction,
    LoadReturnAddress,
    # Singleton opcodes (each of them pushes a constant singleton on the stack)
    LoadNil,
    LoadTrue,
    LoadFalse,
    LoadNan,
    LoadInf,
    # Operations on primitive types
    GenericPrint,
    NegInt64,   # No unsigned variants (how would you negate something that has no sign?)
    NegInt32,
    NegInt16,
    NegInt8,
    NegFloat32,
    NegFloat64,
    AddInt64,
    AddUInt64,
    AddInt32,
    AddUInt32,
    AddInt16,
    AddUInt16,
    AddInt8,
    AddUInt8,
    SubInt64,
    SubUInt64,
    SubInt32,
    SubUInt32,
    SubInt16,
    SubUInt16,
    SubInt8,
    SubUInt8,
    MulInt64,
    MulUInt64,
    MulInt32,
    MulUInt32,
    MulInt16,
    MulUInt16,
    MulInt8,
    MulUInt8,
    DivInt64,
    DivUInt64,
    DivInt32,
    DivUInt32,
    DivInt16,
    DivUInt16,
    DivInt8,
    DivUInt8,
    AddFloat64,
    SubFloat64,
    DivFloat64,
    MulFloat64,
    AddFloat32,
    SubFloat32,
    DivFloat32,
    MulFloat32,
    LessThanInt64,
    GreaterThanInt64,
    EqualInt64,
    NotEqualInt64,
    LessThanUInt64,
    GreaterThanUInt64,
    EqualUInt64,
    NotEqualUInt64,
    LessThanInt32,
    GreaterThanInt32,
    EqualInt32,
    NotEqualInt32,
    LessThanUInt32,
    GreaterThanUInt32,
    EqualUInt32,
    NotEqualUInt32,
    LessThanInt16,
    GreaterThanInt16,
    EqualInt16,
    NotEqualInt16,
    LessThanUInt16,
    GreaterThanUInt16,
    EqualUInt16,
    NotEqualUInt16,
    LessThanInt8,
    GreaterThanInt8,
    EqualInt8,
    NotEqualInt8,
    LessThanUInt8,
    GreaterThanUInt8,
    EqualUInt8,
    NotEqualUInt8,
    LessThanFloat64,
    GreaterThanFloat64,
    EqualFloat64,
    NotEqualFloat64,
    LessThanFloat32,
    GreaterThanFloat32,
    EqualFloat32,
    NotEqualFloat32,
    SysClock64,
    # Basic stack operations
    Pop,        # Pops an element off the stack and discards it
    PopRepl,    # Same as Pop, but also prints the value of what's popped (used in REPL mode)
    PopN,       # Pops x elements off the call stack (optimization for exiting local scopes which usually pop many elements)
    # Name resolution/handling
    LoadAttribute, # Pushes the attribute b of object a onto the stack
    LoadVar,       # Pushes the object at position x in the stack onto the stack
    StoreVar,      # Stores the value of b at position a in the stack
    LoadClosure,   # Pushes the object position x in the closure array onto the stack
    StoreClosure,  # Stores the value of b at position a in the closure array
    # Looping and jumping
    Jump,             # Absolute, unconditional jump into the bytecode
    JumpForwards,     # Relative, unconditional, positive jump in the bytecode
    JumpBackwards,    # Relative, unconditional, negative jump in the bytecode
    JumpIfFalse,      # Jumps to a relative index in the bytecode if x is false
    JumpIfTrue,       # Jumps to a relative index in the bytecode if x is true
    JumpIfFalsePop,   # Like JumpIfFalse, but also pops off the stack (regardless of truthyness). Optimization for if statements
    JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
    # Functions
    Call,      # Calls a function and initiates a new stack frame
    Return,    # Terminates the current function
    SetResult, # Sets the result of the current function
    # Exception handling
    Raise,     # Raises exception x or re-raises active exception if x is nil
    BeginTry,  # Initiates an exception handling context
    FinishTry, # Closes the current exception handling context
    # Generators
    Yield,     # Yields control from a generator back to the caller
    # Coroutines
    Await,     # Calls an asynchronous function
    # Misc
    Assert,    # Raises an AssertionFailed exception if x is false
    NoOp,      # Just a no-op
    PopC,      # Pop off the call stack onto the operand stack
    PushC      # Pop off the operand stack onto the call stack
|
|
|
|
|
|
# We group instructions by their operation/operand types for easier handling when debugging
|
|
|
|
# Simple instructions encompass instructions that push onto/pop off the stack
# unconditionally (True, False, Pop, etc.). The members are listed by category:
# control primitives, singleton loads, stack shuffling, negation, arithmetic
# and comparisons. The integer negation opcodes (NegInt64..NegInt8) are listed
# next to their floating point counterparts so that every negation opcode
# belongs to this group.
const simpleInstructions* = {
  Return, SetResult, Raise, BeginTry, FinishTry, Yield, Await, NoOp,
  LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf,
  Pop, PopRepl, PopC, PushC, SysClock64, GenericPrint,
  NegInt64, NegInt32, NegInt16, NegInt8, NegFloat32, NegFloat64,
  AddInt64, AddUInt64, AddInt32, AddUInt32,
  AddInt16, AddUInt16, AddInt8, AddUInt8,
  SubInt64, SubUInt64, SubInt32, SubUInt32,
  SubInt16, SubUInt16, SubInt8, SubUInt8,
  MulInt64, MulUInt64, MulInt32, MulUInt32,
  MulInt16, MulUInt16, MulInt8, MulUInt8,
  DivInt64, DivUInt64, DivInt32, DivUInt32,
  DivInt16, DivUInt16, DivInt8, DivUInt8,
  AddFloat64, SubFloat64, DivFloat64, MulFloat64,
  AddFloat32, SubFloat32, DivFloat32, MulFloat32,
  LessThanInt64, GreaterThanInt64, EqualInt64, NotEqualInt64,
  LessThanUInt64, GreaterThanUInt64, EqualUInt64, NotEqualUInt64,
  LessThanInt32, GreaterThanInt32, EqualInt32, NotEqualInt32,
  LessThanUInt32, GreaterThanUInt32, EqualUInt32, NotEqualUInt32,
  LessThanInt16, GreaterThanInt16, EqualInt16, NotEqualInt16,
  LessThanUInt16, GreaterThanUInt16, EqualUInt16, NotEqualUInt16,
  LessThanInt8, GreaterThanInt8, EqualInt8, NotEqualInt8,
  LessThanUInt8, GreaterThanUInt8, EqualUInt8, NotEqualUInt8,
  LessThanFloat64, GreaterThanFloat64, EqualFloat64, NotEqualFloat64,
  LessThanFloat32, GreaterThanFloat32, EqualFloat32, NotEqualFloat32}
|
|
|
|
# Constant instructions are the Load* opcodes in this set: the ones that
# operate on the bytecode constant table
const constantInstructions* = {
  LoadInt8, LoadUInt8, LoadInt16, LoadUInt16,
  LoadInt32, LoadUInt32, LoadInt64, LoadUInt64,
  LoadFloat32, LoadFloat64, LoadString}
|
|
|
|
# Stack triple instructions operate on the stack at arbitrary offsets and
# pop arguments off of it in the form of 24 bit integers
const stackTripleInstructions* = {StoreVar, LoadVar, LoadClosure}
|
|
|
|
# Stack double instructions operate on the stack at arbitrary offsets and
# pop arguments off of it in the form of 16 bit integers. No opcode falls
# in this group at the moment, hence the empty set
const stackDoubleInstructions* = {}
|
|
|
|
# Argument double instructions take hardcoded arguments as 16 bit integers
const argumentDoubleInstructions* = {PopN}
|
|
|
|
# Argument triple instructions take hardcoded arguments as 24 bit integers
const argumentTripleInstructions* = {
  StoreClosure}
|
|
|
|
# Call instructions are the opcodes that invoke functions
const callInstructions* = {Call}
|
|
|
|
# Jump instructions jump at relative or absolute bytecode offsets.
# NOTE(review): JumpIfFalseOrPop is declared in OpCode but is not part of
# this set - confirm whether that is intentional
const jumpInstructions* = {
  Jump, JumpForwards, JumpBackwards,
  JumpIfTrue, JumpIfFalse, JumpIfFalsePop}
|
|
|
|
|
|
proc newChunk*: Chunk =
  ## Creates a fresh chunk whose constants table, bytecode,
  ## line table and CFI data all start out empty
  new(result)
  result.consts = newSeq[uint8]()
  result.code = newSeq[uint8]()
  result.lines = newSeq[int]()
  result.cfi = newSeq[uint8]()
|
|
|
|
|
|
proc `$`*(self: Chunk): string =
  ## Renders the chunk's constants table, bytecode and line table
  ## as comma-separated lists (the cfi field is not part of the output)
  result = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])"""
|
|
|
|
|
|
proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Appends a single byte of bytecode to the chunk and records
  ## it in the run-length encoded line table. line must be
  ## greater than zero
  assert line > 0, "line must be greater than zero"
  # The line table is a flat list of (line, count) pairs, so the
  # last pair lives in the final two slots
  let extendsLastRun = self.lines.len >= 2 and self.lines[^2] == line
  if not extendsLastRun:
    # First byte seen for this line: open a new run of length 1
    self.lines.add(line)
    self.lines.add(1)
  else:
    # Same line as the previous byte: grow the current run
    inc self.lines[^1]
  self.code.add(newByte)
|
|
|
|
|
|
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array to the chunk, in order,
  ## attributing all of them to the same line number
  for i in 0 ..< bytes.len:
    self.write(bytes[i], line)
|
|
|
|
|
|
proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Appends the given opcode, converted to its byte value, to
  ## the chunk at the provided line number
  let raw = newByte.uint8
  self.write(raw, line)
|
|
|
|
|
|
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array to the chunk, in order,
  ## attributing all of them to the same line number
  for i in 0 ..< bytes.len:
    self.write(bytes[i].uint8, line)
|
|
|
|
|
|
proc getLine*(self: Chunk, idx: int): int =
  ## Returns the line number associated with the instruction
  ## byte at index idx.
  ##
  ## The line table is walked one (line, count) pair at a time
  ## while tracking the index of the first byte covered by the
  ## current pair. Raises IndexDefect if the line table holds
  ## no complete pair or if idx is not covered by any pair
  ## (this includes negative indices)
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  var first = 0   # Index of the first byte covered by the current run
  for n in countup(0, self.lines.high(), 2):
    let count = self.lines[n + 1]
    # A run covers exactly the half-open interval [first, first + count)
    if idx in first ..< first + count:
      return self.lines[n]
    first += count
  raise newException(IndexDefect, "index out of range")
|
|
|
|
|
|
proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
  ## Appends the given bytes to the chunk's constants table and
  ## returns the index at which the first byte was stored, converted
  ## to an array of 3 bytes via toTriple
  let start = self.consts.len()
  result = toTriple(start)
  self.consts.add(data)
|