# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Low level bytecode implementation details
import std/strutils
import std/strformat

import ../../util/multibyte


type
  Chunk* = ref object
    ## A piece of bytecode, made up of four tables.
    ##
    ## `consts` is the code's constants table.
    ##
    ## `code` is the linear sequence of compiled bytecode instructions.
    ##
    ## `lines` maps bytecode instructions to line numbers using Run
    ## Length Encoding. Its entries come in pairs:
    ##
    ## - the first integer is a line number
    ## - the second integer is the number of instructions on that line
    ##
    ## For example, `lines == [1, 5]` means that there are 5 instructions
    ## at line 1, so everything in `code[0..4]` belongs to the same line.
    ## Storing each line number once per run, instead of once per
    ## instruction, avoids wasting considerable amounts of space.
    ##
    ## `cfi` holds Call Frame Information, which encodes the function
    ## name, the argument count and the function boundaries. The
    ## encoding is, in order:
    ##
    ## 1. the position into the bytecode where the function begins
    ##    (3 byte integer)
    ## 2. the position into the bytecode where the function ends
    ##    (3 byte integer)
    ## 3. the argument count (1 byte integer)
    ## 4. the function's name (optional), encoded in ASCII and prepended
    ##    with its size as a 2 byte integer
    consts*: seq[uint8]
    code*: seq[uint8]
    lines*: seq[int]
    cfi*: seq[uint8]

  OpCode* {.pure.} = enum
    ## Enum of Peon's bytecode opcodes
    # Conventions used by the comments in this enum: x is the argument
    # of a unary opcode, a and b are the arguments of a binary opcode,
    # and further names (c, d, ...) may appear for more complex opcodes.
    # Arguments taken at runtime come from either the stack or the VM's
    # closure array. Some other opcodes (e.g. jumps) take their
    # arguments as 16 or 24 bit numbers that are defined statically at
    # compilation time into the bytecode

    # These push a constant at position x in the
    # constant table onto the stack
    LoadInt64 = 0u8, LoadUInt64, LoadInt32, LoadUInt32,
    LoadInt16, LoadUInt16, LoadInt8, LoadUInt8,
    LoadFloat64, LoadFloat32, LoadString,

    # Singleton opcodes (each of them pushes a constant
    # singleton on the stack)
    LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf,

    # Operations on primitive types
    Negate, NegateFloat64, NegateFloat32,
    Add, Subtract, Multiply, Divide, SignedDivide,
    AddFloat64, SubtractFloat64, MultiplyFloat64, DivideFloat64,
    AddFloat32, SubtractFloat32, MultiplyFloat32, DivideFloat32,
    Pow, SignedPow, Mod, SignedMod,
    PowFloat64, PowFloat32, ModFloat64, ModFloat32,
    LShift, RSHift, Xor, Or, And, Not,
    Equal, NotEqual, GreaterThan, LessThan,
    GreaterOrEqual, LessOrEqual,

    # Print opcodes
    PrintInt64, PrintUInt64, PrintInt32, PrintUInt32,
    PrintInt16, PrintUint16, PrintInt8, PrintUInt8,
    PrintFloat64, PrintFloat32, PrintHex, PrintBool,
    PrintNan, PrintInf, PrintString,

    # Basic stack operations
    # Pops an element off the stack and discards it
    Pop,
    # Same as Pop, but also prints the value of what's popped
    # (used in REPL mode)
    PopRepl,
    # Pops x elements off the call stack (optimization for exiting
    # local scopes which usually pop many elements)
    PopN,

    # Name resolution/handling
    # Pushes the attribute b of object a onto the stack
    LoadAttribute,
    # Pushes the object at position x in the stack onto the stack
    LoadVar,
    # Stores the value of b at position a in the stack
    StoreVar,
    # Pushes the object at position x in the closure array onto the stack
    LoadClosure,
    # Stores the value of b at position a in the closure array
    StoreClosure,
    # Closes over a function argument
    LiftArgument,
    PopClosure,

    # Looping and jumping
    # Absolute, unconditional jump into the bytecode
    Jump,
    # Relative, unconditional, positive jump in the bytecode
    JumpForwards,
    # Relative, unconditional, negative jump in the bytecode
    JumpBackwards,
    # Jumps to a relative index in the bytecode if x is false
    JumpIfFalse,
    # Jumps to a relative index in the bytecode if x is true
    JumpIfTrue,
    # Like JumpIfFalse, but also pops off the stack (regardless of
    # truthyness). Optimization for if statements
    JumpIfFalsePop,
    # Jumps to an absolute index in the bytecode if x is false and
    # pops otherwise (used for logical and)
    JumpIfFalseOrPop,

    # Functions
    Call,         # Calls a function and initiates a new stack frame
    CallClosure,  # Calls a closure
    Return,       # Terminates the current function
    SetResult,    # Sets the result of the current function

    # Exception handling
    # Raises exception x or re-raises active exception if x is nil
    Raise,
    BeginTry,     # Initiates an exception handling context
    FinishTry,    # Closes the current exception handling context

    # Generators
    Yield,        # Yields control from a generator back to the caller

    # Coroutines
    Await,        # Calls an asynchronous function

    # Misc
    Assert,       # Raises an AssertionFailed exception if x is false
    NoOp,         # Just a no-op
    PopC,         # Pop off the call stack onto the operand stack
    PushC,        # Pop off the operand stack onto the call stack
    SysClock64    # Pushes the output of a monotonic clock on the stack


# We group instructions by their operation/operand types for easier
# handling when debugging

# Simple instructions encompass instructions that push onto/pop off the
# stack unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {
  # Singletons
  LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf,
  # Stack handling and miscellanea
  Pop, PopC, PushC, NoOp, SysClock64,
  # Functions, exceptions, generators and coroutines
  Return, SetResult, Raise, BeginTry, FinishTry, Yield, Await,
  # Arithmetic
  Negate, NegateFloat64, NegateFloat32,
  Add, Subtract, Multiply, Divide, SignedDivide,
  AddFloat64, SubtractFloat64, MultiplyFloat64, DivideFloat64,
  AddFloat32, SubtractFloat32, MultiplyFloat32, DivideFloat32,
  Pow, SignedPow, Mod, SignedMod,
  PowFloat64, PowFloat32, ModFloat64, ModFloat32,
  # Bitwise and logical operators
  LShift, RSHift, Xor, Or, And, Not,
  # Comparisons
  Equal, NotEqual, GreaterThan, LessThan, GreaterOrEqual, LessOrEqual,
  # Printing
  PrintInt64, PrintUInt64, PrintInt32, PrintUInt32,
  PrintInt16, PrintUint16, PrintInt8, PrintUInt8,
  PrintFloat64, PrintFloat32, PrintHex, PrintBool,
  PrintNan, PrintInf, PrintString
}

# Constant instructions are the ones that operate on the bytecode
# constant table
const constantInstructions* = {
  LoadInt64, LoadUInt64, LoadInt32, LoadUInt32,
  LoadInt16, LoadUInt16, LoadInt8, LoadUInt8,
  LoadFloat64, LoadFloat32, LoadString
}

# Stack triple instructions operate on the stack at arbitrary offsets
# and pop arguments off of it in the form of 24 bit integers
const stackTripleInstructions* = {
  StoreVar, LoadVar, LoadClosure, LiftArgument, PopClosure
}

# Stack double instructions operate on the stack at arbitrary offsets
# and pop arguments off of it in the form of 16 bit integers
const stackDoubleInstructions* = {}

# Argument double instructions take hardcoded arguments as 16 bit
# integers
const argumentDoubleInstructions* = {PopN}

# Jump instructions jump at relative or absolute bytecode offsets
# NOTE(review): JumpIfFalseOrPop is not a member of this or any other
# group defined here - confirm that this is intentional
const jumpInstructions* = {
  Jump, JumpForwards, JumpBackwards,
  JumpIfFalse, JumpIfFalsePop, JumpIfTrue
}


proc newChunk*: Chunk =
  ## Creates a new chunk whose consts, code, lines and cfi
  ## tables are all empty
  Chunk(consts: @[], code: @[], lines: @[], cfi: @[])


proc `$`*(self: Chunk): string =
  ## Renders the consts, code and lines tables of the chunk as
  ## comma-separated lists (the cfi table is not part of the output)
  let
    consts = self.consts.join(", ")
    code = self.code.join(", ")
    lines = self.lines.join(", ")
  result = &"Chunk(consts=[{consts}], code=[{code}], lines=[{lines}])"


proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Appends the given byte to the chunk's code and records it
  ## under the provided line number, which must be positive
  assert line > 0, "line must be greater than zero"
  let last = self.lines.high
  if last < 1 or self.lines[last - 1] != line:
    # No (line, count) pair yet, or the latest pair describes
    # a different line: open a new run holding one instruction
    self.lines.add(line)
    self.lines.add(1)
  else:
    # Same line as the latest run, so just make it longer
    inc self.lines[last]
  self.code.add(newByte)


proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array, in order, at the
  ## provided line number
  for value in bytes:
    self.write(value, line)


proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Writes the given opcode, converted to its byte value, at the
  ## provided line number
  self.write(uint8(newByte), line)


proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array, in order, at the
  ## provided line number
  for opcode in bytes:
    self.write(opcode, line)


proc getLine*(self: Chunk, idx: int): int =
  ## Returns the associated line of a given
  ## instruction index
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  var
    count: int
    current: int = 0
  for n in countup(0, self.lines.high(), 2):
    count = self.lines[n + 1]
    if idx in current - count..