# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Low level bytecode implementation details
import strutils
import strformat

import ../../util/multibyte


type
    Chunk* = ref object
        ## A piece of bytecode.
        ## consts is used when serializing to/from a bytecode stream.
        ## code is the linear sequence of compiled bytecode instructions.
        ## lines maps the bytes in code to source line numbers using Run
        ## Length Encoding. It is a flat sequence of (line, count) pairs,
        ## which is how the write procedure in this module fills it in:
        ## - The first integer of a pair is the line number
        ## - The second integer is how many consecutive bytes were written
        ##   for that line
        ##
        ## A visual representation may be easier to understand: [1, 2, 5, 3]
        ## This is to be interpreted as "2 bytes were written at line 1,
        ## followed by 3 bytes written at line 5"
        ## This is more efficient than using the naive approach, which would
        ## encode the same line number once per byte and waste considerable
        ## amounts of space.
        ## cfi represents Call Frame Information and encodes the following
        ## information:
        ## - Function name
        ## - Stack bottom
        ## - Argument count
        ## The encoding for CFI data is the following:
        ## - First, the position into the bytecode where the function begins
        ##   is encoded (as a 3 byte integer)
        ## - Second, the position into the bytecode where the function ends
        ##   is encoded (as a 3 byte integer)
        ## - Then, the frame's stack bottom is encoded as a 3 byte integer
        ## - After the frame's stack bottom follows the argument count as a
        ##   1 byte integer
        ## - Lastly, the function's name (optional) is encoded in ASCII,
        ##   prepended with its size as a 2-byte integer
        consts*: seq[uint8]
        code*: seq[uint8]
        lines*: seq[int]
        cfi*: seq[uint8]

    OpCode* {.pure.} = enum
        ## Enum of Peon's bytecode opcodes

        # Note: x represents the argument
        # to unary opcodes, while a and b
        # represent arguments to binary
        # opcodes. Other variable names (c, d, ...)
        # may be used for more complex opcodes. If
        # an opcode takes any arguments at runtime,
        # they come from either the stack or the VM's
        # closure array. Some other opcodes (e.g.
        # jumps), take arguments in the form of 16
        # or 24 bit numbers that are defined statically
        # at compilation time into the bytecode

        # The position of a member in this list determines its numeric
        # value (LoadInt64 is 0 and every following member counts up by
        # one), and the write procedure emits that value as a byte:
        # inserting or reordering members changes the bytecode encoding

        # These push a constant at position x in the
        # constant table onto the stack
        LoadInt64 = 0u8,
        LoadUInt64,
        LoadInt32,
        LoadUInt32,
        LoadInt16,
        LoadUInt16,
        LoadInt8,
        LoadUInt8,
        LoadFloat64,
        LoadFloat32,
        LoadString,
        ## Singleton opcodes (each of them pushes a constant singleton on the stack)
        LoadNil,
        LoadTrue,
        LoadFalse,
        LoadNan,
        LoadInf,
        ## Basic stack operations
        Pop,              # Pops an element off the stack and discards it
        Push,             # Pushes x onto the stack
        PopN,             # Pops x elements off the stack (optimization for exiting local scopes which usually pop many elements)
        ## Name resolution/handling
        LoadAttribute,    # Pushes the attribute b of object a onto the stack
        LoadVar,          # Pushes the object at position x in the stack onto the stack
        StoreVar,         # Stores the value of b at position a in the stack
        LoadHeap,         # Pushes the object position x in the closure array onto the stack
        StoreHeap,        # Stores the value of b at position a in the closure array
        ## Looping and jumping
        Jump,             # Absolute, unconditional jump into the bytecode
        JumpForwards,     # Relative, unconditional, positive jump in the bytecode
        JumpBackwards,    # Relative, unconditional, negative jump in the bytecode
        JumpIfFalse,      # Jumps to a relative index in the bytecode if x is false
        JumpIfTrue,       # Jumps to a relative index in the bytecode if x is true
        JumpIfFalsePop,   # Like JumpIfFalse, but also pops off the stack (regardless of truthiness). Optimization for if statements
        JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
        ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
        LongJump,
        LongJumpIfFalse,
        LongJumpIfTrue,
        LongJumpIfFalsePop,
        LongJumpIfFalseOrPop,
        LongJumpForwards,
        LongJumpBackwards,
        ## Functions
        Call,             # Calls a function and initiates a new stack frame
        Return,           # Terminates the current function without popping off the stack
        ReturnValue,      # Pops a return value off the stack and terminates the current function
        ## Exception handling
        Raise,            # Raises exception x or re-raises active exception if x is nil
        BeginTry,         # Initiates an exception handling context
        FinishTry,        # Closes the current exception handling context
        ## Generators
        Yield,            # Yields control from a generator back to the caller
        ## Coroutines
        Await,            # Calls an asynchronous function
        ## Misc
        Assert,           # Raises an AssertionFailed exception if x is false
        NoOp,             # Just a no-op


# We group instructions by their operation/operand types for easier handling when debugging

# Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {
    OpCode.Return, OpCode.ReturnValue,
    LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf,
    Pop,
    OpCode.Raise, BeginTry, FinishTry,
    OpCode.Yield, OpCode.Await,
    OpCode.NoOp
}

# Constant instructions are instructions that operate on the bytecode constant table
const constantInstructions* = {
    LoadInt64, LoadUInt64,
    LoadInt32, LoadUInt32,
    LoadInt16, LoadUInt16,
    LoadInt8, LoadUInt8,
    LoadFloat64, LoadFloat32,
    LoadString
}

# Stack triple instructions operate on the stack at arbitrary offsets and pop
# arguments off of it in the form of 24 bit integers
const stackTripleInstructions* = {StoreVar, LoadVar, LoadHeap, StoreHeap}

# Stack double instructions operate on the stack at arbitrary offsets and pop
# arguments off of it in the form of 16 bit integers (no opcode is in this
# group at the moment)
const stackDoubleInstructions* = {}

# Argument double instructions take hardcoded arguments as 16 bit integers
const argumentDoubleInstructions* = {PopN}

# Argument triple instructions take hardcoded arguments as 24 bit integers
# (no opcode is in this group at the moment)
const argumentTripleInstructions* = {}

# Instructions that call functions
const callInstructions* = {Call}

# Jump instructions jump at relative or absolute bytecode offsets
# NOTE(review): JumpIfFalseOrPop and LongJumpIfFalseOrPop are declared in
# OpCode but are not members of this set (or of any other group defined
# here) -- confirm whether that is intentional
const jumpInstructions* = {
    Jump, JumpForwards, JumpBackwards,
    JumpIfFalse, JumpIfTrue, JumpIfFalsePop,
    LongJump, LongJumpForwards, LongJumpBackwards,
    LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop
}


proc newChunk*(): Chunk =
    ## Creates a fresh chunk whose four buffers
    ## (consts, code, lines and cfi) all start out empty
    Chunk(consts: newSeq[uint8](),
          code: newSeq[uint8](),
          lines: newSeq[int](),
          cfi: newSeq[uint8]())


proc `$`*(self: Chunk): string =
    ## Renders the chunk's consts, code and lines as
    ## comma-separated lists (cfi is not part of the output)
    result = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])"""


proc write*(self: Chunk, newByte: uint8, line: int) =
    ## Appends a single byte to the chunk's code and records
    ## that it belongs to the given (strictly positive) line
    assert line > 0, "line must be greater than zero"
    # lines is a flat list of (line, count) pairs: when the most
    # recent pair already refers to this line we only bump its count
    let sameLineAsLastRun = self.lines.len >= 2 and self.lines[^2] == line
    if sameLineAsLastRun:
        inc self.lines[^1]
    else:
        # Start a new run of length one for this line
        self.lines.add([line, 1])
    self.code.add(newByte)


proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
    ## Appends every byte of the given array, in order,
    ## attributing all of them to the same line
    for i in 0 .. bytes.high:
        self.write(bytes[i], line)


proc write*(self: Chunk, newByte: OpCode, line: int) =
    ## Appends the numeric value of the given opcode
    ## as a byte, recorded at the provided line
    self.write(newByte.uint8, line)


proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
    ## Appends the numeric value of every opcode in the given
    ## array, in order, attributing all of them to the same line
    for op in bytes:
        self.write(op.uint8, line)


proc getLine*(self: Chunk, idx: int): int =
    ## Returns the associated line of a given
    ## instruction index
    if self.lines.len < 2:
        raise newException(IndexDefect, "the chunk object is empty")
    var
        count: int
        current: int = 0
    for n in countup(0, self.lines.high(), 2):
        count = self.lines[n + 1]
        if idx in current - count..