peon/src/frontend/compiler/targets/bytecode/opcodes.nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Low level bytecode implementation details

import std/strutils
import std/strformat


import util/multibyte


type
    Chunk* = ref object
        ## A piece of bytecode.
        ##
        ## lines maps bytecode instructions to line numbers using Run
        ## Length Encoding. Instructions are encoded in groups of two
        ## integers laid out as follows:
        ## - The first integer represents the line number
        ## - The second integer represents the number of
        ##   instructions on that line
        ## For example, if lines equals [1, 5], it means that there are 5 instructions
        ## at line 1, meaning that all instructions in code[0..4] belong to the same line.
        ## This is more efficient than using the naive approach, which would encode
        ## the same line number multiple times and waste considerable amounts of space.
        ##
        ## functions encodes the following information:
        ## - Function name
        ## - Argument count
        ## - Function boundaries
        ## The encoding is the following:
        ## - First, the position into the bytecode where the function begins is encoded (as a 3 byte integer)
        ## - Second, the position into the bytecode where the function ends is encoded (as a 3 byte integer)
        ## - After that follows the argument count as a 1 byte integer
        ## - Lastly, the function's name (optional) is encoded in ASCII, prepended with
        ##   its size as a 2-byte integer
        consts*: seq[uint8]     ## The code's constants table
        code*: seq[uint8]       ## The linear sequence of compiled bytecode instructions
        lines*: seq[int]        ## Run-length encoded (line number, instruction count) pairs
        functions*: seq[uint8]  ## Encoded function metadata (see the layout above)

    OpCode* {.pure.} = enum
        ## Enum of Peon's bytecode opcodes.
        ## Only the first member is given an explicit value (0): every
        ## other member takes the next consecutive value, so the order
        ## of declaration defines the numeric value of each opcode

        # Note: x represents the argument
        # to unary opcodes, while a and b
        # represent arguments to binary
        # opcodes. Other variable names (c, d, ...)
        # may be used for more complex opcodes. If
        # an opcode takes any arguments at runtime,
        # they come from either the stack or the VM's
        # closure array. Some other opcodes (e.g.
        # jumps) take arguments in the form of 16
        # or 24 bit numbers that are defined statically
        # at compilation time in the bytecode

        # These push a constant at position x in the
        # constant table onto the stack
        LoadInt64 = 0u8,
        LoadUInt64,
        LoadInt32,
        LoadUInt32,
        LoadInt16,
        LoadUInt16,
        LoadInt8,
        LoadUInt8,
        LoadFloat64,
        LoadFloat32,
        LoadString,
        ## Singleton opcodes (each of them pushes a constant singleton on the stack)
        LoadNil,
        LoadTrue,
        LoadFalse,
        LoadNan,
        LoadInf,
        LoadNInf,
        ## Operations on primitive types
        Negate,
        NegateFloat64,
        NegateFloat32,
        Add,
        Subtract,
        Multiply,
        Divide,
        SignedDivide,
        AddFloat64,
        SubtractFloat64,
        MultiplyFloat64,
        DivideFloat64,
        AddFloat32,
        SubtractFloat32,
        MultiplyFloat32,
        DivideFloat32,
        Pow,
        SignedPow,
        Mod,
        SignedMod,
        PowFloat64,
        PowFloat32,
        ModFloat64,
        ModFloat32,
        LShift,
        RSHift,
        Xor,
        Or,
        And,
        Not,
        Equal,
        NotEqual,
        GreaterThan,
        LessThan,
        GreaterOrEqual,
        LessOrEqual,
        SignedGreaterThan,
        SignedLessThan,
        SignedGreaterOrEqual,
        SignedLessOrEqual,
        Float64GreaterThan,
        Float64LessThan,
        Float64GreaterOrEqual,
        Float64LessOrEqual,
        Float32GreaterThan,
        Float32LessThan,
        Float32GreaterOrEqual,
        Float32LessOrEqual,
        LogicalNot,
        ## Print opcodes
        PrintInt64,
        PrintUInt64,
        PrintInt32,
        PrintUInt32,
        PrintInt16,
        PrintUint16,
        PrintInt8,
        PrintUInt8,
        PrintFloat64,
        PrintFloat32,
        PrintHex,
        PrintBool,
        PrintNan,
        PrintInf,
        PrintString,
        ## Basic stack operations
        Pop, # Pops an element off the stack and discards it
        PopRepl, # Same as Pop, but also prints the value of what's popped (used in REPL mode)
        PopN, # Pops x elements off the call stack (optimization for exiting local scopes which usually pop many elements)
        ## Name resolution/handling
        LoadAttribute, # Pushes the attribute b of object a onto the stack
        LoadVar, # Pushes the object at position x in the stack onto the stack
        StoreVar, # Stores the value of b at position a in the stack
        AddVar,   # An optimization for StoreVar (used when the variable is first declared)
        ## Looping and jumping
        Jump, # Absolute, unconditional jump into the bytecode
        JumpForwards, # Relative, unconditional, positive jump in the bytecode
        JumpBackwards, # Relative, unconditional, negative jump in the bytecode
        JumpIfFalse, # Jumps to a relative index in the bytecode if x is false
        JumpIfTrue, # Jumps to a relative index in the bytecode if x is true
        JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthiness). Optimization for if statements
        JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
        ## Functions
        Call, # Calls a function and initiates a new stack frame
        Return, # Terminates the current function
        SetResult,  # Sets the result of the current function
        ## Exception handling
        Raise, # Raises exception x or re-raises active exception if x is nil
        BeginTry, # Initiates an exception handling context
        FinishTry, # Closes the current exception handling context
        ## Generators
        Yield, # Yields control from a generator back to the caller
        ## Coroutines
        Await, # Calls an asynchronous function
        ## Misc
        Assert,      # Raises an exception if x is false
        NoOp,        # Just a no-op
        PopC,        # Pop off the call stack onto the operand stack
        PushC,       # Pop off the operand stack onto the call stack
        SysClock64,  # Pushes the output of a monotonic clock on the stack
        LoadTOS,     # Pushes the top of the call stack onto the operand stack
        DupTop,      # Duplicates the top of the operand stack onto the operand stack
        ReplExit,    # Exits the VM immediately, leaving its state intact. Used in the REPL
        LoadGlobal   # Loads a global variable


# We group instructions by their operation/operand types for easier handling when debugging

# Simple instructions encompass instructions that push onto/pop off the
# stack unconditionally (True, False, Pop, etc.). Members are listed by
# category; since this is a set, their order carries no meaning
const simpleInstructions* = {
    # Singletons
    LoadNil, LoadTrue, LoadFalse, LoadNan, LoadInf, LoadNInf,
    # Stack and variable handling
    Pop, PopC, PushC, LoadTOS, DupTop, AddVar,
    # Functions, exceptions, generators and coroutines
    Return, SetResult, Raise, BeginTry, FinishTry, Yield, Await,
    # Miscellaneous
    NoOp, SysClock64, ReplExit,
    # Arithmetic (plain and Signed variants)
    Negate, Add, Subtract, Multiply, Divide, SignedDivide,
    Pow, SignedPow, Mod, SignedMod,
    # Arithmetic (Float64 and Float32 variants)
    NegateFloat64, NegateFloat32,
    AddFloat64, SubtractFloat64, MultiplyFloat64, DivideFloat64,
    AddFloat32, SubtractFloat32, MultiplyFloat32, DivideFloat32,
    PowFloat64, PowFloat32, ModFloat64, ModFloat32,
    # Bitwise and logical operators
    LShift, RSHift, Xor, Or, And, Not, LogicalNot,
    # Comparisons
    Equal, NotEqual,
    GreaterThan, LessThan, GreaterOrEqual, LessOrEqual,
    SignedGreaterThan, SignedLessThan,
    SignedGreaterOrEqual, SignedLessOrEqual,
    Float64GreaterThan, Float64LessThan,
    Float64GreaterOrEqual, Float64LessOrEqual,
    Float32GreaterThan, Float32LessThan,
    Float32GreaterOrEqual, Float32LessOrEqual,
    # Printing
    PrintInt64, PrintUInt64, PrintInt32, PrintUInt32,
    PrintInt16, PrintUint16, PrintInt8, PrintUInt8,
    PrintFloat64, PrintFloat32, PrintHex, PrintBool,
    PrintNan, PrintInf, PrintString
}

# Constant instructions are instructions that operate on the bytecode
# constant table
const constantInstructions* = {
    # Integers
    LoadInt8, LoadUInt8,
    LoadInt16, LoadUInt16,
    LoadInt32, LoadUInt32,
    LoadInt64, LoadUInt64,
    # Floats
    LoadFloat32, LoadFloat64,
    # Strings
    LoadString
}

# Stack triple instructions operate on the stack at arbitrary offsets
# and pop arguments off of it in the form of 24 bit integers
const stackTripleInstructions* = {LoadVar, StoreVar, LoadGlobal}

# Stack double instructions operate on the stack at arbitrary offsets
# and pop arguments off of it in the form of 16 bit integers.
# No opcode currently belongs to this group, hence the empty set
const stackDoubleInstructions* = {}

# Argument double instructions take hardcoded arguments as 16 bit integers
const argumentDoubleInstructions* = {PopN}


# Jump instructions jump at relative or absolute bytecode offsets
const jumpInstructions* = {
    # Unconditional
    Jump, JumpForwards, JumpBackwards,
    # Conditional
    JumpIfTrue, JumpIfFalse, JumpIfFalsePop, JumpIfFalseOrPop
}


proc newChunk*: Chunk =
    ## Creates a fresh chunk whose constant table, code,
    ## line table and function table are all empty
    new(result)
    result.consts = @[]
    result.code = @[]
    result.lines = @[]
    result.functions = @[]


proc write*(self: Chunk, newByte: uint8, line: int) =
    ## Appends a single byte to the chunk's code and
    ## records it in the run-length encoded line table
    ## as belonging to the given line
    assert line > 0, "line must be greater than zero"
    let last = self.lines.len() - 1
    if last < 1 or self.lines[last - 1] != line:
        # Either the table is empty or the most recent
        # run is for a different line: start a new run
        # made of this one byte
        self.lines.add([line, 1])
    else:
        # Same line as the most recent run: extend it
        inc(self.lines[last])
    self.code.add(newByte)


proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
    ## Writes every byte of the given array, in order,
    ## to the chunk at the given line by delegating to
    ## the single-byte overload of write
    for i in 0..bytes.high():
        self.write(bytes[i], line)


proc write*(self: Chunk, newByte: OpCode, line: int) =
    ## Writes the given opcode, converted to its
    ## uint8 value, to the chunk at the given line
    let raw = uint8(newByte)
    write(self, raw, line)


proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
    ## Writes every opcode of the given array, in
    ## order, to the chunk at the given line. Each
    ## opcode is converted to its uint8 value first
    for i in 0..bytes.high():
        let raw = uint8(bytes[i])
        self.write(raw, line)


proc getLine*(self: Chunk, idx: int): int =
    ## Returns the line associated with the instruction
    ## at index idx, found by walking the run-length
    ## encoded (line number, instruction count) pairs
    ## stored in self.lines.
    ## Raises IndexDefect if the line table holds no
    ## complete pair, or if idx does not fall inside any
    ## run (which includes every negative index)
    if self.lines.len < 2:
        raise newException(IndexDefect, "the chunk object is empty")
    var
        count: int
        current: int = 0
    for n in countup(0, self.lines.high(), 2):
        count = self.lines[n + 1]
        # The run described by this pair covers exactly the
        # instructions in current..<current + count. The lower
        # bound must be current itself: starting the range at
        # current - count would let negative indexes match
        # the first run instead of being rejected
        if idx in current..<current + count:
            return self.lines[n]
        current += count
    raise newException(IndexDefect, "index out of range")


proc getIdx*(self: Chunk, line: int): int =
    ## Scans the odd positions of self.lines and returns
    ## the first one whose value equals line. If there is
    ## no such position, the result is left at its default
    ## value of 0
    # NOTE(review): write() stores line numbers at even
    # positions and instruction counts at odd ones, so this
    # compares counts against a line number. That looks
    # unintended, but the behaviour is kept as-is until it
    # is confirmed against the callers of this procedure
    for pos in countup(1, self.lines.high(), 2):
        if self.lines[pos] == line:
            return pos


proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
    ## Appends the given bytes to the chunk's constant
    ## table. The returned value is the length the table
    ## had before the append (i.e. the index of the first
    ## byte written), converted with toTriple
    result = toTriple(self.consts.len())
    self.consts.add(data)