peon/src/frontend/meta/bytecode.nim

262 lines
11 KiB
Nim
Raw Normal View History

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Low level bytecode implementation details
import ast
import typing
import ../../util/multibyte
import errors
import strutils
import strformat
export ast
type
  Chunk* = ref object
    ## A piece of bytecode.
    ## consts represents the constants table the code is referring to.
    ## byteConsts represents the actual encoding of the constants table
    ## used when serializing to/from a bytecode stream.
    ## code is the linear sequence of compiled bytecode instructions.
    ## lines maps bytecode instructions to line numbers using Run
    ## Length Encoding. It is a flat sequence of (line, count) pairs:
    ## - The first integer of a pair is the line number
    ## - The second integer is the number of consecutive bytes of code
    ##   that were written for that line
    ##
    ## A visual representation may be easier to understand: [1, 2, 3, 4]
    ## This is to be interpreted as "the first 2 bytes of code belong to
    ## line 1 and the 4 bytes after them belong to line 3"
    ## This is more efficient than using the naive approach, which would encode
    ## the same line number multiple times and waste considerable amounts of space.
    ## reuseConsts tells whether a constant that is already in the table
    ## gets its existing index reused instead of being stored again.
    consts*: seq[LiteralExpr]
    byteConsts*: seq[uint8]
    code*: seq[uint8]
    lines*: seq[int]
    reuseConsts*: bool
  OpCode* {.pure.} = enum
    ## Enum of Peon's bytecode opcodes
    # Note: x represents the argument
    # to unary opcodes, while a and b
    # represent arguments to binary
    # opcodes. Other variable names (c, d, ...)
    # may be used for more complex opcodes. If
    # an opcode takes any arguments at runtime,
    # they come from either the stack or the VM's
    # closure array. Some other opcodes (e.g.
    # jumps) take arguments in the form of 16
    # or 24 bit numbers that are defined statically
    # at compilation time into the bytecode.
    # These push a constant onto the stack
    LoadInt64 = 0u8,
    LoadUInt64,
    LoadInt32,
    LoadUInt32,
    LoadInt16,
    LoadUInt16,
    LoadInt8,
    LoadUInt8,
    LoadFloat64,
    LoadFloat32,
    LoadString,
    ## Singleton opcodes (each of them pushes a constant singleton on the stack)
    Nil,
    True,
    False,
    Nan,
    Inf,
    ## Basic stack operations
    Pop, # Pops an element off the stack and discards it
    Push, # Pushes x onto the stack
    PopN, # Pops x elements off the stack (optimization for exiting scopes and returning from functions)
    ## Name resolution/handling
    LoadAttribute, # Pushes the attribute b of object a onto the stack
    LoadVar, # Pushes the object at position x in the stack onto the stack
    StoreVar, # Stores the value of b at position a in the stack
    LoadHeap, # Pushes the object at position x in the closure array onto the stack
    StoreHeap, # Stores the value of b at position a in the closure array
    ## Looping and jumping
    Jump, # Absolute, unconditional jump into the bytecode
    JumpForwards, # Relative, unconditional, positive jump in the bytecode
    JumpBackwards, # Relative, unconditional, negative jump in the bytecode
    JumpIfFalse, # Jumps to an absolute index in the bytecode if x is false
    JumpIfTrue, # Jumps to an absolute index in the bytecode if x is true
    JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthyness). Optimization for if statements
    JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops it otherwise (used for logical and)
    ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
    LongJump,
    LongJumpIfFalse,
    LongJumpIfTrue,
    LongJumpIfFalsePop,
    LongJumpIfFalseOrPop,
    LongJumpForwards,
    LongJumpBackwards,
    ## Functions
    Call, # Calls a function
    Return # Returns from the current function
    ## Exception handling
    Raise, # Raises exception x or re-raises active exception if x is nil
    BeginTry, # Initiates an exception handling context
    FinishTry, # Closes the current exception handling context
    ## Generators
    Yield, # Yields control from a generator back to the caller
    ## Coroutines
    Await, # Calls an asynchronous function
    ## Misc
    Assert, # Raises an AssertionFailed exception if x is false
    NoOp, # Just a no-op
# Instructions are grouped by their operation/operand types, which makes
# them easier to handle when debugging

# Simple instructions are the ones that push onto/pop off the stack
# unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {OpCode.Nil, OpCode.True, OpCode.False,
                             OpCode.Nan, OpCode.Inf, OpCode.Pop,
                             OpCode.Return, OpCode.Raise,
                             OpCode.BeginTry, OpCode.FinishTry,
                             OpCode.Yield, OpCode.Await,
                             OpCode.NoOp}

# Constant instructions operate on the bytecode constant table
const constantInstructions* = {OpCode.LoadInt64, OpCode.LoadUInt64,
                               OpCode.LoadInt32, OpCode.LoadUInt32,
                               OpCode.LoadInt16, OpCode.LoadUInt16,
                               OpCode.LoadInt8, OpCode.LoadUInt8,
                               OpCode.LoadFloat64, OpCode.LoadFloat32,
                               OpCode.LoadString}

# Stack triple instructions operate on the stack at arbitrary offsets and
# pop arguments off of it in the form of 24 bit integers
const stackTripleInstructions* = {OpCode.LoadVar, OpCode.StoreVar,
                                  OpCode.LoadHeap, OpCode.StoreHeap}

# Stack double instructions operate on the stack at arbitrary offsets and
# pop arguments off of it in the form of 16 bit integers (none exist yet)
const stackDoubleInstructions* = {}

# Argument double instructions take hardcoded arguments on the stack as
# 16 bit integers
const argumentDoubleInstructions* = {OpCode.PopN}

# Jump instructions jump at relative or absolute bytecode offsets
# NOTE(review): Jump, LongJump, JumpIfFalseOrPop and LongJumpIfFalseOrPop
# are not part of this set - confirm whether that is intentional
const jumpInstructions* = {OpCode.JumpForwards, OpCode.JumpBackwards,
                           OpCode.JumpIfFalse, OpCode.JumpIfTrue,
                           OpCode.JumpIfFalsePop,
                           OpCode.LongJumpIfFalse, OpCode.LongJumpIfTrue,
                           OpCode.LongJumpIfFalsePop,
                           OpCode.LongJumpForwards,
                           OpCode.LongJumpBackwards}
proc newChunk*(reuseConsts: bool = true): Chunk =
  ## Creates a fresh chunk holding no constants, no code and no
  ## line information. reuseConsts is stored on the chunk as given
  new(result)
  result.consts = @[]
  result.code = @[]
  result.lines = @[]
  result.reuseConsts = reuseConsts
proc `$`*(self: Chunk): string =
  ## Renders the chunk's constants, code and line table as a
  ## human readable string
  let
    constsRepr = self.consts.join(", ")
    codeRepr = self.code.join(", ")
    linesRepr = self.lines.join(", ")
  result = &"Chunk(consts=[{constsRepr}], code=[{codeRepr}], lines=[{linesRepr}])"
proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Appends newByte to the chunk's code and records that it
  ## belongs to the given line number, which must be positive
  assert line > 0, "line must be greater than zero"
  let entries = self.lines.len
  if entries < 2 or self.lines[entries - 2] != line:
    # Nothing written yet, or the line changed: open a new run
    self.lines.add(line)
    self.lines.add(1)
  else:
    # Same line as the most recent run: just extend its count
    inc self.lines[entries - 1]
  self.code.add(newByte)
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array to the chunk, in
  ## order, all at the same line number
  for i in 0..bytes.high:
    self.write(bytes[i], line)
proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Writes the given opcode, converted to its byte value, to
  ## the chunk at the provided line number
  let raw = newByte.uint8
  self.write(raw, line)
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array to the chunk, in
  ## order, all at the same line number
  for i in 0..bytes.high:
    self.write(bytes[i].uint8, line)
proc getLine*(self: Chunk, idx: int): int =
  ## Returns the line number associated with the instruction
  ## at index idx of the chunk's code.
  ## Raises IndexDefect if the chunk has no line information
  ## or if idx (negative indexes included) does not fall inside
  ## any of the recorded runs
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  # Index of the first instruction covered by the run being examined
  var start = 0
  # self.lines is a flat list of (line, count) pairs. Stopping at
  # len - 2 keeps pair + 1 in bounds even if a trailing element is
  # missing its partner
  for pair in countup(0, self.lines.len - 2, 2):
    let count = self.lines[pair + 1]
    # A run covers exactly the half-open range [start, start + count)
    if idx in start..<start + count:
      return self.lines[pair]
    start += count
  raise newException(IndexDefect, "index out of range")
proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int =
  ## Returns the index of the given constant in the chunk's
  ## constants table, appending it to the table when needed.
  ## As a small optimization, if self.reuseConsts equals true and
  ## an equivalent constant has already been written, its index is
  ## returned and the table is left untouched. Otherwise the
  ## constant is always appended and its new index is returned
  if self.reuseConsts:
    for i, c in self.consts:
      # We cannot use simple equality because the nodes likely have
      # different token objects with different values
      if c.kind != constant.kind:
        continue
      if constant.isConst():
        if c.literal.lexeme == constant.literal.lexeme:
          # This wouldn't work for stuff like 2e3 and 2000.0, but those
          # forms are collapsed in the compiler before being written
          # to the constants table
          return i
      elif constant.kind == identExpr:
        let
          known = IdentExpr(c)
          wanted = IdentExpr(constant)
        if known.name.lexeme == wanted.name.lexeme:
          return i
  # No reusable entry (or reuse is disabled): store the constant
  # and hand back the position it ended up at
  self.consts.add(constant)
  result = self.consts.high()
proc addConstant*(self: Chunk, constant: LiteralExpr): array[3, uint8] =
  ## Writes a constant to a chunk and returns its index in the
  ## constants table converted to a 3-byte array. Constant indexes
  ## are reused if a constant is used more than once and
  ## self.reuseConsts equals true.
  ## Raises CompileError when the constants table is full
  # The constant index is a 24 bit unsigned integer, so this is the
  # last position we can index into the constant table (the same
  # applies to our stack by the way). Not that anyone's ever gonna
  # hit this limit in the real world, but you know, just in case
  const lastEncodableIndex = 16777215
  if self.consts.high() == lastEncodableIndex:
    raise newException(CompileError, "cannot encode more than 16777216 constants")
  let index = self.findOrAddConstant(constant)
  result = index.toTriple()