298 lines
14 KiB
Nim
298 lines
14 KiB
Nim
# Copyright 2022 Mattia Giambirtone & All Contributors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
## Low level bytecode implementation details
|
|
import ast
|
|
import ../../util/multibyte
|
|
import errors
|
|
|
|
import strutils
|
|
import strformat
|
|
|
|
|
|
export ast
|
|
|
|
|
|
type
  Chunk* = ref object
    ## A piece of bytecode.
    ##
    ## - ``consts`` is the constants table the code refers to
    ## - ``code`` is the linear sequence of compiled bytecode instructions
    ## - ``lines`` maps the bytes in ``code`` to source line numbers using
    ##   run-length encoding. It is a flat sequence of (line, count) pairs:
    ##   the first integer of each pair is a line number and the second one
    ##   is how many consecutive bytes of ``code`` were written for that
    ##   line. This is the layout produced by ``write`` and consumed by
    ##   ``getLine``
    ## - ``reuseConsts`` tells the constant-handling procedures whether an
    ##   already stored constant may be reused instead of being stored again
    ##
    ## A visual representation may be easier to understand: [1, 2, 3, 1]
    ## is to be interpreted as "the first 2 bytes of code belong to line 1
    ## and the byte after them belongs to line 3".
    ## This is more efficient than the naive approach, which would store
    ## one line number per byte and waste considerable amounts of space.
    consts*: seq[ASTNode]
    code*: seq[uint8]
    lines*: seq[int]
    reuseConsts*: bool
|
|
|
|
type
  OpCode* {.pure.} = enum
    ## Enum of possible opcodes.

    # Naming used in the comments below: x is the argument of a unary
    # opcode, a and b are the arguments of a binary opcode; more complex
    # opcodes may use other names. Opcodes that take arguments get them
    # by popping values off the stack. Unsupported operations raise
    # TypeError or ValueError exceptions and never fail silently
    LoadConstant = 0u8, # Pushes the constant at index x of the constant table onto the stack
    ## Unary and binary operators
    UnaryNegate,      # Pushes -x onto the stack
    BinaryAdd,        # Pushes a + b onto the stack
    BinarySubtract,   # Pushes a - b onto the stack
    BinaryDivide,     # Pushes a / b onto the stack (true division, the result is a float)
    BinaryFloorDiv,   # Pushes a // b onto the stack (integer division, the result is always an integer)
    BinaryMultiply,   # Pushes a * b onto the stack
    BinaryPow,        # Pushes a ** b (a to the power of b) onto the stack
    BinaryMod,        # Pushes a % b (modulo division) onto the stack
    BinaryShiftRight, # Pushes a >> b (a with its bits shifted b times to the right) onto the stack
    BinaryShiftLeft,  # Pushes a << b (a with its bits shifted b times to the left) onto the stack
    BinaryXor,        # Pushes a ^ b (bitwise exclusive or) onto the stack
    BinaryOr,         # Pushes a | b (bitwise or) onto the stack
    BinaryAnd,        # Pushes a & b (bitwise and) onto the stack
    UnaryNot,         # Pushes ~x (bitwise not) onto the stack
    BinaryAs,         # Pushes a as b onto the stack (converts a to the type of b; a must explicitly support it)
    BinaryIs,         # Pushes a is b onto the stack (true if a and b point to the same object, false otherwise)
    BinaryIsNot,      # Pushes not (a is b) onto the stack. Doable via BinaryIs, but a dedicated opcode is more efficient
    BinaryOf,         # Pushes a of b onto the stack (true if a is a subclass of b, false otherwise)
    BinarySlice,      # Slices a supported object (e.g. "hello"[0:2] yields "he") and pushes the result onto the stack
    BinarySubscript,  # Subscript operator, e.g. "hello"[0] pushes 'h' onto the stack
    ## Binary comparison operators
    GreaterThan,    # Pushes a > b onto the stack
    LessThan,       # Pushes a < b onto the stack
    EqualTo,        # Pushes a == b onto the stack
    NotEqualTo,     # Pushes a != b onto the stack (optimization for not (a == b))
    GreaterOrEqual, # Pushes a >= b onto the stack
    LessOrEqual,    # Pushes a <= b onto the stack
    ## Logical operators
    LogicalNot, # NOTE(review): the original comment was cut off after "Pushes true if" - confirm the exact semantics
    LogicalAnd,
    LogicalOr,
    ## Constant opcodes (each of them pushes a singleton on the stack)
    Nil,
    True,
    False,
    Nan,
    Inf,
    ## Basic stack operations
    Pop,  # Pops one element off the stack and discards it
    Push, # Pushes x onto the stack
    PopN, # Pops x elements off the stack (optimization for exiting scopes and returning from functions)
    ## Name resolution/handling
    LoadAttribute,
    DeclareName, # Declares a global, dynamically bound name in the current scope
    LoadName,    # Loads a dynamically bound variable
    LoadFast,    # Loads a statically bound variable
    StoreName,   # Sets/updates the value of a dynamically bound variable
    StoreFast,   # Sets/updates the value of a statically bound variable
    DeleteName,  # Unbinds the name of a dynamically bound variable from the current scope
    DeleteFast,  # Unbinds the name of a statically bound variable from the current scope
    LoadHeap,    # Loads a closed-over variable
    StoreHeap,   # Stores a closed-over variable
    ## Looping and jumping
    Jump,             # Unconditional jump to an absolute index in the bytecode
    JumpIfFalse,      # Absolute jump taken if the value at the top of the stack is falsey
    JumpIfTrue,       # Absolute jump taken if the value at the top of the stack is truthy
    JumpIfFalsePop,   # Like JumpIfFalse, but always pops the stack (regardless of truthyness). Optimization for if statements
    JumpIfFalseOrPop, # Absolute jump taken if the value at the top of the stack is falsey; otherwise the value is popped
    JumpForwards,     # Unconditional, relative, positive jump in the bytecode
    JumpBackwards,    # Unconditional, relative, negative jump in the bytecode
    Break,            # Temporary opcode used to signal exiting out of loops
    ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
    LongJump,
    LongJumpIfFalse,
    LongJumpIfTrue,
    LongJumpIfFalsePop,
    LongJumpIfFalseOrPop,
    LongJumpForwards,
    LongJumpBackwards,
    ## Functions
    Call,   # Calls a callable object
    Return, # Returns from the current function
    ## Exception handling
    Raise,     # Raises exception x
    ReRaise,   # Re-raises the active exception
    BeginTry,  # Opens an exception handling context
    FinishTry, # Closes the current exception handling context
    ## Generators
    Yield,
    ## Coroutines
    Await,
    ## Collection literals
    BuildList,
    BuildDict,
    BuildSet,
    BuildTuple,
    ## Misc
    Assert,         # Raises an AssertionFailed exception if the value at the top of the stack is falsey
    MakeClass,      # Builds a class instance from the values at the top of the stack (class object, constructor arguments, etc.)
    Slice,          # Slices an object (3 arguments: start, stop, step). Pushes the result of a.subscript(b, c, d) onto the stack
    GetItem,        # Pushes the result of a.getItem(b) onto the stack
    ImplicitReturn, # Optimization for returning nil from functions (saves us a VM "clock cycle")
|
|
|
|
|
|
# Instructions are grouped by their operation/operand types, which makes
# them easier to handle when debugging

# Simple instructions encompass:
# - Instructions that push onto/pop off the stack unconditionally
#   (True, False, Pop, etc.)
# - Unary and binary operators
const simpleInstructions* = {
  UnaryNegate, BinaryAdd, BinarySubtract, BinaryDivide, BinaryFloorDiv,
  BinaryMultiply, BinaryPow, BinaryMod, BinaryShiftRight, BinaryShiftLeft,
  BinaryXor, BinaryOr, BinaryAnd, UnaryNot, BinaryAs, BinaryIs, BinaryOf,
  BinarySlice,
  GreaterThan, LessThan, EqualTo, GreaterOrEqual, LessOrEqual,
  LogicalNot,
  Nil, True, False, OpCode.Nan, OpCode.Inf,
  Pop,
  LoadAttribute,
  Return,
  Raise, ReRaise, BeginTry, FinishTry,
  Yield, Await,
  MakeClass, ImplicitReturn
}

# Constant instructions are instructions that operate on the bytecode
# constant table
const constantInstructions* = {
  LoadConstant, DeclareName, LoadName, StoreName, DeleteName
}

# Stack triple instructions operate on the stack at arbitrary offsets and
# pop arguments off of it in the form of 24 bit integers
const stackTripleInstructions* = {
  LoadFast, StoreFast, DeleteFast, LoadHeap, StoreHeap, Call
}

# Stack double instructions operate on the stack at arbitrary offsets and
# pop arguments off of it in the form of 16 bit integers (there are none
# at the moment)
const stackDoubleInstructions* = {}

# Argument double instructions take hardcoded arguments on the stack as
# 16 bit integers
const argumentDoubleInstructions* = {PopN}

# Jump instructions jump at relative or absolute bytecode offsets
# NOTE(review): Jump, LongJump, JumpIfFalseOrPop and LongJumpIfFalseOrPop
# are not part of this set - confirm that this is intentional
const jumpInstructions* = {
  JumpIfFalse, JumpIfTrue, JumpIfFalsePop, JumpForwards, JumpBackwards,
  LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop, LongJumpForwards,
  LongJumpBackwards
}

# Collection instructions push a built-in collection type onto the stack
const collectionInstructions* = {
  BuildList, BuildDict, BuildSet, BuildTuple
}
|
|
|
|
|
|
proc newChunk*(reuseConsts: bool = true): Chunk =
  ## Creates a fresh chunk whose constant table, code and line
  ## information are all empty. The reuseConsts flag is stored
  ## in the chunk as is
  new(result)
  result.consts = @[]
  result.code = @[]
  result.lines = @[]
  result.reuseConsts = reuseConsts
|
|
|
|
|
|
proc `$`*(self: Chunk): string =
  ## Returns a human readable representation of the chunk, listing
  ## its constants, its code bytes and its line table as comma
  ## separated values
  let
    constants = self.consts.join(", ")
    bytecode = self.code.join(", ")
    lineTable = self.lines.join(", ")
  result = &"""Chunk(consts=[{constants}], code=[{bytecode}], lines=[{lineTable}])"""
|
|
|
|
|
|
proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Appends a single byte to the chunk's code and records the
  ## line it belongs to in the chunk's (line, count) table
  assert line > 0, "line must be greater than zero"
  let continuesLastRun = self.lines.len() >= 2 and self.lines[^2] == line
  if not continuesLastRun:
    # First byte seen for this line (or the table is still empty):
    # open a new (line, count) pair
    self.lines.add(line)
    self.lines.add(1)
  else:
    # Same line as the most recent pair: just bump its count
    inc self.lines[^1]
  self.code.add(newByte)
|
|
|
|
|
|
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array, in order, to the chunk.
  ## All of them are attributed to the same line
  for i in 0..bytes.high():
    write(self, bytes[i], line)
|
|
|
|
|
|
proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Writes the given opcode, converted to its byte value, to
  ## the chunk at the provided line number
  let raw = uint8(newByte)
  write(self, raw, line)
|
|
|
|
|
|
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array, in order, to the chunk
  ## as a byte. All of them are attributed to the same line
  for i in 0..bytes.high():
    write(self, uint8(bytes[i]), line)
|
|
|
|
|
|
proc getLine*(self: Chunk, idx: int): int =
  ## Returns the line number associated with the byte at
  ## index idx of the chunk's code.
  ##
  ## Raises IndexDefect if the chunk's line table is empty
  ## or if idx is not covered by any (line, count) pair,
  ## which includes negative indexes
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  # Index (into the code) of the first byte of the pair being examined
  var runStart = 0
  for n in countup(0, self.lines.high(), 2):
    let runLength = self.lines[n + 1]
    # Each pair covers exactly the half-open range
    # [runStart, runStart + runLength). Starting the range
    # below runStart would make negative indexes match
    # the first pair instead of being rejected
    if idx in runStart ..< runStart + runLength:
      return self.lines[n]
    runStart += runLength
  raise newException(IndexDefect, "index out of range")
|
|
|
|
|
|
proc findOrAddConstant(self: Chunk, constant: ASTNode): int =
  ## Returns the index of the given constant in the chunk's
  ## constant table, adding it to the table if needed.
  ##
  ## If self.reuseConsts is true and an equivalent constant
  ## has already been written, its index is returned and the
  ## table is left untouched. In every other case (including
  ## when self.reuseConsts is false) the constant is appended
  ## to the table and the index of the new entry is returned
  if self.reuseConsts:
    for i, c in self.consts:
      # We cannot use simple equality because the nodes likely have
      # different token objects with different values
      if c.kind != constant.kind:
        continue
      if constant.isConst():
        let
          stored = LiteralExpr(c)
          wanted = LiteralExpr(constant)
        if stored.literal.lexeme == wanted.literal.lexeme:
          # This wouldn't work for stuff like 2e3 and 2000.0, but those
          # forms are collapsed in the compiler before being written
          # to the constants table
          return i
      elif constant.kind == identExpr:
        let
          stored = IdentExpr(c)
          wanted = IdentExpr(constant)
        if stored.name.lexeme == wanted.name.lexeme:
          return i
  # Either reuse is disabled or no equivalent constant exists yet. The
  # constant must be stored in both cases: returning early when reuse
  # is disabled would hand out index 0 without ever storing anything
  self.consts.add(constant)
  result = self.consts.high()
|
|
|
|
|
|
proc addConstant*(self: Chunk, constant: ASTNode): array[3, uint8] =
  ## Writes a constant to the chunk and returns its index in the
  ## constant table converted to a 3-byte array. If the constant
  ## is used more than once and self.reuseConsts is true, the
  ## index of the existing entry is reused.
  ##
  ## Raises CompileError if the constant table already holds
  ## 16777215 entries
  # NOTE(review): this check runs before the lookup, so it also fires
  # when the constant would have been reused - confirm this is intended
  if len(self.consts) == 16777215:
    # Constant indexes are 24 bit unsigned integers, so that's as far
    # as we can index into the constant table (the same goes for our
    # stack, by the way). Nobody is likely to ever hit this limit in
    # the real world, but you know, just in case
    raise newException(CompileError, "cannot encode more than 16777215 constants")
  let index = self.findOrAddConstant(constant)
  result = index.toTriple()
|