Initial work on porting the bytecode backend to peon 0.2

This commit is contained in:
Mattia Giambirtone 2023-11-20 13:04:54 +01:00
parent 8b39cc3bc0
commit 6181c49f1f
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
9 changed files with 2287 additions and 41 deletions

View File

@ -0,0 +1,30 @@
import frontend/compiler/typechecker
import backend/bytecode/opcodes
type
  BytecodeGenerator* = ref object
    ## State carried across a bytecode generation run
    chunk: Chunk  # The chunk that generate() fills in and returns
proc newBytecodeGenerator*: BytecodeGenerator =
  ## Creates a fresh bytecode generator. Its chunk is
  ## left unset until generate() is called
  new(result)
proc generateExpression(self: BytecodeGenerator, node: TypedExpr) =
  ## Emits code for expressions.
  ## NOTE(review): the body is currently empty, so nothing
  ## is emitted for any expression yet
proc generate*(self: BytecodeGenerator, compiled: seq[TypedNode]): Chunk =
  ## Turn the given compilation output
  ## into a bytecode chunk. A new chunk is
  ## allocated on every call, replacing any
  ## chunk held from a previous run
  self.chunk = newChunk()
  for typedNode in compiled:
    case typedNode.node.kind:
      of exprStmt:
        self.generateExpression(TypedExpr(typedNode))
      else:
        # Every other node kind is silently skipped for now
        discard # TODO
  result = self.chunk

View File

@ -0,0 +1,373 @@
# Copyright 2023 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Low level bytecode implementation details
import backend/bytecode/tooling/multibyte
type
  Chunk* = ref object
    ## A piece of bytecode.
    ##
    ## consts is the code's constants table.
    ##
    ## code is the linear sequence of compiled bytecode instructions.
    ##
    ## lines maps bytecode instructions to line numbers using Run
    ## Length Encoding. Instructions are encoded in groups whose structure
    ## follows the following schema:
    ## - The first integer represents the line number
    ## - The second integer represents the number of
    ##   instructions on that line
    ## For example, if lines equals [1, 5], it means that there are 5 instructions
    ## at line 1, meaning that all instructions in code[0..4] belong to the same line.
    ## This is more efficient than using the naive approach, which would encode
    ## the same line number multiple times and waste considerable amounts of space.
    ##
    ## functions encodes the following information:
    ## - Function name
    ## - Argument count
    ## - Function boundaries
    ## The encoding is the following:
    ## - First, the position into the bytecode where the function begins is encoded (as a 3 byte integer)
    ## - Second, the position into the bytecode where the function ends is encoded (as a 3 byte integer)
    ## - After that follows the argument count as a 1 byte integer
    ## - Lastly, the function's name (optional) is encoded in ASCII, prepended with
    ##   its size as a 2-byte integer
    ##
    ## modules contains information about all the peon modules that the compiler has encountered,
    ## along with their start/end offset in the code. Unlike other bytecode-compiled languages like
    ## Python, peon does not produce a bytecode file for each separate module it compiles: everything
    ## is contained within a single binary blob. While this simplifies the implementation and makes
    ## bytecode files entirely "self-hosted", it also means that the original module information is
    ## lost: this segment serves to fix that. The segment's size is encoded at the beginning as a 4-byte
    ## sequence (i.e. a single 32-bit integer) and its encoding is similar to that of the functions segment:
    ## - First, the position into the bytecode where the module begins is encoded (as a 3 byte integer)
    ## - Second, the position into the bytecode where the module ends is encoded (as a 3 byte integer)
    ## - Lastly, the module's name is encoded in ASCII, prepended with its size as a 2-byte integer
    consts*: seq[uint8]
    code*: seq[uint8]
    lines*: seq[int]
    functions*: seq[uint8]
    modules*: seq[uint8]
  OpCode* {.pure.} = enum
    ## Enum of Peon's bytecode opcodes
    # Note: x represents the argument
    # to unary opcodes, while a and b
    # represent arguments to binary
    # opcodes. Other variable names (c, d, ...)
    # may be used for more complex opcodes.
    # Some opcodes (e.g. jumps) take arguments in
    # the form of 16 or 24 bit numbers that are defined
    # statically at compilation time into the bytecode.
    # The Load* opcodes below push a constant at position
    # x in the constant table onto the stack
    LoadInt64 = 0u8,
    LoadUInt64,
    LoadInt32,
    LoadUInt32,
    LoadInt16,
    LoadUInt16,
    LoadInt8,
    LoadUInt8,
    LoadFloat64,
    LoadFloat32,
    LoadString,
    ## Singleton opcodes (each of them pushes a constant singleton on the operand stack)
    LoadNil,
    LoadTrue,
    LoadFalse,
    LoadNan,
    LoadInf,
    LoadNInf,
    ## Operations on primitive types
    Negate,
    NegateFloat64,
    NegateFloat32,
    Add,
    Subtract,
    Multiply,
    Divide,
    SignedDivide,
    AddFloat64,
    SubtractFloat64,
    MultiplyFloat64,
    DivideFloat64,
    AddFloat32,
    SubtractFloat32,
    MultiplyFloat32,
    DivideFloat32,
    Pow,
    SignedPow,
    Mod,
    SignedMod,
    PowFloat64,
    PowFloat32,
    ModFloat64,
    ModFloat32,
    LShift,
    RSHift,
    Xor,
    Or,
    And,
    Not,
    Equal,
    NotEqual,
    GreaterThan,
    LessThan,
    GreaterOrEqual,
    LessOrEqual,
    SignedGreaterThan,
    SignedLessThan,
    SignedGreaterOrEqual,
    SignedLessOrEqual,
    Float64GreaterThan,
    Float64LessThan,
    Float64GreaterOrEqual,
    Float64LessOrEqual,
    Float32GreaterThan,
    Float32LessThan,
    Float32GreaterOrEqual,
    Float32LessOrEqual,
    LogicalNot,
    Identity, # Pointer equality
    ## Print opcodes
    PrintInt64,
    PrintUInt64,
    PrintInt32,
    PrintUInt32,
    PrintInt16,
    PrintUint16,
    PrintInt8,
    PrintUInt8,
    PrintFloat64,
    PrintFloat32,
    PrintHex,
    PrintBool,
    PrintNan,
    PrintInf,
    PrintString,
    ## Basic stack operations
    Pop, # Pops an element off the operand stack and discards it
    PopN, # Pops x elements off the call stack (optimization for exiting local scopes which usually pop many elements)
    ## Name resolution/handling
    LoadAttribute, # Pushes the attribute b of object a onto the stack
    LoadVar, # Pushes the object at position x in the stack onto the stack
    StoreVar, # Stores the value of b at position a in the stack
    AddVar, # An optimization for StoreVar (used when the variable is first declared)
    ## Looping and jumping
    Jump, # Absolute, unconditional jump into the bytecode
    JumpForwards, # Relative, unconditional, positive jump in the bytecode
    JumpBackwards, # Relative, unconditional, negative jump in the bytecode
    JumpIfFalse, # Jumps to a relative index in the bytecode if x is false
    JumpIfTrue, # Jumps to a relative index in the bytecode if x is true
    JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthiness). Optimization for if statements
    JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
    ## Functions
    Call, # Calls a function and initiates a new stack frame
    Return, # Terminates the current function
    SetResult, # Sets the result of the current function
    ## Exception handling
    Raise, # Raises exception x or re-raises active exception if x is nil
    BeginTry, # Initiates an exception handling context
    FinishTry, # Closes the current exception handling context
    ## Generators
    Yield, # Yields control from a generator back to the caller
    ## Coroutines
    Await, # Calls an asynchronous function
    ## Misc
    Assert, # Raises an exception if x is false
    NoOp, # Just a no-op
    PopC, # Pop a value off the call stack and discard it
    PushC, # Pop a value off the operand stack and push it onto the call stack
    SysClock64, # Pushes the output of a monotonic clock on the stack
    LoadTOS, # Pushes the top of the call stack onto the operand stack
    DupTop, # Duplicates the top of the operand stack onto the operand stack
    LoadGlobal # Loads a global variable
# Instructions are grouped by their operation/operand types for easier handling when debugging.
# Simple instructions encompass instructions that push onto/pop off the stack unconditionally
# (True, False, Pop, etc.). The set is written with enum ranges: a range covers every opcode
# declared between its two endpoints, so an opcode inserted inside one of these ranges in the
# enum becomes a simple instruction automatically.
const simpleInstructions* = {
  # Singleton loads
  LoadNil..LoadNInf,
  # Arithmetic, bitwise, comparison and logical operations
  Negate..Identity,
  # Print opcodes
  PrintInt64..PrintString,
  # Stack and variable handling
  Pop, AddVar,
  # Functions
  Return..SetResult,
  # Exceptions, generators and coroutines
  Raise..Await,
  # Misc (Assert and LoadGlobal are deliberately left out)
  NoOp..DupTop
}
# Constant instructions are instructions that operate on the bytecode constant table.
# They are the first opcodes of the enum, from LoadInt64 up to and including LoadString
const constantInstructions* = {LoadInt64..LoadString}
# Stack triple instructions operate on the stack at arbitrary offsets and take their
# argument in the form of a 24 bit integer
const stackTripleInstructions* = {StoreVar, LoadVar, LoadGlobal}
# Stack double instructions operate on the stack at arbitrary offsets and take their
# argument in the form of a 16 bit integer. No opcode falls in this group at the moment
const stackDoubleInstructions* = {}
# Argument double instructions take a hardcoded argument as a 16 bit integer
const argumentDoubleInstructions* = {PopN, }
# Jump instructions jump to relative or absolute bytecode offsets
const jumpInstructions* = {Jump, JumpIfFalse, JumpIfFalsePop,
                           JumpForwards, JumpBackwards,
                           JumpIfTrue, JumpIfFalseOrPop}
proc newChunk*: Chunk =
  ## Allocates a chunk whose constant, code, line and
  ## function tables all start out empty
  new(result)
  result.consts = @[]
  result.code = @[]
  result.lines = @[]
  result.functions = @[]
proc write*(self: Chunk, newByte: uint8, line: int) =
  ## Appends a single byte to the chunk's code and records
  ## it in the run-length encoded line table: the count of
  ## the last run is bumped when it refers to the same line,
  ## otherwise a new (line, 1) run is started
  assert line > 0, "line must be greater than zero"
  let extendsLastRun = self.lines.len >= 2 and self.lines[^2] == line
  if extendsLastRun:
    inc(self.lines[^1])
  else:
    self.lines.add([line, 1])
  self.code.add(newByte)
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
  ## Writes every byte of the given array, in order,
  ## attributing all of them to the same line
  for i in 0..bytes.high:
    self.write(bytes[i], line)
proc write*(self: Chunk, newByte: OpCode, line: int) =
  ## Writes the numeric value of the given opcode to the
  ## chunk, attributing it to the provided line number
  let raw = uint8(newByte)
  self.write(raw, line)
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
  ## Writes every opcode of the given array, in order,
  ## attributing all of them to the same line
  for i in 0..bytes.high:
    self.write(uint8(bytes[i]), line)
proc getLine*(self: Chunk, idx: int): int =
  ## Returns the associated line of a given
  ## instruction index by walking the run-length
  ## encoded line table.
  ##
  ## Raises IndexDefect if the chunk has no line
  ## information or if idx does not fall within any
  ## recorded run (this includes negative indexes)
  if self.lines.len < 2:
    raise newException(IndexDefect, "the chunk object is empty")
  var
    count: int
    current: int = 0   # Index of the first instruction of the run being examined
  for n in countup(0, self.lines.high(), 2):
    count = self.lines[n + 1]
    # A run covers exactly the instructions in current..<current + count.
    # Starting the range at current (rather than below it) keeps negative
    # indexes from matching the first run
    if idx in current..<current + count:
      return self.lines[n]
    current += count
  raise newException(IndexDefect, "index out of range")
proc getIdx*(self: Chunk, line: int): int =
  ## Gets the index into self.lines
  ## where the line counter for the given
  ## line is located. As written, this returns the
  ## first odd index whose value equals line, or 0
  ## (the default value of result) when nothing matches.
  ## NOTE(review): write() stores line numbers at even
  ## indexes and instruction counts at odd ones, so this
  ## compares counts against line -- confirm which
  ## index parity was intended
  for i, v in self.lines:
    if (i and 1) != 0 and v == line:
      return i
proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
  ## Appends the given bytes to the chunk's constant table.
  ## The returned 3-byte array encodes the position the
  ## first byte was written at (i.e. the table's length
  ## before the append)
  result = toTriple(self.consts.len())
  self.consts.add(data)

View File

@ -0,0 +1,277 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import std/strformat
import std/terminal
import backend/bytecode/opcodes
import backend/bytecode/tooling/multibyte
type
  Function = object
    ## Debug information about a function, as parsed
    ## from a chunk's functions segment
    start, stop, argc: int   # Start/end bytecode offsets and argument count
    name: string
  Module = object
    ## Debug information about a module, as parsed
    ## from a chunk's modules segment
    start, stop: int   # Start/end bytecode offsets
    name: string
  BytecodeDebugger* = ref object
    ## A bytecode disassembler
    chunk: Chunk               # The chunk being disassembled
    modules: seq[Module]       # Filled in by parseModules
    functions: seq[Function]   # Filled in by parseFunctions
    current: int               # Offset of the next byte to disassemble
proc newBytecodeDebugger*: BytecodeDebugger =
  ## Creates a debugger that has no chunk attached
  ## and an empty list of known functions
  result = BytecodeDebugger(functions: @[])
proc nl =
  ## Writes a single newline to standard output
  stdout.write("\n")
proc printDebug(s: string, newline: bool = false) =
  ## Prints s in green, preceded by the disassembler's
  ## magenta prefix. A trailing newline is only written
  ## when requested
  stdout.styledWrite(fgMagenta, "DEBUG - Disassembler -> ")
  stdout.styledWrite(fgGreen, s)
  if not newline:
    return
  stdout.write("\n")
proc printName(opcode: OpCode, newline: bool = false) =
  ## Prints the opcode's name followed by its numeric
  ## value in parentheses. A trailing newline is only
  ## written when requested
  let
    label = $opcode
    raw = $uint8(opcode)
  stdout.styledWrite(fgRed, label, " (", fgYellow, raw, fgRed, ")")
  if not newline:
    return
  stdout.write("\n")
proc printInstruction(instruction: OpCode, newline: bool = false) =
  ## Prints the "Instruction: " debug label followed by
  ## the opcode's name and value. A trailing newline is
  ## only written when requested
  printDebug("Instruction: ")
  printName(instruction)
  if not newline:
    return
  stdout.write("\n")
proc checkFunctionStart(self: BytecodeDebugger, n: int) =
  ## Prints a banner with name, boundaries and argument
  ## count for every known function that begins at
  ## bytecode offset n
  for i, e in self.functions:
    if n != e.start:
      continue
    styledEcho fgBlue, "\n==== Peon Bytecode Disassembler - Function Start ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ===="
    styledEcho fgGreen, "\t- Start offset: ", fgYellow, $e.start
    styledEcho fgGreen, "\t- End offset: ", fgYellow, $e.stop
    styledEcho fgGreen, "\t- Argument count: ", fgYellow, $e.argc, "\n"
proc checkFunctionEnd(self: BytecodeDebugger, n: int) =
  ## Prints a closing banner for every known function
  ## that ends at bytecode offset n
  for i, e in self.functions:
    if n != e.stop:
      continue
    styledEcho fgBlue, "\n==== Peon Bytecode Disassembler - Function End ", fgYellow, &"'{e.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ===="
proc checkModuleStart(self: BytecodeDebugger, n: int) =
  ## Prints a banner with name and boundaries for every
  ## known module that begins at bytecode offset n
  for i, m in self.modules:
    if m.start != n:
      continue
    styledEcho fgBlue, "\n==== Peon Bytecode Disassembler - Module Start ", fgYellow, &"'{m.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ===="
    styledEcho fgGreen, "\t- Start offset: ", fgYellow, $m.start
    styledEcho fgGreen, "\t- End offset: ", fgYellow, $m.stop, "\n"
proc checkModuleEnd(self: BytecodeDebugger, n: int) =
  ## Prints a closing banner for every known module
  ## that ends at bytecode offset n
  for i, m in self.modules:
    if m.stop != n:
      continue
    styledEcho fgBlue, "\n==== Peon Bytecode Disassembler - Module End ", fgYellow, &"'{m.name}' ", fgBlue, "(", fgYellow, $i, fgBlue, ") ===="
proc simpleInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs instructions that take no operand and moves
  ## past them. Return is special-cased: the byte that
  ## follows it is printed as a "Void" flag (zero means
  ## yes) and is skipped as well
  printInstruction(instruction, true)
  inc(self.current)
  if instruction != Return:
    return
  printDebug("Void: ")
  let isVoid = self.chunk.code[self.current] == 0
  if isVoid:
    stdout.styledWriteLine(fgYellow, "Yes")
  else:
    stdout.styledWriteLine(fgYellow, "No")
  inc(self.current)
proc stackTripleInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs instructions whose operand is a 24-bit stack
  ## index stored in the 3 bytes following the opcode,
  ## then moves past the opcode and its operand
  let
    base = self.current
    slot = fromTriple([self.chunk.code[base + 1], self.chunk.code[base + 2], self.chunk.code[base + 3]])
  printInstruction(instruction)
  stdout.styledWriteLine(fgGreen, &", points to index ", fgYellow, $slot)
  self.current = base + 4
proc stackDoubleInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs instructions that operate on a single value on the stack using a 16-bit operand.
  ## The operand is read from the 2 bytes following the opcode, after which the
  ## debugger moves past the opcode and its operand
  var slot = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2]].fromDouble()
  printInstruction(instruction)
  # The label is printed exactly once, as stackTripleInstruction does
  stdout.styledWriteLine(fgGreen, &", points to index ", fgYellow, $slot)
  self.current += 3
proc argumentDoubleInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs instructions that carry a hardcoded 16-bit
  ## argument in the 2 bytes following the opcode, then
  ## moves past the opcode and its argument
  let
    base = self.current
    argument = fromDouble([self.chunk.code[base + 1], self.chunk.code[base + 2]])
  printInstruction(instruction)
  stdout.styledWriteLine(fgGreen, &", has argument ", fgYellow, $argument)
  self.current = base + 3
proc argumentTripleInstruction(self: BytecodeDebugger, instruction: OpCode) {.used.} =
  ## Debugs instructions that carry a hardcoded 24-bit
  ## argument in the 3 bytes following the opcode, then
  ## moves past the opcode and its argument
  let
    base = self.current
    argument = fromTriple([self.chunk.code[base + 1], self.chunk.code[base + 2], self.chunk.code[base + 3]])
  printInstruction(instruction)
  stdout.styledWriteLine(fgGreen, ", has argument ", fgYellow, $argument)
  self.current = base + 4
proc callInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs function calls. The 3 bytes following the
  ## opcode hold a 24-bit size; the frame size reported
  ## is that value plus 2
  let
    base = self.current
    size = fromTriple([self.chunk.code[base + 1], self.chunk.code[base + 2], self.chunk.code[base + 3]])
  # Skip the 3 operand bytes first, the opcode byte after printing
  self.current = base + 3
  printInstruction(instruction)
  styledEcho fgGreen, &", creates frame of size ", fgYellow, $(size + 2), fgGreen
  inc(self.current)
proc constantInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs instructions that operate on the constant table.
  ## Every such instruction carries a 24-bit position into
  ## the table; LoadString additionally carries a 24-bit
  ## length, which comes before the position
  let isString = instruction == LoadString
  var length: uint
  if isString:
    length = fromTriple([self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]])
    self.current += 3
  let position = fromTriple([self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]])
  printInstruction(instruction)
  stdout.styledWrite(fgGreen, &", points to constant at position ", fgYellow, $position)
  self.current += 4
  if not isString:
    stdout.write("\n")
  else:
    stdout.styledWriteLine(fgGreen, " of length ", fgYellow, $length)
proc jumpInstruction(self: BytecodeDebugger, instruction: OpCode) =
  ## Debugs jumps. The jump size is the 24-bit operand stored
  ## in the 3 bytes following the opcode. Once the jump has been
  ## printed, any run of NoOp bytes right after it is skipped
  let jump = [self.chunk.code[self.current + 1], self.chunk.code[self.current + 2], self.chunk.code[self.current + 3]].fromTriple().int()
  printInstruction(instruction, true)
  printDebug("Jump size: ")
  stdout.styledWrite(fgYellow, $jump)
  nl()
  self.current += 4
  # The bounds check keeps a jump located at the very end of the
  # bytecode from indexing past the last byte
  while self.current < self.chunk.code.len and self.chunk.code[self.current] == NoOp.uint8:
    inc(self.current)
proc disassembleInstruction*(self: BytecodeDebugger) =
  ## Takes one bytecode instruction and prints it.
  ## The instruction is read at self.current, which the
  ## handler it is dispatched to moves past the instruction
  ## and its operands. Module/function start banners are
  ## printed first, followed by the offset and source line
  let opcode = OpCode(self.chunk.code[self.current])
  self.checkModuleStart(self.current)
  self.checkFunctionStart(self.current)
  printDebug("Offset: ")
  stdout.styledWriteLine(fgYellow, $(self.current))
  printDebug("Line: ")
  stdout.styledWriteLine(fgYellow, &"{self.chunk.getLine(self.current)}")
  # Dispatch on the group the opcode belongs to
  case opcode:
    of simpleInstructions:
      self.simpleInstruction(opcode)
      # Functions (and modules) only have a single return statement at the
      # end of their body, so we never execute this more than once per module/function
      if opcode == Return:
        # -2 to skip the hardcoded argument to return
        # and the increment by simpleInstruction()
        self.checkFunctionEnd(self.current - 2)
        # NOTE(review): the module check uses -1 rather than -2; confirm
        # that module end offsets are meant to differ from function ones
        self.checkModuleEnd(self.current - 1)
    of constantInstructions:
      self.constantInstruction(opcode)
    of stackDoubleInstructions:
      self.stackDoubleInstruction(opcode)
    of stackTripleInstructions:
      self.stackTripleInstruction(opcode)
    of argumentDoubleInstructions:
      self.argumentDoubleInstruction(opcode)
    of Call:
      self.callInstruction(opcode)
    of jumpInstructions:
      self.jumpInstruction(opcode)
    else:
      # Opcodes without a handler are reported and skipped one byte at a time
      echo &"DEBUG - Unknown opcode {opcode} at index {self.current}"
      self.current += 1
proc parseFunctions(self: BytecodeDebugger) =
  ## Decodes the chunk's functions segment and appends one
  ## Function per entry to self.functions. Each entry is laid
  ## out as: start offset (3 bytes), end offset (3 bytes),
  ## argument count (1 byte), name length (2 bytes), name
  var idx = 0
  while idx < self.chunk.functions.high():
    let
      start = int(fromTriple([self.chunk.functions[idx], self.chunk.functions[idx + 1], self.chunk.functions[idx + 2]]))
      stop = int(fromTriple([self.chunk.functions[idx + 3], self.chunk.functions[idx + 4], self.chunk.functions[idx + 5]]))
      argc = int(self.chunk.functions[idx + 6])
      size = int(fromDouble([self.chunk.functions[idx + 7], self.chunk.functions[idx + 8]]))
    # Move past the 9 bytes of fixed-size fields
    idx += 9
    let name = fromBytes(self.chunk.functions[idx..<idx + size])
    idx += size
    self.functions.add(Function(start: start, stop: stop, argc: argc, name: name))
proc parseModules(self: BytecodeDebugger) =
  ## Decodes the chunk's modules segment and appends one
  ## Module per entry to self.modules. Each entry is laid
  ## out as: start offset (3 bytes), end offset (3 bytes),
  ## name length (2 bytes), name
  var idx = 0
  while idx < self.chunk.modules.high():
    let
      start = int(fromTriple([self.chunk.modules[idx], self.chunk.modules[idx + 1], self.chunk.modules[idx + 2]]))
      stop = int(fromTriple([self.chunk.modules[idx + 3], self.chunk.modules[idx + 4], self.chunk.modules[idx + 5]]))
      size = int(fromDouble([self.chunk.modules[idx + 6], self.chunk.modules[idx + 7]]))
    # Move past the 8 bytes of fixed-size fields
    idx += 8
    let name = fromBytes(self.chunk.modules[idx..<idx + size])
    idx += size
    self.modules.add(Module(start: start, stop: stop, name: name))
proc disassembleChunk*(self: BytecodeDebugger, chunk: Chunk, name: string) =
  ## Takes a chunk of bytecode and prints it, one instruction
  ## at a time, between a header and a footer carrying the
  ## given name. The debugger can be reused across chunks:
  ## all state left over from a previous run is discarded
  self.chunk = chunk
  styledEcho fgBlue, &"==== Peon Bytecode Disassembler - Chunk '{name}' ====\n"
  self.current = 0
  # Without this, function/module entries parsed for a previously
  # disassembled chunk would pile up and their banners would be
  # printed again (or at the wrong offsets)
  self.functions.setLen(0)
  self.modules.setLen(0)
  self.parseFunctions()
  self.parseModules()
  while self.current < self.chunk.code.len:
    self.disassembleInstruction()
    echo ""
  styledEcho fgBlue, &"==== Peon Bytecode Disassembler - Chunk '{name}' ===="

View File

@ -0,0 +1,87 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Utilities to handle multibyte sequences
proc toDouble*(input: int | uint | uint16): array[2, uint8] =
  ## Converts the input to a 16-bit unsigned integer and
  ## returns its 2 bytes as they are laid out in memory
  let narrowed = uint16(input)
  result = cast[array[2, uint8]](narrowed)
proc toTriple*(input: uint | int): array[3, uint8] =
  ## Converts the input to an unsigned integer and returns
  ## the first 3 bytes of its in-memory representation
  let widened = uint(input)
  copyMem(addr result, unsafeAddr widened, sizeof(result))
proc toQuad*(input: int | uint | uint16 | uint32): array[4, uint8] =
  ## Converts the input to an unsigned integer and returns
  ## the first 4 bytes of its in-memory representation
  let widened = uint(input)
  copyMem(addr result, unsafeAddr widened, sizeof(result))
proc toLong*(input: int | uint | uint16 | uint32 | uint64): array[8, uint8] =
  ## Converts the input to an unsigned integer and returns
  ## its in-memory representation as an array[8, uint8]
  let widened = uint(input)
  result = cast[array[8, uint8]](widened)
proc fromDouble*(input: array[2, uint8]): uint16 =
  ## Reinterprets the 2 bytes produced by toDouble
  ## as an uint16
  result = cast[uint16](input)
proc fromTriple*(input: array[3, uint8]): uint =
  ## Rebuilds the output of toTriple into an uint: the
  ## 3 bytes become the first bytes of the result's
  ## in-memory representation, the rest stays zero
  var padded: array[sizeof(uint), uint8]
  for i, b in input:
    padded[i] = b
  result = cast[uint](padded)
proc fromQuad*(input: array[4, uint8]): uint =
  ## Rebuilds the output of toQuad into an uint: the
  ## 4 bytes become the first bytes of the result's
  ## in-memory representation, the rest stays zero
  var padded: array[sizeof(uint), uint8]
  for i, b in input:
    padded[i] = b
  result = cast[uint](padded)
proc fromLong*(input: array[8, uint8]): uint =
  ## Rebuilds the output of toLong into an uint by
  ## reinterpreting the 8 bytes as an integer.
  ## NOTE(review): this presumes uint is 8 bytes wide
  result = cast[uint](input)
proc toBytes*(s: string): seq[byte] =
  ## Returns the characters of s, in order, as a
  ## sequence of bytes
  result = newSeq[byte](s.len)
  for i, c in s:
    result[i] = byte(c)
proc toBytes*(s: int): array[8, uint8] =
  ## Returns the in-memory representation of the given
  ## integer as an array of 8 bytes
  let value = s
  result = cast[array[8, uint8]](value)
proc fromBytes*(input: seq[byte]): string =
  ## Builds a string whose characters are the given
  ## bytes, in order
  result = newStringOfCap(input.len)
  for b in input:
    result.add(char(b))
proc extend*[T](s: var seq[T], a: openarray[T]) =
  ## Appends every element of a to s, preserving order
  s.add(a)

View File

@ -0,0 +1,251 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Implementation of the peon bytecode serializer
import std/strformat
import std/strutils
import std/times
import config
import errors
import backend/bytecode/tooling/multibyte
import backend/bytecode/opcodes
type
  BytecodeSerializer* = ref object
    ## Reads and writes serialized bytecode
    file: string
    filename: string   # Attached to the errors raised by the serializer
    chunk: Chunk       # The chunk currently being dumped or loaded
  SerializedBytecode* = ref object
    ## Wrapper returned by
    ## the Serializer.read*
    ## procedures to store
    ## metadata
    version*: tuple[major, minor, patch: int]
    branch*: string
    commit*: string
    compileDate*: int   # Read back from the 8 bytes that writeHeaders fills with a UNIX timestamp
    chunk*: Chunk
  SerializationError* = ref object of PeonException
    ## Raised when bytecode cannot be (de)serialized
proc `$`*(self: SerializedBytecode): string =
  ## Returns a human-readable representation of the
  ## serialized bytecode's metadata and chunk. All fields
  ## are listed inside a single pair of parentheses
  result = &"SerializedBytecode(version={self.version.major}.{self.version.minor}.{self.version.patch}, branch={self.branch}, commitHash={self.commit}, date={self.compileDate}, chunk={self.chunk[]})"
proc error(self: BytecodeSerializer, message: string) =
  ## Raises a SerializationError carrying the given message
  ## and the name of the file being processed
  let exc = SerializationError(msg: message, file: self.filename)
  raise exc
proc newBytecodeSerializer*(self: BytecodeSerializer = nil): BytecodeSerializer =
  ## Returns a serializer in its initial state. When an
  ## existing serializer is passed in, it is reset in place
  ## and returned; otherwise a new one is allocated
  if self.isNil:
    new(result)
  else:
    result = self
  result.file = ""
  result.filename = ""
  result.chunk = nil
proc writeHeaders(self: BytecodeSerializer, stream: var seq[byte]) =
  ## Appends the Peon bytecode headers to the given byte
  ## sequence. In order: the bytecode marker, the version
  ## (one byte each for major, minor and patch), the branch
  ## name prefixed by its length as a single byte, the
  ## commit hash and the current UNIX time as 8 bytes
  stream.add(PeonBytecodeMarker.toBytes())
  for component in [PEON_VERSION.major, PEON_VERSION.minor, PEON_VERSION.patch]:
    stream.add(byte(component))
  stream.add(byte(len(PEON_BRANCH)))
  stream.add(PEON_BRANCH.toBytes())
  stream.add(PEON_COMMIT_HASH.toBytes())
  let timestamp = getTime().toUnixFloat().int()
  stream.add(timestamp.toBytes())
proc writeLineData(self: BytecodeSerializer, stream: var seq[byte]) =
  ## Appends the chunk's line table to the given byte
  ## sequence: the number of entries as 4 bytes, followed
  ## by each entry as 3 bytes
  let entries = len(self.chunk.lines)
  stream.add(entries.toQuad())
  for i in 0..<entries:
    stream.add(self.chunk.lines[i].toTriple())
proc writeFunctions(self: BytecodeSerializer, stream: var seq[byte]) =
  ## Appends the chunk's functions segment to the given
  ## byte sequence, prefixed by its size as 4 bytes
  let size = len(self.chunk.functions)
  stream.add(size.toQuad())
  stream.add(self.chunk.functions)
proc writeConstants(self: BytecodeSerializer, stream: var seq[byte]) =
  ## Appends the chunk's constants table to the given
  ## byte sequence, prefixed by its size as 4 bytes
  let size = self.chunk.consts.len()
  stream.add(size.toQuad())
  stream.add(self.chunk.consts)
proc writeModules(self: BytecodeSerializer, stream: var seq[byte]) =
  ## Appends the chunk's modules segment to the given
  ## byte sequence, prefixed by its size as 4 bytes
  let size = self.chunk.modules.len()
  stream.add(size.toQuad())
  stream.add(self.chunk.modules)
proc writeCode(self: BytecodeSerializer, stream: var seq[byte]) =
  ## Appends the chunk's bytecode to the given byte
  ## sequence, prefixed by its size as 3 bytes
  let size = self.chunk.code.len
  stream.add(size.toTriple())
  stream.add(self.chunk.code)
proc readHeaders(self: BytecodeSerializer, stream: seq[byte], serialized: SerializedBytecode): int =
  ## Reads the bytecode headers from a given sequence
  ## of bytes and stores them into serialized. Returns the
  ## number of bytes the headers take up in the stream.
  ##
  ## Raises a SerializationError if the stream does not begin
  ## with the bytecode marker and an IndexDefect if the stream
  ## ends before the headers do
  const
    commitHashLength = 40   # Commit hashes always take up 40 bytes
    timestampLength = 8
  let markerLength = len(PeonBytecodeMarker)
  if stream[0..<markerLength] != PeonBytecodeMarker.toBytes():
    self.error("malformed bytecode marker")
  # From here on, result doubles as the read cursor so that the
  # stream never needs to be copied just to move forward
  result = markerLength
  serialized.version = (major: int(stream[result]), minor: int(stream[result + 1]), patch: int(stream[result + 2]))
  result += 3
  # Converted to int right away: computing the slice bounds on a
  # single byte underflows when the branch name is empty
  let branchLength = int(stream[result])
  inc(result)
  serialized.branch = stream[result..<result + branchLength].fromBytes()
  result += branchLength
  serialized.commit = stream[result..<result + commitHashLength].fromBytes().toLowerAscii()
  result += commitHashLength
  serialized.compileDate = int(fromLong([stream[result], stream[result + 1], stream[result + 2],
                                         stream[result + 3], stream[result + 4], stream[result + 5],
                                         stream[result + 6], stream[result + 7]]))
  result += timestampLength
proc readLineData(self: BytecodeSerializer, stream: seq[byte]): int =
  ## Reads line information from a stream
  ## of bytes: a 4-byte entry count followed by that
  ## many 3-byte entries, which are appended to the
  ## chunk's line table. Returns the number of bytes
  ## read from the stream.
  ##
  ## Raises an IndexDefect if the stream ends early
  let size = int([stream[0], stream[1], stream[2], stream[3]].fromQuad())
  # result doubles as the read cursor: entries are indexed in place
  # rather than re-slicing (and therefore copying) the remaining
  # stream once per entry
  result = 4
  for _ in 0..<size:
    self.chunk.lines.add(int([stream[result], stream[result + 1], stream[result + 2]].fromTriple()))
    result += 3
  doAssert len(self.chunk.lines) == size
proc readFunctions(self: BytecodeSerializer, stream: seq[byte]): int =
  ## Reads the functions segment from a stream of bytes:
  ## a 4-byte size followed by that many bytes, which are
  ## appended to the chunk's functions. Returns the number
  ## of bytes read from the stream
  const headerSize = 4
  let size = int(fromQuad([stream[0], stream[1], stream[2], stream[3]]))
  result = headerSize
  for i in 0..<size:
    self.chunk.functions.add(stream[headerSize + i])
    inc(result)
  doAssert len(self.chunk.functions) == size
proc readConstants(self: BytecodeSerializer, stream: seq[byte]): int =
  ## Reads the constants table from a stream of bytes:
  ## a 4-byte size followed by that many bytes, which are
  ## appended to the chunk's constants. Returns the number
  ## of bytes read from the stream
  const headerSize = 4
  let size = int(fromQuad([stream[0], stream[1], stream[2], stream[3]]))
  result = headerSize
  for i in 0..<size:
    self.chunk.consts.add(stream[headerSize + i])
    inc(result)
  doAssert len(self.chunk.consts) == size
proc readModules(self: BytecodeSerializer, stream: seq[byte]): int =
  ## Reads the modules segment from a stream of bytes:
  ## a 4-byte size followed by that many bytes, which are
  ## appended to the chunk's modules. Returns the number
  ## of bytes read from the stream
  const headerSize = 4
  let size = int(fromQuad([stream[0], stream[1], stream[2], stream[3]]))
  result = headerSize
  for i in 0..<size:
    self.chunk.modules.add(stream[headerSize + i])
    inc(result)
  doAssert len(self.chunk.modules) == size
proc readCode(self: BytecodeSerializer, stream: seq[byte]): int =
  ## Reads the bytecode from a given byte sequence: a
  ## 3-byte size followed by that many bytes, which are
  ## appended to the chunk's code. Like the other readers,
  ## this returns the total number of bytes read from the
  ## stream, size prefix included.
  ##
  ## Raises an IndexDefect if the stream ends early
  const headerSize = 3
  let size = int([stream[0], stream[1], stream[2]].fromTriple())
  for i in 0..<size:
    self.chunk.code.add(stream[headerSize + i])
  doAssert len(self.chunk.code) == size
  result = headerSize + size
proc dumpBytes*(self: BytecodeSerializer, chunk: Chunk, filename: string): seq[byte] =
  ## Dumps the given chunk to a sequence of bytes and returns it.
  ## The filename argument is for error reporting only, use dumpFile
  ## to dump bytecode to a file
  self.filename = filename
  self.chunk = chunk
  # The segments are written in the same order
  # loadBytes reads them back
  self.writeHeaders(result)
  self.writeLineData(result)
  self.writeFunctions(result)
  self.writeConstants(result)
  self.writeModules(result)
  self.writeCode(result)
proc dumpFile*(self: BytecodeSerializer, chunk: Chunk, filename, dest: string) =
  ## Dumps the result of dumpBytes to a file at dest.
  ## The filename argument is only used for error reporting.
  ##
  ## Raises an IOError if dest cannot be opened or if not all
  ## of the bytes could be written to it
  # Serializing before opening dest means a failure in dumpBytes
  # cannot leave behind a freshly truncated file
  let data = self.dumpBytes(chunk, filename)
  var fp = open(dest, fmWrite)
  defer: fp.close()
  let written = fp.writeBytes(data, 0, len(data))
  if written != len(data):
    raise newException(IOError, &"could not write bytecode to '{dest}': only {written} of {len(data)} bytes written")
proc loadBytes*(self: BytecodeSerializer, stream: seq[byte]): SerializedBytecode =
  ## Loads the result from dumpBytes to a Serializer object
  ## for use in the VM or for inspection. Raises a
  ## SerializationError if the stream is truncated or corrupted
  # Resets the serializer's own state in place
  discard self.newBytecodeSerializer()
  new(result)
  result.chunk = newChunk()
  self.chunk = result.chunk
  var stream = stream
  try:
    # Every reader returns how many bytes it has read, which
    # are then dropped from the front of the stream
    stream = stream[self.readHeaders(stream, result)..^1]
    stream = stream[self.readLineData(stream)..^1]
    stream = stream[self.readFunctions(stream)..^1]
    stream = stream[self.readConstants(stream)..^1]
    stream = stream[self.readModules(stream)..^1]
    stream = stream[self.readCode(stream)..^1]
  except IndexDefect:
    # A reader indexed past the end of the stream
    self.error("truncated bytecode stream")
  except AssertionDefect:
    # A reader's size check (doAssert) failed
    self.error(&"corrupted bytecode stream: {getCurrentExceptionMsg()}")
proc loadFile*(self: BytecodeSerializer, src: string): SerializedBytecode =
  ## Loads a bytecode file: the whole file is read into
  ## memory and handed over to loadBytes.
  ##
  ## Raises an IOError if src cannot be opened and a
  ## SerializationError if its contents are not valid
  var fp = open(src, fmRead)
  defer: fp.close()
  let size = int(fp.getFileSize())
  var data = newSeq[byte](size)
  var pos = 0
  while pos < size:
    # Only ask for what is still missing: requesting size bytes
    # again at a nonzero offset would run past the buffer's end
    let count = fp.readBytes(data, pos, size - pos)
    if count <= 0:
      # The file is shorter than reported: stop instead of
      # spinning forever
      break
    pos += count
  # Drops the bytes that could not be read (if any), so that
  # loadBytes reports a truncated stream
  data.setLen(pos)
  return self.loadBytes(data)

1074
src/backend/bytecode/vm.nim Normal file

File diff suppressed because it is too large Load Diff

View File

@ -16,27 +16,33 @@ import std/strformat
import std/os
# These variables can be tweaked to debug and test various components of the toolchain
type
    PeonBackend* = enum
        ## The code generation backends that
        ## can be selected for compilation
        Bytecode,
        NativeC   # Coming soon
# These variables can be tweaked to debug and test various components of the toolchain. Do not modify them directly,
# use the command-line options instead (or -d:option=value for constants)
var debugLexer* = false # Print the tokenizer's output (main module only)
var debugParser* = false # Print the AST generated by the parser (main module only)
var debugTypeChecker* = false # Debug the typechecker's output (main module only)
var debugCompiler* = false # Disassemble and/or print the code generated by the compiler
var debugSerializer* = false # Validate the bytecode serializer's output
const debugVM* {.booldefine.} = false # Enable the runtime debugger in the bytecode VM
const debugGC* {.booldefine.} = false # Debug the Garbage Collector (extremely verbose)
const debugAlloc* {.booldefine.} = false # Trace object allocation (extremely verbose)
const debugMem* {.booldefine.} = false # Debug the memory allocator (extremely verbose)
var debugSerializer* = false # Validate the bytecode serializer's output
const debugStressGC* {.booldefine.} = false # Make the GC run a collection at every allocation (VERY SLOW!)
const debugMarkGC* {.booldefine.} = false # Trace the marking phase object by object (extremely verbose)
const PeonBytecodeMarker* = "PEON_BYTECODE" # Magic value at the beginning of bytecode files
const HeapGrowFactor* = 2 # The growth factor used by the GC to schedule the next collection
const FirstGC* = 1024 * 1024; # How many bytes to allocate before running the first GC
const enableVMChecks* {.booldefine.} = true; # Enables all types of compiler (nim-wise) checks in the VM
const enableVMChecks* {.booldefine.} = true; # Enables all types of compiler checks in the VM
# List of paths where peon looks for modules, in order (empty path means current directory, which always takes precedence)
const moduleLookupPaths*: seq[string] = @["", "src/peon/stdlib", absolutePath(joinPath(".local", "peon", "stdlib"), getenv("HOME"))]
when HeapGrowFactor <= 1:
{.fatal: "Heap growth factor must be > 1".}
const PeonVersion* = (major: 0, minor: 1, patch: 0)
const PeonVersion* = (major: 0, minor: 2, patch: 0)
const PeonRelease* = "alpha"
const PeonCommitHash* = staticExec("git rev-parse HEAD")
const PeonBranch* = staticExec("git symbolic-ref HEAD 2>/dev/null | cut -f 3 -d /")
@ -46,7 +52,13 @@ const HelpMessage* = """The peon programming language, Copyright (C) 2023 Mattia
This program is free software, see the license distributed with this program or check
http://www.apache.org/licenses/LICENSE-2.0 for more info.
Note: This is a development tool
Note: This is very much a work in progress
Basic Usage
-----------
peon [options] <file>[.pn] Run the given peon file
peon [options] file.pbc Run the given peon bytecode file
Options
@ -54,13 +66,26 @@ Options
-h, --help Show this help text and exit
-v, --version Print the current peon version and exit
-s, --string Execute the passed string as if it was a file
-s, --string Use the passed string as if it was a file
-w, --warnings Turn warnings on or off (default: on). Acceptable values are
yes/on and no/off
--noWarn Disable a specific warning (for example, --noWarn:UserWarning)
--noWarn Disable a specific warning (example: --noWarn:UserWarning)
--showMismatches Show all mismatches when function dispatching fails (output is really verbose)
--debugLexer Show the lexer's output
--debugParser Show the parser's output
--debugTypeChecker Show the typechecker's output
--debugCompiler Show the generated code (backend-specific)
--listWarns Show a list of all warnings
-b, --backend Select the compilation backend. Currently only supports 'bytecode' (the default)
-c, --compile Compile the code, but do not run the main module
-o, --output Rename the output executable to this (a "pn" extension is added for bytecode files,
if not already present)
-s, --string Run the given string as if it were a file (the filename is set to '<string>')
The following options are specific to the 'bytecode' backend:
-n, --noDump Do not dump bytecode files to the source directory
--breakpoints Set debugging breakpoints at the given bytecode offsets.
Input should be a comma-separated list of positive integers
(spacing is irrelevant). Only works if peon was compiled with
-d:debugVM
"""

View File

@ -44,10 +44,6 @@ type
current: int # The current node we're looking at
tree: ParseTree # The AST for the current module
scopeDepth*: int # The current scope depth (0 == global, > 0 == local)
# These objects are needed to parse other
# modules
tokenizer: Lexer
parser: Parser
source: string # The module's raw source code
file: string # The module's filename
isMainModule: bool # Are we the main module?
@ -101,8 +97,6 @@ proc newTypeChecker*: TypeChecker =
result.current = 0
result.tree = @[]
result.scopeDepth = 0
result.tokenizer = newLexer()
result.parser = newParser()
result.source = ""
result.file = ""
result.isMainModule = false
@ -207,7 +201,6 @@ proc handleUnsafePragma(self: TypeChecker, pragma: Pragma, name: Name) =
name.valueType.safe = false
proc warning(self: TypeChecker, kind: WarningKind, message: string, name: Name = nil, node: ASTNode = nil) =
## Raises a warning
if kind in self.disabledWarnings:

View File

@ -18,15 +18,23 @@ import util/symbols
import frontend/parser/lexer
import frontend/parser/parser
import frontend/compiler/typechecker
import backend/bytecode/codegen/generator
import backend/bytecode/tooling/serializer
import backend/bytecode/opcodes
import backend/bytecode/tooling/debugger
import backend/bytecode/vm
import std/os
import std/parseopt
import std/strutils
import std/terminal
import std/strformat
import std/times
# Thanks art <3
#[
import jale/editor as ed
import jale/templates
import jale/plugin/defaults
@ -34,13 +42,13 @@ import jale/plugin/editor_history
import jale/keycodes
import jale/multiline
proc getLineEditor: LineEditor =
result = newLineEditor()
result.prompt = "=> "
result.populateDefaults()
let history = result.plugHistory()
result.bindHistory(history)
]#
proc `$`(self: TypedNode): string =
@ -58,12 +66,126 @@ proc `$`(self: TypedNode): string =
result = &"{self.node}: ? ({self.node.kind})"
proc main(file: string, warnings: seq[WarningKind] = @[], showMismatches: bool = false) =
# TODO
discard
proc test(warnings: seq[WarningKind] = @[], showMismatches: bool = false) =
proc runFile(filename: string, fromString: bool = false, dump: bool = true, breakpoints: seq[uint64] = @[],
             disabledWarnings: seq[WarningKind] = @[], mismatches: bool = false, run: bool = true,
             backend: PeonBackend = PeonBackend.Bytecode, output: string) =
    ## Runs the whole toolchain on a single input.
    ##
    ## - filename: path of the file to process or, when fromString is
    ##   true, the source code itself (the filename then becomes "<string>")
    ## - dump: whether the resulting bytecode is written to output
    ##   (a ".pbc" extension is appended if missing)
    ## - breakpoints: forwarded to the VM
    ## - disabledWarnings, mismatches: forwarded to the typechecker
    ## - run: whether the resulting chunk is executed by the VM
    ## - backend: only PeonBackend.Bytecode does anything at the moment
    ##
    ## Files ending in ".pbc" are treated as precompiled bytecode and skip
    ## lexing, parsing and typechecking. Lexing, parsing, typechecking,
    ## serialization and I/O errors are caught and printed here instead
    ## of being propagated to the caller
    var
        tokens: seq[Token]
        tree: ParseTree
        typedNodes: seq[TypedNode]
        tokenizer = newLexer()
        parser = newParser()
        typeChecker = newTypeChecker()
        input: string
        filename = filename
        isBinary = false
        output = output
    tokenizer.fillSymbolTable()
    if not fromString and filename.endsWith(".pbc"):
        isBinary = true
    if fromString:
        input = filename
        filename = "<string>"
    try:
        if not isBinary:
            if not fromString:
                # The source has to be loaded from disk: without this the
                # pipeline below would always run on an empty string. It is
                # done inside the try block so that read failures are reported
                # by the IOError handler at the bottom
                input = readFile(filename)
            tokens = tokenizer.lex(input, filename)
            if tokens.len() == 0:
                return
            if debugLexer:
                styledEcho fgCyan, "Tokenizer output:"
                for i, token in tokens:
                    if i == tokens.high():
                        # Who cares about EOF?
                        break
                    styledEcho fgGreen, "\t", $token
                echo ""
            tree = parser.parse(tokens, filename, tokenizer.getLines(), input)
            if tree.len() == 0:
                return
            if debugParser:
                styledEcho fgCyan, "Parser output:"
                for node in tree:
                    styledEcho fgGreen, "\t", $node
                echo ""
            typedNodes = typeChecker.validate(tree, filename, tokenizer.getSource(), mismatches, disabledWarnings)
            if debugTypeChecker:
                styledEcho fgCyan, "Typechecker output:"
                for typedNode in typedNodes:
                    styledEcho fgGreen, &"\t{typedNode.node} -> {typeChecker.stringify(typedNode)}\n"
        case backend:
            of PeonBackend.Bytecode:
                var
                    debugger = newBytecodeDebugger()
                    generator = newBytecodeGenerator()
                    serializer = newBytecodeSerializer()
                    vm = newPeonVM()
                    chunk: Chunk = newChunk()
                    serialized: SerializedBytecode
                if not isBinary:
                    chunk = generator.generate(typedNodes)
                    # Round-trip through the serializer so that the checks
                    # under debugSerializer have something to compare against
                    serialized = serializer.loadBytes(serializer.dumpBytes(chunk, filename))
                else:
                    serialized = serializer.loadFile(filename)
                    chunk = serialized.chunk
                if dump:
                    # NOTE(review): when output is empty this writes to a file
                    # literally named ".pbc"; confirm whether a default derived
                    # from the input filename is intended instead
                    if not output.endsWith(".pbc"):
                        output.add(".pbc")
                    serializer.dumpFile(chunk, filename, output)
                if debugCompiler:
                    styledEcho fgCyan, "Disassembler output: "
                    debugger.disassembleChunk(chunk, filename)
                if debugSerializer:
                    styledEcho fgCyan, "Serializer output: "
                    styledEcho fgBlue, "\t- Peon version: ", fgYellow, &"{serialized.version.major}.{serialized.version.minor}.{serialized.version.patch}", fgBlue, " (commit ", fgYellow, serialized.commit[0..8], fgBlue, ") on branch ", fgYellow, serialized.branch
                    stdout.styledWriteLine(fgBlue, "\t- Compilation date & time: ", fgYellow, fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss"))
                    # Each segment of the deserialized chunk is compared
                    # with the chunk that is about to be run
                    stdout.styledWrite(fgBlue, &"\t- Constants segment: ")
                    if serialized.chunk.consts == chunk.consts:
                        styledEcho fgGreen, "OK"
                    else:
                        styledEcho fgRed, "Corrupted"
                    stdout.styledWrite(fgBlue, &"\t- Code segment: ")
                    if serialized.chunk.code == chunk.code:
                        styledEcho fgGreen, "OK"
                    else:
                        styledEcho fgRed, "Corrupted"
                    stdout.styledWrite(fgBlue, "\t- Line info segment: ")
                    if serialized.chunk.lines == chunk.lines:
                        styledEcho fgGreen, "OK"
                    else:
                        styledEcho fgRed, "Corrupted"
                    stdout.styledWrite(fgBlue, "\t- Functions segment: ")
                    if serialized.chunk.functions == chunk.functions:
                        styledEcho fgGreen, "OK"
                    else:
                        styledEcho fgRed, "Corrupted"
                    stdout.styledWrite(fgBlue, "\t- Modules segment: ")
                    if serialized.chunk.modules == chunk.modules:
                        styledEcho fgGreen, "OK"
                    else:
                        styledEcho fgRed, "Corrupted"
                if run:
                    vm.run(chunk, breakpoints, repl=false)
            else:
                # No other backend is implemented yet
                discard
    except LexingError as exc:
        print(exc)
    except ParseError as exc:
        print(exc)
    except TypeCheckError as exc:
        print(exc)
    except SerializationError as exc:
        var file = exc.file
        if file notin ["<string>", ""]:
            file = relativePath(file, getCurrentDir())
        stderr.styledWriteLine(fgRed, styleBright, "Error while (de-)serializing ", fgYellow, file, fgDefault, &": {exc.msg}")
    except IOError as exc:
        stderr.styledWriteLine(fgRed, styleBright, "Error while trying to read ", fgYellow, filename, fgDefault, &": {exc.msg}")
    except OSError as exc:
        stderr.styledWriteLine(fgRed, styleBright, "Error while trying to read ", fgYellow, filename, fgDefault, &": {exc.msg} ({osErrorMsg(osLastError())})",
                               fgRed, "[errno ", fgYellow, $osLastError(), fgRed, "]")
#[
proc repl(warnings: seq[WarningKind] = @[], showMismatches: bool = false) =
var
keep = true
tokens: seq[Token]
@ -156,6 +278,7 @@ when isMainModule:
setControlCHook(proc () {.noconv.} = echo ""; quit(0))
main()
]#
]#
when isMainModule:
@ -164,13 +287,14 @@ when isMainModule:
optParser = initOptParser(commandLineParams())
file: string
fromString: bool
#dump: bool = true
dump: bool = true
warnings: seq[WarningKind] = @[]
showMismatches: bool
#mode: CompileMode = CompileMode.Debug
run: bool = true
#backend: PeonBackend
#output: string
backend: PeonBackend
output: string
breakpoints: seq[uint64]
for kind, key, value in optParser.getopt():
case kind:
of cmdArgument:
@ -196,8 +320,8 @@ when isMainModule:
of "string":
file = key
fromString = true
#[of "noDump":
dump = false]#
of "noDump":
dump = false
of "warnings":
if value.toLowerAscii() in ["yes", "on"]:
warnings = @[]
@ -223,7 +347,11 @@ when isMainModule:
quit(0)
of "debugTypeChecker":
debugTypeChecker = true
#[of "compile":
of "debugCompiler":
debugCompiler = true
of "debugSerializer":
debugSerializer = true
of "compile":
run = false
of "output":
output = value
@ -235,18 +363,28 @@ when isMainModule:
backend = PeonBackend.NativeC
of "debug-dump":
debugSerializer = true
]#
of "debugLexer":
debugLexer = true
of "debugParser":
debugParser = true
of "breakpoints":
when debugVM:
for point in value.strip(chars={' '}).split(","):
try:
breakpoints.add(parseBiggestUInt(point))
except ValueError:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"error: invalid breakpoint value '{point}'")
quit()
when not debugVM:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, "VM debugging is off, cannot set breakpoints (recompile with -d:debugVM to fix this)")
quit()
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"unkown option '{key}'")
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"unkown long option '{key}'")
quit()
of cmdShortOption:
case key:
#[of "o":
output = value]#
of "o":
output = value
of "h":
echo HELP_MESSAGE
quit()
@ -256,8 +394,8 @@ when isMainModule:
of "s":
file = key
fromString = true
#[of "n":
dump = false]#
of "n":
dump = false
of "w":
if value.toLowerAscii() in ["yes", "on"]:
warnings = @[]
@ -267,18 +405,16 @@ when isMainModule:
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, "invalid value for option 'w' (valid options are: yes, on, no, off)")
quit()
#[of "c":
run = false]#
of "c":
run = false
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"unkown option '{key}'")
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"unkown short option '{key}'")
quit()
else:
echo "usage: peon [options] [filename.pn]"
quit()
if file == "":
test(warnings, showMismatches)
echo "Sorry, the REPL is broken :("
# repl(warnings, showMismatches, backend, dump)
else:
echo "Warning: not implemented yet!"
main(file, warnings, showMismatches)
#[else:
runFile(file, fromString, dump, breaks, warnings, showMismatches, mode, run, backend, output)]#
runFile(file, fromString, dump, breakpoints, warnings, showMismatches, run, backend, output)