Minor style changes, removed findImpl from the compiler, made the VM use the multibyte utilities, bytecode chunks no longer store AST node objects and use a stream of bytes instead, fixed issues with endScope() in the compiler, which would not pop properly from self.names, fixed issues with blockStmt in the parser, added more multibyte utilities

Mattia Giambirtone 2022-05-20 15:47:04 +02:00
parent 77bd0c8b6f
commit 1a0587d08b
9 changed files with 117 additions and 232 deletions

View File

@ -16,6 +16,7 @@
import types
import ../config
import ../frontend/meta/bytecode
import ../util/multibyte
type
@ -106,8 +107,7 @@ proc readShort(self: PeonVM): uint16 =
## bytecode and returns them
## as an unsigned 16 bit
## integer
var arr: array[2, uint8] = [self.readByte(), self.readByte()]
copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
return [self.readByte(), self.readByte()].fromDouble()
proc readLong(self: PeonVM): uint32 =
@ -117,8 +117,7 @@ proc readLong(self: PeonVM): uint32 =
## integer. Note however that
## the boundary is capped at
## 24 bits instead of 32
var arr: array[3, uint8] = [self.readByte(), self.readByte(), self.readByte()]
copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
return uint32([self.readByte(), self.readByte(), self.readByte()].fromTriple())
proc readInt64(self: PeonVM, idx: int): PeonObject =
@ -126,8 +125,8 @@ proc readInt64(self: PeonVM, idx: int): PeonObject =
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is an Int64
var arr = [self.chunk.byteConsts[idx], self.chunk.byteConsts[idx + 1],
self.chunk.byteConsts[idx + 2], self.chunk.byteConsts[idx + 3]]
var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
result = PeonObject(kind: Int64)
copyMem(result.long.addr, arr.addr, sizeof(arr))
@ -137,8 +136,8 @@ proc readUInt64(self: PeonVM, idx: int): PeonObject =
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is an UInt64
var arr = [self.chunk.byteConsts[idx], self.chunk.byteConsts[idx + 1],
self.chunk.byteConsts[idx + 2], self.chunk.byteConsts[idx + 3]]
var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
result = PeonObject(kind: UInt64)
copyMem(result.uLong.addr, arr.addr, sizeof(arr))
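# A minimal standalone sketch of the copyMem-based reassembly used above:
# an integer constant is rebuilt by copying raw bytes from the chunk's
# constant table straight over its memory (host byte order; little-endian
# assumed for the literal below).
var raw: array[8, uint8] = [0x2A'u8, 0, 0, 0, 0, 0, 0, 0]
var value: int64
copyMem(value.addr, raw.addr, sizeof(raw))
assert value == 42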

View File

@ -30,7 +30,7 @@ const PEON_COMMIT_HASH* = "ed79385e2a93100331697f26a4a90157e60ad27a"
when len(PEON_COMMIT_HASH) != 40:
{.fatal: "The git commit hash must be exactly 40 characters long".}
const PEON_BRANCH* = "master"
when len(PEON_BRANCH) >= 255:
when len(PEON_BRANCH) > 255:
{.fatal: "The git branch name's length must be less than or equal to 255 characters".}
const DEBUG_TRACE_VM* = false # Traces VM execution
const SKIP_STDLIB_INIT* = false # Skips stdlib initialization (can be imported manually)
@ -48,7 +48,7 @@ Basic usage
-----------
$ peon Opens an interactive session (REPL)
$ peon file.pe Runs the given Peon source file
$ peon file.pn Runs the given Peon source file
Command-line options
--------------------

View File

@ -128,7 +128,7 @@ type
# inside an implicit try/finally block
# and add this code in the finally branch.
# This sequence is emptied each time a
# fun declaration is compiled and stores only
# function declaration is compiled and stores only
# deferred code for the current function (may
# be empty)
deferred: seq[uint8]
@ -185,8 +185,7 @@ proc done(self: Compiler): bool =
result = self.current > self.ast.high()
proc error(self: Compiler, message: string) {.raises: [CompileError,
ValueError].} =
proc error(self: Compiler, message: string) {.raises: [CompileError, ValueError].} =
## Raises a formatted CompileError exception
var tok = self.getCurrentNode().token
raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {tok.line} at '{tok.lexeme}' -> {message}")
@ -231,10 +230,20 @@ proc emitBytes(self: Compiler, bytarr: openarray[uint8]) =
self.emitByte(b)
proc makeConstant(self: Compiler, val: Expression, kind: Type): array[3, uint8] =
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
## Adds a constant to the current chunk's constant table
## and returns its index as a 3-byte array of uint8s
result = self.chunk.addConstant(val, kind)
case typ.kind:
of UInt8, Int8:
result = self.chunk.writeConstant([uint8(parseInt(val.token.lexeme))])
of Int16, UInt16:
result = self.chunk.writeConstant(parseInt(val.token.lexeme).toDouble())
of Int32, UInt32:
result = self.chunk.writeConstant(parseInt(val.token.lexeme).toQuad())
of Int64, UInt64:
result = self.chunk.writeConstant(parseInt(val.token.lexeme).toLong())
else:
discard
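# A small sketch of the type-directed encoding above, assuming a little-endian
# host and the strutils/multibyte imports this module already uses: an Int16
# literal such as 300 is parsed from its lexeme and serialized to exactly two
# bytes, which writeConstant appends to the chunk's constant table, returning
# the 3-byte index of the first byte written.
assert parseInt("300").toDouble() == [0x2C'u8, 0x01]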
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
@ -816,23 +825,9 @@ proc identifier(self: Compiler, node: IdentExpr) =
self.emitBytes(self.closedOver.high().toTriple())
proc findImpl(self: Compiler, node: FunDecl): seq[Name] =
## Looks for functions matching the given declaration
## in the code that has been compiled so far.
## Returns a list of each matching name object
for obj in reversed(self.names):
# Scopes are indexed backwards!
case obj.valueType.kind:
of Function:
if self.compareTypes(obj.valueType, self.inferType(node)):
result.add(obj)
else:
continue
proc findByName(self: Compiler, name: string): seq[Name] =
## Looks for objects that have been already declared
## with the given name
## with the given name. Returns all objects that apply
for obj in reversed(self.names):
if obj.name.token.lexeme == name:
result.add(obj)
@ -888,12 +883,14 @@ proc beginScope(self: Compiler) =
proc endScope(self: Compiler) =
## Ends the current local scope
if self.scopeDepth < 0:
self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
if self.scopeDepth == 0:
self.error("cannot call endScope with scopeDepth == 0 (This is an internal error and most likely a bug)")
dec(self.scopeDepth)
var popped: int = 0
for ident in reversed(self.names):
for i, ident in reversed(self.names):
if ident.depth > self.scopeDepth:
inc(popped)
self.names.delete(self.names.len() - i)
if not self.enableOptimizations:
# All variables with a scope depth larger than the current one
# are now out of scope. Begone, you're now homeless!
@ -918,9 +915,6 @@ proc endScope(self: Compiler) =
elif popped == 1:
# We only emit PopN if we're popping more than one value
self.emitByte(Pop)
for _ in countup(0, popped - 1):
discard self.names.pop()
dec(self.scopeDepth)
proc blockStmt(self: Compiler, node: BlockStmt) =
@ -1273,8 +1267,7 @@ proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
self.declaration(Declaration(self.step()))
if self.ast.len() > 0:
# *Technically* an empty program is a valid program
self.endScope()
self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope
result = self.chunk
if self.ast.len() > 0 and self.scopeDepth != -1:
self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")
if self.ast.len() > 0 and self.scopeDepth != 0:
self.error(&"invalid state: invalid scopeDepth value (expected 0, got {self.scopeDepth}), did you forget to call endScope/beginScope?")

View File

@ -13,24 +13,16 @@
# limitations under the License.
## Low level bytecode implementation details
import ast
import errors
import strutils
import strformat
import ../../util/multibyte
import ../compiler
export ast
type
Chunk* = ref object
## A piece of bytecode.
## consts represents the high-level constants table the code is
## referring to and is only meaningful at compile time (not stored
## in bytecode dumps!).
## byteConsts is used when serializing to/from a bytecode stream.
## code is the linear sequence of compiled bytecode instructions.
## lines maps bytecode instructions to line numbers using Run
@ -46,8 +38,7 @@ type
## are 3 and 4"
## This is more efficient than using the naive approach, which would encode
## the same line number multiple times and waste considerable amounts of space.
consts*: seq[Expression]
byteConsts*: seq[uint8]
consts*: seq[uint8]
code*: seq[uint8]
lines*: seq[int]
reuseConsts*: bool
@ -223,42 +214,10 @@ proc getLine*(self: Chunk, idx: int): int =
raise newException(IndexDefect, "index out of range")
proc findOrAddConstant(self: Chunk, constant: Expression, kind: Type): int =
## Small optimization function that reuses the same constant
## if it's already been written before (only if self.reuseConsts
## equals true)
if not self.reuseConsts:
return
for i, c in self.consts:
# We cannot use simple equality because the nodes likely have
# different token objects with different values
if c.kind != constant.kind:
continue
if constant.isConst():
if LiteralExpr(c).literal.lexeme == LiteralExpr(
constant).literal.lexeme:
# This wouldn't work for stuff like 2e3 and 2000.0, but those
# forms are collapsed in the compiler before being written
# to the constants table
return i
elif constant.kind == identExpr:
if IdentExpr(c).name.lexeme == IdentExpr(constant).name.lexeme:
return i
else:
continue
self.consts.add(constant)
result = self.consts.high()
proc addConstant*(self: Chunk, constant: Expression, kind: Type): array[3, uint8] =
## Writes a constant of the given type in the chunk's constant
## table. Returns its index as an array of 3 unsigned 8 bit integers.
## Constant indexes are reused if a constant is used more than once
## and self.reuseConsts equals true
if self.consts.high() == 16777215:
# The constant index is a 24 bit unsigned integer, so that's as far
# as we can index into the constant table (the same applies
# to our stack by the way). Not that anyone's ever gonna hit this
# limit in the real world, but you know, just in case
raise newException(CompileError, "cannot encode more than 16777216 constants")
result = self.findOrAddConstant(constant, kind).toTriple()
proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
## Writes a series of bytes to the chunk's constant
## table and returns the index of the first byte as
## an array of 3 bytes
result = self.consts.len().toTriple()
for b in data:
self.consts.add(b)
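# A short usage sketch (chunk constructed directly here just for illustration):
# indices point at the first byte of each constant, so consecutive writes
# return strictly increasing offsets into the byte stream.
var chunk = Chunk()
assert chunk.writeConstant([1'u8, 2'u8]) == 0.toTriple()
assert chunk.writeConstant([3'u8]) == 2.toTriple()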

View File

@ -547,8 +547,8 @@ proc blockStmt(self: Parser): Statement =
var code: seq[Declaration] = @[]
while not self.check(RightBrace) and not self.done():
code.add(self.declaration())
if self.tree[^1] == nil:
self.tree.delete(self.tree.high())
if code[^1] == nil:
code.delete(code.high())
self.expect(RightBrace, "expecting '}'")
result = newBlockStmt(code, tok)
self.endScope()
@ -1140,4 +1140,4 @@ proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] =
self.tree.add(self.declaration())
if self.tree[^1] == nil:
self.tree.delete(self.tree.high())
result = self.tree
result = self.tree

View File

@ -23,11 +23,11 @@ proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = false
const debugLexer = true
const debugParser = true
const debugCompiler = true
const debugSerializer = false
const debugRuntime = false
const debugSerializer = true
const debugRuntime = true
when debugSerializer:
import nimSHA2
@ -113,7 +113,7 @@ when isMainModule:
stdout.write(e)
if i < len(serialized.chunk.consts) - 1:
stdout.write(", ")
stdout.write("]\n")
stdout.write(&"] (matches: {serialized.chunk.consts == compiled.consts})\n")
stdout.write(&"\t- Reconstructed bytecode: [")
for i, e in serialized.chunk.code:
stdout.write($e)
@ -175,7 +175,7 @@ proc fillSymbolTable(tokenizer: Lexer) =
tokenizer.symbols.addKeyword("case", Case)
tokenizer.symbols.addKeyword("operator", Operator)
tokenizer.symbols.addKeyword("generator", Generator)
tokenizer.symbols.addKeyword("function", TokenType.Function)
tokenizer.symbols.addKeyword("fn", TokenType.Function)
tokenizer.symbols.addKeyword("coroutine", Coroutine)
tokenizer.symbols.addKeyword("break", TokenType.Break)
tokenizer.symbols.addKeyword("continue", Continue)

View File

@ -13,7 +13,6 @@
# limitations under the License.
import ../frontend/meta/bytecode
import ../frontend/meta/ast
import multibyte
@ -104,9 +103,6 @@ proc constantInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
setForegroundColor(fgYellow)
stdout.write(&"{obj}\n")
setForegroundColor(fgGreen)
printDebug("Value kind: ")
setForegroundColor(fgYellow)
stdout.write(&"{obj.kind}\n")
return offset + 4

View File

@ -17,17 +17,26 @@
proc toDouble*(input: int | uint | uint16): array[2, uint8] =
## Converts an int (either int, uint or uint16)
## Converts an unsigned integer
## to an array[2, uint8]
result = cast[array[2, uint8]](uint16(input))
proc toTriple*(input: uint | int): array[3, uint8] =
## Converts an unsigned integer (int is converted
## to an uint and sign is lost!) to an array[3, uint8]
## Converts an unsigned integer to an array[3, uint8]
result = cast[array[3, uint8]](uint(input))
proc toQuad*(input: int | uint | uint16 | uint32): array[4, uint8] =
## Converts an unsigned integer to an array[4, uint8]
result = cast[array[4, uint8]](uint(input))
proc toLong*(input: int | uint | uint16 | uint32 | uint64): array[8, uint8] =
## Converts an unsigned integer to an array[8, uint8]
result = cast[array[8, uint8]](uint(input))
proc fromDouble*(input: array[2, uint8]): uint16 =
## Rebuilds the output of toDouble into
## an uint16
@ -38,3 +47,15 @@ proc fromTriple*(input: array[3, uint8]): uint =
## Rebuilds the output of toTriple into
## an uint
copyMem(result.addr, unsafeAddr(input), sizeof(uint8) * 3)
proc fromQuad*(input: array[4, uint8]): uint =
## Rebuilds the output of toQuad into
## an uint
copyMem(result.addr, unsafeAddr(input), sizeof(uint32))
proc fromLong*(input: array[8, uint8]): uint =
## Rebuilds the output of toLong into
## an uint
copyMem(result.addr, unsafeAddr(input), sizeof(uint64))
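# A quick round-trip sketch for these helpers (values assumed to fit the
# target width; a 64-bit little-endian host is assumed):
assert 1234.toDouble().fromDouble() == 1234'u16
assert 70000.toTriple().fromTriple() == 70000'u
assert 123456789.toQuad().fromQuad() == 123456789'u
assert 9876543210.toLong().fromLong() == 9876543210'u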

View File

@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../frontend/meta/ast
import ../frontend/meta/errors
import ../frontend/meta/bytecode
import ../frontend/meta/token
import ../config
import multibyte
import ../frontend/compiler
@ -85,14 +83,6 @@ proc bytesToString(self: Serializer, input: seq[byte]): string =
result.add(char(b))
proc bytesToInt(self: Serializer, input: array[8, byte]): int =
copyMem(result.addr, input.unsafeAddr, sizeof(int))
proc bytesToInt(self: Serializer, input: array[3, byte]): int =
copyMem(result.addr, input.unsafeAddr, sizeof(byte) * 3)
proc extend[T](s: var seq[T], a: openarray[T]) =
## Extends s with the elements of a
for e in a:
@ -107,122 +97,65 @@ proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) =
stream.add(byte(PEON_VERSION.patch))
stream.add(byte(len(PEON_BRANCH)))
stream.extend(self.toBytes(PEON_BRANCH))
if len(PEON_COMMIT_HASH) != 40:
self.error("the commit hash must be exactly 40 characters long")
stream.extend(self.toBytes(PEON_COMMIT_HASH))
stream.extend(self.toBytes(getTime().toUnixFloat().int()))
stream.extend(self.toBytes(computeSHA256(file)))
proc writeConstants(self: Serializer, stream: var seq[byte]) =
## Writes the constants table in-place into the given stream
## Writes the constants table in-place into the
## given stream
stream.extend(self.chunk.consts.len().toQuad())
for constant in self.chunk.consts:
case constant.kind:
of intExpr, floatExpr:
stream.add(0x1)
stream.extend(len(constant.token.lexeme).toTriple())
stream.extend(self.toBytes(constant.token.lexeme))
of strExpr:
stream.add(0x2)
var temp: byte
var strip: int = 2
var offset: int = 1
case constant.token.lexeme[0]:
of 'f':
strip = 3
inc(offset)
temp = 0x2
of 'b':
strip = 3
inc(offset)
temp = 0x1
else:
strip = 2
temp = 0x0
stream.extend((len(constant.token.lexeme) - strip).toTriple()) # Removes the quotes from the length count as they're not written
stream.add(temp)
stream.add(self.toBytes(constant.token.lexeme[offset..^2]))
of identExpr:
stream.add(0x0)
stream.extend(len(constant.token.lexeme).toTriple())
stream.add(self.toBytes(constant.token.lexeme))
else:
self.error(&"unknown constant kind in chunk table ({constant.kind})")
stream.add(0x59) # End marker
proc readConstants(self: Serializer, stream: seq[byte]): int =
## Reads the constant table from the given stream and
## adds each constant to the chunk object.
## Returns the number of bytes that were processed in
## the stream
var stream = stream
var count: int = 0
while true:
case stream[0]:
of 0x59:
inc(count)
break
of 0x2:
stream = stream[1..^1]
let size = self.bytesToInt([stream[0], stream[1], stream[2]])
stream = stream[3..^1]
var s = newStrExpr(Token(lexeme: ""))
case stream[0]:
of 0x0:
discard
of 0x1:
s.token.lexeme.add("b")
of 0x2:
s.token.lexeme.add("f")
else:
self.error(&"unknown string modifier in chunk table (0x{stream[0].toHex()}")
stream = stream[1..^1]
s.token.lexeme.add("\"")
for i in countup(0, size - 1):
s.token.lexeme.add(cast[char](stream[i]))
s.token.lexeme.add("\"")
stream = stream[size..^1]
self.chunk.consts.add(s)
inc(count, size + 5)
of 0x1:
stream = stream[1..^1]
inc(count)
let size = self.bytesToInt([stream[0], stream[1], stream[2]])
stream = stream[3..^1]
inc(count, 3)
var tok: Token = new(Token)
tok.lexeme = self.bytesToString(stream[0..<size])
if "." in tok.lexeme:
tok.kind = Float
self.chunk.consts.add(newFloatExpr(tok))
else:
tok.kind = Integer
self.chunk.consts.add(newIntExpr(tok))
stream = stream[size..^1]
inc(count, size)
of 0x0:
stream = stream[1..^1]
let size = self.bytesToInt([stream[0], stream[1], stream[2]])
stream = stream[3..^1]
self.chunk.consts.add(newIdentExpr(Token(
lexeme: self.bytesToString(stream[0..<size]))))
# TODO
# discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..<size]))))
stream = stream[size..^1]
inc(count, size + 4)
else:
self.error(&"unknown constant kind in chunk table (0x{stream[0].toHex()})")
result = count
stream.add(constant)
proc writeCode(self: Serializer, stream: var seq[byte]) =
## Writes the bytecode from the given chunk to the given source
## stream
## Writes the bytecode from the given chunk to the
## given source stream
stream.extend(self.chunk.code.len.toTriple())
stream.extend(self.chunk.code)
proc readHeaders(self: Serializer, stream: seq[byte], serialized: Serialized): int =
## Reads the bytecode headers from a given stream
## of bytes
var stream = stream
if stream[0..<len(BYTECODE_MARKER)] != self.toBytes(BYTECODE_MARKER):
self.error("malformed bytecode marker")
result += len(BYTECODE_MARKER)
stream = stream[len(BYTECODE_MARKER)..^1]
serialized.peonVer = (major: int(stream[0]), minor: int(stream[1]), patch: int(stream[2]))
stream = stream[3..^1]
result += 3
let branchLength = stream[0]
stream = stream[1..^1]
result += 1
serialized.peonBranch = self.bytesToString(stream[0..<branchLength])
stream = stream[branchLength..^1]
result += int(branchLength)
serialized.commitHash = self.bytesToString(stream[0..<40]).toLowerAscii()
stream = stream[40..^1]
result += 40
serialized.compileDate = int(fromLong([stream[0], stream[1], stream[2],
stream[3], stream[4], stream[5], stream[6], stream[7]]))
stream = stream[8..^1]
result += 8
serialized.fileHash = self.bytesToString(stream[0..<32]).toHex().toLowerAscii()
result += 32
proc readConstants(self: Serializer, stream: seq[byte]): int =
## Reads the constant table from the given stream
## of bytes
let size = [stream[0], stream[1], stream[2], stream[3]].fromQuad()
result += 4
var stream = stream[4..^1]
for i in countup(0, int(size) - 1):
self.chunk.consts.add(stream[i])
inc(result)
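# A minimal sketch of the constants section this expects, mirroring what
# writeConstants produces above: a 4-byte length prefix (toQuad, host byte
# order) followed by the chunk's constant bytes copied verbatim.
var blob: seq[byte] = @[]
let table: seq[uint8] = @[0x2A'u8, 0x00, 0x01]
for b in table.len().toQuad(): blob.add(b)   # length prefix
for b in table: blob.add(b)                  # raw constant bytes
assert blob.len == 4 + table.len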
proc readCode(self: Serializer, stream: seq[byte]): int =
## Reads the bytecode from a given stream and writes
## it into the given chunk
@ -262,23 +195,7 @@ proc loadBytes*(self: Serializer, stream: seq[byte]): Serialized =
self.chunk = result.chunk
var stream = stream
try:
if stream[0..<len(BYTECODE_MARKER)] != self.toBytes(BYTECODE_MARKER):
self.error("malformed bytecode marker")
stream = stream[len(BYTECODE_MARKER)..^1]
result.peonVer = (major: int(stream[0]), minor: int(stream[1]),
patch: int(stream[2]))
stream = stream[3..^1]
let branchLength = stream[0]
stream = stream[1..^1]
result.peonBranch = self.bytesToString(stream[0..<branchLength])
stream = stream[branchLength..^1]
result.commitHash = self.bytesToString(stream[0..<40]).toLowerAscii()
stream = stream[40..^1]
result.compileDate = self.bytesToInt([stream[0], stream[1], stream[2],
stream[3], stream[4], stream[5], stream[6], stream[7]])
stream = stream[8..^1]
result.fileHash = self.bytesToString(stream[0..<32]).toHex().toLowerAscii()
stream = stream[32..^1]
stream = stream[self.readHeaders(stream, result)..^1]
stream = stream[self.readConstants(stream)..^1]
stream = stream[self.readCode(stream)..^1]
except IndexDefect: