274 lines
10 KiB
Nim
274 lines
10 KiB
Nim
# Copyright 2022 Mattia Giambirtone & All Contributors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import meta/ast
|
|
import meta/errors
|
|
import meta/bytecode
|
|
import meta/token
|
|
import ../config
|
|
import ../util/multibyte
|
|
|
|
import strformat
|
|
import strutils
|
|
import nimSHA2
|
|
import times
|
|
|
|
|
|
export ast
|
|
|
|
type
    Serializer* = ref object
        ## Holds the state used while serializing or
        ## deserializing a single bytecode file
        file: string        ## Contents of the source file (dumpBytes hashes it into the headers)
        filename: string    ## Name of the file, used when formatting error messages
        chunk: Chunk        ## The chunk being written out (dumpBytes) or filled in (loadBytes)
    Serialized* = ref object
        ## Wrapper returned by
        ## the Serializer.read*
        ## procedures to store
        ## metadata (loadBytes
        ## returns it as well)
        fileHash*: string       ## Lowercase hexadecimal form of the 32-byte hash stored in the headers
        japlVer*: tuple[major, minor, patch: int]   ## Version triple, one byte per component in the stream
        japlBranch*: string     ## Branch name stored in the headers
        commitHash*: string     ## 40-character commit hash, lowercased on load
        compileDate*: int       ## 8-byte timestamp from the headers (written as a Unix time in seconds)
        chunk*: Chunk           ## The chunk rebuilt from the constants table and the bytecode
|
|
|
|
|
|
proc `$`*(self: Serialized): string =
    ## Returns a human-readable representation of a Serialized
    ## object listing every metadata field followed by the
    ## dereferenced chunk. All fields are enclosed in a single
    ## balanced pair of parentheses
    result = &"Serialized(fileHash={self.fileHash}, version={self.japlVer.major}.{self.japlVer.minor}.{self.japlVer.patch}, branch={self.japlBranch}, commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]})"
|
|
|
|
|
|
proc error(self: Serializer, message: string) =
    ## Aborts the current operation by raising a SerializationError
    ## whose text names the file being processed (self.filename)
    ## and carries the given message
    let text = &"A fatal error occurred while (de)serializing '{self.filename}' -> {message}"
    raise newException(SerializationError, text)
|
|
|
|
|
|
proc initSerializer*(self: Serializer = nil): Serializer =
    ## Returns a serializer with blank state. When an existing
    ## object is passed in, that same object is reset in place
    ## and returned; otherwise a new one is allocated
    result = if self.isNil(): new(Serializer) else: self
    result.chunk = nil
    result.filename = ""
    result.file = ""
|
|
|
|
|
|
## Basic routines and helpers to convert various objects to and from their byte representation
|
|
|
|
proc toBytes(self: Serializer, s: string): seq[byte] =
    ## Converts a string into a sequence holding the
    ## byte value of each of its characters, in order
    result = newSeq[byte](len(s))
    for i, ch in s:
        result[i] = byte(ch)
|
|
|
|
|
|
proc toBytes(self: Serializer, s: int): array[8, uint8] =
    ## Reinterprets the in-memory representation of an
    ## integer as an array of 8 bytes. The bytes come out
    ## in whatever order the host stores them
    cast[array[8, uint8]](s)
|
|
|
|
|
|
proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] =
    ## Copies every element of a SHA256 digest,
    ## in order, into a new sequence of bytes
    for i in low(d)..high(d):
        result.add(d[i])
|
|
|
|
|
|
proc bytesToString(self: Serializer, input: seq[byte]): string =
    ## Builds a string out of a sequence of bytes by
    ## turning each byte into the character with the
    ## same ordinal value
    result = newStringOfCap(len(input))
    for value in input:
        result.add(char(value))
|
|
|
|
|
|
proc bytesToInt(self: Serializer, input: array[8, byte]): int =
    ## Rebuilds an integer from an 8-byte array by copying
    ## sizeof(int) raw bytes over the result (the inverse
    ## of the integer overload of toBytes)
    copyMem(addr(result), unsafeAddr(input), sizeof(int))
|
|
|
|
|
|
proc bytesToInt(self: Serializer, input: array[3, byte]): int =
    ## Rebuilds an integer from a 3-byte array by copying
    ## the three raw bytes over the start of the (zeroed)
    ## result. How the bytes are weighted depends on the
    ## byte order of the host
    const tripleSize = 3 * sizeof(byte)
    copyMem(addr(result), unsafeAddr(input), tripleSize)
|
|
|
|
|
|
proc extend[T](s: var seq[T], a: openarray[T]) =
    ## Appends every element of a to the end of s,
    ## keeping their order
    s.add(a)
|
|
|
|
|
|
proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) =
    ## Appends the JAPL bytecode headers to the given stream.
    ## The fields are written in this order: the bytecode marker,
    ## one byte each for the major, minor and patch version, one
    ## byte holding the branch name's length, the branch name, the
    ## 40-character commit hash, the current time as an 8-byte
    ## integer and the SHA256 digest of file. Raises a
    ## SerializationError if the commit hash is not 40 characters
    ## long (the fields preceding it have been appended by then)
    extend(stream, self.toBytes(BYTECODE_MARKER))
    # Version triple, one byte per component
    add(stream, byte(JAPL_VERSION.major))
    add(stream, byte(JAPL_VERSION.minor))
    add(stream, byte(JAPL_VERSION.patch))
    # Length-prefixed branch name
    add(stream, byte(len(JAPL_BRANCH)))
    extend(stream, self.toBytes(JAPL_BRANCH))
    # The hash has no length prefix, so its size must be fixed
    if len(JAPL_COMMIT_HASH) != 40:
        self.error("the commit hash must be exactly 40 characters long")
    extend(stream, self.toBytes(JAPL_COMMIT_HASH))
    let timestamp = int(toUnixFloat(getTime()))
    extend(stream, self.toBytes(timestamp))
    extend(stream, self.toBytes(computeSHA256(file)))
|
|
|
|
|
|
proc writeConstants(self: Serializer, stream: var seq[byte]) =
    ## Appends the chunk's constants table to the given stream.
    ## Every entry starts with a one-byte tag (0x0 identifier,
    ## 0x1 number, 0x2 string) followed by a 3-byte length and
    ## the entry's text. String entries carry an extra modifier
    ## byte (0x0 none, 0x1 for a 'b' prefix, 0x2 for an 'f'
    ## prefix) between the length and the text, and are written
    ## without their prefix and surrounding quotes. The table is
    ## closed by a single 0x59 byte. Raises a SerializationError
    ## on a constant of any other kind
    for entry in self.chunk.consts:
        case entry.kind:
            of identExpr:
                stream.add(0x0)
                stream.extend(len(entry.token.lexeme).toTriple())
                stream.extend(self.toBytes(entry.token.lexeme))
            of intExpr, floatExpr:
                stream.add(0x1)
                stream.extend(len(entry.token.lexeme).toTriple())
                stream.extend(self.toBytes(entry.token.lexeme))
            of strExpr:
                stream.add(0x2)
                let lexeme = entry.token.lexeme
                # start is the index of the first character after
                # the opening quote (and after the prefix, if any)
                let (modifier, start) =
                    case lexeme[0]
                    of 'f': (byte(0x2), 2)
                    of 'b': (byte(0x1), 2)
                    else: (byte(0x0), 1)
                # The prefix and both quotes are not written, so
                # they are left out of the length as well
                stream.extend((len(lexeme) - start - 1).toTriple())
                stream.add(modifier)
                stream.extend(self.toBytes(lexeme[start..^2]))
            else:
                self.error(&"unknown constant kind in chunk table ({entry.kind})")
    stream.add(0x59)  # End marker
|
|
|
|
|
|
proc readConstants(self: Serializer, stream: seq[byte]): int =
    ## Reads the constant table from the given stream and
    ## adds each constant to the chunk object (note: most compile-time
    ## information such as the original token objects and line info is lost when
    ## serializing the data, so those fields are set to nil or some default
    ## value). Returns the number of bytes that were processed in the stream,
    ## end marker included. Raises a SerializationError when an unknown
    ## constant tag or string modifier is found; running past the end of
    ## the stream raises an IndexDefect, which is left to the caller
    var pos = 0  # Read cursor: equals the number of bytes consumed so far
    while true:
        let tag = stream[pos]
        inc(pos)
        case tag:
            of 0x59:
                # End marker
                break
            of 0x2:
                # String: 3-byte length, modifier byte, then the text
                let size = self.bytesToInt([stream[pos], stream[pos + 1], stream[pos + 2]])
                inc(pos, 3)
                var s = newStrExpr(Token(lexeme: ""))
                let modifier = stream[pos]
                case modifier:
                    of 0x0:
                        discard
                    of 0x1:
                        s.token.lexeme.add("b")
                    of 0x2:
                        s.token.lexeme.add("f")
                    else:
                        self.error(&"unknown string modifier in chunk table (0x{modifier.toHex()})")
                inc(pos)
                # The quotes are not stored in the stream, so they
                # are put back around the text here
                s.token.lexeme.add("\"")
                let stop = pos + size
                for i in pos ..< stop:
                    s.token.lexeme.add(char(stream[i]))
                s.token.lexeme.add("\"")
                pos = stop
                self.chunk.consts.add(s)
            of 0x1:
                # Number: 3-byte length, then the literal's text
                let size = self.bytesToInt([stream[pos], stream[pos + 1], stream[pos + 2]])
                inc(pos, 3)
                let stop = pos + size
                var tok: Token = new(Token)
                tok.lexeme = self.bytesToString(stream[pos ..< stop])
                # Integers and floats share a tag: the presence
                # of a dot tells them apart
                if "." in tok.lexeme:
                    tok.kind = Float
                    self.chunk.consts.add(newFloatExpr(tok))
                else:
                    tok.kind = Integer
                    self.chunk.consts.add(newIntExpr(tok))
                pos = stop
            of 0x0:
                # Identifier: 3-byte length, then the name
                let size = self.bytesToInt([stream[pos], stream[pos + 1], stream[pos + 2]])
                inc(pos, 3)
                let stop = pos + size
                discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[pos ..< stop]))))
                pos = stop
            else:
                self.error(&"unknown constant kind in chunk table (0x{tag.toHex()})")
    result = pos
|
|
|
|
|
|
proc writeCode(self: Serializer, stream: var seq[byte]) =
    ## Appends the chunk's bytecode to the given stream,
    ## preceded by its length encoded on 3 bytes
    let encodedSize = toTriple(len(self.chunk.code))
    extend(stream, encodedSize)
    extend(stream, self.chunk.code)
|
|
|
|
|
|
proc readCode(self: Serializer, stream: seq[byte]): int =
    ## Reads the bytecode from a given stream and writes
    ## it into the serializer's chunk. The stream must start
    ## with the 3-byte length written by writeCode, followed
    ## by that many bytes of code. Returns the total number
    ## of bytes that were processed in the stream, length
    ## prefix included. Running past the end of the stream
    ## raises an IndexDefect, which is left to the caller
    const prefixSize = 3
    let size = int([stream[0], stream[1], stream[2]].fromTriple())
    for i in prefixSize ..< prefixSize + size:
        self.chunk.code.add(stream[i])
    # Fails if the chunk already contained code before the call
    assert len(self.chunk.code) == size
    result = prefixSize + size
|
|
|
|
|
|
proc dumpBytes*(self: Serializer, chunk: Chunk, file, filename: string): seq[byte] =
    ## Serializes the given chunk and returns the resulting bytes: the
    ## headers come first, then the constants table, then the bytecode.
    ## The file argument must be the actual file's content (its SHA256
    ## hash is stored in the headers), while filename is only used when
    ## reporting errors
    self.chunk = chunk
    self.filename = filename
    self.file = file
    writeHeaders(self, result, self.file)
    writeConstants(self, result)
    writeCode(self, result)
|
|
|
|
|
|
proc loadBytes*(self: Serializer, stream: seq[byte]): Serialized =
    ## Parses the output of dumpBytes and returns a Serialized
    ## object holding the header metadata and the rebuilt chunk,
    ## for use in the VM or for inspection. The serializer's state
    ## is reset before parsing. Raises a SerializationError if the
    ## marker does not match, if the stream ends too early or if
    ## the bytecode fails its consistency check
    discard self.initSerializer()
    result = Serialized(chunk: newChunk())
    self.chunk = result.chunk
    var rest = stream  # The part of the stream still to be parsed
    try:
        let markerSize = len(BYTECODE_MARKER)
        if rest[0..<markerSize] != self.toBytes(BYTECODE_MARKER):
            self.error("malformed bytecode marker")
        rest = rest[markerSize..^1]
        # Version: one byte per component
        result.japlVer = (major: int(rest[0]), minor: int(rest[1]), patch: int(rest[2]))
        rest = rest[3..^1]
        # Branch name, prefixed by its length on one byte
        let branchSize = int(rest[0])
        rest = rest[1..^1]
        result.japlBranch = self.bytesToString(rest[0..<branchSize])
        rest = rest[branchSize..^1]
        # Commit hash: always 40 characters
        result.commitHash = toLowerAscii(self.bytesToString(rest[0..<40]))
        rest = rest[40..^1]
        # Compilation date: 8-byte integer
        var dateBytes: array[8, byte]
        for i in 0..7:
            dateBytes[i] = rest[i]
        result.compileDate = self.bytesToInt(dateBytes)
        rest = rest[8..^1]
        # File hash: 32 raw bytes, exposed as lowercase hexadecimal
        result.fileHash = toLowerAscii(toHex(self.bytesToString(rest[0..<32])))
        rest = rest[32..^1]
        # Both readers report how far to advance the stream
        rest = rest[self.readConstants(rest)..^1]
        rest = rest[self.readCode(rest)..^1]
    except IndexDefect:
        self.error("truncated bytecode file")
    except AssertionDefect:
        self.error("corrupted bytecode file")
|