From 13a6d71cfa6d964a35ca9f1a58e5607399219e0e Mon Sep 17 00:00:00 2001 From: Nocturn9x Date: Mon, 14 Mar 2022 09:09:42 +0100 Subject: [PATCH] Minor formatting and language changes to bytecode spec --- docs/bytecode.md | 8 ++++---- src/frontend/parser.nim | 13 +++++++++++++ src/frontend/serializer.nim | 10 +++++----- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/docs/bytecode.md b/docs/bytecode.md index 60d947b..9f14a12 100644 --- a/docs/bytecode.md +++ b/docs/bytecode.md @@ -67,6 +67,7 @@ After the modifier follows the string encoded in UTF-8, __without__ quotes. ### List-like collections (sets, lists and tuples) + List-like collections (or _sequences_)-- namely sets, lists and tuples-- encode their length first: for lists and sets this only denotes the _starting_ size of the container, while a tuple's size is fixed once it is created. The length may be 0, in which case it is interpreted as the sequence being empty; After the length, which expresses the __number of elements__ in the collection (just the count!), follows a number of compile-time objects equal to the specified length, with their respective encoding. __TODO__: Currently the compiler does not emit constant instructions for collections using only constants: it will just emit a bunch of `LoadConstant` instructions and @@ -82,8 +83,8 @@ Mappings (also called _associative arrays_ or, more informally, _dictionaries_) An object file starts with the headers, namely: -- A 13-byte constant string with the value `"JAPL_BYTECODE"` (without quotes) encoded as a sequence of integers corresponding to their value in the ASCII table -- A 3-byte version header composed of 3 unsigned integers representing the major, minor and patch version of the compiler used to generate the file, respectively. 
JAPL follows the SemVer standard for versioning +- A 13-byte constant string with the value `"JAPL_BYTECODE"` (without quotes) encoded as a sequence of integers corresponding to the ordinal value of each character in the ASCII table +- A 3-byte version header composed of 3 unsigned integers representing the major, minor and patch version of the compiler used to generate the file, respectively. JAPL follows the SemVer standard for versioning - A string representing the branch name of the git repo from which JAPL was compiled, prepended with its size represented as a single 8-bit unsigned integer. Due to this encoding the branch name can't be longer than 256 characters, which is a length deemed appropriate for this purpose - A 40 bytes hexadecimal string, pinpointing the version of the compiler down to the exact commit hash in the JAPL repository, particularly useful when testing development versions - An 8 byte (64 bit) UNIX timestamp (starting from the Unix Epoch of January 1st 1970 at 00:00), representing the date and time when the file was created @@ -91,8 +92,7 @@ An object file starts with the headers, namely: ### Constant section -This section of the file follows the headers and is meant to store all constants needed upon startup by the JAPL virtual machine. For example, the code `var x = 1;` would have the number one as a constant. Constants are just an ordered sequence of compile-time types as described in the sections above. The constant section's end is marked with -the byte `0x59`. +This section of the file follows the headers and is meant to store all constants needed upon startup by the JAPL virtual machine. For example, the code `var x = 1;` would have the number one as a constant. Constants are just an ordered sequence of compile-time types as described in the sections above. The constant section's end is marked with the byte `0x59`. 
### Code section diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim index d26cb40..4e1a3ba 100644 --- a/src/frontend/parser.nim +++ b/src/frontend/parser.nim @@ -808,6 +808,19 @@ proc forStmt(self: Parser): ASTNode = # Nested blocks, so the initializer is # only executed once body = newBlockStmt(@[initializer, body], tok) + # This desugars the following code: + # for (var i = 0; i < 10; i += 1) { + # print(i); + # } + # To the semantically equivalent snippet + # below: + # { + # private static var i = 0; + # while (i < 10) { + # print(i); + # i += 1; + # } + # } result = body self.currentLoop = enclosingLoop self.endScope() diff --git a/src/frontend/serializer.nim b/src/frontend/serializer.nim index ad92159..1a44fab 100644 --- a/src/frontend/serializer.nim +++ b/src/frontend/serializer.nim @@ -122,23 +122,23 @@ proc writeConstants(self: Serializer, stream: var seq[byte]) = stream.extend(self.toBytes(constant.token.lexeme)) of strExpr: stream.add(0x2) - var temp: seq[byte] = @[] + var temp: byte var strip: int = 2 var offset: int = 1 case constant.token.lexeme[0]: of 'f': strip = 3 inc(offset) - temp.add(0x2) + temp = 0x2 of 'b': strip = 3 inc(offset) - temp.add(0x1) + temp = 0x1 else: strip = 2 - temp.add(0x0) + temp = 0x0 stream.extend((len(constant.token.lexeme) - strip).toTriple()) # Removes the quotes from the length count as they're not written - stream.extend(temp) + stream.add(temp) stream.add(self.toBytes(constant.token.lexeme[offset..^2])) of identExpr: stream.add(0x0)