diff --git a/.gitignore b/.gitignore index 750bcf3..45557ef 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ nimcache/ nimblecache/ htmldocs/ - +*.pbc # Peon bytecode files +stdin.pbc +tests.pbc diff --git a/LICENSE b/LICENSE index eb2e968..261eeb9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,85 +1,201 @@ -The Artistic License 2.0 + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Copyright (c) 2000-2006, The Perl Foundation. + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + 1. Definitions. -Preamble + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. -You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. -Definitions + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. - "Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package. + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. - "Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures. + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. - "You" and "your" means any person who would like to copy, distribute, or modify the Package. + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). - "Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version. + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. - "Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization. + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." - "Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees. + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. - "Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder. + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. - "Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder. + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. - "Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future. + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: - "Source" form means the source code, documentation source, and configuration files for the Package. + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and - "Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form. + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and -Permission for Use and Modification Without Distribution + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and -(1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version. + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. -Permissions for Redistribution of the Standard Version + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. -(2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package. + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. -(3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License. + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. -Distribution of Modified Versions of the Package as Source + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. -(4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following: + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. - (a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version. - (b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version. - (c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
- (i) the Original License or - (ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed. + END OF TERMS AND CONDITIONS -Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source + APPENDIX: How to apply the Apache License to your work. -(5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days after you become aware that the instructions are invalid, then you do not forfeit any of your rights under this license. + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. -(6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version. 
+ Copyright [yyyy] [name of copyright owner] -Aggregating or Linking the Package + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at -(7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation. + http://www.apache.org/licenses/LICENSE-2.0 -(8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package. - -Items That are Not Considered Part of a Modified Version - -(9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license. - -General Provisions - -(10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license. - -(11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license. 
- -(12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder. - -(13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic License to you shall terminate on the date that such litigation is filed. - -(14) Disclaimer of Warranty: -THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f953db0 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +run: + nim --hints:off --warnings:off r src/test.nim + +pretty: + nimpretty src/*.nim src/backend/*.nim src/frontend/*.nim src/frontend/meta/*.nim src/memory/*.nim src/util/*.nim diff --git a/README.md b/README.md index b25d468..7eb6ec7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,79 @@ -# peon +# The peon programming language -Peon is a simple, functional, async-first programming language with a focus on correctness and speed \ No newline at end of file +Peon is a simple, functional, async-first programming language with a focus on correctness and speed. + +[Go to the Manual](docs/manual.md) + + +## Project structure + +- `src/` -> Contains the entirety of peon's toolchain + - `src/memory/` -> Contains peon's memory allocator and GC (TODO) + - `src/frontend/` -> Contains the tokenizer, parser and compiler + - `src/frontend/meta/` -> Contains shared error definitions, AST node and token + declarations as well as the bytecode used by the compiler + - `src/backend/` -> Contains the peon VM and type system + - `src/util/` -> Contains utilities such as the bytecode debugger and serializer as well + as procedures to handle multi-byte sequences + - `src/config.nim` -> Contains compile-time configuration variables + - `src/main.nim` -> Ties up the whole toolchain together by tokenizing, + parsing, compiling, debugging, (de-)serializing and executing peon code +- `docs/` -> Contains documentation for various components of peon (bytecode, syntax, etc.) 
+- `tests/` -> Contains tests (both in peon and Nim) for the toolchain + + +## Credits + +- Araq, for creating the amazing language that is [Nim](https://nim-lang.org) +- The Nim community and contributors, for making Nim what it is today +- Bob Nystrom, for his amazing [book](https://craftinginterpreters.com) that inspired me + and taught me how to actually make a programming language +- [Njsmith](https://vorpus.org/), for his awesome articles on structured concurrency + + +## Project State + +**Disclaimer**: The project is still in its very early days: lots of stuff is not implemented, a work in progress or +otherwise outright broken. Feel free to report bugs! + +Also, yes: peon is yet another programming language inspired by Bob's book, but it is also **very** +different from Lox, which is an object-oriented, dynamically typed and very high level programming language, whereas +peon is a statically-typed, functional language which aims to allow low-level interfacing with C and Nim code while +being a breeze to use. + +Also, peon will feature [structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) with coroutines (think Futures/Fibers but without +callback hell). Since, unlike Lox, peon isn't a toy language, there's obviously plans to implement creature comforts +like an import system, exception handling, a package manager, etc. 
+ + +### TODO List + +In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date): + +Toolchain: + + - Tokenizer (with dynamic symbol table) [x] + - Parser (with support for custom operators, even builtins) [x] + - Compiler [ ] (Work in Progress) + - VM [ ] (Work in Progress) + - Bytecode (de-)serializer [x] + - Static code debugger [x] + - Runtime debugger/inspection tool [ ] + +Type system: + + - Custom types [ ] + - Intrinsics [x] + - Generics [ ] (Work in Progress) + - Function calls [ ] (Work in Progress) + +Misc: + + - Pragmas [ ] (Work in Progress) + - Attribute resolution [ ] + - ... More? + +## The name + +The name for peon comes from my and [Productive2's](https://git.nocturn9x.space/prod2) genius and is a result of shortening +the name of the fastest animal on earth: the **Pe**regrine Falc**on**. I guess I wanted this to mean peon will be blazing fast \ No newline at end of file diff --git a/docs/bytecode.md b/docs/bytecode.md new file mode 100644 index 0000000..485ee15 --- /dev/null +++ b/docs/bytecode.md @@ -0,0 +1,72 @@ +# Peon - Bytecode Specification + +This document aims to document peon's bytecode as well as how it is (de-)serialized to/from files and +other file-like objects. + +## Code Structure + +A peon program is compiled into a tightly packed sequence of bytes that contain all the necessary information +the VM needs to execute said program. There is no dependence between the frontend and the backend outside of the +bytecode format (which is implemented in a separate serialiazer module) to allow for maximum modularity. 
+ +A peon bytecode dump contains: + +- Constants +- The bytecode itself +- Debugging information +- File and version metadata + +## Encoding + +### Header + +A peon bytecode file starts with the header, which is structured as follows: + +- The literal string `PEON_BYTECODE` +- A 3-byte version number (the major, minor and patch versions of the compiler that generated the file as per the SemVer versioning standard) +- The branch name of the repository the compiler was built from, prepended with its length as a 1 byte integer +- The full commit hash (encoded as a 40-byte hex-encoded string) in the aforementioned branch from which the compiler was built from (particularly useful in development builds) +- An 8-byte UNIX timestamp (with Epoch 0 starting at 1/1/1970 12:00 AM) representing the exact date and time of when the file was generated +- A 32-byte, hex-encoded SHA256 hash of the source file's content, used to track file changes + +### Line data section + +The line data section contains information about each instruction in the code section and associates them +1:1 with a line number in the original source file for easier debugging using run-length encoding. The section's +size is fixed and is encoded at the beginning as a sequence of 4 bytes (i.e. a single 32 bit integer). The data +in this section can be decoded as explained in [this file](../src/frontend/meta/bytecode.nim#L28), which is quoted +below: +``` +[...] +## lines maps bytecode instructions to line numbers using Run +## Length Encoding. 
Instructions are encoded in groups whose structure +## follows the following schema: +## - The first integer represents the line number +## - The second integer represents the count of whatever comes after it +## (let's call it c) +## - After c, a sequence of c integers follows +## +## A visual representation may be easier to understand: [1, 2, 3, 4] +## This is to be interpreted as "there are 2 instructions at line 1 whose values +## are 3 and 4" +## This is more efficient than using the naive approach, which would encode +## the same line number multiple times and waste considerable amounts of space. +[...] +``` + +### Constant section + +The constant section contains all the read-only values that the code will need at runtime, such as hardcoded +variable initializers or constant expressions. It is similar to the `.rodata` section of Assembly files, although +the implementation is different. Constants are encoded as a linear sequence of bytes with no type information about +them whatsoever: it is the code that, at runtime, loads each constant (whose type is determined at compile time) onto +the stack accordingly. For example, a 32 bit integer constant would be encoded as a sequence of 4 bytes, which would +then be loaded by the appropriate `LoadInt32` instruction at runtime. The section's size is fixed and is encoded at +the beginning as a sequence of 4 bytes (i.e. a single 32 bit integer). The constant section may be empty, although in +real-world scenarios it's unlikely that it would. + +### Code section + +The code section contains the linear sequence of bytecode instructions of a peon program. It is to be read directly +and without modifications. The section's size is fixed and is encoded at the beginning as a sequence of 3 bytes +(i.e. a single 24 bit integer). 
\ No newline at end of file diff --git a/docs/grammar.md b/docs/grammar.md new file mode 100644 index 0000000..f87f5c1 --- /dev/null +++ b/docs/grammar.md @@ -0,0 +1 @@ +# TODO \ No newline at end of file diff --git a/docs/manual.md b/docs/manual.md new file mode 100644 index 0000000..07317b7 --- /dev/null +++ b/docs/manual.md @@ -0,0 +1,188 @@ +# Peon - Manual + +Peon is a functional, statically typed, garbage-collected, C-like programming language with +a focus on speed and correctness, but whose main feature is the ability to natively +perform highly efficient parallel I/O operations by implementing the [structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) +paradigm. + +__Note__: Peon is currently a WIP (Work In Progress), and much of the content of this manual is purely theoretical as +of now. If you want to help make this into a reality, feel free to contribute! + + +## Table of contents + +- [Manual](#peon---manual) +- [Design Goals](#design-goals) +- [Examples](#peon-by-example) +- [Grammar](grammar.md) +- [Bytecode](bytecode.md) + +## Design Goals + +While peon is inspired from Bob Nystrom's [book](https://craftinginterpreters.com), where he describes a simple toy language +named Lox, the aspiration for it is to become a programming language that could actually be used in the real world. For that +to happen, we need: + +- Exceptions (`try/except/finally`) +- An import system (with namespaces, like Python) +- Multithreading support (with a global VM lock when GC'ing) +- Built-in collections (list, tuple, set, etc.) +- Coroutines (w/ structured concurrency) +- Generators +- Generics +- C/Nim FFI +- A package manager + +Peon ~~steals~~ borrows many ideas from Python and Nim (the latter being the language peon itself is written in). 
+ +## Peon by Example + +Here follow a few examples of peon code to make it clear what the end product should look like + +### Variable declarations + +``` +var x = 5; # Inferred type is int64 +var y = 3'u16; # Type is specified as uint16 +x = 6; # Works: type matches +x = 3.0; # Cannot assign float64 to x +var x = 3.14; # Cannot re-declare x +``` + +__Note__: Peon supports [name stropping](https://en.wikipedia.org/wiki/Stropping_(syntax)), meaning + that almost any ASCII sequence of characters can be used as an identifier, including language + keywords, but stropped names need to be enclosed by matching pairs of backticks (`\``) + +### Functions + +``` +fn fib(n: int): int { + if (n < 3) { + return n; + } + return fib(n - 1) + fib(n - 2); +} + +fib(30); +``` + +### Type declarations + +``` +type Foo = object { # Can also be "ref object" for reference types (managed automatically) + fieldOne*: int # Asterisk means the field is public outside the current module + fieldTwo*: int +} +``` + +### Operator overloading + +``` +operator `+`(a, b: Foo) { + return Foo(fieldOne: a.fieldOne + b.fieldOne, fieldTwo: a.fieldTwo + b.fieldTwo); +} + +Foo(fieldOne: 1, fieldTwo: 3) + Foo(fieldOne: 2, fieldTwo: 3); # Foo(fieldOne: 3, fieldTwo: 6) +``` + +__Note__: Custom operators (e.g. `foo`) can also be defined! The backticks around the plus sign serve to mark it +as an identifier instead of a symbol (which is a requirement for function names, since operators are basically +functions). In fact, even the built-in peon operators are implemented partially in peon (well, their forward +declarations are) and they are then specialized in the compiler to emit a single bytecode instruction. + +### Function calls + +``` +foo(1, 2 + 3, 3.14, bar(baz)); +``` + +__Note__: Operators can be called as functions too. 
Just wrap their name in backticks, like so: +``` +`+`(1, 2) +``` + +__Note__: Code the likes of `a.b()` is desugared to `b(a)` if there exists a function `b` whose + signature is compatible with the value of of `a` (assuming `a` doesn't have a `b` field, in + which case the attribute resolution takes precedence) + + +### Generic declarations + +``` +fn genericSum[T](a, b: T): T { # Note: "a, b: T" means that both a and b are of type T + return a + b; +} + +# This allows for a single implementation to be +# re-used multiple times without any code duplication! +genericSum(1, 2); +genericSum(3.14, 0.1); +genericSum(1'u8, 250'u8); +``` + +#### Multiple generics + +``` +fn genericSth[T, K](a: T, b: K) { # Note: no return type == void function! + # code... +} + +genericSth(1, 3.0); +``` + +__Note__: The `*` modifier to make a name visible outside the current module must be put +__before__ generics declarations, so only `fn foo*[T](a: T) {}` is the correct syntax + +### Forward declarations + +``` +fn someF: int; # Semicolon, no body! + +someF(); # This works! + +fn someF: int { + return 42; +} +``` + +### Generators + +``` +generator count(n: int): int { + while (n > 0) { + yield n; + n -= 1; + } +} + +foreach (n: count(10)) { + print(n); +} +``` + + +### Coroutines + +``` +import concur; +import http; + + +coroutine req(url: string): string { + return (await http.AsyncClient().get(url)).content; +} + + +coroutine main(urls: list[string]) { + pool = concur.pool(); # Creates a task pool: like a nursery in njsmith's article + for (var i = 0; i < urls.len(); i += 1) { + pool.spawn(req, urls[i]); + } + # The pool has internal machinery that makes the parent + # task wait until all child exit! 
When this function + # returns, ALL child tasks will have exited somehow +} + + +concur.run(main, newList[string]("https://google.com", "https://debian.org")) +``` \ No newline at end of file diff --git a/peon b/peon new file mode 100755 index 0000000..04b46ed Binary files /dev/null and b/peon differ diff --git a/src/backend/types.nim b/src/backend/types.nim new file mode 100644 index 0000000..c910c44 --- /dev/null +++ b/src/backend/types.nim @@ -0,0 +1,54 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +type + ObjectKind* = enum + ## Enumeration of Peon + ## types + Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Function, CustomType, + Nil, Nan, Bool, Inf + PeonObject* = object + ## A generic Peon object + case kind*: ObjectKind: + of Bool: + boolean*: bool + of Inf: + positive*: bool + of Byte: + `byte`*: byte + of Int8: + tiny*: int8 # Signed 8-bit payload (uTiny below carries the unsigned variant) + of UInt8: + uTiny*: uint8 + of Int16: + short*: int16 + of UInt16: + uShort*: uint16 + of Int32: + `int`*: int32 + of UInt32: + uInt*: uint32 + of Int64: + long*: int64 + of UInt64: + uLong*: uint64 + of Nil, Nan: + discard + of CustomType: + fields*: seq[PeonObject] + else: + discard # TODO diff --git a/src/backend/vm.nim b/src/backend/vm.nim new file mode 100644 index 0000000..22a0aa7 --- /dev/null +++ b/src/backend/vm.nim @@ -0,0 +1,312 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## The Peon runtime environment +import types +import strformat +import ../config +import ../frontend/meta/bytecode +import ../util/multibyte + + +type + PeonVM* = ref object + ## The Peon Virtual Machine + stack: seq[PeonObject] + ip: int # Instruction pointer + cache: array[6, PeonObject] # Singletons cache + chunk: Chunk # Piece of bytecode to execute + frames: seq[int] # Stores the initial index of stack frames + heapVars: seq[PeonObject] # Stores variables that do not have stack semantics (i.e.
"static") + + +proc initCache*(self: PeonVM) = + ## Initializes the VM's + ## singletons cache + self.cache[0] = PeonObject(kind: Nil) + self.cache[1] = PeonObject(kind: Bool, boolean: true) + self.cache[2] = PeonObject(kind: Bool, boolean: false) + self.cache[3] = PeonObject(kind: ObjectKind.Inf, positive: true) + self.cache[4] = PeonObject(kind: ObjectKind.Inf, positive: false) + self.cache[5] = PeonObject(kind: ObjectKind.Nan) + + +proc newPeonVM*: PeonVM = + ## Initializes a new, blank VM + ## for executing Peon bytecode + new(result) + result.ip = 0 + result.frames = @[] + result.stack = newSeq[PeonObject]() + result.initCache() + + +## Getters for singleton types (they are cached!) + +proc getNil*(self: PeonVM): PeonObject = self.cache[0] + +proc getBool*(self: PeonVM, value: bool): PeonObject = + if value: + return self.cache[1] + return self.cache[2] + +proc getInf*(self: PeonVM, positive: bool): PeonObject = + if positive: + return self.cache[3] + return self.cache[4] + +proc getNan*(self: PeonVM): PeonObject = self.cache[5] + +## Stack primitives. Note: all stack accessing that goes +## through the get/set wrappers is frame-relative, meaning +## that the index is added to the current stack frame's +## bottom to obtain an absolute stack index. + +proc push(self: PeonVM, obj: PeonObject) = + ## Pushes a Peon object onto the + ## stack + self.stack.add(obj) + + +proc pop(self: PeonVM): PeonObject = + ## Pops a Peon object off the + ## stack, decreasing the stack + ## pointer. 
The object is returned + return self.stack.pop() + + +proc peek(self: PeonVM): PeonObject = + ## Returns the Peon object at the top + ## of the stack without consuming + ## it + return self.stack[^1] + + +proc get(self: PeonVM, idx: int): PeonObject = + ## Accessor method that abstracts + ## stack accessing through stack + ## frames + return self.stack[idx + self.frames[^1]] + + +proc set(self: PeonVM, idx: int, val: PeonObject) = + ## Setter method that abstracts + ## stack accessing through stack + ## frames + self.stack[idx + self.frames[^1]] = val + + +proc readByte(self: PeonVM): uint8 = + ## Reads a single byte from the + ## bytecode and returns it as an + ## unsigned 8 bit integer + inc(self.ip) + return self.chunk.code[self.ip - 1] + + +proc readShort(self: PeonVM): uint16 = + ## Reads two bytes from the + ## bytecode and returns them + ## as an unsigned 16 bit + ## integer + return [self.readByte(), self.readByte()].fromDouble() + + +proc readLong(self: PeonVM): uint32 = + ## Reads three bytes from the + ## bytecode and returns them + ## as an unsigned 32 bit + ## integer. Note however that + ## the boundary is capped at + ## 24 bits instead of 32 + return uint32([self.readByte(), self.readByte(), self.readByte()].fromTriple()) + + +proc readInt64(self: PeonVM, idx: int): PeonObject = + ## Reads a constant from the + ## chunk's constant table and + ## returns a Peon object. Assumes + ## the constant is an Int64 + var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1], + self.chunk.consts[idx + 2], self.chunk.consts[idx + 3], + self.chunk.consts[idx + 4], self.chunk.consts[idx + 5], + self.chunk.consts[idx + 6], self.chunk.consts[idx + 7], + ] + result = PeonObject(kind: Int64) + copyMem(result.long.addr, arr.addr, sizeof(arr)) + + +proc readUInt64(self: PeonVM, idx: int): PeonObject = + ## Reads a constant from the + ## chunk's constant table and + ## returns a Peon object. 
Assumes + ## the constant is an UInt64 + var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1], + self.chunk.consts[idx + 2], self.chunk.consts[idx + 3], + self.chunk.consts[idx + 4], self.chunk.consts[idx + 5], + self.chunk.consts[idx + 6], self.chunk.consts[idx + 7], + ] + result = PeonObject(kind: UInt64) + copyMem(result.uLong.addr, arr.addr, sizeof(arr)) + + +proc readUInt32(self: PeonVM, idx: int): PeonObject = + ## Reads a constant from the + ## chunk's constant table and + ## returns a Peon object. Assumes + ## the constant is an UInt32 + var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1], + self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]] + result = PeonObject(kind: UInt32) + copyMem(result.uInt.addr, arr.addr, sizeof(arr)) + + +proc readInt32(self: PeonVM, idx: int): PeonObject = + ## Reads a constant from the + ## chunk's constant table and + ## returns a Peon object. Assumes + ## the constant is an Int32 + var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1], + self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]] + result = PeonObject(kind: Int32) + copyMem(result.`int`.addr, arr.addr, sizeof(arr)) + + +proc dispatch*(self: PeonVM) = + ## Main bytecode dispatch loop + var instruction: OpCode + while true: + instruction = OpCode(self.readByte()) + when DEBUG_TRACE_VM: + echo &"IP: {self.ip}" + echo &"SP: {self.stack.high()}" + echo &"Stack: {self.stack}" + echo &"Instruction: {instruction}" + discard readLine stdin + case instruction: + # Constant loading + of LoadTrue: + self.push(self.getBool(true)) + of LoadFalse: + self.push(self.getBool(false)) + of LoadNan: + self.push(self.getNan()) + of LoadNil: + self.push(self.getNil()) + of LoadInf: + self.push(self.getInf(true)) + of LoadInt64: + self.push(self.readInt64(int(self.readLong()))) + of LoadUInt64: + self.push(self.readUInt64(int(self.readLong()))) + of LoadUInt32: + self.push(self.readUInt32(int(self.readLong()))) + of Call: + # Calls a function. 
The calling convention for peon + # functions is pretty simple: the return address sits + # at the bottom of the stack frame, then follow the + # arguments and all temporaries/local variables + let newIp = self.readLong() + # We do this because if we immediately changed + # the instruction pointer, we'd read the wrong + # value for the argument count. Storing it and + # changing it later fixes this issue + self.frames.add(int(self.readLong())) + self.ip = int(newIp) + of OpCode.Return: + # Returns from a void function or terminates the + # program entirely if we're at the topmost frame + if self.frames.len() > 1: + let frame = self.frames.pop() + for i in countdown(self.stack.high(), frame): + discard self.pop() + self.ip = int(self.pop().uInt) + else: + return + of ReturnValue: + # Returns from a function which has a return value, + # pushing it on the stack + let retVal = self.pop() + let frame = self.frames.pop() + for i in countdown(self.stack.high(), frame): + discard self.pop() + self.ip = int(self.pop().uInt) + self.push(retVal) + of StoreVar: + # Stores the value at the top of the stack + # into the given stack index + self.set(int(self.readLong()), self.pop()) + of StoreHeap: + self.heapVars.add(self.pop()) + of LoadHeap: + self.push(self.heapVars[self.readLong()]) + of LoadVar: + self.push(self.get(int(self.readLong()))) + of NoOp: + continue + of Pop: + discard self.pop() + of PopN: + for _ in 0.. 
1".} +const PEON_VERSION* = (major: 0, minor: 4, patch: 0) +const PEON_RELEASE* = "alpha" +const PEON_COMMIT_HASH* = "ed79385e2a93100331697f26a4a90157e60ad27a" +when len(PEON_COMMIT_HASH) != 40: + {.fatal: "The git commit hash must be exactly 40 characters long".} +const PEON_BRANCH* = "master" +when len(PEON_BRANCH) > 255: + {.fatal: "The git branch name's length must be less than or equal to 255 characters".} +const DEBUG_TRACE_VM* = true # Traces VM execution +const DEBUG_TRACE_GC* = false # Traces the garbage collector (TODO) +const DEBUG_TRACE_ALLOCATION* = false # Traces memory allocation/deallocation +const DEBUG_TRACE_COMPILER* = false # Traces the compiler +const PEON_VERSION_STRING* = &"Peon {PEON_VERSION.major}.{PEON_VERSION.minor}.{PEON_VERSION.patch} {PEON_RELEASE} ({PEON_BRANCH}, {CompileDate}, {CompileTime}, {PEON_COMMIT_HASH[0..8]}) [Nim {NimVersion}] on {hostOS} ({hostCPU})" +const HELP_MESSAGE* = """The peon programming language, Copyright (C) 2022 Mattia Giambirtone & All Contributors + +This program is free software, see the license distributed with this program or check +http://www.apache.org/licenses/LICENSE-2.0 for more info. 
+ +Basic usage +----------- + +$ peon Opens an interactive session (REPL) +$ peon file.pn Runs the given Peon source file + +Command-line options +-------------------- + +-h, --help Shows this help text and exits +-v, --version Prints the peon version number and exits +-s, --string Executes the passed string as if it was a file +-i, --interactive Enables interactive mode, which opens a REPL session after execution of a file or source string +-c, --nocache Disables dumping the result of bytecode compilation to files for caching +-d, --cache-delay Configures the bytecode cache invalidation threshold, in minutes (defaults to 60) +""" diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim new file mode 100644 index 0000000..797fc10 --- /dev/null +++ b/src/frontend/compiler.nim @@ -0,0 +1,1353 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import meta/token +import meta/ast +import meta/errors +import ../config +import ../util/multibyte + + +import strformat +import algorithm +import parseutils +import strutils +import sequtils +import os + + +export ast +export token +export multibyte + + +type + TypeKind* = enum + ## An enumeration of compile-time + ## types + Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Function, CustomType, + Nil, Nan, Bool, Inf, Typedesc, Generic + Type* = ref object + ## A wrapper around + ## compile-time types + node*: ASTNode + case kind*: TypeKind: + of Function: + args*: seq[Type] + returnType*: Type + else: + discard + +# This way we don't have recursive dependency issues +import meta/bytecode +export bytecode + + +type + Name = ref object + ## A compile-time wrapper around + ## statically resolved names + + # Name of the identifier + name: IdentExpr + # Owner of the identifier (module) + owner: string + # Scope depth + depth: int + # Is this name private? + isPrivate: bool + # Is this a constant? + isConst: bool + # Can this name's value be mutated? + isLet: bool + # The name's type + valueType: Type + # For variables, the position in the bytecode + # where its StoreVar instruction was emitted. + # For functions, this marks where the function's + # code begins + codePos: int + Loop = object + ## A "loop object" used + ## by the compiler to emit + ## appropriate jump offsets + ## for continue and break + ## statements + + # Position in the bytecode where the loop starts + start: int + # Scope depth where the loop is located + depth: int + # Absolute jump offsets into our bytecode that we need to + # patch. 
Used for break statements + breakPos: seq[int] + + Compiler* = ref object + ## A wrapper around the Peon compiler's state + + # The bytecode chunk where we write code to + chunk: Chunk + # The output of our parser (AST) + ast: seq[Declaration] + # The current AST node we're looking at + current: int + # The current file being compiled (used only for + # error reporting) + file: string + # Compile-time "simulation" of the stack at + # runtime to load variables that have stack + # behavior more efficiently + names: seq[Name] + # Beginning of stack frames for function calls + frames: seq[int] + # The current scope depth. If > 0, we're + # in a local scope, otherwise it's global + scopeDepth: int + # The current function being compiled + currentFunction: FunDecl + # Are optimizations turned on? + enableOptimizations*: bool + # The current loop being compiled (used to + # keep track of where to jump) + currentLoop: Loop + # The current module being compiled + # (used to restrict access to statically + # defined variables at compile time) + currentModule: string + # Each time a defer statement is + # compiled, its code is emitted + # here. Later, if there is any code + # to defer in the current function, + # funDecl will wrap the function's code + # inside an implicit try/finally block + # and add this code in the finally branch. 
+ # This sequence is emptied each time a + # function declaration is compiled and stores only + # deferred code for the current function (may + # be empty) + deferred: seq[uint8] + # List of closed-over variables + closedOver: seq[IdentExpr] + + + +proc newCompiler*(enableOptimizations: bool = true): Compiler = + ## Initializes a new Compiler object + new(result) + result.ast = @[] + result.current = 0 + result.file = "" + result.names = @[] + result.scopeDepth = 0 + result.currentFunction = nil + result.enableOptimizations = enableOptimizations + result.currentModule = "" + result.frames = @[] + + +## Forward declarations +proc expression(self: Compiler, node: Expression) +proc statement(self: Compiler, node: Statement) +proc declaration(self: Compiler, node: Declaration) +proc peek(self: Compiler, distance: int = 0): ASTNode +proc identifier(self: Compiler, node: IdentExpr) +proc varDecl(self: Compiler, node: VarDecl) +proc inferType(self: Compiler, node: LiteralExpr): Type +proc inferType(self: Compiler, node: Expression): Type +proc findByName(self: Compiler, name: string): seq[Name] +proc findByType(self: Compiler, name: string, kind: Type): seq[Name] +proc compareTypes(self: Compiler, a, b: Type): bool +proc patchReturnAddress(self: Compiler, retAddr: int) +## End of forward declarations + +## Public getter for nicer error formatting +proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= + self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) +proc getCurrentFunction*(self: Compiler): Declaration {.inline.} = self.currentFunction +proc getFile*(self: COmpiler): string {.inline.} = self.file +proc getModule*(self: COmpiler): string {.inline.} = self.currentModule + + +## Utility functions +proc peek(self: Compiler, distance: int = 0): ASTNode = + ## Peeks at the AST node at the given distance. + ## If the distance is out of bounds, the last + ## AST node in the tree is returned. 
A negative + ## distance may be used to retrieve previously + ## consumed AST nodes + if self.ast.high() == -1 or self.current + distance > self.ast.high() or + self.current + distance < 0: + result = self.ast[^1] + else: + result = self.ast[self.current + distance] + + +proc done(self: Compiler): bool = + ## Returns true if the compiler is done + ## compiling, false otherwise + result = self.current > self.ast.high() + + +proc error(self: Compiler, message: string) {.raises: [CompileError].} = + ## Raises a CompileError exception + raise CompileError(msg: message, node: self.getCurrentNode(), file: self.file, module: self.currentModule) + + +proc step(self: Compiler): ASTNode = + ## Steps to the next node and returns + ## the consumed one + result = self.peek() + if not self.done(): + self.current += 1 + + +proc emitByte(self: Compiler, byt: OpCode | uint8) = + ## Emits a single byte, writing it to + ## the current chunk being compiled + when DEBUG_TRACE_COMPILER: + echo &"DEBUG - Compiler: Emitting {$byt}" + self.chunk.write(uint8 byt, self.peek().token.line) + + +proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) = + ## Handy helper method to write arbitrary bytes into + ## the current chunk, calling emitByte on each of its + ## elements + for b in bytarr: + self.emitByte(b) + + +proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] = + ## Adds a constant to the current chunk's constant table + ## and returns its index as a 3-byte array of uint8s + var v: int + discard parseInt(val.token.lexeme, v) + case typ.kind: + of UInt8, Int8: + result = self.chunk.writeConstant([uint8(v)]) + of Int16, UInt16: + result = self.chunk.writeConstant(v.toDouble()) + of Int32, UInt32: + result = self.chunk.writeConstant(v.toQuad()) + of Int64, UInt64: + result = self.chunk.writeConstant(v.toLong()) + else: + discard + + +proc emitConstant(self: Compiler, obj: Expression, kind: Type) = + ## Emits a LoadConstant instruction along + ## with 
its operand + case self.inferType(obj).kind: + of Int64: + self.emitByte(LoadInt64) + else: + discard # TODO + self.emitBytes(self.makeConstant(obj, kind)) + + +proc emitJump(self: Compiler, opcode: OpCode): int = + ## Emits a dummy jump offset to be patched later. Assumes + ## the largest offset (emits 4 bytes, one for the given jump + ## opcode, while the other 3 are for the jump offset which is set + ## to the maximum unsigned 24 bit integer). If the shorter + ## 16 bit alternative is later found to be better suited, patchJump + ## will fix this. This function returns the absolute index into the + ## chunk's bytecode array where the given placeholder instruction was written + self.emitByte(opcode) + self.emitBytes((0xffffff).toTriple()) + result = self.chunk.code.len() - 4 + + +proc patchJump(self: Compiler, offset: int) = + ## Patches a previously emitted relative + ## jump using emitJump. Since emitJump assumes + ## a long jump, this also shrinks the jump + ## offset and changes the bytecode instruction if possible + ## (i.e. jump is in 16 bit range), but the converse is also + ## true (i.e. it might change a regular jump into a long one) + var jump: int = self.chunk.code.len() - offset + if jump > 16777215: + self.error("cannot jump more than 16777216 bytecode instructions") + if jump < uint16.high().int: + case OpCode(self.chunk.code[offset]): + of LongJumpForwards: + self.chunk.code[offset] = JumpForwards.uint8() + of LongJumpBackwards: + self.chunk.code[offset] = JumpBackwards.uint8() + of LongJumpIfFalse: + self.chunk.code[offset] = JumpIfFalse.uint8() + of LongJumpIfFalsePop: + self.chunk.code[offset] = JumpIfFalsePop.uint8() + of LongJumpIfFalseOrPop: + self.chunk.code[offset] = JumpIfFalseOrPop.uint8() + else: + discard + self.chunk.code.delete(offset + 1) # Discards the first 8 bits of the jump offset (which are empty) + let offsetArray = (jump - 1).toDouble() # -1 since we got rid of 1 byte! 
+ self.chunk.code[offset + 1] = offsetArray[0] + self.chunk.code[offset + 2] = offsetArray[1] + else: + case OpCode(self.chunk.code[offset]): + of JumpForwards: + self.chunk.code[offset] = LongJumpForwards.uint8() + of JumpBackwards: + self.chunk.code[offset] = LongJumpBackwards.uint8() + of JumpIfFalse: + self.chunk.code[offset] = LongJumpIfFalse.uint8() + of JumpIfFalsePop: + self.chunk.code[offset] = LongJumpIfFalsePop.uint8() + of JumpIfFalseOrPop: + self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8() + else: + discard + let offsetArray = jump.toTriple() + self.chunk.code[offset + 1] = offsetArray[0] + self.chunk.code[offset + 2] = offsetArray[1] + self.chunk.code[offset + 3] = offsetArray[2] + + +proc resolve(self: Compiler, name: IdentExpr, + depth: int = self.scopeDepth): Name = + ## Traverses self.names backwards and returns the + ## first name object with the given name. Returns + ## nil when the name can't be found. This function + ## has no concept of scope depth, because getStackPos + ## does that job. Note that private names declared in + ## other modules will not be resolved! + for obj in reversed(self.names): + if obj.name.token.lexeme == name.token.lexeme: + if obj.isPrivate and obj.owner != self.currentModule: + continue # There may be a name in the current module that + # matches, so we skip this + return obj + return nil + + +proc getStackPos(self: Compiler, name: IdentExpr, + depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] = + ## Iterates the internal list of declared names backwards and + ## returns a tuple (closedOver, pos) that tells the caller whether the + ## the name is to be emitted as a closure as well as its predicted + ## stack/closure array position. Returns (false, -1) if the variable's + ## location can not be determined at compile time (this is an error!). + ## Note that private names declared in other modules will not be resolved! 
+ var i: int = self.names.high() + for variable in reversed(self.names): + if name.name.lexeme == variable.name.name.lexeme: + if variable.isPrivate and variable.owner != self.currentModule: + continue + if variable.depth == depth or variable.depth == 0: + # variable.depth == 0 for globals! + return (false, i) + elif variable.depth > 0: + for j, closure in reversed(self.closedOver): + if closure.name.lexeme == name.name.lexeme: + return (true, j) + dec(i) + return (false, -1) + + +proc detectClosureVariable(self: Compiler, name: IdentExpr, + depth: int = self.scopeDepth) = + ## Detects if the given name is used in a local scope deeper + ## than the given one and modifies the code emitted for it + ## to store it as a closure variable if it is. Does nothing if the name + ## hasn't been declared yet or is unreachable (for example if it's + ## declared as private in another module). This function must be called + ## each time a name is referenced in order for closed-over variables + ## to be emitted properly, otherwise the runtime may behave + ## unpredictably or crash + let entry = self.resolve(name) + if entry == nil: + return + if entry.depth < depth: + # Ding! The given name is closed over: we need to + # change the StoreVar instruction that created this + # name entry into a StoreHeap. 
We don't need to change + # other pieces of code because self.identifier() already + # emits LoadHeap if it detects the variable is closed over, + # whether or not this function is called + self.closedOver.add(entry.name) + if self.closedOver.len() >= 16777216: + self.error("too many consecutive closed-over variables (max is 16777216)") + let idx = self.closedOver.high().toTriple() + self.chunk.code[entry.codePos] = StoreHeap.uint8 + self.chunk.code[entry.codePos + 1] = idx[0] + self.chunk.code[entry.codePos + 2] = idx[1] + self.chunk.code[entry.codePos + 3] = idx[2] + + +proc compareTypesWithNullNode(self: Compiler, a, b: Type): bool = + ## Compares two types without using information from + ## AST nodes + if a == nil: + return b == nil + elif b == nil: + return a == nil + if a.kind != b.kind: + return false + case a.kind: + of Function: + if a.args.len() != b.args.len(): + return false + elif not self.compareTypes(a.returnType, b.returnType): + return false + for (argA, argB) in zip(a.args, b.args): + if not self.compareTypes(argA, argB): + return false + return true + else: + discard + + +proc compareTypes(self: Compiler, a, b: Type): bool = + ## Compares two type objects + ## for equality (works with nil!) 
+ if a == nil: + return b == nil + elif b == nil: + return a == nil + if a.kind != b.kind: + return false + case a.kind: + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, Nan, Bool, Inf: + return true + of Function: + if a.node == nil or b.node == nil: + return self.compareTypesWithNullNode(a, b) + let + a = FunDecl(a.node) + b = FunDecl(b.node) + if a.name.token.lexeme != b.name.token.lexeme: + return false + elif a.arguments.len() != b.arguments.len(): + return false + elif not self.compareTypes(self.inferType(a.returnType), self.inferType(b.returnType)): + return false + for (argA, argB) in zip(a.arguments, b.arguments): + if argA.mutable != argB.mutable: + return false + elif argA.isRef != argB.isRef: + return false + elif argA.isPtr != argB.isPtr: + return false + elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)): + return false + return true + else: + discard + + +proc toIntrinsic(name: string): Type = + ## Converts a string to an intrinsic + ## type if it is valid and returns nil + ## otherwise + if name in ["int", "int64", "i64"]: + return Type(kind: Int64) + elif name in ["uint64", "u64"]: + return Type(kind: UInt64) + elif name in ["int32", "i32"]: + return Type(kind: Int32) + elif name in ["uint32", "u32"]: + return Type(kind: UInt32) + elif name in ["int16", "i16"]: + return Type(kind: Int16) + elif name in ["uint16", "u16"]: + return Type(kind: UInt16) + elif name in ["int8", "i8"]: + return Type(kind: Int8) + elif name in ["uint8", "u8"]: + return Type(kind: UInt8) + elif name in ["f64", "float", "float64"]: + return Type(kind: Float64) + elif name in ["f32", "float32"]: + return Type(kind: Float32) + elif name == "byte": + return Type(kind: Byte) + elif name == "char": + return Type(kind: Char) + elif name == "nan": + return Type(kind: Nan) + elif name == "nil": + return Type(kind: Nil) + elif name == "inf": + return Type(kind: Inf) + elif name == 
"bool": + return Type(kind: Bool) + elif name == "type": + return Type(kind: Typedesc) + else: + return nil + + +proc inferType(self: Compiler, node: LiteralExpr): Type = + ## Infers the type of a given literal expression + if node == nil: + return nil + case node.kind: + of intExpr, binExpr, octExpr, hexExpr: + let size = node.token.lexeme.split("'") + if len(size) notin 1..2: + self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") + if size.len() == 1: + return Type(node: node, kind: Int64) + let typ = size[1].toIntrinsic() + if not self.compareTypes(typ, nil): + return typ + else: + self.error(&"invalid type specifier '{size[1]}' for int") + of floatExpr: + let size = node.token.lexeme.split("'") + if len(size) notin 1..2: + self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)") + if size.len() == 1 or size[1] == "f64": + return Type(node: node, kind: Float64) + let typ = size[1].toIntrinsic() + if not self.compareTypes(typ, nil): + return typ + else: + self.error(&"invalid type specifier '{size[1]}' for float") + of nilExpr: + return Type(node: node, kind: Nil) + of trueExpr: + return Type(node: node, kind: Bool) + of falseExpr: + return Type(node: node, kind: Bool) + of nanExpr: + return Type(node: node, kind: TypeKind.Nan) + of infExpr: + return Type(node: node, kind: TypeKind.Inf) + else: + discard # TODO + + +proc toIntrinsic(self: Compiler, typ: Expression): Type = + ## Gets an expression's + ## intrinsic type, if possible + if typ == nil: + return nil + case typ.kind: + of trueExpr, falseExpr, intExpr, floatExpr: + return typ.token.lexeme.toIntrinsic() + of identExpr: + let inferred = self.inferType(typ) + if inferred == nil: + return typ.token.lexeme.toIntrinsic() + return inferred + else: + discard + + +proc inferType(self: Compiler, node: Expression): Type = + ## Infers the type of a given expression and + ## returns it + 
if node == nil: + return nil + case node.kind: + of identExpr: + let node = IdentExpr(node) + let name = self.resolve(node) + if name != nil: + return name.valueType + else: + return node.name.lexeme.toIntrinsic() + of unaryExpr: + return self.inferType(UnaryExpr(node).a) + of binaryExpr: + let node = BinaryExpr(node) + var a = self.inferType(node.a) + var b = self.inferType(node.b) + if not self.compareTypes(a, b): + return nil + return a + of {intExpr, hexExpr, binExpr, octExpr, + strExpr, falseExpr, trueExpr, infExpr, + nanExpr, floatExpr, nilExpr + }: + return self.inferType(LiteralExpr(node)) + else: + discard # Unreachable + + +proc typeToStr(self: Compiler, typ: Type): string = + ## Returns the string representation of a + ## type object + case typ.kind: + of Int8, UInt8, Int16, UInt16, Int32, + UInt32, Int64, UInt64, Float32, Float64, + Char, Byte, String, Nil, TypeKind.Nan, Bool, + TypeKind.Inf: + return ($typ.kind).toLowerAscii() + of Function: + result = "function (" + case typ.node.kind: + of funDecl: + var node = FunDecl(typ.node) + for i, argument in node.arguments: + result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}" + if i < node.arguments.len() - 1: + result &= ", " + result &= ")" + of lambdaExpr: + var node = LambdaExpr(typ.node) + for i, argument in node.arguments: + result &= &"{argument.name.token.lexeme}: {argument.valueType}" + if i < node.arguments.len() - 1: + result &= ", " + result &= ")" + else: + discard # Unreachable + result &= &": {self.typeToStr(typ.returnType)}" + else: + discard + + +proc inferType(self: Compiler, node: Declaration): Type = + ## Infers the type of a given declaration + ## and returns it + if node == nil: + return nil + case node.kind: + of funDecl: + var node = FunDecl(node) + let resolved = self.resolve(node.name) + if resolved != nil: + return resolved.valueType + of NodeKind.varDecl: + var node = VarDecl(node) + let resolved = self.resolve(node.name) + if resolved != 
nil: + return resolved.valueType + else: + return self.inferType(node.value) + else: + return # Unreachable + +## End of utility functions + + +proc literal(self: Compiler, node: ASTNode) = + ## Emits instructions for literals such + ## as singletons, strings, numbers and + ## collections + case node.kind: + of trueExpr: + self.emitByte(LoadTrue) + of falseExpr: + self.emitByte(LoadFalse) + of nilExpr: + self.emitByte(LoadNil) + of infExpr: + self.emitByte(LoadInf) + of nanExpr: + self.emitByte(LoadNan) + of strExpr: + self.emitConstant(LiteralExpr(node), Type(kind: String)) + # TODO: Take size specifier into account! + of intExpr: + var x: int + var y = IntExpr(node) + try: + discard parseInt(y.literal.lexeme, x) + except ValueError: + self.error("integer value out of range") + self.emitConstant(y, Type(kind: Int64)) + of hexExpr: + var x: int + var y = HexExpr(node) + try: + discard parseHex(y.literal.lexeme, x) + except ValueError: + self.error("integer value out of range") + let node = newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, + stop: y.token.pos.start + len($x)) + ) + ) + self.emitConstant(node, Type(kind: Int64)) + of binExpr: + var x: int + var y = BinExpr(node) + try: + discard parseBin(y.literal.lexeme, x) + except ValueError: + self.error("integer value out of range") + let node = newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, + stop: y.token.pos.start + len($x)) + ) + ) + self.emitConstant(node, Type(kind: Int64)) + of octExpr: + var x: int + var y = OctExpr(node) + try: + discard parseOct(y.literal.lexeme, x) + except ValueError: + self.error("integer value out of range") + let node = newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, + stop: y.token.pos.start + len($x)) + ) + ) + self.emitConstant(node, Type(kind: Int64)) + of floatExpr: + var x: float + var y = FloatExpr(node) + try: + discard parseFloat(y.literal.lexeme, x) + except 
ValueError: + self.error("floating point value out of range") + self.emitConstant(y, Type(kind: Float64)) + of awaitExpr: + var y = AwaitExpr(node) + self.expression(y.expression) + self.emitByte(OpCode.Await) + else: + self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)") + + +proc unary(self: Compiler, node: UnaryExpr) = + ## Compiles unary expressions such as decimal + ## and bitwise negation + let valueType = self.inferType(node.a) + let impl = self.findByType(node.token.lexeme, Type(kind: Function, returnType: valueType, node: nil, args: @[valueType])) + if impl.len() == 0: + self.error(&"cannot find a suitable implementation for '{node.token.lexeme}'") + elif impl.len() > 2: + var msg = &"multiple matching implementations of '{node.token.lexeme}' found:\n" + for fn in reversed(impl): + var node = FunDecl(fn.valueType.node) + discard self.typeToStr(fn.valueType) + msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" + self.error(msg) + else: + # Pushes the return address + self.emitByte(LoadUInt32) + # We patch it later! 
+ let idx = self.chunk.consts.len() + self.emitBytes(self.chunk.writeConstant((0xffffffff'u32).toQuad())) + self.expression(node.a) # Pushes the operand onto the stack + self.emitByte(Call) # Creates a stack frame + self.emitBytes(impl[0].codePos.toTriple()) + self.emitBytes(1.toTriple()) + self.patchReturnAddress(idx) + + +proc binary(self: Compiler, node: BinaryExpr) = + ## Compiles all binary expressions + # These two lines prepare the stack by pushing the + # opcode's operands onto it + self.expression(node.a) + self.expression(node.b) + # TODO: Find implementation of + # the given operator and call it + case node.operator.kind: + of NoMatch: + # a and b + self.expression(node.a) + var jump: int + if self.enableOptimizations: + jump = self.emitJump(JumpIfFalseOrPop) + else: + jump = self.emitJump(JumpIfFalse) + self.emitByte(Pop) + self.expression(node.b) + self.patchJump(jump) + of EndOfFile: + # a or b + self.expression(node.a) + let jump = self.emitJump(JumpIfTrue) + self.expression(node.b) + self.patchJump(jump) + else: + self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug!)") + + +proc declareName(self: Compiler, node: Declaration) = + ## Statically declares a name into the current scope + case node.kind: + of NodeKind.varDecl: + var node = VarDecl(node) + # Creates a new Name entry so that self.identifier emits the proper stack offset + if self.names.high() > 16777215: + # If someone ever hits this limit in real-world scenarios, I swear I'll + # slap myself 100 times with a sign saying "I'm dumb". 
Mark my words + self.error("cannot declare more than 16777216 variables at a time") + for name in self.findByName(node.name.token.lexeme): + if name.name.token.lexeme == node.name.token.lexeme: + self.error(&"attempt to redeclare '{node.name.token.lexeme}', which was previously defined in '{name.owner}' at line {name.valueType.node.token.line}") + self.names.add(Name(depth: self.scopeDepth, + name: node.name, + isPrivate: node.isPrivate, + owner: self.currentModule, + isConst: node.isConst, + valueType: Type(kind: self.inferType( + node.value).kind, node: node), + codePos: self.chunk.code.len(), + isLet: node.isLet)) + of NodeKind.funDecl: + var node = FunDecl(node) + # TODO: Emit some optional debugging + # metadata to let the VM know where a function's + # code begins and ends (similar to what gcc does with + # CFI in object files) to build stack traces + self.names.add(Name(depth: self.scopeDepth, + isPrivate: node.isPrivate, + isConst: false, + owner: self.currentModule, + valueType: Type(kind: Function, node: node, + returnType: self.inferType( + node.returnType), + args: @[]), + codePos: self.chunk.code.high(), + name: node.name, + isLet: false)) + let fn = self.names[^1] + for argument in node.arguments: + if self.names.high() > 16777215: + self.error("cannot declare more than 16777216 variables at a time") + # wait, no LoadVar?? Yes! 
That's because when calling functions, + # arguments will already be on the stack so there's no need to + # load them here + self.names.add(Name(depth: self.scopeDepth + 1, + isPrivate: true, + owner: self.currentModule, + isConst: false, + name: argument.name, + valueType: nil, + codePos: self.chunk.code.len(), + isLet: false)) + self.names[^1].valueType = self.inferType(argument.valueType) + # We check if the argument's type is a generic + if self.names[^1].valueType == nil and argument.valueType.kind == identExpr: + for gen in node.generics: + if gen.name == IdentExpr(argument.valueType): + self.names[^1].valueType = Type(kind: Generic) + break + # If it's still nil, it's an error! + if self.names[^1].valueType == nil: + self.error(&"cannot determine the type of argument '{self.names[^1].name.token.lexeme}'") + self.names[^1].valueType.node = argument.name + fn.valueType.args.add(self.names[^1].valueType) + else: + discard # Unreachable + + +proc identifier(self: Compiler, node: IdentExpr) = + ## Compiles access to identifiers + let s = self.resolve(node) + if s == nil: + self.error(&"reference to undeclared name '{node.token.lexeme}'") + elif s.isConst: + # Constants are emitted as, you guessed it, LoadConstant instructions + # no matter the scope depth. If optimizations are enabled, the compiler + # will reuse the same constant every time it is referenced instead of + # allocating a new one each time + self.emitConstant(node, self.inferType(node)) + else: + self.detectClosureVariable(s.name) + let t = self.getStackPos(node) + let index = t.pos + # We don't check if index is -1 because if it + # were, self.resolve() would have returned nil + if not t.closedOver: + # Static name resolution, loads value at index in the stack. Very fast. Much wow. 
+ self.emitByte(LoadVar) + self.emitBytes((index - self.frames[^1]).toTriple()) + else: + if self.closedOver.len() == 0: + self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)") + # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics. + # This makes closures work as expected and is not comparatively slower than indexing our stack (since they're both + # dynamic arrays at runtime anyway) + self.emitByte(LoadHeap) + self.emitBytes(self.closedOver.high().toTriple()) + + +proc findByName(self: Compiler, name: string): seq[Name] = + ## Looks for objects that have been already declared + ## with the given name. Returns all objects that apply + for obj in reversed(self.names): + if obj.name.token.lexeme == name: + result.add(obj) + + +proc findByType(self: Compiler, name: string, kind: Type): seq[Name] = + ## Looks for objects that have already been declared + ## with the given name and type + for obj in self.findByName(name): + if self.compareTypes(obj.valueType, kind): + result.add(obj) + + +proc assignment(self: Compiler, node: ASTNode) = + ## Compiles assignment expressions + case node.kind: + of assignExpr: + let node = AssignExpr(node) + let name = IdentExpr(node.name) + let r = self.resolve(name) + if r == nil: + self.error(&"assignment to undeclared name '{name.token.lexeme}'") + elif r.isConst: + self.error(&"cannot assign to '{name.token.lexeme}' (constant)") + elif r.isLet: + self.error(&"cannot reassign '{name.token.lexeme}'") + self.expression(node.value) + let t = self.getStackPos(name) + let index = t.pos + if index != -1: + if not t.closedOver: + self.emitByte(StoreVar) + else: + self.emitByte(StoreHeap) + self.emitBytes(index.toTriple()) + else: + self.error(&"reference to undeclared name '{node.token.lexeme}'") + of setItemExpr: + let node = SetItemExpr(node) + let typ = self.inferType(node) + if typ == nil: + 
self.error(&"cannot determine the type of '{node.name.token.lexeme}'") + # TODO + else: + self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") + + +proc beginScope(self: Compiler) = + ## Begins a new local scope by incrementing the current + ## scope's depth + inc(self.scopeDepth) + + +proc endScope(self: Compiler) = + ## Ends the current local scope + if self.scopeDepth == 0: + self.error("cannot call endScope with scopeDepth == 0 (This is an internal error and most likely a bug)") + dec(self.scopeDepth) + var popped: int = 0 + for i, ident in reversed(self.names): + if ident.depth > self.scopeDepth: + inc(popped) + self.names.delete(self.names.len() - i) + if not self.enableOptimizations: + # All variables with a scope depth larger than the current one + # are now out of scope. Begone, you're now homeless! + self.emitByte(Pop) + if self.enableOptimizations and popped > 1: + # If we're popping less than 65535 variables, then + # we can emit a PopN instruction. This is true for + # 99.99999% of the use cases of the language (who the + # hell is going to use 65 THOUSAND local variables?), but + # if you'll ever use more then Peon will emit a PopN instruction + # for the first 65 thousand and change local variables and then + # emit another batch of plain ol' Pop instructions for the rest + if popped <= uint16.high().int(): + self.emitByte(PopN) + self.emitBytes(popped.toDouble()) + else: + self.emitByte(PopN) + self.emitBytes(uint16.high().int.toDouble()) + for i in countdown(self.names.high(), popped - uint16.high().int()): + if self.names[i].depth > self.scopeDepth: + self.emitByte(Pop) + elif popped == 1: + # We only emit PopN if we're popping more than one value + self.emitByte(Pop) + + +proc blockStmt(self: Compiler, node: BlockStmt) = + ## Compiles block statements, which create a new + ## local scope. 
+ self.beginScope() + for decl in node.code: + self.declaration(decl) + self.endScope() + + +proc ifStmt(self: Compiler, node: IfStmt) = + ## Compiles if/else statements for conditional + ## execution of code + self.expression(node.condition) + var jumpCode: OpCode + if self.enableOptimizations: + jumpCode = JumpIfFalsePop + else: + jumpCode = JumpIfFalse + let jump = self.emitJump(jumpCode) + if not self.enableOptimizations: + self.emitByte(Pop) + self.statement(node.thenBranch) + self.patchJump(jump) + if node.elseBranch != nil: + let jump = self.emitJump(JumpForwards) + self.statement(node.elseBranch) + self.patchJump(jump) + + +proc emitLoop(self: Compiler, begin: int) = + ## Emits a JumpBackwards instruction with the correct + ## jump offset + var offset: int + case OpCode(self.chunk.code[begin + 1]): # The jump instruction + of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse, + LongJumpIfFalsePop, LongJumpIfTrue: + offset = self.chunk.code.len() - begin + 4 + else: + offset = self.chunk.code.len() - begin + if offset > uint16.high().int: + if offset > 16777215: + self.error("cannot jump more than 16777215 bytecode instructions") + self.emitByte(LongJumpBackwards) + self.emitBytes(offset.toTriple()) + else: + self.emitByte(JumpBackwards) + self.emitBytes(offset.toDouble()) + + +proc whileStmt(self: Compiler, node: WhileStmt) = + ## Compiles C-style while loops and + ## desugared C-style for loops + let start = self.chunk.code.len() + self.expression(node.condition) + var jump: int + if self.enableOptimizations: + jump = self.emitJump(JumpIfFalsePop) + else: + jump = self.emitJump(JumpIfFalse) + self.emitByte(Pop) + self.statement(node.body) + self.patchJump(jump) + self.emitLoop(start) + + +proc expression(self: Compiler, node: Expression) = + ## Compiles all expressions + if self.inferType(node) == nil: + if node.kind != identExpr: + # So we can raise a more appropriate + # error in self.identifier() + self.error("expression has no type") + case 
node.kind: + of callExpr: + discard # TODO + of getItemExpr: + discard # TODO + # Note that for setItem and assign we don't convert + # the node to its true type because that type information + # would be lost in the call anyway. The differentiation + # happens in self.assignment() + of setItemExpr, assignExpr: + self.assignment(node) + of identExpr: + self.identifier(IdentExpr(node)) + of unaryExpr: + # Unary expressions such as ~5 and -3 + self.unary(UnaryExpr(node)) + of groupingExpr: + # Grouping expressions like (2 + 1) + self.expression(GroupingExpr(node).expression) + of binaryExpr: + # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 + self.binary(BinaryExpr(node)) + of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, + infExpr, nanExpr, floatExpr, nilExpr: + # Since all of these AST nodes share the + # same overall structure and the kind + # field is enough to tell one from the + # other, why bother with specialized + # cases when one is enough? + self.literal(node) + else: + self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") + + +proc awaitStmt(self: Compiler, node: AwaitStmt) = + ## Compiles await statements. An await statement + ## is like an await expression, but parsed in the + ## context of statements for usage outside expressions, + ## meaning it can be used standalone. It's basically the + ## same as an await expression followed by a semicolon. + ## Await expressions are the only native construct to + ## run coroutines from within an already asynchronous + ## context (which should be orchestrated by an event loop). + ## They block in the caller until the callee returns + self.expression(node.expression) + self.emitByte(OpCode.Await) + + +proc deferStmt(self: Compiler, node: DeferStmt) = + ## Compiles defer statements. 
A defer statement + ## is executed right before its containing function + ## exits (either because of a return or an exception) + let current = self.chunk.code.len + self.expression(node.expression) + for i in countup(current, self.chunk.code.high()): + self.deferred.add(self.chunk.code[i]) + self.chunk.code.del(i) + + + proc returnStmt(self: Compiler, node: ReturnStmt) = + ## Compiles return statements. An empty return + ## implicitly returns nil + let returnType = self.inferType(node.value) + let typ = self.inferType(self.currentFunction) + ## Check that the type of the returned value matches the function's declared return type + if returnType == nil and typ.returnType != nil: + self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', but expression has no type") + elif typ.returnType == nil and returnType != nil: + self.error("empty return statement is not allowed in non-void functions") + elif not self.compareTypes(returnType, typ.returnType): + self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead") + if node.value != nil: + self.expression(node.value) + self.emitByte(OpCode.ReturnValue) + else: + self.emitByte(OpCode.Return) + discard self.frames.pop() + + + proc yieldStmt(self: Compiler, node: YieldStmt) = + ## Compiles yield statements + self.expression(node.expression) + self.emitByte(OpCode.Yield) + + + proc raiseStmt(self: Compiler, node: RaiseStmt) = + ## Compiles raise statements + self.expression(node.exception) + self.emitByte(OpCode.Raise) + + + proc continueStmt(self: Compiler, node: ContinueStmt) = + ## Compiles continue statements.
A continue statement + ## jumps to the next iteration in a loop + if self.currentLoop.start <= 65535: + self.emitByte(Jump) + self.emitBytes(self.currentLoop.start.toDouble()) + else: + if self.currentLoop.start > 16777215: + self.error("too much code to jump over in continue statement") + self.emitByte(LongJump) + self.emitBytes(self.currentLoop.start.toTriple()) + + + proc breakStmt(self: Compiler, node: BreakStmt) = + ## Compiles break statements. A break statement + ## jumps past the end of its enclosing loop + + # Emits dummy jump offset, this is + # patched later + discard self.emitJump(OpCode.Jump) + self.currentLoop.breakPos.add(self.chunk.code.high() - 4) + if self.currentLoop.depth > self.scopeDepth: + # Breaking out of a loop closes its scope + self.endScope() + + + proc patchBreaks(self: Compiler) = + ## Patches "break" opcodes with + ## actual jumps. This is needed + ## because the size of code + ## to skip is not known before + ## the loop is fully compiled + for brk in self.currentLoop.breakPos: + self.chunk.code[brk] = JumpForwards.uint8() + self.patchJump(brk) + + + proc assertStmt(self: Compiler, node: AssertStmt) = + ## Compiles assert statements (raise + ## AssertionError if the expression is falsey) + self.expression(node.expression) + self.emitByte(OpCode.Assert) + + + proc statement(self: Compiler, node: Statement) = + ## Compiles all statements + case node.kind: + of exprStmt: + var expression = ExprStmt(node).expression + self.expression(expression) + self.emitByte(Pop) # Expression statements discard their value.
Their main use case is side effects in function calls + of NodeKind.ifStmt: + self.ifStmt(IfStmt(node)) + of NodeKind.assertStmt: + self.assertStmt(AssertStmt(node)) + of NodeKind.raiseStmt: + self.raiseStmt(RaiseStmt(node)) + of NodeKind.breakStmt: + self.breakStmt(BreakStmt(node)) + of NodeKind.continueStmt: + self.continueStmt(ContinueStmt(node)) + of NodeKind.returnStmt: + self.returnStmt(ReturnStmt(node)) + of NodeKind.importStmt: + discard + of NodeKind.whileStmt, NodeKind.forStmt: + ## Our parser already desugars for loops to + ## while loops! + let loop = self.currentLoop + self.currentLoop = Loop(start: self.chunk.code.len(), + depth: self.scopeDepth, breakPos: @[]) + self.whileStmt(WhileStmt(node)) + self.patchBreaks() + self.currentLoop = loop + of NodeKind.forEachStmt: + discard + of NodeKind.blockStmt: + self.blockStmt(BlockStmt(node)) + of NodeKind.yieldStmt: + self.yieldStmt(YieldStmt(node)) + of NodeKind.awaitStmt: + self.awaitStmt(AwaitStmt(node)) + of NodeKind.deferStmt: + self.deferStmt(DeferStmt(node)) + of NodeKind.tryStmt: + discard + else: + self.expression(Expression(node)) + + +proc varDecl(self: Compiler, node: VarDecl) = + ## Compiles variable declarations + let kind = self.toIntrinsic(node.valueType) + let typ = self.inferType(node.value) + if kind == nil and typ == nil: + self.error(&"cannot determine the type of '{node.name.token.lexeme}'") + elif typ != kind and kind != nil: + self.error(&"expected value of type '{self.typeToStr(kind)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(typ)}'") + self.expression(node.value) + self.declareName(node) + + +proc funDecl(self: Compiler, node: FunDecl) = + ## Compiles function declarations + # A function's code is just compiled linearly + # and then jumped over + let jmp = self.emitJump(Jump) + var function = self.currentFunction + self.declareName(node) + self.frames.add(self.names.high()) + # TODO: Forward declarations + if node.body != nil: + if BlockStmt(node.body).code.len() 
== 0: + self.error("Cannot declare function with empty body") + let fnType = self.inferType(node) + let impl = self.findByType(node.name.token.lexeme, fnType) + if impl.len() > 1: + # Oh-oh! We found more than one implementation of + # the same function! Error! + var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n" + for fn in reversed(impl): + var node = FunDecl(fn.valueType.node) + discard self.typeToStr(fn.valueType) + msg &= &"- '{node.name.token.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n" + self.error(msg) + # We store the current function + self.currentFunction = node + + # Since the deferred array is a linear + # sequence of instructions and we want + # to keep track to whose function's each + # set of deferred instruction belongs, + # we record the length of the deferred + # array before compiling the function + # and use this info later to compile + # the try/finally block with the deferred + # code + var deferStart = self.deferred.len() + + self.blockStmt(BlockStmt(node.body)) + # Yup, we're done. That was easy, huh? + # But after all functions are just named + # scopes, and we compile them just like that: + # we declare their name and arguments (before + # their body so recursion works) and then just + # handle them as a block statement (which takes + # care of incrementing self.scopeDepth so locals + # are resolved properly). 
There's a need for a bit + # of boilerplate code to make closures work, but + # that's about it + case self.currentFunction.kind: + of NodeKind.funDecl: + if not self.currentFunction.hasExplicitReturn: + let typ = self.inferType(self.currentFunction) + if self.currentFunction.returnType == nil and typ != nil: + self.error("non-empty return statement is not allowed in void functions") + if self.currentFunction.returnType != nil: + self.error("function has an explicit return type, but no explicit return statement was found") + self.emitByte(OpCode.Return) + of NodeKind.lambdaExpr: + if not LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn: + self.emitByte(OpCode.Return) + else: + discard # Unreachable + # Currently defer is not functional so we + # just pop the instructions + for i in countup(deferStart, self.deferred.len() - 1, 1): + self.deferred.delete(i) + + self.patchJump(jmp) + # This makes us compile nested functions correctly + self.currentFunction = function + + +proc patchReturnAddress(self: Compiler, retAddr: int) = + ## Patches the return address of a function + ## call. 
This is called at each iteration of + ## the compiler's loop + let address = self.chunk.code.len().toQuad() + self.chunk.consts[retAddr] = address[0] + self.chunk.consts[retAddr + 1] = address[1] + self.chunk.consts[retAddr + 2] = address[2] + self.chunk.consts[retAddr + 3] = address[3] + + +proc declaration(self: Compiler, node: Declaration) = + ## Compiles all declarations + case node.kind: + of NodeKind.varDecl: + self.varDecl(VarDecl(node)) + of NodeKind.funDecl: + self.funDecl(FunDecl(node)) + else: + self.statement(Statement(node)) + + +proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk = + ## Compiles a sequence of AST nodes into a chunk + ## object + self.chunk = newChunk() + self.ast = ast + self.file = file + self.names = @[] + self.scopeDepth = 0 + self.currentFunction = nil + self.currentModule = self.file.extractFilename() + self.current = 0 + self.frames = @[0] + while not self.done(): + self.declaration(Declaration(self.step())) + if self.ast.len() > 0: + # *Technically* an empty program is a valid program + self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope + result = self.chunk + if self.ast.len() > 0 and self.scopeDepth != 0: + self.error(&"invalid state: invalid scopeDepth value (expected 0, got {self.scopeDepth}), did you forget to call endScope/beginScope?") diff --git a/src/frontend/lexer.nim b/src/frontend/lexer.nim new file mode 100644 index 0000000..04c2f80 --- /dev/null +++ b/src/frontend/lexer.nim @@ -0,0 +1,641 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## A simple and modular tokenizer implementation with arbitrary lookahead +## using a customizable symbol table + +import strutils +import parseutils +import strformat +import tables + + +import meta/token +import meta/errors + + +export token +export errors + + +type + SymbolTable* = ref object + ## A table of symbols used + ## to lex a source file + + # Although we don't parse keywords + # as symbols, but rather as identifiers, + # we keep them here for consistency + # purposes + keywords: TableRef[string, TokenType] + symbols: TableRef[string, TokenType] + Lexer* = ref object + ## A lexer object + symbols*: SymbolTable + source: string + tokens: seq[Token] + line: int + start: int + current: int + file: string + lines: seq[tuple[start, stop: int]] + lastLine: int + + +proc newSymbolTable: SymbolTable = + new(result) + result.keywords = newTable[string, TokenType]() + result.symbols = newTable[string, TokenType]() + + +proc addSymbol*(self: SymbolTable, lexeme: string, token: TokenType) = + ## Adds a symbol to the symbol table. Overwrites + ## any previous entries + self.symbols[lexeme] = token + + +proc removeSymbol*(self: SymbolTable, lexeme: string) = + ## Removes a symbol from the symbol table + ## (does nothing if it does not exist) + self.symbols.del(lexeme) + + +proc addKeyword*(self: SymbolTable, lexeme: string, token: TokenType) = + ## Adds a keyword to the symbol table. 
Overwrites + ## any previous entries + self.keywords[lexeme] = token + + +proc removeKeyword*(self: SymbolTable, lexeme: string) = + ## Removes a keyword from the symbol table + ## (does nothing if it does not exist) + self.keywords.del(lexeme) + + +proc existsSymbol*(self: SymbolTable, lexeme: string): bool {.inline.} = + ## Returns true if a given symbol exists + ## in the symbol table already + lexeme in self.symbols + + +proc existsKeyword*(self: SymbolTable, lexeme: string): bool {.inline.} = + ## Returns true if a given keyword exists + ## in the symbol table already + lexeme in self.keywords + + +proc getToken(self: Lexer, lexeme: string): Token = + ## Gets the matching token object for a given + ## string according to the symbol table or + ## returns nil if there's no match + let table = self.symbols + var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault( + lexeme, NoMatch)) + if kind == NoMatch: + return nil + new(result) + result.kind = kind + result.lexeme = self.source[self.start.. 
result: + result = len(lexeme) + + +proc getSymbols(self: SymbolTable, n: int): seq[string] = + ## Returns all n-bytes symbols + ## in the symbol table + for lexeme in self.symbols.keys(): + if len(lexeme) == n: + result.add(lexeme) + +# Wrappers around isDigit and isAlphanumeric for +# strings +proc isDigit(s: string): bool = + for c in s: + if not c.isDigit(): + return false + return true + + +proc isAlphaNumeric(s: string): bool = + for c in s: + if not c.isAlphaNumeric(): + return false + return true + +proc incLine(self: Lexer) + +# Simple public getters used for error +# formatting and whatnot +proc getStart*(self: Lexer): int = self.start +proc getFile*(self: Lexer): string = self.file +proc getCurrent*(self: Lexer): int = self.current +proc getLine*(self: Lexer): int = self.line +proc getSource*(self: Lexer): string = self.source +proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = + if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile: + self.incLine() + return self.lines[line - 1] + + +proc newLexer*(self: Lexer = nil): Lexer = + ## Initializes the lexer or resets + ## the state of an existing one + new(result) + if self != nil: + result = self + result.source = "" + result.tokens = @[] + result.line = 1 + result.start = 0 + result.current = 0 + result.file = "" + result.lines = @[] + result.lastLine = 0 + result.symbols = newSymbolTable() + + +proc done(self: Lexer): bool = + ## Returns true if we reached EOF + result = self.current >= self.source.len + + +proc incLine(self: Lexer) = + ## Increments the lexer's line + ## and updates internal line + ## metadata + self.lines.add((self.lastLine, self.current)) + self.lastLine = self.current + self.line += 1 + + +proc step(self: Lexer, n: int = 1): string = + ## Steps n characters forward in the + ## source file (default = 1). A string + ## of at most n bytes is returned. 
If n + ## exceeds EOF, the string will be shorter + while len(result) < n: + if self.done() or self.current > self.source.high(): + break + else: + result.add(self.source[self.current]) + inc(self.current) + + +proc peek(self: Lexer, distance: int = 0, length: int = 1): string = + ## Returns a stream of characters of + ## at most length bytes from the source + ## file, starting at the given distance, + ## without consuming it. The distance + ## parameter may be negative to retrieve + ## previously consumed tokens. If the + ## distance and/or the length are beyond + ## EOF (even partially), the resulting string + ## will be shorter than length bytes + var i = distance + while len(result) < length: + if self.done() or self.current + i > self.source.high() or + self.current + i < 0: + break + else: + result.add(self.source[self.current + i]) + inc(i) + + +proc error(self: Lexer, message: string) = + ## Raises a lexing error with a formatted + ## error message + raise LexingError(msg: message, line: self.line, file: self.file, lexeme: self.peek()) + + +proc check(self: Lexer, s: string, distance: int = 0): bool = + ## Behaves like self.match(), without consuming the + ## token. False is returned if we're at EOF + ## regardless of what the token to check is. + ## The distance is passed directly to self.peek() + if self.done(): + return false + return self.peek(distance, len(s)) == s + + +proc check(self: Lexer, args: openarray[string], distance: int = 0): bool = + ## Calls self.check() in a loop with + ## each character from the given set of + ## strings and returns at the first match. + ## Useful to check multiple tokens in a situation + ## where only one of them may match at one time + for s in args: + if self.check(s, distance): + return true + return false + + +proc match(self: Lexer, s: string): bool = + ## Returns true if the next len(s) bytes + ## of the source file match the provided + ## string. 
If the match is successful, + ## len(s) bytes are consumed, otherwise + ## false is returned + if not self.check(s): + return false + discard self.step(len(s)) + return true + + +proc match(self: Lexer, args: openarray[string]): bool = + ## Calls self.match() in a loop with + ## each character from the given set of + ## strings and returns at the first match. + ## Useful to match multiple tokens in a situation + ## where only one of them may match at one time + for s in args: + if self.match(s): + return true + return false + + +proc createToken(self: Lexer, tokenType: TokenType) = + ## Creates a token object and adds it to the token + ## list. The lexeme and position of the token are + ## inferred from the current state of the tokenizer + var tok: Token = new(Token) + tok.kind = tokenType + tok.lexeme = self.source[self.start.. uint8.high().int: + self.error("escape sequence value too large (> 255)") + self.source[self.current] = cast[char](value) + of 'u', 'U': + self.error("unicode escape sequences are not supported (yet)") + of 'x': + var code = "" + var value = 0 + var i = self.current + while i < self.source.high() and (let c = self.source[ + i].toLowerAscii(); c in 'a'..'f' or c in '0'..'9'): + code &= self.source[i] + i += 1 + assert parseHex(code, value) == code.len() + if value > uint8.high().int: + self.error("escape sequence value too large (> 255)") + self.source[self.current] = cast[char](value) + else: + self.error(&"invalid escape sequence '\\{self.peek()}'") + + +proc parseString(self: Lexer, delimiter: string, mode: string = "single") = + ## Parses string literals. They can be expressed using matching pairs + ## of either single or double quotes. 
Most C-style escape sequences are + ## supported, moreover, a specific prefix may be prepended + ## to the string to instruct the lexer on how to parse it: + ## - b -> declares a byte string, where each character is + ## interpreted as an integer instead of a character + ## - r -> declares a raw string literal, where escape sequences + ## are not parsed and stay as-is + ## - f -> declares a format string, where variables may be + ## interpolated using curly braces like f"Hello, {name}!". + ## Braces may be escaped using a pair of them, so to represent + ## a literal "{" in an f-string, one would use {{ instead + ## Multi-line strings can be declared using matching triplets of + ## either single or double quotes. They can span across multiple + ## lines and escape sequences in them are not parsed, like in raw + ## strings, so a multi-line string prefixed with the "r" modifier + ## is redundant, although multi-line byte/format strings are supported + var slen = 0 + while not self.check(delimiter) and not self.done(): + if self.match("\n"): + if mode == "multi": + self.incLine() + else: + self.error("unexpected EOL while parsing string literal") + if mode in ["raw", "multi"]: + discard self.step() + elif self.match("\\"): + # This madness here serves to get rid of the slash, since \x is mapped + # to a one-byte sequence but the string '\x' is actually 2 bytes (or more, + # depending on the specific escape sequence) + self.source = self.source[0.. 
1 and delimiter == "'": + self.error("invalid character literal (length must be one!)") + if mode == "multi": + if not self.match(delimiter.repeat(3)): + self.error("unexpected EOL while parsing multi-line string literal") + elif self.done() and self.peek(-1) != delimiter: + self.error("unexpected EOF while parsing string literal") + else: + discard self.step() + if delimiter == "\"": + self.createToken(String) + else: + self.createToken(Char) + + +proc parseBinary(self: Lexer) = + ## Parses binary numbers + while self.peek().isDigit(): + if not self.check(["0", "1"]): + self.error(&"invalid digit '{self.peek()}' in binary literal") + discard self.step() + + +proc parseOctal(self: Lexer) = + ## Parses octal numbers + while self.peek().isDigit(): + if self.peek() notin "0".."7": + self.error(&"invalid digit '{self.peek()}' in octal literal") + discard self.step() + + +proc parseHex(self: Lexer) = + ## Parses hexadecimal numbers + while self.peek().isAlphaNumeric(): + if not self.peek().isDigit() and self.peek().toLowerAscii() notin "a".."f": + self.error(&"invalid hexadecimal literal") + discard self.step() + + +proc parseNumber(self: Lexer) = + ## Parses numeric literals, which encompass + ## integers and floating point numbers. + ## Floats also support scientific notation + ## (i.e. 3e14), while the fractional part + ## must be separated from the decimal one + ## using a dot (which acts as the comma). + ## Float literals such as 32.5e3 are also supported. + ## The "e" for the scientific notation of floats + ## is case-insensitive. Binary number literals are + ## expressed using the prefix 0b, hexadecimal + ## numbers with the prefix 0x and octal numbers + ## with the prefix 0o. 
Numeric literals support + ## size specifiers, like so: 10'u8, 3.14'f32 + var kind: TokenType + case self.peek(): + of "b": + discard self.step() + kind = Binary + self.parseBinary() + of "x": + kind = Hex + discard self.step() + self.parseHex() + of "o": + kind = Octal + discard self.step() + self.parseOctal() + else: + kind = Integer + while isDigit(self.peek()) and not self.done(): + discard self.step() + if self.check(["e", "E"]): + kind = Float + discard self.step() + while self.peek().isDigit() and not self.done(): + discard self.step() + elif self.check("."): + # TODO: Is there a better way? + discard self.step() + if not isDigit(self.peek()): + self.error("invalid float number literal") + kind = Float + while isDigit(self.peek()) and not self.done(): + discard self.step() + if self.check(["e", "E"]): + discard self.step() + while isDigit(self.peek()) and not self.done(): + discard self.step() + if self.match("'"): + # Could be a size specifier, better catch it + while (self.peek().isAlphaNumeric() or self.check("_")) and + not self.done(): + discard self.step() + self.createToken(kind) + if kind == Binary: + # To make our life easier, we pad the binary number in here already + while (self.tokens[^1].lexeme.len() - 2) mod 8 != 0: + self.tokens[^1].lexeme = "0b" & "0" & self.tokens[^1].lexeme[2..^1] + + +proc parseBackticks(self: Lexer) = + ## Parses tokens surrounded + ## by backticks. This may be used + ## for name stropping as well as to + ## reimplement existing operators + ## (e.g. +, -, etc.) without the + ## parser complaining about syntax + ## errors + while not self.match("`") and not self.done(): + if self.peek().isAlphaNumeric() or self.symbols.existsSymbol(self.peek()): + discard self.step() + continue + self.error(&"unexpected character: '{self.peek()}'") + self.createToken(Identifier) + # Strips the backticks + self.tokens[^1].lexeme = self.tokens[^1].lexeme[1..^2] + + +proc parseIdentifier(self: Lexer) = + ## Parses keywords and identifiers. 
+ ## Note that multi-character tokens + ## (aka UTF runes) are not supported + ## by design and *will* break things + while (self.peek().isAlphaNumeric() or self.check("_")) and not self.done(): + discard self.step() + let name: string = self.source[self.start.. 0: + for symbol in self.symbols.getSymbols(n): + if self.match(symbol): + # We've found the largest possible + # match! + self.tokens.add(self.getToken(symbol)) + return + dec(n) + # We just assume what we have in front of us + # is a symbol + discard self.step() + self.createToken(Symbol) + + +proc lex*(self: Lexer, source, file: string): seq[Token] = + ## Lexes a source file, converting a stream + ## of characters into a series of tokens + var symbols = self.symbols + discard self.newLexer() + self.symbols = symbols + self.source = source + self.file = file + self.lines = @[] + while not self.done(): + self.next() + self.start = self.current + self.tokens.add(Token(kind: EndOfFile, lexeme: "", + line: self.line, pos: (self.current, self.current))) + self.incLine() + return self.tokens diff --git a/src/frontend/meta/ast.nim b/src/frontend/meta/ast.nim new file mode 100644 index 0000000..5b3cc93 --- /dev/null +++ b/src/frontend/meta/ast.nim @@ -0,0 +1,701 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## An Abstract Syntax Tree (AST) structure for our recursive-descent +## top-down parser. 
For more info, check out docs/grammar.md + + +import strformat +import strutils + + +import token +export token + +type + NodeKind* = enum + ## Enumeration of the AST + ## node types, sorted by + ## precedence + + # Declarations + funDecl = 0'u8, + varDecl, + # Statements + forStmt, # Unused for now (for loops are compiled to while loops) + ifStmt, + returnStmt, + breakStmt, + continueStmt, + whileStmt, + forEachStmt, + blockStmt, + raiseStmt, + assertStmt, + tryStmt, + yieldStmt, + awaitStmt, + importStmt, + deferStmt, + # An expression followed by a semicolon + exprStmt, + # Expressions + assignExpr, + lambdaExpr, + awaitExpr, + yieldExpr, + setItemExpr, # Set expressions like a.b = "c" + binaryExpr, + unaryExpr, + sliceExpr, + callExpr, + getItemExpr, # Get expressions like a.b + # Primary expressions + groupingExpr, # Parenthesized expressions such as (true) and (3 + 4) + trueExpr, + falseExpr, + strExpr, + charExpr, + intExpr, + floatExpr, + hexExpr, + octExpr, + binExpr, + nilExpr, + nanExpr, + infExpr, + identExpr, # Identifier + pragmaExpr + + # Here I would've rather used object variants, and in fact that's what was in + # place before, but not being able to re-declare a field of the same type in + # another case branch is kind of a deal breaker long-term, so until that is + # fixed (check out https://github.com/nim-lang/RFCs/issues/368 for more info). + # I'll stick to using inheritance instead + + + # Generic AST node types + ASTNode* = ref object of RootObj + ## An AST node + kind*: NodeKind + # Regardless of the type of node, we keep the token in the AST node for internal usage. 
+ # This is not shown when the node is printed, but makes it a heck of a lot easier to report + # errors accurately even deep in the compilation pipeline + token*: Token + # This weird inheritance chain is needed for the parser to + # work properly + Declaration* = ref object of ASTNode + ## A declaration + pragmas*: seq[Pragma] + generics*: seq[tuple[name: IdentExpr, cond: Expression]] + + Statement* = ref object of Declaration + ## A statement + Expression* = ref object of Statement + ## An expression + LiteralExpr* = ref object of Expression + # Using a string for literals makes it much easier to handle numeric types, as + # there is no overflow nor underflow or float precision issues during parsing. + # Numbers are just serialized as strings and then converted back to numbers + # before being passed to the VM, which also keeps the door open in the future + # to implementing bignum arithmetic that can take advantage of natively supported + # machine types, meaning that if a numeric type fits into a 64 bit signed/unsigned + # int then it is stored in such a type to save space, otherwise it is just converted + # to a bigint. 
Bigfloats with arbitrary-precision arithmetic would also be nice, + # although arguably less useful (and probably significantly slower than bigints) + literal*: Token + + IntExpr* = ref object of LiteralExpr + OctExpr* = ref object of LiteralExpr + HexExpr* = ref object of LiteralExpr + BinExpr* = ref object of LiteralExpr + FloatExpr* = ref object of LiteralExpr + StrExpr* = ref object of LiteralExpr + CharExpr* = ref object of LiteralExpr + + TrueExpr* = ref object of LiteralExpr + FalseExpr* = ref object of LiteralExpr + NilExpr* = ref object of LiteralExpr + NanExpr* = ref object of LiteralExpr + InfExpr* = ref object of LiteralExpr + + IdentExpr* = ref object of Expression + name*: Token + + GroupingExpr* = ref object of Expression + expression*: Expression + + GetItemExpr* = ref object of Expression + obj*: Expression + name*: IdentExpr + + SetItemExpr* = ref object of GetItemExpr + # Since a setItem expression is just + # a getItem one followed by an assignment, + # inheriting it from getItem makes sense + value*: Expression + + CallExpr* = ref object of Expression + callee*: Expression # The object being called + arguments*: tuple[positionals: seq[Expression], keyword: seq[tuple[ + name: IdentExpr, value: Expression]]] + + UnaryExpr* = ref object of Expression + operator*: Token + a*: Expression + + BinaryExpr* = ref object of UnaryExpr + # Binary expressions can be seen here as unary + # expressions with an extra operand so we just + # inherit from that and add a second operand + b*: Expression + + YieldExpr* = ref object of Expression + expression*: Expression + + AwaitExpr* = ref object of Expression + expression*: Expression + + LambdaExpr* = ref object of Expression + body*: Statement + arguments*: seq[tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool]] + defaults*: seq[Expression] + isGenerator*: bool + isAsync*: bool + isPure*: bool + returnType*: Expression + hasExplicitReturn*: bool + + + SliceExpr* = ref 
object of Expression + expression*: Expression + ends*: seq[Expression] + + AssignExpr* = ref object of Expression + name*: Expression + value*: Expression + + ExprStmt* = ref object of Statement + expression*: Expression + + ImportStmt* = ref object of Statement + moduleName*: IdentExpr + + AssertStmt* = ref object of Statement + expression*: Expression + + RaiseStmt* = ref object of Statement + exception*: Expression + + BlockStmt* = ref object of Statement + code*: seq[Declaration] + + ForStmt* = ref object of Statement + discard # Unused + + ForEachStmt* = ref object of Statement + identifier*: IdentExpr + expression*: Expression + body*: Statement + + DeferStmt* = ref object of Statement + expression*: Expression + + TryStmt* = ref object of Statement + body*: Statement + handlers*: seq[tuple[body: Statement, exc: IdentExpr]] + finallyClause*: Statement + elseClause*: Statement + + WhileStmt* = ref object of Statement + condition*: Expression + body*: Statement + + AwaitStmt* = ref object of Statement + expression*: Expression + + BreakStmt* = ref object of Statement + + ContinueStmt* = ref object of Statement + + ReturnStmt* = ref object of Statement + value*: Expression + + IfStmt* = ref object of Statement + condition*: Expression + thenBranch*: Statement + elseBranch*: Statement + + YieldStmt* = ref object of Statement + expression*: Expression + + VarDecl* = ref object of Declaration + name*: IdentExpr + value*: Expression + isConst*: bool + isPrivate*: bool + isLet*: bool + valueType*: Expression + + FunDecl* = ref object of Declaration + name*: IdentExpr + body*: Statement + arguments*: seq[tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool]] + defaults*: seq[Expression] + isAsync*: bool + isGenerator*: bool + isPrivate*: bool + isPure*: bool + returnType*: Expression + hasExplicitReturn*: bool + Pragma* = ref object of Expression + name*: IdentExpr + args*: seq[LiteralExpr] + + +proc isConst*(self: ASTNode): bool = 
+ ## Returns true if the given + ## AST node represents a value + ## of constant type. All integers, + ## strings and singletons count as + ## constants + case self.kind: + of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, + infExpr, nanExpr, floatExpr, nilExpr: + return true + else: + return false + + +proc isLiteral*(self: ASTNode): bool {.inline.} = + ## Returns if the AST node represents a literal + self.kind in {intExpr, hexExpr, binExpr, octExpr, + strExpr, falseExpr, trueExpr, infExpr, + nanExpr, floatExpr, nilExpr + } + +## AST node constructors +proc newASTNode*(kind: NodeKind, token: Token): ASTNode = + ## Initializes a new generic ASTNode object + new(result) + result.kind = kind + result.token = token + + +proc newPragma*(name: IdentExpr, args: seq[LiteralExpr]): Pragma = + new(result) + result.kind = pragmaExpr + result.args = args + result.name = name + + +proc newIntExpr*(literal: Token): IntExpr = + result = IntExpr(kind: intExpr) + result.literal = literal + result.token = literal + + +proc newOctExpr*(literal: Token): OctExpr = + result = OctExpr(kind: octExpr) + result.literal = literal + result.token = literal + + +proc newHexExpr*(literal: Token): HexExpr = + result = HexExpr(kind: hexExpr) + result.literal = literal + result.token = literal + + +proc newBinExpr*(literal: Token): BinExpr = + result = BinExpr(kind: binExpr) + result.literal = literal + result.token = literal + + +proc newFloatExpr*(literal: Token): FloatExpr = + result = FloatExpr(kind: floatExpr) + result.literal = literal + result.token = literal + + +proc newTrueExpr*(token: Token): LiteralExpr = LiteralExpr(kind: trueExpr, + token: token, literal: token) +proc newFalseExpr*(token: Token): LiteralExpr = LiteralExpr(kind: falseExpr, + token: token, literal: token) +proc newNaNExpr*(token: Token): LiteralExpr = LiteralExpr(kind: nanExpr, + token: token, literal: token) +proc newNilExpr*(token: Token): LiteralExpr = LiteralExpr(kind: nilExpr, + token: token, 
literal: token) +proc newInfExpr*(token: Token): LiteralExpr = LiteralExpr(kind: infExpr, + token: token, literal: token) + + +proc newStrExpr*(literal: Token): StrExpr = + result = StrExpr(kind: strExpr) + result.literal = literal + result.token = literal + + +proc newCharExpr*(literal: Token): CharExpr = + result = CharExpr(kind: charExpr) + result.literal = literal + result.token = literal + + +proc newIdentExpr*(name: Token): IdentExpr = + result = IdentExpr(kind: identExpr) + result.name = name + result.token = name + + +proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr = + result = GroupingExpr(kind: groupingExpr) + result.expression = expression + result.token = token + + +proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool]], defaults: seq[Expression], + body: Statement, isGenerator: bool, isAsync: bool, token: Token, + returnType: Expression, pragmas: seq[Pragma], + generics: seq[tuple[name: IdentExpr, cond: Expression]]): LambdaExpr = + result = LambdaExpr(kind: lambdaExpr) + result.body = body + result.arguments = arguments + result.defaults = defaults + result.isGenerator = isGenerator + result.isAsync = isAsync + result.token = token + result.returnType = returnType + result.isPure = false + result.pragmas = pragmas + result.generics = generics + + +proc newGetItemExpr*(obj: Expression, name: IdentExpr, + token: Token): GetItemExpr = + result = GetItemExpr(kind: getItemExpr) + result.obj = obj + result.name = name + result.token = token + + +proc newSetItemExpr*(obj: Expression, name: IdentExpr, value: Expression, + token: Token): SetItemExpr = + result = SetItemExpr(kind: setItemExpr) + result.obj = obj + result.name = name + result.value = value + result.token = token + + +proc newCallExpr*(callee: Expression, arguments: tuple[positionals: seq[ + Expression], keyword: seq[tuple[name: IdentExpr, value: Expression]]], + token: Token): CallExpr = + result = 
CallExpr(kind: callExpr) + result.callee = callee + result.arguments = arguments + result.token = token + + +proc newSliceExpr*(expression: Expression, ends: seq[Expression], + token: Token): SliceExpr = + result = SliceExpr(kind: sliceExpr) + result.expression = expression + result.ends = ends + result.token = token + + +proc newUnaryExpr*(operator: Token, a: Expression): UnaryExpr = + result = UnaryExpr(kind: unaryExpr) + result.operator = operator + result.a = a + result.token = result.operator + + +proc newBinaryExpr*(a: Expression, operator: Token, b: Expression): BinaryExpr = + result = BinaryExpr(kind: binaryExpr) + result.operator = operator + result.a = a + result.b = b + result.token = operator + + +proc newYieldExpr*(expression: Expression, token: Token): YieldExpr = + result = YieldExpr(kind: yieldExpr) + result.expression = expression + result.token = token + + +proc newAssignExpr*(name: Expression, value: Expression, + token: Token): AssignExpr = + result = AssignExpr(kind: assignExpr) + result.name = name + result.value = value + result.token = token + + +proc newAwaitExpr*(expression: Expression, token: Token): AwaitExpr = + result = AwaitExpr(kind: awaitExpr) + result.expression = expression + result.token = token + + +proc newExprStmt*(expression: Expression, token: Token): ExprStmt = + result = ExprStmt(kind: exprStmt) + result.expression = expression + result.token = token + + +proc newImportStmt*(moduleName: IdentExpr, token: Token): ImportStmt = + result = ImportStmt(kind: importStmt) + result.moduleName = moduleName + result.token = token + + +proc newYieldStmt*(expression: Expression, token: Token): YieldStmt = + result = YieldStmt(kind: yieldStmt) + result.expression = expression + result.token = token + + +proc newAwaitStmt*(expression: Expression, token: Token): AwaitStmt = + result = AwaitStmt(kind: awaitStmt) + result.expression = expression + result.token = token + + +proc newAssertStmt*(expression: Expression, token: Token): 
AssertStmt = + result = AssertStmt(kind: assertStmt) + result.expression = expression + result.token = token + + +proc newDeferStmt*(expression: Expression, token: Token): DeferStmt = + result = DeferStmt(kind: deferStmt) + result.expression = expression + result.token = token + + +proc newRaiseStmt*(exception: Expression, token: Token): RaiseStmt = + result = RaiseStmt(kind: raiseStmt) + result.exception = exception + result.token = token + + +proc newTryStmt*(body: Statement, handlers: seq[tuple[body: Statement, exc: IdentExpr]], + finallyClause: Statement, + elseClause: Statement, token: Token): TryStmt = + result = TryStmt(kind: tryStmt) + result.body = body + result.handlers = handlers + result.finallyClause = finallyClause + result.elseClause = elseClause + result.token = token + + +proc newBlockStmt*(code: seq[Declaration], token: Token): BlockStmt = + result = BlockStmt(kind: blockStmt) + result.code = code + result.token = token + + +proc newWhileStmt*(condition: Expression, body: Statement, + token: Token): WhileStmt = + result = WhileStmt(kind: whileStmt) + result.condition = condition + result.body = body + result.token = token + + +proc newForEachStmt*(identifier: IdentExpr, expression: Expression, + body: Statement, token: Token): ForEachStmt = + result = ForEachStmt(kind: forEachStmt) + result.identifier = identifier + result.expression = expression + result.body = body + result.token = token + + +proc newBreakStmt*(token: Token): BreakStmt = + result = BreakStmt(kind: breakStmt) + result.token = token + + +proc newContinueStmt*(token: Token): ContinueStmt = + result = ContinueStmt(kind: continueStmt) + result.token = token + + +proc newReturnStmt*(value: Expression, token: Token): ReturnStmt = + result = ReturnStmt(kind: returnStmt) + result.value = value + result.token = token + + +proc newIfStmt*(condition: Expression, thenBranch, elseBranch: Statement, + token: Token): IfStmt = + result = IfStmt(kind: ifStmt) + result.condition = condition + 
result.thenBranch = thenBranch + result.elseBranch = elseBranch + result.token = token + + +proc newVarDecl*(name: IdentExpr, value: Expression, isConst: bool = false, + isPrivate: bool = true, token: Token, isLet: bool = false, + valueType: Expression, pragmas: seq[Pragma]): VarDecl = + result = VarDecl(kind: varDecl) + result.name = name + result.value = value + result.isConst = isConst + result.isPrivate = isPrivate + result.token = token + result.isLet = isLet + result.valueType = valueType + result.pragmas = pragmas + + +proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]], defaults: seq[Expression], + body: Statement, isAsync, isGenerator: bool, + isPrivate: bool, token: Token, pragmas: seq[Pragma], + returnType: Expression, generics: seq[tuple[name: IdentExpr, cond: Expression]]): FunDecl = + result = FunDecl(kind: funDecl) + result.name = name + result.arguments = arguments + result.defaults = defaults + result.body = body + result.isAsync = isAsync + result.isGenerator = isGenerator + result.isPrivate = isPrivate + result.token = token + result.pragmas = pragmas + result.returnType = returnType + result.isPure = false + result.generics = generics + + +proc `$`*(self: ASTNode): string = + if self == nil: + return "nil" + case self.kind: + of intExpr, floatExpr, hexExpr, binExpr, octExpr, strExpr, trueExpr, + falseExpr, nanExpr, nilExpr, infExpr: + if self.kind in {trueExpr, falseExpr, nanExpr, nilExpr, infExpr}: + result &= &"Literal({($self.kind)[0..^5]})" + elif self.kind == strExpr: + result &= &"Literal({LiteralExpr(self).literal.lexeme[1..^2].escape()})" + else: + result &= &"Literal({LiteralExpr(self).literal.lexeme})" + of identExpr: + result &= &"Identifier('{IdentExpr(self).name.lexeme}')" + of groupingExpr: + result &= &"Grouping({GroupingExpr(self).expression})" + of getItemExpr: + var self = GetItemExpr(self) + result &= &"GetItem(obj={self.obj}, 
name={self.name})" + of setItemExpr: + var self = SetItemExpr(self) + result &= &"SetItem(obj={self.obj}, name={self.value}, value={self.value})" + of callExpr: + var self = CallExpr(self) + result &= &"""Call({self.callee}, arguments=(positionals=[{self.arguments.positionals.join(", ")}], keyword=[{self.arguments.keyword.join(", ")}]))""" + of unaryExpr: + var self = UnaryExpr(self) + result &= &"Unary(Operator('{self.operator.lexeme}'), {self.a})" + of binaryExpr: + var self = BinaryExpr(self) + result &= &"Binary({self.a}, Operator('{self.operator.lexeme}'), {self.b})" + of assignExpr: + var self = AssignExpr(self) + result &= &"Assign(name={self.name}, value={self.value})" + of exprStmt: + var self = ExprStmt(self) + result &= &"ExpressionStatement({self.expression})" + of breakStmt: + result = "Break()" + of importStmt: + var self = ImportStmt(self) + result &= &"Import({self.moduleName})" + of assertStmt: + var self = AssertStmt(self) + result &= &"Assert({self.expression})" + of raiseStmt: + var self = RaiseStmt(self) + result &= &"Raise({self.exception})" + of blockStmt: + var self = BlockStmt(self) + result &= &"""Block([{self.code.join(", ")}])""" + of whileStmt: + var self = WhileStmt(self) + result &= &"While(condition={self.condition}, body={self.body})" + of forEachStmt: + var self = ForEachStmt(self) + result &= &"ForEach(identifier={self.identifier}, expression={self.expression}, body={self.body})" + of returnStmt: + var self = ReturnStmt(self) + result &= &"Return({self.value})" + of yieldExpr: + var self = YieldExpr(self) + result &= &"Yield({self.expression})" + of awaitExpr: + var self = AwaitExpr(self) + result &= &"Await({self.expression})" + of ifStmt: + var self = IfStmt(self) + if self.elseBranch == nil: + result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch=nil)" + else: + result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch={self.elseBranch})" + of yieldStmt: + var self = 
YieldStmt(self) + result &= &"YieldStmt({self.expression})" + of awaitStmt: + var self = AwaitStmt(self) + result &= &"AwaitStmt({self.expression})" + of varDecl: + var self = VarDecl(self) + result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType})" + of funDecl: + var self = FunDecl(self) + result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate})""" + of lambdaExpr: + var self = LambdaExpr(self) + result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync})""" + of deferStmt: + var self = DeferStmt(self) + result &= &"Defer({self.expression})" + of sliceExpr: + var self = SliceExpr(self) + result &= &"""Slice({self.expression}, ends=[{self.ends.join(", ")}])""" + of tryStmt: + var self = TryStmt(self) + result &= &"TryStmt(body={self.body}, handlers={self.handlers}" + if self.finallyClause != nil: + result &= &", finallyClause={self.finallyClause}" + else: + result &= ", finallyClause=nil" + if self.elseClause != nil: + result &= &", elseClause={self.elseClause}" + else: + result &= ", elseClause=nil" + result &= ")" + else: + discard + + +proc `==`*(self, other: IdentExpr): bool {.inline.} = self.token == other.token diff --git a/src/frontend/meta/bytecode.nim b/src/frontend/meta/bytecode.nim new file mode 100644 index 0000000..ed010ef --- /dev/null +++ b/src/frontend/meta/bytecode.nim @@ -0,0 +1,228 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Low level bytecode implementation details + +import strutils +import strformat + +import ../../util/multibyte + + +type + Chunk* = ref object + ## A piece of bytecode. + ## consts is used when serializing to/from a bytecode stream. + ## code is the linear sequence of compiled bytecode instructions. + ## lines maps bytecode instructions to line numbers using Run + ## Length Encoding. Instructions are encoded in groups whose structure + ## follows the following schema: + ## - The first integer represents the line number + ## - The second integer represents the count of whatever comes after it + ## (let's call it c) + ## - After c, a sequence of c integers follows + ## + ## A visual representation may be easier to understand: [1, 2, 3, 4] + ## This is to be interpreted as "there are 2 instructions at line 1 whose values + ## are 3 and 4" + ## This is more efficient than using the naive approach, which would encode + ## the same line number multiple times and waste considerable amounts of space. + consts*: seq[uint8] + code*: seq[uint8] + lines*: seq[int] + + OpCode* {.pure.} = enum + ## Enum of Peon's bytecode opcodes + + # Note: x represents the argument + # to unary opcodes, while a and b + # represent arguments to binary + # opcodes. Other variable names (c, d, ...) + # may be used for more complex opcodes. If + # an opcode takes any arguments at runtime, + # they come from either the stack or the VM's + # closure array. Some other opcodes (e.g. 
+ # jumps), take arguments in the form of 16 + # or 24 bit numbers that are defined statically + # at compilation time into the bytecode + + # These push a constant onto the stack + LoadInt64 = 0u8, + LoadUInt64, + LoadInt32, + LoadUInt32, + LoadInt16, + LoadUInt16, + LoadInt8, + LoadUInt8, + LoadFloat64, + LoadFloat32, + LoadString, + ## Singleton opcodes (each of them pushes a constant singleton on the stack) + LoadNil, + LoadTrue, + LoadFalse, + LoadNan, + LoadInf, + ## Basic stack operations + Pop, # Pops an element off the stack and discards it + Push, # Pushes x onto the stack + PopN, # Pops x elements off the stack (optimization for exiting local scopes which usually pop many elements) + ## Name resolution/handling + LoadAttribute, # Pushes the attribute b of object a onto the stack + LoadVar, # Pushes the object at position x in the stack onto the stack + StoreVar, # Stores the value of b at position a in the stack + LoadHeap, # Pushes the object position x in the closure array onto the stack + StoreHeap, # Stores the value of b at position a in the closure array + ## Looping and jumping + Jump, # Absolute, unconditional jump into the bytecode + JumpForwards, # Relative, unconditional, positive jump in the bytecode + JumpBackwards, # Relative, unconditional, negative jump in the bytecode + JumpIfFalse, # Jumps to a relative index in the bytecode if x is false + JumpIfTrue, # Jumps to a relative index in the bytecode if x is true + JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthyness). 
Optimization for if statements + JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and) + ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one) + LongJump, + LongJumpIfFalse, + LongJumpIfTrue, + LongJumpIfFalsePop, + LongJumpIfFalseOrPop, + LongJumpForwards, + LongJumpBackwards, + ## Functions + Call, # Calls a function and initiates a new stack frame + Return, # Terminates the current function without popping off the stack + ReturnValue, # Pops a return value off the stack and terminates the current function + ## Exception handling + Raise, # Raises exception x or re-raises active exception if x is nil + BeginTry, # Initiates an exception handling context + FinishTry, # Closes the current exception handling context + ## Generators + Yield, # Yields control from a generator back to the caller + ## Coroutines + Await, # Calls an asynchronous function + ## Misc + Assert, # Raises an AssertionFailed exception if x is false + NoOp, # Just a no-op + + +# We group instructions by their operation/operand types for easier handling when debugging + +# Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.) 
+const simpleInstructions* = {OpCode.Return, LoadNil, + LoadTrue, LoadFalse, + LoadNan, LoadInf, + Pop, OpCode.Raise, + BeginTry, FinishTry, + OpCode.Yield, OpCode.Await, + OpCode.NoOp, OpCode.Return, + OpCode.ReturnValue} + +# Constant instructions are instructions that operate on the bytecode constant table +const constantInstructions* = {LoadInt64, LoadUInt64, + LoadInt32, LoadUInt32, + LoadInt16, LoadUInt16, + LoadInt8, LoadUInt8, + LoadFloat64, LoadFloat32, + LoadString} + +# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form +# of 24 bit integers +const stackTripleInstructions* = {StoreVar, LoadVar, LoadHeap, StoreHeap} + +# Stack double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form +# of 16 bit integers +const stackDoubleInstructions* = {} + +# Argument double argument instructions take hardcoded arguments as 16 bit integers +const argumentDoubleInstructions* = {PopN, } + +# Argument double argument instructions take hardcoded arguments as 24 bit integers +const argumentTripleInstructions* = {} + +# Instructions that call functions +const callInstructions* = {Call, } + +# Jump instructions jump at relative or absolute bytecode offsets +const jumpInstructions* = {Jump, LongJump, JumpIfFalse, JumpIfFalsePop, + JumpForwards, JumpBackwards, + LongJumpIfFalse, LongJumpIfFalsePop, + LongJumpForwards, LongJumpBackwards, + JumpIfTrue, LongJumpIfTrue} + + +proc newChunk*: Chunk = + ## Initializes a new, empty chunk + result = Chunk(consts: @[], code: @[], lines: @[]) + + +proc `$`*(self: Chunk): string = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])""" + + +proc write*(self: Chunk, newByte: uint8, line: int) = + ## Adds the given instruction at the provided line number + ## to the given chunk object + assert line > 0, "line must be greater than zero" + if self.lines.high() >= 1 and self.lines[^2] == 
line: + self.lines[^1] += 1 + else: + self.lines.add(line) + self.lines.add(1) + self.code.add(newByte) + + +proc write*(self: Chunk, bytes: openarray[uint8], line: int) = + ## Calls write in a loop with all members of the given + ## array + for cByte in bytes: + self.write(cByte, line) + + +proc write*(self: Chunk, newByte: OpCode, line: int) = + ## Adds the given instruction at the provided line number + ## to the given chunk object + self.write(uint8(newByte), line) + + +proc write*(self: Chunk, bytes: openarray[OpCode], line: int) = + ## Calls write in a loop with all members of the given + ## array + for cByte in bytes: + self.write(uint8(cByte), line) + + +proc getLine*(self: Chunk, idx: int): int = + ## Returns the associated line of a given + ## instruction index + if self.lines.len < 2: + raise newException(IndexDefect, "the chunk object is empty") + var + count: int + current: int = 0 + for n in countup(0, self.lines.high(), 2): + count = self.lines[n + 1] + if idx in current - count.. 0 local) + scopeDepth: int + operators: OperatorTable + # The AST node + tree: seq[Declaration] + + +proc newOperatorTable: OperatorTable = + ## Initializes a new OperatorTable + ## object + new(result) + result.tokens = @[] + result.precedence = newTable[Precedence, seq[string]]() + for prec in Precedence: + result.precedence[prec] = @[] + + +proc addOperator(self: OperatorTable, lexeme: string) = + ## Adds an operator to the table. Its precedence + ## is inferred from the operator's lexeme (the + ## criteria are similar to Nim's) + if lexeme in self.tokens: + return # We've already added it! 
+ var prec = Precedence.high() + if lexeme.len() >= 2 and lexeme[^2..^1] in ["->", "~>", "=>"]: + prec = Arrow + elif lexeme.endsWith("=") and lexeme[0] notin {'<', '>', '!', '?', '~', '='}: + prec = Assign + elif lexeme[0] in {'$', } or lexeme == "**": + prec = Power + elif lexeme[0] in {'*', '%', '/', '\\'}: + prec = Multiplication + elif lexeme[0] in {'+', '-', '|', '~'}: + prec = Addition + elif lexeme[0] in {'<', '>', '=', '!'}: + prec = Compare + elif lexeme == "and": + prec = Precedence.And + elif lexeme == "or": + prec = Precedence.Or + self.tokens.add(lexeme) + self.precedence[prec].add(lexeme) + + +proc getPrecedence(self: OperatorTable, lexeme: string): Precedence = + ## Gets the precedence of a given operator + for (prec, operators) in self.precedence.pairs(): + if lexeme in operators: + return prec + + +proc newParser*: Parser = + ## Initializes a new Parser object + new(result) + result.current = 0 + result.file = "" + result.tokens = @[] + result.currentFunction = nil + result.currentLoop = LoopContext.None + result.scopeDepth = 0 + result.operators = newOperatorTable() + result.tree = @[] + + +# Public getters for improved error formatting +proc getCurrent*(self: Parser): int {.inline.} = self.current +proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >= + self.tokens.high() or + self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1]) +proc getCurrentFunction*(self: Parser): Declaration {.inline.} = self.currentFunction +proc getFile*(self: Parser): string {.inline.} = self.file +proc getModule*(self: Parser): string {.inline.} = self.getFile().extractFilename() + +# Handy templates to make our life easier, thanks nim! +template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1) +template endOfLine(msg: string) = self.expect(Semicolon, msg) + + +proc peek(self: Parser, distance: int = 0): Token = + ## Peeks at the token at the given distance. 
+ ## If the distance is out of bounds, an EOF + ## token is returned. A negative distance may + ## be used to retrieve previously consumed + ## tokens + if self.tokens.high() == -1 or self.current + distance > self.tokens.high( + ) or self.current + distance < 0: + result = endOfFile + else: + result = self.tokens[self.current + distance] + + +proc done(self: Parser): bool {.inline.} = + ## Returns true if we're at the + ## end of the file. Note that the + ## parser expects an explicit + ## EOF token to signal the end + ## of the file + result = self.peek().kind == EndOfFile + + +proc step(self: Parser, n: int = 1): Token = + ## Steps n tokens into the input, + ## returning the last consumed one + if self.done(): + result = self.peek() + else: + result = self.tokens[self.current] + self.current += 1 + + +proc error(self: Parser, message: string) {.raises: [ParseError].} = + ## Raises a ParseError exception + raise ParseError(msg: message, token: self.getCurrentToken(), file: self.file, module: self.getModule()) + + +# Why do we allow strings or enum members of TokenType? Well, it's simple: +# symbols like ":" and "=" are both valid operator names (therefore they are +# tokenized as symbols), but they are also used in a context where they are just +# separators (for example, the colon is used in type declarations). Since we can't +# tell at tokenization time which of the two contexts we're in, we just treat everything +# as a symbol and in the cases where we need a specific token we just match the string +# directly +proc check[T: TokenType or string](self: Parser, kind: T, + distance: int = 0): bool = + ## Checks if the given token at the given distance + ## matches the expected kind and returns a boolean. 
+ ## The distance parameter is passed directly to + ## self.peek() + when T is TokenType: + self.peek(distance).kind == kind + else: + when T is string: + self.peek(distance).lexeme == kind + + +proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool = + ## Calls self.check() in a loop with each entry of + ## the given openarray of token kinds and returns + ## at the first match. Note that this assumes + ## that only one token may match at a given + ## position + for k in kind: + if self.check(k): + return true + return false + + +proc match[T: TokenType or string](self: Parser, kind: T): bool = + ## Behaves like self.check(), except that when a token + ## matches it is also consumed + if self.check(kind): + discard self.step() + result = true + else: + result = false + + +proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool = + ## Calls self.match() in a loop with each entry of + ## the given openarray of token kinds and returns + ## at the first match. Note that this assumes + ## that only one token may exist at a given + ## position + for k in kind: + if self.match(k): + return true + result = false + + +proc expect[T: TokenType or string](self: Parser, kind: T, + message: string = "") = + ## Behaves like self.match(), except that + ## when a token doesn't match, an error + ## is raised. 
If no error message is
+    ## given, a default one is used
+    if not self.match(kind):
+        if message.len() == 0:
+            self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead")
+        else:
+            self.error(message)
+
+
+proc expect[T: TokenType or string](self: Parser, kind: openarray[T],
+                                    message: string = "") =
+    ## Behaves like self.expect(), except that
+    ## an error is raised only if none of the
+    ## given token kinds matches
+    for k in kind:
+        # Bugfix: this used to call self.match(kind) (the whole array)
+        # instead of matching the single kind k at this position
+        if self.match(k):
+            return
+    # Bugfix: the error path referenced an undefined identifier "kinds"
+    # and silently raised nothing when a custom message was supplied
+    if message.len() == 0:
+        self.error(&"""expecting any of the following tokens: {kind.join(", ")}, but got {self.peek().kind} instead""")
+    else:
+        self.error(message)
+
+
+# Forward declarations
+proc expression(self: Parser): Expression
+proc expressionStatement(self: Parser): Statement
+proc statement(self: Parser): Statement
+proc varDecl(self: Parser, isLet: bool = false,
+             isConst: bool = false): Declaration
+proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false,
+             isLambda: bool = false, isOperator: bool = false): Declaration
+proc declaration(self: Parser): Declaration
+# End of forward declarations
+
+
+proc primary(self: Parser): Expression =
+    ## Parses primary expressions such
+    ## as integer literals and keywords
+    ## that map to builtin types (true,
+    ## false, nil, etc.)
+ case self.peek().kind: + of True: + result = newTrueExpr(self.step()) + of False: + result = newFalseExpr(self.step()) + of TokenType.NotANumber: + result = newNanExpr(self.step()) + of Nil: + result = newNilExpr(self.step()) + of Float: + result = newFloatExpr(self.step()) + of Integer: + result = newIntExpr(self.step()) + of Identifier: + result = newIdentExpr(self.step()) + of LeftParen: + let tok = self.step() + result = newGroupingExpr(self.expression(), tok) + self.expect(RightParen, "unterminated parenthesized expression") + of Yield: + let tok = self.step() + if self.currentFunction == nil: + self.error("'yield' cannot be used outside functions") + elif self.currentFunction.token.kind != Generator: + # It's easier than doing conversions for lambda/funDecl + self.error("'yield' cannot be used outside generators") + if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]): + # Expression delimiters + result = newYieldExpr(self.expression(), tok) + else: + # Empty yield + result = newYieldExpr(newNilExpr(Token()), tok) + of Await: + let tok = self.step() + if self.currentFunction == nil: + self.error("'await' cannot be used outside functions") + if self.currentFunction.token.kind != Coroutine: + self.error("'await' can only be used inside coroutines") + result = newAwaitExpr(self.expression(), tok) + of RightParen, RightBracket, RightBrace: + # This is *technically* unnecessary: the parser would + # throw an error regardless, but it's a little bit nicer + # when the error message is more specific + self.error(&"unmatched '{self.peek().lexeme}'") + of Hex: + result = newHexExpr(self.step()) + of Octal: + result = newOctExpr(self.step()) + of Binary: + result = newBinExpr(self.step()) + of String: + result = newStrExpr(self.step()) + of Infinity: + result = newInfExpr(self.step()) + of Function: + discard self.step() + result = Expression(self.funDecl(isLambda = true)) + of Coroutine: + discard self.step() + result = 
Expression(self.funDecl(isAsync = true, isLambda = true)) + of Generator: + discard self.step() + result = Expression(self.funDecl(isGenerator = true, + isLambda = true)) + else: + self.error("invalid syntax") + + +proc makeCall(self: Parser, callee: Expression): Expression = + ## Utility function called iteratively by self.call() + ## to parse a function call + let tok = self.peek(-1) + var argNames: seq[IdentExpr] = @[] + var arguments: tuple[positionals: seq[Expression], keyword: seq[tuple[ + name: IdentExpr, value: Expression]]] = (positionals: @[], + keyword: @[]) + var argument: Expression = nil + var argCount = 0 + if not self.check(RightParen): + while true: + if argCount >= 255: + self.error("call can not have more than 255 arguments") + break + argument = self.expression() + if argument.kind == assignExpr: + # TODO: This will explode with slices! + if IdentExpr(AssignExpr(argument).name) in argNames: + self.error("duplicate keyword argument in call") + argNames.add(IdentExpr(AssignExpr(argument).name)) + arguments.keyword.add((name: IdentExpr(AssignExpr( + argument).name), value: AssignExpr(argument).value)) + elif arguments.keyword.len() == 0: + arguments.positionals.add(argument) + else: + self.error("positional argument cannot follow keyword argument in call") + if not self.match(Comma): + break + argCount += 1 + self.expect(RightParen) + result = newCallExpr(callee, arguments, tok) + + +proc call(self: Parser): Expression = + ## Parses function calls, object field + ## accessing and slicing expressions + result = self.primary() + while true: + if self.match(LeftParen): + result = self.makeCall(result) + elif self.match(Dot): + self.expect(Identifier, "expecting attribute name after '.'") + result = newGetItemExpr(result, newIdentExpr(self.peek(-1)), + self.peek(-1)) + elif self.match(LeftBracket): + # Slicing such as a[1:2], which is then + # translated to `[]`(a, 1, 2) + let tok = self.peek(-1) + var ends: seq[Expression] = @[] + while not 
self.check(RightBracket) and not self.done(): + if self.check(":"): + ends.add(newNilExpr(Token(lexeme: "nil"))) + discard self.step() + else: + ends.add(self.expression()) + discard self.match(":") + self.expect(RightBracket, "expecting ']'") + result = newSliceExpr(result, ends, tok) + else: + break + +## Operator parsing handlers + +proc unary(self: Parser): Expression = + if self.peek().lexeme in self.operators.tokens: + result = newUnaryExpr(self.step(), self.unary()) + else: + result = self.call() + + +proc parsePow(self: Parser): Expression = + result = self.unary() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Power: + operator = self.step() + right = self.unary() + result = newBinaryExpr(result, operator, right) + + +proc parseMul(self: Parser): Expression = + result = self.parsePow() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Multiplication: + operator = self.step() + right = self.parsePow() + result = newBinaryExpr(result, operator, right) + + +proc parseAdd(self: Parser): Expression = + result = self.parseMul() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Addition: + operator = self.step() + right = self.parseMul() + result = newBinaryExpr(result, operator, right) + + +proc parseCmp(self: Parser): Expression = + result = self.parseAdd() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Compare: + operator = self.step() + right = self.parseAdd() + result = newBinaryExpr(result, operator, right) + + +proc parseAnd(self: Parser): Expression = + result = self.parseCmp() + var operator: Token + var right: Expression + while self.operators.getPrecedence(self.peek().lexeme) == Precedence.And: + operator = self.step() + right = self.parseCmp() + result = newBinaryExpr(result, operator, right) + + +proc parseOr(self: Parser): 
Expression =
+    result = self.parseAnd()
+    var operator: Token
+    var right: Expression
+    while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
+        operator = self.step()
+        right = self.parseAnd()
+        result = newBinaryExpr(result, operator, right)
+
+
+proc parseAssign(self: Parser): Expression =
+    ## Parses assignment expressions. Valid targets are
+    ## identifiers, slices and attribute accesses
+    result = self.parseOr()
+    if self.operators.getPrecedence(self.peek().lexeme) == Assign:
+        let tok = self.step()
+        var value = self.expression()
+        case result.kind:
+            of identExpr, sliceExpr:
+                result = newAssignExpr(result, value, tok)
+            of getItemExpr:
+                result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
+            else:
+                self.error("invalid assignment target")
+
+
+proc parseArrow(self: Parser): Expression =
+    ## Parses arrow-precedence binary operators
+    result = self.parseAssign()
+    var operator: Token
+    var right: Expression
+    # Bugfix: this loop tested Precedence.Or (copy-pasted from parseOr),
+    # which made Arrow-precedence operators unparseable at this level
+    while self.operators.getPrecedence(self.peek().lexeme) == Arrow:
+        operator = self.step()
+        right = self.parseAssign()
+        result = newBinaryExpr(result, operator, right)
+
+
+## End of operator parsing handlers
+
+
+proc assertStmt(self: Parser): Statement =
+    ## Parses "assert" statements, which
+    ## raise an error if the expression
+    ## fed into them is falsey
+    let tok = self.peek(-1)
+    var expression = self.expression()
+    endOfLine("missing semicolon after assert statement")
+    result = newAssertStmt(expression, tok)
+
+
+proc beginScope(self: Parser) =
+    ## Begins a new lexical scope
+    inc(self.scopeDepth)
+
+
+proc endScope(self: Parser) =
+    ## Ends a new lexical scope
+    dec(self.scopeDepth)
+
+
+proc blockStmt(self: Parser): Statement =
+    ## Parses block statements.
A block + ## statement simply opens a new local + ## scope + self.beginScope() + let tok = self.peek(-1) + var code: seq[Declaration] = @[] + while not self.check(RightBrace) and not self.done(): + code.add(self.declaration()) + if code[^1] == nil: + code.delete(code.high()) + self.expect(RightBrace, "expecting '}'") + result = newBlockStmt(code, tok) + self.endScope() + + +proc breakStmt(self: Parser): Statement = + ## Parses break statements + let tok = self.peek(-1) + if self.currentLoop != Loop: + self.error("'break' cannot be used outside loops") + endOfLine("missing semicolon after break statement") + result = newBreakStmt(tok) + + +proc deferStmt(self: Parser): Statement = + ## Parses defer statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'defer' cannot be used outside functions") + result = newDeferStmt(self.expression(), tok) + endOfLine("missing semicolon after defer statement") + + +proc continueStmt(self: Parser): Statement = + ## Parses continue statements + let tok = self.peek(-1) + if self.currentLoop != Loop: + self.error("'continue' cannot be used outside loops") + endOfLine("missing semicolon after continue statement") + result = newContinueStmt(tok) + + +proc returnStmt(self: Parser): Statement = + ## Parses return statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'return' cannot be used outside functions") + var value: Expression + if not self.check(Semicolon): + # Since return can be used on its own too + # we need to check if there's an actual value + # to return or not + value = self.expression() + endOfLine("missing semicolon after return statement") + result = newReturnStmt(value, tok) + case self.currentFunction.kind: + of NodeKind.funDecl: + FunDecl(self.currentFunction).hasExplicitReturn = true + else: + LambdaExpr(self.currentFunction).hasExplicitReturn = true + + +proc yieldStmt(self: Parser): Statement = + ## Parses yield statements + let tok = self.peek(-1) + if 
self.currentFunction == nil: + self.error("'yield' cannot be outside functions") + elif self.currentFunction.token.kind != Generator: + self.error("'yield' can only be used inside generators") + if not self.check(Semicolon): + result = newYieldStmt(self.expression(), tok) + else: + result = newYieldStmt(newNilExpr(Token(lexeme: "nil")), tok) + endOfLine("missing semicolon after yield statement") + + +proc awaitStmt(self: Parser): Statement = + ## Parses await statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'await' cannot be used outside functions") + if self.currentFunction.token.kind != Coroutine: + self.error("'await' can only be used inside coroutines") + result = newAwaitStmt(self.expression(), tok) + endOfLine("missing semicolon after await statement") + + +proc raiseStmt(self: Parser): Statement = + ## Parses raise statements + var exception: Expression + let tok = self.peek(-1) + if not self.check(Semicolon): + # Raise can be used on its own, in which + # case it re-raises the last active exception + exception = self.expression() + endOfLine("missing semicolon after raise statement") + result = newRaiseStmt(exception, tok) + + +proc forEachStmt(self: Parser): Statement = + ## Parses C#-like foreach loops + let tok = self.peek(-1) + var enclosingLoop = self.currentLoop + self.currentLoop = Loop + self.expect(LeftParen, "expecting '(' after 'foreach'") + self.expect(Identifier) + var identifier = newIdentExpr(self.peek(-1)) + self.expect(":") + var expression = self.expression() + self.expect(RightParen) + var body = self.statement() + result = newForEachStmt(identifier, expression, body, tok) + self.currentLoop = enclosingLoop + + +proc importStmt(self: Parser, fromStmt: bool = false): Statement = + ## Parses import statements + var tok: Token + if fromStmt: + tok = self.peek(-2) + else: + tok = self.peek(-1) + # TODO: New AST node + self.expect(Identifier, "expecting module name(s) after import statement") + result = 
newImportStmt(newIdentExpr(self.peek(-1)), tok) + endOfLine("missing semicolon after import statement") + + + +proc tryStmt(self: Parser): Statement = + ## Parses try/except/else/finally blocks + let tok = self.peek(-1) + var body = self.statement() + var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[] + var finallyClause: Statement + var elseClause: Statement + var excName: Expression + var handlerBody: Statement + while self.match(Except): + excName = self.expression() + if excName.kind == identExpr: + handlerBody = self.statement() + handlers.add((body: handlerBody, exc: IdentExpr(excName))) + else: + excName = nil + if self.match(Else): + elseClause = self.statement() + if self.match(Finally): + finallyClause = self.statement() + if handlers.len() == 0 and elseClause == nil and finallyClause == nil: + self.error("expecting 'except', 'finally' or 'else' statement after 'try' block") + for i, handler in handlers: + if handler.exc == nil and i != handlers.high(): + self.error("catch-all exception handler with bare 'except' must come last in try statement") + result = newTryStmt(body, handlers, finallyClause, elseClause, tok) + + +proc whileStmt(self: Parser): Statement = + ## Parses a C-style while loop statement + let tok = self.peek(-1) + self.beginScope() + var enclosingLoop = self.currentLoop + self.currentLoop = Loop + self.expect(LeftParen, "expecting '(' before while loop condition") + var condition = self.expression() + self.expect(RightParen, "unterminated while loop condition") + result = newWhileStmt(condition, self.statement(), tok) + self.currentLoop = enclosingLoop + self.endScope() + + +proc forStmt(self: Parser): Statement = + ## Parses a C-style for loop + self.beginScope() + let tok = self.peek(-1) + var enclosingLoop = self.currentLoop + self.currentLoop = Loop + self.expect(LeftParen, "expecting '(' after 'for'") + var initializer: ASTNode = nil + var condition: Expression = nil + var increment: Expression = nil + if 
self.match(Semicolon): + discard + elif self.match(Var): + initializer = self.varDecl() + if not VarDecl(initializer).isPrivate: + self.error("cannot declare public for loop initializer") + else: + initializer = self.expressionStatement() + if not self.check(Semicolon): + condition = self.expression() + self.expect(Semicolon, "expecting ';' after for loop condition") + if not self.check(RightParen): + increment = self.expression() + self.expect(RightParen, "unterminated for loop increment") + var body = self.statement() + if increment != nil: + # The increment runs after each iteration, so we + # inject it into the block as the last statement + body = newBlockStmt(@[Declaration(body), newExprStmt(increment, + increment.token)], tok) + if condition == nil: + ## An empty condition is functionally + ## equivalent to "true" + condition = newTrueExpr(Token(lexeme: "true")) + # We can use a while loop, which in this case works just as well + body = newWhileStmt(condition, body, tok) + if initializer != nil: + # Nested blocks, so the initializer is + # only executed once + body = newBlockStmt(@[Declaration(initializer), Declaration(body)], tok) + # This desgugars the following code: + # for (var i = 0; i < 10; i += 1) { + # print(i); + # } + # To the semantically equivalent snippet + # below: + # { + # var i = 0; + # while (i < 10) { + # print(i); + # i += 1; + # } + # } + result = body + self.currentLoop = enclosingLoop + self.endScope() + + +proc ifStmt(self: Parser): Statement = + ## Parses if statements + let tok = self.peek(-1) + self.expect(LeftParen, "expecting '(' before if condition") + var condition = self.expression() + self.expect(RightParen, "expecting ')' after if condition") + var thenBranch = self.statement() + var elseBranch: Statement = nil + if self.match(Else): + elseBranch = self.statement() + result = newIfStmt(condition, thenBranch, elseBranch, tok) + + +template checkDecl(self: Parser, isPrivate: bool) = + ## Handy utility template that avoids us 
from copy + ## pasting the same checks to all declaration handlers + if not isPrivate and self.scopeDepth > 0: + self.error("cannot bind public names inside local scopes") + + +proc varDecl(self: Parser, isLet: bool = false, + isConst: bool = false): Declaration = + ## Parses variable declarations + var tok = self.peek(-1) + var value: Expression + self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'") + var name = newIdentExpr(self.peek(-1)) + let isPrivate = not self.match("*") + self.checkDecl(isPrivate) + var valueType: IdentExpr + if self.match(":"): + # We don't enforce it here because + # the compiler may be able to infer + # the type later! + self.expect(Identifier, "expecting type name after ':'") + valueType = newIdentExpr(self.peek(-1)) + if self.match("="): + value = self.expression() + if isConst and not value.isConst(): + self.error("constant initializer is not a constant") + else: + if tok.kind != Var: + self.error(&"{tok.lexeme} declaration requires an initializer") + value = newNilExpr(Token(lexeme: "nil")) + self.expect(Semicolon, &"expecting semicolon after declaration") + case tok.kind: + of Var: + result = newVarDecl(name, value, isPrivate = isPrivate, token = tok, + valueType = valueType, pragmas = (@[])) + of Const: + result = newVarDecl(name, value, isPrivate = isPrivate, token = tok, + isConst = true, valueType = valueType, pragmas = (@[])) + of Let: + result = newVarDecl(name, value, isPrivate = isPrivate, token = tok, + isLet = isLet, valueType = valueType, pragmas = (@[])) + else: + discard # Unreachable + + +proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]], + parameter: var tuple[name: IdentExpr, + valueType: Expression, mutable: bool, + isRef: bool, isPtr: bool], + defaults: var seq[Expression]) = + ## Helper to parse declaration arguments and avoid code duplication + while not self.check(RightParen): + if arguments.len > 255: 
+ self.error("cannot have more than 255 arguments in function declaration") + self.expect(Identifier, "expecting parameter name") + parameter.name = newIdentExpr(self.peek(-1)) + if self.match(":"): + parameter.mutable = false + parameter.isPtr = false + parameter.isRef = false + if self.match(Var): + parameter.mutable = true + elif self.match(Ptr): + parameter.isPtr = true + elif self.match(Ref): + parameter.isRef = true + parameter.valueType = self.expression() + for i in countdown(arguments.high(), 0): + if arguments[i].valueType != nil: + break + arguments[i].valueType = parameter.valueType + arguments[i].mutable = parameter.mutable + else: + parameter.valueType = nil + if parameter in arguments: + self.error("duplicate parameter name in function declaration") + arguments.add(parameter) + if self.match("="): + defaults.add(self.expression()) + elif defaults.len() > 0: + self.error("positional argument cannot follow default argument in function declaration") + if not self.match(Comma): + break + self.expect(RightParen) + for argument in arguments: + if argument.valueType == nil: + self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration") + + +proc parseFunExpr(self: Parser): LambdaExpr = + ## Parses the return value of a function + ## when it is another function. 
Works + ## recursively + var arguments: seq[tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool]] = @[] + var defaults: seq[Expression] = @[] + result = newLambdaExpr(arguments, defaults, nil, isGenerator = self.peek(-1).kind == Generator, + isAsync = self.peek(-1).kind == Coroutine, token = self.peek(-1), + returnType = nil, pragmas = (@[]), generics=(@[])) + var parameter: tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool] + if self.match(LeftParen): + self.parseDeclArguments(arguments, parameter, defaults) + if self.match(":"): + if self.match([Function, Coroutine, Generator]): + result.returnType = self.parseFunExpr() + else: + result.returnType = self.expression() + + +proc parseGenerics(self: Parser, decl: Declaration) = + ## Parses generics in declarations + var gen: tuple[name: IdentExpr, cond: Expression] + while not self.check(RightBracket) and not self.done(): + self.expect(Identifier, "expecting generic type name") + gen.name = newIdentExpr(self.peek(-1)) + if self.match(":"): + gen.cond = self.expression() + decl.generics.add(gen) + if not self.match(Comma): + break + self.expect(RightBracket) + + +proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, + isLambda: bool = false, isOperator: bool = false): Declaration = # Can't use just FunDecl because it can also return LambdaExpr! + ## Parses all types of functions, coroutines, generators and operators + ## (with or without a name, where applicable) + let tok = self.peek(-1) + var enclosingFunction = self.currentFunction + var arguments: seq[tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool]] = @[] + var defaults: seq[Expression] = @[] + var returnType: Expression + if not isLambda and self.match(Identifier): + # We do this extra check because we might + # be called from a context where it's + # ambiguous whether we're parsing a declaration + # or an expression. 
Fortunately anonymous functions + # are nameless, so we can sort the ambiguity by checking + # if there's an identifier after the keyword + self.currentFunction = newFunDecl(newIdentExpr(self.peek(-1)), arguments, defaults, newBlockStmt(@[], Token()), + isAsync = isAsync, + isGenerator = isGenerator, + isPrivate = true, + token = tok, pragmas = (@[]), + returnType = nil, + generics=(@[])) + if self.match("*"): + FunDecl(self.currentFunction).isPrivate = false + self.checkDecl(FunDecl(self.currentFunction).isPrivate) + if self.match(LeftBracket): + self.parseGenerics(self.currentFunction) + elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")): + # We do a bit of hacking to pretend we never + # wanted to parse this as a declaration in + # the first place and pass control over to + # expressionStatement(), which will in turn + # go all the way up to primary(), which will + # call us back with isLambda=true, allowing us + # to actually parse the function as an expression + while not self.check(tok.kind): # We rewind back to the token that caused us to be called + dec(self.current) + result = Declaration(self.expressionStatement()) + self.currentFunction = enclosingFunction + return result + elif isLambda: + self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator = isGenerator, isAsync = isAsync, token = tok, + returnType = nil, pragmas = (@[]), generics=(@[])) + if self.match(":"): + # Function has explicit return type + if self.match([Function, Coroutine, Generator]): + # The function's return type is another + # function. 
We specialize this case because + # the type declaration for a function lacks + # the braces that would qualify it as an + # expression + returnType = self.parseFunExpr() + else: + returnType = self.expression() + if self.match(LeftParen): + var parameter: tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool] + self.parseDeclArguments(arguments, parameter, defaults) + if self.match(":"): + # Function's return type + if self.match([Function, Coroutine, Generator]): + returnType = self.parseFunExpr() + else: + returnType = self.expression() + if self.currentFunction.kind == funDecl: + if not self.match(Semicolon): + # If we don't find a semicolon, + # it's not a forward declaration + self.expect(LeftBrace) + FunDecl(self.currentFunction).body = self.blockStmt() + else: + # This is a forward declaration, so we explicitly + # nullify the function's body to tell the compiler + # to look for it elsewhere in the file later + FunDecl(self.currentFunction).body = nil + FunDecl(self.currentFunction).arguments = arguments + FunDecl(self.currentFunction).returnType = returnType + else: + self.expect(LeftBrace) + LambdaExpr(Expression(self.currentFunction)).body = self.blockStmt() + LambdaExpr(Expression(self.currentFunction)).arguments = arguments + LambdaExpr(Expression(self.currentFunction)).returnType = returnType + result = self.currentFunction + if isOperator: + if arguments.len() == 0: + self.error("cannot declare operator without arguments") + elif isLambda: + self.error("cannot declare anonymous operator") + for argument in arguments: + if argument.valueType == nil: + self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration") + self.currentFunction = enclosingFunction + + +proc expression(self: Parser): Expression = + ## Parses expressions + result = self.parseArrow() # Highest-level expression + + +proc expressionStatement(self: Parser): Statement = + ## Parses expression statements, which + ## 
are expressions followed by a semicolon + var expression = self.expression() + endOfLine("missing semicolon after expression") + result = Statement(newExprStmt(expression, expression.token)) + + +proc statement(self: Parser): Statement = + ## Parses statements + case self.peek().kind: + of If: + discard self.step() + result = self.ifStmt() + of Assert: + discard self.step() + result = self.assertStmt() + of Raise: + discard self.step() + result = self.raiseStmt() + of Break: + discard self.step() + result = self.breakStmt() + of Continue: + discard self.step() + result = self.continueStmt() + of Return: + discard self.step() + result = self.returnStmt() + of Import: + discard self.step() + result = self.importStmt() + of From: + # TODO + # from module import a [, b, c as d] + discard self.step() + result = self.importStmt(fromStmt = true) + of While: + discard self.step() + result = self.whileStmt() + of For: + discard self.step() + result = self.forStmt() + of Foreach: + discard self.step() + result = self.forEachStmt() + of LeftBrace: + discard self.step() + result = self.blockStmt() + of Yield: + discard self.step() + result = self.yieldStmt() + of Await: + discard self.step() + result = self.awaitStmt() + of Defer: + discard self.step() + result = self.deferStmt() + of Try: + discard self.step() + result = self.tryStmt() + else: + result = self.expressionStatement() + + +proc parsePragma(self: Parser): Pragma = + ## Parses pragmas + if self.scopeDepth == 0: + ## Pragmas used at the + ## top level are either + ## used for compile-time + ## switches or for variable + ## declarations + var decl: VarDecl + for node in self.tree: + if node.token.line == self.peek(-1).line and node.kind == varDecl: + decl = VarDecl(node) + break + else: + var decl = self.currentFunction + # TODO + + +proc declaration(self: Parser): Declaration = + ## Parses declarations + case self.peek().kind: + of Var, Const, Let: + let keyword = self.step() + result = self.varDecl(isLet = 
keyword.kind == Let, + isConst = keyword.kind == Const) + of Function: + discard self.step() + result = self.funDecl() + of Coroutine: + discard self.step() + result = self.funDecl(isAsync = true) + of Generator: + discard self.step() + result = self.funDecl(isGenerator = true) + of Operator: + discard self.step() + result = self.funDecl(isOperator = true) + of TokenType.Comment: + let tok = self.step() + if tok.lexeme.startsWith("#pragma["): + result = self.parsePragma() + of Type, TokenType.Whitespace, TokenType.Tab: + discard self.step() # TODO + else: + result = Declaration(self.statement()) + + +proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] = + ## Parses a sequence of tokens into a sequence of AST nodes + self.tokens = @[] + # The parser is not designed to handle these tokens. + # Maybe create a separate syntax checker module? + for token in tokens: + if token.kind notin {TokenType.Whitespace, Tab}: + self.tokens.add(token) + self.file = file + self.current = 0 + self.currentLoop = LoopContext.None + self.currentFunction = nil + self.scopeDepth = 0 + self.operators = newOperatorTable() + self.tree = @[] + for i, token in self.tokens: + # We do a first pass over the tokens + # to find operators. 
Note that this + # relies on the lexer ending the input + # with an EOF token + if token.kind == Operator: + if i == self.tokens.high(): + self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)") + self.operators.addOperator(self.tokens[i + 1].lexeme) + if i == self.tokens.high() and token.kind != EndOfFile: + # Since we're iterating this list anyway might as + # well perform some extra checks + self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)") + while not self.done(): + self.tree.add(self.declaration()) + if self.tree[^1] == nil: + self.tree.delete(self.tree.high()) + result = self.tree \ No newline at end of file diff --git a/src/main.nim b/src/main.nim new file mode 100644 index 0000000..dccc4c6 --- /dev/null +++ b/src/main.nim @@ -0,0 +1,359 @@ +# Builtins & external libs +import strformat +import strutils +import terminal +import os +# Thanks art <3 +import jale/editor as ed +import jale/templates +import jale/plugin/defaults +import jale/plugin/editor_history +import jale/keycodes +import jale/multiline + + +# Our stuff +import frontend/lexer as l +import frontend/parser as p +import frontend/compiler as c +import backend/vm as v +import util/serializer as s + +# Forward declarations +proc fillSymbolTable(tokenizer: Lexer) +proc getLineEditor: LineEditor + +# Handy dandy compile-time constants +const debugLexer = false +const debugParser = false +const debugCompiler = true +const debugSerializer = false +const debugRuntime = false + +when debugSerializer: + import nimSHA2 + import times + +when debugCompiler: + import util/debugger + + +proc repl = + styledEcho fgMagenta, "Welcome into the peon REPL!" 
+ var + keep = true + tokens: seq[Token] = @[] + tree: seq[Declaration] = @[] + compiled: Chunk + serialized: Serialized + tokenizer = newLexer() + parser = newParser() + compiler = newCompiler() + serializer = newSerializer() + vm = newPeonVM() + editor = getLineEditor() + input: string + tokenizer.fillSymbolTable() + editor.bindEvent(jeQuit): + stdout.styledWriteLine(fgGreen, "Goodbye!") + editor.prompt = "" + keep = false + editor.bindKey("ctrl+a"): + editor.content.home() + editor.bindKey("ctrl+e"): + editor.content.`end`() + while keep: + try: + input = editor.read() + if input.len() == 0: + continue + tokens = tokenizer.lex(input, "stdin") + if tokens.len() == 0: + continue + when debugLexer: + styledEcho fgCyan, "Tokenization step:" + for i, token in tokens: + if i == tokens.high(): + # Who cares about EOF? + break + styledEcho fgGreen, "\t", $token + echo "" + tree = parser.parse(tokens, "stdin") + if tree.len() == 0: + continue + when debugParser: + styledEcho fgCyan, "Parsing step:" + for node in tree: + styledEcho fgGreen, "\t", $node + echo "" + compiled = compiler.compile(tree, "stdin") + when debugCompiler: + styledEcho fgCyan, "Compilation step:" + styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]" + styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]" + styledEcho fgCyan, "\nBytecode disassembler output below:\n" + disassembleChunk(compiled, "stdin") + echo "" + + serializer.dumpFile(compiled, input, "stdin", "stdin.pbc") + serialized = serializer.loadFile("stdin.pbc") + when debugSerializer: + var hashMatches = computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash + styledEcho fgCyan, "Serialization step: " + styledEcho fgBlue, &"\t- File hash: ", fgYellow, serialized.fileHash, fgBlue, " (", if hashMatches: fgGreen else: fgRed, if hashMatches: "OK" else: "Fail", fgBlue, ")" + styledEcho fgBlue, "\t- Peon version: ", fgYellow, 
&"{serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch}", fgBlue, " (commit ", fgYellow, serialized.commitHash[0..8], fgBlue, ") on branch ", fgYellow, serialized.peonBranch + stdout.styledWriteLine(fgBlue, "\t- Compilation date & time: ", fgYellow, fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")) + stdout.styledWrite(fgBlue, &"\t- Constants segment: ") + if serialized.chunk.consts == compiled.consts: + styledEcho fgGreen, "OK" + else: + styledEcho fgRed, "Corrupted" + stdout.styledWrite(fgBlue, &"\t- Code segment: ") + if serialized.chunk.code == compiled.code: + styledEcho fgGreen, "OK" + else: + styledEcho fgRed, "Corrupted" + stdout.styledWrite(fgBlue, "\t- Line info segment: ") + if serialized.chunk.lines == compiled.lines: + styledEcho fgGreen, "OK" + else: + styledEcho fgRed, "Corrupted" + when debugRuntime: + styledEcho fgCyan, "\n\nExecution step: " + vm.run(serialized.chunk) + except LexingError: + let exc = LexingError(getCurrentException()) + let relPos = tokenizer.getRelPos(exc.line) + let line = tokenizer.getSource().splitLines()[exc.line - 1].strip() + stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ", + fgYellow, &"'{exc.file.extractFilename()}'", fgRed, ", line ", fgYellow, $exc.line, fgRed, " at ", fgYellow, &"'{exc.lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except ParseError: + let exc = ParseError(getCurrentException()) + let lexeme = exc.token.lexeme + let lineNo = exc.token.line + let relPos = tokenizer.getRelPos(lineNo) + let fn = parser.getCurrentFunction() + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + var fnMsg = "" + if fn != nil and fn.kind == funDecl: + fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'" + stderr.styledWriteLine(fgRed, 
"A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ", + fgYellow, &"'{exc.file}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except CompileError: + let exc = CompileError(getCurrentException()) + let lexeme = exc.node.token.lexeme + let lineNo = exc.node.token.line + let relPos = tokenizer.getRelPos(lineNo) + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + var fn = compiler.getCurrentFunction() + var fnMsg = "" + if fn != nil and fn.kind == funDecl: + fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'" + stderr.styledWriteLine(fgRed, "A fatal error occurred while compiling ", fgYellow, &"'{exc.file}'", fgRed, ", module ", + fgYellow, &"'{exc.module}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except SerializationError: + let exc = SerializationError(getCurrentException()) + stderr.styledWriteLine(fgRed, "A fatal error occurred while (de-)serializing", fgYellow, &"'{exc.file}'", fgGreen, ": ", getCurrentExceptionMsg()) + quit(0) + + +proc runFile(f: string) = + var + tokens: seq[Token] = @[] + tree: seq[Declaration] = @[] + compiled: Chunk + serialized: Serialized + tokenizer = newLexer() + parser = newParser() + compiler = newCompiler() + serializer = newSerializer() + vm = newPeonVM() + input: string + tokenizer.fillSymbolTable() + try: + input = readFile(f) + tokens = tokenizer.lex(input, f) + if tokens.len() == 0: + return + when debugLexer: + styledEcho fgCyan, "Tokenization step:" + for i, token in tokens: + if i == tokens.high(): + 
# Who cares about EOF? + break + styledEcho fgGreen, "\t", $token + echo "" + tree = parser.parse(tokens, f) + if tree.len() == 0: + return + when debugParser: + styledEcho fgCyan, "Parsing step:" + for node in tree: + styledEcho fgGreen, "\t", $node + echo "" + compiled = compiler.compile(tree, f) + when debugCompiler: + styledEcho fgCyan, "Compilation step:" + styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]" + styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]" + styledEcho fgCyan, "\nBytecode disassembler output below:\n" + disassembleChunk(compiled, f) + echo "" + + serializer.dumpFile(compiled, input, f, splitFile(f).name & ".pbc") + serialized = serializer.loadFile(splitFile(f).name & ".pbc") + when debugSerializer: + var hashMatches = computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash + styledEcho fgCyan, "Serialization step: " + styledEcho fgBlue, &"\t- File hash: ", fgYellow, serialized.fileHash, fgBlue, " (", if hashMatches: fgGreen else: fgRed, if hashMatches: "OK" else: "Fail", fgBlue, ")" + styledEcho fgBlue, "\t- Peon version: ", fgYellow, &"{serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch}", fgBlue, " (commit ", fgYellow, serialized.commitHash[0..8], fgBlue, ") on branch ", fgYellow, serialized.peonBranch + stdout.styledWriteLine(fgBlue, "\t- Compilation date & time: ", fgYellow, fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")) + stdout.styledWrite(fgBlue, &"\t- Constants segment: ") + if serialized.chunk.consts == compiled.consts: + styledEcho fgGreen, "OK" + else: + styledEcho fgRed, "Corrupted" + stdout.styledWrite(fgBlue, &"\t- Code segment: ") + if serialized.chunk.code == compiled.code: + styledEcho fgGreen, "OK" + else: + styledEcho fgRed, "Corrupted" + stdout.styledWrite(fgBlue, "\t- Line info segment: ") + if serialized.chunk.lines == compiled.lines: + styledEcho fgGreen, 
"OK" + else: + styledEcho fgRed, "Corrupted" + when debugRuntime: + styledEcho fgCyan, "\n\nExecution step: " + vm.run(serialized.chunk) + except LexingError: + let exc = LexingError(getCurrentException()) + let relPos = tokenizer.getRelPos(exc.line) + let line = tokenizer.getSource().splitLines()[exc.line - 1].strip() + stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ", + fgYellow, &"'{exc.file}'", fgRed, ", line ", fgYellow, $exc.line, fgRed, " at ", fgYellow, &"'{exc.lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except ParseError: + let exc = ParseError(getCurrentException()) + let lexeme = exc.token.lexeme + let lineNo = exc.token.line + let relPos = tokenizer.getRelPos(lineNo) + let fn = parser.getCurrentFunction() + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + var fnMsg = "" + if fn != nil and fn.kind == funDecl: + fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'" + stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ", + fgYellow, &"'{exc.file}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except CompileError: + let exc = CompileError(getCurrentException()) + let lexeme = exc.node.token.lexeme + let lineNo = exc.node.token.line + let relPos = tokenizer.getRelPos(lineNo) + let line = tokenizer.getSource().splitLines()[lineNo - 1].strip() + var fn = compiler.getCurrentFunction() + var fnMsg = "" + if fn != nil and fn.kind == funDecl: + fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'" + 
stderr.styledWriteLine(fgRed, "A fatal error occurred while compiling ", fgYellow, &"'{exc.file}'", fgRed, ", module ", + fgYellow, &"'{exc.module}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'", + fgRed, ": ", fgGreen , getCurrentExceptionMsg()) + styledEcho fgBlue, "Source line: " , fgDefault, line + styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except SerializationError: + let exc = SerializationError(getCurrentException()) + stderr.styledWriteLine(fgRed, "A fatal error occurred while (de-)serializing", fgYellow, &"'{exc.file}'", fgGreen, ": ", getCurrentExceptionMsg()) + except IOError: + stderr.styledWriteLine(fgRed, "An error occurred while trying to read ", fgYellow, &"'{f}'", fgGreen, &": {getCurrentExceptionMsg()}") + except OSError: + stderr.styledWriteLine(fgRed, "An error occurred while trying to read ", fgYellow, &"'{f}'", fgGreen, &": {osErrorMsg(osLastError())} [errno {osLastError()}]") + + +when isMainModule: + setControlCHook(proc () {.noconv.} = quit(0)) + let args = commandLineParams() + if args.len() == 0: + repl() + else: + runFile(args[0]) + + +proc fillSymbolTable(tokenizer: Lexer) = + ## Initializes the Lexer's symbol + ## table with the builtin symbols + ## and keywords + + # 1-byte symbols + tokenizer.symbols.addSymbol("{", LeftBrace) + tokenizer.symbols.addSymbol("}", RightBrace) + tokenizer.symbols.addSymbol("(", LeftParen) + tokenizer.symbols.addSymbol(")", RightParen) + tokenizer.symbols.addSymbol("[", LeftBracket) + tokenizer.symbols.addSymbol("]", RightBracket) + tokenizer.symbols.addSymbol(".", Dot) + tokenizer.symbols.addSymbol(",", Comma) + tokenizer.symbols.addSymbol(";", Semicolon) + # Keywords + tokenizer.symbols.addKeyword("type", TokenType.Type) + tokenizer.symbols.addKeyword("enum", Enum) + tokenizer.symbols.addKeyword("case", Case) + tokenizer.symbols.addKeyword("operator", Operator) + tokenizer.symbols.addKeyword("generator", Generator) + 
tokenizer.symbols.addKeyword("fn", TokenType.Function) + tokenizer.symbols.addKeyword("coroutine", Coroutine) + tokenizer.symbols.addKeyword("break", TokenType.Break) + tokenizer.symbols.addKeyword("continue", Continue) + tokenizer.symbols.addKeyword("while", While) + tokenizer.symbols.addKeyword("for", For) + tokenizer.symbols.addKeyword("foreach", Foreach) + tokenizer.symbols.addKeyword("if", If) + tokenizer.symbols.addKeyword("else", Else) + tokenizer.symbols.addKeyword("await", TokenType.Await) + tokenizer.symbols.addKeyword("defer", Defer) + tokenizer.symbols.addKeyword("try", Try) + tokenizer.symbols.addKeyword("except", Except) + tokenizer.symbols.addKeyword("finally", Finally) + tokenizer.symbols.addKeyword("raise", TokenType.Raise) + tokenizer.symbols.addKeyword("assert", TokenType.Assert) + tokenizer.symbols.addKeyword("const", Const) + tokenizer.symbols.addKeyword("let", Let) + tokenizer.symbols.addKeyword("var", Var) + tokenizer.symbols.addKeyword("import", Import) + tokenizer.symbols.addKeyword("yield", TokenType.Yield) + tokenizer.symbols.addKeyword("return", TokenType.Return) + # These are more like expressions with a reserved + # name that produce a value of a builtin type, + # but we don't need to care about that until + # we're in the parsing/ compilation steps so + # it's fine + tokenizer.symbols.addKeyword("nan", NotANumber) + tokenizer.symbols.addKeyword("inf", Infinity) + tokenizer.symbols.addKeyword("nil", TokenType.Nil) + tokenizer.symbols.addKeyword("true", True) + tokenizer.symbols.addKeyword("false", False) + tokenizer.symbols.addKeyword("ref", Ref) + tokenizer.symbols.addKeyword("ptr", Ptr) + for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]: + tokenizer.symbols.addSymbol(sym, Symbol) + + +proc getLineEditor: LineEditor = + result = newLineEditor() + result.prompt = "=> " + result.populateDefaults() + let history = result.plugHistory() + result.bindHistory(history) diff --git a/src/memory/allocator.nim 
b/src/memory/allocator.nim new file mode 100644 index 0000000..b6d93e8 --- /dev/null +++ b/src/memory/allocator.nim @@ -0,0 +1,87 @@ +# Copyright 2022 Mattia Giambirtone +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Memory allocator from JAPL + + +import segfaults +import ../config + +when DEBUG_TRACE_ALLOCATION: + import strformat + + +proc reallocate*(p: pointer, oldSize: int, newSize: int): pointer = + ## Wrapper around realloc/dealloc + try: + if newSize == 0 and p != nil: + when DEBUG_TRACE_ALLOCATION: + if oldSize > 1: + echo &"DEBUG - Memory manager: Deallocating {oldSize} bytes" + else: + echo "DEBUG - Memory manager: Deallocating 1 byte" + dealloc(p) + return nil + when DEBUG_TRACE_ALLOCATION: + if pointr == nil and newSize == 0: + echo &"DEBUG - Memory manager: Warning, asked to dealloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request" + if oldSize > 0 and p != nil or oldSize == 0: + when DEBUG_TRACE_ALLOCATION: + if oldSize == 0: + if newSize > 1: + echo &"DEBUG - Memory manager: Allocating {newSize} bytes of memory" + else: + echo "DEBUG - Memory manager: Allocating 1 byte of memory" + else: + echo &"DEBUG - Memory manager: Resizing {oldSize} bytes of memory to {newSize} bytes" + result = realloc(p, newSize) + when DEBUG_TRACE_ALLOCATION: + if oldSize > 0 and pointr == nil: + echo &"DEBUG - Memory manager: Warning, asked to realloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request" + except NilAccessDefect: + 
stderr.write("JAPL: could not manage memory, segmentation fault\n") + quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit + + +template resizeArray*(kind: untyped, pointr: pointer, oldCount, + newCount: int): untyped = + ## Handy macro (in the C sense of macro, not nim's) to resize a dynamic array + cast[ptr UncheckedArray[kind]](reallocate(pointr, sizeof(kind) * oldCount, + sizeof(kind) * newCount)) + + +template freeArray*(kind: untyped, pointr: pointer, oldCount: int): untyped = + ## Frees a dynamic array + reallocate(pointr, sizeof(kind) * oldCount, 0) + + +template free*(kind: untyped, pointr: pointer): untyped = + ## Frees a pointer by reallocating its + ## size to 0 + reallocate(pointr, sizeof(kind), 0) + + +template growCapacity*(capacity: int): untyped = + ## Handy macro used to calculate how much + ## more memory is needed when reallocating + ## dynamic arrays + if capacity < 8: + 8 + else: + capacity * ARRAY_GROW_FACTOR + + +template allocate*(castTo: untyped, sizeTo: untyped, count: int): untyped = + ## Allocates an object and casts its pointer to the specified type + cast[ptr castTo](reallocate(nil, 0, sizeof(sizeTo) * count)) diff --git a/src/peon/stdlib/arithmetics.pn b/src/peon/stdlib/arithmetics.pn new file mode 100644 index 0000000..63768f1 --- /dev/null +++ b/src/peon/stdlib/arithmetics.pn @@ -0,0 +1,193 @@ +## Builtin arithmetic operators for Peon + + +operator `+`(a, b: int): int { + #pragma[magic: AddInt64, pure] + return; +} + + +operator `+`(a, b: uint): uint { + #pragma[magic: AddUInt64, pure] + return; +} + + +operator `+`(a, b: int32): int32 { + #pragma[magic: AddInt32, pure] + return; +} + + +operator `+`(a, b: uint32): uint32 { + #pragma[magic: AddUInt32, pure] + return; +} + + +operator `+`(a, b: int16): int16 { + #pragma[magic: AddInt16, pure] + return; +} + + +operator `+`(a, b: uint16): uint16 { + #pragma[magic: AddUInt16, pure] + return; +} + + +operator `+`(a, b: int8): int8 { + 
#pragma[magic: AddInt8, pure] + return; +} + + +operator `+`(a, b: uint8): uint8 { + #pragma[magic: AddUInt8, pure] + return; +} + + +operator `-`(a, b: int): int { + #pragma[magic: SubInt64, pure] + return; +} + + +operator `-`(a, b: uint): uint { + #pragma[magic: SubUInt64, pure] + return; +} + + +operator `-`(a, b: int32): int32 { + #pragma[magic: SubInt32, pure] + return; +} + + +operator `-`(a, b: uint32): uint32 { + #pragma[magic: SubUInt32, pure] + return; +} + + +operator `-`(a, b: int16): int16 { + #pragma[magic: SubInt16, pure] + return; +} + + +operator `-`(a, b: uint16): uint16 { + #pragma[magic: SubUInt16, pure] + return; +} + + +operator `-`(a, b: int8): int8 { + #pragma[magic: SubInt8, pure] + return; +} + + +operator `-`(a, b: uint8): uint8 { + #pragma[magic: SubUInt8, pure] + return; +} + + +operator `*`(a, b: int): int { + #pragma[magic: MulInt64, pure] + return; +} + + +operator `*`(a, b: uint): uint { + #pragma[magic: MulUInt64, pure] + return; +} + + +operator `*`(a, b: int32): int32 { + #pragma[magic: MulInt32, pure] + return; +} + + +operator `*`(a, b: uint32): uint32 { + #pragma[magic: MulUInt32, pure] + return; +} + + +operator `*`(a, b: int16): int16 { + #pragma[magic: MulInt16, pure] + return; +} + + +operator `*`(a, b: uint16): uint16 { + #pragma[magic: MulUInt16, pure] + return; +} + + +operator `*`(a, b: int8): int8 { + #pragma[magic: MulInt8, pure] + return; +} + + +operator `*`(a, b: uint8): uint8 { + #pragma[magic: MulUInt8, pure] + return; +} + + +operator `/`(a, b: int): int { + #pragma[magic: DivInt64, pure] + return; +} + + +operator `/`(a, b: uint): uint { + #pragma[magic: DivUInt64, pure] + return; +} + + +operator `/`(a, b: int32): int32 { + #pragma[magic: DivInt32, pure] + return; +} + + +operator `/`(a, b: uint32): uint32 { + #pragma[magic: DivUInt32, pure] + return; +} + + +operator `/`(a, b: int16): int16 { + #pragma[magic: DivInt16, pure] + return; +} + + +operator `/`(a, b: uint16): uint16 { + #pragma[magic: DivUInt16, 
pure] + return; +} + + +operator `/`(a, b: int8): int8 { + #pragma[magic: DivInt8, pure] + return; +} + + +operator `/`(a, b: uint8): uint8 { + #pragma[magic: DivUInt8, pure] + return; +} \ No newline at end of file diff --git a/src/tests.pn b/src/tests.pn new file mode 100644 index 0000000..0ea5253 --- /dev/null +++ b/src/tests.pn @@ -0,0 +1,5 @@ +operator `+`(a: int): int { + return a; +} + ++1; # Works: defined for int64 diff --git a/src/util/debugger.nim b/src/util/debugger.nim new file mode 100644 index 0000000..6119b53 --- /dev/null +++ b/src/util/debugger.nim @@ -0,0 +1,174 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ../frontend/meta/bytecode +import multibyte + + +import strformat +import strutils +import terminal + + +proc nl = stdout.write("\n") + + +proc printDebug(s: string, newline: bool = false) = + stdout.styledWrite(fgMagenta, "DEBUG - Disassembler -> ") + stdout.styledWrite(fgGreen, s) + if newline: + nl() + + +proc printName(opcode: OpCode, newline: bool = false) = + stdout.styledWrite(fgRed, $opcode, " (", fgYellow, $uint8(opcode), fgRed, ")") + if newline: + nl() + + +proc printInstruction(instruction: OpCode, newline: bool = false) = + printDebug("Instruction: ") + printName(instruction) + if newline: + nl() + + +proc simpleInstruction(instruction: OpCode, offset: int): int = + printInstruction(instruction) + nl() + return offset + 1 + + +proc stackTripleInstruction(instruction: OpCode, chunk: Chunk, + offset: int): int = + ## Debugs instructions that operate on a single value on the stack using a 24-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[ + offset + 3]].fromTriple() + printInstruction(instruction) + stdout.styledWrite(fgGreen, &", points to index ") + stdout.styledWriteLine(fgYellow, &"{slot}") + return offset + 4 + + +proc stackDoubleInstruction(instruction: OpCode, chunk: Chunk, + offset: int): int = + ## Debugs instructions that operate on a single value on the stack using a 16-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble() + printInstruction(instruction) + stdout.write(&", points to index ") + stdout.styledWrite(fgGreen, &", points to index ") + stdout.styledWriteLine(fgYellow, &"{slot}") + return offset + 3 + + +proc argumentDoubleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on a hardcoded value on the stack using a 16-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble() + printInstruction(instruction) + stdout.styledWrite(fgGreen, &", has argument ") + 
stdout.styledWriteLine(fgYellow, $slot) + return offset + 3 + + +proc argumentTripleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on a hardcoded value on the stack using a 24-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple() + printInstruction(instruction) + stdout.styledWrite(fgGreen, ", has argument ") + stdout.styledWriteLine(fgYellow, $slot) + return offset + 4 + + +proc callInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs function calls + var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple() + var args = [chunk.code[offset + 4], chunk.code[offset + 5], chunk.code[offset + 6]].fromTriple() + printInstruction(instruction) + stdout.styledWrite(fgGreen, &", jumps to address ", fgYellow, $slot, fgGreen, " with ", fgYellow, $args, fgGreen, " argument") + if args > 1: + stdout.styledWrite(fgYellow, "s") + nl() + return offset + 7 + + +proc constantInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on the constant table + var constant = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[ + offset + 3]].fromTriple() + printInstruction(instruction) + stdout.styledWrite(fgGreen, &", points to constant at position ", fgYellow, $constant) + nl() + printDebug("Operand: ") + stdout.styledWriteLine(fgYellow, &"{chunk.consts[constant]}") + return offset + 4 + + +proc jumpInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs jumps + var jump: int + case instruction: + of Jump, JumpIfFalse, JumpIfTrue, JumpIfFalsePop, JumpForwards, JumpBackwards: + jump = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble().int() + of LongJump, LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop, + LongJumpForwards, LongJumpBackwards: + jump = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[ + 
offset + 3]].fromTriple().int() + else: + discard # Unreachable + printInstruction(instruction, true) + printDebug("Jump size: ") + stdout.styledWrite(fgYellow, $jump) + nl() + return offset + 3 + + +proc disassembleInstruction*(chunk: Chunk, offset: int): int = + ## Takes one bytecode instruction and prints it + printDebug("Offset: ") + stdout.styledWriteLine(fgYellow, $offset) + printDebug("Line: ") + stdout.styledWriteLine(fgYellow, &"{chunk.getLine(offset)}") + var opcode = OpCode(chunk.code[offset]) + case opcode: + of simpleInstructions: + result = simpleInstruction(opcode, offset) + of constantInstructions: + result = constantInstruction(opcode, chunk, offset) + of stackDoubleInstructions: + result = stackDoubleInstruction(opcode, chunk, offset) + of stackTripleInstructions: + result = stackTripleInstruction(opcode, chunk, offset) + of argumentDoubleInstructions: + result = argumentDoubleInstruction(opcode, chunk, offset) + of argumentTripleInstructions: + result = argumentTripleInstruction(opcode, chunk, offset) + of callInstructions: + result = callInstruction(opcode, chunk, offset) + of jumpInstructions: + result = jumpInstruction(opcode, chunk, offset) + else: + echo &"DEBUG - Unknown opcode {opcode} at index {offset}" + result = offset + 1 + + +proc disassembleChunk*(chunk: Chunk, name: string) = + ## Takes a chunk of bytecode, and prints it + echo &"==== Peon Bytecode Debugger - Chunk '{name}' ====\n" + var index = 0 + while index < chunk.code.len: + index = disassembleInstruction(chunk, index) + echo "" + echo &"==== Debug session ended - Chunk '{name}' ====" + + diff --git a/src/util/multibyte.nim b/src/util/multibyte.nim new file mode 100644 index 0000000..4244cf5 --- /dev/null +++ b/src/util/multibyte.nim @@ -0,0 +1,61 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Utilities to convert from/to our 16-bit, 24-bit, 32-bit and 64-bit +## representations of numbers + + +proc toDouble*(input: int | uint | uint16): array[2, uint8] = + ## Converts an unsigned integer + ## to an array[2, uint8] + result = cast[array[2, uint8]](uint16(input)) + + +proc toTriple*(input: uint | int): array[3, uint8] = + ## Converts an unsigned integer to an array[3, uint8] + result = cast[array[3, uint8]](uint(input)) + + +proc toQuad*(input: int | uint | uint16 | uint32): array[4, uint8] = + ## Converts an unsigned integer to an array[4, uint8] + result = cast[array[4, uint8]](uint(input)) + + +proc toLong*(input: int | uint | uint16 | uint32 | uint64): array[8, uint8] = + ## Converts an unsigned integer to an array[8, uint8] + result = cast[array[8, uint8]](uint(input)) + + +proc fromDouble*(input: array[2, uint8]): uint16 = + ## Rebuilds the output of toDouble into + ## an uint16 + copyMem(result.addr, unsafeAddr(input), sizeof(uint16)) + + +proc fromTriple*(input: array[3, uint8]): uint = + ## Rebuilds the output of toTriple into + ## an uint + copyMem(result.addr, unsafeAddr(input), sizeof(uint8) * 3) + + +proc fromQuad*(input: array[4, uint8]): uint = + ## Rebuilds the output of toQuad into + ## an uint + copyMem(result.addr, unsafeAddr(input), sizeof(uint32)) + + +proc fromLong*(input: array[8, uint8]): uint = + ## Rebuilds the output of toLong into + ## an uint + copyMem(result.addr, unsafeAddr(input), sizeof(uint64)) \ No newline at end of file diff --git a/src/util/serializer.nim new file mode 100644 index 
0000000..b8cd10a --- /dev/null +++ b/src/util/serializer.nim @@ -0,0 +1,241 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import ../frontend/meta/errors +import ../frontend/meta/bytecode +import ../config +import multibyte +import ../frontend/compiler + +import strformat +import strutils +import nimSHA2 +import times + + +export ast + +type + Serializer* = ref object + file: string + filename: string + chunk: Chunk + Serialized* = ref object + ## Wrapper returned by + ## the Serializer.read* + ## procedures to store + ## metadata + fileHash*: string + peonVer*: tuple[major, minor, patch: int] + peonBranch*: string + commitHash*: string + compileDate*: int + chunk*: Chunk + + +proc `$`*(self: Serialized): string = + result = &"Serialized(fileHash={self.fileHash}, version={self.peonVer.major}.{self.peonVer.minor}.{self.peonVer.patch}, branch={self.peonBranch}), commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]}" + + +proc error(self: Serializer, message: string) = + ## Raises a formatted SerializationError exception + raise SerializationError(msg: message, file: self.filename) + + +proc newSerializer*(self: Serializer = nil): Serializer = + new(result) + if self != nil: + result = self + result.file = "" + result.filename = "" + result.chunk = nil + + +## Basic routines and helpers to convert various objects from and to their byte representation + +proc toBytes(self: Serializer, s: 
string): seq[byte] = + for c in s: + result.add(byte(c)) + + +proc toBytes(self: Serializer, s: int): array[8, uint8] = + result = cast[array[8, uint8]](s) + + +proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] = + for b in d: + result.add(b) + + +proc bytesToString(self: Serializer, input: seq[byte]): string = + for b in input: + result.add(char(b)) + + +proc extend[T](s: var seq[T], a: openarray[T]) = + ## Extends s with the elements of a + for e in a: + s.add(e) + + +proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) = + ## Writes the Peon bytecode headers in-place into a byte stream + stream.extend(self.toBytes(BYTECODE_MARKER)) + stream.add(byte(PEON_VERSION.major)) + stream.add(byte(PEON_VERSION.minor)) + stream.add(byte(PEON_VERSION.patch)) + stream.add(byte(len(PEON_BRANCH))) + stream.extend(self.toBytes(PEON_BRANCH)) + stream.extend(self.toBytes(PEON_COMMIT_HASH)) + stream.extend(self.toBytes(getTime().toUnixFloat().int())) + stream.extend(self.toBytes(computeSHA256(file))) + + +proc writeLineData(self: Serializer, stream: var seq[byte]) = + ## Writes line information for debugging + ## bytecode instructions + stream.extend(len(self.chunk.lines).toQuad()) + for b in self.chunk.lines: + stream.extend(b.toTriple()) + + +proc writeConstants(self: Serializer, stream: var seq[byte]) = + ## Writes the constants table in-place into the + ## given stream + stream.extend(self.chunk.consts.len().toQuad()) + for constant in self.chunk.consts: + stream.add(constant) + + +proc writeCode(self: Serializer, stream: var seq[byte]) = + ## Writes the bytecode from the given chunk to the + ## given source stream + stream.extend(self.chunk.code.len.toTriple()) + stream.extend(self.chunk.code) + + +proc readHeaders(self: Serializer, stream: seq[byte], serialized: Serialized): int = + ## Reads the bytecode headers from a given stream + ## of bytes + var stream = stream + if stream[0..