Fixed typo

This commit is contained in:
Mattia Giambirtone 2022-05-23 23:15:09 +02:00
parent a20cfc532b
commit 630de7a30c
26 changed files with 6259 additions and 56 deletions

4
.gitignore vendored

@ -2,4 +2,6 @@
nimcache/
nimblecache/
htmldocs/
# Peon bytecode files
*.pbc
stdin.pbc
tests.pbc

222
LICENSE

@ -1,85 +1,201 @@
The Artistic License 2.0
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Copyright (c) 2000-2006, The Perl Foundation.
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
1. Definitions.
Preamble
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
Definitions
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"You" and "your" means any person who would like to copy, distribute, or modify the Package.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version.
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees.
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
"Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
"Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
"Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
"Source" form means the source code, documentation source, and configuration files for the Package.
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
"Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form.
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
Permission for Use and Modification Without Distribution
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version.
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
Permissions for Redistribution of the Standard Version
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
(2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
(3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
Distribution of Modified Versions of the Package as Source
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
(4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following:
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
(a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version.
(b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version.
(c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
(i) the Original License or
(ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed.
END OF TERMS AND CONDITIONS
Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source
APPENDIX: How to apply the Apache License to your work.
(5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days after you become aware that the instructions are invalid, then you do not forfeit any of your rights under this license.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
(6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version.
Copyright [yyyy] [name of copyright owner]
Aggregating or Linking the Package
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
(7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation.
http://www.apache.org/licenses/LICENSE-2.0
(8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package.
Items That are Not Considered Part of a Modified Version
(9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license.
General Provisions
(10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license.
(11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license.
(12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder.
(13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic License to you shall terminate on the date that such litigation is filed.
(14) Disclaimer of Warranty:
THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

5
Makefile Normal file

@ -0,0 +1,5 @@
run:
nim --hints:off --warnings:off r src/test.nim
pretty:
nimpretty src/*.nim src/backend/*.nim src/frontend/*.nim src/frontend/meta/*.nim src/memory/*.nim src/util/*.nim

README.md

@ -1,3 +1,79 @@
# peon
# The peon programming language
Peon is a simple, functional, async-first programming language with a focus on correctness and speed
Peon is a simple, functional, async-first programming language with a focus on correctness and speed.
[Go to the Manual](docs/manual.md)
## Project structure
- `src/` -> Contains the entirety of peon's toolchain
- `src/memory/` -> Contains peon's memory allocator and GC (TODO)
- `src/frontend/` -> Contains the tokenizer, parser and compiler
- `src/frontend/meta/` -> Contains shared error definitions, AST node and token
declarations as well as the bytecode used by the compiler
- `src/backend/` -> Contains the peon VM and type system
- `src/util/` -> Contains utilities such as the bytecode debugger and serializer as well
as procedures to handle multi-byte sequences
- `src/config.nim` -> Contains compile-time configuration variables
- `src/main.nim` -> Ties the whole toolchain together by tokenizing,
parsing, compiling, debugging, (de-)serializing and executing peon code
- `docs/` -> Contains documentation for various components of peon (bytecode, syntax, etc.)
- `tests/` -> Contains tests (both in peon and Nim) for the toolchain
## Credits
- Araq, for creating the amazing language that is [Nim](https://nim-lang.org)
- The Nim community and contributors, for making Nim what it is today
- Bob Nystrom, for his amazing [book](https://craftinginterpreters.com) that inspired me
and taught me how to actually make a programming language
- [Njsmith](https://vorpus.org/), for his awesome articles on structured concurrency
## Project State
**Disclaimer**: The project is still in its very early days: lots of stuff is not implemented, is a work in progress, or
is otherwise outright broken. Feel free to report bugs!
Also, yes: peon is yet another programming language inspired by Bob's book, but it is also **very**
different from Lox: Lox is an object-oriented, dynamically typed and very high-level programming language, whereas
peon is a statically typed, functional language which aims to allow low-level interfacing with C and Nim code while
being a breeze to use.
Also, peon will feature [structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/) with coroutines (think Futures/Fibers, but without
callback hell). Since, unlike Lox, peon isn't a toy language, there are obviously plans to implement creature comforts
like an import system, exception handling, a package manager, etc.
### TODO List
In no particular order, here's a list of stuff that's done/to do (might be incomplete/out of date):
Toolchain:
- Tokenizer (with dynamic symbol table) [x]
- Parser (with support for custom operators, even builtins) [x]
- Compiler [ ] (Work in Progress)
- VM [ ] (Work in Progress)
- Bytecode (de-)serializer [x]
- Static code debugger [x]
- Runtime debugger/inspection tool [ ]
Type system:
- Custom types [ ]
- Intrinsics [x]
- Generics [ ] (Work in Progress)
- Function calls [ ] (Work in Progress)
Misc:
- Pragmas [ ] (Work in Progress)
- Attribute resolution [ ]
- ... More?
## The name
The name for peon comes from my and [Productive2's](https://git.nocturn9x.space/prod2) genius and is the result of shortening
the name of the fastest animal on earth: the **Pe**regrine Falc**on**. I guess I wanted this to mean peon will be blazing fast.

72
docs/bytecode.md Normal file

@ -0,0 +1,72 @@
# Peon - Bytecode Specification
This document aims to document peon's bytecode as well as how it is (de-)serialized to/from files and
other file-like objects.
## Code Structure
A peon program is compiled into a tightly packed sequence of bytes that contain all the necessary information
the VM needs to execute said program. There is no dependence between the frontend and the backend outside of the
bytecode format (which is implemented in a separate serializer module) to allow for maximum modularity.
A peon bytecode dump contains:
- Constants
- The bytecode itself
- Debugging information
- File and version metadata
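Conceptually, a dump can be pictured as the following layout (a sketch with illustrative names, not the serializer's actual types):

```nim
type
  BytecodeDump = object
    ## Conceptual layout of a peon bytecode dump
    header: seq[byte]    # marker, version and build/file metadata
    lineData: seq[byte]  # run-length encoded debugging information
    consts: seq[byte]    # the constants, as raw untyped bytes
    code: seq[byte]      # the bytecode instructions themselves
```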
## Encoding
### Header
A peon bytecode file starts with the header, which is structured as follows:
- The literal string `PEON_BYTECODE`
- A 3-byte version number (the major, minor and patch versions of the compiler that generated the file as per the SemVer versioning standard)
- The branch name of the repository the compiler was built from, prepended with its length as a 1 byte integer
- The full commit hash (encoded as a 40-byte hex-encoded string) on the aforementioned branch from which the compiler was built (particularly useful in development builds)
- An 8-byte UNIX timestamp (seconds since 00:00 UTC, January 1st 1970) representing the exact date and time of when the file was generated
- A hex-encoded SHA256 hash (32 raw bytes, hence 64 characters once encoded) of the source file's content, used to track file changes
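As a rough illustration, the header could be serialized like this (a minimal sketch using Nim's standard streams; the helper and the exact write order shown here are assumptions, the list above is the authoritative format):

```nim
import std/[streams, times]

proc writeHeader(s: Stream, branch, commitHash, sourceHash: string) =
  s.write("PEON_BYTECODE")                     # literal marker string
  s.write(0'u8); s.write(4'u8); s.write(0'u8)  # major, minor and patch version
  s.write(uint8(branch.len))                   # branch name, length-prefixed
  s.write(branch)
  s.write(commitHash)                          # 40-character hex commit hash
  s.write(int64(getTime().toUnix()))           # 8-byte UNIX timestamp
  s.write(sourceHash)                          # hex-encoded SHA256 of the source
```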
### Line data section
The line data section contains information about each instruction in the code section and associates each of them
1:1 with a line number in the original source file for easier debugging, using run-length encoding. The section's
size is fixed and is encoded at the beginning as a sequence of 4 bytes (i.e. a single 32 bit integer). The data
in this section can be decoded as explained in [this file](../src/frontend/meta/bytecode.nim#L28), which is quoted
below:
```
[...]
## lines maps bytecode instructions to line numbers using Run
## Length Encoding. Instructions are encoded in groups whose structure
## follows the following schema:
## - The first integer represents the line number
## - The second integer represents the count of whatever comes after it
## (let's call it c)
## - After c, a sequence of c integers follows
##
## A visual representation may be easier to understand: [1, 2, 3, 4]
## This is to be interpreted as "there are 2 instructions at line 1 whose values
## are 3 and 4"
## This is more efficient than using the naive approach, which would encode
## the same line number multiple times and waste considerable amounts of space.
[...]
```
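For illustration, here is one way the decoding could be implemented (a hypothetical helper, not the actual code in bytecode.nim):

```nim
proc expandLineData(data: seq[int]): seq[tuple[line, value: int]] =
  ## Expands [line, count, v1, ..., vc] groups into (line, value) pairs
  var i = 0
  while i < data.len:
    let line = data[i]
    let count = data[i + 1]
    for j in 0 ..< count:
      result.add((line: line, value: data[i + 2 + j]))
    i += 2 + count

# expandLineData(@[1, 2, 3, 4]) yields @[(line: 1, value: 3), (line: 1, value: 4)]
```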
### Constant section
The constant section contains all the read-only values that the code will need at runtime, such as hardcoded
variable initializers or constant expressions. It is similar to the `.rodata` section of Assembly files, although
the implementation is different. Constants are encoded as a linear sequence of bytes with no type information about
them whatsoever: it is the code that, at runtime, loads each constant (whose type is determined at compile time) onto
the stack accordingly. For example, a 32 bit integer constant would be encoded as a sequence of 4 bytes, which would
then be loaded by the appropriate `LoadInt32` instruction at runtime. The section's size is fixed and is encoded at
the beginning as a sequence of 4 bytes (i.e. a single 32 bit integer). The constant section may be empty, although in
real-world scenarios it's unlikely to be.
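For example, this is roughly how four constant bytes could be turned back into a 32 bit integer (a sketch mirroring what an instruction like `LoadInt32` does at runtime):

```nim
proc loadInt32(consts: seq[byte], idx: int): int32 =
  ## Reinterprets 4 consecutive constant bytes as an int32
  var arr = [consts[idx], consts[idx + 1], consts[idx + 2], consts[idx + 3]]
  copyMem(result.addr, arr.addr, sizeof(arr))
```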
### Code section
The code section contains the linear sequence of bytecode instructions of a peon program. It is to be read directly
and without modifications. The section's size is fixed and is encoded at the beginning as a sequence of 3 bytes
(i.e. a single 24 bit integer).
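Since 24 bit integers are not a native machine type, the size has to be assembled manually; a possible decoding, assuming big-endian byte order (the actual ordering is defined in `src/util/multibyte.nim`):

```nim
proc fromTriple(data: array[3, byte]): uint32 =
  ## Packs three bytes into a 24 bit value stored in a uint32
  (uint32(data[0]) shl 16) or (uint32(data[1]) shl 8) or uint32(data[2])
```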

1
docs/grammar.md Normal file

@ -0,0 +1 @@
# TODO

188
docs/manual.md Normal file

@ -0,0 +1,188 @@
# Peon - Manual
Peon is a functional, statically typed, garbage-collected, C-like programming language with
a focus on speed and correctness, but whose main feature is the ability to natively
perform highly efficient parallel I/O operations by implementing the [structured concurrency](https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/)
paradigm.
__Note__: Peon is currently a WIP (Work In Progress), and much of the content of this manual is purely theoretical as
of now. If you want to help make this into a reality, feel free to contribute!
## Table of contents
- [Manual](#peon---manual)
- [Design Goals](#design-goals)
- [Examples](#peon-by-example)
- [Grammar](grammar.md)
- [Bytecode](bytecode.md)
## Design Goals
While peon is inspired by Bob Nystrom's [book](https://craftinginterpreters.com), where he describes a simple toy language
named Lox, the aspiration for it is to become a programming language that could actually be used in the real world. For that
to happen, we need:
- Exceptions (`try/except/finally`)
- An import system (with namespaces, like Python)
- Multithreading support (with a global VM lock when GC'ing)
- Built-in collections (list, tuple, set, etc.)
- Coroutines (w/ structured concurrency)
- Generators
- Generics
- C/Nim FFI
- A package manager
Peon ~~steals~~ borrows many ideas from Python and Nim (the latter being the language peon itself is written in).
## Peon by Example
Here follow a few examples of peon code to make it clear what the end product should look like.
### Variable declarations
```
var x = 5; # Inferred type is int64
var y = 3'u16; # Type is specified as uint16
x = 6; # Works: type matches
x = 3.0; # Cannot assign float64 to x
var x = 3.14; # Cannot re-declare x
```
__Note__: Peon supports [name stropping](https://en.wikipedia.org/wiki/Stropping_(syntax)), meaning
that almost any ASCII sequence of characters can be used as an identifier, including language
keywords, but stropped names need to be enclosed by matching pairs of backticks (`\``).
### Functions
```
fn fib(n: int): int {
if (n < 3) {
return n;
}
return fib(n - 1) + fib(n - 2);
}
fib(30);
```
### Type declarations
```
type Foo = object { # Can also be "ref object" for reference types (managed automatically)
fieldOne*: int # Asterisk means the field is public outside the current module
fieldTwo*: int
}
```
### Operator overloading
```
operator `+`(a, b: Foo): Foo {
return Foo(fieldOne: a.fieldOne + b.fieldOne, fieldTwo: a.fieldTwo + b.fieldTwo);
}
Foo(fieldOne: 1, fieldTwo: 3) + Foo(fieldOne: 2, fieldTwo: 3); # Foo(fieldOne: 3, fieldTwo: 6)
```
__Note__: Custom operators (e.g. `foo`) can also be defined! The backticks around the plus sign serve to mark it
as an identifier instead of a symbol (which is a requirement for function names, since operators are basically
functions). In fact, even the built-in peon operators are implemented partially in peon (well, their forward
declarations are) and they are then specialized in the compiler to emit a single bytecode instruction.
### Function calls
```
foo(1, 2 + 3, 3.14, bar(baz));
```
__Note__: Operators can be called as functions too. Just wrap their name in backticks, like so:
```
`+`(1, 2)
```
__Note__: Code the likes of `a.b()` is desugared to `b(a)` if there exists a function `b` whose
signature is compatible with the value of `a` (assuming `a` doesn't have a `b` field, in
which case attribute resolution takes precedence).
### Generic declarations
```
fn genericSum[T](a, b: T): T { # Note: "a, b: T" means that both a and b are of type T
return a + b;
}
# This allows for a single implementation to be
# re-used multiple times without any code duplication!
genericSum(1, 2);
genericSum(3.14, 0.1);
genericSum(1'u8, 250'u8);
```
#### Multiple generics
```
fn genericSth[T, K](a: T, b: K) { # Note: no return type == void function!
# code...
}
genericSth(1, 3.0);
```
__Note__: The `*` modifier to make a name visible outside the current module must be put
__before__ generics declarations, so only `fn foo*[T](a: T) {}` is the correct syntax.
### Forward declarations
```
fn someF: int; # Semicolon, no body!
someF(); # This works!
fn someF: int {
return 42;
}
```
### Generators
```
generator count(n: int): int {
while (n > 0) {
yield n;
n -= 1;
}
}
foreach (n: count(10)) {
print(n);
}
```
### Coroutines
```
import concur;
import http;
coroutine req(url: string): string {
return (await http.AsyncClient().get(url)).content;
}
coroutine main(urls: list[string]) {
var pool = concur.pool(); # Creates a task pool: like a nursery in njsmith's article
for (var i = 0; i < urls.len(); i += 1) {
pool.spawn(req, urls[i]);
}
# The pool has internal machinery that makes the parent
# task wait until all children exit! When this function
# returns, ALL child tasks will have exited somehow
}
concur.run(main, newList[string]("https://google.com", "https://debian.org"));
```

BIN
peon Executable file

Binary file not shown.

54
src/backend/types.nim Normal file

@ -0,0 +1,54 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
type
ObjectKind* = enum
## Enumeration of Peon
## types
Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Function, CustomType,
Nil, Nan, Bool, Inf
PeonObject* = object
## A generic Peon object
case kind*: ObjectKind:
of Bool:
boolean*: bool
of Inf:
positive*: bool
of Byte:
`byte`*: byte
of Int8:
tiny*: int8
of UInt8:
uTiny*: uint8
of Int16:
short*: int16
of UInt16:
uShort*: uint16
of Int32:
`int`*: int32
of UInt32:
uInt*: uint32
of Int64:
long*: int64
of UInt64:
uLong*: uint64
of Nil, Nan:
discard
of CustomType:
fields*: seq[PeonObject]
else:
discard # TODO
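As a usage sketch (assuming the module above is imported), variant objects like these are constructed by picking a `kind` and filling in the matching field:

```nim
let five = PeonObject(kind: Int64, long: 5)
let truth = PeonObject(kind: Bool, boolean: true)
assert truth.boolean and five.long == 5
```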

312
src/backend/vm.nim Normal file

@ -0,0 +1,312 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## The Peon runtime environment
import types
import strformat
import ../config
import ../frontend/meta/bytecode
import ../util/multibyte
type
PeonVM* = ref object
## The Peon Virtual Machine
stack: seq[PeonObject]
ip: int # Instruction pointer
cache: array[6, PeonObject] # Singletons cache
chunk: Chunk # Piece of bytecode to execute
frames: seq[int] # Stores the initial index of stack frames
heapVars: seq[PeonObject] # Stores variables that do not have stack semantics (i.e. "static")
proc initCache*(self: PeonVM) =
## Initializes the VM's
## singletons cache
self.cache[0] = PeonObject(kind: Nil)
self.cache[1] = PeonObject(kind: Bool, boolean: true)
self.cache[2] = PeonObject(kind: Bool, boolean: false)
self.cache[3] = PeonObject(kind: ObjectKind.Inf, positive: true)
self.cache[4] = PeonObject(kind: ObjectKind.Inf, positive: false)
self.cache[5] = PeonObject(kind: ObjectKind.Nan)
proc newPeonVM*: PeonVM =
## Initializes a new, blank VM
## for executing Peon bytecode
new(result)
result.ip = 0
result.frames = @[]
result.stack = newSeq[PeonObject]()
result.initCache()
## Getters for singleton types (they are cached!)
proc getNil*(self: PeonVM): PeonObject = self.cache[0]
proc getBool*(self: PeonVM, value: bool): PeonObject =
if value:
return self.cache[1]
return self.cache[2]
proc getInf*(self: PeonVM, positive: bool): PeonObject =
if positive:
return self.cache[3]
return self.cache[4]
proc getNan*(self: PeonVM): PeonObject = self.cache[5]
## Stack primitives. Note: all stack accessing that goes
## through the get/set wrappers is frame-relative, meaning
## that the index is added to the current stack frame's
## bottom to obtain an absolute stack index.
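## For example, with frames == @[0, 4], a call to get(1) returns
## stack[1 + 4], i.e. slot 1 of the topmost frame rather than of
## the whole stack.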
proc push(self: PeonVM, obj: PeonObject) =
## Pushes a Peon object onto the
## stack
self.stack.add(obj)
proc pop(self: PeonVM): PeonObject =
## Pops a Peon object off the
## stack, decreasing the stack
## pointer. The object is returned
return self.stack.pop()
proc peek(self: PeonVM): PeonObject =
## Returns the Peon object at the top
## of the stack without consuming
## it
return self.stack[^1]
proc get(self: PeonVM, idx: int): PeonObject =
## Accessor method that abstracts
## stack accessing through stack
## frames
return self.stack[idx + self.frames[^1]]
proc set(self: PeonVM, idx: int, val: PeonObject) =
## Setter method that abstracts
## stack accessing through stack
## frames
self.stack[idx + self.frames[^1]] = val
proc readByte(self: PeonVM): uint8 =
## Reads a single byte from the
## bytecode and returns it as an
## unsigned 8 bit integer
inc(self.ip)
return self.chunk.code[self.ip - 1]
proc readShort(self: PeonVM): uint16 =
## Reads two bytes from the
## bytecode and returns them
## as an unsigned 16 bit
## integer
return [self.readByte(), self.readByte()].fromDouble()
proc readLong(self: PeonVM): uint32 =
## Reads three bytes from the
## bytecode and returns them
## as an unsigned 32 bit
## integer. Note however that
## the boundary is capped at
## 24 bits instead of 32
return uint32([self.readByte(), self.readByte(), self.readByte()].fromTriple())
proc readInt64(self: PeonVM, idx: int): PeonObject =
## Reads a constant from the
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is an Int64
var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
self.chunk.consts[idx + 2], self.chunk.consts[idx + 3],
self.chunk.consts[idx + 4], self.chunk.consts[idx + 5],
self.chunk.consts[idx + 6], self.chunk.consts[idx + 7],
]
result = PeonObject(kind: Int64)
copyMem(result.long.addr, arr.addr, sizeof(arr))
proc readUInt64(self: PeonVM, idx: int): PeonObject =
## Reads a constant from the
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is a UInt64
var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
self.chunk.consts[idx + 2], self.chunk.consts[idx + 3],
self.chunk.consts[idx + 4], self.chunk.consts[idx + 5],
self.chunk.consts[idx + 6], self.chunk.consts[idx + 7],
]
result = PeonObject(kind: UInt64)
copyMem(result.uLong.addr, arr.addr, sizeof(arr))
proc readUInt32(self: PeonVM, idx: int): PeonObject =
## Reads a constant from the
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is a UInt32
var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
result = PeonObject(kind: UInt32)
copyMem(result.uInt.addr, arr.addr, sizeof(arr))
proc readInt32(self: PeonVM, idx: int): PeonObject =
## Reads a constant from the
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is an Int32
var arr = [self.chunk.consts[idx], self.chunk.consts[idx + 1],
self.chunk.consts[idx + 2], self.chunk.consts[idx + 3]]
result = PeonObject(kind: Int32)
copyMem(result.`int`.addr, arr.addr, sizeof(arr))
proc dispatch*(self: PeonVM) =
## Main bytecode dispatch loop
var instruction: OpCode
while true:
instruction = OpCode(self.readByte())
when DEBUG_TRACE_VM:
echo &"IP: {self.ip}"
echo &"SP: {self.stack.high()}"
echo &"Stack: {self.stack}"
echo &"Instruction: {instruction}"
discard readLine stdin
case instruction:
# Constant loading
of LoadTrue:
self.push(self.getBool(true))
of LoadFalse:
self.push(self.getBool(false))
of LoadNan:
self.push(self.getNan())
of LoadNil:
self.push(self.getNil())
of LoadInf:
self.push(self.getInf(true))
of LoadInt64:
self.push(self.readInt64(int(self.readLong())))
of LoadUInt64:
self.push(self.readUInt64(int(self.readLong())))
of LoadUInt32:
self.push(self.readUInt32(int(self.readLong())))
of Call:
# Calls a function. The calling convention for peon
# functions is pretty simple: the return address sits
# at the bottom of the stack frame, then follow the
# arguments and all temporaries/local variables
let newIp = self.readLong()
# We do this because if we immediately changed
# the instruction pointer, we'd read the wrong
# value for the argument count. Storing it and
# changing it later fixes this issue
self.frames.add(int(self.readLong()))
self.ip = int(newIp)
of OpCode.Return:
# Returns from a void function or terminates the
# program entirely if we're at the topmost frame
if self.frames.len() > 1:
let frame = self.frames.pop()
for i in countdown(self.stack.high(), frame):
discard self.pop()
self.ip = int(self.pop().uInt)
else:
return
of ReturnValue:
# Returns from a function which has a return value,
# pushing it on the stack
let retVal = self.pop()
let frame = self.frames.pop()
for i in countdown(self.stack.high(), frame):
discard self.pop()
self.ip = int(self.pop().uInt)
self.push(retVal)
of StoreVar:
# Stores the value at the top of the stack
# into the given stack index
self.set(int(self.readLong()), self.pop())
of StoreHeap:
self.heapVars.add(self.pop())
of LoadHeap:
self.push(self.heapVars[self.readLong()])
of LoadVar:
self.push(self.get(int(self.readLong())))
of NoOp:
continue
of Pop:
discard self.pop()
of PopN:
for _ in 0..<int(self.readLong()):
discard self.pop()
of Jump:
self.ip = int(self.readShort())
of JumpForwards:
self.ip += int(self.readShort())
of JumpBackwards:
self.ip -= int(self.readShort())
of JumpIfFalse:
if not self.peek().boolean:
self.ip += int(self.readShort())
of JumpIfTrue:
if self.peek().boolean:
self.ip += int(self.readShort())
of JumpIfFalsePop:
if not self.peek().boolean:
self.ip += int(self.readShort())
discard self.pop()
of JumpIfFalseOrPop:
if not self.peek().boolean:
self.ip += int(self.readShort())
else:
discard self.pop()
of LongJumpIfFalse:
if not self.peek().boolean:
self.ip += int(self.readLong())
of LongJumpIfFalsePop:
if not self.peek().boolean:
self.ip += int(self.readLong())
discard self.pop()
of LongJumpForwards:
self.ip += int(self.readLong())
of LongJumpBackwards:
self.ip -= int(self.readLong())
of LongJump:
self.ip = int(self.readLong())
of LongJumpIfFalseOrPop:
if not self.peek().boolean:
self.ip += int(self.readLong())
else:
discard self.pop()
else:
discard
proc run*(self: PeonVM, chunk: Chunk) =
## Executes a piece of Peon bytecode.
self.chunk = chunk
self.frames = @[0]
self.stack = @[]
self.ip = 0
self.dispatch()
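Tying it together, a hypothetical usage of the VM could look like this (a sketch: `newChunk` and the module paths are assumptions, and the chunk would normally be filled by the compiler or the deserializer):

```nim
import backend/vm                # assuming this module path
import frontend/meta/bytecode

var chunk = newChunk()           # hypothetical constructor
# ... populate chunk.code and chunk.consts here ...
var vm = newPeonVM()
vm.run(chunk)
```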

55
src/config.nim Normal file

@ -0,0 +1,55 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import strformat
const BYTECODE_MARKER* = "PEON_BYTECODE"
const HEAP_GROW_FACTOR* = 2 # How much extra memory to allocate for dynamic arrays and garbage collection when resizing
when HEAP_GROW_FACTOR <= 1:
{.fatal: "Heap growth factor must be > 1".}
const PEON_VERSION* = (major: 0, minor: 4, patch: 0)
const PEON_RELEASE* = "alpha"
const PEON_COMMIT_HASH* = "ed79385e2a93100331697f26a4a90157e60ad27a"
when len(PEON_COMMIT_HASH) != 40:
{.fatal: "The git commit hash must be exactly 40 characters long".}
const PEON_BRANCH* = "master"
when len(PEON_BRANCH) > 255:
{.fatal: "The git branch name's length must be less than or equal to 255 characters".}
const DEBUG_TRACE_VM* = true # Traces VM execution
const DEBUG_TRACE_GC* = false # Traces the garbage collector (TODO)
const DEBUG_TRACE_ALLOCATION* = false # Traces memory allocation/deallocation
const DEBUG_TRACE_COMPILER* = false # Traces the compiler
const PEON_VERSION_STRING* = &"Peon {PEON_VERSION.major}.{PEON_VERSION.minor}.{PEON_VERSION.patch} {PEON_RELEASE} ({PEON_BRANCH}, {CompileDate}, {CompileTime}, {PEON_COMMIT_HASH[0..8]}) [Nim {NimVersion}] on {hostOS} ({hostCPU})"
const HELP_MESSAGE* = """The peon programming language, Copyright (C) 2022 Mattia Giambirtone & All Contributors
This program is free software, see the license distributed with this program or check
http://www.apache.org/licenses/LICENSE-2.0 for more info.
Basic usage
-----------
$ peon Opens an interactive session (REPL)
$ peon file.pn Runs the given Peon source file
Command-line options
--------------------
-h, --help Shows this help text and exits
-v, --version Prints the peon version number and exits
-s, --string Executes the passed string as if it was a file
-i, --interactive Enables interactive mode, which opens a REPL session after execution of a file or source string
-c, --nocache Disables dumping the result of bytecode compilation to files for caching
-d, --cache-delay Configures the bytecode cache invalidation threshold, in minutes (defaults to 60)
"""

1353
src/frontend/compiler.nim Normal file

File diff suppressed because it is too large

641
src/frontend/lexer.nim Normal file

@ -0,0 +1,641 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## A simple and modular tokenizer implementation with arbitrary lookahead
## using a customizable symbol table
import strutils
import parseutils
import strformat
import tables
import meta/token
import meta/errors
export token
export errors
type
SymbolTable* = ref object
## A table of symbols used
## to lex a source file
# Although we parse keywords as
# identifiers rather than as symbols,
# we keep them here for consistency
# purposes
keywords: TableRef[string, TokenType]
symbols: TableRef[string, TokenType]
Lexer* = ref object
## A lexer object
symbols*: SymbolTable
source: string
tokens: seq[Token]
line: int
start: int
current: int
file: string
lines: seq[tuple[start, stop: int]]
lastLine: int
proc newSymbolTable: SymbolTable =
new(result)
result.keywords = newTable[string, TokenType]()
result.symbols = newTable[string, TokenType]()
proc addSymbol*(self: SymbolTable, lexeme: string, token: TokenType) =
## Adds a symbol to the symbol table. Overwrites
## any previous entries
self.symbols[lexeme] = token
proc removeSymbol*(self: SymbolTable, lexeme: string) =
## Removes a symbol from the symbol table
## (does nothing if it does not exist)
self.symbols.del(lexeme)
proc addKeyword*(self: SymbolTable, lexeme: string, token: TokenType) =
## Adds a keyword to the symbol table. Overwrites
## any previous entries
self.keywords[lexeme] = token
proc removeKeyword*(self: SymbolTable, lexeme: string) =
## Removes a keyword from the symbol table
## (does nothing if it does not exist)
self.keywords.del(lexeme)
proc existsSymbol*(self: SymbolTable, lexeme: string): bool {.inline.} =
## Returns true if a given symbol exists
## in the symbol table already
lexeme in self.symbols
proc existsKeyword*(self: SymbolTable, lexeme: string): bool {.inline.} =
## Returns true if a given keyword exists
## in the symbol table already
lexeme in self.keywords
proc getToken(self: Lexer, lexeme: string): Token =
## Gets the matching token object for a given
## string according to the symbol table or
## returns nil if there's no match
let table = self.symbols
var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(
lexeme, NoMatch))
if kind == NoMatch:
return nil
new(result)
result.kind = kind
result.lexeme = self.source[self.start..<self.current]
result.line = self.line
result.pos = (start: self.start, stop: self.current)
proc getMaxSymbolSize(self: SymbolTable): int =
## Returns the maximum length of all the symbols
## currently in the table. Note that keywords are
## not symbols, they're identifiers (or at least
## are parsed the same way in Lexer.parseIdentifier)
for lexeme in self.symbols.keys():
if len(lexeme) > result:
result = len(lexeme)
proc getSymbols(self: SymbolTable, n: int): seq[string] =
## Returns all n-bytes symbols
## in the symbol table
for lexeme in self.symbols.keys():
if len(lexeme) == n:
result.add(lexeme)
# Wrappers around isDigit and isAlphanumeric for
# strings
proc isDigit(s: string): bool =
for c in s:
if not c.isDigit():
return false
return true
proc isAlphaNumeric(s: string): bool =
for c in s:
if not c.isAlphaNumeric():
return false
return true
proc incLine(self: Lexer)
# Simple public getters used for error
# formatting and whatnot
proc getStart*(self: Lexer): int = self.start
proc getFile*(self: Lexer): string = self.file
proc getCurrent*(self: Lexer): int = self.current
proc getLine*(self: Lexer): int = self.line
proc getSource*(self: Lexer): string = self.source
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] =
if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
self.incLine()
return self.lines[line - 1]
proc newLexer*(self: Lexer = nil): Lexer =
## Initializes the lexer or resets
## the state of an existing one
new(result)
if self != nil:
result = self
result.source = ""
result.tokens = @[]
result.line = 1
result.start = 0
result.current = 0
result.file = ""
result.lines = @[]
result.lastLine = 0
result.symbols = newSymbolTable()
proc done(self: Lexer): bool =
## Returns true if we reached EOF
result = self.current >= self.source.len
proc incLine(self: Lexer) =
## Increments the lexer's line
## and updates internal line
## metadata
self.lines.add((self.lastLine, self.current))
self.lastLine = self.current
self.line += 1
proc step(self: Lexer, n: int = 1): string =
## Steps n characters forward in the
## source file (default = 1). A string
## of at most n bytes is returned. If n
## exceeds EOF, the string will be shorter
while len(result) < n:
if self.done() or self.current > self.source.high():
break
else:
result.add(self.source[self.current])
inc(self.current)
proc peek(self: Lexer, distance: int = 0, length: int = 1): string =
## Returns a stream of characters of
## at most length bytes from the source
## file, starting at the given distance,
## without consuming it. The distance
## parameter may be negative to retrieve
## previously consumed tokens. If the
## distance and/or the length are beyond
## EOF (even partially), the resulting string
## will be shorter than length bytes
var i = distance
while len(result) < length:
if self.done() or self.current + i > self.source.high() or
self.current + i < 0:
break
else:
result.add(self.source[self.current + i])
inc(i)
proc error(self: Lexer, message: string) =
## Raises a lexing error with a formatted
## error message
raise LexingError(msg: message, line: self.line, file: self.file, lexeme: self.peek())
proc check(self: Lexer, s: string, distance: int = 0): bool =
## Behaves like self.match(), without consuming the
## token. False is returned if we're at EOF
## regardless of what the token to check is.
## The distance is passed directly to self.peek()
if self.done():
return false
return self.peek(distance, len(s)) == s
proc check(self: Lexer, args: openarray[string], distance: int = 0): bool =
## Calls self.check() in a loop with
## each character from the given set of
## strings and returns at the first match.
## Useful to check multiple tokens in a situation
## where only one of them may match at one time
for s in args:
if self.check(s, distance):
return true
return false
proc match(self: Lexer, s: string): bool =
## Returns true if the next len(s) bytes
## of the source file match the provided
## string. If the match is successful,
## len(s) bytes are consumed, otherwise
## false is returned
if not self.check(s):
return false
discard self.step(len(s))
return true
proc match(self: Lexer, args: openarray[string]): bool =
## Calls self.match() in a loop with
## each character from the given set of
## strings and returns at the first match.
## Useful to match multiple tokens in a situation
## where only one of them may match at one time
for s in args:
if self.match(s):
return true
return false
proc createToken(self: Lexer, tokenType: TokenType) =
## Creates a token object and adds it to the token
## list. The lexeme and position of the token are
## inferred from the current state of the tokenizer
var tok: Token = new(Token)
tok.kind = tokenType
tok.lexeme = self.source[self.start..<self.current]
tok.line = self.line
tok.pos = (start: self.start, stop: self.current)
if len(tok.lexeme) != tok.pos.stop - tok.pos.start:
self.error("invalid state: len(tok.lexeme) != tok.pos.stop - tok.pos.start (this is most likely a compiler bug!)")
self.tokens.add(tok)
proc parseEscape(self: Lexer) =
# Boring escape sequence parsing. For more info check out
# https://en.wikipedia.org/wiki/Escape_sequences_in_C.
# As of now, \u and \U are not supported, but they'll
# likely be soon. Another notable limitation is that
# \xhhh and \nnn are limited to the size of a char
# (i.e. uint8, or 256 values)
case self.peek()[0]: # We use a char instead of a string because of how case statements handle ranges with strings
# (i.e. not well, given they crash the C code generator)
of 'a':
self.source[self.current] = cast[char](0x07)
of 'b':
self.source[self.current] = cast[char](0x08)
of 'e':
self.source[self.current] = cast[char](0x1B)
of 'f':
self.source[self.current] = cast[char](0x0C)
of 'n':
when defined(windows):
# We natively convert LF to CRLF on Windows, and
# gotta thank Microsoft for the extra boilerplate!
self.source[self.current] = cast[char](0x0D)
self.source.insert("\x0A", self.current + 1)
when defined(darwin):
# Thanks apple, lol
self.source[self.current] = cast[char](0x0A)
when defined(linux):
self.source[self.current] = cast[char](0x0A)
of 'r':
self.source[self.current] = cast[char](0x0D)
of 't':
self.source[self.current] = cast[char](0x09)
of 'v':
self.source[self.current] = cast[char](0x0B)
of '"':
self.source[self.current] = '"'
of '\'':
self.source[self.current] = '\''
of '\\':
self.source[self.current] = cast[char](0x5C)
of '0'..'9': # This is the reason we're using char instead of string. See https://github.com/nim-lang/Nim/issues/19678
var code = ""
var value = 0
var i = self.current
while i < self.source.high() and (let c = self.source[
i].toLowerAscii(); c in '0'..'7') and len(code) < 3:
code &= self.source[i]
i += 1
assert parseOct(code, value) == code.len()
if value > uint8.high().int:
self.error("escape sequence value too large (> 255)")
self.source[self.current] = cast[char](value)
of 'u', 'U':
self.error("unicode escape sequences are not supported (yet)")
of 'x':
var code = ""
var value = 0
var i = self.current
while i < self.source.high() and (let c = self.source[
i].toLowerAscii(); c in 'a'..'f' or c in '0'..'9'):
code &= self.source[i]
i += 1
assert parseHex(code, value) == code.len()
if value > uint8.high().int:
self.error("escape sequence value too large (> 255)")
self.source[self.current] = cast[char](value)
else:
self.error(&"invalid escape sequence '\\{self.peek()}'")
proc parseString(self: Lexer, delimiter: string, mode: string = "single") =
## Parses string literals. They can be expressed using matching pairs
## of either single or double quotes. Most C-style escape sequences are
## supported, moreover, a specific prefix may be prepended
## to the string to instruct the lexer on how to parse it:
## - b -> declares a byte string, where each character is
## interpreted as an integer instead of a character
## - r -> declares a raw string literal, where escape sequences
## are not parsed and stay as-is
## - f -> declares a format string, where variables may be
## interpolated using curly braces like f"Hello, {name}!".
## Braces may be escaped using a pair of them, so to represent
## a literal "{" in an f-string, one would use {{ instead
## Multi-line strings can be declared using matching triplets of
## either single or double quotes. They can span across multiple
## lines and escape sequences in them are not parsed, like in raw
## strings, so a multi-line string prefixed with the "r" modifier
## is redundant, although multi-line byte/format strings are supported
var slen = 0
while not self.check(delimiter) and not self.done():
if self.match("\n"):
if mode == "multi":
self.incLine()
else:
self.error("unexpected EOL while parsing string literal")
if mode in ["raw", "multi"]:
discard self.step()
elif self.match("\\"):
# This madness here serves to get rid of the slash, since \x is mapped
# to a one-byte sequence but the string '\x' is actually 2 bytes (or more,
# depending on the specific escape sequence)
self.source = self.source[0..<self.current] & self.source[
self.current + 1..^1]
self.parseEscape()
if mode == "format" and self.match("{"):
if self.match("{"):
self.source = self.source[0..<self.current] & self.source[
self.current + 1..^1]
continue
while not self.check(["}", "\""]):
discard self.step()
if self.check("\""):
self.error("unclosed '{' in format string")
elif mode == "format" and self.check("}"):
if not self.check("}", 1):
self.error("unmatched '}' in format string")
else:
self.source = self.source[0..<self.current] & self.source[
self.current + 1..^1]
discard self.step()
inc(slen)
if slen > 1 and delimiter == "'":
self.error("invalid character literal (length must be one!)")
if mode == "multi":
if not self.match(delimiter.repeat(3)):
self.error("unexpected EOL while parsing multi-line string literal")
elif self.done() and self.peek(-1) != delimiter:
self.error("unexpected EOF while parsing string literal")
else:
discard self.step()
if delimiter == "\"":
self.createToken(String)
else:
self.createToken(Char)
proc parseBinary(self: Lexer) =
## Parses binary numbers
while self.peek().isDigit():
if not self.check(["0", "1"]):
self.error(&"invalid digit '{self.peek()}' in binary literal")
discard self.step()
proc parseOctal(self: Lexer) =
## Parses octal numbers
while self.peek().isDigit():
if self.peek() notin "0".."7":
self.error(&"invalid digit '{self.peek()}' in octal literal")
discard self.step()
proc parseHex(self: Lexer) =
## Parses hexadecimal numbers
while self.peek().isAlphaNumeric():
if not self.peek().isDigit() and self.peek().toLowerAscii() notin "a".."f":
self.error(&"invalid digit '{self.peek()}' in hexadecimal literal")
discard self.step()
proc parseNumber(self: Lexer) =
## Parses numeric literals, which encompass
## integers and floating point numbers.
## Floats also support scientific notation
## (e.g. 3e14), while the fractional part
## must be separated from the integer one
## using a dot (which acts as the decimal separator).
## Float literals such as 32.5e3 are also supported.
## The "e" for the scientific notation of floats
## is case-insensitive. Binary number literals are
## expressed using the prefix 0b, hexadecimal
## numbers with the prefix 0x and octal numbers
## with the prefix 0o. Numeric literals support
## size specifiers, like so: 10'u8, 3.14'f32
var kind: TokenType
case self.peek():
of "b":
discard self.step()
kind = Binary
self.parseBinary()
of "x":
kind = Hex
discard self.step()
self.parseHex()
of "o":
kind = Octal
discard self.step()
self.parseOctal()
else:
kind = Integer
while isDigit(self.peek()) and not self.done():
discard self.step()
if self.check(["e", "E"]):
kind = Float
discard self.step()
while self.peek().isDigit() and not self.done():
discard self.step()
elif self.check("."):
# TODO: Is there a better way?
discard self.step()
if not isDigit(self.peek()):
self.error("invalid float number literal")
kind = Float
while isDigit(self.peek()) and not self.done():
discard self.step()
if self.check(["e", "E"]):
discard self.step()
while isDigit(self.peek()) and not self.done():
discard self.step()
if self.match("'"):
# Could be a size specifier, better catch it
while (self.peek().isAlphaNumeric() or self.check("_")) and
not self.done():
discard self.step()
self.createToken(kind)
if kind == Binary:
# To make our life easier, we pad the binary number in here already
while (self.tokens[^1].lexeme.len() - 2) mod 8 != 0:
self.tokens[^1].lexeme = "0b" & "0" & self.tokens[^1].lexeme[2..^1]
proc parseBackticks(self: Lexer) =
## Parses tokens surrounded
## by backticks. This may be used
## for name stropping as well as to
## reimplement existing operators
## (e.g. +, -, etc.) without the
## parser complaining about syntax
## errors
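## For example, lexing `+` produces an Identifier
## token whose lexeme is "+", once the backticks
## are stripped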
while not self.match("`") and not self.done():
if self.peek().isAlphaNumeric() or self.symbols.existsSymbol(self.peek()):
discard self.step()
continue
self.error(&"unexpected character: '{self.peek()}'")
self.createToken(Identifier)
# Strips the backticks
self.tokens[^1].lexeme = self.tokens[^1].lexeme[1..^2]
proc parseIdentifier(self: Lexer) =
## Parses keywords and identifiers.
## Note that multi-character tokens
## (aka UTF runes) are not supported
## by design and *will* break things
while (self.peek().isAlphaNumeric() or self.check("_")) and not self.done():
discard self.step()
let name: string = self.source[self.start..<self.current]
if self.symbols.existsKeyword(name):
# It's a keyword!
self.createToken(self.symbols.keywords[name])
else:
# It's an identifier!
self.createToken(Identifier)
proc next(self: Lexer) =
## Scans a single token. This method is
## called iteratively until the source
## file reaches EOF
if self.done():
# We done boi
return
elif self.match(["\r", "\f", "\e"]):
# We skip characters we don't need
return
elif self.match(" "):
# Whitespaces
self.createToken(TokenType.Whitespace)
elif self.match("\r"):
# Tabs
self.createToken(TokenType.Tab)
elif self.match("\n"):
# New line
self.incLine()
elif self.match("`"):
# Stropped token
self.parseBackticks()
elif self.match(["\"", "'"]):
# String or character literal
var mode = "single"
if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
self.peek(-1), 1):
# Multiline strings start with 3 quotes
discard self.step(2)
mode = "multi"
self.parseString(self.peek(-1), mode)
elif self.peek().isDigit():
discard self.step() # Needed because parseNumber reads the next
# character to tell the base of the number
# Number literal
self.parseNumber()
elif self.peek().isAlphaNumeric() and self.check(["\"", "'"], 1):
# Prefixed string literal (i.e. f"Hi {name}!")
case self.step():
of "r":
self.parseString(self.step(), "raw")
of "b":
self.parseString(self.step(), "bytes")
of "f":
self.parseString(self.step(), "format")
else:
self.error(&"unknown string prefix '{self.peek(-1)}'")
elif self.peek().isAlphaNumeric() or self.check("_"):
# Keywords and identifiers
self.parseIdentifier()
elif self.match("#"):
# Inline comments, pragmas, etc.
while not (self.check("\n") or self.done()):
discard self.step()
self.createToken(Comment)
else:
# If none of the above conditions matched, there's a few
# other options left:
# - The token is a built-in operator, or
# - it's an expression/statement delimiter, or
# - it's not a valid token at all
# We handle all of these cases here by trying to
# match the longest sequence of characters possible
# as either an operator or a statement/expression
# delimiter, erroring out if there's no match
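# For example, if both "=" and "==" are registered symbols, the
# input "==" is matched as a single two-character token rather than
# as two separate "=" tokens, because longer symbols are tried first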
var n = self.symbols.getMaxSymbolSize()
while n > 0:
for symbol in self.symbols.getSymbols(n):
if self.match(symbol):
# We've found the largest possible
# match!
self.tokens.add(self.getToken(symbol))
return
dec(n)
# We just assume what we have in front of us
# is a symbol
discard self.step()
self.createToken(Symbol)
proc lex*(self: Lexer, source, file: string): seq[Token] =
## Lexes a source file, converting a stream
## of characters into a series of tokens
var symbols = self.symbols
discard self.newLexer()
self.symbols = symbols
self.source = source
self.file = file
self.lines = @[]
while not self.done():
self.next()
self.start = self.current
self.tokens.add(Token(kind: EndOfFile, lexeme: "",
line: self.line, pos: (self.current, self.current)))
self.incLine()
return self.tokens
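# A minimal usage sketch of this module. The symbol table must be
# populated before lexing (main.nim's fillSymbolTable() registers the
# full set of builtin symbols and keywords; here we register just the
# few this snippet needs):
when isMainModule:
    var tokenizer = newLexer()
    tokenizer.symbols.addSymbol(";", Semicolon)
    tokenizer.symbols.addSymbol("=", Symbol)
    tokenizer.symbols.addKeyword("var", Var)
    for token in tokenizer.lex("""var greeting = f"Hello, {name}!";""", "stdin"):
        echo token  # e.g. Token(kind=Var, lexeme='var', line=1, pos=(...))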

701
src/frontend/meta/ast.nim Normal file
View File

@ -0,0 +1,701 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## An Abstract Syntax Tree (AST) structure for our recursive-descent
## top-down parser. For more info, check out docs/grammar.md
import strformat
import strutils
import token
export token
type
NodeKind* = enum
## Enumeration of the AST
## node types, sorted by
## precedence
# Declarations
funDecl = 0'u8,
varDecl,
# Statements
forStmt, # Unused for now (for loops are compiled to while loops)
ifStmt,
returnStmt,
breakStmt,
continueStmt,
whileStmt,
forEachStmt,
blockStmt,
raiseStmt,
assertStmt,
tryStmt,
yieldStmt,
awaitStmt,
importStmt,
deferStmt,
# An expression followed by a semicolon
exprStmt,
# Expressions
assignExpr,
lambdaExpr,
awaitExpr,
yieldExpr,
setItemExpr, # Set expressions like a.b = "c"
binaryExpr,
unaryExpr,
sliceExpr,
callExpr,
getItemExpr, # Get expressions like a.b
# Primary expressions
groupingExpr, # Parenthesized expressions such as (true) and (3 + 4)
trueExpr,
falseExpr,
strExpr,
charExpr,
intExpr,
floatExpr,
hexExpr,
octExpr,
binExpr,
nilExpr,
nanExpr,
infExpr,
identExpr, # Identifier
pragmaExpr
# Here I would've rather used object variants, and in fact that's what was in
# place before, but not being able to re-declare a field of the same type in
# another case branch is kind of a deal breaker long-term, so until that is
# fixed (check out https://github.com/nim-lang/RFCs/issues/368 for more info)
# I'll stick to using inheritance instead
# Generic AST node types
ASTNode* = ref object of RootObj
## An AST node
kind*: NodeKind
# Regardless of the type of node, we keep the token in the AST node for internal usage.
# This is not shown when the node is printed, but makes it a heck of a lot easier to report
# errors accurately even deep in the compilation pipeline
token*: Token
# This weird inheritance chain is needed for the parser to
# work properly
Declaration* = ref object of ASTNode
## A declaration
pragmas*: seq[Pragma]
generics*: seq[tuple[name: IdentExpr, cond: Expression]]
Statement* = ref object of Declaration
## A statement
Expression* = ref object of Statement
## An expression
LiteralExpr* = ref object of Expression
# Using a string for literals makes it much easier to handle numeric types, as
# there is no overflow nor underflow or float precision issues during parsing.
# Numbers are just serialized as strings and then converted back to numbers
# before being passed to the VM, which also keeps the door open in the future
# to implementing bignum arithmetic that can take advantage of natively supported
# machine types, meaning that if a numeric type fits into a 64 bit signed/unsigned
# int then it is stored in such a type to save space, otherwise it is just converted
# to a bigint. Bigfloats with arbitrary-precision arithmetic would also be nice,
# although arguably less useful (and probably significantly slower than bigints)
literal*: Token
IntExpr* = ref object of LiteralExpr
OctExpr* = ref object of LiteralExpr
HexExpr* = ref object of LiteralExpr
BinExpr* = ref object of LiteralExpr
FloatExpr* = ref object of LiteralExpr
StrExpr* = ref object of LiteralExpr
CharExpr* = ref object of LiteralExpr
TrueExpr* = ref object of LiteralExpr
FalseExpr* = ref object of LiteralExpr
NilExpr* = ref object of LiteralExpr
NanExpr* = ref object of LiteralExpr
InfExpr* = ref object of LiteralExpr
IdentExpr* = ref object of Expression
name*: Token
GroupingExpr* = ref object of Expression
expression*: Expression
GetItemExpr* = ref object of Expression
obj*: Expression
name*: IdentExpr
SetItemExpr* = ref object of GetItemExpr
# Since a setItem expression is just
# a getItem one followed by an assignment,
# inheriting it from getItem makes sense
value*: Expression
CallExpr* = ref object of Expression
callee*: Expression # The object being called
arguments*: tuple[positionals: seq[Expression], keyword: seq[tuple[
name: IdentExpr, value: Expression]]]
UnaryExpr* = ref object of Expression
operator*: Token
a*: Expression
BinaryExpr* = ref object of UnaryExpr
# Binary expressions can be seen here as unary
# expressions with an extra operand so we just
# inherit from that and add a second operand
b*: Expression
YieldExpr* = ref object of Expression
expression*: Expression
AwaitExpr* = ref object of Expression
expression*: Expression
LambdaExpr* = ref object of Expression
body*: Statement
arguments*: seq[tuple[name: IdentExpr, valueType: Expression,
mutable: bool, isRef: bool, isPtr: bool]]
defaults*: seq[Expression]
isGenerator*: bool
isAsync*: bool
isPure*: bool
returnType*: Expression
hasExplicitReturn*: bool
SliceExpr* = ref object of Expression
expression*: Expression
ends*: seq[Expression]
AssignExpr* = ref object of Expression
name*: Expression
value*: Expression
ExprStmt* = ref object of Statement
expression*: Expression
ImportStmt* = ref object of Statement
moduleName*: IdentExpr
AssertStmt* = ref object of Statement
expression*: Expression
RaiseStmt* = ref object of Statement
exception*: Expression
BlockStmt* = ref object of Statement
code*: seq[Declaration]
ForStmt* = ref object of Statement
discard # Unused
ForEachStmt* = ref object of Statement
identifier*: IdentExpr
expression*: Expression
body*: Statement
DeferStmt* = ref object of Statement
expression*: Expression
TryStmt* = ref object of Statement
body*: Statement
handlers*: seq[tuple[body: Statement, exc: IdentExpr]]
finallyClause*: Statement
elseClause*: Statement
WhileStmt* = ref object of Statement
condition*: Expression
body*: Statement
AwaitStmt* = ref object of Statement
expression*: Expression
BreakStmt* = ref object of Statement
ContinueStmt* = ref object of Statement
ReturnStmt* = ref object of Statement
value*: Expression
IfStmt* = ref object of Statement
condition*: Expression
thenBranch*: Statement
elseBranch*: Statement
YieldStmt* = ref object of Statement
expression*: Expression
VarDecl* = ref object of Declaration
name*: IdentExpr
value*: Expression
isConst*: bool
isPrivate*: bool
isLet*: bool
valueType*: Expression
FunDecl* = ref object of Declaration
name*: IdentExpr
body*: Statement
arguments*: seq[tuple[name: IdentExpr, valueType: Expression,
mutable: bool, isRef: bool, isPtr: bool]]
defaults*: seq[Expression]
isAsync*: bool
isGenerator*: bool
isPrivate*: bool
isPure*: bool
returnType*: Expression
hasExplicitReturn*: bool
Pragma* = ref object of Expression
name*: IdentExpr
args*: seq[LiteralExpr]
proc isConst*(self: ASTNode): bool =
## Returns true if the given
## AST node represents a value
## of constant type. All integers,
## strings and singletons count as
## constants
case self.kind:
of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
infExpr, nanExpr, floatExpr, nilExpr:
return true
else:
return false
proc isLiteral*(self: ASTNode): bool {.inline.} =
## Returns if the AST node represents a literal
self.kind in {intExpr, hexExpr, binExpr, octExpr,
strExpr, falseExpr, trueExpr, infExpr,
nanExpr, floatExpr, nilExpr
}
## AST node constructors
proc newASTNode*(kind: NodeKind, token: Token): ASTNode =
## Initializes a new generic ASTNode object
new(result)
result.kind = kind
result.token = token
proc newPragma*(name: IdentExpr, args: seq[LiteralExpr]): Pragma =
new(result)
result.kind = pragmaExpr
result.args = args
result.name = name
proc newIntExpr*(literal: Token): IntExpr =
result = IntExpr(kind: intExpr)
result.literal = literal
result.token = literal
proc newOctExpr*(literal: Token): OctExpr =
result = OctExpr(kind: octExpr)
result.literal = literal
result.token = literal
proc newHexExpr*(literal: Token): HexExpr =
result = HexExpr(kind: hexExpr)
result.literal = literal
result.token = literal
proc newBinExpr*(literal: Token): BinExpr =
result = BinExpr(kind: binExpr)
result.literal = literal
result.token = literal
proc newFloatExpr*(literal: Token): FloatExpr =
result = FloatExpr(kind: floatExpr)
result.literal = literal
result.token = literal
proc newTrueExpr*(token: Token): LiteralExpr = LiteralExpr(kind: trueExpr,
token: token, literal: token)
proc newFalseExpr*(token: Token): LiteralExpr = LiteralExpr(kind: falseExpr,
token: token, literal: token)
proc newNaNExpr*(token: Token): LiteralExpr = LiteralExpr(kind: nanExpr,
token: token, literal: token)
proc newNilExpr*(token: Token): LiteralExpr = LiteralExpr(kind: nilExpr,
token: token, literal: token)
proc newInfExpr*(token: Token): LiteralExpr = LiteralExpr(kind: infExpr,
token: token, literal: token)
proc newStrExpr*(literal: Token): StrExpr =
result = StrExpr(kind: strExpr)
result.literal = literal
result.token = literal
proc newCharExpr*(literal: Token): CharExpr =
result = CharExpr(kind: charExpr)
result.literal = literal
result.token = literal
proc newIdentExpr*(name: Token): IdentExpr =
result = IdentExpr(kind: identExpr)
result.name = name
result.token = name
proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr =
result = GroupingExpr(kind: groupingExpr)
result.expression = expression
result.token = token
proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression,
mutable: bool, isRef: bool, isPtr: bool]], defaults: seq[Expression],
body: Statement, isGenerator: bool, isAsync: bool, token: Token,
returnType: Expression, pragmas: seq[Pragma],
generics: seq[tuple[name: IdentExpr, cond: Expression]]): LambdaExpr =
result = LambdaExpr(kind: lambdaExpr)
result.body = body
result.arguments = arguments
result.defaults = defaults
result.isGenerator = isGenerator
result.isAsync = isAsync
result.token = token
result.returnType = returnType
result.isPure = false
result.pragmas = pragmas
result.generics = generics
proc newGetItemExpr*(obj: Expression, name: IdentExpr,
token: Token): GetItemExpr =
result = GetItemExpr(kind: getItemExpr)
result.obj = obj
result.name = name
result.token = token
proc newSetItemExpr*(obj: Expression, name: IdentExpr, value: Expression,
token: Token): SetItemExpr =
result = SetItemExpr(kind: setItemExpr)
result.obj = obj
result.name = name
result.value = value
result.token = token
proc newCallExpr*(callee: Expression, arguments: tuple[positionals: seq[
Expression], keyword: seq[tuple[name: IdentExpr, value: Expression]]],
token: Token): CallExpr =
result = CallExpr(kind: callExpr)
result.callee = callee
result.arguments = arguments
result.token = token
proc newSliceExpr*(expression: Expression, ends: seq[Expression],
token: Token): SliceExpr =
result = SliceExpr(kind: sliceExpr)
result.expression = expression
result.ends = ends
result.token = token
proc newUnaryExpr*(operator: Token, a: Expression): UnaryExpr =
result = UnaryExpr(kind: unaryExpr)
result.operator = operator
result.a = a
result.token = result.operator
proc newBinaryExpr*(a: Expression, operator: Token, b: Expression): BinaryExpr =
result = BinaryExpr(kind: binaryExpr)
result.operator = operator
result.a = a
result.b = b
result.token = operator
proc newYieldExpr*(expression: Expression, token: Token): YieldExpr =
result = YieldExpr(kind: yieldExpr)
result.expression = expression
result.token = token
proc newAssignExpr*(name: Expression, value: Expression,
token: Token): AssignExpr =
result = AssignExpr(kind: assignExpr)
result.name = name
result.value = value
result.token = token
proc newAwaitExpr*(expression: Expression, token: Token): AwaitExpr =
result = AwaitExpr(kind: awaitExpr)
result.expression = expression
result.token = token
proc newExprStmt*(expression: Expression, token: Token): ExprStmt =
result = ExprStmt(kind: exprStmt)
result.expression = expression
result.token = token
proc newImportStmt*(moduleName: IdentExpr, token: Token): ImportStmt =
result = ImportStmt(kind: importStmt)
result.moduleName = moduleName
result.token = token
proc newYieldStmt*(expression: Expression, token: Token): YieldStmt =
result = YieldStmt(kind: yieldStmt)
result.expression = expression
result.token = token
proc newAwaitStmt*(expression: Expression, token: Token): AwaitStmt =
result = AwaitStmt(kind: awaitStmt)
result.expression = expression
result.token = token
proc newAssertStmt*(expression: Expression, token: Token): AssertStmt =
result = AssertStmt(kind: assertStmt)
result.expression = expression
result.token = token
proc newDeferStmt*(expression: Expression, token: Token): DeferStmt =
result = DeferStmt(kind: deferStmt)
result.expression = expression
result.token = token
proc newRaiseStmt*(exception: Expression, token: Token): RaiseStmt =
result = RaiseStmt(kind: raiseStmt)
result.exception = exception
result.token = token
proc newTryStmt*(body: Statement, handlers: seq[tuple[body: Statement, exc: IdentExpr]],
finallyClause: Statement,
elseClause: Statement, token: Token): TryStmt =
result = TryStmt(kind: tryStmt)
result.body = body
result.handlers = handlers
result.finallyClause = finallyClause
result.elseClause = elseClause
result.token = token
proc newBlockStmt*(code: seq[Declaration], token: Token): BlockStmt =
result = BlockStmt(kind: blockStmt)
result.code = code
result.token = token
proc newWhileStmt*(condition: Expression, body: Statement,
token: Token): WhileStmt =
result = WhileStmt(kind: whileStmt)
result.condition = condition
result.body = body
result.token = token
proc newForEachStmt*(identifier: IdentExpr, expression: Expression,
body: Statement, token: Token): ForEachStmt =
result = ForEachStmt(kind: forEachStmt)
result.identifier = identifier
result.expression = expression
result.body = body
result.token = token
proc newBreakStmt*(token: Token): BreakStmt =
result = BreakStmt(kind: breakStmt)
result.token = token
proc newContinueStmt*(token: Token): ContinueStmt =
result = ContinueStmt(kind: continueStmt)
result.token = token
proc newReturnStmt*(value: Expression, token: Token): ReturnStmt =
result = ReturnStmt(kind: returnStmt)
result.value = value
result.token = token
proc newIfStmt*(condition: Expression, thenBranch, elseBranch: Statement,
token: Token): IfStmt =
result = IfStmt(kind: ifStmt)
result.condition = condition
result.thenBranch = thenBranch
result.elseBranch = elseBranch
result.token = token
proc newVarDecl*(name: IdentExpr, value: Expression, isConst: bool = false,
isPrivate: bool = true, token: Token, isLet: bool = false,
valueType: Expression, pragmas: seq[Pragma]): VarDecl =
result = VarDecl(kind: varDecl)
result.name = name
result.value = value
result.isConst = isConst
result.isPrivate = isPrivate
result.token = token
result.isLet = isLet
result.valueType = valueType
result.pragmas = pragmas
proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]], defaults: seq[Expression],
body: Statement, isAsync, isGenerator: bool,
isPrivate: bool, token: Token, pragmas: seq[Pragma],
returnType: Expression, generics: seq[tuple[name: IdentExpr, cond: Expression]]): FunDecl =
result = FunDecl(kind: funDecl)
result.name = name
result.arguments = arguments
result.defaults = defaults
result.body = body
result.isAsync = isAsync
result.isGenerator = isGenerator
result.isPrivate = isPrivate
result.token = token
result.pragmas = pragmas
result.returnType = returnType
result.isPure = false
result.generics = generics
proc `$`*(self: ASTNode): string =
if self == nil:
return "nil"
case self.kind:
of intExpr, floatExpr, hexExpr, binExpr, octExpr, strExpr, trueExpr,
falseExpr, nanExpr, nilExpr, infExpr:
if self.kind in {trueExpr, falseExpr, nanExpr, nilExpr, infExpr}:
result &= &"Literal({($self.kind)[0..^5]})"
elif self.kind == strExpr:
result &= &"Literal({LiteralExpr(self).literal.lexeme[1..^2].escape()})"
else:
result &= &"Literal({LiteralExpr(self).literal.lexeme})"
of identExpr:
result &= &"Identifier('{IdentExpr(self).name.lexeme}')"
of groupingExpr:
result &= &"Grouping({GroupingExpr(self).expression})"
of getItemExpr:
var self = GetItemExpr(self)
result &= &"GetItem(obj={self.obj}, name={self.name})"
of setItemExpr:
var self = SetItemExpr(self)
result &= &"SetItem(obj={self.obj}, name={self.value}, value={self.value})"
of callExpr:
var self = CallExpr(self)
result &= &"""Call({self.callee}, arguments=(positionals=[{self.arguments.positionals.join(", ")}], keyword=[{self.arguments.keyword.join(", ")}]))"""
of unaryExpr:
var self = UnaryExpr(self)
result &= &"Unary(Operator('{self.operator.lexeme}'), {self.a})"
of binaryExpr:
var self = BinaryExpr(self)
result &= &"Binary({self.a}, Operator('{self.operator.lexeme}'), {self.b})"
of assignExpr:
var self = AssignExpr(self)
result &= &"Assign(name={self.name}, value={self.value})"
of exprStmt:
var self = ExprStmt(self)
result &= &"ExpressionStatement({self.expression})"
of breakStmt:
result = "Break()"
of importStmt:
var self = ImportStmt(self)
result &= &"Import({self.moduleName})"
of assertStmt:
var self = AssertStmt(self)
result &= &"Assert({self.expression})"
of raiseStmt:
var self = RaiseStmt(self)
result &= &"Raise({self.exception})"
of blockStmt:
var self = BlockStmt(self)
result &= &"""Block([{self.code.join(", ")}])"""
of whileStmt:
var self = WhileStmt(self)
result &= &"While(condition={self.condition}, body={self.body})"
of forEachStmt:
var self = ForEachStmt(self)
result &= &"ForEach(identifier={self.identifier}, expression={self.expression}, body={self.body})"
of returnStmt:
var self = ReturnStmt(self)
result &= &"Return({self.value})"
of yieldExpr:
var self = YieldExpr(self)
result &= &"Yield({self.expression})"
of awaitExpr:
var self = AwaitExpr(self)
result &= &"Await({self.expression})"
of ifStmt:
var self = IfStmt(self)
if self.elseBranch == nil:
result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch=nil)"
else:
result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch={self.elseBranch})"
of yieldStmt:
var self = YieldStmt(self)
result &= &"YieldStmt({self.expression})"
of awaitStmt:
var self = AwaitStmt(self)
result &= &"AwaitStmt({self.expression})"
of varDecl:
var self = VarDecl(self)
result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType})"
of funDecl:
var self = FunDecl(self)
result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate})"""
of lambdaExpr:
var self = LambdaExpr(self)
result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync})"""
of deferStmt:
var self = DeferStmt(self)
result &= &"Defer({self.expression})"
of sliceExpr:
var self = SliceExpr(self)
result &= &"""Slice({self.expression}, ends=[{self.ends.join(", ")}])"""
of tryStmt:
var self = TryStmt(self)
result &= &"TryStmt(body={self.body}, handlers={self.handlers}"
if self.finallyClause != nil:
result &= &", finallyClause={self.finallyClause}"
else:
result &= ", finallyClause=nil"
if self.elseClause != nil:
result &= &", elseClause={self.elseClause}"
else:
result &= ", elseClause=nil"
result &= ")"
else:
discard
proc `==`*(self, other: IdentExpr): bool {.inline.} = self.token == other.token
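# A minimal sketch of how the constructors above compose: building the
# AST for the expression `1 + 2` by hand. The Token values are made up
# for illustration (the parser normally produces them):
when isMainModule:
    let
        one = Token(kind: Integer, lexeme: "1", line: 1, pos: (0, 0))
        plus = Token(kind: Symbol, lexeme: "+", line: 1, pos: (2, 2))
        two = Token(kind: Integer, lexeme: "2", line: 1, pos: (4, 4))
        node = newBinaryExpr(newIntExpr(one), plus, newIntExpr(two))
    echo node  # Binary(Literal(1), Operator('+'), Literal(2))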

View File

@ -0,0 +1,228 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Low level bytecode implementation details
import strutils
import strformat
import ../../util/multibyte
type
Chunk* = ref object
## A piece of bytecode.
## consts is the chunk's constant table, stored as a raw stream of bytes
## (it is also what gets written when serializing to/from a bytecode stream).
## code is the linear sequence of compiled bytecode instructions.
## lines maps bytecode instructions to line numbers using Run
## Length Encoding. The data is stored as a flat sequence of
## (line, count) pairs:
## - The first integer of each pair represents the line number
## - The second integer represents how many consecutive instructions
## belong to that line
##
## A visual representation may be easier to understand: [1, 2, 3, 4]
## is to be interpreted as "the first 2 instructions belong to line 1
## and the next 4 instructions belong to line 3".
## This is more efficient than using the naive approach, which would encode
## the same line number once per instruction and waste considerable amounts of space.
consts*: seq[uint8]
code*: seq[uint8]
lines*: seq[int]
OpCode* {.pure.} = enum
## Enum of Peon's bytecode opcodes
# Note: x represents the argument
# to unary opcodes, while a and b
# represent arguments to binary
# opcodes. Other variable names (c, d, ...)
# may be used for more complex opcodes. If
# an opcode takes any arguments at runtime,
# they come from either the stack or the VM's
# closure array. Some other opcodes (e.g.
# jumps), take arguments in the form of 16
# or 24 bit numbers that are defined statically
# at compilation time into the bytecode
# These push a constant onto the stack
LoadInt64 = 0u8,
LoadUInt64,
LoadInt32,
LoadUInt32,
LoadInt16,
LoadUInt16,
LoadInt8,
LoadUInt8,
LoadFloat64,
LoadFloat32,
LoadString,
## Singleton opcodes (each of them pushes a constant singleton on the stack)
LoadNil,
LoadTrue,
LoadFalse,
LoadNan,
LoadInf,
## Basic stack operations
Pop, # Pops an element off the stack and discards it
Push, # Pushes x onto the stack
PopN, # Pops x elements off the stack (optimization for exiting local scopes which usually pop many elements)
## Name resolution/handling
LoadAttribute, # Pushes the attribute b of object a onto the stack
LoadVar, # Pushes the object at position x in the stack onto the stack
StoreVar, # Stores the value of b at position a in the stack
LoadHeap, # Pushes the object at position x in the closure array onto the stack
StoreHeap, # Stores the value of b at position a in the closure array
## Looping and jumping
Jump, # Absolute, unconditional jump into the bytecode
JumpForwards, # Relative, unconditional, positive jump in the bytecode
JumpBackwards, # Relative, unconditional, negative jump in the bytecode
JumpIfFalse, # Jumps to a relative index in the bytecode if x is false
JumpIfTrue, # Jumps to a relative index in the bytecode if x is true
JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthyness). Optimization for if statements
JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
LongJump,
LongJumpIfFalse,
LongJumpIfTrue,
LongJumpIfFalsePop,
LongJumpIfFalseOrPop,
LongJumpForwards,
LongJumpBackwards,
## Functions
Call, # Calls a function and initiates a new stack frame
Return, # Terminates the current function without popping off the stack
ReturnValue, # Pops a return value off the stack and terminates the current function
## Exception handling
Raise, # Raises exception x or re-raises active exception if x is nil
BeginTry, # Initiates an exception handling context
FinishTry, # Closes the current exception handling context
## Generators
Yield, # Yields control from a generator back to the caller
## Coroutines
Await, # Calls an asynchronous function
## Misc
Assert, # Raises an AssertionFailed exception if x is false
NoOp, # Just a no-op
# We group instructions by their operation/operand types for easier handling when debugging
# Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {OpCode.Return, LoadNil,
LoadTrue, LoadFalse,
LoadNan, LoadInf,
Pop, OpCode.Raise,
BeginTry, FinishTry,
OpCode.Yield, OpCode.Await,
OpCode.NoOp, OpCode.ReturnValue}
# Constant instructions are instructions that operate on the bytecode constant table
const constantInstructions* = {LoadInt64, LoadUInt64,
LoadInt32, LoadUInt32,
LoadInt16, LoadUInt16,
LoadInt8, LoadUInt8,
LoadFloat64, LoadFloat32,
LoadString}
# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 24 bit integers
const stackTripleInstructions* = {StoreVar, LoadVar, LoadHeap, StoreHeap}
# Stack double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 16 bit integers
const stackDoubleInstructions* = {}
# Argument double instructions take hardcoded arguments as 16 bit integers
const argumentDoubleInstructions* = {PopN, }
# Argument triple instructions take hardcoded arguments as 24 bit integers
const argumentTripleInstructions* = {}
# Instructions that call functions
const callInstructions* = {Call, }
# Jump instructions jump at relative or absolute bytecode offsets
const jumpInstructions* = {Jump, LongJump, JumpIfFalse, JumpIfFalsePop,
JumpForwards, JumpBackwards,
LongJumpIfFalse, LongJumpIfFalsePop,
LongJumpForwards, LongJumpBackwards,
JumpIfTrue, LongJumpIfTrue}
proc newChunk*: Chunk =
## Initializes a new, empty chunk
result = Chunk(consts: @[], code: @[], lines: @[])
proc `$`*(self: Chunk): string = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])"""
proc write*(self: Chunk, newByte: uint8, line: int) =
## Adds the given instruction at the provided line number
## to the given chunk object
assert line > 0, "line must be greater than zero"
if self.lines.high() >= 1 and self.lines[^2] == line:
self.lines[^1] += 1
else:
self.lines.add(line)
self.lines.add(1)
self.code.add(newByte)
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
## Calls write in a loop with all members of the given
## array
for cByte in bytes:
self.write(cByte, line)
proc write*(self: Chunk, newByte: OpCode, line: int) =
## Adds the given instruction at the provided line number
## to the given chunk object
self.write(uint8(newByte), line)
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
## Calls write in a loop with all members of the given
## array
for cByte in bytes:
self.write(uint8(cByte), line)
proc getLine*(self: Chunk, idx: int): int =
## Returns the associated line of a given
## instruction index
if self.lines.len < 2:
raise newException(IndexDefect, "the chunk object is empty")
var
count: int
current: int = 0
for n in countup(0, self.lines.high(), 2):
count = self.lines[n + 1]
if idx in current - count..<current + count:
return self.lines[n]
current += count
raise newException(IndexDefect, "index out of range")
proc writeConstant*(self: Chunk, data: openarray[uint8]): array[3, uint8] =
## Writes a series of bytes to the chunk's constant
## table and returns the index of the first byte as
## an array of 3 bytes
result = self.consts.len().toTriple()
for b in data:
self.consts.add(b)
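# A small sketch of the API above: three instructions written at line 1
# and one at line 2 produce the RLE pairs [1, 3, 2, 1], which getLine()
# then decodes back on a per-instruction basis:
when isMainModule:
    var chunk = newChunk()
    for op in [LoadTrue, LoadFalse, Pop]:
        chunk.write(op, line = 1)
    chunk.write(OpCode.Return, line = 2)
    assert chunk.lines == @[1, 3, 2, 1]
    assert chunk.getLine(0) == 1  # First instruction -> line 1
    assert chunk.getLine(3) == 2  # Fourth instruction -> line 2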

View File

@ -0,0 +1,34 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import token
import ast
type
## Nim exceptions for internal Peon failures
PeonException* = ref object of CatchableError
LexingError* = ref object of PeonException
file*: string
lexeme*: string
line*: int
ParseError* = ref object of PeonException
file*: string
token*: Token
module*: string
CompileError* = ref object of PeonException
node*: ASTNode
file*: string
module*: string
SerializationError* = ref object of PeonException
file*: string

View File

@ -0,0 +1,89 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import strformat
type
TokenType* {.pure.} = enum
## Token types enumeration
# Booleans
True, False,
# Other singleton types
Infinity, NotANumber, Nil
# Control flow statements
If, Else,
# Looping statements
While, For,
# Keywords
Function, Break, Continue,
Var, Let, Const, Return,
Coroutine, Generator, Import,
Raise, Assert, Await, Foreach,
Yield, Defer, Try, Except,
Finally, Type, Operator, Case,
Enum, From, Ptr, Ref
# Literal types
Integer, Float, String, Identifier,
Binary, Octal, Hex, Char
# Brackets, parentheses,
# operators and others
LeftParen, RightParen, # ()
LeftBrace, RightBrace, # {}
LeftBracket, RightBracket, # []
Dot, Semicolon, Comma, # . ; ,
# Miscellaneous
EndOfFile, # Marks the end of the token stream
NoMatch, # Used internally by the symbol table
Comment, # Useful for documentation comments, pragmas, etc.
Symbol, # A generic symbol
# These are not used at the moment but may be
# employed to enforce indentation or other neat
# stuff I haven't thought about yet
Whitespace,
Tab,
Token* = ref object
## A token object
kind*: TokenType # Type of the token
lexeme*: string # The lexeme associated to the token
line*: int # The line where the token appears
pos*: tuple[start, stop: int] # The absolute position in the source file
# (0-indexed and inclusive at the beginning)
proc `$`*(self: Token): string =
## Returns a string representation of the given token
if self != nil:
result = &"Token(kind={self.kind}, lexeme='{$(self.lexeme)}', line={self.line}, pos=({self.pos.start}, {self.pos.stop}))"
else:
result = "nil"
proc `==`*(self, other: Token): bool =
## Returns self == other
return self.kind == other.kind and self.lexeme == other.lexeme
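# Note that the equality above deliberately ignores line and position
# information, so the same lexeme appearing in two different places
# still compares equal:
when isMainModule:
    let a = Token(kind: Symbol, lexeme: "+", line: 1, pos: (0, 0))
    let b = Token(kind: Symbol, lexeme: "+", line: 5, pos: (10, 10))
    assert a == b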

1155
src/frontend/parser.nim Normal file

File diff suppressed because it is too large Load Diff

359
src/main.nim Normal file
View File

@ -0,0 +1,359 @@
# Builtins & external libs
import strformat
import strutils
import terminal
import os
# Thanks art <3
import jale/editor as ed
import jale/templates
import jale/plugin/defaults
import jale/plugin/editor_history
import jale/keycodes
import jale/multiline
# Our stuff
import frontend/lexer as l
import frontend/parser as p
import frontend/compiler as c
import backend/vm as v
import util/serializer as s
# Forward declarations
proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = false
const debugParser = false
const debugCompiler = true
const debugSerializer = false
const debugRuntime = false
when debugSerializer:
import nimSHA2
import times
when debugCompiler:
import util/debugger
proc repl =
styledEcho fgMagenta, "Welcome into the peon REPL!"
var
keep = true
tokens: seq[Token] = @[]
tree: seq[Declaration] = @[]
compiled: Chunk
serialized: Serialized
tokenizer = newLexer()
parser = newParser()
compiler = newCompiler()
serializer = newSerializer()
vm = newPeonVM()
editor = getLineEditor()
input: string
tokenizer.fillSymbolTable()
editor.bindEvent(jeQuit):
stdout.styledWriteLine(fgGreen, "Goodbye!")
editor.prompt = ""
keep = false
editor.bindKey("ctrl+a"):
editor.content.home()
editor.bindKey("ctrl+e"):
editor.content.`end`()
while keep:
try:
input = editor.read()
if input.len() == 0:
continue
tokens = tokenizer.lex(input, "stdin")
if tokens.len() == 0:
continue
when debugLexer:
styledEcho fgCyan, "Tokenization step:"
for i, token in tokens:
if i == tokens.high():
# Who cares about EOF?
break
styledEcho fgGreen, "\t", $token
echo ""
tree = parser.parse(tokens, "stdin")
if tree.len() == 0:
continue
when debugParser:
styledEcho fgCyan, "Parsing step:"
for node in tree:
styledEcho fgGreen, "\t", $node
echo ""
compiled = compiler.compile(tree, "stdin")
when debugCompiler:
styledEcho fgCyan, "Compilation step:"
styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]"
styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]"
styledEcho fgCyan, "\nBytecode disassembler output below:\n"
disassembleChunk(compiled, "stdin")
echo ""
serializer.dumpFile(compiled, input, "stdin", "stdin.pbc")
serialized = serializer.loadFile("stdin.pbc")
when debugSerializer:
var hashMatches = computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash
styledEcho fgCyan, "Serialization step: "
styledEcho fgBlue, &"\t- File hash: ", fgYellow, serialized.fileHash, fgBlue, " (", if hashMatches: fgGreen else: fgRed, if hashMatches: "OK" else: "Fail", fgBlue, ")"
styledEcho fgBlue, "\t- Peon version: ", fgYellow, &"{serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch}", fgBlue, " (commit ", fgYellow, serialized.commitHash[0..8], fgBlue, ") on branch ", fgYellow, serialized.peonBranch
stdout.styledWriteLine(fgBlue, "\t- Compilation date & time: ", fgYellow, fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss"))
stdout.styledWrite(fgBlue, &"\t- Constants segment: ")
if serialized.chunk.consts == compiled.consts:
styledEcho fgGreen, "OK"
else:
styledEcho fgRed, "Corrupted"
stdout.styledWrite(fgBlue, &"\t- Code segment: ")
if serialized.chunk.code == compiled.code:
styledEcho fgGreen, "OK"
else:
styledEcho fgRed, "Corrupted"
stdout.styledWrite(fgBlue, "\t- Line info segment: ")
if serialized.chunk.lines == compiled.lines:
styledEcho fgGreen, "OK"
else:
styledEcho fgRed, "Corrupted"
when debugRuntime:
styledEcho fgCyan, "\n\nExecution step: "
vm.run(serialized.chunk)
except LexingError:
let exc = LexingError(getCurrentException())
let relPos = tokenizer.getRelPos(exc.line)
let line = tokenizer.getSource().splitLines()[exc.line - 1].strip()
stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ",
fgYellow, &"'{exc.file.extractFilename()}'", fgRed, ", line ", fgYellow, $exc.line, fgRed, " at ", fgYellow, &"'{exc.lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except ParseError:
let exc = ParseError(getCurrentException())
let lexeme = exc.token.lexeme
let lineNo = exc.token.line
let relPos = tokenizer.getRelPos(lineNo)
let fn = parser.getCurrentFunction()
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
var fnMsg = ""
if fn != nil and fn.kind == funDecl:
fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'"
stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ",
fgYellow, &"'{exc.file}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except CompileError:
let exc = CompileError(getCurrentException())
let lexeme = exc.node.token.lexeme
let lineNo = exc.node.token.line
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
var fn = compiler.getCurrentFunction()
var fnMsg = ""
if fn != nil and fn.kind == funDecl:
fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'"
stderr.styledWriteLine(fgRed, "A fatal error occurred while compiling ", fgYellow, &"'{exc.file}'", fgRed, ", module ",
fgYellow, &"'{exc.module}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except SerializationError:
let exc = SerializationError(getCurrentException())
stderr.styledWriteLine(fgRed, "A fatal error occurred while (de-)serializing", fgYellow, &"'{exc.file}'", fgGreen, ": ", getCurrentExceptionMsg())
quit(0)
proc runFile(f: string) =
var
tokens: seq[Token] = @[]
tree: seq[Declaration] = @[]
compiled: Chunk
serialized: Serialized
tokenizer = newLexer()
parser = newParser()
compiler = newCompiler()
serializer = newSerializer()
vm = newPeonVM()
input: string
tokenizer.fillSymbolTable()
try:
input = readFile(f)
tokens = tokenizer.lex(input, f)
if tokens.len() == 0:
return
when debugLexer:
styledEcho fgCyan, "Tokenization step:"
for i, token in tokens:
if i == tokens.high():
# Who cares about EOF?
break
styledEcho fgGreen, "\t", $token
echo ""
tree = parser.parse(tokens, f)
if tree.len() == 0:
return
when debugParser:
styledEcho fgCyan, "Parsing step:"
for node in tree:
styledEcho fgGreen, "\t", $node
echo ""
compiled = compiler.compile(tree, f)
when debugCompiler:
styledEcho fgCyan, "Compilation step:"
styledEcho fgCyan, "\tRaw byte stream: ", fgGreen, "[", fgYellow, compiled.code.join(", "), fgGreen, "]"
styledEcho fgCyan, "\tConstant table: ", fgGreen, "[", fgYellow, compiled.consts.join(", "), fgGreen, "]"
styledEcho fgCyan, "\nBytecode disassembler output below:\n"
disassembleChunk(compiled, f)
echo ""
serializer.dumpFile(compiled, input, f, splitFile(f).name & ".pbc")
serialized = serializer.loadFile(splitFile(f).name & ".pbc")
when debugSerializer:
var hashMatches = computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash
styledEcho fgCyan, "Serialization step: "
styledEcho fgBlue, &"\t- File hash: ", fgYellow, serialized.fileHash, fgBlue, " (", if hashMatches: fgGreen else: fgRed, if hashMatches: "OK" else: "Fail", fgBlue, ")"
styledEcho fgBlue, "\t- Peon version: ", fgYellow, &"{serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch}", fgBlue, " (commit ", fgYellow, serialized.commitHash[0..8], fgBlue, ") on branch ", fgYellow, serialized.peonBranch
stdout.styledWriteLine(fgBlue, "\t- Compilation date & time: ", fgYellow, fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss"))
stdout.styledWrite(fgBlue, &"\t- Constants segment: ")
if serialized.chunk.consts == compiled.consts:
styledEcho fgGreen, "OK"
else:
styledEcho fgRed, "Corrupted"
stdout.styledWrite(fgBlue, &"\t- Code segment: ")
if serialized.chunk.code == compiled.code:
styledEcho fgGreen, "OK"
else:
styledEcho fgRed, "Corrupted"
stdout.styledWrite(fgBlue, "\t- Line info segment: ")
if serialized.chunk.lines == compiled.lines:
styledEcho fgGreen, "OK"
else:
styledEcho fgRed, "Corrupted"
when debugRuntime:
styledEcho fgCyan, "\n\nExecution step: "
vm.run(serialized.chunk)
except LexingError:
let exc = LexingError(getCurrentException())
let relPos = tokenizer.getRelPos(exc.line)
let line = tokenizer.getSource().splitLines()[exc.line - 1].strip()
stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ",
fgYellow, &"'{exc.file}'", fgRed, ", line ", fgYellow, $exc.line, fgRed, " at ", fgYellow, &"'{exc.lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except ParseError:
let exc = ParseError(getCurrentException())
let lexeme = exc.token.lexeme
let lineNo = exc.token.line
let relPos = tokenizer.getRelPos(lineNo)
let fn = parser.getCurrentFunction()
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
var fnMsg = ""
if fn != nil and fn.kind == funDecl:
fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'"
stderr.styledWriteLine(fgRed, "A fatal error occurred while parsing ", fgYellow, &"'{exc.file}'", fgRed, ", module ",
fgYellow, &"'{exc.file}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except CompileError:
let exc = CompileError(getCurrentException())
let lexeme = exc.node.token.lexeme
let lineNo = exc.node.token.line
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
var fn = compiler.getCurrentFunction()
var fnMsg = ""
if fn != nil and fn.kind == funDecl:
fnMsg &= &"in function '{FunDecl(fn).name.token.lexeme}'"
stderr.styledWriteLine(fgRed, "A fatal error occurred while compiling ", fgYellow, &"'{exc.file}'", fgRed, ", module ",
fgYellow, &"'{exc.module}'", fgRed, ", line ", fgYellow, $lineNo, fgRed, " at ", fgYellow, &"'{lexeme}'",
fgRed, ": ", fgGreen , getCurrentExceptionMsg())
styledEcho fgBlue, "Source line: " , fgDefault, line
styledEcho fgCyan, " ".repeat(len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except SerializationError:
let exc = SerializationError(getCurrentException())
stderr.styledWriteLine(fgRed, "A fatal error occurred while (de-)serializing", fgYellow, &"'{exc.file}'", fgGreen, ": ", getCurrentExceptionMsg())
except IOError:
stderr.styledWriteLine(fgRed, "An error occurred while trying to read ", fgYellow, &"'{f}'", fgGreen, &": {getCurrentExceptionMsg()}")
except OSError:
stderr.styledWriteLine(fgRed, "An error occurred while trying to read ", fgYellow, &"'{f}'", fgGreen, &": {osErrorMsg(osLastError())} [errno {osLastError()}]")
when isMainModule:
setControlCHook(proc () {.noconv.} = quit(0))
let args = commandLineParams()
if args.len() == 0:
repl()
else:
runFile(args[0])
proc fillSymbolTable(tokenizer: Lexer) =
## Initializes the Lexer's symbol
## table with the builtin symbols
## and keywords
# 1-byte symbols
tokenizer.symbols.addSymbol("{", LeftBrace)
tokenizer.symbols.addSymbol("}", RightBrace)
tokenizer.symbols.addSymbol("(", LeftParen)
tokenizer.symbols.addSymbol(")", RightParen)
tokenizer.symbols.addSymbol("[", LeftBracket)
tokenizer.symbols.addSymbol("]", RightBracket)
tokenizer.symbols.addSymbol(".", Dot)
tokenizer.symbols.addSymbol(",", Comma)
tokenizer.symbols.addSymbol(";", Semicolon)
# Keywords
tokenizer.symbols.addKeyword("type", TokenType.Type)
tokenizer.symbols.addKeyword("enum", Enum)
tokenizer.symbols.addKeyword("case", Case)
tokenizer.symbols.addKeyword("operator", Operator)
tokenizer.symbols.addKeyword("generator", Generator)
tokenizer.symbols.addKeyword("fn", TokenType.Function)
tokenizer.symbols.addKeyword("coroutine", Coroutine)
tokenizer.symbols.addKeyword("break", TokenType.Break)
tokenizer.symbols.addKeyword("continue", Continue)
tokenizer.symbols.addKeyword("while", While)
tokenizer.symbols.addKeyword("for", For)
tokenizer.symbols.addKeyword("foreach", Foreach)
tokenizer.symbols.addKeyword("if", If)
tokenizer.symbols.addKeyword("else", Else)
tokenizer.symbols.addKeyword("await", TokenType.Await)
tokenizer.symbols.addKeyword("defer", Defer)
tokenizer.symbols.addKeyword("try", Try)
tokenizer.symbols.addKeyword("except", Except)
tokenizer.symbols.addKeyword("finally", Finally)
tokenizer.symbols.addKeyword("raise", TokenType.Raise)
tokenizer.symbols.addKeyword("assert", TokenType.Assert)
tokenizer.symbols.addKeyword("const", Const)
tokenizer.symbols.addKeyword("let", Let)
tokenizer.symbols.addKeyword("var", Var)
tokenizer.symbols.addKeyword("import", Import)
tokenizer.symbols.addKeyword("yield", TokenType.Yield)
tokenizer.symbols.addKeyword("return", TokenType.Return)
# These are more like expressions with a reserved
# name that produce a value of a builtin type,
# but we don't need to care about that until
# we're in the parsing/compilation steps, so
# it's fine
tokenizer.symbols.addKeyword("nan", NotANumber)
tokenizer.symbols.addKeyword("inf", Infinity)
tokenizer.symbols.addKeyword("nil", TokenType.Nil)
tokenizer.symbols.addKeyword("true", True)
tokenizer.symbols.addKeyword("false", False)
tokenizer.symbols.addKeyword("ref", Ref)
tokenizer.symbols.addKeyword("ptr", Ptr)
for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]:
tokenizer.symbols.addSymbol(sym, Symbol)
proc getLineEditor: LineEditor =
result = newLineEditor()
result.prompt = "=> "
result.populateDefaults()
let history = result.plugHistory()
result.bindHistory(history)

87
src/memory/allocator.nim Normal file
View File

@ -0,0 +1,87 @@
# Copyright 2022 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Memory allocator from JAPL
import segfaults
import ../config
when DEBUG_TRACE_ALLOCATION:
import strformat
proc reallocate*(p: pointer, oldSize: int, newSize: int): pointer =
## Wrapper around realloc/dealloc
try:
if newSize == 0 and p != nil:
when DEBUG_TRACE_ALLOCATION:
if oldSize > 1:
echo &"DEBUG - Memory manager: Deallocating {oldSize} bytes"
else:
echo "DEBUG - Memory manager: Deallocating 1 byte"
dealloc(p)
return nil
when DEBUG_TRACE_ALLOCATION:
if p == nil and newSize == 0:
echo &"DEBUG - Memory manager: Warning, asked to dealloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request"
if oldSize > 0 and p != nil or oldSize == 0:
when DEBUG_TRACE_ALLOCATION:
if oldSize == 0:
if newSize > 1:
echo &"DEBUG - Memory manager: Allocating {newSize} bytes of memory"
else:
echo "DEBUG - Memory manager: Allocating 1 byte of memory"
else:
echo &"DEBUG - Memory manager: Resizing {oldSize} bytes of memory to {newSize} bytes"
result = realloc(p, newSize)
when DEBUG_TRACE_ALLOCATION:
if oldSize > 0 and pointr == nil:
echo &"DEBUG - Memory manager: Warning, asked to realloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request"
except NilAccessDefect:
stderr.write("JAPL: could not manage memory, segmentation fault\n")
quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit
template resizeArray*(kind: untyped, pointr: pointer, oldCount,
newCount: int): untyped =
## Handy macro (in the C sense of macro, not nim's) to resize a dynamic array
cast[ptr UncheckedArray[kind]](reallocate(pointr, sizeof(kind) * oldCount,
sizeof(kind) * newCount))
template freeArray*(kind: untyped, pointr: pointer, oldCount: int): untyped =
## Frees a dynamic array
reallocate(pointr, sizeof(kind) * oldCount, 0)
template free*(kind: untyped, pointr: pointer): untyped =
## Frees a pointer by reallocating its
## size to 0
reallocate(pointr, sizeof(kind), 0)
template growCapacity*(capacity: int): untyped =
## Handy macro used to calculate how much
## more memory is needed when reallocating
## dynamic arrays
if capacity < 8:
8
else:
capacity * ARRAY_GROW_FACTOR
template allocate*(castTo: untyped, sizeTo: untyped, count: int): untyped =
## Allocates an object and casts its pointer to the specified type
cast[ptr castTo](reallocate(nil, 0, sizeof(sizeTo) * count))
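# A usage sketch of the templates above: growing a dynamic array of
# ints on demand, writing to it and freeing it again. The counts are
# made up for illustration:
when isMainModule:
    var
        capacity = 0
        count = 0
        data: ptr UncheckedArray[int] = nil
    if capacity < count + 1:
        let oldCapacity = capacity
        capacity = growCapacity(oldCapacity)  # 0 -> 8, then *= ARRAY_GROW_FACTOR
        data = resizeArray(int, data, oldCapacity, capacity)
    data[count] = 42
    inc(count)
    discard freeArray(int, data, capacity)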

View File

@ -0,0 +1,193 @@
## Builtin arithmetic operators for Peon

operator `+`(a, b: int): int {
    #pragma[magic: AddInt64, pure]
    return;
}

operator `+`(a, b: uint): uint {
    #pragma[magic: AddUInt64, pure]
    return;
}

operator `+`(a, b: int32): int32 {
    #pragma[magic: AddInt32, pure]
    return;
}

operator `+`(a, b: uint32): uint32 {
    #pragma[magic: AddUInt32, pure]
    return;
}

operator `+`(a, b: int16): int16 {
    #pragma[magic: AddInt16, pure]
    return;
}

operator `+`(a, b: uint16): uint16 {
    #pragma[magic: AddUInt16, pure]
    return;
}

operator `+`(a, b: int8): int8 {
    #pragma[magic: AddInt8, pure]
    return;
}

operator `+`(a, b: uint8): uint8 {
    #pragma[magic: AddUInt8, pure]
    return;
}

operator `-`(a, b: int): int {
    #pragma[magic: SubInt64, pure]
    return;
}

operator `-`(a, b: uint): uint {
    #pragma[magic: SubUInt64, pure]
    return;
}

operator `-`(a, b: int32): int32 {
    #pragma[magic: SubInt32, pure]
    return;
}

operator `-`(a, b: uint32): uint32 {
    #pragma[magic: SubUInt32, pure]
    return;
}

operator `-`(a, b: int16): int16 {
    #pragma[magic: SubInt16, pure]
    return;
}

operator `-`(a, b: uint16): uint16 {
    #pragma[magic: SubUInt16, pure]
    return;
}

operator `-`(a, b: int8): int8 {
    #pragma[magic: SubInt8, pure]
    return;
}

operator `-`(a, b: uint8): uint8 {
    #pragma[magic: SubUInt8, pure]
    return;
}

operator `*`(a, b: int): int {
    #pragma[magic: MulInt64, pure]
    return;
}

operator `*`(a, b: uint): uint {
    #pragma[magic: MulUInt64, pure]
    return;
}

operator `*`(a, b: int32): int32 {
    #pragma[magic: MulInt32, pure]
    return;
}

operator `*`(a, b: uint32): uint32 {
    #pragma[magic: MulUInt32, pure]
    return;
}

operator `*`(a, b: int16): int16 {
    #pragma[magic: MulInt16, pure]
    return;
}

operator `*`(a, b: uint16): uint16 {
    #pragma[magic: MulUInt16, pure]
    return;
}

operator `*`(a, b: int8): int8 {
    #pragma[magic: MulInt8, pure]
    return;
}

operator `*`(a, b: uint8): uint8 {
    #pragma[magic: MulUInt8, pure]
    return;
}

operator `/`(a, b: int): int {
    #pragma[magic: DivInt64, pure]
    return;
}

operator `/`(a, b: uint): uint {
    #pragma[magic: DivUInt64, pure]
    return;
}

operator `/`(a, b: int32): int32 {
    #pragma[magic: DivInt32, pure]
    return;
}

operator `/`(a, b: uint32): uint32 {
    #pragma[magic: DivUInt32, pure]
    return;
}

operator `/`(a, b: int16): int16 {
    #pragma[magic: DivInt16, pure]
    return;
}

operator `/`(a, b: uint16): uint16 {
    #pragma[magic: DivUInt16, pure]
    return;
}

operator `/`(a, b: int8): int8 {
    #pragma[magic: DivInt8, pure]
    return;
}

operator `/`(a, b: uint8): uint8 {
    #pragma[magic: DivUInt8, pure]
    return;
}
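
The empty bodies appear to be deliberate: the #pragma[magic: ..., pure] line seemingly tells the compiler to replace each call with the named VM instruction instead of emitting a regular function call. As a sketch of what a call site resolves to (literal typing inferred from the test file below, not stated here):

1 + 2;  # dispatches to operator `+`(a, b: int): int, i.e. the AddInt64 magic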

5
src/tests.pn Normal file
View File

@ -0,0 +1,5 @@
operator `+`(a: int): int {
    return a;
}

+1;  # Works: defined for int64

174
src/util/debugger.nim Normal file
View File

@ -0,0 +1,174 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../frontend/meta/bytecode
import multibyte
import strformat
import strutils
import terminal
proc nl = stdout.write("\n")


proc printDebug(s: string, newline: bool = false) =
    stdout.styledWrite(fgMagenta, "DEBUG - Disassembler -> ")
    stdout.styledWrite(fgGreen, s)
    if newline:
        nl()


proc printName(opcode: OpCode, newline: bool = false) =
    stdout.styledWrite(fgRed, $opcode, " (", fgYellow, $uint8(opcode), fgRed, ")")
    if newline:
        nl()


proc printInstruction(instruction: OpCode, newline: bool = false) =
    printDebug("Instruction: ")
    printName(instruction)
    if newline:
        nl()


proc simpleInstruction(instruction: OpCode, offset: int): int =
    printInstruction(instruction)
    nl()
    return offset + 1


proc stackTripleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs instructions that operate on a single value on the stack using a 24-bit operand
    var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple()
    printInstruction(instruction)
    stdout.styledWrite(fgGreen, ", points to index ")
    stdout.styledWriteLine(fgYellow, &"{slot}")
    return offset + 4
proc stackDoubleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs instructions that operate on a single value on the stack using a 16-bit operand
    var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble()
    printInstruction(instruction)
    stdout.styledWrite(fgGreen, ", points to index ")
    stdout.styledWriteLine(fgYellow, &"{slot}")
    return offset + 3
proc argumentDoubleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs instructions that operate on a hardcoded value on the stack using a 16-bit operand
    var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble()
    printInstruction(instruction)
    stdout.styledWrite(fgGreen, ", has argument ")
    stdout.styledWriteLine(fgYellow, $slot)
    return offset + 3


proc argumentTripleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs instructions that operate on a hardcoded value on the stack using a 24-bit operand
    var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple()
    printInstruction(instruction)
    stdout.styledWrite(fgGreen, ", has argument ")
    stdout.styledWriteLine(fgYellow, $slot)
    return offset + 4
proc callInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs function calls
    var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple()
    var args = [chunk.code[offset + 4], chunk.code[offset + 5], chunk.code[offset + 6]].fromTriple()
    printInstruction(instruction)
    stdout.styledWrite(fgGreen, ", jumps to address ", fgYellow, $slot, fgGreen, " with ", fgYellow, $args, fgGreen, " argument")
    if args != 1:  # pluralize for 0 or 2+ arguments
        stdout.styledWrite(fgYellow, "s")
    nl()
    return offset + 7
proc constantInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs instructions that operate on the constant table
    var constant = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple()
    printInstruction(instruction)
    stdout.styledWrite(fgGreen, ", points to constant at position ", fgYellow, $constant)
    nl()
    printDebug("Operand: ")
    stdout.styledWriteLine(fgYellow, &"{chunk.consts[constant]}")
    return offset + 4
proc jumpInstruction(instruction: OpCode, chunk: Chunk, offset: int): int =
    ## Debugs jumps
    var jump: int
    var size: int
    case instruction:
        of Jump, JumpIfFalse, JumpIfTrue, JumpIfFalsePop, JumpForwards, JumpBackwards:
            jump = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble().int()
            size = 3   # 1 opcode byte + 16-bit operand
        of LongJump, LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop,
           LongJumpForwards, LongJumpBackwards:
            jump = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple().int()
            size = 4   # 1 opcode byte + 24-bit operand
        else:
            discard   # Unreachable
    printInstruction(instruction, true)
    printDebug("Jump size: ")
    stdout.styledWrite(fgYellow, $jump)
    nl()
    return offset + size
proc disassembleInstruction*(chunk: Chunk, offset: int): int =
    ## Takes one bytecode instruction and prints it
    printDebug("Offset: ")
    stdout.styledWriteLine(fgYellow, $offset)
    printDebug("Line: ")
    stdout.styledWriteLine(fgYellow, &"{chunk.getLine(offset)}")
    var opcode = OpCode(chunk.code[offset])
    case opcode:
        of simpleInstructions:
            result = simpleInstruction(opcode, offset)
        of constantInstructions:
            result = constantInstruction(opcode, chunk, offset)
        of stackDoubleInstructions:
            result = stackDoubleInstruction(opcode, chunk, offset)
        of stackTripleInstructions:
            result = stackTripleInstruction(opcode, chunk, offset)
        of argumentDoubleInstructions:
            result = argumentDoubleInstruction(opcode, chunk, offset)
        of argumentTripleInstructions:
            result = argumentTripleInstruction(opcode, chunk, offset)
        of callInstructions:
            result = callInstruction(opcode, chunk, offset)
        of jumpInstructions:
            result = jumpInstruction(opcode, chunk, offset)
        else:
            echo &"DEBUG - Unknown opcode {opcode} at index {offset}"
            result = offset + 1


proc disassembleChunk*(chunk: Chunk, name: string) =
    ## Takes a chunk of bytecode, and prints it
    echo &"==== Peon Bytecode Debugger - Chunk '{name}' ====\n"
    var index = 0
    while index < chunk.code.len:
        index = disassembleInstruction(chunk, index)
        echo ""
    echo &"==== Debug session ended - Chunk '{name}' ===="

61
src/util/multibyte.nim Normal file
View File

@ -0,0 +1,61 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Utilities to convert from/to our 16-bit and 24-bit representations
## of numbers
proc toDouble*(input: int | uint | uint16): array[2, uint8] =
    ## Converts an unsigned integer
    ## to an array[2, uint8]
    result = cast[array[2, uint8]](uint16(input))


proc toTriple*(input: uint | int): array[3, uint8] =
    ## Converts an unsigned integer to an array[3, uint8]
    result = cast[array[3, uint8]](uint(input))


proc toQuad*(input: int | uint | uint16 | uint32): array[4, uint8] =
    ## Converts an unsigned integer to an array[4, uint8]
    result = cast[array[4, uint8]](uint(input))


proc toLong*(input: int | uint | uint16 | uint32 | uint64): array[8, uint8] =
    ## Converts an unsigned integer to an array[8, uint8]
    result = cast[array[8, uint8]](uint(input))
proc fromDouble*(input: array[2, uint8]): uint16 =
    ## Rebuilds the output of toDouble into
    ## an uint16
    copyMem(result.addr, unsafeAddr(input), sizeof(uint16))


proc fromTriple*(input: array[3, uint8]): uint =
    ## Rebuilds the output of toTriple into
    ## an uint
    copyMem(result.addr, unsafeAddr(input), sizeof(uint8) * 3)


proc fromQuad*(input: array[4, uint8]): uint =
    ## Rebuilds the output of toQuad into
    ## an uint
    copyMem(result.addr, unsafeAddr(input), sizeof(uint32))


proc fromLong*(input: array[8, uint8]): uint =
    ## Rebuilds the output of toLong into
    ## an uint
    copyMem(result.addr, unsafeAddr(input), sizeof(uint64))
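
A quick round-trip check of these helpers (a sketch; the literals are arbitrary and, like the casts above, it assumes a little-endian host):

when isMainModule:
    # 513 = 0x0201 fits in 16 bits; 70000 needs the 24-bit form
    assert toDouble(513).fromDouble() == 513'u16
    assert toTriple(70000).fromTriple() == 70000'u
    assert toQuad(513).fromQuad() == 513'u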

241
src/util/serializer.nim Normal file
View File

@ -0,0 +1,241 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../frontend/meta/errors
import ../frontend/meta/bytecode
import ../config
import multibyte
import ../frontend/compiler
import strformat
import strutils
import nimSHA2
import times
export ast
type
    Serializer* = ref object
        file: string
        filename: string
        chunk: Chunk

    Serialized* = ref object
        ## Wrapper returned by
        ## the Serializer.read*
        ## procedures to store
        ## metadata
        fileHash*: string
        peonVer*: tuple[major, minor, patch: int]
        peonBranch*: string
        commitHash*: string
        compileDate*: int
        chunk*: Chunk
proc `$`*(self: Serialized): string =
    result = &"Serialized(fileHash={self.fileHash}, version={self.peonVer.major}.{self.peonVer.minor}.{self.peonVer.patch}, branch={self.peonBranch}, commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]})"
proc error(self: Serializer, message: string) =
    ## Raises a formatted SerializationError exception
    raise SerializationError(msg: message, file: self.filename)


proc newSerializer*(self: Serializer = nil): Serializer =
    new(result)
    if self != nil:
        result = self
    result.file = ""
    result.filename = ""
    result.chunk = nil


## Basic routines and helpers to convert various objects from and to their byte representation

proc toBytes(self: Serializer, s: string): seq[byte] =
    for c in s:
        result.add(byte(c))


proc toBytes(self: Serializer, s: int): array[8, uint8] =
    result = cast[array[8, uint8]](s)


proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] =
    for b in d:
        result.add(b)


proc bytesToString(self: Serializer, input: seq[byte]): string =
    for b in input:
        result.add(char(b))


proc extend[T](s: var seq[T], a: openarray[T]) =
    ## Extends s with the elements of a
    for e in a:
        s.add(e)
proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) =
    ## Writes the Peon bytecode headers in-place into a byte stream
    stream.extend(self.toBytes(BYTECODE_MARKER))
    stream.add(byte(PEON_VERSION.major))
    stream.add(byte(PEON_VERSION.minor))
    stream.add(byte(PEON_VERSION.patch))
    stream.add(byte(len(PEON_BRANCH)))
    stream.extend(self.toBytes(PEON_BRANCH))
    stream.extend(self.toBytes(PEON_COMMIT_HASH))
    stream.extend(self.toBytes(getTime().toUnixFloat().int()))
    stream.extend(self.toBytes(computeSHA256(file)))


proc writeLineData(self: Serializer, stream: var seq[byte]) =
    ## Writes line information for debugging
    ## bytecode instructions
    stream.extend(len(self.chunk.lines).toQuad())
    for b in self.chunk.lines:
        stream.extend(b.toTriple())


proc writeConstants(self: Serializer, stream: var seq[byte]) =
    ## Writes the constants table in-place into the
    ## given stream
    stream.extend(self.chunk.consts.len().toQuad())
    for constant in self.chunk.consts:
        stream.add(constant)


proc writeCode(self: Serializer, stream: var seq[byte]) =
    ## Writes the bytecode from the given chunk to the
    ## given source stream
    stream.extend(self.chunk.code.len.toTriple())
    stream.extend(self.chunk.code)
proc readHeaders(self: Serializer, stream: seq[byte], serialized: Serialized): int =
    ## Reads the bytecode headers from a given stream
    ## of bytes
    var stream = stream
    if stream[0..<len(BYTECODE_MARKER)] != self.toBytes(BYTECODE_MARKER):
        self.error("malformed bytecode marker")
    result += len(BYTECODE_MARKER)
    stream = stream[len(BYTECODE_MARKER)..^1]
    serialized.peonVer = (major: int(stream[0]), minor: int(stream[1]), patch: int(stream[2]))
    stream = stream[3..^1]
    result += 3
    let branchLength = stream[0]
    stream = stream[1..^1]
    result += 1
    serialized.peonBranch = self.bytesToString(stream[0..<branchLength])
    stream = stream[branchLength..^1]
    result += int(branchLength)
    serialized.commitHash = self.bytesToString(stream[0..<40]).toLowerAscii()
    stream = stream[40..^1]
    result += 40
    serialized.compileDate = int(fromLong([stream[0], stream[1], stream[2], stream[3],
                                           stream[4], stream[5], stream[6], stream[7]]))
    stream = stream[8..^1]
    result += 8
    serialized.fileHash = self.bytesToString(stream[0..<32]).toHex().toLowerAscii()
    result += 32
proc readLineData(self: Serializer, stream: seq[byte]): int =
    ## Reads line information from a stream
    ## of bytes
    let size = [stream[0], stream[1], stream[2], stream[3]].fromQuad()
    result += 4
    var stream = stream[4..^1]
    for i in countup(0, int(size) - 1):
        self.chunk.lines.add(int([stream[0], stream[1], stream[2]].fromTriple()))
        result += 3
        stream = stream[3..^1]


proc readConstants(self: Serializer, stream: seq[byte]): int =
    ## Reads the constant table from the given stream
    ## of bytes
    let size = [stream[0], stream[1], stream[2], stream[3]].fromQuad()
    result += 4
    var stream = stream[4..^1]
    for i in countup(0, int(size) - 1):
        self.chunk.consts.add(stream[i])
        inc(result)
proc readCode(self: Serializer, stream: seq[byte]): int =
    ## Reads the bytecode from a given stream and writes
    ## it into the given chunk
    let size = [stream[0], stream[1], stream[2]].fromTriple()
    var stream = stream[3..^1]
    for i in countup(0, int(size) - 1):
        self.chunk.code.add(stream[i])
    doAssert len(self.chunk.code) == int(size)
    return int(size) + 3  # bytes consumed: the code itself plus its 3-byte length prefix
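
Taken together, the writers and readers above imply this on-disk layout for a .pbc file (sizes in bytes, reconstructed from the code; not an official spec):

# .pbc layout, in order:
#   BYTECODE_MARKER                            len(BYTECODE_MARKER)
#   version (major, minor, patch)              3
#   branch name length n, then branch name     1 + n
#   commit hash (hex string)                   40
#   compile date (unix timestamp)              8
#   SHA256 digest of the source file           32
#   line data: count, then 24-bit entries      4 + 3 per entry
#   constants: count, then raw bytes           4 + 1 per constant
#   bytecode: count, then the instructions     3 + 1 per byte of code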
proc dumpBytes*(self: Serializer, chunk: Chunk, file, filename: string): seq[byte] =
    ## Dumps the given bytecode and file to a sequence of bytes and returns it.
    ## The file argument must be the actual file's content and is needed to
    ## compute its SHA256 hash.
    self.file = file
    self.filename = filename
    self.chunk = chunk
    self.writeHeaders(result, self.file)
    self.writeLineData(result)
    self.writeConstants(result)
    self.writeCode(result)


proc dumpFile*(self: Serializer, chunk: Chunk, file, filename, dest: string) =
    ## Dumps the result of dumpBytes to a file at dest
    var fp = open(dest, fmWrite)
    defer: fp.close()
    let data = self.dumpBytes(chunk, file, filename)
    discard fp.writeBytes(data, 0, len(data))
proc loadBytes*(self: Serializer, stream: seq[byte]): Serialized =
    ## Loads the result from dumpBytes to a Serialized object
    ## for use in the VM or for inspection
    discard self.newSerializer()
    new(result)
    result.chunk = newChunk()
    self.chunk = result.chunk
    var stream = stream
    try:
        stream = stream[self.readHeaders(stream, result)..^1]
        stream = stream[self.readLineData(stream)..^1]
        stream = stream[self.readConstants(stream)..^1]
        stream = stream[self.readCode(stream)..^1]
    except IndexDefect:
        self.error("truncated bytecode stream")
    except AssertionDefect:
        self.error(&"corrupted bytecode stream: {getCurrentExceptionMsg()}")
proc loadFile*(self: Serializer, src: string): Serialized =
    ## Loads a bytecode file
    var fp = open(src, fmRead)
    defer: fp.close()
    let size = fp.getFileSize()
    var pos = 0'i64
    var data: seq[byte] = newSeqOfCap[byte](size)
    for _ in 0..<size:
        data.add(0)
    while pos < size:
        discard fp.readBytes(data, pos, size)
        pos = fp.getFilePos()
    return self.loadBytes(data)
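
End to end, the two halves mirror each other. A hedged round-trip sketch (the import paths and file names are assumptions, not part of this module; an empty chunk is enough to exercise the format):

import util/serializer
import frontend/meta/bytecode

let source = "# empty program"   # the raw file text; only hashed into the header, never parsed here
var chunk = newChunk()
let serializer = newSerializer()
serializer.dumpFile(chunk, source, "example.pn", "example.pbc")
let loaded = serializer.loadFile("example.pbc")
echo loaded   # Serialized(fileHash=..., version=..., branch=..., ...)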

1
tests/test.pn Normal file
View File

@ -0,0 +1 @@
# TODO