From 76812a20911446dd8d0d6a36cb8b935344b5f6a1 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Mon, 4 Apr 2022 12:29:23 +0200 Subject: [PATCH] Initial commit from JAPL with some changes --- LICENSE | 222 ++++-- src/backend/types/arrayList.nim | 196 +++++ src/backend/types/baseObject.nim | 84 +++ src/backend/types/dispatch.nim | 48 ++ src/backend/types/floatObject.nim | 49 ++ src/backend/types/hashMap.nim | 207 ++++++ src/backend/types/intObject.nim | 40 + src/backend/types/iterable.nim | 45 ++ src/backend/types/stringObject.nim | 15 + src/backend/vm.nim | 20 + src/config.nim | 61 ++ src/frontend/compiler.nim | 1048 ++++++++++++++++++++++++++ src/frontend/lexer.nim | 574 +++++++++++++++ src/frontend/meta/ast.nim | 764 +++++++++++++++++++ src/frontend/meta/bytecode.nim | 297 ++++++++ src/frontend/meta/errors.nim | 21 + src/frontend/meta/token.nim | 86 +++ src/frontend/optimizer.nim | 402 ++++++++++ src/frontend/parser.nim | 1096 ++++++++++++++++++++++++++++ src/frontend/serializer.nim | 273 +++++++ src/main.nim | 186 +++++ src/memory/allocator.nim | 85 +++ src/util/debugger.nim | 195 +++++ src/util/multibyte.nim | 40 + 24 files changed, 6001 insertions(+), 53 deletions(-) create mode 100644 src/backend/types/arrayList.nim create mode 100644 src/backend/types/baseObject.nim create mode 100644 src/backend/types/dispatch.nim create mode 100644 src/backend/types/floatObject.nim create mode 100644 src/backend/types/hashMap.nim create mode 100644 src/backend/types/intObject.nim create mode 100644 src/backend/types/iterable.nim create mode 100644 src/backend/types/stringObject.nim create mode 100644 src/backend/vm.nim create mode 100644 src/config.nim create mode 100644 src/frontend/compiler.nim create mode 100644 src/frontend/lexer.nim create mode 100644 src/frontend/meta/ast.nim create mode 100644 src/frontend/meta/bytecode.nim create mode 100644 src/frontend/meta/errors.nim create mode 100644 src/frontend/meta/token.nim create mode 100644 src/frontend/optimizer.nim create mode 100644 src/frontend/parser.nim create mode 100644 src/frontend/serializer.nim create mode 100644 src/main.nim create mode 100644 src/memory/allocator.nim create mode 100644 src/util/debugger.nim create mode 100644 src/util/multibyte.nim diff --git a/LICENSE b/LICENSE index eb2e968..261eeb9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,85 +1,201 @@ -The Artistic License 2.0 + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Copyright (c) 2000-2006, The Perl Foundation. + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + 1. Definitions. -Preamble + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. -You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. 
If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. -Definitions + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. - "Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package. + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. - "Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures. + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. - "You" and "your" means any person who would like to copy, distribute, or modify the Package. + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). - "Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version. + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. - "Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization. + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." - "Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees. + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. - "Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder. + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. - "Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder. + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. - "Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future. + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: - "Source" form means the source code, documentation source, and configuration files for the Package. + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and - "Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form. 
+ (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and -Permission for Use and Modification Without Distribution + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and -(1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version. + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. -Permissions for Redistribution of the Standard Version + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. -(2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package. + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. -(3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License. + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. -Distribution of Modified Versions of the Package as Source + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. -(4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following: + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. - (a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version. - (b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version. - (c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. - (i) the Original License or - (ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed. + END OF TERMS AND CONDITIONS -Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source + APPENDIX: How to apply the Apache License to your work. 
-(5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days after you become aware that the instructions are invalid, then you do not forfeit any of your rights under this license. + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. -(6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version. + Copyright [yyyy] [name of copyright owner] -Aggregating or Linking the Package + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at -(7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation. + http://www.apache.org/licenses/LICENSE-2.0 -(8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package. - -Items That are Not Considered Part of a Modified Version - -(9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license. - -General Provisions - -(10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license. - -(11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license. - -(12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder. 
- -(13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic License to you shall terminate on the date that such litigation is filed. - -(14) Disclaimer of Warranty: -THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/backend/types/arrayList.nim b/src/backend/types/arrayList.nim new file mode 100644 index 0000000..d1f41a3 --- /dev/null +++ b/src/backend/types/arrayList.nim @@ -0,0 +1,196 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Implementation of a custom list data type for JAPL objects (used also internally by the VM) + +import iterable +import ../../memory/allocator +import baseObject + +import strformat + + +type + ArrayList*[T] = object of Iterable + ## Implementation of a simple dynamic + ## array with amortized O(1) append complexity + ## and O(1) complexity when popping/deleting + ## the last element + container: ptr UncheckedArray[T] + ArrayListIterator*[T] = object of Iterator + list: ArrayList[T] + current: int + + +proc newArrayList*[T]: ptr ArrayList[T] = + ## Allocates a new, empty array list + result = allocateObj(ArrayList[T], ObjectType.List) + result.capacity = 0 + result.container = nil + result.length = 0 + + +proc append*[T](self: ptr ArrayList[T], elem: T) = + ## Appends an object to the end of the list + ## in amortized constant time (~O(1)) + if self.capacity <= self.length: + self.capacity = growCapacity(self.capacity) + self.container = resizeArray(T, self.container, self.length, self.capacity) + self.container[self.length] = elem + self.length += 1 + + +proc pop*[T](self: ptr ArrayList[T], idx: int = -1): T = + ## Pops an item from the list. By default, the last + ## element is popped, in which case the operation's + ## time complexity is O(1). 
When an arbitrary element + ## is popped, the complexity rises to O(k) where k + ## is the number of elements that had to be shifted + ## by 1 to avoid empty slots + var idx = idx + if self.length == 0: + raise newException(IndexDefect, "pop from empty ArrayList") + if idx == -1: + idx = self.length - 1 + if idx notin 0..self.length - 1: + raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}") + result = self.container[idx] + if idx != self.length - 1: + for i in countup(idx, self.length - 1): + self.container[i] = self.container[i + 1] + self.capacity -= 1 + self.length -= 1 + + +proc `[]`*[T](self: ptr ArrayList[T], idx: int): T = + ## Retrieves an item from the list, in constant + ## time + if self.length == 0: + raise newException(IndexDefect, &"ArrayList index out of bounds: : {idx} notin 0..{self.length - 1}") + if idx notin 0..self.length - 1: + raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}") + result = self.container[idx] + + +proc `[]`*[T](self: ptr ArrayList[T], slice: Hslice[int, int]): ptr ArrayList[T] = + ## Retrieves a subset of the list, in O(k) time where k is the size + ## of the slice + if self.length == 0: + raise newException(IndexDefect, "ArrayList index out of bounds") + if slice.a notin 0..self.length - 1 or slice.b notin 0..self.length: + raise newException(IndexDefect, "ArrayList index out of bounds") + result = newArrayList[T]() + for i in countup(slice.a, slice.b - 1): + result.append(self.container[i]) + + +proc `[]=`*[T](self: ptr ArrayList[T], idx: int, obj: T) = + ## Assigns an object to the given index, in constant + ## time + if self.length == 0: + raise newException(IndexDefect, "ArrayList is empty") + if idx notin 0..self.length - 1: + raise newException(IndexDefect, "ArrayList index out of bounds") + self.container[idx] = obj + + +proc delete*[T](self: ptr ArrayList[T], idx: int) = + ## Deletes an object from the given index. + ## This method shares the time complexity + ## of self.pop() + if self.length == 0: + raise newException(IndexDefect, "delete from empty ArrayList") + if idx notin 0..self.length - 1: + raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}") + discard self.pop(idx) + + +proc contains*[T](self: ptr ArrayList[T], elem: T): bool = + ## Returns true if the given object is present + ## in the list, false otherwise. O(n) complexity + if self.length > 0: + for i in 0..self.length - 1: + if self[i] == elem: + return true + return false + + +proc high*[T](self: ptr ArrayList[T]): int = + ## Returns the index of the last + ## element in the list, in constant time + if self.length == 0: + raise newException(IndexDefect, "ArrayList is empty") + result = self.length - 1 + + +proc len*[T](self: ptr ArrayList[T]): int = + ## Returns the length of the list + ## in constant time + result = self.length + + +iterator pairs*[T](self: ptr ArrayList[T]): tuple[key: int, val: T] = + ## Implements pairwise iteration (similar to python's enumerate) + for i in countup(0, self.length - 1): + yield (key: i, val: self[i]) + + +iterator items*[T](self: ptr ArrayList[T]): T = + ## Implements iteration + for i in countup(0, self.length - 1): + yield self[i] + + +proc reversed*[T](self: ptr ArrayList[T], first: int = -1, last: int = 0): ptr ArrayList[T] = + ## Returns a reversed version of the given list, from first to last. 
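# A minimal usage sketch of the ArrayList API defined above; the import
# assumes the src/ module layout introduced by this commit (compiled from
# the src/ directory) and the snippet is illustrative rather than part of
# the original file.
import backend/types/arrayList

var l = newArrayList[int]()
l.append(1)
l.append(2)
l.append(3)
echo l[0]        # 1: constant-time lookup
echo l.pop()     # 3: last element, O(1)
echo l.pop(0)    # 1: arbitrary index, O(k) element shift
echo len(l)      # 1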
+ ## First defaults to -1 (the end of the list) and last defaults to 0 (the + ## beginning of the list) + var first = first + if first == -1: + first = self.length - 1 + result = newArrayList[T]() + for i in countdown(first, last): + result.append(self[i]) + + +proc extend*[T](self: ptr ArrayList[T], other: seq[T]) = + ## Iteratively calls self.append() with the elements + ## from a nim sequence + for elem in other: + self.append(elem) + + +proc extend*[T](self: ptr ArrayList[T], other: ptr ArrayList[T]) = + ## Iteratively calls self.append() with the elements + ## from another ArrayList + for elem in other: + self.append(elem) + + +proc `$`*[T](self: ptr ArrayList[T]): string = + ## Returns a string representation + ## of self + result = "[" + if self.length > 0: + for i in 0..self.length - 1: + result = result & $self.container[i] + if i < self.length - 1: + result = result & ", " + result = result & "]" + + +proc getIter*[T](self: ptr ArrayList[T]): Iterator = + ## Returns the iterator object of the + ## arraylist + result = allocate(ArrayListIterator, ) # TODO \ No newline at end of file diff --git a/src/backend/types/baseObject.nim b/src/backend/types/baseObject.nim new file mode 100644 index 0000000..d7c1059 --- /dev/null +++ b/src/backend/types/baseObject.nim @@ -0,0 +1,84 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +## The base JAPL object + +import ../../memory/allocator + + +type + ObjectType* {.pure.} = enum + ## All the possible object types + String, Exception, Function, + Class, Module, BaseObject, + Native, Integer, Float, + Bool, NotANumber, Infinity, + Nil, List, Dict, Set, Tuple + Obj* = object of RootObj + ## The base object for all + ## JAPL types. Every object + ## in JAPL implicitly inherits + ## from this base type and extends + ## its functionality + kind*: ObjectType + hashValue*: uint64 + + +## Object constructors and allocators + +proc allocateObject*(size: int, kind: ObjectType): ptr Obj = + ## Wrapper around reallocate() to create a new generic JAPL object + result = cast[ptr Obj](reallocate(nil, 0, size)) + result.kind = kind + + +template allocateObj*(kind: untyped, objType: ObjectType): untyped = + ## Wrapper around allocateObject to cast a generic object + ## to a more specific type + cast[ptr kind](allocateObject(sizeof kind, objType)) + + +proc newObj*: ptr Obj = + ## Allocates a generic JAPL object + result = allocateObj(Obj, ObjectType.BaseObject) + result.hashValue = 0x123FFFF + + +## Default object methods implementations + +# In JAPL code, this method will be called +# stringify() +proc `$`*(self: ptr Obj): string = "" +proc stringify*(self: ptr Obj): string = $self + +proc hash*(self: ptr Obj): int64 = 0x123FFAA # Constant hash value +# I could've used mul, sub and div, but "div" is a reserved +# keyword and using `div` looks ugly. 
So to keep everything +# consistent I just made all names long +proc multiply*(self, other: ptr Obj): ptr Obj = nil +proc sum*(self, other: ptr Obj): ptr Obj = nil +proc divide*(self, other: ptr Obj): ptr Obj = nil +proc subtract*(self, other: ptr Obj): ptr Obj = nil +# Returns 0 if self == other, a negative number if self < other +# and a positive number if self > other. This is a convenience +# method to implement all basic comparison operators in one +# method +proc compare*(self, other: ptr Obj): ptr Obj = nil +# Specific methods for each comparison +proc equalTo*(self, other: ptr Obj): ptr Obj = nil +proc greaterThan*(self, other: ptr Obj): ptr Obj = nil +proc lessThan*(self, other: ptr Obj): ptr Obj = nil +proc greaterOrEqual*(self, other: ptr Obj): ptr Obj = nil +proc lessOrEqual*(self, other: ptr Obj): ptr Obj = nil \ No newline at end of file diff --git a/src/backend/types/dispatch.nim b/src/backend/types/dispatch.nim new file mode 100644 index 0000000..edda38f --- /dev/null +++ b/src/backend/types/dispatch.nim @@ -0,0 +1,48 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Type dispatching module +import baseObject +import intObject +import floatObject + + +proc dispatch*(obj: ptr Obj, p: proc (self: ptr Obj): ptr Obj): ptr Obj = + ## Dispatches a given one-argument procedure according to + ## the provided object's runtime type and returns its result + case obj.kind: + of BaseObject: + result = p(obj) + of ObjectType.Float: + result = p(cast[ptr Float](obj)) + of ObjectType.Integer: + result = p(cast[ptr Integer](obj)) + else: + discard + + +proc dispatch*(a, b: ptr Obj, p: proc (self: ptr Obj, other: ptr Obj): ptr Obj): ptr Obj = + ## Dispatches a given two-argument procedure according to + ## the provided object's runtime type and returns its result + case a.kind: + of BaseObject: + result = p(a, b) + of ObjectType.Float: + # Further type casting for b is expected to occur later + # in the given procedure + result = p(cast[ptr Float](a), b) + of ObjectType.Integer: + result = p(cast[ptr Integer](a), b) + else: + discard \ No newline at end of file diff --git a/src/backend/types/floatObject.nim b/src/backend/types/floatObject.nim new file mode 100644 index 0000000..b6902ef --- /dev/null +++ b/src/backend/types/floatObject.nim @@ -0,0 +1,49 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
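# A minimal sketch of how the dispatch helpers above are meant to be called;
# the imports assume the src/ module layout from this commit, and the result
# is nil because baseObject only provides stub arithmetic methods at this
# stage, so this only shows the calling pattern.
import backend/types/baseObject
import backend/types/intObject
import backend/types/dispatch

let a = newInteger(1)
let b = newInteger(2)
let res = dispatch(a, b, sum)   # dispatches on a's runtime type (Integer)
echo res.isNil                  # true: sum() is still a stub returning nil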
+ +## Implementation of integer types + +import baseObject +import lenientops + + +type Float* = object of Obj + value: float64 + + +proc newFloat*(value: float): ptr Float = + ## Initializes a new JAPL + ## float object from + ## a machine native float + result = allocateObj(Float, ObjectType.Float) + result.value = value + + +proc toNativeFloat*(self: ptr Float): float = + ## Returns the float's machine + ## native underlying value + result = self.value + + +proc `$`*(self: ptr Float): string = $self.value + + +proc hash*(self: ptr Float): int64 = + ## Implements hashing + ## for the given float + if self.value - int(self.value) == self.value: + result = int(self.value) + else: + result = 2166136261 xor int(self.value) # TODO: Improve this + result *= 16777619 \ No newline at end of file diff --git a/src/backend/types/hashMap.nim b/src/backend/types/hashMap.nim new file mode 100644 index 0000000..d4bd367 --- /dev/null +++ b/src/backend/types/hashMap.nim @@ -0,0 +1,207 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import ../../memory/allocator +import ../../config + +import baseObject +import iterable + + +type + Entry = object + ## Low-level object to store key/value pairs. + ## Using an extra value for marking the entry as + ## a tombstone instead of something like detecting + ## tombstones as entries with null keys but full values + ## may seem wasteful. The thing is, though, that since + ## we want to implement sets on top of this hashmap and + ## the implementation of a set is *literally* a dictionary + ## with empty values and keys as the elements, this would + ## confuse our findEntry method and would force us to override + ## it to account for a different behavior. 
+ ## Using a third field takes up more space, but saves us + ## from the hassle of rewriting code + key: ptr Obj + value: ptr Obj + tombstone: bool + HashMap* = object of Iterable + ## An associative array with O(1) lookup time, + ## similar to nim's Table type, but using raw + ## memory to be more compatible with JAPL's runtime + ## memory management + entries: ptr UncheckedArray[ptr Entry] + # This attribute counts *only* non-deleted entries + actual_length: int + + +proc newHashMap*: ptr HashMap = + ## Initializes a new, empty hashmap + result = allocateObj(HashMap, ObjectType.Dict) + result.actual_length = 0 + result.entries = nil + result.capacity = 0 + result.length = 0 + + +proc freeHashMap*(self: ptr HashMap) = + ## Frees the memory associated with the hashmap + discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity) + self.length = 0 + self.actual_length = 0 + self.capacity = 0 + self.entries = nil + + +proc findEntry(self: ptr UncheckedArray[ptr Entry], key: ptr Obj, capacity: int): ptr Entry = + ## Low-level method used to find entries in the underlying + ## array, returns a pointer to an entry + var capacity = uint64(capacity) + var idx = uint64(key.hash()) mod capacity + while true: + result = self[idx] + if system.`==`(result.key, nil): + # We found an empty bucket + break + elif result.tombstone: + # We found a previously deleted + # entry. In this case, we need + # to make sure the tombstone + # will get overwritten when the + # user wants to add a new value + # that would replace it, BUT also + # for it to not stop our linear + # probe sequence. Hence, if the + # key of the tombstone is the same + # as the one we're looking for, + # we break out of the loop, otherwise + # we keep searching + if result.key == key: + break + elif result.key == key: + # We were looking for a specific key and + # we found it, so we also bail out + break + # If none of these conditions match, we have a collision! + # This means we can just move on to the next slot in our probe + # sequence until we find an empty slot. 
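# A short usage sketch of this hashmap, relying on the `[]=`, `[]`, `del` and
# `contains` procs defined further below in this file; imports assume the
# src/ module layout from this commit and the snippet is illustrative only.
import backend/types/baseObject
import backend/types/intObject
import backend/types/hashMap

let m = newHashMap()
let key = newInteger(1)
m[key] = newInteger(42)   # first insert triggers adjustCapacity()
echo m.contains(key)      # true
echo m.len()              # 1
m.del(key)                # the entry is only marked as a tombstone
echo m.contains(key)      # false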
The way our resizing + # mechanism works makes the empty slot invariant easy to + # maintain since we increase the underlying array's size + # before we are actually full + idx = (idx + 1) mod capacity + + +proc adjustCapacity(self: ptr HashMap) = + var newCapacity = growCapacity(self.capacity) + var entries = allocate(UncheckedArray[ptr Entry], Entry, newCapacity) + var oldEntry: ptr Entry + var newEntry: ptr Entry + self.length = 0 + for x in countup(0, newCapacity - 1): + entries[x] = allocate(Entry, Entry, 1) + entries[x].tombstone = false + entries[x].key = nil + entries[x].value = nil + for x in countup(0, self.capacity - 1): + oldEntry = self.entries[x] + if not system.`==`(oldEntry.key, nil): + newEntry = entries.findEntry(oldEntry.key, newCapacity) + newEntry.key = oldEntry.key + newEntry.value = oldEntry.value + self.length += 1 + discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity) + self.entries = entries + self.capacity = newCapacity + + +proc setEntry(self: ptr HashMap, key: ptr Obj, value: ptr Obj): bool = + if float64(self.length + 1) >= float64(self.capacity) * MAP_LOAD_FACTOR: + self.adjustCapacity() + var entry = findEntry(self.entries, key, self.capacity) + result = system.`==`(entry.key, nil) + if result: + self.actual_length += 1 + self.length += 1 + entry.key = key + entry.value = value + entry.tombstone = false + + +proc `[]`*(self: ptr HashMap, key: ptr Obj): ptr Obj = + var entry = findEntry(self.entries, key, self.capacity) + if system.`==`(entry.key, nil) or entry.tombstone: + raise newException(KeyError, "Key not found: " & $key) + result = entry.value + + +proc `[]=`*(self: ptr HashMap, key: ptr Obj, value: ptr Obj) = + discard self.setEntry(key, value) + + +proc len*(self: ptr HashMap): int = + result = self.actual_length + + +proc del*(self: ptr HashMap, key: ptr Obj) = + if self.len() == 0: + raise newException(KeyError, "delete from empty hashmap") + var entry = findEntry(self.entries, key, self.capacity) + if not system.`==`(entry.key, nil): + self.actual_length -= 1 + entry.tombstone = true + else: + raise newException(KeyError, "Key not found: " & $key) + + +proc contains*(self: ptr HashMap, key: ptr Obj): bool = + let entry = findEntry(self.entries, key, self.capacity) + if not system.`==`(entry.key, nil) and not entry.tombstone: + result = true + else: + result = false + + +iterator keys*(self: ptr HashMap): ptr Obj = + var entry: ptr Entry + for i in countup(0, self.capacity - 1): + entry = self.entries[i] + if not system.`==`(entry.key, nil) and not entry.tombstone: + yield entry.key + + +iterator values*(self: ptr HashMap): ptr Obj = + for key in self.keys(): + yield self[key] + + +iterator pairs*(self: ptr HashMap): tuple[key: ptr Obj, val: ptr Obj] = + for key in self.keys(): + yield (key: key, val: self[key]) + + +iterator items*(self: ptr HashMap): ptr Obj = + for k in self.keys(): + yield k + + +proc `$`*(self: ptr HashMap): string = + var i = 0 + result &= "{" + for key, value in self.pairs(): + result &= $key & ": " & $value + if i < self.len() - 1: + result &= ", " + i += 1 + result &= "}" \ No newline at end of file diff --git a/src/backend/types/intObject.nim b/src/backend/types/intObject.nim new file mode 100644 index 0000000..fcfa7cc --- /dev/null +++ b/src/backend/types/intObject.nim @@ -0,0 +1,40 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Implementation of integer types + +import baseObject + + +type Integer* = object of Obj + value: int64 + + +proc newInteger*(value: int64): ptr Integer = + ## Initializes a new JAPL + ## integer object from + ## a machine native integer + result = allocateObj(Integer, ObjectType.Integer) + result.value = value + + +proc toNativeInteger*(self: ptr Integer): int64 = + ## Returns the integer's machine + ## native underlying value + result = self.value + + +proc `$`*(self: ptr Integer): string = $self.value +proc hash*(self: ptr Integer): int64 = self.value + diff --git a/src/backend/types/iterable.nim b/src/backend/types/iterable.nim new file mode 100644 index 0000000..fc04d62 --- /dev/null +++ b/src/backend/types/iterable.nim @@ -0,0 +1,45 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Implementation of iterable types and iterators in JAPL + +import baseObject + + +type + Iterable* = object of Obj + ## Defines the standard interface + ## for iterable types in JAPL + length*: int + capacity*: int + Iterator* = object of Iterable + ## This object drives iteration + ## for every iterable type in JAPL except + ## generators + iterable*: ptr Obj + iterCount*: int + + +proc getIter*(self: Iterable): ptr Iterator = + ## Returns the iterator object of an + ## iterable, which drives foreach + ## loops + return nil + + +proc next*(self: Iterator): ptr Obj = + ## Returns the next element from + ## the iterator or nil if the + ## iterator has been consumed + return nil \ No newline at end of file diff --git a/src/backend/types/stringObject.nim b/src/backend/types/stringObject.nim new file mode 100644 index 0000000..b26c15c --- /dev/null +++ b/src/backend/types/stringObject.nim @@ -0,0 +1,15 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
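# A small sketch of the integer object defined above; illustrative only, with
# the import assuming the src/ module layout from this commit.
import backend/types/intObject

let n = newInteger(42)
echo n.toNativeInteger()   # 42, the machine-native value
echo $n                    # 42, via the stringification proc above
echo n.hash()              # 42: integers hash to their own value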
+ +# JAPL string implementations \ No newline at end of file diff --git a/src/backend/vm.nim b/src/backend/vm.nim new file mode 100644 index 0000000..5e65e22 --- /dev/null +++ b/src/backend/vm.nim @@ -0,0 +1,20 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## The JAPL runtime environment + + +type + VM* = ref object + stack: diff --git a/src/config.nim b/src/config.nim new file mode 100644 index 0000000..e34ed23 --- /dev/null +++ b/src/config.nim @@ -0,0 +1,61 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import strformat + +const BYTECODE_MARKER* = "JAPL_BYTECODE" +const MAP_LOAD_FACTOR* = 0.75 # Load factor for builtin hashmaps +when MAP_LOAD_FACTOR >= 1.0: + {.fatal: "Hashmap load factor must be < 1".} +const HEAP_GROW_FACTOR* = 2 # How much extra memory to allocate for dynamic arrays and garbage collection when resizing +when HEAP_GROW_FACTOR <= 1: + {.fatal: "Heap growth factor must be > 1".} +const MAX_STACK_FRAMES* = 800 # The maximum number of stack frames at any one time. Acts as a recursion limiter (1 frame = 1 call) +when MAX_STACK_FRAMES <= 0: + {.fatal: "The frame limit must be > 0".} +const JAPL_VERSION* = (major: 0, minor: 4, patch: 0) +const JAPL_RELEASE* = "alpha" +const JAPL_COMMIT_HASH* = "ba9c8b4e5664c0670eb8925d65b307e397d6ed82" +when len(JAPL_COMMIT_HASH) != 40: + {.fatal: "The git commit hash must be exactly 40 characters long".} +const JAPL_BRANCH* = "master" +when len(JAPL_BRANCH) >= 255: + {.fatal: "The git branch name's length must be less than or equal to 255 characters".} +const DEBUG_TRACE_VM* = false # Traces VM execution +const SKIP_STDLIB_INIT* = false # Skips stdlib initialization (can be imported manually) +const DEBUG_TRACE_GC* = false # Traces the garbage collector (TODO) +const DEBUG_TRACE_ALLOCATION* = false # Traces memory allocation/deallocation +const DEBUG_TRACE_COMPILER* = false # Traces the compiler +const JAPL_VERSION_STRING* = &"JAPL {JAPL_VERSION.major}.{JAPL_VERSION.minor}.{JAPL_VERSION.patch} {JAPL_RELEASE} ({JAPL_BRANCH}, {CompileDate}, {CompileTime}, {JAPL_COMMIT_HASH[0..8]}) [Nim {NimVersion}] on {hostOS} ({hostCPU})" +const HELP_MESSAGE* = """The JAPL programming language, Copyright (C) 2022 Mattia Giambirtone & All Contributors + +This program is free software, see the license distributed with this program or check +http://www.apache.org/licenses/LICENSE-2.0 for more info. 
+ +Basic usage +----------- + +$ jpl Opens an interactive session (REPL) +$ jpl file.jpl Runs the given JAPL source file + +Command-line options +-------------------- + +-h, --help Shows this help text and exits +-v, --version Prints the JAPL version number and exits +-s, --string Executes the passed string as if it was a file +-i, --interactive Enables interactive mode, which opens a REPL session after execution of a file or source string +-c, --nocache Disables dumping the result of bytecode compilation to files for caching +-d, --cache-delay Configures the bytecode cache invalidation threshold, in minutes (defaults to 60) +""" diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim new file mode 100644 index 0000000..139c40a --- /dev/null +++ b/src/frontend/compiler.nim @@ -0,0 +1,1048 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import meta/token +import meta/ast +import meta/errors +import meta/bytecode +import ../config +import ../util/multibyte + + +import strformat +import algorithm +import parseutils +import sequtils + + +export ast +export bytecode +export token +export multibyte + + +type + Name = ref object + ## A compile-time wrapper around + ## statically resolved names. + ## Depth indicates to which scope + ## the variable belongs, zero meaning + ## the global one + name: IdentExpr + owner: string + depth: int + isPrivate: bool + isConst: bool + + Loop = object + ## A "loop object" used + ## by the compiler to emit + ## appropriate jump offsets + ## for continue and break + ## statements + start: int + depth: int + breakPos: seq[int] + + Compiler* = ref object + ## A wrapper around the compiler's state + + # The bytecode chunk where we write code to + chunk: Chunk + # The output of our parser (AST) + ast: seq[ASTNode] + # The current AST node we're looking at + current: int + # The current file being compiled (used only for + # error reporting) + file: string + # Compile-time "simulation" of the stack at + # runtime to load variables that have stack + # behavior more efficiently + names: seq[Name] + # The current scope depth. If > 0, we're + # in a local scope, otherwise it's global + scopeDepth: int + # The current function being compiled + currentFunction: FunDecl + # Are optimizations turned on? + enableOptimizations*: bool + # The current loop being compiled (used to + # keep track of where to jump) + currentLoop: Loop + # The current module being compiled + # (used to restrict access to statically + # defined variables at compile time) + currentModule: string + # Each time a defer statement is + # compiled, its code is emitted + # here. Later, if there is any code + # to defer in the current function, + # funDecl will wrap the function's code + # inside an implicit try/finally block + # and add this code in the finally branch. 
+ # This sequence is emptied each time a + # fun declaration is compiled and stores only + # deferred code for the current function (may + # be empty) + deferred: seq[uint8] + # List of closed-over variables + closedOver: seq[IdentExpr] + + + +proc initCompiler*(enableOptimizations: bool = true): Compiler = + ## Initializes a new Compiler object + new(result) + result.ast = @[] + result.current = 0 + result.file = "" + result.names = @[] + result.scopeDepth = 0 + result.currentFunction = nil + result.enableOptimizations = enableOptimizations + result.currentModule = "" + + +## Forward declarations +proc expression(self: Compiler, node: ASTNode) +proc statement(self: Compiler, node: ASTNode) +proc declaration(self: Compiler, node: ASTNode) +proc peek(self: Compiler, distance: int = 0): ASTNode +## End of forward declarations + +## Public getters for nicer error formatting +proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= + self.ast.len(): self.ast[^1] else: self.ast[self.current - 1]) + + +## Utility functions + +proc peek(self: Compiler, distance: int = 0): ASTNode = + ## Peeks at the AST node at the given distance. + ## If the distance is out of bounds, the last + ## AST node in the tree is returned. A negative + ## distance may be used to retrieve previously + ## consumed AST nodes + if self.ast.high() == -1 or self.current + distance > self.ast.high() or + self.current + distance < 0: + result = self.ast[^1] + else: + result = self.ast[self.current + distance] + + +proc done(self: Compiler): bool = + ## Returns true if the compiler is done + ## compiling, false otherwise + result = self.current > self.ast.high() + + +proc error(self: Compiler, message: string) = + ## Raises a formatted CompileError exception + var tok = self.getCurrentNode().token + raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {tok.line} at '{tok.lexeme}' -> {message}") + + +proc step(self: Compiler): ASTNode = + ## Steps to the next node and returns + ## the consumed one + result = self.peek() + if not self.done(): + self.current += 1 + + +proc emitByte(self: Compiler, byt: OpCode|uint8) = + ## Emits a single byte, writing it to + ## the current chunk being compiled + when DEBUG_TRACE_COMPILER: + echo &"DEBUG - Compiler: Emitting {$byt}" + self.chunk.write(uint8 byt, self.peek().token.line) + + +proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) = + ## Emits multiple bytes instead of a single one, this is useful + ## to emit operators along with their operands or for multi-byte + ## instructions that are longer than one byte + self.emitByte(uint8 byt1) + self.emitByte(uint8 byt2) + + +proc emitBytes(self: Compiler, bytarr: array[2, uint8]) = + ## Handy helper method to write an array of 2 bytes into + ## the current chunk, calling emitByte on each of its + ## elements + self.emitBytes(bytarr[0], bytarr[1]) + + +proc emitBytes(self: Compiler, bytarr: array[3, uint8]) = + ## Handy helper method to write an array of 3 bytes into + ## the current chunk, calling emitByte on each of its + ## elements + self.emitBytes(bytarr[0], bytarr[1]) + self.emitByte(bytarr[2]) + + +proc makeConstant(self: Compiler, val: ASTNode): array[3, uint8] = + ## Adds a constant to the current chunk's constant table + ## and returns its index as a 3-byte array of uint8s + result = self.chunk.addConstant(val) + + +proc emitConstant(self: Compiler, obj: ASTNode) = + ## Emits a LoadConstant instruction along + ## with its operand + 
self.emitByte(LoadConstant) + self.emitBytes(self.makeConstant(obj)) + + +proc identifierConstant(self: Compiler, identifier: IdentExpr): array[3, uint8] = + ## Emits an identifier name as a string in the current chunk's constant + ## table. This is used to load globals declared as dynamic that cannot + ## be resolved statically by the compiler + try: + result = self.makeConstant(identifier) + except CompileError: + self.error(getCurrentExceptionMsg()) + + +proc emitJump(self: Compiler, opcode: OpCode): int = + ## Emits a dummy jump offset to be patched later. Assumes + ## the largest offset (emits 4 bytes, one for the given jump + ## opcode, while the other 3 are for the jump offset which is set + ## to the maximum unsigned 24 bit integer). If the shorter + ## 16 bit alternative is later found to be better suited, patchJump + ## will fix this. This function returns the absolute index into the + ## chunk's bytecode array where the given placeholder instruction was written + self.emitByte(opcode) + self.emitBytes((0xffffff).toTriple()) + result = self.chunk.code.len() - 4 + + +proc patchJump(self: Compiler, offset: int) = + ## Patches a previously emitted jump + ## using emitJump. Since emitJump assumes + ## a long jump, this also shrinks the jump + ## offset and changes the bytecode instruction if possible + ## (i.e. jump is in 16 bit range), but the converse is also + ## true (i.e. it might change a regular jump into a long one) + let jump: int = self.chunk.code.len() - offset + if jump > 16777215: + self.error("cannot jump more than 16777215 bytecode instructions") + if jump < uint16.high().int: + case OpCode(self.chunk.code[offset]): + of LongJumpForwards: + self.chunk.code[offset] = JumpForwards.uint8() + of LongJumpBackwards: + self.chunk.code[offset] = JumpBackwards.uint8() + of LongJumpIfFalse: + self.chunk.code[offset] = JumpIfFalse.uint8() + of LongJumpIfFalsePop: + self.chunk.code[offset] = JumpIfFalsePop.uint8() + of LongJumpIfFalseOrPop: + self.chunk.code[offset] = JumpIfFalseOrPop.uint8() + else: + discard + self.chunk.code.delete(offset + 1) # Discards the 24 bit integer + let offsetArray = jump.toDouble() + self.chunk.code[offset + 1] = offsetArray[0] + self.chunk.code[offset + 2] = offsetArray[1] + else: + case OpCode(self.chunk.code[offset]): + of JumpForwards: + self.chunk.code[offset] = LongJumpForwards.uint8() + of JumpBackwards: + self.chunk.code[offset] = LongJumpBackwards.uint8() + of JumpIfFalse: + self.chunk.code[offset] = LongJumpIfFalse.uint8() + of JumpIfFalsePop: + self.chunk.code[offset] = LongJumpIfFalsePop.uint8() + of JumpIfFalseOrPop: + self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8() + else: + discard + let offsetArray = jump.toTriple() + self.chunk.code[offset + 1] = offsetArray[0] + self.chunk.code[offset + 2] = offsetArray[1] + self.chunk.code[offset + 3] = offsetArray[2] + +## End of utility functions + +proc literal(self: Compiler, node: ASTNode) = + ## Emits instructions for literals such + ## as singletons, strings, numbers and + ## collections + case node.kind: + of trueExpr: + self.emitByte(OpCode.True) + of falseExpr: + self.emitByte(OpCode.False) + of nilExpr: + self.emitByte(OpCode.Nil) + of infExpr: + self.emitByte(OpCode.Inf) + of nanExpr: + self.emitByte(OpCode.Nan) + of strExpr: + self.emitConstant(node) + # The optimizer will emit warning + # for overflowing numbers. 
Here, we + # treat them as errors + of intExpr: + var x: int + var y = IntExpr(node) + try: + assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.error("integer value out of range") + self.emitConstant(y) + # Even though most likely the optimizer + # will collapse all these other literals + # to nodes of kind intExpr, that can be + # disabled. This also allows us to catch + # basic overflow errors before running any code + of hexExpr: + var x: int + var y = HexExpr(node) + try: + assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.error("integer value out of range") + self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, stop: y.token.pos.start + + len($x))))) + of binExpr: + var x: int + var y = BinExpr(node) + try: + assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.error("integer value out of range") + self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, stop: y.token.pos.start + + len($x))))) + of octExpr: + var x: int + var y = OctExpr(node) + try: + assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.error("integer value out of range") + self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line, + pos: (start: y.token.pos.start, stop: y.token.pos.start + + len($x))))) + of floatExpr: + var x: float + var y = FloatExpr(node) + try: + assert parseFloat(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.error("floating point value out of range") + self.emitConstant(y) + of listExpr: + var y = ListExpr(node) + if y.members.len() > 16777216: + self.error("collection literals can't have more than 16777216 elements") + for member in y.members: + self.expression(member) + self.emitByte(BuildList) + self.emitBytes(y.members.len().toTriple()) # 24-bit integer, meaning collection literals can have up to 2^24 elements + of tupleExpr: + var y = TupleExpr(node) + if y.members.len() > 16777216: + self.error("collection literals can't have more than 16777216 elements") + for member in y.members: + self.expression(member) + self.emitByte(BuildTuple) + self.emitBytes(y.members.len().toTriple()) + of setExpr: + var y = SetExpr(node) + if y.members.len() > 16777216: + self.error("collection literals can't have more than 16777216 elements") + for member in y.members: + self.expression(member) + self.emitByte(BuildSet) + self.emitBytes(y.members.len().toTriple()) + of dictExpr: + var y = DictExpr(node) + if y.keys.len() > 16777216: + self.error("collection literals can't have more than 16777216 elements") + for (key, value) in zip(y.keys, y.values): + self.expression(key) + self.expression(value) + self.emitByte(BuildDict) + self.emitBytes(y.keys.len().toTriple()) + of awaitExpr: + var y = AwaitExpr(node) + self.expression(y.awaitee) + self.emitByte(OpCode.Await) + else: + self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug)") + + +proc unary(self: Compiler, node: UnaryExpr) = + ## Compiles unary expressions such as decimal or + ## bitwise negation + self.expression(node.a) # Pushes the operand onto the stack + case node.operator.kind: + of Minus: + self.emitByte(UnaryNegate) + of Plus: + discard # Unary + does nothing, but we allow it for consistency + of TokenType.LogicalNot: + self.emitByte(OpCode.LogicalNot) + of Tilde: + self.emitByte(UnaryNot) + else: + 
self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug)") + + +proc binary(self: Compiler, node: BinaryExpr) = + ## Compiles all binary expressions + # These two lines prepare the stack by pushing the + # opcode's operands onto it + self.expression(node.a) + self.expression(node.b) + case node.operator.kind: + of Plus: + self.emitByte(BinaryAdd) + of Minus: + self.emitByte(BinarySubtract) + of Asterisk: + self.emitByte(BinaryMultiply) + of DoubleAsterisk: + self.emitByte(BinaryPow) + of Percentage: + self.emitByte(BinaryMod) + of FloorDiv: + self.emitByte(BinaryFloorDiv) + of Slash: + self.emitByte(BinaryDivide) + of Ampersand: + self.emitByte(BinaryAnd) + of Caret: + self.emitByte(BinaryXor) + of Pipe: + self.emitByte(BinaryOr) + of As: + self.emitByte(BinaryAs) + of Is: + self.emitByte(BinaryIs) + of IsNot: + self.emitByte(BinaryIsNot) + of Of: + self.emitByte(BinaryOf) + of RightShift: + self.emitByte(BinaryShiftRight) + of LeftShift: + self.emitByte(BinaryShiftLeft) + of TokenType.LessThan: + self.emitByte(OpCode.LessThan) + of TokenType.GreaterThan: + self.emitByte(OpCode.GreaterThan) + of TokenType.DoubleEqual: + self.emitByte(EqualTo) + of TokenType.LessOrEqual: + self.emitByte(OpCode.LessOrEqual) + of TokenType.GreaterOrEqual: + self.emitByte(OpCode.GreaterOrEqual) + of TokenType.LogicalAnd: + self.expression(node.a) + var jump: int + if self.enableOptimizations: + jump = self.emitJump(JumpIfFalseOrPop) + else: + jump = self.emitJump(JumpIfFalse) + self.emitByte(Pop) + self.expression(node.b) + self.patchJump(jump) + of TokenType.LogicalOr: + self.expression(node.a) + let jump = self.emitJump(JumpIfTrue) + self.expression(node.b) + self.patchJump(jump) + # TODO: In-place operations + else: + self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug)") + + +proc identifier(self: Compiler, node: IdentExpr) + + +proc declareName(self: Compiler, node: ASTNode) = + ## Compiles all name declarations (constants, static, + ## and dynamic) + case node.kind: + of varDecl: + var node = VarDecl(node) + if not node.isStatic: + # This emits code for dynamically-resolved variables (i.e. globals declared as dynamic and unresolvable names) + self.emitByte(DeclareName) + self.emitBytes(self.identifierConstant(IdentExpr(node.name))) + else: + # Statically resolved variable here. Creates a new Name entry + # so that self.identifier emits the proper stack offset + if self.names.high() > 16777215: + # If someone ever hits this limit in real-world scenarios, I swear I'll + # slap myself 100 times with a sign saying "I'm dumb". Mark my words + self.error("cannot declare more than 16777215 static variables at a time") + self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name), + isPrivate: node.isPrivate, + owner: node.owner, + isConst: node.isConst)) + self.emitByte(StoreFast) + self.emitBytes(self.names.high().toTriple()) + of funDecl: + var node = FunDecl(node) + # Declares the function's name in the + # current (outer) scope... + self.declareName(node.name) + # ... but its arguments in an inner one! 
+ self.scopeDepth += 1 + # (this ugly part is needed because + # self.blockStmt() already increments + # and decrements the scope depth) + for argument in node.arguments: + if self.names.high() > 16777215: + self.error("cannot declare more than 16777215 static variables at a time") + self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument))) + self.emitByte(LoadFast) + self.emitBytes(self.names.high().toTriple()) + self.scopeDepth -= 1 + # TODO: Default arguments and unpacking + else: + discard # TODO: Classes + + +proc varDecl(self: Compiler, node: VarDecl) = + ## Compiles variable declarations + self.expression(node.value) + self.declareName(node) + + +proc resolveStatic(self: Compiler, name: IdentExpr, + depth: int = self.scopeDepth): Name = + ## Traverses self.staticNames backwards and returns the + ## first name object with the given name. Returns + ## nil when the name can't be found. This function + ## has no concept of scope depth, because getStaticIndex + ## does that job + for obj in reversed(self.names): + if obj.name.token.lexeme == name.token.lexeme: + return obj + return nil + + +proc deleteStatic(self: Compiler, name: IdentExpr) = + ## Traverses self.staticNames backwards and deletes the + ## a name object with the given name. Does nothing when + ## the name can't be found + for i, obj in reversed(self.names): + if obj.name.token.lexeme == name.token.lexeme: + self.names.del(i) + + +proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int = + ## Gets the predicted stack position of the given variable + ## if it is static, returns -1 if it is to be bound dynamically + ## or it does not exist at all and returns -2 if the variable + ## is outside of the current local scope and is to be emitted as a closure. + var i: int = self.names.high() + for variable in reversed(self.names): + if name.name.lexeme == variable.name.name.lexeme: + if variable.depth == depth: + return i + else: + # This tells self.identifier() that this is + # a closed-over variable + return -2 + dec(i) + return -1 + + +proc identifier(self: Compiler, node: IdentExpr) = + ## Compiles access to identifiers + let s = self.resolveStatic(node) + if s != nil: + if s.isConst: + # Constants are emitted as, you guessed it, constant instructions + # no matter the scope depth. Also, name resolution specifiers do not + # apply to them (because what would it mean for a constant to be dynamic + # anyway?) + self.emitConstant(node) + else: + let index = self.getStaticIndex(node) + if index != -1: + if index >= 0: + self.emitByte(LoadFast) # Static name resolution, loads value at index in the stack. Very fast. Much wow. + self.emitBytes(index.toTriple()) + else: + if self.closedOver.len() == 0: + self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)") + if self.closedOver.len() >= 16777216: + self.error("too many consecutive closed-over variables (max is 16777215)") + self.emitByte(LoadHeap) # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics + self.emitBytes(self.closedOver.high().toTriple()) + else: + self.emitByte(LoadName) # Resolves by name, at runtime, in a global hashmap. 
Slowest method + self.emitBytes(self.identifierConstant(node)) + + +proc assignment(self: Compiler, node: ASTNode) = + ## Compiles assignment expressions + case node.kind: + of assignExpr: + var node = AssignExpr(node) + var name = IdentExpr(node.name) + let r = self.resolveStatic(name) + if r != nil and r.isConst: + self.error("cannot assign to constant") + self.expression(node.value) + let index = self.getStaticIndex(name) + case node.token.kind: + of InplaceAdd: + self.emitByte(BinaryAdd) + of InplaceSub: + self.emitByte(BinarySubtract) + of InplaceDiv: + self.emitByte(BinaryDivide) + of InplaceMul: + self.emitByte(BinaryMultiply) + of InplacePow: + self.emitByte(BinaryPow) + of InplaceFloorDiv: + self.emitByte(BinaryFloorDiv) + of InplaceMod: + self.emitByte(BinaryMod) + of InplaceAnd: + self.emitByte(BinaryAnd) + of InplaceXor: + self.emitByte(BinaryXor) + of InplaceRightShift: + self.emitByte(BinaryShiftRight) + of InplaceLeftShift: + self.emitByte(BinaryShiftLeft) + else: + discard # Unreachable + # In-place operators just change + # what values is set to a given + # stack offset/name, so we only + # need to perform the operation + # as usual and then store it. + # TODO: A better optimization would + # be to have everything in one opcode, + # but that requires variants for stack, + # heap, and closure variables and I cba + if index != -1: + self.emitByte(StoreFast) + self.emitBytes(index.toTriple()) + else: + # Assignment only encompasses variable assignments, + # so we can ensure the name is a constant (i.e. an + # IdentExpr) instead of an object (which would be + # the case with setItemExpr) + self.emitByte(StoreName) + self.emitBytes(self.makeConstant(name)) + of setItemExpr: + discard + # TODO + else: + self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)") + + +proc beginScope(self: Compiler) = + ## Begins a new local scope by incrementing the current + ## scope's depth + inc(self.scopeDepth) + + +proc endScope(self: Compiler) = + ## Ends the current local scope + if self.scopeDepth < 0: + self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)") + var popped: int = 0 + for ident in reversed(self.names): + if ident.depth > self.scopeDepth: + inc(popped) + if not self.enableOptimizations: + # All variables with a scope depth larger than the current one + # are now out of scope. Begone, you're now homeless! + self.emitByte(Pop) + if self.enableOptimizations and popped > 1: + # If we're popping less than 65535 variables, then + # we can emit a PopN instruction. 
This is true for + # 99.99999% of the use cases of the language (who the + # hell is going to use 65 THOUSAND local variables?), but + # if you'll ever use more then JAPL will emit a PopN instruction + # for the first 65 thousand and change local variables and then + # emit another batch of plain ol' Pop instructions for the rest + if popped <= uint16.high().int(): + self.emitByte(PopN) + self.emitBytes(popped.toDouble()) + else: + self.emitByte(PopN) + self.emitBytes(uint16.high().int.toDouble()) + for i in countdown(self.names.high(), popped - uint16.high().int()): + if self.names[i].depth > self.scopeDepth: + self.emitByte(Pop) + elif popped == 1: + # We only emit PopN if we're popping more than one value + self.emitByte(Pop) + for _ in countup(0, popped - 1): + discard self.names.pop() + dec(self.scopeDepth) + + +proc blockStmt(self: Compiler, node: BlockStmt) = + ## Compiles block statements, which create a new + ## local scope. + self.beginScope() + for decl in node.code: + self.declaration(decl) + self.endScope() + + +proc ifStmt(self: Compiler, node: IfStmt) = + ## Compiles if/else statements for conditional + ## execution of code + self.expression(node.condition) + var jumpCode: OpCode + if self.enableOptimizations: + jumpCode = JumpIfFalsePop + else: + jumpCode = JumpIfFalse + let jump = self.emitJump(jumpCode) + if not self.enableOptimizations: + self.emitByte(Pop) + self.statement(node.thenBranch) + self.patchJump(jump) + if node.elseBranch != nil: + let jump = self.emitJump(JumpForwards) + self.statement(node.elseBranch) + self.patchJump(jump) + + +proc emitLoop(self: Compiler, begin: int) = + ## Emits a JumpBackwards instruction with the correct + ## jump offset + var offset: int + case OpCode(self.chunk.code[begin + 1]): # The jump instruction + of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse, + LongJumpIfFalsePop, LongJumpIfTrue: + offset = self.chunk.code.len() - begin + 4 + else: + offset = self.chunk.code.len() - begin + if offset > uint16.high().int: + if offset > 16777215: + self.error("cannot jump more than 16777215 bytecode instructions") + self.emitByte(LongJumpBackwards) + self.emitBytes(offset.toTriple()) + else: + self.emitByte(JumpBackwards) + self.emitBytes(offset.toDouble()) + + +proc whileStmt(self: Compiler, node: WhileStmt) = + ## Compiles C-style while loops + let start = self.chunk.code.len() + self.expression(node.condition) + let jump = self.emitJump(JumpIfFalsePop) + self.statement(node.body) + self.patchJump(jump) + self.emitLoop(start) + + +proc expression(self: Compiler, node: ASTNode) = + ## Compiles all expressions + case node.kind: + of getItemExpr: + discard # TODO + # Note that for setItem and assign we don't convert + # the node to its true type because that type information + # would be lost in the call anyway. 
The differentiation + # happens in self.assignment + of setItemExpr, assignExpr: + self.assignment(node) + of identExpr: + self.identifier(IdentExpr(node)) + of unaryExpr: + # Unary expressions such as ~5 and -3 + self.unary(UnaryExpr(node)) + of groupingExpr: + # Grouping expressions like (2 + 1) + self.expression(GroupingExpr(node).expression) + of binaryExpr: + # Binary expressions such as 2 ^ 5 and 0.66 * 3.14 + self.binary(BinaryExpr(node)) + of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, + infExpr, nanExpr, floatExpr, nilExpr, + tupleExpr, setExpr, listExpr, dictExpr: + # Since all of these AST nodes mostly share + # the same overall structure, and the kind + # discriminant is enough to tell one + # from the other, why bother with + # specialized cases when one is enough? + self.literal(node) + else: + self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)") + + +proc delStmt(self: Compiler, node: ASTNode) = + ## Compiles del statements, which unbind + ## a name from the current scope + case node.kind: + of identExpr: + var node = IdentExpr(node) + let i = self.getStaticIndex(node) + if i != -1: + self.emitByte(DeleteFast) + self.emitBytes(i.toTriple()) + self.deleteStatic(node) + else: + self.emitByte(DeleteName) + self.emitBytes(self.identifierConstant(node)) + else: + discard # The parser already handles the other cases + + +proc awaitStmt(self: Compiler, node: AwaitStmt) = + ## Compiles await statements. An await statement + ## is like an await expression, but parsed in the + ## context of statements for usage outside expressions, + ## meaning it can be used standalone. It's basically the + ## same as an await expression followed by a semicolon. + ## Await expressions are the only native construct to + ## run coroutines from within an already asynchronous + ## loop (which should be orchestrated by an event loop). + ## They block in the caller until the callee returns + self.expression(node.awaitee) + self.emitByte(OpCode.Await) + + +proc deferStmt(self: Compiler, node: DeferStmt) = + ## Compiles defer statements. A defer statement + ## is executed right before the function exits + ## (either because of a return or an exception) + let current = self.chunk.code.len + self.expression(node.deferred) + for i in countup(current, self.chunk.code.high()): + self.deferred.add(self.chunk.code[i]) + self.chunk.code.del(i) + + +proc returnStmt(self: Compiler, node: ReturnStmt) = + ## Compiles return statements. An empty return + ## implicitly returns nil + self.expression(node.value) + self.emitByte(OpCode.Return) + + +proc yieldStmt(self: Compiler, node: YieldStmt) = + ## Compiles yield statements + self.expression(node.expression) + self.emitByte(OpCode.Yield) + + +proc raiseStmt(self: Compiler, node: RaiseStmt) = + ## Compiles yield statements + self.expression(node.exception) + self.emitByte(OpCode.Raise) + + +proc continueStmt(self: Compiler, node: ContinueStmt) = + ## Compiles continue statements. A continue statements + ## jumps to the next iteration in a loop + if self.currentLoop.start <= 65535: + self.emitByte(Jump) + self.emitBytes(self.currentLoop.start.toDouble()) + else: + self.emitByte(LongJump) + self.emitBytes(self.currentLoop.start.toTriple()) + + +proc breakStmt(self: Compiler, node: BreakStmt) = + ## Compiles break statements. 
A continue statement + ## jumps to the next iteration in a loop + + # Emits dummy jump offset, this is + # patched later + discard self.emitJump(OpCode.Break) + self.currentLoop.breakPos.add(self.chunk.code.high() - 4) + if self.currentLoop.depth > self.scopeDepth: + # Breaking out of a loop closes its scope + self.endScope() + + +proc patchBreaks(self: Compiler) = + ## Patches "break" opcodes with + ## actual jumps. This is needed + ## because the size of code + ## to skip is not known before + ## the loop is fully compiled + for brk in self.currentLoop.breakPos: + self.chunk.code[brk] = JumpForwards.uint8() + self.patchJump(brk) + + +proc assertStmt(self: Compiler, node: AssertStmt) = + ## Compiles assert statements (raise + ## AssertionError if the expression is falsey) + self.expression(node.expression) + self.emitByte(OpCode.Assert) + + +proc statement(self: Compiler, node: ASTNode) = + ## Compiles all statements + case node.kind: + of exprStmt: + self.expression(ExprStmt(node).expression) + self.emitByte(Pop) # Expression statements discard their value. Their main use case is side effects in function calls + of NodeKind.ifStmt: + self.ifStmt(IfStmt(node)) + of NodeKind.delStmt: + self.delStmt(DelStmt(node).name) + of NodeKind.assertStmt: + self.assertStmt(AssertStmt(node)) + of NodeKind.raiseStmt: + self.raiseStmt(RaiseStmt(node)) + of NodeKind.breakStmt: + self.breakStmt(BreakStmt(node)) + of NodeKind.continueStmt: + self.continueStmt(ContinueStmt(node)) + of NodeKind.returnStmt: + self.returnStmt(ReturnStmt(node)) + of NodeKind.importStmt: + discard + of NodeKind.fromImportStmt: + discard + of NodeKind.whileStmt, NodeKind.forStmt: + ## Our parser already desugars for loops to + ## while loops! + let loop = self.currentLoop + self.currentLoop = Loop(start: self.chunk.code.len(), + depth: self.scopeDepth, breakPos: @[]) + self.whileStmt(WhileStmt(node)) + self.patchBreaks() + self.currentLoop = loop + of NodeKind.forEachStmt: + discard + of NodeKind.blockStmt: + self.blockStmt(BlockStmt(node)) + of NodeKind.yieldStmt: + self.yieldStmt(YieldStmt(node)) + of NodeKind.awaitStmt: + self.awaitStmt(AwaitStmt(node)) + of NodeKind.deferStmt: + self.deferStmt(DeferStmt(node)) + of NodeKind.tryStmt: + discard + else: + self.expression(node) + + +proc funDecl(self: Compiler, node: FunDecl) = + ## Compiles function declarations + + # We store the current function + var function = self.currentFunction + self.currentFunction = node + # A function's code is just compiled linearly + # and then jumped over + let jmp = self.emitJump(JumpForwards) + self.declareName(node) + + # Since the deferred array is a linear + # sequence of instructions and we want + # to keep track to whose function's each + # set of deferred instruction belongs, + # we record the length of the deferred + # array before compiling the function + # and use this info later to compile + # the try/finally block with the deferred + # code + var deferStart = self.deferred.len() + + self.blockStmt(BlockStmt(node.body)) + # Yup, we're done. That was easy, huh? + # But after all functions are just named + # scopes, and we compile them just like that: + # we declare their name and arguments (before + # their body so recursion works) and then just + # handle them as a block statement (which takes + # care of incrementing self.scopeDepth so locals + # are resolved properly). There's a need for a bit + # of boilerplate code to make closures work, but + # that's about it + + # All functions implicitly return nil. 
This code + # will not be executed by the VM in all but the simplest + # cases where there is an explicit return statement, but + # I cannot figure out an elegant and simple way to tell + # if a function already returned or not, so we play it safe + + if not self.enableOptimizations: + if OpCode(self.chunk.code[^1]) != OpCode.Return: + self.emitBytes(OpCode.Nil, OpCode.Return) + else: + if OpCode(self.chunk.code[^1]) != OpCode.Return: + self.emitByte(ImplicitReturn) + + # Currently defer is not functional so we + # just pop the instructions + for i in countup(deferStart, self.deferred.len(), 1): + self.deferred.delete(i) + + self.patchJump(jmp) + # This makes us compile nested functions correctly + self.currentFunction = function + + +proc classDecl(self: Compiler, node: ClassDecl) = + ## Compiles class declarations + self.declareName(node.name) + self.emitByte(MakeClass) + self.blockStmt(BlockStmt(node.body)) + + +proc declaration(self: Compiler, node: ASTNode) = + ## Compiles all declarations + case node.kind: + of NodeKind.varDecl: + self.varDecl(VarDecl(node)) + of NodeKind.funDecl: + self.funDecl(FunDecl(node)) + of NodeKind.classDecl: + self.classDecl(ClassDecl(node)) + else: + self.statement(node) + + +proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk = + ## Compiles a sequence of AST nodes into a chunk + ## object + self.chunk = newChunk() + self.ast = ast + self.file = file + self.names = @[] + self.scopeDepth = 0 + self.currentFunction = nil + self.currentModule = "
" + self.current = 0 + while not self.done(): + self.declaration(self.step()) + if self.ast.len() > 0: + # *Technically* an empty program is a valid program + self.endScope() + self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope + result = self.chunk + if self.ast.len() > 0 and self.scopeDepth != -1: + self.error(&"internal error: invalid scopeDepth state (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?") diff --git a/src/frontend/lexer.nim b/src/frontend/lexer.nim new file mode 100644 index 0000000..e86cf32 --- /dev/null +++ b/src/frontend/lexer.nim @@ -0,0 +1,574 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## A simple and modular tokenizer implementation with arbitrary lookahead + +import strutils +import parseutils +import strformat +import tables + +import meta/token +import meta/errors + + +export token # Makes Token available when importing the lexer module +export errors + + +type SymbolTable = object + ## A table of symbols used + ## to lex a source file + keywords: TableRef[string, Token] + operators: TableRef[string, Token] + + +# Table of all single-character tokens +var tokens = to_table({ + '(': LeftParen, ')': RightParen, + '{': LeftBrace, '}': RightBrace, + '.': Dot, ',': Comma, '-': Minus, + '+': Plus, '*': Asterisk, + '>': GreaterThan, '<': LessThan, '=': Equal, + '~': Tilde, '/': Slash, '%': Percentage, + '[': LeftBracket, ']': RightBracket, + ':': Colon, '^': Caret, '&': Ampersand, + '|': Pipe, ';': Semicolon}) + +# Table of all double-character tokens +const double = to_table({"**": DoubleAsterisk, + ">>": RightShift, + "<<": LeftShift, + "==": DoubleEqual, + "!=": NotEqual, + ">=": GreaterOrEqual, + "<=": LessOrEqual, + "//": FloorDiv, + "+=": InplaceAdd, + "-=": InplaceSub, + "/=": InplaceDiv, + "*=": InplaceMul, + "^=": InplaceXor, + "&=": InplaceAnd, + "|=": InplaceOr, + "%=": InplaceMod, + }) + +# Table of all triple-character tokens +const triple = to_table({"//=": InplaceFloorDiv, + "**=": InplacePow, + ">>=": InplaceRightShift, + "<<=": InplaceLeftShift + }) + + +# Constant table storing all the reserved keywords (which are parsed as identifiers) +const keywords = to_table({ + "fun": Fun, "raise": Raise, + "if": If, "else": Else, + "for": For, "while": While, + "var": Var, "nil": Nil, + "true": True, "false": False, + "return": Return, "break": Break, + "continue": Continue, "inf": Infinity, + "nan": NotANumber, "is": Is, + "lambda": Lambda, "class": Class, + "async": Async, "import": Import, + "isnot": IsNot, "from": From, + "const": Const, "not": LogicalNot, + "assert": Assert, "or": LogicalOr, + "and": LogicalAnd, "del": Del, + "async": Async, "await": Await, + "foreach": Foreach, "yield": Yield, + "private": Private, "public": Public, + "static": Static, "dynamic": Dynamic, + "as": As, "of": Of, "defer": Defer, + "except": Except, "finally": Finally, + "try": Try + }) + + +type + Lexer* = ref object + ## A lexer object + source: 
string + tokens: seq[Token] + line: int + start: int + current: int + file: string + lines: seq[tuple[start, stop: int]] + lastLine: int + + +# Simple public getters +proc getStart*(self: Lexer): int = self.start +proc getCurrent*(self: Lexer): int = self.current +proc getLine*(self: Lexer): int = self.line +proc getSource*(self: Lexer): string = self.source +proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = (if line > 1: self.lines[line - 2] else: (start: 0, stop: self.current)) + + +proc initLexer*(self: Lexer = nil): Lexer = + ## Initializes the lexer or resets + ## the state of an existing one + new(result) + if self != nil: + result = self + result.source = "" + result.tokens = @[] + result.line = 1 + result.start = 0 + result.current = 0 + result.file = "" + result.lines = @[] + result.lastLine = 0 + + +proc done(self: Lexer): bool = + ## Returns true if we reached EOF + result = self.current >= self.source.len + + +proc incLine(self: Lexer) = + ## Increments the lexer's line + ## and updates internal line + ## metadata + self.lines.add((start: self.lastLine, stop: self.current)) + self.line += 1 + self.lastLine = self.current + + +proc step(self: Lexer, n: int = 1): string = + ## Steps n characters forward in the + ## source file (default = 1). A null + ## terminator is returned if the lexer + ## is at EOF. The amount of skipped + ## characters is returned + if self.done(): + return "\0" + self.current = self.current + n + result = self.source[self.current..self.current + n] + + +proc peek(self: Lexer, distance: int = 0): string = + ## Returns the character in the source file at + ## the given distance, without consuming it. + ## The character is converted to a string of + ## length one for compatibility with the rest + ## of the lexer. + ## A null terminator is returned if the lexer + ## is at EOF. The distance parameter may be + ## negative to retrieve previously consumed + ## tokens, while the default distance is 0 + ## (retrieves the next token to be consumed). + ## If the given distance goes beyond EOF, a + ## null terminator is returned + if self.done() or self.current + distance > self.source.high(): + result = "\0" + else: + # hack to "convert" a char to a string + result = &"{self.source[self.current + distance]}" + + +proc peek(self: Lexer, distance: int = 0, length: int = 1): string = + ## Behaves like self.peek(), but + ## can peek more than one character, + ## starting from the given distance. + ## A string of exactly length characters + ## is returned. If the length of the + ## desired string goes beyond EOF, + ## the resulting string is padded + ## with null terminators + var i = distance + while i <= length: + result.add(self.peek(i)) + inc(i) + +proc error(self: Lexer, message: string) = + ## Raises a lexing error with a formatted + ## error message + + raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}") + + +proc check(self: Lexer, what: string, distance: int = 0): bool = + ## Behaves like match, without consuming the + ## token. False is returned if we're at EOF + ## regardless of what the token to check is. + ## The distance is passed directly to self.peek() + if self.done(): + return false + return self.peek(distance) == what + + +proc check(self: Lexer, what: string): bool = + ## Calls self.check() in a loop with + ## each character from the given source + ## string. 
Useful to check multi-character + ## strings in one go + for i, chr in what: + # Why "i" you ask? Well, since check + # does not consume the tokens it checks + # against we need some way of keeping + # track where we are in the string the + # caller gave us, otherwise this will + # not behave as expected + if not self.check(&"{chr}", i): + return false + return true + + +proc check(self: Lexer, what: openarray[string]): bool = + ## Calls self.check() in a loop with + ## each character from the given seq of + ## char and returns at the first match. + ## Useful to check multiple tokens in a situation + ## where only one of them may match at one time + for s in what: + if self.check(s): + return true + return false + + +proc match(self: Lexer, what: char): bool = + ## Returns true if the next character matches + ## the given character, and consumes it. + ## Otherwise, false is returned + if self.done(): + self.error("unexpected EOF") + return false + elif not self.check(what): + self.error(&"expecting '{what}', got '{self.peek()}' instead") + return false + self.current += 1 + return true + + +proc match(self: Lexer, what: string): bool = + ## Calls self.match() in a loop with + ## each character from the given source + ## string. Useful to match multi-character + ## strings in one go + for chr in what: + if not self.match(chr): + return false + return true + + +proc createToken(self: Lexer, tokenType: TokenType) = + ## Creates a token object and adds it to the token + ## list + var tok: Token = new(Token) + tok.kind = tokenType + tok.lexeme = self.source[self.start.. uint8.high().int: + self.error("escape sequence value too large (> 255)") + self.source[self.current] = cast[char](value) + of 'u', 'U': + self.error("unicode escape sequences are not supported (yet)") + of 'x': + var code = "" + var value = 0 + var i = self.current + while i < self.source.high() and (let c = self.source[ + i].toLowerAscii(); c in 'a'..'f' or c in '0'..'9'): + code &= self.source[i] + i += 1 + assert parseHex(code, value) == code.len() + if value > uint8.high().int: + self.error("escape sequence value too large (> 255)") + self.source[self.current] = cast[char](value) + else: + self.error(&"invalid escape sequence '\\{self.peek()}'") + + +proc parseString(self: Lexer, delimiter: char, mode: string = "single") = + ## Parses string literals. They can be expressed using matching pairs + ## of either single or double quotes. Most C-style escape sequences are + ## supported, moreover, a specific prefix may be prepended + ## to the string to instruct the lexer on how to parse it: + ## - b -> declares a byte string, where each character is + ## interpreted as an integer instead of a character + ## - r -> declares a raw string literal, where escape sequences + ## are not parsed and stay as-is + ## - f -> declares a format string, where variables may be + ## interpolated using curly braces like f"Hello, {name}!". + ## Braces may be escaped using a pair of them, so to represent + ## a literal "{" in an f-string, one would use {{ instead + ## Multi-line strings can be declared using matching triplets of + ## either single or double quotes. 
They can span across multiple + ## lines and escape sequences in them are not parsed, like in raw + ## strings, so a multi-line string prefixed with the "r" modifier + ## is redundant, although multi-line byte/format strings are supported + while not self.check(delimiter) and not self.done(): + if self.check('\n'): + if mode == "multi": + self.incLine() + else: + self.error("unexpected EOL while parsing string literal") + if mode in ["raw", "multi"]: + discard self.step() + if self.check('\\'): + # This madness here serves to get rid of the slash, since \x is mapped + # to a one-byte sequence but the string '\x' actually 2 bytes (or more, + # depending on the specific escape sequence) + self.source = self.source[0..> b (a with bits shifted b times to the right) onto the stack + BinaryShiftLeft, # Pushes the result of a << b (a with bits shifted b times to the left) onto the stack + BinaryXor, # Pushes the result of a ^ b (bitwise exclusive or) onto the stack + BinaryOr, # Pushes the result of a | b (bitwise or) onto the stack + BinaryAnd, # Pushes the result of a & b (bitwise and) onto the stack + UnaryNot, # Pushes the result of ~x (bitwise not) onto the stack + BinaryAs, # Pushes the result of a as b onto the stack (converts a to the type of b. Explicit support from a is required) + BinaryIs, # Pushes the result of a is b onto the stack (true if a and b point to the same object, false otherwise) + BinaryIsNot, # Pushes the result of not (a is b). This could be implemented in terms of BinaryIs, but it's more efficient this way + BinaryOf, # Pushes the result of a of b onto the stack (true if a is a subclass of b, false otherwise) + BinarySlice, # Perform slicing on supported objects (like "hello"[0:2], which yields "he"). The result is pushed onto the stack + BinarySubscript, # Subscript operator, like "hello"[0] (which pushes 'h' onto the stack) + ## Binary comparison operators + GreaterThan, # Pushes the result of a > b onto the stack + LessThan, # Pushes the result of a < b onto the stack + EqualTo, # Pushes the result of a == b onto the stack + NotEqualTo, # Pushes the result of a != b onto the stack (optimization for not (a == b)) + GreaterOrEqual, # Pushes the result of a >= b onto the stack + LessOrEqual, # Pushes the result of a <= b onto the stack + ## Logical operators + LogicalNot, # Pushes true if + LogicalAnd, + LogicalOr, + ## Constant opcodes (each of them pushes a singleton on the stack) + Nil, + True, + False, + Nan, + Inf, + ## Basic stack operations + Pop, # Pops an element off the stack and discards it + Push, # Pushes x onto the stack + PopN, # Pops x elements off the stack (optimization for exiting scopes and returning from functions) + ## Name resolution/handling + LoadAttribute, + DeclareName, # Declares a global dynamically bound name in the current scope + LoadName, # Loads a dynamically bound variable + LoadFast, # Loads a statically bound variable + StoreName, # Sets/updates a dynamically bound variable's value + StoreFast, # Sets/updates a statically bound variable's value + DeleteName, # Unbinds a dynamically bound variable's name from the current scope + DeleteFast, # Unbinds a statically bound variable's name from the current scope + LoadHeap, # Loads a closed-over variable + StoreHeap, # Stores a closed-over variable + ## Looping and jumping + Jump, # Absolute, unconditional jump into the bytecode + JumpIfFalse, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey + JumpIfTrue, # Jumps to an absolute index in the 
bytecode if the value at the top of the stack is truthy + JumpIfFalsePop, # Like JumpIfFalse, but it also pops off the stack (regardless of truthyness). Optimization for if statements + JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey and pops it otherwise + JumpForwards, # Relative, unconditional, positive jump in the bytecode + JumpBackwards, # Relative, unconditional, negative jump into the bytecode + Break, # Temporary opcode used to signal exiting out of loops + ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one) + LongJump, + LongJumpIfFalse, + LongJumpIfTrue, + LongJumpIfFalsePop, + LongJumpIfFalseOrPop, + LongJumpForwards, + LongJumpBackwards, + ## Functions + Call, # Calls a callable object + Return # Returns from the current function + ## Exception handling + Raise, # Raises exception x + ReRaise, # Re-raises active exception + BeginTry, # Initiates an exception handling context + FinishTry, # Closes the current exception handling context + ## Generators + Yield, + ## Coroutines + Await, + ## Collection literals + BuildList, + BuildDict, + BuildSet, + BuildTuple, + ## Misc + Assert, # Raises an AssertionFailed exception if the value at the top of the stack is falsey + MakeClass, # Builds a class instance from the values at the top of the stack (class object, constructor arguments, etc.) + Slice, # Slices an object (takes 3 arguments: start, stop, step). Pushes the result of a.subscript(b, c, d) onto the stack + GetItem, # Pushes the result of a.getItem(b) onto the stack + ImplicitReturn, # Optimization for returning nil from functions (saves us a VM "clock cycle") + + +# We group instructions by their operation/operand types for easier handling when debugging + +# Simple instructions encompass: +# - Instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.) 
+# - Unary and binary operators +const simpleInstructions* = {Return, BinaryAdd, BinaryMultiply, + BinaryDivide, BinarySubtract, + BinaryMod, BinaryPow, Nil, + True, False, OpCode.Nan, OpCode.Inf, + BinaryShiftLeft, BinaryShiftRight, + BinaryXor, LogicalNot, EqualTo, + GreaterThan, LessThan, LoadAttribute, + BinarySlice, Pop, UnaryNegate, + BinaryIs, BinaryAs, GreaterOrEqual, + LessOrEqual, BinaryOr, BinaryAnd, + UnaryNot, BinaryFloorDiv, BinaryOf, Raise, + ReRaise, BeginTry, FinishTry, Yield, Await, + MakeClass, ImplicitReturn} + +# Constant instructions are instructions that operate on the bytecode constant table +const constantInstructions* = {LoadConstant, DeclareName, LoadName, StoreName, DeleteName} + +# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form +# of 24 bit integers +const stackTripleInstructions* = {Call, StoreFast, DeleteFast, LoadFast, LoadHeap, StoreHeap} + +# Stack double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form +# of 16 bit integers +const stackDoubleInstructions* = {} + +# Argument double argument instructions take hardcoded arguments on the stack as 16 bit integers +const argumentDoubleInstructions* = {PopN, } + +# Jump instructions jump at relative or absolute bytecode offsets +const jumpInstructions* = {JumpIfFalse, JumpIfFalsePop, JumpForwards, JumpBackwards, + LongJumpIfFalse, LongJumpIfFalsePop, LongJumpForwards, + LongJumpBackwards, JumpIfTrue, LongJumpIfTrue} + +# Collection instructions push a built-in collection type onto the stack +const collectionInstructions* = {BuildList, BuildDict, BuildSet, BuildTuple} + + +proc newChunk*(reuseConsts: bool = true): Chunk = + ## Initializes a new, empty chunk + result = Chunk(consts: @[], code: @[], lines: @[], reuseConsts: reuseConsts) + + +proc `$`*(self: Chunk): string = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])""" + + +proc write*(self: Chunk, newByte: uint8, line: int) = + ## Adds the given instruction at the provided line number + ## to the given chunk object + assert line > 0, "line must be greater than zero" + if self.lines.high() >= 1 and self.lines[^2] == line: + self.lines[^1] += 1 + else: + self.lines.add(line) + self.lines.add(1) + self.code.add(newByte) + + +proc write*(self: Chunk, bytes: openarray[uint8], line: int) = + ## Calls write in a loop with all members of the given + ## array + for cByte in bytes: + self.write(cByte, line) + + +proc write*(self: Chunk, newByte: OpCode, line: int) = + ## Adds the given instruction at the provided line number + ## to the given chunk object + self.write(uint8(newByte), line) + + +proc write*(self: Chunk, bytes: openarray[OpCode], line: int) = + ## Calls write in a loop with all members of the given + ## array + for cByte in bytes: + self.write(uint8(cByte), line) + + +proc getLine*(self: Chunk, idx: int): int = + ## Returns the associated line of a given + ## instruction index + if self.lines.len < 2: + raise newException(IndexDefect, "the chunk object is empty") + var + count: int + current: int = 0 + for n in countup(0, self.lines.high(), 2): + count = self.lines[n + 1] + if idx in current - count.. 
< + LessOrEqual, GreaterOrEqual, # >= <= + NotEqual, RightShift, LeftShift, # != >> << + LogicalAnd, LogicalOr, LogicalNot, FloorDiv, # and or not // + InplaceAdd, InplaceSub, InplaceDiv, # += -= /= + InplaceMod, InplaceMul, InplaceXor, # %= *= ^= + InplaceAnd, InplaceOr, # &= |= + DoubleEqual, InplaceFloorDiv, InplacePow, # == //= **= + InplaceRightShift, InplaceLeftShift + + # Miscellaneous + + EndOfFile + + + Token* = ref object + ## A token object + kind*: TokenType + lexeme*: string + line*: int + pos*: tuple[start, stop: int] + + +proc `$`*(self: Token): string = + if self != nil: + result = &"Token(kind={self.kind}, lexeme={$(self.lexeme)}, line={self.line}, pos=({self.pos.start}, {self.pos.stop}))" + else: + result = "nil" diff --git a/src/frontend/optimizer.nim b/src/frontend/optimizer.nim new file mode 100644 index 0000000..a2124f5 --- /dev/null +++ b/src/frontend/optimizer.nim @@ -0,0 +1,402 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import meta/ast +import meta/token + +import parseutils +import strformat +import strutils +import math + + +type + WarningKind* = enum + unreachableCode, + nameShadowing, + isWithALiteral, + equalityWithSingleton, + valueOverflow, + implicitConversion, + invalidOperation + + Warning* = ref object + kind*: WarningKind + node*: ASTNode + + Optimizer* = ref object + warnings: seq[Warning] + foldConstants*: bool + + +proc initOptimizer*(foldConstants: bool = true): Optimizer = + ## Initializes a new optimizer object + new(result) + result.foldConstants = foldConstants + result.warnings = @[] + + +proc newWarning(self: Optimizer, kind: WarningKind, node: ASTNode) = + self.warnings.add(Warning(kind: kind, node: node)) + + +proc `$`*(self: Warning): string = &"Warning(kind={self.kind}, node={self.node})" + + +# Forward declaration +proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode + + +proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode = + ## Performs some checks on constant AST nodes such as + ## integers. This method converts all of the different + ## integer forms (binary, octal and hexadecimal) to + ## decimal integers. 
Overflows are checked here too + if not self.foldConstants: + return node + case node.kind: + of intExpr: + var x: int + var y = IntExpr(node) + try: + assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.newWarning(valueOverflow, node) + result = node + of hexExpr: + var x: int + var y = HexExpr(node) + try: + assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.newWarning(valueOverflow, node) + return node + result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) + of binExpr: + var x: int + var y = BinExpr(node) + try: + assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.newWarning(valueOverflow, node) + return node + result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) + of octExpr: + var x: int + var y = OctExpr(node) + try: + assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme) + except ValueError: + self.newWarning(valueOverflow, node) + return node + result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) + of floatExpr: + var x: float + var y = FloatExpr(node) + try: + discard parseFloat(y.literal.lexeme, x) + except ValueError: + self.newWarning(valueOverflow, node) + return node + result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1))) + else: + result = node + + +proc optimizeUnary(self: Optimizer, node: UnaryExpr): ASTNode = + ## Attempts to optimize unary expressions + var a = self.optimizeNode(node.a) + if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a: + # We can't optimize further, the overflow will be caught in the compiler + return UnaryExpr(kind: unaryExpr, a: a, operator: node.operator) + case a.kind: + of intExpr: + var x: int + assert parseInt(IntExpr(a).literal.lexeme, x) == len(IntExpr(a).literal.lexeme) + case node.operator.kind: + of Tilde: + x = not x + of Minus: + x = -x + else: + discard # Unreachable + result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1))) + of floatExpr: + var x: float + discard parseFloat(FloatExpr(a).literal.lexeme, x) + case node.operator.kind: + of Minus: + x = -x + of Tilde: + self.newWarning(invalidOperation, node) + return node + else: + discard + result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1))) + else: + result = node + + +proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode = + ## Attempts to optimize binary expressions + var a, b: ASTNode + a = self.optimizeNode(node.a) + b = self.optimizeNode(node.b) + if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and (self.warnings[^1].node == a or self.warnings[^1].node == b): + # We can't optimize further, the overflow will be caught in the compiler. 
We don't return the same node + # because optimizeNode might've been able to optimize one of the two operands and we don't know which + return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) + if node.operator.kind == DoubleEqual: + if a.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}: + self.newWarning(equalityWithSingleton, a) + elif b.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}: + self.newWarning(equalityWithSingleton, b) + elif node.operator.kind == Is: + if a.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}: + self.newWarning(isWithALiteral, a) + elif b.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}: + self.newWarning(isWithALiteral, b) + if a.kind == intExpr and b.kind == intExpr: + # Optimizes integer operations + var x, y, z: int + assert parseInt(IntExpr(a).literal.lexeme, x) == IntExpr(a).literal.lexeme.len() + assert parseInt(IntExpr(b).literal.lexeme, y) == IntExpr(b).literal.lexeme.len() + try: + case node.operator.kind: + of Plus: + z = x + y + of Minus: + z = x - y + of Asterisk: + z = x * y + of FloorDiv: + z = int(x / y) + of DoubleAsterisk: + if y >= 0: + z = x ^ y + else: + # Nim's builtin pow operator can't handle + # negative exponents, so we use math's + # pow and convert from/to floats instead + z = pow(x.float, y.float).int + of Percentage: + z = x mod y + of Caret: + z = x xor y + of Ampersand: + z = x and y + of Pipe: + z = x or y + of Slash: + # Special case, yields a float + return FloatExpr(kind: intExpr, literal: Token(kind: Float, lexeme: $(x / y), line: IntExpr(a).literal.line, pos: (start: -1, stop: -1))) + else: + result = BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) + except OverflowDefect: + self.newWarning(valueOverflow, node) + return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) + except RangeDefect: + # TODO: What warning do we raise here? 
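+                # [Editor's note, illustrative suggestion only]: a RangeDefect here
+                # most likely escapes from the float -> int conversions used above
+                # (e.g. int(x / y) when y is 0, or pow(x.float, y.float).int for
+                # negative exponents). Until a dedicated warning kind exists,
+                # reusing valueOverflow would be one conservative option:
+                #   self.newWarning(valueOverflow, node)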
+ return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) + result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $z, line: IntExpr(a).literal.line, pos: (start: -1, stop: -1))) + elif a.kind == floatExpr or b.kind == floatExpr: + var x, y, z: float + if a.kind == intExpr: + var temp: int + assert parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len() + x = float(temp) + self.newWarning(implicitConversion, a) + else: + discard parseFloat(FloatExpr(a).literal.lexeme, x) + if b.kind == intExpr: + var temp: int + assert parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len() + y = float(temp) + self.newWarning(implicitConversion, b) + else: + discard parseFloat(FloatExpr(b).literal.lexeme, y) + # Optimizes float operations + try: + case node.operator.kind: + of Plus: + z = x + y + of Minus: + z = x - y + of Asterisk: + z = x * y + of FloorDiv, Slash: + z = x / y + of DoubleAsterisk: + z = pow(x, y) + of Percentage: + z = x mod y + else: + result = BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) + except OverflowDefect: + self.newWarning(valueOverflow, node) + return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator) + result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $z, line: LiteralExpr(a).literal.line, pos: (start: -1, stop: -1))) + elif a.kind == strExpr and b.kind == strExpr: + var a = StrExpr(a) + var b = StrExpr(b) + case node.operator.kind: + of Plus: + result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)] & b.literal.lexeme[1..<(^1)] & "'", pos: (start: -1, stop: -1))) + else: + result = node + elif a.kind == strExpr and self.optimizeNode(b).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == b): + var a = StrExpr(a) + var b = IntExpr(b) + var bb: int + assert parseInt(b.literal.lexeme, bb) == b.literal.lexeme.len() + case node.operator.kind: + of Asterisk: + result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'")) + else: + result = node + elif b.kind == strExpr and self.optimizeNode(a).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a): + var b = StrExpr(b) + var a = IntExpr(a) + var aa: int + assert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len() + case node.operator.kind: + of Asterisk: + result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'")) + else: + result = node + else: + # There's no constant folding we can do! + result = node + + +proc detectClosures(self: Optimizer, node: FunDecl) = + ## Goes trough a function's code and detects + ## references to variables in enclosing local + ## scopes + var names: seq[Declaration] = @[] + for line in BlockStmt(node.body).code: + case line.kind: + of varDecl: + names.add(VarDecl(line)) + of funDecl: + names.add(FunDecl(line)) + of classDecl: + names.add(ClassDecl(line)) + else: + discard + for name in names: + + +proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode = + ## Analyzes an AST node and attempts to perform + ## optimizations on it. 
If no optimizations can be + ## applied or self.foldConstants is set to false, + ## then the same node is returned + if not self.foldConstants: + return node + case node.kind: + of exprStmt: + result = newExprStmt(self.optimizeNode(ExprStmt(node).expression), ExprStmt(node).token) + of intExpr, hexExpr, octExpr, binExpr, floatExpr, strExpr: + result = self.optimizeConstant(node) + of unaryExpr: + result = self.optimizeUnary(UnaryExpr(node)) + of binaryExpr: + result = self.optimizeBinary(BinaryExpr(node)) + of groupingExpr: + # Recursively unnests groups + result = self.optimizeNode(GroupingExpr(node).expression) + of callExpr: + var node = CallExpr(node) + for i, positional in node.arguments.positionals: + node.arguments.positionals[i] = self.optimizeNode(positional) + for i, (key, value) in node.arguments.keyword: + node.arguments.keyword[i].value = self.optimizeNode(value) + result = node + of sliceExpr: + var node = SliceExpr(node) + for i, e in node.ends: + node.ends[i] = self.optimizeNode(e) + node.slicee = self.optimizeNode(node.slicee) + result = node + of tryStmt: + var node = TryStmt(node) + node.body = self.optimizeNode(node.body) + if node.finallyClause != nil: + node.finallyClause = self.optimizeNode(node.finallyClause) + if node.elseClause != nil: + node.elseClause = self.optimizeNode(node.elseClause) + for i, handler in node.handlers: + node.handlers[i].body = self.optimizeNode(node.handlers[i].body) + result = node + of funDecl: + var decl = FunDecl(node) + for i, node in decl.defaults: + decl.defaults[i] = self.optimizeNode(node) + decl.body = self.optimizeNode(decl.body) + result = decl + of blockStmt: + var node = BlockStmt(node) + for i, n in node.code: + node.code[i] = self.optimizeNode(n) + result = node + of varDecl: + var decl = VarDecl(node) + decl.value = self.optimizeNode(decl.value) + result = decl + of assignExpr: + var asgn = AssignExpr(node) + asgn.value = self.optimizeNode(asgn.value) + result = asgn + of listExpr: + var l = ListExpr(node) + for i, e in l.members: + l.members[i] = self.optimizeNode(e) + result = node + of setExpr: + var s = SetExpr(node) + for i, e in s.members: + s.members[i] = self.optimizeNode(e) + result = node + of tupleExpr: + var t = TupleExpr(node) + for i, e in t.members: + t.members[i] = self.optimizeNode(e) + result = node + of dictExpr: + var d = DictExpr(node) + for i, e in d.keys: + d.keys[i] = self.optimizeNode(e) + for i, e in d.values: + d.values[i] = self.optimizeNode(e) + result = node + else: + result = node + + +proc optimize*(self: Optimizer, tree: seq[ASTNode]): tuple[tree: seq[ASTNode], warnings: seq[Warning]] = + ## Runs the optimizer on the given source + ## tree and returns a new optimized tree + ## as well as a list of warnings that may + ## be of interest. The input tree may be + ## identical to the output tree if no optimization + ## could be performed. Constant folding can be + ## turned off by setting foldConstants to false + ## when initializing the optimizer object. 
This + ## optimization step also takes care of detecting + ## closed-over variables so that the compiler can + ## emit appropriate instructions for them later on + var newTree: seq[ASTNode] = @[] + for node in tree: + newTree.add(self.optimizeNode(node)) + result = (tree: newTree, warnings: self.warnings) diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim new file mode 100644 index 0000000..fe8f579 --- /dev/null +++ b/src/frontend/parser.nim @@ -0,0 +1,1096 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## A recursive-descent top-down parser implementation + +import strformat + + +import meta/token +import meta/ast +import meta/errors + + +export token, ast, errors + + +type + + LoopContext = enum + Loop, None + + Parser* = ref object + ## A recursive-descent top-down + ## parser implementation + + # Index into self.tokens + current: int + # The name of the file being parsed. + # Only meaningful for parse errors + file: string + # The list of tokens representing + # the source code to be parsed. + # In most cases, those will come + # from the builtin lexer, but this + # behavior is not enforced and the + # tokenizer is entirely separate from + # the parser + tokens: seq[Token] + # Little internal attribute that tells + # us if we're inside a loop or not. This + # allows us to detect errors like break + # being used outside loops + currentLoop: LoopContext + # Stores the current function + # being parsed. This is a reference + # to either a FunDecl or LambdaExpr + # AST node and is mostly used to allow + # implicit generators to work. What that + # means is that there is no need for the + # programmer to specifiy a function is a + # generator like in nim, (which uses the + # 'iterator' keyword): any function is + # automatically a generator if it contains + # any number of yield statement(s) or + # yield expression(s). This attribute + # is nil when the parser is at the top-level + # code and is what allows the parser to detect + # errors like return outside functions and attempts + # to declare public names inside them before + # compilation even begins + currentFunction: ASTNode + # Stores the current scope depth (0 = global, > 0 local) + scopeDepth: int + + +proc initParser*(): Parser = + ## Initializes a new Parser object + new(result) + result.current = 0 + result.file = "" + result.tokens = @[] + result.currentFunction = nil + result.currentLoop = None + result.scopeDepth = 0 + +# Public getters for improved error formatting +proc getCurrent*(self: Parser): int = self.current +proc getCurrentToken*(self: Parser): Token = (if self.getCurrent() >= self.tokens.len(): self.tokens[^1] else: self.tokens[self.current - 1]) + +# Handy templates to make our life easier, thanks nim! + +template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1) +template endOfLine(msg: string) = self.expect(Semicolon, msg) + + +proc peek(self: Parser, distance: int = 0): Token = + ## Peeks at the token at the given distance. 
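+    ## (for example, self.peek(-1) yields the token that was just consumed)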
+ ## If the distance is out of bounds, an EOF + ## token is returned. A negative distance may + ## be used to retrieve previously consumed + ## tokens + if self.tokens.high() == -1 or self.current + distance > self.tokens.high() or self.current + distance < 0: + result = endOfFile + else: + result = self.tokens[self.current + distance] + + +proc done(self: Parser): bool = + ## Returns true if we're at the + ## end of the file. Note that the + ## parser expects an explicit + ## EOF token to signal the end + ## of the file (unless the token + ## list is empty) + result = self.tokens.len() == 0 or self.peek().kind == EndOfFile + + +proc step(self: Parser, n: int = 1): Token = + ## Steps n tokens into the input, + ## returning the last consumed one + if self.done(): + result = self.peek() + else: + result = self.tokens[self.current] + self.current += 1 + + +proc error(self: Parser, message: string) = + ## Raises a formatted ParseError exception + var lexeme = self.getCurrentToken().lexeme + var errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at '{lexeme}' -> {message}" + raise newException(ParseError, errorMessage) + + +proc check(self: Parser, kind: TokenType, distance: int = 0): bool = + ## Checks if the given token at the given distance + ## matches the expected kind and returns a boolean. + ## The distance parameter is passed directly to + ## self.peek() + self.peek(distance).kind == kind + + +proc check(self: Parser, kind: openarray[TokenType]): bool = + ## Calls self.check() in a loop with each entry of + ## the given openarray of token kinds and returns + ## at the first match. Note that this assumes + ## that only one token may exist at a given + ## position + for k in kind: + if self.check(k): + return true + return false + + +proc match(self: Parser, kind: TokenType, distance: int = 0): bool = + ## Behaves like self.check(), except that when a token + ## matches it is consumed + if self.check(kind, distance): + discard self.step() + result = true + else: + result = false + + +proc match(self: Parser, kind: openarray[TokenType]): bool = + ## Calls self.match() in a loop with each entry of + ## the given openarray of token kinds and returns + ## at the first match. Note that this assumes + ## that only one token may exist at a given + ## position + for k in kind: + if self.match(k): + return true + result = false + + +proc expect(self: Parser, kind: TokenType, message: string = "") = + ## Behaves like self.match(), except that + ## when a token doesn't match an error + ## is raised. 
If no error message is + ## given, a default one is used + if not self.match(kind): + if message.len() == 0: + self.error(&"expecting token of kind {kind}, found {self.peek().kind} instead") + else: + self.error(message) + + +proc unnest(self: Parser, node: ASTNode): ASTNode = + ## Unpacks an arbitrarily nested grouping expression + var node = node + while node.kind == groupingExpr and GroupingExpr(node).expression != nil: + node = GroupingExpr(node).expression + result = node + + +# Forward declarations +proc expression(self: Parser): ASTNode +proc expressionStatement(self: Parser): ASTNode +proc statement(self: Parser): ASTNode +proc varDecl(self: Parser, isStatic: bool = true, isPrivate: bool = true): ASTNode +proc funDecl(self: Parser, isAsync: bool = false, isStatic: bool = true, isPrivate: bool = true, isLambda: bool = false): ASTNode +proc declaration(self: Parser): ASTNode + + +proc primary(self: Parser): ASTNode = + ## Parses primary expressions such + ## as integer literals and keywords + ## that map to builtin types (true, false, etc) + case self.peek().kind: + of True: + result = newTrueExpr(self.step()) + of False: + result = newFalseExpr(self.step()) + of TokenType.NotANumber: + result = newNanExpr(self.step()) + of Nil: + result = newNilExpr(self.step()) + of Float: + result = newFloatExpr(self.step()) + of Integer: + result = newIntExpr(self.step()) + of Identifier: + result = newIdentExpr(self.step()) + of LeftParen: + let tok = self.step() + if self.match(RightParen): + # This yields an empty tuple + result = newTupleExpr(@[], tok) + else: + result = self.expression() + if self.match(Comma): + var tupleObject = newTupleExpr(@[result], tok) + while not self.check(RightParen): + tupleObject.members.add(self.expression()) + if not self.match(Comma): + break + result = tupleObject + self.expect(RightParen, "unterminated tuple literal") + else: + self.expect(RightParen, "unterminated parenthesized expression") + result = newGroupingExpr(result, tok) + of LeftBracket: + let tok = self.step() + if self.match(RightBracket): + # This yields an empty list + result = newListExpr(@[], tok) + else: + var listObject = newListExpr(@[], tok) + while not self.check(RightBracket): + listObject.members.add(self.expression()) + if not self.match(Comma): + break + result = listObject + self.expect(RightBracket, "unterminated list literal") + of LeftBrace: + let tok = self.step() + if self.match(RightBrace): + # This yields an empty dictionary, not an empty set! 
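+                # ('{}' therefore builds an empty DictExpr right below, while
+                # '{1, 2}' takes the set branch and '{1: 2}' the dict branch further down.)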
+ # For empty sets, there will be a builtin set() type + # that can be instantiated with no arguments + result = newDictExpr(@[], @[], tok) + else: + result = self.expression() + if self.match(Comma) or self.check(RightBrace): + var setObject = newSetExpr(@[result], tok) + while not self.check(RightBrace): + setObject.members.add(self.expression()) + if not self.match(Comma): + break + result = setObject + self.expect(RightBrace, "unterminated set literal") + elif self.match(Colon): + var dictObject = newDictExpr(@[result], @[self.expression()], tok) + if self.match(RightBrace): + return dictObject + if self.match(Comma): + while not self.check(RightBrace): + dictObject.keys.add(self.expression()) + self.expect(Colon) + dictObject.values.add(self.expression()) + if not self.match(Comma): + break + self.expect(RightBrace, "unterminated dict literal") + result = dictObject + of Yield: + let tok = self.step() + if self.currentFunction == nil: + self.error("'yield' cannot be outside functions") + if self.currentFunction.kind == NodeKind.funDecl: + FunDecl(self.currentFunction).isGenerator = true + else: + LambdaExpr(self.currentFunction).isGenerator = true + if not self.check([RightBrace, RightBracket, RightParen, Comma, Semicolon]): + result = newYieldExpr(self.expression(), tok) + else: + result = newYieldExpr(newNilExpr(Token()), tok) + of Await: + let tok = self.step() + if self.currentFunction == nil: + self.error("'await' cannot be used outside functions") + if self.currentFunction.kind == lambdaExpr or not FunDecl(self.currentFunction).isAsync: + self.error("'await' can only be used inside async functions") + result = newAwaitExpr(self.expression(), tok) + of Lambda: + discard self.step() + result = self.funDecl(isLambda=true) + of RightParen, RightBracket, RightBrace: + # This is *technically* unnecessary: the parser would + # throw an error regardless, but it's a little bit nicer + # when the error message is more specific + self.error(&"unmatched '{self.peek().lexeme}'") + of Hex: + result = newHexExpr(self.step()) + of Octal: + result = newOctExpr(self.step()) + of Binary: + result = newBinExpr(self.step()) + of String: + result = newStrExpr(self.step()) + of Infinity: + result = newInfExpr(self.step()) + else: + self.error("invalid syntax") + + +proc makeCall(self: Parser, callee: ASTNode): ASTNode = + ## Utility function called iteratively by self.call() + ## to parse a function-like call + let tok = self.peek(-1) + var argNames: seq[ASTNode] = @[] + var arguments: tuple[positionals: seq[ASTNode], keyword: seq[tuple[name: ASTNode, value: ASTNode]]] = (positionals: @[], keyword: @[]) + var argument: ASTNode = nil + var argCount = 0 + if not self.check(RightParen): + while true: + if argCount >= 255: + self.error("cannot store more than 255 arguments") + break + argument = self.expression() + if argument.kind == assignExpr: + if AssignExpr(argument).name in argNames: + self.error("duplicate keyword argument in call") + argNames.add(AssignExpr(argument).name) + arguments.keyword.add((name: AssignExpr(argument).name, value: AssignExpr(argument).value)) + elif arguments.keyword.len() == 0: + arguments.positionals.add(argument) + else: + self.error("positional arguments cannot follow keyword arguments in call") + if not self.match(Comma): + break + argCount += 1 + self.expect(RightParen) + result = newCallExpr(callee, arguments, tok) + + +proc call(self: Parser): ASTNode = + ## Parses calls, object field + ## field accessing and slicing + ## expressions + result = self.primary() + while 
true: + if self.match(LeftParen): + result = self.makeCall(result) + elif self.match(Dot): + self.expect(Identifier, "expecting attribute name after '.'") + result = newGetItemExpr(result, newIdentExpr(self.peek(-1)), self.peek(-1)) + elif self.match(LeftBracket): + let tok = self.peek(-1) + var ends: seq[ASTNode] = @[] + while not self.match(RightBracket) and ends.len() < 3: + ends.add(self.expression()) + discard self.match(Colon) + if ends.len() < 1: + self.error("invalid syntax") + result = newSliceExpr(result, ends, tok) + else: + break + + +proc unary(self: Parser): ASTNode = + ## Parses unary expressions + if self.match([Minus, Tilde, LogicalNot, Plus]): + result = newUnaryExpr(self.peek(-1), self.unary()) + else: + result = self.call() + + +proc pow(self: Parser): ASTNode = + ## Parses exponentiation expressions + result = self.unary() + var operator: Token + var right: ASTNode + while self.match(DoubleAsterisk): + operator = self.peek(-1) + right = self.unary() + result = newBinaryExpr(result, operator, right) + + +proc mul(self: Parser): ASTNode = + ## Parses multiplication and division expressions + result = self.pow() + var operator: Token + var right: ASTNode + while self.match([Slash, Percentage, FloorDiv, Asterisk]): + operator = self.peek(-1) + right = self.pow() + result = newBinaryExpr(result, operator, right) + + +proc add(self: Parser): ASTNode = + ## Parses addition and subtraction expressions + result = self.mul() + var operator: Token + var right: ASTNode + while self.match([Plus, Minus]): + operator = self.peek(-1) + right = self.mul() + result = newBinaryExpr(result, operator, right) + + +proc comparison(self: Parser): ASTNode = + ## Parses comparison expressions + result = self.add() + var operator: Token + var right: ASTNode + while self.match([LessThan, GreaterThan, LessOrEqual, GreaterOrEqual, Is, As, Of, IsNot]): + operator = self.peek(-1) + right = self.add() + result = newBinaryExpr(result, operator, right) + + +proc equality(self: Parser): ASTNode = + ## Parses equality expressions + result = self.comparison() + var operator: Token + var right: ASTNode + while self.match([DoubleEqual, NotEqual]): + operator = self.peek(-1) + right = self.comparison() + result = newBinaryExpr(result, operator, right) + + +proc logicalAnd(self: Parser): ASTNode = + ## Parses logical AND expressions + result = self.equality() + var operator: Token + var right: ASTNode + while self.match(LogicalAnd): + operator = self.peek(-1) + right = self.equality() + result = newBinaryExpr(result, operator, right) + + +proc logicalOr(self: Parser): ASTNode = + ## Parses logical OR expressions + result = self.logicalAnd() + var operator: Token + var right: ASTNode + while self.match(LogicalOr): + operator = self.peek(-1) + right = self.logicalAnd() + result = newBinaryExpr(result, operator, right) + + +proc bitwiseAnd(self: Parser): ASTNode = + ## Parser a & b expressions + result = self.logicalOr() + var operator: Token + var right: ASTNode + while self.match(Pipe): + operator = self.peek(-1) + right = self.logicalOr() + result = newBinaryExpr(result, operator, right) + + +proc bitwiseOr(self: Parser): ASTNode = + ## Parser a | b expressions + result = self.bitwiseAnd() + var operator: Token + var right: ASTNode + while self.match(Ampersand): + operator = self.peek(-1) + right = self.bitwiseAnd() + result = newBinaryExpr(result, operator, right) + + +proc assignment(self: Parser): ASTNode = + ## Parses assignment, the highest-level + ## expression (including stuff like a.b = 1). 
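+    ## In-place operators such as '+=' are handled here too.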
+ ## Slice assignments are also parsed here + result = self.bitwiseOr() + if self.match([Equal, InplaceAdd, InplaceSub, InplaceDiv, InplaceMod, + InplacePow, InplaceMul, InplaceXor, InplaceAnd, InplaceOr, + InplaceFloorDiv, InplaceRightShift, InplaceLeftShift]): + let tok = self.peek(-1) + var value = self.expression() + if result.kind in {identExpr, sliceExpr}: + result = newAssignExpr(result, value, tok) + elif result.kind == getItemExpr: + result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok) + else: + self.error("invalid assignment target") + + +proc delStmt(self: Parser): ASTNode = + ## Parses "del" statements, + ## which unbind a name from its + ## value in the current scope and + ## calls its destructor + let tok = self.peek(-1) + var expression = self.expression() + var temp = expression + endOfLIne("missing semicolon after del statement") + if expression.kind == groupingExpr: + # We unpack grouping expressions + temp = self.unnest(temp) + if temp.isLiteral(): + self.error("cannot delete a literal") + elif temp.kind in {binaryExpr, unaryExpr}: + self.error("cannot delete operator") + elif temp.kind == callExpr: + self.error("cannot delete function call") + elif temp.kind == assignExpr: + self.error("cannot delete assignment") + else: + result = newDelStmt(expression, tok) + + +proc assertStmt(self: Parser): ASTNode = + ## Parses "assert" statements, + ## raise an error if the expression + ## fed into them is falsey + let tok = self.peek(-1) + var expression = self.expression() + endOfLine("missing semicolon after assert statement") + result = newAssertStmt(expression, tok) + + +proc beginScope(self: Parser) = + ## Begins a new syntactical scope + inc(self.scopeDepth) + + +proc endScope(self: Parser) = + ## Ends a new syntactical scope + dec(self.scopeDepth) + + +proc blockStmt(self: Parser): ASTNode = + ## Parses block statements. 
A block + ## statement simply opens a new local + ## scope + + self.beginScope() + let tok = self.peek(-1) + var code: seq[ASTNode] = @[] + while not self.check(RightBrace) and not self.done(): + code.add(self.declaration()) + self.expect(RightBrace, "unterminated block statement") + result = newBlockStmt(code, tok) + self.endScope() + + +proc breakStmt(self: Parser): ASTNode = + ## Parses break statements + let tok = self.peek(-1) + if self.currentLoop != Loop: + self.error("'break' cannot be used outside loops") + endOfLine("missing semicolon after break statement") + result = newBreakStmt(tok) + + +proc deferStmt(self: Parser): ASTNode = + ## Parses defer statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'defer' cannot be used outside functions") + result = newDeferStmt(self.expression(), tok) + endOfLine("missing semicolon after defer statement") + + +proc continueStmt(self: Parser): ASTNode = + ## Parses continue statements + let tok = self.peek(-1) + if self.currentLoop != Loop: + self.error("'continue' cannot be used outside loops") + endOfLine("missing semicolon after continue statement") + result = newContinueStmt(tok) + + +proc returnStmt(self: Parser): ASTNode = + ## Parses return statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'return' cannot be used outside functions") + var value: ASTNode + if not self.check(Semicolon): + # Since return can be used on its own too + # (in which case it implicitly returns nil), + # we need to check if there's an actual value + # to return or not + value = self.expression() + endOfLine("missing semicolon after return statement") + result = newReturnStmt(value, tok) + + +proc yieldStmt(self: Parser): ASTNode = + ## Parses yield Statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'yield' cannot be outside functions") + if self.currentFunction.kind == NodeKind.funDecl: + FunDecl(self.currentFunction).isGenerator = true + else: + LambdaExpr(self.currentFunction).isGenerator = true + if not self.check(Semicolon): + result = newYieldStmt(self.expression(), tok) + else: + result = newYieldStmt(newNilExpr(Token()), tok) + endOfLine("missing semicolon after yield statement") + + +proc awaitStmt(self: Parser): ASTNode = + ## Parses yield Statements + let tok = self.peek(-1) + if self.currentFunction == nil: + self.error("'await' cannot be used outside functions") + if self.currentFunction.kind == lambdaExpr or not FunDecl(self.currentFunction).isAsync: + self.error("'await' can only be used inside async functions") + result = newAwaitStmt(self.expression(), tok) + endOfLine("missing semicolon after yield statement") + + +proc raiseStmt(self: Parser): ASTNode = + ## Parses raise statements + var exception: ASTNode + let tok = self.peek(-1) + if not self.check(Semicolon): + # Raise can be used on its own, in which + # case it re-raises the last active exception + exception = self.expression() + endOfLine("missing semicolon after raise statement") + result = newRaiseStmt(exception, tok) + + +proc forEachStmt(self: Parser): ASTNode = + ## Parses C#-like foreach loops + let tok = self.peek(-1) + var enclosingLoop = self.currentLoop + self.currentLoop = Loop + self.expect(LeftParen, "expecting '(' after 'foreach'") + self.expect(Identifier) + var identifier = newIdentExpr(self.peek(-1)) + self.expect(Colon) + var expression = self.expression() + self.expect(RightParen) + var body = self.statement() + result = newForEachStmt(identifier, expression, body, tok) 
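+    # (the loop header parsed above has the shape 'foreach (item: container) ...',
+    # i.e. a '(' identifier ':' expression ')' sequence followed by a statement)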
+ self.currentLoop = enclosingLoop + + +proc importStmt(self: Parser): ASTNode = + ## Parses import statements + let tok = self.peek(-1) + self.expect(Identifier, "expecting module name(s) after import statement") + result = newImportStmt(self.expression(), tok) + endOfLine("missing semicolon after import statement") + + +proc fromStmt(self: Parser): ASTNode = + ## Parser from xx import yy statements + let tok = self.peek(-1) + self.expect(Identifier, "expecting module name(s) after import statement") + result = newIdentExpr(self.peek(-1)) + var attributes: seq[ASTNode] = @[] + var attribute: ASTNode + self.expect(Import) + self.expect(Identifier) + attribute = newIdentExpr(self.peek(-1)) + attributes.add(attribute) + while self.match(Comma): + self.expect(Identifier) + attribute = newIdentExpr(self.peek(-1)) + attributes.add(attribute) + # from x import a [, b, c, ...]; + endOfLine("missing semicolon after import statement") + result = newFromImportStmt(result, attributes, tok) + + +proc tryStmt(self: Parser): ASTNode = + ## Parses try/except/finally/else blocks + let tok = self.peek(-1) + var body = self.statement() + var handlers: seq[tuple[body, exc, name: ASTNode]] = @[] + var finallyClause: ASTNode + var elseClause: ASTNode + var asName: ASTNode + var excName: ASTNode + var handlerBody: ASTNode + while self.match(Except): + excName = self.expression() + if excName.kind == identExpr: + continue + elif excName.kind == binaryExpr and BinaryExpr(excName).operator.kind == As: + asName = BinaryExpr(excName).b + if BinaryExpr(excName).b.kind != identExpr: + self.error("expecting alias name after 'except ... as'") + excName = BinaryExpr(excName).a + # Note how we don't use elif here: when the if above sets excName to As' + # first operand, that might be a tuple, which we unpack below + if excName.kind == tupleExpr: + # This allows to do except (a, b, c) as SomeError {...} + # TODO: Consider adding the ability to make exc a sequence + # instead of adding the same body with different exception + # types each time + handlerBody = self.statement() + for element in TupleExpr(excName).members: + handlers.add((body: handlerBody, exc: element, name: asName)) + continue + else: + excName = nil + handlerBody = self.statement() + handlers.add((body: handlerBody, exc: excName, name: asName)) + asName = nil + if self.match(Else): + elseClause = self.statement() + if self.match(Finally): + finallyClause = self.statement() + if handlers.len() == 0 and elseClause == nil and finallyClause == nil: + self.error("expecting 'except', 'finally' or 'else' statements after 'try' block") + for i, handler in handlers: + if handler.exc == nil and i != handlers.high(): + self.error("catch-all exception handler with bare 'except' must come last in try statement") + result = newTryStmt(body, handlers, finallyClause, elseClause, tok) + + +proc whileStmt(self: Parser): ASTNode = + ## Parses a C-style while loop statement + let tok = self.peek(-1) + self.beginScope() + var enclosingLoop = self.currentLoop + self.currentLoop = Loop + self.expect(LeftParen, "expecting '(' before while loop condition") + var condition = self.expression() + self.expect(RightParen, "unterminated while loop condition") + result = newWhileStmt(condition, self.statement(), tok) + self.currentLoop = enclosingLoop + self.endScope() + + +proc forStmt(self: Parser): ASTNode = + ## Parses a C-style for loop + self.beginScope() + let tok = self.peek(-1) + var enclosingLoop = self.currentLoop + self.currentLoop = Loop + self.expect(LeftParen, "expecting 
'(' after 'for'") + var initializer: ASTNode = nil + var condition: ASTNode = nil + var increment: ASTNode = nil + # The code below is not really that illuminating, but + # it's there to disallow weird things like a public for loop + # increment variable which doesn't really make sense, but still + # allow people that like verbosity (for *some* reason) to use + # private static var declarations as well as just private var + # and static var as well as providing decently specific error + # messages + if self.match(Semicolon): + discard + elif self.match(Dynamic): + self.error("dynamic declarations are not allowed in the foor loop initializer") + elif self.match(Public): + self.error("public declarations are not allowed in the for loop initializer") + elif self.match(Static): + self.expect(Var, "expecting 'var' after 'static' in for loop initializer") + initializer = self.varDecl(isStatic=true, isPrivate=true) + elif self.match(Private): + if self.match(Dynamic): + self.error("dynamic declarations are not allowed in the foor loop initializer") + elif self.match(Static): + self.expect(Var, "expecting 'var' after 'static' in for loop initializer") + initializer = self.varDecl(isStatic=true, isPrivate=true) + elif self.match(Var): + initializer = self.varDecl(isStatic=true, isPrivate=true) + elif self.match(Var): + initializer = self.varDecl(isStatic=true, isPrivate=true) + else: + initializer = self.expressionStatement() + if not self.check(Semicolon): + condition = self.expression() + self.expect(Semicolon, "expecting ';' after for loop condition") + if not self.check(RightParen): + increment = self.expression() + self.expect(RightParen, "unterminated for loop increment") + var body = self.statement() + if increment != nil: + # The increment runs after each iteration, so we + # inject it into the block as the last statement + body = newBlockStmt(@[body, newExprStmt(increment, increment.token)], tok) + if condition == nil: + ## An empty condition is functionally + ## equivalent to "true" + condition = newTrueExpr(Token()) + # We can use a while loop, which in this case works just as well + body = newWhileStmt(condition, body, tok) + if initializer != nil: + # Nested blocks, so the initializer is + # only executed once + body = newBlockStmt(@[initializer, body], tok) + # This desgugars the following code: + # for (var i = 0; i < 10; i += 1) { + # print(i); + # } + # To the semantically equivalent snippet + # below: + # { + # private static var i = 0; + # while (i < 10) { + # print(i); + # i += 1; + # } + # } + result = body + self.currentLoop = enclosingLoop + self.endScope() + + +proc ifStmt(self: Parser): ASTNode = + ## Parses if statements + let tok = self.peek(-1) + self.expect(LeftParen, "expecting '(' before if condition") + var condition = self.expression() + self.expect(RightParen, "expecting ')' after if condition") + var thenBranch = self.statement() + var elseBranch: ASTNode = nil + if self.match(Else): + elseBranch = self.statement() + result = newIfStmt(condition, thenBranch, elseBranch, tok) + + +template checkDecl(self: Parser, isStatic, isPrivate: bool) = + ## Handy utility function that avoids us from copy + ## pasting the same checks to all declaration handlers + if not isStatic and self.currentFunction != nil: + self.error("dynamic declarations are not allowed inside functions") + if not isStatic and self.scopeDepth > 0: + self.error("dynamic declarations are not allowed inside local scopes") + if not isPrivate and self.currentFunction != nil: + self.error("cannot bind public 
names inside functions") + if not isPrivate and self.scopeDepth > 0: + self.error("cannot bind public names inside local scopes") + + +proc varDecl(self: Parser, isStatic: bool = true, isPrivate: bool = true): ASTNode = + ## Parses variable declarations + self.checkDecl(isStatic, isPrivate) + var varKind = self.peek(-1) + var keyword = "" + var value: ASTNode + case varKind.kind: + of Const: + # Note that isStatic being false is an error, because constants are replaced at compile-time + if not isStatic: + self.error("constant declarations cannot be dynamic") + keyword = "constant" + else: + keyword = "variable" + self.expect(Identifier, &"expecting {keyword} name after '{varKind.lexeme}'") + var name = newIdentExpr(self.peek(-1)) + if self.match(Equal): + value = self.expression() + if varKind.kind == Const and not value.isConst(): + self.error("the initializer for constant declarations must be a primitive and constant type") + else: + if varKind.kind == Const: + self.error("constant declaration requires an explicit initializer") + value = newNilExpr(Token()) + self.expect(Semicolon, &"expecting semicolon after {keyword} declaration") + case varKind.kind: + of Var: + result = newVarDecl(name, value, isStatic=isStatic, isPrivate=isPrivate, token=varKind, owner=self.file, closedOver=false) + of Const: + result = newVarDecl(name, value, isConst=true, isPrivate=isPrivate, isStatic=true, token=varKind, owner=self.file, closedOver=false) + else: + discard # Unreachable + + +proc funDecl(self: Parser, isAsync: bool = false, isStatic: bool = true, isPrivate: bool = true, isLambda: bool = false): ASTNode = + ## Parses function and lambda declarations. Note that lambdas count as expressions! + self.checkDecl(isStatic, isPrivate) + let tok = self.peek(-1) + var enclosingFunction = self.currentFunction + var arguments: seq[ASTNode] = @[] + var defaults: seq[ASTNode] = @[] + if not isLambda: + self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()), isAsync=isAsync, isGenerator=false, isStatic=isStatic, isPrivate=isPrivate, token=tok, owner=self.file, closedOver=false) + else: + self.currentFunction = newLambdaExpr(arguments, defaults, newBlockStmt(@[], Token()), isGenerator=false, token=tok) + if not isLambda: + self.expect(Identifier, "expecting function name after 'fun'") + FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1)) + if self.match(LeftBrace): + # Argument-less function + discard + else: + var parameter: IdentExpr + self.expect(LeftParen) + while not self.check(RightParen): + if arguments.len > 255: + self.error("cannot have more than 255 arguments in function declaration") + self.expect(Identifier) + parameter = newIdentExpr(self.peek(-1)) + if parameter in arguments: + self.error("duplicate parameter name in function declaration") + arguments.add(parameter) + if self.match(Equal): + defaults.add(self.expression()) + elif defaults.len() > 0: + self.error("positional argument(s) cannot follow default argument(s) in function declaration") + if not self.match(Comma): + break + self.expect(RightParen) + self.expect(LeftBrace) + if not isLambda: + FunDecl(self.currentFunction).body = self.blockStmt() + FunDecl(self.currentFunction).arguments = arguments + else: + LambdaExpr(self.currentFunction).body = self.blockStmt() + LambdaExpr(self.currentFunction).arguments = arguments + result = self.currentFunction + self.currentFunction = enclosingFunction + + +proc classDecl(self: Parser, isStatic: bool = true, isPrivate: bool = true): ASTNode = + ## Parses 
class declarations + self.checkDecl(isStatic, isPrivate) + let tok = self.peek(-1) + var parents: seq[ASTNode] = @[] + self.expect(Identifier) + var name = newIdentExpr(self.peek(-1)) + if self.match(LessThan): + while true: + self.expect(Identifier) + parents.add(newIdentExpr(self.peek(-1))) + if not self.match(Comma): + break + self.expect(LeftBrace) + result = newClassDecl(name, self.blockStmt(), isPrivate=isPrivate, isStatic=isStatic, parents=parents, token=tok, owner=self.file, closedOver=false) + + +proc expression(self: Parser): ASTNode = + ## Parses expressions + result = self.assignment() + + +proc expressionStatement(self: Parser): ASTNode = + ## Parses expression statements, which + ## are expressions followed by a semicolon + var expression = self.expression() + endOfLine("missing semicolon after expression") + result = newExprStmt(expression, expression.token) + + +proc statement(self: Parser): ASTNode = + ## Parses statements + case self.peek().kind: + of If: + discard self.step() + result = self.ifStmt() + of Del: + discard self.step() + result = self.delStmt() + of Assert: + discard self.step() + result = self.assertStmt() + of Raise: + discard self.step() + result = self.raiseStmt() + of Break: + discard self.step() + result = self.breakStmt() + of Continue: + discard self.step() + result = self.continueStmt() + of Return: + discard self.step() + result = self.returnStmt() + of Import: + discard self.step() + result = self.importStmt() + of From: + discard self.step() + result = self.fromStmt() + of While: + discard self.step() + result = self.whileStmt() + of For: + discard self.step() + result = self.forStmt() + of Foreach: + discard self.step() + result = self.forEachStmt() + of LeftBrace: + discard self.step() + result = self.blockStmt() + of Yield: + discard self.step() + result = self.yieldStmt() + of Await: + discard self.step() + result = self.awaitStmt() + of Defer: + discard self.step() + result = self.deferStmt() + of Try: + discard self.step() + result = self.tryStmt() + else: + result = self.expressionStatement() + + +proc declaration(self: Parser): ASTNode = + ## Parses declarations + case self.peek().kind: + of Var, Const: + discard self.step() + result = self.varDecl() + of Class: + discard self.step() + result = self.classDecl() + of Fun: + discard self.step() + result = self.funDecl() + of Private, Public: + discard self.step() + var isStatic: bool = true + let isPrivate = if self.peek(-1).kind == Private: true else: false + if self.match(Dynamic): + isStatic = false + elif self.match(Static): + discard # This is just to allow an "explicit" static keyword + if self.match(Async): + result = self.funDecl(isStatic=isStatic, isPrivate=isPrivate, isAsync=true) + else: + case self.peek().kind: + of Var, Const: + discard self.step() + result = self.varDecl(isStatic=isStatic, isPrivate=isPrivate) + of Class: + discard self.step() + result = self.classDecl(isStatic=isStatic, isPrivate=isPrivate) + of Fun: + discard self.step() + result = self.funDecl(isStatic=isStatic, isPrivate=isPrivate) + else: + self.error("expecting declaration") + of Static, Dynamic: + discard self.step() + let isStatic: bool = if self.peek(-1).kind == Static: true else: false + if self.match(Async): + self.expect(Fun) + result = self.funDecl(isStatic=isStatic, isPrivate=true, isAsync=true) + else: + case self.peek().kind: + of Var, Const: + discard self.step() + result = self.varDecl(isStatic=isStatic, isPrivate=true) + of Class: + discard self.step() + result = 
self.classDecl(isStatic=isStatic, isPrivate=true) + of Fun: + discard self.step() + result = self.funDecl(isStatic=isStatic, isPrivate=true) + else: + self.error("expecting declaration") + of Async: + discard self.step() + self.expect(Fun) + result = self.funDecl(isAsync=true) + + else: + result = self.statement() + + +proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] = + ## Parses a series of tokens into an AST node + self.tokens = tokens + self.file = file + self.current = 0 + self.currentLoop = None + self.currentFunction = nil + self.scopeDepth = 0 + while not self.done(): + result.add(self.declaration()) diff --git a/src/frontend/serializer.nim b/src/frontend/serializer.nim new file mode 100644 index 0000000..1a44fab --- /dev/null +++ b/src/frontend/serializer.nim @@ -0,0 +1,273 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import meta/ast +import meta/errors +import meta/bytecode +import meta/token +import ../config +import ../util/multibyte + +import strformat +import strutils +import nimSHA2 +import times + + +export ast + +type + Serializer* = ref object + file: string + filename: string + chunk: Chunk + Serialized* = ref object + ## Wrapper returned by + ## the Serializer.read* + ## procedures to store + ## metadata + fileHash*: string + japlVer*: tuple[major, minor, patch: int] + japlBranch*: string + commitHash*: string + compileDate*: int + chunk*: Chunk + + +proc `$`*(self: Serialized): string = + result = &"Serialized(fileHash={self.fileHash}, version={self.japlVer.major}.{self.japlVer.minor}.{self.japlVer.patch}, branch={self.japlBranch}), commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]}" + + +proc error(self: Serializer, message: string) = + ## Raises a formatted SerializationError exception + raise newException(SerializationError, &"A fatal error occurred while (de)serializing '{self.filename}' -> {message}") + + +proc initSerializer*(self: Serializer = nil): Serializer = + new(result) + if self != nil: + result = self + result.file = "" + result.filename = "" + result.chunk = nil + + +## Basic routines and helpers to convert various objects from and to to their byte representation + +proc toBytes(self: Serializer, s: string): seq[byte] = + for c in s: + result.add(byte(c)) + + +proc toBytes(self: Serializer, s: int): array[8, uint8] = + result = cast[array[8, uint8]](s) + + +proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] = + for b in d: + result.add(b) + + +proc bytesToString(self: Serializer, input: seq[byte]): string = + for b in input: + result.add(char(b)) + + +proc bytesToInt(self: Serializer, input: array[8, byte]): int = + copyMem(result.addr, input.unsafeAddr, sizeof(int)) + + +proc bytesToInt(self: Serializer, input: array[3, byte]): int = + copyMem(result.addr, input.unsafeAddr, sizeof(byte) * 3) + + +proc extend[T](s: var seq[T], a: openarray[T]) = + ## Extends s with the elements of a + for e in a: + s.add(e) + + +proc writeHeaders(self: 
Serializer, stream: var seq[byte], file: string) = + ## Writes the JAPL bytecode headers in-place into a byte stream + stream.extend(self.toBytes(BYTECODE_MARKER)) + stream.add(byte(JAPL_VERSION.major)) + stream.add(byte(JAPL_VERSION.minor)) + stream.add(byte(JAPL_VERSION.patch)) + stream.add(byte(len(JAPL_BRANCH))) + stream.extend(self.toBytes(JAPL_BRANCH)) + if len(JAPL_COMMIT_HASH) != 40: + self.error("the commit hash must be exactly 40 characters long") + stream.extend(self.toBytes(JAPL_COMMIT_HASH)) + stream.extend(self.toBytes(getTime().toUnixFloat().int())) + stream.extend(self.toBytes(computeSHA256(file))) + + +proc writeConstants(self: Serializer, stream: var seq[byte]) = + ## Writes the constants table in-place into the given stream + for constant in self.chunk.consts: + case constant.kind: + of intExpr, floatExpr: + stream.add(0x1) + stream.extend(len(constant.token.lexeme).toTriple()) + stream.extend(self.toBytes(constant.token.lexeme)) + of strExpr: + stream.add(0x2) + var temp: byte + var strip: int = 2 + var offset: int = 1 + case constant.token.lexeme[0]: + of 'f': + strip = 3 + inc(offset) + temp = 0x2 + of 'b': + strip = 3 + inc(offset) + temp = 0x1 + else: + strip = 2 + temp = 0x0 + stream.extend((len(constant.token.lexeme) - strip).toTriple()) # Removes the quotes from the length count as they're not written + stream.add(temp) + stream.add(self.toBytes(constant.token.lexeme[offset..^2])) + of identExpr: + stream.add(0x0) + stream.extend(len(constant.token.lexeme).toTriple()) + stream.add(self.toBytes(constant.token.lexeme)) + else: + self.error(&"unknown constant kind in chunk table ({constant.kind})") + stream.add(0x59) # End marker + + +proc readConstants(self: Serializer, stream: seq[byte]): int = + ## Reads the constant table from the given stream and + ## adds each constant to the chunk object (note: most compile-time + ## information such as the original token objects and line info is lost when + ## serializing the data, so those fields are set to nil or some default + ## value). Returns the number of bytes that were processed in the stream + var stream = stream + var count: int = 0 + while true: + case stream[0]: + of 0x59: + inc(count) + break + of 0x2: + stream = stream[1..^1] + let size = self.bytesToInt([stream[0], stream[1], stream[2]]) + stream = stream[3..^1] + var s = newStrExpr(Token(lexeme: "")) + case stream[0]: + of 0x0: + discard + of 0x1: + s.token.lexeme.add("b") + of 0x2: + s.token.lexeme.add("f") + else: + self.error(&"unknown string modifier in chunk table (0x{stream[0].toHex()}") + stream = stream[1..^1] + s.token.lexeme.add("\"") + for i in countup(0, size - 1): + s.token.lexeme.add(cast[char](stream[i])) + s.token.lexeme.add("\"") + stream = stream[size..^1] + self.chunk.consts.add(s) + inc(count, size + 5) + of 0x1: + stream = stream[1..^1] + inc(count) + let size = self.bytesToInt([stream[0], stream[1], stream[2]]) + stream = stream[3..^1] + inc(count, 3) + var tok: Token = new(Token) + tok.lexeme = self.bytesToString(stream[0..>> ") + source = lineEditor.read() + if source in ["# clear", "#clear"]: + echo "\x1Bc" & JAPL_VERSION_STRING + continue + elif source == "#exit" or source == "# exit": + echo "Goodbye!" 
+ break + elif source == "": + continue + except IOError: + echo "" + break + try: + tokens = lexer.lex(source, filename) + when debugLexer: + echo "Tokenization step: " + for token in tokens: + echo "\t", token + echo "" + + tree = parser.parse(tokens, filename) + when debugParser: + echo "Parsing step: " + for node in tree: + echo "\t", node + echo "" + + optimized = optimizer.optimize(tree) + when debugOptimizer: + echo &"Optimization step (constant folding enabled: {optimizer.foldConstants}):" + for node in optimized.tree: + echo "\t", node + echo "" + stdout.write(&"Produced warnings: ") + if optimized.warnings.len() > 0: + echo "" + for warning in optimized.warnings: + echo "\t", warning + else: + stdout.write("No warnings produced\n") + echo "" + + compiled = compiler.compile(optimized.tree, filename) + when debugCompiler: + echo "Compilation step:" + stdout.write("\t") + echo &"""Raw byte stream: [{compiled.code.join(", ")}]""" + echo "\nBytecode disassembler output below:\n" + disassembleChunk(compiled, filename) + echo "" + + when debugSerializer: + serializedRaw = serializer.dumpBytes(compiled, source, filename) + echo "Serialization step: " + stdout.write("\t") + echo &"""Raw hex output: {serializedRaw.mapIt(toHex(it)).join("").toLowerAscii()}""" + echo "" + + serialized = serializer.loadBytes(serializedRaw) + echo "Deserialization step:" + echo &"\t- File hash: {serialized.fileHash} (matches: {computeSHA256(source).toHex().toLowerAscii() == serialized.fileHash})" + echo &"\t- JAPL version: {serialized.japlVer.major}.{serialized.japlVer.minor}.{serialized.japlVer.patch} (commit {serialized.commitHash[0..8]} on branch {serialized.japlBranch})" + stdout.write("\t") + echo &"""- Compilation date & time: {fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")}""" + stdout.write(&"\t- Reconstructed constants table: [") + for i, e in serialized.chunk.consts: + stdout.write(e) + if i < len(serialized.chunk.consts) - 1: + stdout.write(", ") + stdout.write("]\n") + stdout.write(&"\t- Reconstructed bytecode: [") + for i, e in serialized.chunk.code: + stdout.write($e) + if i < len(serialized.chunk.code) - 1: + stdout.write(", ") + stdout.write(&"] (matches: {serialized.chunk.code == compiled.code})\n") + except LexingError: + let lineNo = lexer.getLine() + let relPos = lexer.getRelPos(lineNo) + let line = lexer.getSource().splitLines()[lineNo - 1].strip() + echo getCurrentExceptionMsg() + echo &"Source line: {line}" + echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start) + except ParseError: + let lineNo = parser.getCurrentToken().line + let relPos = lexer.getRelPos(lineNo) + let line = lexer.getSource().splitLines()[lineNo - 1].strip() + echo getCurrentExceptionMsg() + echo &"Source line: {line}" + echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len()) + except CompileError: + let lineNo = compiler.getCurrentNode().token.line + let relPos = lexer.getRelPos(lineNo) + let line = lexer.getSource().splitLines()[lineNo - 1].strip() + echo getCurrentExceptionMsg() + echo &"Source line: {line}" + echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len()) + + +when isMainModule: + setControlCHook(proc {.noconv.} = quit(1)) + main() diff --git a/src/memory/allocator.nim b/src/memory/allocator.nim new file mode 100644 index 0000000..b673aa5 --- /dev/null +++ b/src/memory/allocator.nim @@ -0,0 +1,85 @@ +# Copyright 2022 
Mattia Giambirtone +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Memory allocator from JAPL + + +import segfaults +import ../config + +when DEBUG_TRACE_ALLOCATION: + import strformat + + +proc reallocate*(p: pointer, oldSize: int, newSize: int): pointer = + ## Wrapper around realloc/dealloc + try: + if newSize == 0 and p != nil: + when DEBUG_TRACE_ALLOCATION: + if oldSize > 1: + echo &"DEBUG - Memory manager: Deallocating {oldSize} bytes" + else: + echo "DEBUG - Memory manager: Deallocating 1 byte" + dealloc(p) + return nil + when DEBUG_TRACE_ALLOCATION: + if pointr == nil and newSize == 0: + echo &"DEBUG - Memory manager: Warning, asked to dealloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request" + if oldSize > 0 and p != nil or oldSize == 0: + when DEBUG_TRACE_ALLOCATION: + if oldSize == 0: + if newSize > 1: + echo &"DEBUG - Memory manager: Allocating {newSize} bytes of memory" + else: + echo "DEBUG - Memory manager: Allocating 1 byte of memory" + else: + echo &"DEBUG - Memory manager: Resizing {oldSize} bytes of memory to {newSize} bytes" + result = realloc(p, newSize) + when DEBUG_TRACE_ALLOCATION: + if oldSize > 0 and pointr == nil: + echo &"DEBUG - Memory manager: Warning, asked to realloc() nil pointer from {oldSize} to {newSize} bytes, ignoring request" + except NilAccessDefect: + stderr.write("JAPL: could not manage memory, segmentation fault\n") + quit(139) # For now, there's not much we can do if we can't get the memory we need, so we exit + + +template resizeArray*(kind: untyped, pointr: pointer, oldCount, newCount: int): untyped = + ## Handy macro (in the C sense of macro, not nim's) to resize a dynamic array + cast[ptr UncheckedArray[kind]](reallocate(pointr, sizeof(kind) * oldCount, sizeof(kind) * newCount)) + + +template freeArray*(kind: untyped, pointr: pointer, oldCount: int): untyped = + ## Frees a dynamic array + reallocate(pointr, sizeof(kind) * oldCount, 0) + + +template free*(kind: untyped, pointr: pointer): untyped = + ## Frees a pointer by reallocating its + ## size to 0 + reallocate(pointr, sizeof(kind), 0) + + +template growCapacity*(capacity: int): untyped = + ## Handy macro used to calculate how much + ## more memory is needed when reallocating + ## dynamic arrays + if capacity < 8: + 8 + else: + capacity * ARRAY_GROW_FACTOR + + +template allocate*(castTo: untyped, sizeTo: untyped, count: int): untyped = + ## Allocates an object and casts its pointer to the specified type + cast[ptr castTo](reallocate(nil, 0, sizeof(sizeTo) * count)) diff --git a/src/util/debugger.nim b/src/util/debugger.nim new file mode 100644 index 0000000..3e5f14a --- /dev/null +++ b/src/util/debugger.nim @@ -0,0 +1,195 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ../frontend/meta/bytecode +import ../frontend/meta/ast +import multibyte + + +import strformat +import strutils +import terminal + + +proc nl = stdout.write("\n") + + +proc printDebug(s: string, newline: bool = false) = + stdout.write(&"DEBUG - Disassembler -> {s}") + if newline: + nl() + + +proc printName(name: string, newline: bool = false) = + setForegroundColor(fgRed) + stdout.write(name) + setForegroundColor(fgGreen) + if newline: + nl() + + +proc printInstruction(instruction: OpCode, newline: bool = false) = + printDebug("Instruction: ") + printName($instruction) + if newline: + nl() + + +proc simpleInstruction(instruction: OpCode, offset: int): int = + printInstruction(instruction) + nl() + return offset + 1 + + +proc stackTripleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on a single value on the stack using a 24-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple() + printInstruction(instruction) + stdout.write(&", points to index ") + setForegroundColor(fgYellow) + stdout.write(&"{slot}") + nl() + return offset + 4 + + +proc stackDoubleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on a single value on the stack using a 16-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble() + printInstruction(instruction) + stdout.write(&", points to index ") + setForegroundColor(fgYellow) + stdout.write(&"{slot}") + nl() + return offset + 3 + + +proc argumentDoubleInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on a hardcoded value value on the stack using a 16-bit operand + var slot = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble() + printInstruction(instruction) + stdout.write(&", has argument ") + setForegroundColor(fgYellow) + stdout.write(&"{slot}") + nl() + return offset + 3 + + +proc constantInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that operate on the constant table + var constant = [chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple() + printInstruction(instruction) + stdout.write(&", points to constant at position ") + setForegroundColor(fgYellow) + stdout.write(&"{constant}") + nl() + let obj = chunk.consts[constant] + setForegroundColor(fgGreen) + printDebug("Operand: ") + setForegroundColor(fgYellow) + stdout.write(&"{obj}\n") + setForegroundColor(fgGreen) + printDebug("Value kind: ") + setForegroundColor(fgYellow) + stdout.write(&"{obj.kind}\n") + return offset + 4 + + +proc jumpInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs jumps + var jump: int + case instruction: + of JumpIfFalse, JumpIfTrue, JumpIfFalsePop, JumpForwards, JumpBackwards: + jump = [chunk.code[offset + 1], chunk.code[offset + 2]].fromDouble().int() + of LongJumpIfFalse, LongJumpIfTrue, LongJumpIfFalsePop, LongJumpForwards, LongJumpBackwards: + jump = [chunk.code[offset + 1], chunk.code[offset + 2], 
chunk.code[offset + 3]].fromTriple().int() + else: + discard # Unreachable + printInstruction(instruction, true) + printDebug("Jump size: ") + setForegroundColor(fgYellow) + stdout.write($jump) + nl() + return offset + 3 + + +proc collectionInstruction(instruction: OpCode, chunk: Chunk, offset: int): int = + ## Debugs instructions that push collection types on the stack + var elemCount = int([chunk.code[offset + 1], chunk.code[offset + 2], chunk.code[offset + 3]].fromTriple()) + printInstruction(instruction, true) + case instruction: + of BuildList, BuildTuple, BuildSet: + var elements: seq[ASTNode] = @[] + for n in countup(0, elemCount - 1): + elements.add(chunk.consts[n]) + printDebug("Elements: ") + setForegroundColor(fgYellow) + stdout.write(&"""[{elements.join(", ")}]""") + setForegroundColor(fgGreen) + of BuildDict: + var elements: seq[tuple[key: ASTNode, value: ASTNode]] = @[] + for n in countup(0, (elemCount - 1) * 2, 2): + elements.add((key: chunk.consts[n], value: chunk.consts[n + 1])) + printDebug("Elements: ") + setForegroundColor(fgYellow) + stdout.write(&"""[{elements.join(", ")}]""") + setForegroundColor(fgGreen) + else: + discard # Unreachable + echo "" + return offset + 4 + + +proc disassembleInstruction*(chunk: Chunk, offset: int): int = + ## Takes one bytecode instruction and prints it + setForegroundColor(fgGreen) + printDebug("Offset: ") + setForegroundColor(fgYellow) + echo offset + setForegroundColor(fgGreen) + printDebug("Line: ") + setForegroundColor(fgYellow) + stdout.write(&"{chunk.getLine(offset)}\n") + setForegroundColor(fgGreen) + var opcode = OpCode(chunk.code[offset]) + case opcode: + of simpleInstructions: + result = simpleInstruction(opcode, offset) + of constantInstructions: + result = constantInstruction(opcode, chunk, offset) + of stackDoubleInstructions: + result = stackDoubleInstruction(opcode, chunk, offset) + of stackTripleInstructions: + result = stackTripleInstruction(opcode, chunk, offset) + of argumentDoubleInstructions: + result = argumentDoubleInstruction(opcode, chunk, offset) + of jumpInstructions: + result = jumpInstruction(opcode, chunk, offset) + of collectionInstructions: + result = collectionInstruction(opcode, chunk, offset) + else: + echo &"DEBUG - Unknown opcode {opcode} at index {offset}" + result = offset + 1 + + +proc disassembleChunk*(chunk: Chunk, name: string) = + ## Takes a chunk of bytecode, and prints it + echo &"==== JAPL Bytecode Debugger - Chunk '{name}' ====\n" + var index = 0 + while index < chunk.code.len: + index = disassembleInstruction(chunk, index) + echo "" + setForegroundColor(fgDefault) + echo &"==== Debug session ended - Chunk '{name}' ====" + + diff --git a/src/util/multibyte.nim b/src/util/multibyte.nim new file mode 100644 index 0000000..cd63a31 --- /dev/null +++ b/src/util/multibyte.nim @@ -0,0 +1,40 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+## Utilities to convert from/to our 16-bit and 24-bit representations
+## of numbers
+
+
+proc toDouble*(input: int | uint | uint16): array[2, uint8] =
+    ## Converts an integer (int, uint or uint16)
+    ## to an array[2, uint8]
+    result = cast[array[2, uint8]](uint16(input))
+
+
+proc toTriple*(input: uint | int): array[3, uint8] =
+    ## Converts an integer to an array[3, uint8] (an int
+    ## is converted to a uint first, so the sign is lost!)
+    result = cast[array[3, uint8]](uint(input))
+
+
+proc fromDouble*(input: array[2, uint8]): uint16 =
+    ## Rebuilds the output of toDouble into
+    ## a uint16
+    copyMem(result.addr, unsafeAddr(input), sizeof(uint16))
+
+
+proc fromTriple*(input: array[3, uint8]): uint =
+    ## Rebuilds the output of toTriple into
+    ## a uint
+    copyMem(result.addr, unsafeAddr(input), sizeof(uint8) * 3)
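
As a quick illustration of the helpers above, here is a minimal usage sketch (not part of the patch; it assumes compilation from the src directory so that 'import util/multibyte' resolves). It simply checks that a 16-bit and a 24-bit operand survive a round trip through their byte representations:

    import util/multibyte

    when isMainModule:
        let short = 513     # fits in 16 bits
        let wide = 65537    # needs the 24-bit encoding
        assert short.toDouble().fromDouble() == uint16(short)
        assert wide.toTriple().fromTriple() == uint(wide)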