Readded frontend
This commit is contained in:
parent
a545341428
commit
776a2241f7
|
@ -0,0 +1,195 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Implementation of a custom list data type for JAPL objects (used also internally by the VM)
|
||||
|
||||
{.experimental: "implicitDeref".}
|
||||
import iterable
|
||||
import ../../memory/allocator
|
||||
import base
|
||||
import strformat
|
||||
|
||||
|
||||
type
    ArrayList*[T] = object of Iterable
        ## Implementation of a simple dynamic
        ## array with amortized O(1) append complexity
        ## and O(1) complexity when popping/deleting
        ## the last element
        container: ptr UncheckedArray[T]  # Backing storage; nil until the first append

    ArrayListIterator*[T] = object of Iterator
        ## Iteration state for an ArrayList
        list: ArrayList[T]  # The list being iterated over
        current: int        # Index of the next element to yield -- TODO confirm (iterator is unfinished, see getIter)
|
||||
|
||||
|
||||
proc newArrayList*[T](): ptr ArrayList[T] =
    ## Creates and returns a fresh, empty array
    ## list with no storage allocated yet
    result = allocateObj(ArrayList[T], ObjectType.List)
    result.length = 0
    result.capacity = 0
    result.container = nil
|
||||
|
||||
|
||||
proc append*[T](self: ptr ArrayList[T], elem: T) =
    ## Adds an element at the end of the list in
    ## amortized constant time (~O(1)), growing the
    ## backing storage when it is full
    if self.length >= self.capacity:
        # Out of room: grow the container before storing the element
        self.capacity = growCapacity(self.capacity)
        self.container = resizeArray(T, self.container, self.length, self.capacity)
    self.container[self.length] = elem
    self.length += 1
|
||||
|
||||
|
||||
proc pop*[T](self: ptr ArrayList[T], idx: int = -1): T =
    ## Pops an item from the list. By default, the last
    ## element is popped, in which case the operation's
    ## time complexity is O(1). When an arbitrary element
    ## is popped, the complexity rises to O(k) where k
    ## is the number of elements that had to be shifted
    ## by 1 to avoid empty slots.
    ## Raises IndexDefect on empty lists or invalid indeces
    var idx = idx
    if self.length == 0:
        raise newException(IndexDefect, "pop from empty ArrayList")
    if idx == -1:
        # Default: pop the last element
        idx = self.length - 1
    if idx notin 0..self.length - 1:
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    result = self.container[idx]
    if idx != self.length - 1:
        # Shift every element after idx back by one slot.
        # Bug fix: the loop must stop at length - 2, because it reads
        # container[i + 1]; the original ran i up to length - 1 and
        # read one slot past the last valid element
        for i in countup(idx, self.length - 2):
            self.container[i] = self.container[i + 1]
    # Bug fix: capacity is left untouched. The original decremented it
    # without reallocating, desynchronizing the bookkeeping from the
    # real allocation size passed to resizeArray/freeArray later
    self.length -= 1
|
||||
|
||||
|
||||
proc `[]`*[T](self: ptr ArrayList[T], idx: int): T =
    ## Retrieves an item from the list by index, in
    ## constant time. Raises IndexDefect when the list
    ## is empty or the index is out of bounds
    if self.length == 0:
        # Bug fix: the original message contained a duplicated colon (": :")
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    if idx notin 0..self.length - 1:
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    result = self.container[idx]
|
||||
|
||||
|
||||
proc `[]`*[T](self: ptr ArrayList[T], slice: Hslice[int, int]): ptr ArrayList[T] =
    ## Retrieves a subset of the list, in O(k) time where k is the size
    ## of the slice
    # NOTE(review): the copy loop below stops at slice.b - 1, so the
    # slice's upper bound is treated as *exclusive*, unlike Nim's usual
    # inclusive a..b slices — confirm this asymmetry is intended
    if self.length == 0:
        raise newException(IndexDefect, "ArrayList index out of bounds")
    if slice.a notin 0..self.length - 1 or slice.b notin 0..self.length:
        raise newException(IndexDefect, "ArrayList index out of bounds")
    result = newArrayList[T]()
    for i in countup(slice.a, slice.b - 1):
        result.append(self.container[i])
|
||||
|
||||
|
||||
proc `[]=`*[T](self: ptr ArrayList[T], idx: int, obj: T) =
    ## Stores an object at the given index, in constant
    ## time. Raises IndexDefect on invalid indeces
    if self.length == 0:
        raise newException(IndexDefect, "ArrayList is empty")
    elif idx notin 0..self.length - 1:
        raise newException(IndexDefect, "ArrayList index out of bounds")
    else:
        self.container[idx] = obj
|
||||
|
||||
|
||||
proc delete*[T](self: ptr ArrayList[T], idx: int) =
    ## Removes the object at the given index from
    ## the list. Shares self.pop()'s time complexity
    if self.length == 0:
        raise newException(IndexDefect, "delete from empty ArrayList")
    if idx in 0..self.length - 1:
        discard self.pop(idx)
    else:
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
|
||||
|
||||
|
||||
proc contains*[T](self: ptr ArrayList[T], elem: T): bool =
    ## Returns whether the given object is present
    ## in the list. Worst-case complexity is O(n)
    result = false
    var i = 0
    while i < self.length:
        if self[i] == elem:
            return true
        i += 1
|
||||
|
||||
|
||||
proc high*[T](self: ptr ArrayList[T]): int =
    ## Returns the index of the last element in the
    ## list, in constant time. Raises IndexDefect
    ## when the list is empty
    if self.length == 0:
        raise newException(IndexDefect, "ArrayList is empty")
    return self.length - 1
|
||||
|
||||
|
||||
proc len*[T](self: ptr ArrayList[T]): int =
    ## The number of elements currently stored
    ## in the list (constant time)
    return self.length
|
||||
|
||||
|
||||
iterator pairs*[T](self: ptr ArrayList[T]): tuple[key: int, val: T] =
    ## Yields (index, element) tuples, similarly
    ## to Python's enumerate()
    var i = 0
    while i < self.length:
        yield (key: i, val: self[i])
        i += 1
|
||||
|
||||
|
||||
iterator items*[T](self: ptr ArrayList[T]): T =
    ## Yields each element of the list in order
    var i = 0
    while i < self.length:
        yield self[i]
        i += 1
|
||||
|
||||
|
||||
proc reversed*[T](self: ptr ArrayList[T], first: int = -1, last: int = 0): ptr ArrayList[T] =
    ## Builds a new list holding the elements from first down
    ## to last in reverse order. A first of -1 (the default)
    ## means the end of the list; last defaults to 0 (the
    ## beginning of the list)
    result = newArrayList[T]()
    let start = if first == -1: self.length - 1 else: first
    for i in countdown(start, last):
        result.append(self[i])
|
||||
|
||||
|
||||
proc extend*[T](self: ptr ArrayList[T], other: seq[T]) =
    ## Appends every element of a Nim sequence
    ## to this list, one append at a time
    for item in other:
        self.append(item)
|
||||
|
||||
|
||||
proc extend*[T](self: ptr ArrayList[T], other: ptr ArrayList[T]) =
    ## Appends every element of another ArrayList
    ## to this list, one append at a time
    for item in other:
        self.append(item)
|
||||
|
||||
|
||||
proc `$`*[T](self: ptr ArrayList[T]): string =
    ## Returns a string representation of self,
    ## e.g. "[1, 2, 3]"
    result = "["
    if self.length > 0:
        for i in 0..self.length - 1:
            # .add appends in place; the original's `result = result & ...`
            # reallocated the whole string on every iteration (O(n^2))
            result.add($self.container[i])
            if i < self.length - 1:
                result.add(", ")
    result.add("]")
|
||||
|
||||
|
||||
proc getIter*[T](self: ptr ArrayList[T]): Iterator =
    ## Returns the iterator object of the
    ## arraylist
    # NOTE(review): unfinished stub — the allocated iterator's fields
    # (list, current, iterable, iterCount) are never initialized, and
    # the allocate() call is missing its count argument
    result = allocate(ArrayListIterator, ) # TODO
|
|
@ -0,0 +1,60 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import ../../memory/allocator
|
||||
|
||||
|
||||
type
    ObjectType* {.pure.} = enum
        ## All the possible object types
        String, Exception, Function,
        Class, Module, BaseObject,
        Native, Integer, Float,
        Bool, NotANumber, Infinity,
        Nil, List, Dict, Set, Tuple
    Obj* = object of RootObj
        ## The base object for all
        ## JAPL types. Every object
        ## in JAPL implicitly inherits
        ## from this base type
        kind*: ObjectType    # Which concrete JAPL type this object is
        hashValue*: uint64   # presumably a precomputed/cached hash — TODO confirm against hash()
|
||||
|
||||
|
||||
## Object constructors and allocators
|
||||
|
||||
proc allocateObject*(size: int, kind: ObjectType): ptr Obj =
    ## Wrapper around memory.reallocate to create a new generic JAPL object
    ## of the given byte size, tagging it with the given object type.
    ## Note: hashValue is left zero-initialized
    result = cast[ptr Obj](reallocate(nil, 0, size))
    result.kind = kind
|
||||
|
||||
|
||||
template allocateObj*(kind: untyped, objType: ObjectType): untyped =
    ## Wrapper around allocateObject to cast a generic object
    ## to a more specific type. `kind` is the concrete Nim type,
    ## whose sizeof determines the allocation size
    cast[ptr kind](allocateObject(sizeof kind, objType))
|
||||
|
||||
|
||||
proc newObj*(): ptr Obj =
    ## Allocates a generic JAPL object
    ## with kind set to BaseObject
    result = allocateObj(Obj, ObjectType.BaseObject)
|
||||
|
||||
|
||||
proc asObj*(self: ptr Obj): ptr Obj =
    ## Casts a specific JAPL object into a generic
    ## pointer to Obj
    return cast[ptr Obj](self)
|
||||
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import ../../memory/allocator
|
||||
import ../../config
|
||||
import base
|
||||
import iterable
|
||||
|
||||
|
||||
type
    Entry = object
        ## A single key/value slot in the map
        key: ptr Obj      # nil marks a never-used slot
        value: ptr Obj
        tombstone: bool   # true if the entry was deleted (slot kept for probing)
    HashMap* = object of Iterable
        ## A hash map using open addressing with linear
        ## probing (see findEntry) and tombstone-based deletion
        entries: ptr UncheckedArray[ptr Entry]
        actual_length: int  # Live entry count; length also counts tombstones
|
||||
|
||||
|
||||
proc newHashMap*(): ptr HashMap =
    ## Allocates a new, empty hash map
    result = allocateObj(HashMap, ObjectType.Dict)
    result.length = 0
    result.capacity = 0
    result.actual_length = 0
    result.entries = nil
|
||||
|
||||
|
||||
proc freeHashMap*(self: ptr HashMap) =
    ## Releases the map's backing storage and
    ## resets all of its bookkeeping fields
    discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity)
    self.entries = nil
    self.capacity = 0
    self.length = 0
    self.actual_length = 0
|
||||
|
||||
|
||||
proc findEntry(self: ptr UncheckedArray[ptr Entry], key: ptr Obj, capacity: int): ptr Entry =
    ## Scans the entry array with linear probing and returns the
    ## slot belonging to the given key; for absent keys, returns
    ## the first truly-empty slot on the probe chain
    var capacity = uint64(capacity)
    var idx = uint64(key.hash()) mod capacity
    while true:
        result = self[idx]
        if system.`==`(result.key, nil):
            # Truly empty slot: the key is not in the map
            break
        elif result.tombstone:
            # Deleted slot: only stop if it held this very key,
            # otherwise keep probing past it
            if result.key == key:
                break
        elif result.key == key:
            break
        # Advance and wrap around (linear probing). Termination relies
        # on the load factor guaranteeing at least one empty slot
        idx = (idx + 1) mod capacity
|
||||
|
||||
|
||||
proc adjustCapacity(self: ptr HashMap) =
    ## Grows the map's entry array and re-inserts every live
    ## entry into its new home slot (slot indices depend on
    ## the capacity, so everything must be rehashed)
    var newCapacity = growCapacity(self.capacity)
    var entries = allocate(UncheckedArray[ptr Entry], Entry, newCapacity)
    var oldEntry: ptr Entry
    var newEntry: ptr Entry
    # length is recomputed below; tombstones are dropped by the rehash
    self.length = 0
    for x in countup(0, newCapacity - 1):
        # Pre-initialize every new slot as empty
        entries[x] = allocate(Entry, Entry, 1)
        entries[x].tombstone = false
        entries[x].key = nil
        entries[x].value = nil
    for x in countup(0, self.capacity - 1):
        oldEntry = self.entries[x]
        if not system.`==`(oldEntry.key, nil):
            # NOTE(review): tombstoned entries still have a non-nil key,
            # so deleted slots appear to be copied over too — confirm
            newEntry = entries.findEntry(oldEntry.key, newCapacity)
            newEntry.key = oldEntry.key
            newEntry.value = oldEntry.value
            self.length += 1
    discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity)
    self.entries = entries
    self.capacity = newCapacity
|
||||
|
||||
|
||||
proc setEntry(self: ptr HashMap, key: ptr Obj, value: ptr Obj): bool =
    ## Stores a key/value pair in the map, growing the entry
    ## array first if the load factor would be exceeded.
    ## Returns true if the key was not already present
    if float64(self.length + 1) >= float64(self.capacity) * MAP_LOAD_FACTOR:
        self.adjustCapacity()
    var entry = findEntry(self.entries, key, self.capacity)
    # A nil key means the probe found a free slot, i.e. a new key
    result = system.`==`(entry.key, nil)
    if result:
        self.actual_length += 1
        self.length += 1
    entry.key = key
    entry.value = value
    # Writing also resurrects a tombstoned slot for this key
    entry.tombstone = false
|
||||
|
||||
|
||||
proc `[]`*(self: ptr HashMap, key: ptr Obj): ptr Obj =
    ## Looks up a key and returns its value,
    ## raising KeyError when it is absent or deleted
    let entry = findEntry(self.entries, key, self.capacity)
    if entry.tombstone or system.`==`(entry.key, nil):
        raise newException(KeyError, "Key not found: " & $key)
    return entry.value
|
||||
|
||||
|
||||
proc `[]=`*(self: ptr HashMap, key: ptr Obj, value: ptr Obj) =
    ## Inserts or updates a key/value pair. The "was new"
    ## flag returned by setEntry is deliberately ignored
    discard self.setEntry(key, value)
|
||||
|
||||
|
||||
proc len*(self: ptr HashMap): int =
    ## Number of live (non-deleted) entries in the map
    return self.actual_length
|
||||
|
||||
|
||||
proc del*(self: ptr HashMap, key: ptr Obj) =
    ## Removes a key from the map by tombstoning its slot.
    ## Raises KeyError if the key is absent or already deleted
    if self.len() == 0:
        raise newException(KeyError, "delete from empty hashmap")
    var entry = findEntry(self.entries, key, self.capacity)
    # Bug fix: also require `not entry.tombstone`. findEntry returns a
    # tombstoned slot with its key pointer intact, so the original let
    # an already-deleted key be "deleted" again, decrementing
    # actual_length a second time
    if not system.`==`(entry.key, nil) and not entry.tombstone:
        # The key pointer is kept so probe chains stay intact; only
        # the tombstone flag marks the slot as logically deleted
        self.actual_length -= 1
        entry.tombstone = true
    else:
        raise newException(KeyError, "Key not found: " & $key)
|
||||
|
||||
|
||||
proc contains*(self: ptr HashMap, key: ptr Obj): bool =
    ## Tells whether the given key is present
    ## (and not deleted) in the map
    let entry = findEntry(self.entries, key, self.capacity)
    result = not system.`==`(entry.key, nil) and not entry.tombstone
|
||||
|
||||
|
||||
iterator keys*(self: ptr HashMap): ptr Obj =
    ## Yields every live key in the map, skipping
    ## empty and tombstoned slots
    for i in countup(0, self.capacity - 1):
        let slot = self.entries[i]
        if not system.`==`(slot.key, nil) and not slot.tombstone:
            yield slot.key
|
||||
|
||||
|
||||
iterator values*(self: ptr HashMap): ptr Obj =
    ## Yields every value in the map, in key order
    for k in self.keys():
        yield self[k]
|
||||
|
||||
|
||||
iterator pairs*(self: ptr HashMap): tuple[key: ptr Obj, val: ptr Obj] =
    ## Yields every (key, value) pair in the map
    for k in self.keys():
        yield (key: k, val: self[k])
|
||||
|
||||
|
||||
iterator items*(self: ptr HashMap): ptr Obj =
    ## Iterating a map yields its keys
    for key in self.keys():
        yield key
|
||||
|
||||
|
||||
proc `$`*(self: ptr HashMap): string =
    ## Returns a string representation of the map,
    ## e.g. {a: 1, b: 2}
    result &= "{"
    var count = 0
    for key, value in self.pairs():
        result &= $key & ": " & $value
        if count < self.len() - 1:
            result &= ", "
        count += 1
    result &= "}"
|
|
@ -0,0 +1,44 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# Implementation of iterable types and iterators in JAPL
|
||||
|
||||
import base
|
||||
|
||||
|
||||
type
    Iterable* = object of Obj
        ## Defines the standard interface
        ## for iterable types in JAPL
        length*: int    # Number of elements currently stored
        capacity*: int  # Number of slots allocated
    Iterator* = object of Iterable
        ## This object drives iteration
        ## for every iterable type in JAPL except
        ## generators
        iterable*: ptr Obj  # The object being iterated over
        iterCount*: int     # presumably how many elements were yielded so far — TODO confirm
|
||||
|
||||
|
||||
proc getIter*(self: Iterable): ptr Iterator =
    ## Returns the iterator object of an
    ## iterable, which drives foreach
    ## loops
    # NOTE(review): base-type stub — always returns nil; concrete
    # iterables are expected to provide their own implementation
    return nil
|
||||
|
||||
|
||||
proc next*(self: Iterator): ptr Obj =
    ## Returns the next element from
    ## the iterator or nil if the
    ## iterator has been consumed
    # NOTE(review): base-type stub — always returns nil; concrete
    # iterators are expected to provide their own implementation
    return nil
|
|
@ -0,0 +1,908 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import meta/token
|
||||
import meta/ast
|
||||
import meta/errors
|
||||
import meta/bytecode
|
||||
import ../config
|
||||
import ../util/multibyte
|
||||
|
||||
|
||||
import strformat
|
||||
import algorithm
|
||||
import parseutils
|
||||
import sequtils
|
||||
|
||||
|
||||
export ast
|
||||
export bytecode
|
||||
export token
|
||||
export multibyte
|
||||
|
||||
|
||||
type
    Name = ref object
        ## A compile-time wrapper around
        ## statically resolved names.
        ## Depth indicates to which scope
        ## the variable belongs, zero meaning
        ## the global one
        name: IdentExpr   # The identifier this entry resolves
        owner: string     # presumably the declaring module/owner — TODO confirm
        depth: int        # Scope depth (0 == global scope)
        isPrivate: bool
        isConst: bool

    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements
        start: int          # Bytecode offset where the loop starts
        depth: int          # Scope depth the loop lives at
        breakPos: seq[int]  # Placeholder jump offsets emitted for break statements

    Compiler* = ref object
        ## A wrapper around the compiler's state
        chunk: Chunk               # Bytecode chunk currently being written
        ast: seq[ASTNode]          # The input AST to compile
        current: int               # Index of the next AST node to process
        file: string               # Source filename, used in error messages
        names: seq[Name]           # Statically resolved names
        scopeDepth: int            # Current lexical nesting level
        currentFunction: FunDecl   # Function being compiled (nil at top level)
        enableOptimizations*: bool
        currentLoop: Loop
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
        # This sequence is emptied each time a
        # fun declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
|
||||
|
||||
|
||||
|
||||
proc initCompiler*(enableOptimizations: bool = true): Compiler =
    ## Initializes a new Compiler object
    # NOTE(review): result.chunk is left nil here — presumably set by
    # the main compile entry point, which is not visible in this chunk;
    # confirm before calling any emit* proc on a fresh compiler
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.scopeDepth = 0
    result.currentFunction = nil
    result.enableOptimizations = enableOptimizations
|
||||
|
||||
|
||||
|
||||
## Forward declarations
|
||||
proc expression(self: Compiler, node: ASTNode)
|
||||
proc statement(self: Compiler, node: ASTNode)
|
||||
proc declaration(self: Compiler, node: ASTNode)
|
||||
proc peek(self: Compiler, distance: int = 0): ASTNode
|
||||
## End of forward declarations
|
||||
|
||||
## Public getters for nicer error formatting
|
||||
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the AST node the compiler is currently processing
    ## (the last node once the input has been exhausted)
    if self.current >= self.ast.len():
        result = self.ast[^1]
    else:
        result = self.ast[self.current - 1]
|
||||
|
||||
|
||||
## Utility functions
|
||||
|
||||
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance.
    ## If the distance is out of bounds, the last
    ## AST node in the tree is returned. A negative
    ## distance may be used to retrieve previously
    ## consumed AST nodes
    # NOTE(review): on an empty AST the fallback self.ast[^1] itself
    # raises IndexDefect — presumably peek is never called before
    # nodes are loaded; confirm
    if self.ast.high() == -1 or self.current + distance > self.ast.high() or
            self.current + distance < 0:
        result = self.ast[^1]
    else:
        result = self.ast[self.current + distance]
|
||||
|
||||
|
||||
proc done(self: Compiler): bool =
    ## Whether every AST node has been consumed
    return self.current > self.ast.high()
|
||||
|
||||
|
||||
proc error(self: Compiler, message: string) =
    ## Raises a formatted CompileError exception carrying
    ## file, line and lexeme context for the current node
    let tok = self.getCurrentNode().token
    raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', line {tok.line} at '{tok.lexeme}' -> {message}")
|
||||
|
||||
|
||||
proc step(self: Compiler): ASTNode =
    ## Consumes and returns the current AST node,
    ## advancing the cursor unless input is exhausted
    result = self.peek()
    if not self.done():
        self.current += 1
|
||||
|
||||
|
||||
proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    # The byte is tagged with the current node's source line,
    # used for error reporting and disassembly
    self.chunk.write(uint8 byt, self.peek().token.line)
|
||||
|
||||
|
||||
proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Emits two bytes back to back; handy for writing an
    ## opcode together with its single-byte operand
    for b in [uint8 byt1, uint8 byt2]:
        self.emitByte(b)
|
||||
|
||||
|
||||
proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
    ## Writes an array of 2 bytes into the current chunk,
    ## one emitByte call per element
    for b in bytarr:
        self.emitByte(b)
|
||||
|
||||
|
||||
proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
    ## Writes an array of 3 bytes into the current chunk,
    ## one emitByte call per element
    for b in bytarr:
        self.emitByte(b)
|
||||
|
||||
|
||||
proc makeConstant(self: Compiler, val: ASTNode): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s
    return self.chunk.addConstant(val)
|
||||
|
||||
|
||||
proc emitConstant(self: Compiler, obj: ASTNode) =
    ## Emits a LoadConstant instruction followed by the
    ## constant's 3-byte index into the constant table
    self.emitByte(LoadConstant)
    self.emitBytes(self.makeConstant(obj))
|
||||
|
||||
|
||||
proc identifierConstant(self: Compiler, identifier: IdentExpr): array[3, uint8] =
    ## Emits an identifier name as a string in the current chunk's constant
    ## table. This is used to load globals declared as dynamic that cannot
    ## be resolved statically by the compiler
    try:
        result = self.makeConstant(identifier)
    except CompileError:
        # Re-raise through self.error so file/line context is attached
        self.error(getCurrentExceptionMsg())
|
||||
|
||||
|
||||
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits a dummy jump offset to be patched later. Assumes
    ## the largest offset (emits 4 bytes, one for the given jump
    ## opcode, while the other 3 are for the jump offset which is set
    ## to the maximum unsigned 24 bit integer). If the shorter
    ## 16 bit alternative is later found to be better suited, patchJump
    ## will fix this. This function returns the absolute index into the
    ## chunk's bytecode array where the given placeholder instruction was written
    self.emitByte(opcode)
    # 0xffffff == maximum 24-bit offset, an obvious placeholder
    self.emitBytes((0xffffff).toTriple())
    result = self.chunk.code.len() - 4
|
||||
|
||||
|
||||
proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted jump
    ## using emitJump. Since emitJump assumes
    ## a long jump, this also shrinks the jump
    ## offset and changes the bytecode instruction if possible
    ## (i.e. jump is in 16 bit range), but the converse is also
    ## true (i.e. it might change a regular jump into a long one)
    # emitJump wrote 1 opcode byte + 3 offset bytes, hence the - 4
    let jump: int = self.chunk.code.len() - offset - 4
    if jump > 16777215:
        # Offsets are at most 24 bits wide (16777215 == 2^24 - 1)
        self.error("cannot jump more than 16777215 bytecode instructions")
    if jump < uint16.high().int:
        # Short form: rewrite the opcode to its 16-bit variant and
        # shrink the 3-byte operand down to 2 bytes
        case OpCode(self.chunk.code[offset]):
            of LongJumpForwards:
                self.chunk.code[offset] = JumpForwards.uint8()
            of LongJumpBackwards:
                self.chunk.code[offset] = JumpBackwards.uint8()
            of LongJumpIfFalse:
                self.chunk.code[offset] = JumpIfFalse.uint8()
            of LongJumpIfFalsePop:
                self.chunk.code[offset] = JumpIfFalsePop.uint8()
            else:
                self.error(&"invalid opcode {self.chunk.code[offset]} in patchJump (This is an internal error and most likely a bug)")
        self.chunk.code.delete(offset + 1) # Discards the 24 bit integer
        let offsetArray = jump.toDouble()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
    else:
        # Long form: make sure the opcode is the 24-bit variant and
        # write the full 3-byte offset in place
        case OpCode(self.chunk.code[offset]):
            of JumpForwards:
                self.chunk.code[offset] = LongJumpForwards.uint8()
            of JumpBackwards:
                self.chunk.code[offset] = LongJumpBackwards.uint8()
            of JumpIfFalse:
                self.chunk.code[offset] = LongJumpIfFalse.uint8()
            of JumpIfFalsePop:
                self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
            else:
                self.error(&"invalid opcode {self.chunk.code[offset]} in patchJump (This is an internal error and most likely a bug)")
        let offsetArray = jump.toTriple()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
        self.chunk.code[offset + 3] = offsetArray[2]
|
||||
|
||||
## End of utility functions
|
||||
|
||||
proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such
    ## as singletons, strings, numbers and
    ## collections
    case node.kind:
        of trueExpr:
            self.emitByte(OpCode.True)
        of falseExpr:
            self.emitByte(OpCode.False)
        of nilExpr:
            self.emitByte(OpCode.Nil)
        of infExpr:
            self.emitByte(OpCode.Inf)
        of nanExpr:
            self.emitByte(OpCode.Nan)
        of strExpr:
            self.emitConstant(node)
        # The optimizer will emit warning
        # for overflowing numbers. Here, we
        # treat them as errors
        of intExpr:
            var x: int
            var y = IntExpr(node)
            try:
                # NOTE(review): asserts are stripped with -d:danger, which
                # would silently disable this validation — consider doAssert
                assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(y)
        # Even though most likely the optimizer
        # will collapse all these other literals
        # to nodes of kind intExpr, that can be
        # disabled. This also allows us to catch
        # basic overflow errors before running any code
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            try:
                assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            # Hex/bin/oct literals are re-emitted as plain decimal int nodes
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
                    pos: (start: y.token.pos.start, stop: y.token.pos.start +
                            len($x)))))
        of binExpr:
            var x: int
            var y = BinExpr(node)
            try:
                assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
                    pos: (start: y.token.pos.start, stop: y.token.pos.start +
                            len($x)))))
        of octExpr:
            var x: int
            var y = OctExpr(node)
            try:
                assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
                    pos: (start: y.token.pos.start, stop: y.token.pos.start +
                            len($x)))))
        of floatExpr:
            var x: float
            var y = FloatExpr(node)
            try:
                assert parseFloat(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(y)
        of listExpr:
            var y = ListExpr(node)
            # Push each member, then build the collection from the stack
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildList)
            self.emitBytes(y.members.len().toTriple()) # 24-bit integer, meaning list literals can have up to 2^24 elements
        of tupleExpr:
            var y = TupleExpr(node)
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildTuple)
            self.emitBytes(y.members.len().toTriple())
        of setExpr:
            var y = SetExpr(node)
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildSet)
            self.emitBytes(y.members.len().toTriple())
        of dictExpr:
            var y = DictExpr(node)
            # Keys and values are pushed in alternating pairs
            for (key, value) in zip(y.keys, y.values):
                self.expression(key)
                self.expression(value)
            self.emitByte(BuildDict)
            self.emitBytes(y.keys.len().toTriple())
        of awaitExpr:
            var y = AwaitExpr(node)
            self.expression(y.awaitee)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug)")
|
||||
|
||||
|
||||
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as negation or
    ## bitwise inversion
    self.expression(node.a) # Pushes the operand onto the stack
    case node.operator.kind:
        of Minus:
            self.emitByte(UnaryNegate)
        of Plus:
            discard # Unary + does nothing
        of TokenType.LogicalNot:
            self.emitByte(OpCode.LogicalNot)
        of Tilde:
            # Bitwise (one's complement) inversion
            self.emitByte(UnaryNot)
        else:
            self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug)")
|
||||
|
||||
|
||||
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expressions, including
    ## short-circuiting logical and/or
    if node.operator.kind notin {TokenType.LogicalAnd, TokenType.LogicalOr}:
        # Strict operators: push both operands onto the stack first.
        # Bug fix: the original emitted both operands here
        # *unconditionally* and then re-emitted them inside the
        # LogicalAnd/LogicalOr branches below, so each operand of a
        # logical expression was evaluated twice and spurious values
        # were left on the stack. Short-circuit operators now compile
        # their own operands so the right-hand side is only evaluated
        # when needed
        self.expression(node.a)
        self.expression(node.b)
    case node.operator.kind:
        of Plus:
            self.emitByte(BinaryAdd)
        of Minus:
            self.emitByte(BinarySubtract)
        of Asterisk:
            self.emitByte(BinaryMultiply)
        of DoubleAsterisk:
            self.emitByte(BinaryPow)
        of Percentage:
            self.emitByte(BinaryMod)
        of FloorDiv:
            self.emitByte(BinaryFloorDiv)
        of Slash:
            self.emitByte(BinaryDivide)
        of Ampersand:
            self.emitByte(BinaryAnd)
        of Caret:
            self.emitByte(BinaryXor)
        of Pipe:
            self.emitByte(BinaryOr)
        of As:
            self.emitByte(BinaryAs)
        of Is:
            self.emitByte(BinaryIs)
        of IsNot:
            self.emitByte(BinaryIsNot)
        of Of:
            self.emitByte(BinaryOf)
        of RightShift:
            self.emitByte(BinaryShiftRight)
        of LeftShift:
            self.emitByte(BinaryShiftLeft)
        of TokenType.LessThan:
            self.emitByte(OpCode.LessThan)
        of TokenType.GreaterThan:
            self.emitByte(OpCode.GreaterThan)
        of TokenType.DoubleEqual:
            self.emitByte(EqualTo)
        of TokenType.LessOrEqual:
            self.emitByte(OpCode.LessOrEqual)
        of TokenType.GreaterOrEqual:
            self.emitByte(OpCode.GreaterOrEqual)
        of TokenType.LogicalAnd:
            # a and b: evaluate a; if falsey, skip b (a is the result)
            self.expression(node.a)
            let jump = self.emitJump(JumpIfFalse)
            self.emitByte(Pop)
            self.expression(node.b)
            self.patchJump(jump)
        of TokenType.LogicalOr:
            # a or b: evaluate a; if truthy, skip b (a is the result)
            self.expression(node.a)
            let jump = self.emitJump(JumpIfTrue)
            self.expression(node.b)
            self.patchJump(jump)
        # TODO: In-place operations
        else:
            self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug)")
|
||||
|
||||
|
||||
proc declareName(self: Compiler, node: ASTNode) =
    ## Compiles all name declarations (constants, static,
    ## and dynamic)
    case node.kind:
        of varDecl:
            var decl = VarDecl(node)
            if decl.isStatic:
                # Statically resolved variable: only a compile-time Name
                # entry is created, so that self.identifier can later emit
                # the proper stack offset
                if self.names.high() > 16777215:
                    # If someone ever hits this limit in real-world scenarios, I swear I'll
                    # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                    self.error("cannot declare more than 16777215 static variables at a time")
                self.names.add(Name(depth: self.scopeDepth,
                                    name: IdentExpr(decl.name),
                                    isPrivate: decl.isPrivate,
                                    owner: decl.owner,
                                    isConst: decl.isConst))
            else:
                # Dynamically-resolved variables (globals declared as dynamic
                # and unresolvable names) are bound by name at runtime
                self.emitByte(DeclareName)
                self.emitBytes(self.identifierConstant(IdentExpr(decl.name)))
        else:
            discard # TODO: Classes, functions
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations
    # The initializer is compiled first so its value is on
    # top of the stack when the name gets bound to it
    self.expression(node.value)
    self.declareName(node)
proc resolveStatic(self: Compiler, name: IdentExpr,
                   depth: int = self.scopeDepth): Name =
    ## Walks the list of statically resolved names backwards
    ## and returns the first entry with the given name at the
    ## given depth (the default depth is the current one).
    ## Returns nil when the name can't be found
    for i in countdown(self.names.high(), 0):
        let entry = self.names[i]
        if entry.name.token.lexeme == name.token.lexeme and entry.depth == depth:
            return entry
    return nil
proc deleteStatic(self: Compiler, name: IdentExpr,
                  depth: int = self.scopeDepth) =
    ## Traverses self.names backwards and deletes the first
    ## name object with the given name at the given depth.
    ## The default depth is the current one. Does nothing
    ## when the name can't be found
    # The original looped over reversed(self.names) and passed the
    # position *within the reversed copy* to del(), which removed
    # the wrong entry; del() is also swap-based and would scramble
    # the ordering that getStaticIndex() relies on for stack offsets.
    # Iterating real indices and using order-preserving delete()
    # fixes both problems
    for i in countdown(self.names.high(), 0):
        let entry = self.names[i]
        if entry.name.token.lexeme == name.token.lexeme and entry.depth == depth:
            self.names.delete(i)
            return
proc getStaticIndex(self: Compiler, name: IdentExpr): int =
    ## Returns the predicted stack slot of the given variable
    ## if it is statically resolved, or -1 if it is to be bound
    ## dynamically or does not exist at all
    for slot in countdown(self.names.high(), 0):
        if self.names[slot].name.name.lexeme == name.name.lexeme:
            return slot
    return -1
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers
    let resolved = self.resolveStatic(node)
    if resolved != nil and resolved.isConst:
        # Constants are emitted as constant instructions no matter
        # the scope depth; name resolution specifiers do not apply
        # to them (what would a "dynamic" constant even mean?)
        self.emitConstant(node)
    else:
        let slot = self.getStaticIndex(node)
        if slot == -1:
            # Not statically known: resolved by name, at runtime,
            # in a global hashmap
            self.emitByte(LoadName)
            self.emitBytes(self.identifierConstant(node))
        else:
            # Static resolution: load the value sitting at the
            # predicted stack offset
            self.emitByte(LoadFast)
            self.emitBytes(slot.toTriple())
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions
    case node.kind:
        of assignExpr:
            var assign = AssignExpr(node)
            var target = IdentExpr(assign.name)
            let resolved = self.resolveStatic(target)
            if resolved != nil and resolved.isConst:
                self.error("cannot assign to constant")
            self.expression(assign.value)
            let slot = self.getStaticIndex(target)
            # In-place operators just change what value is bound to a
            # given stack offset/name, so we perform the operation as
            # usual and then store the result.
            # TODO: A better optimization would be to have everything
            # in one opcode, but that requires variants for stack,
            # heap, and closure variables
            case assign.token.kind:
                of InplaceAdd:
                    self.emitByte(BinaryAdd)
                of InplaceSub:
                    self.emitByte(BinarySubtract)
                of InplaceDiv:
                    self.emitByte(BinaryDivide)
                of InplaceMul:
                    self.emitByte(BinaryMultiply)
                of InplacePow:
                    self.emitByte(BinaryPow)
                of InplaceFloorDiv:
                    self.emitByte(BinaryFloorDiv)
                of InplaceMod:
                    self.emitByte(BinaryMod)
                of InplaceAnd:
                    self.emitByte(BinaryAnd)
                of InplaceXor:
                    self.emitByte(BinaryXor)
                of InplaceRightShift:
                    self.emitByte(BinaryShiftRight)
                of InplaceLeftShift:
                    self.emitByte(BinaryShiftLeft)
                else:
                    discard # Unreachable
            if slot != -1:
                # Statically resolved: overwrite the stack slot
                self.emitByte(StoreFast)
                self.emitBytes(slot.toTriple())
            else:
                # Assignment only encompasses variable assignments, so
                # the name is guaranteed to be an IdentExpr here (the
                # object case is handled by setItemExpr)
                self.emitByte(StoreName)
                self.emitBytes(self.makeConstant(target))
        of setItemExpr:
            discard
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
proc beginScope(self: Compiler) =
    ## Opens a new local scope by bumping the
    ## compiler's scope depth counter
    inc(self.scopeDepth)
proc endScope(self: Compiler) =
    ## Ends the current local scope, popping off the stack
    ## (and forgetting) every name declared deeper than the
    ## depth we return to
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    var popped: int = 0
    for ident in reversed(self.names):
        if ident.depth > self.scopeDepth:
            inc(popped)
            if not self.enableOptimizations:
                # Without optimizations every out-of-scope variable
                # gets its own Pop instruction right here
                self.emitByte(Pop)
    if self.enableOptimizations and popped > 1:
        # If we're popping less than 65535 variables, then
        # we can emit a PopN instruction. This is true for
        # 99.99999% of the use cases of the language (who the
        # hell is going to use 65 THOUSAND local variables?), but
        # if you'll ever use more then JAPL will emit a PopN instruction
        # for the first 65 thousand and change local variables and then
        # emit another batch of plain ol' Pop instructions for the rest
        if popped <= uint16.high().int():
            self.emitByte(PopN)
            self.emitBytes(popped.toTriple())
        else:
            self.emitByte(PopN)
            self.emitBytes(uint16.high().int.toTriple())
            for i in countdown(self.names.high(), popped - uint16.high().int()):
                if self.names[i].depth > self.scopeDepth:
                    self.emitByte(Pop)
    elif self.enableOptimizations and popped == 1:
        # We only emit a single Pop when exactly one value goes out
        # of scope. Fixed: this branch previously lacked the
        # enableOptimizations guard, so with optimizations disabled a
        # single out-of-scope variable was popped twice (once in the
        # loop above and once here)
        self.emitByte(Pop)
    for _ in countup(0, popped - 1):
        discard self.names.pop()
    dec(self.scopeDepth)
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles a block statement, which wraps its
    ## declarations in a fresh local scope
    self.beginScope()
    for child in node.code:
        self.declaration(child)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code
    self.expression(node.condition)
    var jumpCode: OpCode
    if self.enableOptimizations:
        jumpCode = JumpIfFalsePop
    else:
        jumpCode = JumpIfFalse
    let jump = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    if node.elseBranch != nil:
        # The forward jump over the else branch must be emitted
        # *before* the falsey jump is patched. The previous code
        # patched first, so a falsey condition landed exactly on
        # the JumpForwards instruction and skipped the else branch
        # entirely
        let elseJump = self.emitJump(JumpForwards)
        self.patchJump(jump)
        self.statement(node.elseBranch)
        self.patchJump(elseJump)
    else:
        self.patchJump(jump)
proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction with the correct
    ## jump offset
    var distance: int
    case OpCode(self.chunk.code[begin + 1]): # The jump instruction
        of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
           LongJumpIfFalsePop, LongJumpIfTrue:
            # Long jump variants need a larger offset
            distance = self.chunk.code.len() - begin + 4
        else:
            distance = self.chunk.code.len() - begin
    if distance <= uint16.high().int:
        # Short form: 2-byte operand
        self.emitByte(JumpBackwards)
        self.emitBytes(distance.toDouble())
    else:
        if distance > 16777215:
            self.error("cannot jump more than 16777215 bytecode instructions")
        # Long form: 3-byte operand
        self.emitByte(LongJumpBackwards)
        self.emitBytes(distance.toTriple())
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops
    let start = self.chunk.code.len()
    self.expression(node.condition)
    let jump = self.emitJump(JumpIfFalsePop)
    self.statement(node.body)
    # The backwards jump has to be emitted *before* the exit
    # jump is patched. The previous order patched first, which
    # made a falsey condition jump straight onto the
    # JumpBackwards instruction and loop forever instead of
    # exiting past it
    self.emitLoop(start)
    self.patchJump(jump)
proc expression(self: Compiler, node: ASTNode) =
    ## Compiles all expressions
    case node.kind:
        of getItemExpr:
            discard
        of setItemExpr, assignExpr:
            # No down-conversion here: that type information would be
            # lost in the call anyway, and self.assignment tells the
            # two kinds apart itself
            self.assignment(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr,
           tupleExpr, setExpr, listExpr, dictExpr:
            # These all share the same overall structure and the kind
            # discriminant is enough to tell one from the other, so a
            # single literal handler covers them all
            self.literal(node)
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
proc delStmt(self: Compiler, node: ASTNode) =
    ## Compiles del statements, which unbind
    ## a name from the current scope
    case node.kind:
        of identExpr:
            var ident = IdentExpr(node)
            let slot = self.getStaticIndex(ident)
            if slot == -1:
                # Dynamically bound: delete from the runtime namespace
                self.emitByte(DeleteName)
                self.emitBytes(self.identifierConstant(ident))
            else:
                # Statically bound: remove the stack slot and forget
                # the compile-time name entry
                self.emitByte(DeleteFast)
                self.emitBytes(slot.toTriple())
                self.deleteStatic(ident)
        else:
            discard # The parser already handles the other cases
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements. An await statement is like an
    ## await expression, but parsed in the context of statements
    ## for standalone usage outside expressions; it's basically
    ## an await expression followed by a semicolon. Await
    ## expressions are the only native construct to run coroutines
    ## from within an already asynchronous loop (which should be
    ## orchestrated by an event loop); they block in the caller
    ## until the callee returns
    self.expression(node.awaitee)
    self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before the function exits
    ## (either because of a return or an exception)
    let current = self.chunk.code.len
    self.expression(node.deferred)
    # Move the freshly compiled bytecode out of the chunk and into
    # self.deferred. The previous implementation called
    # self.chunk.code.del(i) while iterating the very indices being
    # deleted: the sequence shifted underneath the loop (skipping
    # bytes) and del() swaps the last element in (reordering the
    # rest). Copying first and truncating afterwards keeps the
    # deferred bytecode intact
    for i in countup(current, self.chunk.code.high()):
        self.deferred.add(self.chunk.code[i])
    self.chunk.code.setLen(current)
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. An empty return
    ## implicitly returns nil
    self.expression(node.value)
    self.emitByte(OpCode.Return)
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded
    ## expression is pushed, then Yield is emitted
    self.expression(node.expression)
    self.emitByte(OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception expression
    ## is pushed, then a Raise instruction is emitted.
    ## (The docstring previously said "yield statements",
    ## a copy-paste slip from yieldStmt)
    self.expression(node.exception)
    self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the next iteration in a loop
    let target = self.currentLoop.start
    if target > 65535:
        # Loop start doesn't fit in 2 bytes: use the long form
        self.emitByte(LongJump)
        self.emitBytes(target.toTriple())
    else:
        self.emitByte(Jump)
        self.emitBytes(target.toDouble())
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps past the end of the enclosing loop.
    ## (The docstring previously described continue,
    ## a copy-paste slip)

    # Emits a dummy jump offset; this is patched
    # later by patchBreaks(), once the loop has
    # been fully compiled
    discard self.emitJump(OpCode.Break)
    self.currentLoop.breakPos.add(self.chunk.code.high() - 4)
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Rewrites the placeholder "break" opcodes emitted by
    ## breakStmt into real forward jumps. This has to happen
    ## after the loop is fully compiled, because only then is
    ## the amount of code to skip actually known
    for pos in self.currentLoop.breakPos:
        self.chunk.code[pos] = JumpForwards.uint8()
        self.patchJump(pos)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (raise
    ## AssertionError if the expression is falsey)
    self.expression(node.expression)
    self.emitByte(OpCode.Assert)
proc statement(self: Compiler, node: ASTNode) =
    ## Compiles all statements
    case node.kind:
        of exprStmt:
            self.expression(ExprStmt(node).expression)
            # Expression statements discard their value; their main
            # use case is side effects in function calls
            self.emitByte(Pop)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.delStmt:
            self.delStmt(DelStmt(node).name)
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt, NodeKind.fromImportStmt:
            discard
        of NodeKind.whileStmt, NodeKind.forStmt:
            ## Our parser already desugars for loops to
            ## while loops!
            let enclosing = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = enclosing
        of NodeKind.forEachStmt:
            discard
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            # Anything else is handled as a bare expression
            self.expression(node)
proc declaration(self: Compiler, node: ASTNode) =
    ## Compiles all declarations
    case node.kind:
        of NodeKind.varDecl:
            self.varDecl(VarDecl(node))
        of funDecl, classDecl:
            discard # TODO
        else:
            # Not a declaration: fall through to statements
            self.statement(node)
proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object, resetting the compiler's state first
    self.chunk = newChunk()
    self.ast = ast
    self.file = file
    self.names = @[]
    self.scopeDepth = 0
    self.currentFunction = nil
    self.current = 0
    while not self.done():
        self.declaration(self.step())
    if self.ast.len() > 0:
        # *Technically* an empty program is a valid program
        self.endScope()
        self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope
    result = self.chunk
    if self.ast.len() > 0 and self.scopeDepth != -1:
        self.error(&"internal error: invalid scopeDepth state (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")
|
@ -0,0 +1,552 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
## A simple and modular tokenizer implementation with arbitrary lookahead
|
||||
|
||||
import strutils
|
||||
import parseutils
|
||||
import strformat
|
||||
import tables
|
||||
|
||||
import meta/token
|
||||
import meta/errors
|
||||
|
||||
|
||||
export token # Makes Token available when importing the lexer module
|
||||
export errors
|
||||
|
||||
|
||||
# Tables of all character tokens that are not keywords

# Table of all single-character tokens
const tokens = to_table({
    '(': LeftParen, ')': RightParen,
    '{': LeftBrace, '}': RightBrace,
    '.': Dot, ',': Comma, '-': Minus,
    '+': Plus, '*': Asterisk,
    '>': GreaterThan, '<': LessThan, '=': Equal,
    '~': Tilde, '/': Slash, '%': Percentage,
    '[': LeftBracket, ']': RightBracket,
    ':': Colon, '^': Caret, '&': Ampersand,
    '|': Pipe, ';': Semicolon})

# Table of all double-character tokens
const double = to_table({"**": DoubleAsterisk,
                         ">>": RightShift,
                         "<<": LeftShift,
                         "==": DoubleEqual,
                         "!=": NotEqual,
                         ">=": GreaterOrEqual,
                         "<=": LessOrEqual,
                         "//": FloorDiv,
                         "+=": InplaceAdd,
                         "-=": InplaceSub,
                         "/=": InplaceDiv,
                         "*=": InplaceMul,
                         "^=": InplaceXor,
                         "&=": InplaceAnd,
                         "|=": InplaceOr,
                         "%=": InplaceMod,
                        })

# Table of all triple-character tokens
const triple = to_table({"//=": InplaceFloorDiv,
                         "**=": InplacePow,
                         ">>=": InplaceRightShift,
                         "<<=": InplaceLeftShift
                        })
# Constant table storing all the reserved keywords (which are parsed as identifiers)
# Fixed: "async": Async appeared twice in the original table; the
# duplicate entry has been removed
const keywords = to_table({
    "fun": Fun, "raise": Raise,
    "if": If, "else": Else,
    "for": For, "while": While,
    "var": Var, "nil": Nil,
    "true": True, "false": False,
    "return": Return, "break": Break,
    "continue": Continue, "inf": Infinity,
    "nan": NotANumber, "is": Is,
    "lambda": Lambda, "class": Class,
    "async": Async, "await": Await,
    "import": Import, "from": From,
    "isnot": IsNot, "const": Const,
    "not": LogicalNot, "assert": Assert,
    "or": LogicalOr, "and": LogicalAnd,
    "del": Del, "foreach": Foreach,
    "yield": Yield, "private": Private,
    "public": Public, "static": Static,
    "dynamic": Dynamic, "as": As,
    "of": Of, "defer": Defer,
    "except": Except, "finally": Finally,
    "try": Try
})
type
    Lexer* = ref object
        ## A lexer object
        source: string   # The raw source string being tokenized (mutated in-place by escape handling)
        tokens: seq[Token]   # The tokens produced so far
        line: int   # Current line number (starts at 1)
        start: int   # Offset into source where the current lexeme begins
        current: int   # Offset of the next character to be consumed
        file: string   # Name of the file being lexed (used in error messages)
        lines: seq[tuple[start, stop: int]]   # Start/stop offsets of each completed line
        lastLine: int   # Offset where the line currently being scanned started
    ArrayListIterator*[T] = object of Iterator
||||
|
||||
# Simple public getters exposing the lexer's (otherwise private) state
proc getStart*(self: Lexer): int = self.start
# Offset of the next character to be consumed
proc getCurrent*(self: Lexer): int = self.current
# Current line number (1-based)
proc getLine*(self: Lexer): int = self.line
# The raw source string being lexed
proc getSource*(self: Lexer): string = self.source
# Returns the (start, stop) source offsets of the given line; for line 1
# (or lower) no completed entry exists yet, so (0, current) is returned
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = (if line > 1: self.lines[line - 2] else: (start: 0, stop: self.current))
||||
proc initLexer*(self: Lexer = nil): Lexer =
    ## Initializes the lexer or resets
    ## the state of an existing one
    new(result)
    if self != nil:
        result = self
    # Whether fresh or recycled, the instance starts
    # from a clean slate
    result.source = ""
    result.file = ""
    result.tokens = @[]
    result.lines = @[]
    result.line = 1
    result.start = 0
    result.current = 0
    result.lastLine = 0
proc done(self: Lexer): bool =
    ## Returns true once the lexer has consumed
    ## the whole source string (EOF)
    self.current >= self.source.len
proc incLine(self: Lexer) =
    ## Increments the lexer's line counter and records
    ## where the just-finished line started and stopped
    ## inside the source string
    self.lines.add((start: self.lastLine, stop: self.current))
    inc(self.line)
    self.lastLine = self.current
proc step(self: Lexer, n: int = 1): char =
    ## Consumes n characters (default 1) from the source
    ## and returns the first one consumed; the rest are
    ## simply skipped over. A null terminator is returned
    ## when the lexer is already at EOF
    if self.done():
        return '\0'
    self.current += n
    result = self.source[self.current - n]
proc peek(self: Lexer, distance: int = 0): char =
    ## Returns the character at the given distance from the
    ## current position without consuming it. The distance may
    ## be negative to look at already-consumed characters, and
    ## defaults to 0 (the next character to be consumed). A
    ## null terminator is returned at (or beyond) EOF
    let pos = self.current + distance
    if self.done() or pos > self.source.high():
        return '\0'
    return self.source[pos]
proc error(self: Lexer, message: string) =
    ## Raises a LexingError whose message carries the
    ## file name, line number and offending character
    raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}")
proc check(self: Lexer, what: char, distance: int = 0): bool =
    ## Behaves like match(), but consumes nothing. Always
    ## returns false at EOF regardless of the character to
    ## check. The distance is forwarded to self.peek()
    not self.done() and self.peek(distance) == what
proc check(self: Lexer, what: string): bool =
    ## Checks a whole multi-character string in one go by
    ## calling the single-character check() for each of its
    ## characters
    for offset, expected in what:
        # Since checking never consumes anything, the index into
        # the target string doubles as the peek distance: we must
        # look one character further ahead for each character
        # already matched, or this would not behave as expected
        if not self.check(expected, offset):
            return false
    return true
proc check(self: Lexer, what: openarray[char]): bool =
    ## Checks each character in the given set against the next
    ## source character and returns true at the first match,
    ## consuming nothing. Useful when only one of several
    ## characters may match at a time
    for candidate in what:
        if self.check(candidate):
            return true
    return false
proc match(self: Lexer, what: char): bool =
    ## Consumes the next character and returns true if it
    ## equals the given one; otherwise an error is raised
    ## and false is returned
    if self.done():
        self.error("unexpected EOF")
        return false
    if not self.check(what):
        self.error(&"expecting '{what}', got '{self.peek()}' instead")
        return false
    self.current += 1
    return true
proc match(self: Lexer, what: string): bool =
    ## Matches (and consumes) a whole multi-character string
    ## by calling the single-character match() for each of
    ## its characters
    for expected in what:
        if not self.match(expected):
            return false
    return true
proc createToken(self: Lexer, tokenType: TokenType) =
    ## Builds a token of the given kind out of the current
    ## lexeme boundaries (start..<current) and appends it
    ## to the token list
    var tok: Token = new(Token)
    tok.kind = tokenType
    tok.lexeme = self.source[self.start..<self.current]
    tok.line = self.line
    tok.pos = (start: self.start, stop: self.current)
    self.tokens.add(tok)
proc parseEscape(self: Lexer) =
    # Boring escape sequence parsing. For more info check out
    # https://en.wikipedia.org/wiki/Escape_sequences_in_C.
    # As of now, \u and \U are not supported, but they'll
    # likely be soon. Another notable limitation is that
    # \xhhh and \nnn are limited to the size of a char
    # (i.e. uint8, or 256 values).
    # The escape is rewritten in-place inside self.source
    case self.peek():
        of 'a':
            self.source[self.current] = cast[char](0x07)
        of 'b':
            # Fixed: backspace is 0x08; the previous value (0x7F)
            # is the DEL control character
            self.source[self.current] = cast[char](0x08)
        of 'e':
            self.source[self.current] = cast[char](0x1B)
        of 'f':
            self.source[self.current] = cast[char](0x0C)
        of 'n':
            when defined(windows):
                # We natively convert LF to CRLF on Windows, and
                # gotta thank Microsoft for the extra boilerplate!
                # Fixed: string.insert takes the item first and the
                # position second; the previous call had them swapped
                # and would not typecheck on Windows
                self.source[self.current] = cast[char](0x0D)
                self.source.insert($cast[char](0x0A), self.current + 1)
            when defined(darwin):
                self.source[self.current] = cast[char](0x0A)
            when defined(linux):
                # Fixed: Linux newlines are LF (0x0A); the previous
                # value (0x0D) is a carriage return
                self.source[self.current] = cast[char](0x0A)
        of 'r':
            self.source[self.current] = cast[char](0x0D)
        of 't':
            self.source[self.current] = cast[char](0x09)
        of 'v':
            self.source[self.current] = cast[char](0x0B)
        of '"':
            self.source[self.current] = '"'
        of '\'':
            self.source[self.current] = '\''
        of '\\':
            self.source[self.current] = cast[char](0x5C)
        of '0'..'9':
            # Octal escape: up to 3 octal digits, max value 255
            var code = ""
            var value = 0
            var i = self.current
            while i < self.source.high() and (let c = self.source[
                    i].toLowerAscii(); c in '0'..'7') and len(code) < 3:
                code &= self.source[i]
                i += 1
            assert parseOct(code, value) == code.len()
            if value > uint8.high().int:
                self.error("escape sequence value too large (> 255)")
            self.source[self.current] = cast[char](value)
        of 'u', 'U':
            self.error("unicode escape sequences are not supported (yet)")
        of 'x':
            # Hexadecimal escape, max value 255
            var code = ""
            var value = 0
            var i = self.current
            while i < self.source.high() and (let c = self.source[
                    i].toLowerAscii(); c in 'a'..'f' or c in '0'..'9'):
                code &= self.source[i]
                i += 1
            assert parseHex(code, value) == code.len()
            if value > uint8.high().int:
                self.error("escape sequence value too large (> 255)")
            self.source[self.current] = cast[char](value)
        else:
            self.error(&"invalid escape sequence '\\{self.peek()}'")
proc parseString(self: Lexer, delimiter: char, mode: string = "single") =
    ## Parses string literals. They can be expressed using matching pairs
    ## of either single or double quotes. Most C-style escape sequences are
    ## supported, moreover, a specific prefix may be prepended
    ## to the string to instruct the lexer on how to parse it:
    ## - b -> declares a byte string, where each character is
    ##        interpreted as an integer instead of a character
    ## - r -> declares a raw string literal, where escape sequences
    ##        are not parsed and stay as-is
    ## - f -> declares a format string, where variables may be
    ##        interpolated using curly braces like f"Hello, {name}!".
    ##        Braces may be escaped using a pair of them, so to represent
    ##        a literal "{" in an f-string, one would use {{ instead
    ## Multi-line strings can be declared using matching triplets of
    ## either single or double quotes. They can span across multiple
    ## lines and escape sequences in them are not parsed, like in raw
    ## strings, so a multi-line string prefixed with the "r" modifier
    ## is redundant, although multi-line byte/format strings are supported
    while not self.check(delimiter) and not self.done():
        if self.check('\n'):
            # Only multi-line strings may span lines
            if mode == "multi":
                self.incLine()
            else:
                self.error("unexpected EOL while parsing string literal")
        if mode in ["raw", "multi"]:
            # Raw/multi-line: consume as-is, no escape handling
            discard self.step()
        if self.check('\\'):
            # This madness here serves to get rid of the slash, since \x is mapped
            # to a one-byte sequence but the string '\x' is actually 2 bytes (or more,
            # depending on the specific escape sequence)
            self.source = self.source[0..<self.current] & self.source[
                    self.current + 1..^1]
            self.parseEscape()
        if mode == "format" and self.check('{'):
            discard self.step()
            if self.check('{'):
                # {{ escapes a literal brace: drop one of the pair
                self.source = self.source[0..<self.current] & self.source[
                        self.current + 1..^1]
                continue
            # Scan ahead for the matching closing brace
            while not self.check(['}', '"']):
                discard self.step()
            if self.check('"'):
                self.error("unclosed '{' in format string")
        elif mode == "format" and self.check('}'):
            if not self.check('}', 1):
                self.error("unmatched '}' in format string")
            else:
                # }} escapes a literal closing brace: drop one of the pair
                self.source = self.source[0..<self.current] & self.source[
                        self.current + 1..^1]
        discard self.step()
    if mode == "multi":
        # A multi-line string must be terminated by three delimiters
        if not self.match(delimiter.repeat(3)):
            self.error("unexpected EOL while parsing multi-line string literal")
    if self.done():
        self.error("unexpected EOF while parsing string literal")
        return
    else:
        # Consume the closing delimiter
        discard self.step()
    self.createToken(String)
proc parseBinary(self: Lexer) =
    ## Parses binary number literals
    while self.peek().isDigit():
        if not self.check(['0', '1']):
            self.error(&"invalid digit '{self.peek()}' in binary literal")
        discard self.step()
    self.createToken(Binary)
    # To make our life easier, the literal is padded with leading
    # zeroes right away, so that its digit count (excluding the
    # "0b" prefix) is always a multiple of 8
    while (self.tokens[^1].lexeme.len() - 2) mod 8 != 0:
        self.tokens[^1].lexeme = "0b" & "0" & self.tokens[^1].lexeme[2..^1]
proc parseOctal(self: Lexer) =
    ## Parses octal number literals
    while self.peek().isDigit():
        # 8 and 9 are decimal digits but not valid octal ones
        if self.peek() notin '0'..'7':
            self.error(&"invalid digit '{self.peek()}' in octal literal")
        discard self.step()
    self.createToken(Octal)
proc parseHex(self: Lexer) =
    ## Parses hexadecimal number literals
    while self.peek().isAlphaNumeric():
        let c = self.peek()
        # Valid hex digits are 0-9 and a-f (case-insensitive)
        if not c.isDigit() and c.toLowerAscii() notin 'a'..'f':
            self.error(&"invalid hexadecimal literal")
        discard self.step()
    self.createToken(Hex)
proc parseNumber(self: Lexer) =
    ## Parses numeric literals: integers and floats made of
    ## arabic digits. Floats support scientific notation
    ## (i.e. 3e14, case-insensitive "e") and a dot-separated
    ## fractional part; forms like 32.5e3 work too. Binary,
    ## hexadecimal and octal literals use the 0b, 0x and 0o
    ## prefixes respectively
    case self.peek():
        of 'b':
            discard self.step()
            self.parseBinary()
        of 'x':
            discard self.step()
            self.parseHex()
        of 'o':
            discard self.step()
            self.parseOctal()
        else:
            var kind: TokenType = Integer
            while self.peek().isDigit():
                discard self.step()
            if self.check(['e', 'E']):
                # Scientific notation makes it a float
                kind = Float
                discard self.step()
                while self.peek().isDigit():
                    discard self.step()
            elif self.check('.'):
                # TODO: Is there a better way?
                discard self.step()
                if not self.peek().isDigit():
                    self.error("invalid float number literal")
                kind = Float
                while self.peek().isDigit():
                    discard self.step()
                if self.check(['e', 'E']):
                    # Exponent after the fractional part
                    discard self.step()
                    while self.peek().isDigit():
                        discard self.step()
            self.createToken(kind)
proc parseIdentifier(self: Lexer) =
    ## Parses identifiers and keywords.
    ## Note that multi-character tokens
    ## such as UTF runes are not supported
    while self.peek().isAlphaNumeric() or self.check('_'):
        discard self.step()
    # The raw text of the identifier spans from the token start
    # to the current scan position
    var name: string = self.source[self.start..<self.current]
    if name in keywords:
        # It's a keyword
        self.createToken(keywords[name])
    else:
        # Identifier!
        self.createToken(Identifier)
|
||||
|
||||
|
||||
proc next(self: Lexer) =
    ## Scans a single token. This method is
    ## called iteratively until the source
    ## file reaches EOF
    if self.done():
        return
    var single = self.step()
    if single in [' ', '\t', '\r', '\f',
                  '\e']: # We skip whitespaces, tabs and other useless characters
        return
    elif single == '\n':
        self.incLine()
    elif single in ['"', '\'']:
        if self.check(single) and self.check(single, 1):
            # Multiline strings start with 3 quotes
            discard self.step(2)
            self.parseString(single, "multi")
        else:
            self.parseString(single)
    elif single.isDigit():
        self.parseNumber()
    elif single.isAlphaNumeric() and self.check(['"', '\'']):
        # Like Python, we support bytes and raw literals
        # (a letter immediately followed by a quote is a string prefix)
        case single:
            of 'r':
                self.parseString(self.step(), "raw")
            of 'b':
                self.parseString(self.step(), "bytes")
            of 'f':
                self.parseString(self.step(), "format")
            else:
                self.error(&"unknown string prefix '{single}'")
    elif single.isAlphaNumeric() or single == '_':
        self.parseIdentifier()
    else:
        # Comments are a special case
        if single == '#':
            # Skip everything until the end of the line (or EOF)
            while not (self.check('\n') or self.done()):
                discard self.step()
            return
        # We start by checking for multi-character tokens,
        # in descending length so //= doesn't translate
        # to the pair of tokens (//, =) for example
        for key in triple.keys():
            if key[0] == single and self.check(key[1..^1]):
                discard self.step(2) # We step 2 characters
                self.createToken(triple[key])
                return
        for key in double.keys():
            if key[0] == single and self.check(key[1]):
                discard self.step()
                self.createToken(double[key])
                return
        if single in tokens:
            # Eventually we emit a single token
            self.createToken(tokens[single])
        else:
            self.error(&"unexpected token '{single}'")
|
||||
|
||||
|
||||
proc lex*(self: Lexer, source, file: string): seq[Token] =
    ## Lexes a source file, converting a stream
    ## of characters into a series of tokens.
    ## The file name is kept only for error reporting.
    # Reset the lexer state before scanning a new source
    discard self.initLexer()
    self.source = source
    self.file = file
    while not self.done():
        self.next()
        # Each call to next() consumes one token's worth of characters,
        # so the next token starts where the scan currently is
        self.start = self.current
    # Always terminate the stream with an explicit EOF token
    self.tokens.add(Token(kind: EndOfFile, lexeme: "",
                          line: self.line))
    return self.tokens
|
|
@ -0,0 +1,760 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
## An Abstract Syntax Tree (AST) structure for our recursive-descent
|
||||
## top-down parser. For more info, check out docs/grammar.md
|
||||
|
||||
|
||||
import strformat
|
||||
import strutils
|
||||
|
||||
|
||||
import token
|
||||
|
||||
|
||||
type
    NodeKind* = enum
        ## Enumeration of the AST
        ## node types, sorted by
        ## precedence

        # Declarations
        classDecl = 0u8,
        funDecl,
        varDecl,
        # Statements
        forStmt, # Unused for now (for loops are compiled to while loops)
        ifStmt,
        returnStmt,
        breakStmt,
        continueStmt,
        whileStmt,
        forEachStmt,
        blockStmt,
        raiseStmt,
        assertStmt,
        delStmt,
        tryStmt,
        yieldStmt,
        awaitStmt,
        fromImportStmt,
        importStmt,
        deferStmt,
        # An expression followed by a semicolon
        exprStmt,
        # Expressions
        assignExpr,
        lambdaExpr,
        awaitExpr,
        yieldExpr,
        setItemExpr, # Set expressions like a.b = "c"
        binaryExpr,
        unaryExpr,
        sliceExpr,
        callExpr,
        getItemExpr, # Get expressions like a.b
        # Primary expressions
        groupingExpr, # Parenthesized expressions such as (true) and (3 + 4)
        trueExpr,
        listExpr,
        tupleExpr,
        dictExpr,
        setExpr,
        falseExpr,
        strExpr,
        intExpr,
        floatExpr,
        hexExpr,
        octExpr,
        binExpr,
        nilExpr,
        nanExpr,
        infExpr,
        identExpr, # Identifier


    ASTNode* = ref object of RootObj
        ## An AST node
        kind*: NodeKind
        # Regardless of the type of node, we keep the token in the AST node for internal usage.
        # This is not shown when the node is printed, but makes it a heck of a lot easier to report
        # errors accurately even deep in the compilation pipeline
        token*: Token

    # Here I would've rather used object variants, and in fact that's what was in
    # place before, but not being able to re-declare a field of the same type in
    # another case branch is kind of a deal breaker long-term, so until that is
    # fixed (check out https://github.com/nim-lang/RFCs/issues/368 for more info)
    # I'll stick to using inheritance instead

    LiteralExpr* = ref object of ASTNode
        ## Base node for all simple literals (numbers, strings, keywords)
        # Using a string for literals makes it much easier to handle numeric types, as
        # there is no overflow nor underflow or float precision issues during parsing.
        # Numbers are just serialized as strings and then converted back to numbers
        # before being passed to the VM, which also keeps the door open in the future
        # to implementing bignum arithmetic that can take advantage of natively supported
        # machine types, meaning that if a numeric type fits into a 64 bit signed/unsigned
        # int then it is stored in such a type to save space, otherwise it is just converted
        # to a bigint. Bigfloats with arbitrary-precision arithmetic would also be nice,
        # although arguably less useful (and probably significantly slower than bigints)
        literal*: Token

    IntExpr* = ref object of LiteralExpr
    OctExpr* = ref object of LiteralExpr
    HexExpr* = ref object of LiteralExpr
    BinExpr* = ref object of LiteralExpr
    FloatExpr* = ref object of LiteralExpr
    StrExpr* = ref object of LiteralExpr

    # These are technically keywords, not literals!
    # NOTE(review): the constructors below (newTrueExpr & friends) return
    # LiteralExpr instances instead of these dedicated types — confirm these
    # types are intentionally unused
    TrueExpr* = ref object of ASTNode
    FalseExpr* = ref object of ASTNode
    NilExpr* = ref object of ASTNode
    NanExpr* = ref object of ASTNode
    InfExpr* = ref object of ASTNode

    # Although this is *technically* a literal, Nim doesn't
    # allow us to redefine fields from supertypes so it's
    # a tough luck for us
    ListExpr* = ref object of ASTNode
        ## A list literal such as [1, 2, 3]
        members*: seq[ASTNode]

    SetExpr* = ref object of ListExpr
        ## A set literal; structurally identical to a list

    TupleExpr* = ref object of ListExpr
        ## A tuple literal; structurally identical to a list

    DictExpr* = ref object of ASTNode
        ## A dictionary literal; keys and values map 1:1 by index
        keys*: seq[ASTNode]
        values*: seq[ASTNode]

    IdentExpr* = ref object of ASTNode
        ## An identifier reference
        name*: Token

    GroupingExpr* = ref object of ASTNode
        ## A parenthesized expression
        expression*: ASTNode

    GetItemExpr* = ref object of ASTNode
        ## An attribute access such as a.b
        obj*: ASTNode
        name*: ASTNode

    SetItemExpr* = ref object of GetItemExpr
        # Since a setItem expression is just
        # a getItem one followed by an assignment,
        # inheriting it from getItem makes sense
        value*: ASTNode

    CallExpr* = ref object of ASTNode
        ## A call expression with positional and keyword arguments
        callee*: ASTNode # The thing being called
        arguments*: tuple[positionals: seq[ASTNode], keyword: seq[tuple[
                name: ASTNode, value: ASTNode]]]

    UnaryExpr* = ref object of ASTNode
        ## A unary operator applied to a single operand
        operator*: Token
        a*: ASTNode

    BinaryExpr* = ref object of UnaryExpr
        # Binary expressions can be seen here as unary
        # expressions with an extra operand so we just
        # inherit from that and add a second operand
        b*: ASTNode

    YieldExpr* = ref object of ASTNode
        expression*: ASTNode

    AwaitExpr* = ref object of ASTNode
        awaitee*: ASTNode

    LambdaExpr* = ref object of ASTNode
        ## An anonymous function expression
        body*: ASTNode
        arguments*: seq[ASTNode]
        # This is, in order, the list of each default argument
        # the function takes. It maps 1:1 with self.arguments
        # although it may be shorter (in which case this maps
        # 1:1 with what's left of self.arguments after all
        # positional arguments have been consumed)
        defaults*: seq[ASTNode]
        isGenerator*: bool

    SliceExpr* = ref object of ASTNode
        ## A slicing expression such as a[1:2]
        slicee*: ASTNode
        ends*: seq[ASTNode]

    AssignExpr* = ref object of ASTNode
        ## An assignment to a plain name
        name*: ASTNode
        value*: ASTNode

    ExprStmt* = ref object of ASTNode
        ## An expression used in statement position
        expression*: ASTNode

    ImportStmt* = ref object of ASTNode
        moduleName*: ASTNode

    FromImportStmt* = ref object of ASTNode
        fromModule*: ASTNode
        fromAttributes*: seq[ASTNode]

    DelStmt* = ref object of ASTNode
        name*: ASTNode

    AssertStmt* = ref object of ASTNode
        expression*: ASTNode

    RaiseStmt* = ref object of ASTNode
        exception*: ASTNode

    BlockStmt* = ref object of ASTNode
        ## A block of statements sharing one scope
        code*: seq[ASTNode]

    ForStmt* = ref object of ASTNode
        discard # Unused

    ForEachStmt* = ref object of ASTNode
        identifier*: ASTNode
        expression*: ASTNode
        body*: ASTNode

    DeferStmt* = ref object of ASTNode
        deferred*: ASTNode

    TryStmt* = ref object of ASTNode
        ## A try statement; each handler is (body, exception type, bound name)
        body*: ASTNode
        handlers*: seq[tuple[body: ASTNode, exc: ASTNode, name: ASTNode]]
        finallyClause*: ASTNode
        elseClause*: ASTNode

    WhileStmt* = ref object of ASTNode
        condition*: ASTNode
        body*: ASTNode

    AwaitStmt* = ref object of ASTNode
        awaitee*: ASTNode

    BreakStmt* = ref object of ASTNode

    ContinueStmt* = ref object of ASTNode

    ReturnStmt* = ref object of ASTNode
        value*: ASTNode

    IfStmt* = ref object of ASTNode
        condition*: ASTNode
        thenBranch*: ASTNode
        elseBranch*: ASTNode

    YieldStmt* = ref object of ASTNode
        expression*: ASTNode

    Declaration* = ref object of ASTNode
        ## Base node for all declarations
        owner*: string # Used for determining if a module can access a given field

    VarDecl* = ref object of Declaration
        name*: ASTNode
        value*: ASTNode
        isConst*: bool
        isStatic*: bool
        isPrivate*: bool

    FunDecl* = ref object of Declaration
        name*: ASTNode
        body*: ASTNode
        arguments*: seq[ASTNode]
        # This is, in order, the list of each default argument
        # the function takes. It maps 1:1 with self.arguments
        # although it may be shorter (in which case this maps
        # 1:1 with what's left of self.arguments after all
        # positional arguments have been consumed)
        defaults*: seq[ASTNode]
        isAsync*: bool
        isGenerator*: bool
        isStatic*: bool
        isPrivate*: bool

    ClassDecl* = ref object of Declaration
        name*: ASTNode
        body*: ASTNode
        parents*: seq[ASTNode]
        isStatic*: bool
        isPrivate*: bool

    # Convenience typeclasses grouping node types by role
    Expression* = LiteralExpr | ListExpr | GetItemExpr | SetItemExpr | UnaryExpr | BinaryExpr | CallExpr | AssignExpr |
                  GroupingExpr | IdentExpr | DictExpr | TupleExpr | SetExpr |
                  TrueExpr | FalseExpr | NilExpr |
                  NanExpr | InfExpr

    # NOTE(review): ForStmt appears twice in this union — likely one of the
    # two occurrences was meant to be another statement type
    Statement* = ExprStmt | ImportStmt | FromImportStmt | DelStmt | AssertStmt | RaiseStmt | BlockStmt | ForStmt | WhileStmt |
                 ForStmt | BreakStmt | ContinueStmt | ReturnStmt | IfStmt
|
||||
|
||||
|
||||
|
||||
|
||||
proc newASTNode*(kind: NodeKind, token: Token): ASTNode =
    ## Initializes a new generic ASTNode object of the
    ## given kind, remembering the originating token
    ASTNode(kind: kind, token: token)
|
||||
|
||||
|
||||
proc isConst*(self: ASTNode): bool {.inline.} =
    ## Returns true if the node is a simple constant
    ## literal (numbers, strings, booleans, nil, nan, inf)
    const constantKinds = {intExpr, hexExpr, binExpr, octExpr,
                           strExpr, falseExpr, trueExpr, infExpr,
                           nanExpr, floatExpr, nilExpr}
    self.kind in constantKinds


proc isLiteral*(self: ASTNode): bool {.inline.} =
    ## Returns true for any literal node, i.e. simple
    ## constants as well as collection literals
    if self.isConst():
        return true
    self.kind in {tupleExpr, dictExpr, setExpr, listExpr}
|
||||
|
||||
|
||||
proc newIntExpr*(literal: Token): IntExpr =
    ## Creates a new integer literal node
    IntExpr(kind: intExpr, literal: literal, token: literal)


proc newOctExpr*(literal: Token): OctExpr =
    ## Creates a new octal literal node
    OctExpr(kind: octExpr, literal: literal, token: literal)


proc newHexExpr*(literal: Token): HexExpr =
    ## Creates a new hexadecimal literal node
    HexExpr(kind: hexExpr, literal: literal, token: literal)


proc newBinExpr*(literal: Token): BinExpr =
    ## Creates a new binary literal node
    BinExpr(kind: binExpr, literal: literal, token: literal)


proc newFloatExpr*(literal: Token): FloatExpr =
    ## Creates a new float literal node
    FloatExpr(kind: floatExpr, literal: literal, token: literal)
|
||||
|
||||
|
||||
# Constructors for the keyword "literals". NOTE(review): these build plain
# LiteralExpr nodes rather than the dedicated TrueExpr/FalseExpr/... types —
# confirm those types are intentionally unused
proc newTrueExpr*(token: Token): LiteralExpr =
    ## Creates a new "true" node
    LiteralExpr(kind: trueExpr, token: token)

proc newFalseExpr*(token: Token): LiteralExpr =
    ## Creates a new "false" node
    LiteralExpr(kind: falseExpr, token: token)

proc newNaNExpr*(token: Token): LiteralExpr =
    ## Creates a new "nan" node
    LiteralExpr(kind: nanExpr, token: token)

proc newNilExpr*(token: Token): LiteralExpr =
    ## Creates a new "nil" node
    LiteralExpr(kind: nilExpr, token: token)

proc newInfExpr*(token: Token): LiteralExpr =
    ## Creates a new "inf" node
    LiteralExpr(kind: infExpr, token: token)
|
||||
|
||||
|
||||
proc newStrExpr*(literal: Token): StrExpr =
    ## Creates a new string literal node
    StrExpr(kind: strExpr, literal: literal, token: literal)
|
||||
|
||||
|
||||
proc newIdentExpr*(name: Token): IdentExpr =
    ## Creates a new identifier node
    IdentExpr(kind: identExpr, name: name, token: name)


proc newGroupingExpr*(expression: ASTNode, token: Token): GroupingExpr =
    ## Creates a new parenthesized-expression node
    GroupingExpr(kind: groupingExpr, expression: expression, token: token)
|
||||
|
||||
|
||||
proc newLambdaExpr*(arguments, defaults: seq[ASTNode], body: ASTNode,
                    isGenerator: bool, token: Token): LambdaExpr =
    ## Creates a new anonymous-function node. The defaults
    ## sequence maps onto the tail of the arguments sequence
    LambdaExpr(kind: lambdaExpr, body: body, arguments: arguments,
               defaults: defaults, isGenerator: isGenerator, token: token)
|
||||
|
||||
|
||||
proc newGetItemExpr*(obj: ASTNode, name: ASTNode, token: Token): GetItemExpr =
    ## Creates a new attribute-access node (a.b)
    GetItemExpr(kind: getItemExpr, obj: obj, name: name, token: token)
|
||||
|
||||
|
||||
proc newListExpr*(members: seq[ASTNode], token: Token): ListExpr =
    ## Creates a new list literal node
    ListExpr(kind: listExpr, members: members, token: token)


proc newSetExpr*(members: seq[ASTNode], token: Token): SetExpr =
    ## Creates a new set literal node
    SetExpr(kind: setExpr, members: members, token: token)


proc newTupleExpr*(members: seq[ASTNode], token: Token): TupleExpr =
    ## Creates a new tuple literal node
    TupleExpr(kind: tupleExpr, members: members, token: token)
|
||||
|
||||
|
||||
proc newDictExpr*(keys, values: seq[ASTNode], token: Token): DictExpr =
    ## Creates a new dictionary literal node; keys and
    ## values are expected to map 1:1 by index
    DictExpr(kind: dictExpr, keys: keys, values: values, token: token)
|
||||
|
||||
|
||||
proc newSetItemExpr*(obj, name, value: ASTNode, token: Token): SetItemExpr =
    ## Creates a new attribute-assignment node (a.b = c)
    SetItemExpr(kind: setItemExpr, obj: obj, name: name,
                value: value, token: token)
|
||||
|
||||
|
||||
proc newCallExpr*(callee: ASTNode, arguments: tuple[positionals: seq[ASTNode],
                  keyword: seq[tuple[name: ASTNode, value: ASTNode]]],
                  token: Token): CallExpr =
    ## Creates a new call node with positional and keyword arguments
    CallExpr(kind: callExpr, callee: callee, arguments: arguments,
             token: token)
|
||||
|
||||
|
||||
proc newSliceExpr*(slicee: ASTNode, ends: seq[ASTNode],
                   token: Token): SliceExpr =
    ## Creates a new slicing node
    SliceExpr(kind: sliceExpr, slicee: slicee, ends: ends, token: token)
|
||||
|
||||
|
||||
proc newUnaryExpr*(operator: Token, a: ASTNode): UnaryExpr =
    ## Creates a new unary-operator node; the operator
    ## token doubles as the node's source token
    UnaryExpr(kind: unaryExpr, operator: operator, a: a, token: operator)


proc newBinaryExpr*(a: ASTNode, operator: Token, b: ASTNode): BinaryExpr =
    ## Creates a new binary-operator node; the operator
    ## token doubles as the node's source token
    BinaryExpr(kind: binaryExpr, operator: operator, a: a, b: b,
               token: operator)
|
||||
|
||||
|
||||
proc newYieldExpr*(expression: ASTNode, token: Token): YieldExpr =
    ## Creates a new yield-expression node
    YieldExpr(kind: yieldExpr, expression: expression, token: token)


proc newAssignExpr*(name, value: ASTNode, token: Token): AssignExpr =
    ## Creates a new assignment node
    AssignExpr(kind: assignExpr, name: name, value: value, token: token)


proc newAwaitExpr*(awaitee: ASTNode, token: Token): AwaitExpr =
    ## Creates a new await-expression node
    AwaitExpr(kind: awaitExpr, awaitee: awaitee, token: token)
|
||||
|
||||
|
||||
proc newExprStmt*(expression: ASTNode, token: Token): ExprStmt =
    ## Creates a new expression-statement node
    ExprStmt(kind: exprStmt, expression: expression, token: token)


proc newImportStmt*(moduleName: ASTNode, token: Token): ImportStmt =
    ## Creates a new import-statement node
    ImportStmt(kind: importStmt, moduleName: moduleName, token: token)


proc newFromImportStmt*(fromModule: ASTNode, fromAttributes: seq[ASTNode],
                        token: Token): FromImportStmt =
    ## Creates a new from-import statement node
    FromImportStmt(kind: fromImportStmt, fromModule: fromModule,
                   fromAttributes: fromAttributes, token: token)


proc newDelStmt*(name: ASTNode, token: Token): DelStmt =
    ## Creates a new del-statement node
    DelStmt(kind: delStmt, name: name, token: token)
|
||||
|
||||
|
||||
proc newYieldStmt*(expression: ASTNode, token: Token): YieldStmt =
    ## Creates a new yield-statement node
    YieldStmt(kind: yieldStmt, expression: expression, token: token)


proc newAwaitStmt*(awaitee: ASTNode, token: Token): AwaitExpr =
    ## Creates a new await-statement node
    # NOTE(review): despite the name, this builds an AwaitExpr with kind
    # awaitExpr, not an AwaitStmt with kind awaitStmt, so the awaitStmt
    # branch of `$` is unreachable from here — confirm this is intended
    AwaitExpr(kind: awaitExpr, awaitee: awaitee, token: token)
|
||||
|
||||
|
||||
proc newAssertStmt*(expression: ASTNode, token: Token): AssertStmt =
    ## Creates a new assert-statement node
    AssertStmt(kind: assertStmt, expression: expression, token: token)


proc newDeferStmt*(deferred: ASTNode, token: Token): DeferStmt =
    ## Creates a new defer-statement node
    DeferStmt(kind: deferStmt, deferred: deferred, token: token)


proc newRaiseStmt*(exception: ASTNode, token: Token): RaiseStmt =
    ## Creates a new raise-statement node
    RaiseStmt(kind: raiseStmt, exception: exception, token: token)
|
||||
|
||||
|
||||
proc newTryStmt*(body: ASTNode, handlers: seq[tuple[body: ASTNode, exc: ASTNode, name: ASTNode]],
                 finallyClause: ASTNode,
                 elseClause: ASTNode, token: Token): TryStmt =
    ## Creates a new try-statement node; finallyClause and
    ## elseClause may be nil when the respective clause is absent
    TryStmt(kind: tryStmt, body: body, handlers: handlers,
            finallyClause: finallyClause, elseClause: elseClause,
            token: token)
|
||||
|
||||
|
||||
proc newBlockStmt*(code: seq[ASTNode], token: Token): BlockStmt =
    ## Creates a new block-statement node
    BlockStmt(kind: blockStmt, code: code, token: token)


proc newWhileStmt*(condition: ASTNode, body: ASTNode, token: Token): WhileStmt =
    ## Creates a new while-statement node
    WhileStmt(kind: whileStmt, condition: condition, body: body, token: token)
|
||||
|
||||
|
||||
proc newForEachStmt*(identifier: ASTNode, expression, body: ASTNode,
                     token: Token): ForEachStmt =
    ## Creates a new foreach-statement node
    ForEachStmt(kind: forEachStmt, identifier: identifier,
                expression: expression, body: body, token: token)
|
||||
|
||||
|
||||
proc newBreakStmt*(token: Token): BreakStmt =
    ## Creates a new break-statement node
    BreakStmt(kind: breakStmt, token: token)


proc newContinueStmt*(token: Token): ContinueStmt =
    ## Creates a new continue-statement node
    ContinueStmt(kind: continueStmt, token: token)


proc newReturnStmt*(value: ASTNode, token: Token): ReturnStmt =
    ## Creates a new return-statement node
    ReturnStmt(kind: returnStmt, value: value, token: token)
|
||||
|
||||
|
||||
proc newIfStmt*(condition: ASTNode, thenBranch, elseBranch: ASTNode,
                token: Token): IfStmt =
    ## Creates a new if-statement node; elseBranch may
    ## be nil when there is no else clause
    IfStmt(kind: ifStmt, condition: condition, thenBranch: thenBranch,
           elseBranch: elseBranch, token: token)
|
||||
|
||||
|
||||
proc newVarDecl*(name: ASTNode, value: ASTNode = newNilExpr(Token()),
                 isStatic: bool = true, isConst: bool = false,
                 isPrivate: bool = true, token: Token, owner: string): VarDecl =
    ## Creates a new variable declaration node; the value
    ## defaults to a nil literal when none is provided
    VarDecl(kind: varDecl, name: name, value: value, isConst: isConst,
            isStatic: isStatic, isPrivate: isPrivate, token: token,
            owner: owner)
|
||||
|
||||
|
||||
proc newFunDecl*(name: ASTNode, arguments, defaults: seq[ASTNode],
                 body: ASTNode, isStatic: bool = true, isAsync,
                 isGenerator: bool, isPrivate: bool = true, token: Token,
                 owner: string): FunDecl =
    ## Creates a new function declaration node. The defaults
    ## sequence maps onto the tail of the arguments sequence
    FunDecl(kind: funDecl, name: name, arguments: arguments,
            defaults: defaults, body: body, isAsync: isAsync,
            isGenerator: isGenerator, isStatic: isStatic,
            isPrivate: isPrivate, token: token, owner: owner)
|
||||
|
||||
|
||||
proc newClassDecl*(name: ASTNode, body: ASTNode,
                   parents: seq[ASTNode], isStatic: bool = true,
                   isPrivate: bool = true, token: Token,
                   owner: string): ClassDecl =
    ## Creates a new class declaration node
    ClassDecl(kind: classDecl, name: name, body: body, parents: parents,
              isStatic: isStatic, isPrivate: isPrivate, token: token,
              owner: owner)
|
||||
|
||||
|
||||
proc `$`*(self: ASTNode): string =
    ## Returns a human-readable string representation of the
    ## given AST node (recursively stringifying child nodes),
    ## mainly intended for debugging. Returns "nil" for nil nodes
    if self == nil:
        return "nil"
    case self.kind:
        of intExpr, floatExpr, hexExpr, binExpr, octExpr, strExpr, trueExpr,
           falseExpr, nanExpr, nilExpr, infExpr:
            if self.kind in {trueExpr, falseExpr, nanExpr, nilExpr, infExpr}:
                # Keyword literals carry no literal token, so we derive the
                # name by stripping the "Expr" suffix from the kind's name
                result &= &"Literal({($self.kind)[0..^5]})"
            elif self.kind == strExpr:
                # Drop the surrounding quotes before escaping the contents
                result &= &"Literal({LiteralExpr(self).literal.lexeme[1..^2].escape()})"
            else:
                result &= &"Literal({LiteralExpr(self).literal.lexeme})"
        of identExpr:
            result &= &"Identifier('{IdentExpr(self).name.lexeme}')"
        of groupingExpr:
            result &= &"Grouping({GroupingExpr(self).expression})"
        of getItemExpr:
            var self = GetItemExpr(self)
            result &= &"GetItem(obj={self.obj}, name={self.name})"
        of setItemExpr:
            var self = SetItemExpr(self)
            # Bugfix: 'name' was previously printed from self.value,
            # which duplicated the value and hid the real target name
            result &= &"SetItem(obj={self.obj}, name={self.name}, value={self.value})"
        of callExpr:
            var self = CallExpr(self)
            result &= &"""Call({self.callee}, arguments=(positionals=[{self.arguments.positionals.join(", ")}], keyword=[{self.arguments.keyword.join(", ")}]))"""
        of unaryExpr:
            var self = UnaryExpr(self)
            result &= &"Unary(Operator('{self.operator.lexeme}'), {self.a})"
        of binaryExpr:
            var self = BinaryExpr(self)
            result &= &"Binary({self.a}, Operator('{self.operator.lexeme}'), {self.b})"
        of assignExpr:
            var self = AssignExpr(self)
            result &= &"Assign(name={self.name}, value={self.value})"
        of exprStmt:
            var self = ExprStmt(self)
            result &= &"ExpressionStatement({self.expression})"
        of breakStmt:
            # Use &= like every other branch for consistency
            result &= "Break()"
        of importStmt:
            var self = ImportStmt(self)
            result &= &"Import({self.moduleName})"
        of fromImportStmt:
            var self = FromImportStmt(self)
            result &= &"""FromImport(fromModule={self.fromModule}, fromAttributes=[{self.fromAttributes.join(", ")}])"""
        of delStmt:
            var self = DelStmt(self)
            result &= &"Del({self.name})"
        of assertStmt:
            var self = AssertStmt(self)
            result &= &"Assert({self.expression})"
        of raiseStmt:
            var self = RaiseStmt(self)
            result &= &"Raise({self.exception})"
        of blockStmt:
            var self = BlockStmt(self)
            result &= &"""Block([{self.code.join(", ")}])"""
        of whileStmt:
            var self = WhileStmt(self)
            result &= &"While(condition={self.condition}, body={self.body})"
        of forEachStmt:
            var self = ForEachStmt(self)
            result &= &"ForEach(identifier={self.identifier}, expression={self.expression}, body={self.body})"
        of returnStmt:
            var self = ReturnStmt(self)
            result &= &"Return({self.value})"
        of yieldExpr:
            var self = YieldExpr(self)
            result &= &"Yield({self.expression})"
        of awaitExpr:
            var self = AwaitExpr(self)
            result &= &"Await({self.awaitee})"
        of ifStmt:
            var self = IfStmt(self)
            if self.elseBranch == nil:
                result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch=nil)"
            else:
                result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch={self.elseBranch})"
        of yieldStmt:
            var self = YieldStmt(self)
            result &= &"YieldStmt({self.expression})"
        of awaitStmt:
            var self = AwaitStmt(self)
            result &= &"AwaitStmt({self.awaitee})"
        of varDecl:
            var self = VarDecl(self)
            result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, static={self.isStatic}, private={self.isPrivate})"
        of funDecl:
            var self = FunDecl(self)
            result &= &"""FunDecl(name={self.name}, body={self.body}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, static={self.isStatic}, private={self.isPrivate})"""
        of classDecl:
            var self = ClassDecl(self)
            result &= &"""Class(name={self.name}, body={self.body}, parents=[{self.parents.join(", ")}], static={self.isStatic}, private={self.isPrivate})"""
        of tupleExpr:
            var self = TupleExpr(self)
            result &= &"""Tuple([{self.members.join(", ")}])"""
        of setExpr:
            var self = SetExpr(self)
            result &= &"""Set([{self.members.join(", ")}])"""
        of listExpr:
            var self = ListExpr(self)
            result &= &"""List([{self.members.join(", ")}])"""
        of dictExpr:
            var self = DictExpr(self)
            result &= &"""Dict(keys=[{self.keys.join(", ")}], values=[{self.values.join(", ")}])"""
        of lambdaExpr:
            var self = LambdaExpr(self)
            result &= &"""Lambda(body={self.body}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator})"""
        of deferStmt:
            var self = DeferStmt(self)
            result &= &"Defer({self.deferred})"
        of sliceExpr:
            var self = SliceExpr(self)
            result &= &"""Slice({self.slicee}, ends=[{self.ends.join(", ")}])"""
        of tryStmt:
            var self = TryStmt(self)
            result &= &"TryStmt(body={self.body}, handlers={self.handlers}"
            if self.finallyClause != nil:
                result &= &", finallyClause={self.finallyClause}"
            else:
                result &= ", finallyClause=nil"
            if self.elseClause != nil:
                result &= &", elseClause={self.elseClause}"
            else:
                result &= ", elseClause=nil"
            result &= ")"
        else:
            # Node kinds with no dedicated representation (e.g. forStmt)
            discard
|
|
@ -0,0 +1,286 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import ast
|
||||
import ../../util/multibyte
|
||||
import errors
|
||||
|
||||
|
||||
import strutils
|
||||
import strformat
|
||||
|
||||
|
||||
export ast
|
||||
|
||||
|
||||
type
    Chunk* = ref object
        ## A piece of bytecode.
        ## Consts represents the constants table the code is referring to.
        ## Code is the linear sequence of compiled bytecode instructions.
        ## Lines maps bytecode instructions to line numbers using Run
        ## Length Encoding. Instructions are encoded in groups whose structure
        ## follows the following schema:
        ## - The first integer represents the line number
        ## - The second integer represents the count of whatever comes after it
        ##   (let's call it c)
        ## - After c, a sequence of c integers follows
        ##
        ## A visual representation may be easier to understand: [1, 2, 3, 4]
        ## This is to be interpreted as "there are 2 instructions at line 1 whose values
        ## are 3 and 4"
        ## This is more efficient than using the naive approach, which would encode
        ## the same line number multiple times and waste considerable amounts of space.
        consts*: seq[ASTNode]   # Constants table (stored as AST nodes)
        code*: seq[uint8]       # Linear sequence of bytecode instructions
        lines*: seq[int]        # Run-length encoded (line, count) pairs, see above
        reuseConsts*: bool      # Whether identical constants share a single table slot

    OpCode* {.pure.} = enum
        ## Enum of possible opcodes.

        # Note: x represents the
        # argument to unary opcodes, while
        # a and b represent arguments to binary
        # opcodes. Other variable names may be
        # used for more complex opcodes. All
        # arguments to opcodes (if they take
        # arguments) come from popping off the
        # stack
        LoadConstant = 0u8, # Pushes constant at position x in the constant table onto the stack
        # Binary operators
        UnaryNegate, # Pushes the result of -x onto the stack
        BinaryAdd, # Pushes the result of a + b onto the stack
        BinarySubtract, # Pushes the result of a - b onto the stack
        BinaryDivide, # Pushes the result of a / b onto the stack (true division). The result is a float
        BinaryFloorDiv, # Pushes the result of a // b onto the stack (integer division). The result is always an integer
        BinaryMultiply, # Pushes the result of a * b onto the stack
        BinaryPow, # Pushes the result of a ** b (a to the power of b) onto the stack
        BinaryMod, # Pushes the result of a % b onto the stack (modulo division)
        BinaryShiftRight, # Pushes the result of a >> b (a with bits shifted b times to the right) onto the stack
        BinaryShiftLeft, # Pushes the result of a << b (a with bits shifted b times to the left) onto the stack
        BinaryXor, # Pushes the result of a ^ b (bitwise exclusive or) onto the stack
        BinaryOr, # Pushes the result of a | b (bitwise or) onto the stack
        BinaryAnd, # Pushes the result of a & b (bitwise and) onto the stack
        UnaryNot, # Pushes the result of ~x (bitwise not) onto the stack
        BinaryAs, # Pushes the result of a as b onto the stack (converts a to the type of b. Explicit support from a is required)
        BinaryIs, # Pushes the result of a is b onto the stack (true if a and b point to the same object, false otherwise)
        BinaryIsNot, # Pushes the result of not (a is b). This could be implemented in terms of BinaryIs, but it's more efficient this way
        BinaryOf, # Pushes the result of a of b onto the stack (true if a is a subclass of b, false otherwise)
        BinarySlice, # Perform slicing on supported objects (like "hello"[0:2], which yields "he"). The result is pushed onto the stack
        BinarySubscript, # Subscript operator, like "hello"[0] (which pushes 'h' onto the stack)
        # Binary comparison operators
        GreaterThan, # Pushes the result of a > b onto the stack
        LessThan, # Pushes the result of a < b onto the stack
        EqualTo, # Pushes the result of a == b onto the stack
        NotEqualTo, # Pushes the result of a != b onto the stack (optimization for not (a == b))
        GreaterOrEqual, # Pushes the result of a >= b onto the stack
        LessOrEqual, # Pushes the result of a <= b onto the stack
        # Logical operators
        LogicalNot,
        LogicalAnd,
        LogicalOr,
        # Constants/singletons
        Nil,
        True,
        False,
        Nan,
        Inf,
        # Basic stack operations
        Pop,
        Push,
        PopN, # Pops N elements off the stack (optimization for exiting scopes and returning from functions)
        # Name resolution/handling
        LoadAttribute,
        DeclareName, # Declares a global dynamically bound name in the current scope
        LoadName, # Loads a dynamically bound variable
        LoadFast, # Loads a statically bound variable
        StoreName, # Sets/updates a dynamically bound variable's value
        StoreFast, # Sets/updates a statically bound variable's value
        DeleteName, # Unbinds a dynamically bound variable's name from the current scope
        DeleteFast, # Unbinds a statically bound variable's name from the current scope
        # Looping and jumping
        Jump, # Absolute and unconditional jump into the bytecode
        JumpIfFalse, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey
        JumpIfTrue, # Jumps to an absolute index in the bytecode if the value at the top of the stack is truthy
        JumpIfFalsePop, # Like JumpIfFalse, but it also pops off the stack (regardless of truthyness). Optimization for if statements
        JumpForwards, # Relative, unconditional, positive jump in the bytecode
        JumpBackwards, # Relative, unconditional, negative jump into the bytecode
        Break, # Temporary opcode used to signal exiting out of loop
        ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
        LongJump,
        LongJumpIfFalse,
        LongJumpIfTrue,
        LongJumpIfFalsePop,
        LongJumpForwards,
        LongJumpBackwards,
        # Functions
        MakeFunction,
        Call,
        Return
        # Exception handling
        Raise,
        ReRaise, # Re-raises active exception
        BeginTry,
        FinishTry,
        # Generators
        Yield,
        # Coroutines
        Await,
        # Collection literals
        BuildList,
        BuildDict,
        BuildSet,
        BuildTuple,
        # Misc
        Assert,
|
||||
|
||||
|
||||
# We group instructions by their operation/operand types for easier handling when debugging

# Simple instructions encompass:
# - Instructions that push onto/pop off the stack unconditionally (True, False, PopN, Pop, etc.)
# - Unary and binary operators
const simpleInstructions* = {Return, BinaryAdd, BinaryMultiply,
                             BinaryDivide, BinarySubtract,
                             BinaryMod, BinaryPow, Nil,
                             True, False, OpCode.Nan, OpCode.Inf,
                             BinaryShiftLeft, BinaryShiftRight,
                             BinaryXor, LogicalNot, EqualTo,
                             GreaterThan, LessThan, LoadAttribute,
                             BinarySlice, Pop, UnaryNegate,
                             BinaryIs, BinaryAs, GreaterOrEqual,
                             LessOrEqual, BinaryOr, BinaryAnd,
                             UnaryNot, BinaryFloorDiv, BinaryOf, Raise,
                             ReRaise, BeginTry, FinishTry, Yield, Await}

# Constant instructions are instructions that operate on the bytecode constant table
const constantInstructions* = {LoadConstant, DeclareName, LoadName, StoreName, DeleteName}

# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 24 bit integers
const stackTripleInstructions* = {Call, StoreFast, DeleteFast, LoadFast}

# Stack Double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 16 bit integers
# NOTE(review): currently an empty placeholder — no opcode uses 16-bit stack
# operands yet
const stackDoubleInstructions* = {}

# Argument double argument instructions take hardcoded arguments on the stack as 16 bit integers
const argumentDoubleInstructions* = {PopN, }

# Jump instructions jump at relative or absolute bytecode offsets
const jumpInstructions* = {JumpIfFalse, JumpIfFalsePop, JumpForwards, JumpBackwards,
                           LongJumpIfFalse, LongJumpIfFalsePop,
                           LongJumpForwards,
                           LongJumpBackwards, JumpIfTrue, LongJumpIfTrue}

# Collection instructions push a built-in collection type onto the stack
const collectionInstructions* = {BuildList, BuildDict, BuildSet, BuildTuple}
|
||||
|
||||
|
||||
proc newChunk*(reuseConsts: bool = true): Chunk =
    ## Creates and returns a fresh Chunk with empty constant,
    ## code and line tables. Constant reuse is enabled unless
    ## the caller opts out
    Chunk(consts: @[], code: @[], lines: @[], reuseConsts: reuseConsts)
|
||||
|
||||
|
||||
proc `$`*(self: Chunk): string =
    ## Returns a human-readable string representation
    ## of the chunk's constants, code and line tables
    result = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])"""
|
||||
|
||||
|
||||
proc write*(self: Chunk, newByte: uint8, line: int) =
    ## Appends a single bytecode instruction to the chunk,
    ## recording the source line it came from in the chunk's
    ## run-length encoded line table
    assert line > 0, "line must be greater than zero"
    if self.lines.len() >= 2 and self.lines[^2] == line:
        # Same line as the previous group: bump that group's counter
        inc(self.lines[^1])
    else:
        # Different line: open a new (line, count) group
        self.lines.add(line)
        self.lines.add(1)
    self.code.add(newByte)


proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
    ## Writes each byte of the given array to the chunk,
    ## all attributed to the same line number
    for b in bytes:
        self.write(b, line)


proc write*(self: Chunk, newByte: OpCode, line: int) =
    ## Writes a single opcode to the chunk at the
    ## provided line number
    self.write(uint8(newByte), line)


proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
    ## Writes each opcode of the given array to the chunk,
    ## all attributed to the same line number
    for op in bytes:
        self.write(uint8(op), line)
|
||||
|
||||
|
||||
proc getLine*(self: Chunk, idx: int): int =
    ## Returns the source line number associated with the
    ## instruction at index idx into self.code.
    ##
    ## The line table stores (line, count) pairs, so we walk
    ## the pairs keeping a running total of how many instructions
    ## have been covered so far and return the line whose window
    ## contains idx.
    ##
    ## Raises an IndexDefect if the chunk is empty or idx does
    ## not belong to any encoded group (including negative idx)
    if self.lines.len < 2:
        raise newException(IndexDefect, "the chunk object is empty")
    var
        count: int
        current: int = 0
    for n in countup(0, self.lines.high(), 2):
        count = self.lines[n + 1]
        # Fixed: the window used to be 'current - count..<current + count',
        # whose bogus lower bound made negative indexes match the first
        # group (-count..<count) and return a line instead of raising.
        # The instructions of this group occupy [current, current + count)
        if idx in current..<current + count:
            return self.lines[n]
        current += count
    raise newException(IndexDefect, "index out of range")
|
||||
|
||||
|
||||
proc findOrAddConstant(self: Chunk, constant: ASTNode): int =
    ## Small optimization function that reuses the same constant
    ## if it's already been written before (only if self.reuseConsts
    ## equals true). Returns the index of the constant inside
    ## self.consts, appending it first if no reusable entry exists
    if self.reuseConsts:
        for i, c in self.consts:
            # We cannot use simple equality because the nodes likely have
            # different token objects with different values
            if c.kind != constant.kind:
                continue
            if constant.isConst():
                # Shadow the outer names with down-casted views of the
                # same nodes so we can compare their lexemes
                var c = LiteralExpr(c)
                var constant = LiteralExpr(constant)
                if c.literal.lexeme == constant.literal.lexeme:
                    # This wouldn't work for stuff like 2e3 and 2000.0, but those
                    # forms are collapsed in the compiler before being written
                    # to the constants table
                    return i
                # NOTE(review): on a lexeme mismatch we simply fall through
                # to the next candidate (the loop continues implicitly)
            elif constant.kind == identExpr:
                var c = IdentExpr(c)
                var constant = IdentExpr(constant)
                if c.name.lexeme == constant.name.lexeme:
                    return i
            else:
                # Not a reusable node kind: keep scanning
                continue
    # No match found (or reuse disabled): append and return the new index
    self.consts.add(constant)
    result = self.consts.high()
|
||||
|
||||
|
||||
proc addConstant*(self: Chunk, constant: ASTNode): array[3, uint8] =
    ## Writes a constant to a chunk. Returns its index casted to a 3-byte
    ## sequence (array). Constant indexes are reused if a constant is used
    ## more than once and self.reuseConsts equals true.
    ##
    ## Raises a CompileError when the constant table is full
    # Robustness fix: use >= rather than == so the guard still trips
    # even if the table somehow grew past the limit through another path
    if self.consts.len() >= 16777215:
        # The constant index is a 24 bit unsigned integer, so that's as far
        # as we can index into the constant table (the same applies
        # to our stack by the way). Not that anyone's ever gonna hit this
        # limit in the real world, but you know, just in case
        raise newException(CompileError, "cannot encode more than 16777215 constants")
    result = self.findOrAddConstant(constant).toTriple()
|
|
@ -0,0 +1,20 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
type
    NimVMException* = object of CatchableError
        ## Base error type for the whole toolchain; catching
        ## this handles any frontend failure below
    LexingError* = object of NimVMException
        ## Raised when the lexer encounters malformed source text
    ParseError* = object of NimVMException
        ## Raised when the parser encounters invalid syntax
    CompileError* = object of NimVMException
        ## Raised by the compiler (e.g. when an encoding limit is exceeded)
    SerializationError* = object of NimVMException
        ## Raised when (de)serializing bytecode fails
|
|
@ -0,0 +1,86 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import strformat
|
||||
import strutils
|
||||
|
||||
|
||||
type
    TokenType* {.pure.} = enum
        ## Token types enumeration.
        ## NOTE: member order is part of the public interface
        ## (ordinal values) and must not be changed

        # Booleans
        True, False,

        # Other singleton types
        Infinity, NotANumber, Nil

        # Control-flow statements
        If, Else,

        # Looping statements
        While, For,

        # Keywords
        Fun, Break, Lambda,
        Continue, Var, Const, Is,
        Return, Async, Class, Import, From,
        IsNot, Raise, Assert, Del, Await,
        Foreach, Yield, Static, Dynamic,
        Private, Public, As, Of, Defer, Try,
        Except, Finally

        # Basic types

        Integer, Float, String, Identifier,
        Binary, Octal, Hex

        # Brackets, parentheses and other
        # symbols

        LeftParen, RightParen, # ()
        LeftBrace, RightBrace, # {}
        LeftBracket, RightBracket, # []
        Dot, Semicolon, Colon, Comma, # . ; : ,
        Plus, Minus, Slash, Asterisk, # + - / *
        Percentage, DoubleAsterisk, # % **
        Caret, Pipe, Ampersand, Tilde, # ^ | & ~
        Equal, GreaterThan, LessThan, # = > <
        LessOrEqual, GreaterOrEqual, # >= <=
        NotEqual, RightShift, LeftShift, # != >> <<
        LogicalAnd, LogicalOr, LogicalNot, FloorDiv, # and or not //
        InplaceAdd, InplaceSub, InplaceDiv, # += -= /=
        InplaceMod, InplaceMul, InplaceXor, # %= *= ^=
        InplaceAnd, InplaceOr, # &= |=
        DoubleEqual, InplaceFloorDiv, InplacePow, # == //= **=
        InplaceRightShift, InplaceLeftShift

        # Miscellaneous

        EndOfFile


    Token* = ref object
        ## A token object
        kind*: TokenType                # The kind of token
        lexeme*: string                 # The raw source text the token was built from
        line*: int                      # Source line the token appears on
        pos*: tuple[start, stop: int]   # Start/stop offsets into the source


proc `$`*(self: Token): string =
    ## Returns a human-readable representation of the
    ## token, or "nil" for nil references
    if self == nil:
        return "nil"
    result = &"Token(kind={self.kind}, lexeme={$(self.lexeme)}, line={self.line}, pos=({self.pos.start}, {self.pos.stop}))"
|
|
@ -0,0 +1,382 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import meta/ast
|
||||
import meta/token
|
||||
|
||||
import parseutils
|
||||
import strformat
|
||||
import strutils
|
||||
import math
|
||||
|
||||
|
||||
type
    WarningKind* = enum
        ## All the warnings the optimizer can emit
        unreachableCode,
        nameShadowing,
        isWithALiteral,
        equalityWithSingleton,
        valueOverflow,
        implicitConversion,
        invalidOperation

    Warning* = ref object
        ## A single warning, pairing its kind with
        ## the AST node that triggered it
        kind*: WarningKind
        node*: ASTNode

    Optimizer* = ref object
        ## AST-level optimizer performing constant folding
        ## and collecting lint-style warnings
        warnings: seq[Warning]   # Warnings accumulated so far (returned by optimize())
        foldConstants*: bool     # Whether constant folding is enabled
|
||||
|
||||
|
||||
proc initOptimizer*(foldConstants: bool = true): Optimizer =
    ## Creates a fresh optimizer with an empty warning list;
    ## constant folding is enabled unless the caller opts out
    result = Optimizer(foldConstants: foldConstants, warnings: @[])


proc newWarning(self: Optimizer, kind: WarningKind, node: ASTNode) =
    ## Records a warning of the given kind against the given node
    self.warnings.add(Warning(kind: kind, node: node))


proc `$`*(self: Warning): string =
    ## Returns a human-readable representation of the warning
    result = &"Warning(kind={self.kind}, node={self.node})"
|
||||
|
||||
|
||||
# Forward declaration
|
||||
proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode
|
||||
|
||||
|
||||
proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode =
    ## Performs some checks on constant AST nodes such as
    ## integers. This method converts all of the different
    ## integer forms (binary, octal and hexadecimal) to
    ## decimal integers. Overflows are checked here too.
    ##
    ## Fix: the parse calls used to live inside plain `assert`
    ## statements; since the parsed value is needed below, the
    ## parses would be compiled away entirely when assertions are
    ## disabled (release/danger builds), silently folding to 0.
    ## `doAssert` is kept regardless of the --assertions switch.
    if not self.foldConstants:
        return node
    case node.kind:
        of intExpr:
            # Decimal integers only get an overflow check, no rewriting
            var x: int
            var y = IntExpr(node)
            try:
                doAssert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
            result = node
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            try:
                doAssert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        of binExpr:
            var x: int
            var y = BinExpr(node)
            try:
                doAssert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        of octExpr:
            var x: int
            var y = OctExpr(node)
            try:
                doAssert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        of floatExpr:
            var x: float
            var y = FloatExpr(node)
            try:
                discard parseFloat(y.literal.lexeme, x)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        else:
            result = node
|
||||
|
||||
|
||||
proc optimizeUnary(self: Optimizer, node: UnaryExpr): ASTNode =
    ## Attempts to optimize unary expressions by folding
    ## negation/bitwise-not of integer and float literals
    var a = self.optimizeNode(node.a)
    if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a:
        # We can't optimize further, the overflow will be caught in the compiler
        return UnaryExpr(kind: unaryExpr, a: a, operator: node.operator)
    case a.kind:
        of intExpr:
            var x: int
            # doAssert (not assert): the parse's side effect on x is
            # required below, so it must survive assertion-less builds
            doAssert parseInt(IntExpr(a).literal.lexeme, x) == len(IntExpr(a).literal.lexeme)
            case node.operator.kind:
                of Tilde:
                    x = not x
                of Minus:
                    x = -x
                else:
                    discard # Unreachable
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1)))
        of floatExpr:
            var x: float
            discard parseFloat(FloatExpr(a).literal.lexeme, x)
            case node.operator.kind:
                of Minus:
                    x = -x
                of Tilde:
                    # Bitwise not makes no sense on floats
                    self.newWarning(invalidOperation, node)
                    return node
                else:
                    discard
            result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1)))
        else:
            result = node
|
||||
|
||||
|
||||
proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode =
    ## Attempts to optimize binary expressions by folding
    ## operations between literal operands. Also emits
    ## lint warnings (equality with singletons, identity
    ## checks against literals, implicit int->float conversion).
    ##
    ## Fixes applied:
    ## - parses moved from `assert` to `doAssert` (their side
    ##   effects are required even with assertions disabled)
    ## - the folded int/int true-division result was mislabeled
    ##   kind: intExpr; it is now floatExpr
    ## - unhandled operators now return the rebuilt BinaryExpr
    ##   instead of falling through and folding to a bogus 0
    var a, b: ASTNode
    a = self.optimizeNode(node.a)
    b = self.optimizeNode(node.b)
    if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and (self.warnings[^1].node == a or self.warnings[^1].node == b):
        # We can't optimize further, the overflow will be caught in the compiler. We don't return the same node
        # because optimizeNode might've been able to optimize one of the two operands and we don't know which
        return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
    if node.operator.kind == DoubleEqual:
        if a.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}:
            self.newWarning(equalityWithSingleton, a)
        elif b.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}:
            self.newWarning(equalityWithSingleton, b)
    elif node.operator.kind == Is:
        if a.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}:
            self.newWarning(isWithALiteral, a)
        elif b.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}:
            self.newWarning(isWithALiteral, b)
    if a.kind == intExpr and b.kind == intExpr:
        # Optimizes integer operations
        var x, y, z: int
        doAssert parseInt(IntExpr(a).literal.lexeme, x) == IntExpr(a).literal.lexeme.len()
        doAssert parseInt(IntExpr(b).literal.lexeme, y) == IntExpr(b).literal.lexeme.len()
        try:
            case node.operator.kind:
                of Plus:
                    z = x + y
                of Minus:
                    z = x - y
                of Asterisk:
                    z = x * y
                of FloorDiv:
                    z = int(x / y)
                of DoubleAsterisk:
                    if y >= 0:
                        z = x ^ y
                    else:
                        # Nim's builtin pow operator can't handle
                        # negative exponents, so we use math's
                        # pow and convert from/to floats instead
                        z = pow(x.float, y.float).int
                of Percentage:
                    z = x mod y
                of Caret:
                    z = x xor y
                of Ampersand:
                    z = x and y
                of Pipe:
                    z = x or y
                of Slash:
                    # Special case, yields a float (fixed: was kind: intExpr)
                    return FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $(x / y), line: IntExpr(a).literal.line, pos: (start: -1, stop: -1)))
                else:
                    # Fixed: must return here, or the fall-through below
                    # would fold the expression to the untouched z (= 0)
                    return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        except OverflowDefect:
            self.newWarning(valueOverflow, node)
            return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        except RangeDefect:
            # TODO: What warning do we raise here?
            return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $z, line: IntExpr(a).literal.line, pos: (start: -1, stop: -1)))
    elif a.kind == floatExpr or b.kind == floatExpr:
        # NOTE(review): this branch assumes the non-float operand is an
        # int literal — confirm other kinds cannot reach here
        var x, y, z: float
        if a.kind == intExpr:
            var temp: int
            doAssert parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len()
            x = float(temp)
            self.newWarning(implicitConversion, a)
        else:
            discard parseFloat(FloatExpr(a).literal.lexeme, x)
        if b.kind == intExpr:
            var temp: int
            doAssert parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len()
            y = float(temp)
            self.newWarning(implicitConversion, b)
        else:
            discard parseFloat(FloatExpr(b).literal.lexeme, y)
        # Optimizes float operations
        try:
            case node.operator.kind:
                of Plus:
                    z = x + y
                of Minus:
                    z = x - y
                of Asterisk:
                    z = x * y
                of FloorDiv:
                    z = x / y
                of DoubleAsterisk:
                    z = pow(x, y)
                of Percentage:
                    z = x mod y
                of Slash:
                    z = x / y
                else:
                    # Fixed: return instead of falling through to the
                    # unconditional fold below (z would still be 0)
                    return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        except OverflowDefect:
            self.newWarning(valueOverflow, node)
            return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $z, line: LiteralExpr(a).literal.line, pos: (start: -1, stop: -1)))
    elif a.kind == strExpr and b.kind == strExpr:
        # Folds string concatenation (lexemes keep their quotes,
        # hence the [1..<^1] slices and re-quoting)
        var a = StrExpr(a)
        var b = StrExpr(b)
        case node.operator.kind:
            of Plus:
                # NOTE(review): the rebuilt token carries no line info — confirm
                result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)] & b.literal.lexeme[1..<(^1)] & "'", pos: (start: -1, stop: -1)))
            else:
                result = node
    elif a.kind == strExpr and self.optimizeNode(b).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == b):
        # Folds string * int repetition
        var a = StrExpr(a)
        var b = IntExpr(b)
        var bb: int
        doAssert parseInt(b.literal.lexeme, bb) == b.literal.lexeme.len()
        case node.operator.kind:
            of Asterisk:
                result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'"))
            else:
                result = node
    elif b.kind == strExpr and self.optimizeNode(a).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a):
        # Folds int * string repetition (mirror of the branch above)
        var b = StrExpr(b)
        var a = IntExpr(a)
        var aa: int
        doAssert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len()
        case node.operator.kind:
            of Asterisk:
                result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'"))
            else:
                result = node
    else:
        # There's no constant folding we can do!
        result = node
|
||||
|
||||
|
||||
proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode =
    ## Analyzes an AST node and attempts to perform
    ## optimizations on it. If no optimizations can be
    ## applied or self.foldConstants is set to false,
    ## then the same node is returned.
    ##
    ## Literal/unary/binary expressions may be replaced by new
    ## folded nodes; container and statement nodes are mutated
    ## in place (their children are optimized recursively)
    if not self.foldConstants:
        return node
    case node.kind:
        of exprStmt:
            result = newExprStmt(self.optimizeNode(ExprStmt(node).expression), ExprStmt(node).token)
        of intExpr, hexExpr, octExpr, binExpr, floatExpr, strExpr:
            result = self.optimizeConstant(node)
        of unaryExpr:
            result = self.optimizeUnary(UnaryExpr(node))
        of binaryExpr:
            result = self.optimizeBinary(BinaryExpr(node))
        of groupingExpr:
            # Recursively unnests groups
            result = self.optimizeNode(GroupingExpr(node).expression)
        of callExpr:
            # Optimizes positional and keyword call arguments in place
            var node = CallExpr(node)
            for i, positional in node.arguments.positionals:
                node.arguments.positionals[i] = self.optimizeNode(positional)
            for i, (key, value) in node.arguments.keyword:
                node.arguments.keyword[i].value = self.optimizeNode(value)
            result = node
        of sliceExpr:
            # Optimizes the slice bounds and the sliced expression
            var node = SliceExpr(node)
            for i, e in node.ends:
                node.ends[i] = self.optimizeNode(e)
            node.slicee = self.optimizeNode(node.slicee)
            result = node
        of tryStmt:
            # Optimizes the body and all optional clauses/handlers
            var node = TryStmt(node)
            node.body = self.optimizeNode(node.body)
            if node.finallyClause != nil:
                node.finallyClause = self.optimizeNode(node.finallyClause)
            if node.elseClause != nil:
                node.elseClause = self.optimizeNode(node.elseClause)
            for i, handler in node.handlers:
                node.handlers[i].body = self.optimizeNode(node.handlers[i].body)
            result = node
        of funDecl:
            # Only the default values of parameters are folded here
            var decl = FunDecl(node)
            for i, node in decl.defaults:
                decl.defaults[i] = self.optimizeNode(node)
            result = decl
        of blockStmt:
            var node = BlockStmt(node)
            for i, n in node.code:
                node.code[i] = self.optimizeNode(n)
            result = node
        of varDecl:
            var decl = VarDecl(node)
            decl.value = self.optimizeNode(decl.value)
            result = decl
        of assignExpr:
            var asgn = AssignExpr(node)
            asgn.value = self.optimizeNode(asgn.value)
            result = asgn
        of listExpr:
            var l = ListExpr(node)
            for i, e in l.members:
                l.members[i] = self.optimizeNode(e)
            result = node
        of setExpr:
            var s = SetExpr(node)
            for i, e in s.members:
                s.members[i] = self.optimizeNode(e)
            result = node
        of tupleExpr:
            var t = TupleExpr(node)
            for i, e in t.members:
                t.members[i] = self.optimizeNode(e)
            result = node
        of dictExpr:
            var d = DictExpr(node)
            for i, e in d.keys:
                d.keys[i] = self.optimizeNode(e)
            for i, e in d.values:
                d.values[i] = self.optimizeNode(e)
            result = node
        else:
            # Node kinds with nothing foldable inside
            result = node
|
||||
|
||||
|
||||
proc optimize*(self: Optimizer, tree: seq[ASTNode]): tuple[tree: seq[ASTNode], warnings: seq[Warning]] =
    ## Runs the optimizer on the given source
    ## tree and returns a new optimized tree
    ## as well as a list of warnings that may
    ## be of interest. The input tree may be
    ## identical to the output tree if no optimization
    ## could be performed. Constant folding can be
    ## turned off by setting foldConstants to false
    ## when initializing the optimizer object
    var optimized = newSeqOfCap[ASTNode](tree.len())
    for node in tree:
        optimized.add(self.optimizeNode(node))
    result = (tree: optimized, warnings: self.warnings)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,273 @@
|
|||
# Copyright 2021 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import meta/ast
|
||||
import meta/errors
|
||||
import meta/bytecode
|
||||
import meta/token
|
||||
import ../config
|
||||
import ../util/multibyte
|
||||
|
||||
import strformat
|
||||
import strutils
|
||||
import nimSHA2
|
||||
import times
|
||||
|
||||
|
||||
export ast
|
||||
|
||||
type
  Serializer* = ref object
    ## Serializes JAPL bytecode to (and from) a raw
    ## byte stream. Holds the state shared by the
    ## dump/load helpers below
    file: string      # Raw source file contents (used to compute its SHA256 hash)
    filename: string  # Name of the file, used in error messages
    chunk: Chunk      # The chunk being serialized or deserialized into
  Serialized* = ref object
    ## Wrapper returned by
    ## the Serializer.read*
    ## procedures to store
    ## metadata
    fileHash*: string   # Hex-encoded SHA256 hash of the original source file
    japlVer*: tuple[major, minor, patch: int]  # Compiler version that produced the bytecode
    japlBranch*: string   # Git branch the compiler was built from
    commitHash*: string   # 40-character git commit hash of the compiler build
    compileDate*: int     # Unix timestamp of when the bytecode was dumped
    chunk*: Chunk         # The deserialized chunk (constants + code)
|
||||
|
||||
|
||||
proc `$`*(self: Serialized): string =
  ## Returns a human-readable string representation
  ## of a Serialized object and its metadata.
  # Fix: the closing parenthesis was misplaced after the branch field
  # instead of at the end of the representation, producing
  # "Serialized(...branch=x), commitHash=..." with no final ")"
  result = &"Serialized(fileHash={self.fileHash}, version={self.japlVer.major}.{self.japlVer.minor}.{self.japlVer.patch}, branch={self.japlBranch}, commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]})"
|
||||
|
||||
|
||||
proc error(self: Serializer, message: string) =
  ## Raises a formatted SerializationError exception
  let details = &"A fatal error occurred while (de)serializing '{self.filename}' -> {message}"
  raise newException(SerializationError, details)
|
||||
|
||||
|
||||
proc initSerializer*(self: Serializer = nil): Serializer =
  ## Initializes a new Serializer object, or resets the
  ## given one when self is not nil, clearing all of its
  ## internal state.
  # Fix: the original unconditionally allocated a fresh object with
  # new(result) and then threw that allocation away whenever an
  # existing instance was passed in; allocate only when needed
  if self != nil:
    result = self
  else:
    new(result)
  result.file = ""
  result.filename = ""
  result.chunk = nil
|
||||
|
||||
|
||||
## Basic routines and helpers to convert various objects from and to to their byte representation
|
||||
|
||||
proc toBytes(self: Serializer, s: string): seq[byte] =
  ## Converts a string into its raw byte representation
  result = newSeqOfCap[byte](s.len)
  for character in s:
    result.add(byte(character))
|
||||
|
||||
|
||||
proc toBytes(self: Serializer, s: int): array[8, uint8] =
  ## Reinterprets an integer's in-memory (native-endian)
  ## representation as an array of 8 bytes
  copyMem(result.addr, s.unsafeAddr, sizeof(int))
|
||||
|
||||
|
||||
proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] =
  ## Flattens a SHA256 digest into a sequence of bytes
  result = newSeqOfCap[byte](d.len)
  for octet in d:
    result.add(octet)
|
||||
|
||||
|
||||
proc bytesToString(self: Serializer, input: seq[byte]): string =
  ## Decodes a sequence of bytes back into a string
  result = newStringOfCap(input.len)
  for b in input:
    result.add(char(b))
|
||||
|
||||
|
||||
proc bytesToInt(self: Serializer, input: array[8, byte]): int =
  ## Reinterprets an 8-byte array as an integer by copying its
  ## raw memory; inverse of toBytes(int).
  # NOTE(review): copies sizeof(int) bytes — assumes a 64-bit int and the
  # same byte order as the machine that wrote the stream; confirm if
  # cross-platform bytecode portability is intended
  copyMem(result.addr, input.unsafeAddr, sizeof(int))
|
||||
|
||||
|
||||
proc bytesToInt(self: Serializer, input: array[3, byte]): int =
  ## Decodes a 3-byte (triple) value into an integer. The
  ## remaining bytes of the result stay zero (Nim zero-initializes
  ## the implicit result variable).
  # NOTE(review): copying into the low bytes of an int yields the expected
  # value only on little-endian targets — confirm against util/multibyte
  copyMem(result.addr, input.unsafeAddr, sizeof(byte) * 3)
|
||||
|
||||
|
||||
proc extend[T](s: var seq[T], a: openarray[T]) =
  ## Extends s with the elements of a
  # system.add already accepts an openArray and appends
  # every element in order
  s.add(a)
|
||||
|
||||
|
||||
proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) =
  ## Writes the JAPL bytecode headers in-place into a byte stream.
  ## Layout (in order): bytecode marker, 3 version bytes,
  ## branch-name length byte + branch name, 40-character commit
  ## hash, 8-byte unix timestamp, SHA256 digest of the source file.
  ## loadBytes reads these fields back in the same order.
  stream.extend(self.toBytes(BYTECODE_MARKER))
  # One byte per version component
  stream.add(byte(JAPL_VERSION.major))
  stream.add(byte(JAPL_VERSION.minor))
  stream.add(byte(JAPL_VERSION.patch))
  # Branch name is length-prefixed (single byte)
  stream.add(byte(len(JAPL_BRANCH)))
  stream.extend(self.toBytes(JAPL_BRANCH))
  # The commit hash is written without a length prefix, so the
  # reader relies on it being exactly 40 characters
  if len(JAPL_COMMIT_HASH) != 40:
    self.error("the commit hash must be exactly 40 characters long")
  stream.extend(self.toBytes(JAPL_COMMIT_HASH))
  # Compilation date as a whole-second unix timestamp (8 bytes)
  stream.extend(self.toBytes(getTime().toUnixFloat().int()))
  # SHA256 hash of the source file's contents
  stream.extend(self.toBytes(computeSHA256(file)))
|
||||
|
||||
|
||||
proc writeConstants(self: Serializer, stream: var seq[byte]) =
  ## Writes the constants table in-place into the given stream.
  ## Each entry is a 1-byte kind tag (0x1 = number, 0x2 = string,
  ## 0x0 = identifier) followed by a 3-byte length and the payload;
  ## the table is terminated by a 0x59 end marker.
  for constant in self.chunk.consts:
    case constant.kind:
      of intExpr, floatExpr:
        # Numbers are stored as their source lexeme (text), parsed
        # back on load
        stream.add(0x1)
        stream.extend(len(constant.token.lexeme).toTriple())
        stream.extend(self.toBytes(constant.token.lexeme))
      of strExpr:
        stream.add(0x2)
        var temp: seq[byte] = @[]
        var strip: int = 2
        var offset: int = 1
        # Encode the string prefix ('f' = format, 'b' = byte string,
        # none = plain) as a 1-byte modifier; strip counts the prefix
        # plus both quotes, offset skips past the prefix/opening quote
        case constant.token.lexeme[0]:
          of 'f':
            strip = 3
            inc(offset)
            temp.add(0x2)
          of 'b':
            strip = 3
            inc(offset)
            temp.add(0x1)
          else:
            strip = 2
            temp.add(0x0)
        stream.extend((len(constant.token.lexeme) - strip).toTriple()) # Removes the quotes from the length count as they're not written
        stream.extend(temp)
        # lexeme[offset..^2] drops the prefix and surrounding quotes
        stream.add(self.toBytes(constant.token.lexeme[offset..^2]))
      of identExpr:
        stream.add(0x0)
        stream.extend(len(constant.token.lexeme).toTriple())
        stream.add(self.toBytes(constant.token.lexeme))
      else:
        self.error(&"unknown constant kind in chunk table ({constant.kind})")
  stream.add(0x59) # End marker
|
||||
|
||||
|
||||
proc readConstants(self: Serializer, stream: seq[byte]): int =
  ## Reads the constant table from the given stream and
  ## adds each constant to the chunk object (note: most compile-time
  ## information such as the original token objects and line info is lost when
  ## serializing the data, so those fields are set to nil or some default
  ## value). Returns the number of bytes that were processed in the stream.
  ##
  ## Entry layout mirrors writeConstants: 1-byte kind tag
  ## (0x1 = number, 0x2 = string, 0x0 = identifier), a 3-byte
  ## length, then the payload; 0x59 terminates the table.
  var stream = stream
  var count: int = 0
  while true:
    case stream[0]:
      of 0x59:
        # End-of-table marker
        inc(count)
        break
      of 0x2:
        # String constant: 3-byte length, 1-byte modifier, then the
        # raw (unquoted) characters
        stream = stream[1..^1]
        let size = self.bytesToInt([stream[0], stream[1], stream[2]])
        stream = stream[3..^1]
        var s = newStrExpr(Token(lexeme: ""))
        # Re-synthesize the string prefix from the modifier byte
        case stream[0]:
          of 0x0:
            discard
          of 0x1:
            s.token.lexeme.add("b")
          of 0x2:
            s.token.lexeme.add("f")
          else:
            # Fix: the format string was missing its closing parenthesis
            self.error(&"unknown string modifier in chunk table (0x{stream[0].toHex()})")
        stream = stream[1..^1]
        # Re-add the quotes stripped by writeConstants
        s.token.lexeme.add("\"")
        for i in countup(0, size - 1):
          s.token.lexeme.add(cast[char](stream[i]))
        s.token.lexeme.add("\"")
        stream = stream[size..^1]
        self.chunk.consts.add(s)
        # 1 tag + 3 length + 1 modifier + size payload bytes
        inc(count, size + 5)
      of 0x1:
        # Numeric constant: the lexeme text is parsed back as a
        # float when it contains a '.', as an integer otherwise
        stream = stream[1..^1]
        inc(count)
        let size = self.bytesToInt([stream[0], stream[1], stream[2]])
        stream = stream[3..^1]
        inc(count, 3)
        var tok: Token = new(Token)
        tok.lexeme = self.bytesToString(stream[0..<size])
        if "." in tok.lexeme:
          tok.kind = Float
          self.chunk.consts.add(newFloatExpr(tok))
        else:
          tok.kind = Integer
          self.chunk.consts.add(newIntExpr(tok))
        stream = stream[size..^1]
        inc(count, size)
      of 0x0:
        # Identifier constant: added via addConstant (result index
        # is irrelevant here, hence the discard)
        stream = stream[1..^1]
        let size = self.bytesToInt([stream[0], stream[1], stream[2]])
        stream = stream[3..^1]
        discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..<size]))))
        stream = stream[size..^1]
        # 1 tag + 3 length + size payload bytes
        inc(count, size + 4)
      else:
        self.error(&"unknown constant kind in chunk table (0x{stream[0].toHex()})")
  result = count
|
||||
|
||||
|
||||
proc writeCode(self: Serializer, stream: var seq[byte]) =
  ## Appends the chunk's bytecode to the given stream,
  ## prefixed with its length encoded as a 3-byte integer
  let code = self.chunk.code
  stream.extend(code.len.toTriple())
  stream.extend(code)
|
||||
|
||||
|
||||
proc readCode(self: Serializer, stream: seq[byte]): int =
  ## Reads the bytecode from a given stream and writes
  ## it into the serializer's chunk, returning the number
  ## of bytecode bytes consumed.
  # NOTE(review): the return value does not include the 3 length-prefix
  # bytes; callers only use this as the final read, so nothing currently
  # depends on the difference — confirm before reusing mid-stream
  let length = int([stream[0], stream[1], stream[2]].fromTriple())
  self.chunk.code.add(stream[3 ..< 3 + length])
  assert len(self.chunk.code) == length
  result = length
|
||||
|
||||
|
||||
proc dumpBytes*(self: Serializer, chunk: Chunk, file, filename: string): seq[byte] =
  ## Dumps the given bytecode and file to a sequence of bytes and returns it.
  ## The file argument must be the actual file's content and is needed to compute its SHA256 hash.
  # Stash the inputs on self so the write helpers (and error messages)
  # can reach them
  self.file = file
  self.filename = filename
  self.chunk = chunk
  # Section order matters: loadBytes reads headers, then the constant
  # table, then the bytecode
  self.writeHeaders(result, self.file)
  self.writeConstants(result)
  self.writeCode(result)
|
||||
|
||||
|
||||
proc loadBytes*(self: Serializer, stream: seq[byte]): Serialized =
  ## Loads the result from dumpBytes to a Serializer object
  ## for use in the VM or for inspection. Parses the stream in
  ## the exact order dumpBytes wrote it: marker, version, branch,
  ## commit hash, timestamp, file hash, constants, bytecode.
  # Reset this serializer's state; the returned object is self
  # (or a fresh one) so the result can be discarded
  discard self.initSerializer()
  new(result)
  result.chunk = newChunk()
  self.chunk = result.chunk
  # Shadow the parameter so it can be consumed by slicing as
  # each field is read
  var stream = stream
  try:
    if stream[0..<len(BYTECODE_MARKER)] != self.toBytes(BYTECODE_MARKER):
      self.error("malformed bytecode marker")
    stream = stream[len(BYTECODE_MARKER)..^1]
    # Three single-byte version components
    result.japlVer = (major: int(stream[0]), minor: int(stream[1]), patch: int(stream[2]))
    stream = stream[3..^1]
    # Branch name is length-prefixed with one byte
    let branchLength = stream[0]
    stream = stream[1..^1]
    result.japlBranch = self.bytesToString(stream[0..<branchLength])
    stream = stream[branchLength..^1]
    # Commit hash is a fixed 40 characters (enforced by writeHeaders)
    result.commitHash = self.bytesToString(stream[0..<40]).toLowerAscii()
    stream = stream[40..^1]
    # 8-byte unix timestamp
    result.compileDate = self.bytesToInt([stream[0], stream[1], stream[2], stream[3], stream[4], stream[5], stream[6], stream[7]])
    stream = stream[8..^1]
    # 32-byte SHA256 digest, exposed as lowercase hex
    result.fileHash = self.bytesToString(stream[0..<32]).toHex().toLowerAscii()
    stream = stream[32..^1]
    # Each reader returns how many bytes it consumed
    stream = stream[self.readConstants(stream)..^1]
    stream = stream[self.readCode(stream)..^1]
  # Out-of-bounds slicing anywhere above means the input ended early
  except IndexDefect:
    self.error("truncated bytecode file")
  # readCode's length assertion failing means inconsistent data
  except AssertionDefect:
    self.error("corrupted bytecode file")
|
Loading…
Reference in New Issue