Readded frontend

This commit is contained in:
Nocturn9x 2022-01-31 15:14:26 +01:00
parent a545341428
commit 776a2241f7
13 changed files with 4808 additions and 0 deletions

View File

@ -0,0 +1,195 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Implementation of a custom list data type for JAPL objects (used also internally by the VM)
{.experimental: "implicitDeref".}
import iterable
import ../../memory/allocator
import base
import strformat
type
    ArrayList*[T] = object of Iterable
        ## Implementation of a simple dynamic
        ## array with amortized O(1) append complexity
        ## and O(1) complexity when popping/deleting
        ## the last element
        container: ptr UncheckedArray[T]  # backing storage, grown geometrically by append()
    ArrayListIterator*[T] = object of Iterator
        ## Iteration state over an ArrayList
        list: ArrayList[T]  # the list being iterated
        current: int        # presumably the index of the next element to yield — confirm once getIter is finished
proc newArrayList*[T](): ptr ArrayList[T] =
    ## Allocates a new, empty array list.
    ## No storage is reserved until the first append
    result = allocateObj(ArrayList[T], ObjectType.List)
    result.capacity = 0
    result.container = nil
    result.length = 0
proc append*[T](self: ptr ArrayList[T], elem: T) =
    ## Appends an object to the end of the list
    ## in amortized constant time (~O(1))
    if self.capacity <= self.length:
        # Out of room: grow the backing array geometrically
        # and move the existing payload into it
        self.capacity = growCapacity(self.capacity)
        self.container = resizeArray(T, self.container, self.length, self.capacity)
    self.container[self.length] = elem
    self.length += 1
proc pop*[T](self: ptr ArrayList[T], idx: int = -1): T =
    ## Pops an item from the list. By default, the last
    ## element is popped, in which case the operation's
    ## time complexity is O(1). When an arbitrary element
    ## is popped, the complexity rises to O(k) where k
    ## is the number of elements that had to be shifted
    ## by 1 to avoid empty slots.
    ## Raises IndexDefect on an empty list or an
    ## out-of-range index
    var idx = idx
    if self.length == 0:
        raise newException(IndexDefect, "pop from empty ArrayList")
    if idx == -1:
        idx = self.length - 1
    if idx notin 0..self.length - 1:
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    result = self.container[idx]
    if idx != self.length - 1:
        # Shift everything after idx one slot to the left. The last
        # valid source index is self.length - 1, so the destination
        # index must stop at self.length - 2 (the original ran to
        # self.length - 1 and read one element past the end)
        for i in countup(idx, self.length - 2):
            self.container[i] = self.container[i + 1]
    # The backing array is NOT shrunk here, so capacity must stay
    # untouched (the original decremented it, desynchronizing it
    # from the real allocation size and corrupting later resizes)
    self.length -= 1
proc `[]`*[T](self: ptr ArrayList[T], idx: int): T =
    ## Retrieves an item from the list, in constant
    ## time. Raises IndexDefect when the list is empty
    ## or the index is out of range
    if self.length == 0:
        # Fixed: the message contained a doubled colon (": :")
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    if idx notin 0..self.length - 1:
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    result = self.container[idx]
proc `[]`*[T](self: ptr ArrayList[T], slice: Hslice[int, int]): ptr ArrayList[T] =
    ## Retrieves a subset of the list, in O(k) time where k is the size
    ## of the slice. The slice is treated as half-open: elements from
    ## slice.a up to (but not including) slice.b are copied into a
    ## freshly allocated list
    if self.length == 0:
        raise newException(IndexDefect, "ArrayList index out of bounds")
    if slice.a notin 0..self.length - 1 or slice.b notin 0..self.length:
        raise newException(IndexDefect, "ArrayList index out of bounds")
    result = newArrayList[T]()
    for i in countup(slice.a, slice.b - 1):
        result.append(self.container[i])
proc `[]=`*[T](self: ptr ArrayList[T], idx: int, obj: T) =
    ## Stores ``obj`` at position ``idx``, in constant time.
    ## Raises IndexDefect when the list is empty or the
    ## index falls outside the valid range
    if self.length == 0:
        raise newException(IndexDefect, "ArrayList is empty")
    if idx < 0 or idx >= self.length:
        raise newException(IndexDefect, "ArrayList index out of bounds")
    self.container[idx] = obj
proc delete*[T](self: ptr ArrayList[T], idx: int) =
    ## Deletes an object from the given index.
    ## This method shares the time complexity
    ## of self.pop()
    if self.length == 0:
        raise newException(IndexDefect, "delete from empty ArrayList")
    if idx notin 0..self.length - 1:
        raise newException(IndexDefect, &"ArrayList index out of bounds: {idx} notin 0..{self.length - 1}")
    # pop() performs the actual removal and element shifting
    discard self.pop(idx)
proc contains*[T](self: ptr ArrayList[T], elem: T): bool =
    ## Returns true if the given object is present
    ## in the list, false otherwise. O(n) complexity
    result = false
    var pos = 0
    while pos < self.length:
        if self[pos] == elem:
            return true
        inc(pos)
proc high*[T](self: ptr ArrayList[T]): int =
    ## Returns the index of the last element
    ## in the list, in constant time.
    ## Raises IndexDefect on an empty list
    if self.length > 0:
        return self.length - 1
    raise newException(IndexDefect, "ArrayList is empty")
proc len*[T](self: ptr ArrayList[T]): int =
    ## Returns the number of elements currently
    ## stored in the list, in constant time
    self.length
iterator pairs*[T](self: ptr ArrayList[T]): tuple[key: int, val: T] =
    ## Implements pairwise iteration (similar to python's enumerate),
    ## yielding (index, element) tuples in insertion order
    for i in countup(0, self.length - 1):
        yield (key: i, val: self[i])
iterator items*[T](self: ptr ArrayList[T]): T =
    ## Implements iteration over each element
    ## in insertion order
    for i in countup(0, self.length - 1):
        yield self[i]
proc reversed*[T](self: ptr ArrayList[T], first: int = -1, last: int = 0): ptr ArrayList[T] =
    ## Returns a reversed version of the given list, from first to last.
    ## First defaults to -1 (the end of the list) and last defaults to 0 (the
    ## beginning of the list). The result is a newly allocated list;
    ## self is left untouched
    var first = first
    if first == -1:
        first = self.length - 1
    # NOTE(review): neither bound is range-checked here; out-of-range
    # values are only caught later by the `[]` bounds check — confirm
    # this is the intended contract
    result = newArrayList[T]()
    for i in countdown(first, last):
        result.append(self[i])
proc extend*[T](self: ptr ArrayList[T], other: seq[T]) =
    ## Appends every element of a Nim sequence to
    ## this list, one self.append() call at a time
    for item in other:
        self.append(item)
proc extend*[T](self: ptr ArrayList[T], other: ptr ArrayList[T]) =
    ## Appends every element of another ArrayList to
    ## this one, one self.append() call at a time
    for item in other:
        self.append(item)
proc `$`*[T](self: ptr ArrayList[T]): string =
    ## Returns a string representation
    ## of self (e.g. "[a, b, c]")
    result = "["
    for i in 0..<self.length:
        # In-place add instead of "result = result & ...", which
        # re-allocates and copies the whole string on every element
        result.add($self.container[i])
        if i < self.length - 1:
            result.add(", ")
    result.add("]")
proc getIter*[T](self: ptr ArrayList[T]): Iterator =
    ## Returns the iterator object of the
    ## arraylist
    # NOTE(review): unfinished — the allocated iterator is never
    # initialized or linked to self, and the allocate() call has a
    # dangling argument list. Left as-is pending the TODO
    result = allocate(ArrayListIterator, ) # TODO

View File

@ -0,0 +1,60 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../../memory/allocator
type
    ObjectType* {.pure.} = enum
        ## All the possible object types
        String, Exception, Function,
        Class, Module, BaseObject,
        Native, Integer, Float,
        Bool, NotANumber, Infinity,
        Nil, List, Dict, Set, Tuple
    Obj* = object of RootObj
        ## The base object for all
        ## JAPL types. Every object
        ## in JAPL implicitly inherits
        ## from this base type
        kind*: ObjectType   # runtime type tag for this object
        hashValue*: uint64  # hash value associated with the object
## Object constructors and allocators
proc allocateObject*(size: int, kind: ObjectType): ptr Obj =
    ## Wrapper around memory.reallocate to create a new generic JAPL object
    ## of the given byte size, with its kind tag already set
    result = cast[ptr Obj](reallocate(nil, 0, size))
    result.kind = kind
template allocateObj*(kind: untyped, objType: ObjectType): untyped =
    ## Wrapper around allocateObject to cast a generic object
    ## to a more specific type (sizes the allocation from the
    ## concrete type)
    cast[ptr kind](allocateObject(sizeof kind, objType))
proc newObj*(): ptr Obj =
    ## Allocates a generic JAPL object
    result = allocateObj(Obj, ObjectType.BaseObject)
proc asObj*(self: ptr Obj): ptr Obj =
    ## Casts a specific JAPL object into a generic
    ## pointer to Obj
    result = cast[ptr Obj](self)

View File

@ -0,0 +1,164 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ../../memory/allocator
import ../../config
import base
import iterable
type
    Entry = object
        ## A single slot in the map's open-addressing table
        key: ptr Obj      # nil when the slot has never been used
        value: ptr Obj
        tombstone: bool   # true when the entry has been deleted (see del)
    HashMap* = object of Iterable
        ## A hash map over JAPL objects using open
        ## addressing with linear probing
        entries: ptr UncheckedArray[ptr Entry]
        actual_length: int  # live entries only; `length` (from Iterable) also counts tombstoned slots
proc newHashMap*(): ptr HashMap =
    ## Allocates a new, empty hashmap. No slots
    ## are reserved until the first insertion
    result = allocateObj(HashMap, ObjectType.Dict)
    result.actual_length = 0
    result.entries = nil
    result.capacity = 0
    result.length = 0
proc freeHashMap*(self: ptr HashMap) =
    ## Frees the map's slot array and resets the
    ## map to the empty state
    # NOTE(review): only the pointer array is freed here; the per-slot
    # Entry objects allocated in adjustCapacity are not — possible
    # leak, confirm against the allocator's semantics
    discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity)
    self.length = 0
    self.actual_length = 0
    self.capacity = 0
    self.entries = nil
proc findEntry(self: ptr UncheckedArray[ptr Entry], key: ptr Obj, capacity: int): ptr Entry =
    ## Linear-probes the slot array for the entry matching ``key``.
    ## Returns either a matching entry (live or tombstoned) or the
    ## first never-used slot on the probe chain, which callers then
    ## use for insertion
    var capacity = uint64(capacity)
    # NOTE(review): if capacity is 0 this 'mod' divides by zero;
    # callers are presumably expected to have grown the table first
    # (see setEntry's load-factor check) — confirm
    var idx = uint64(key.hash()) mod capacity
    while true:
        result = self[idx]
        if system.`==`(result.key, nil):
            # Never-used slot: the probe chain ends here
            break
        elif result.tombstone:
            # Deleted slot: stop only if it used to hold this very key,
            # otherwise keep probing past it
            if result.key == key:
                break
        elif result.key == key:
            break
        # Linear probing with wrap-around
        idx = (idx + 1) mod capacity
proc adjustCapacity(self: ptr HashMap) =
    ## Grows the slot array and re-inserts every live entry.
    ## Tombstones are dropped in the process, so `length` is
    ## recomputed from scratch
    var newCapacity = growCapacity(self.capacity)
    var entries = allocate(UncheckedArray[ptr Entry], Entry, newCapacity)
    var oldEntry: ptr Entry
    var newEntry: ptr Entry
    self.length = 0
    # Pre-initialize every new slot to the never-used state
    for x in countup(0, newCapacity - 1):
        entries[x] = allocate(Entry, Entry, 1)
        entries[x].tombstone = false
        entries[x].key = nil
        entries[x].value = nil
    # Rehash: each surviving key is re-probed against the new capacity
    for x in countup(0, self.capacity - 1):
        oldEntry = self.entries[x]
        if not system.`==`(oldEntry.key, nil):
            newEntry = entries.findEntry(oldEntry.key, newCapacity)
            newEntry.key = oldEntry.key
            newEntry.value = oldEntry.value
            self.length += 1
    # NOTE(review): the old per-slot Entry allocations are not freed
    # here (only the old pointer array is) — possible leak, confirm
    discard freeArray(UncheckedArray[ptr Entry], self.entries, self.capacity)
    self.entries = entries
    self.capacity = newCapacity
proc setEntry(self: ptr HashMap, key: ptr Obj, value: ptr Obj): bool =
    ## Inserts or updates a key/value pair. Returns true when a
    ## never-used slot was taken, false when an existing (live or
    ## tombstoned) entry for the key was overwritten
    # Keep the load factor below MAP_LOAD_FACTOR so probe chains
    # stay short and findEntry always finds a free slot
    if float64(self.length + 1) >= float64(self.capacity) * MAP_LOAD_FACTOR:
        self.adjustCapacity()
    var entry = findEntry(self.entries, key, self.capacity)
    result = system.`==`(entry.key, nil)
    if result:
        # Brand new slot: counts both as a live entry and as an
        # occupied slot for load-factor purposes
        self.length += 1
    if result or entry.tombstone:
        # Re-inserting over the key's own tombstone revives it, so the
        # live count must go back up (del decremented it). The original
        # only incremented on nil keys, making len() undercount after a
        # del + re-insert of the same key. `length` stays put for the
        # tombstone case: the slot was already counted as occupied
        self.actual_length += 1
    entry.key = key
    entry.value = value
    entry.tombstone = false
proc `[]`*(self: ptr HashMap, key: ptr Obj): ptr Obj =
    ## Looks a key up and returns its value, in average O(1) time.
    ## Raises KeyError if the key is absent or was deleted
    var entry = findEntry(self.entries, key, self.capacity)
    if system.`==`(entry.key, nil) or entry.tombstone:
        raise newException(KeyError, "Key not found: " & $key)
    result = entry.value
proc `[]=`*(self: ptr HashMap, key: ptr Obj, value: ptr Obj) =
    ## Inserts or updates a key/value pair
    discard self.setEntry(key, value)
proc len*(self: ptr HashMap): int =
    ## Returns the number of live (non-deleted) entries
    result = self.actual_length
proc del*(self: ptr HashMap, key: ptr Obj) =
    ## Deletes a key from the map by marking its slot as a
    ## tombstone (the slot itself is kept so that probe chains
    ## running through it stay intact).
    ## Raises KeyError when the map is empty or the key is absent
    if self.len() == 0:
        raise newException(KeyError, "delete from empty hashmap")
    var entry = findEntry(self.entries, key, self.capacity)
    if not system.`==`(entry.key, nil):
        # `length` is deliberately left unchanged: the tombstone
        # still occupies a slot for load-factor purposes
        self.actual_length -= 1
        entry.tombstone = true
    else:
        raise newException(KeyError, "Key not found: " & $key)
proc contains*(self: ptr HashMap, key: ptr Obj): bool =
    ## Returns whether the given key is present (and not
    ## deleted) in the map, in average O(1) time
    let entry = findEntry(self.entries, key, self.capacity)
    # Direct boolean assignment instead of an if/else that
    # returns literal true/false
    result = not system.`==`(entry.key, nil) and not entry.tombstone
iterator keys*(self: ptr HashMap): ptr Obj =
    ## Yields every live key, in slot order
    var entry: ptr Entry
    for i in countup(0, self.capacity - 1):
        entry = self.entries[i]
        if not system.`==`(entry.key, nil) and not entry.tombstone:
            yield entry.key
iterator values*(self: ptr HashMap): ptr Obj =
    ## Yields every live value (performs one lookup per key)
    for key in self.keys():
        yield self[key]
iterator pairs*(self: ptr HashMap): tuple[key: ptr Obj, val: ptr Obj] =
    ## Yields every live (key, value) pair
    for key in self.keys():
        yield (key: key, val: self[key])
iterator items*(self: ptr HashMap): ptr Obj =
    ## Iterating a map yields its keys
    for k in self.keys():
        yield k
proc `$`*(self: ptr HashMap): string =
    ## Returns a human-readable string representation
    ## of the map's live contents (e.g. "{a: 1, b: 2}")
    var seen = 0
    result &= "{"
    for key, value in self.pairs():
        result &= $key & ": " & $value
        if seen < self.len() - 1:
            result &= ", "
        inc(seen)
    result &= "}"

View File

@ -0,0 +1,44 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Implementation of iterable types and iterators in JAPL
import base
type
    Iterable* = object of Obj
        ## Defines the standard interface
        ## for iterable types in JAPL
        length*: int    # number of elements currently stored
        capacity*: int  # number of slots currently allocated
    Iterator* = object of Iterable
        ## This object drives iteration
        ## for every iterable type in JAPL except
        ## generators
        iterable*: ptr Obj  # the object being iterated over
        iterCount*: int     # presumably the number of items yielded so far — confirm in concrete implementations
proc getIter*(self: Iterable): ptr Iterator =
    ## Returns the iterator object of an
    ## iterable, which drives foreach
    ## loops
    # NOTE(review): stub — always returns nil here; concrete
    # iterables are expected to provide their own getIter
    return nil
proc next*(self: Iterator): ptr Obj =
    ## Returns the next element from
    ## the iterator or nil if the
    ## iterator has been consumed
    # NOTE(review): stub — always returns nil here
    return nil

908
src/frontend/compiler.nim Normal file
View File

@ -0,0 +1,908 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/token
import meta/ast
import meta/errors
import meta/bytecode
import ../config
import ../util/multibyte
import strformat
import algorithm
import parseutils
import sequtils
export ast
export bytecode
export token
export multibyte
type
    Name = ref object
        ## A compile-time wrapper around
        ## statically resolved names.
        ## Depth indicates to which scope
        ## the variable belongs, zero meaning
        ## the global one
        name: IdentExpr  # the identifier this entry resolves
        owner: string    # owner of the declaration (taken from the declaring node)
        depth: int       # scope depth the name was declared at
        isPrivate: bool
        isConst: bool
    Loop = object
        ## A "loop object" used
        ## by the compiler to emit
        ## appropriate jump offsets
        ## for continue and break
        ## statements
        start: int         # bytecode offset where the loop begins
        depth: int         # scope depth the loop lives at
        breakPos: seq[int] # positions of break jumps still to be patched
    Compiler* = ref object
        ## A wrapper around the compiler's state
        chunk: Chunk              # bytecode chunk currently being written
        ast: seq[ASTNode]         # input AST to compile
        current: int              # index of the next AST node to consume
        file: string              # source file name, used in error messages
        names: seq[Name]          # statically resolved names, in declaration order
        scopeDepth: int           # current lexical nesting level (0 = global)
        currentFunction: FunDecl  # enclosing function declaration, if any
        enableOptimizations*: bool
        currentLoop: Loop
        # Each time a defer statement is
        # compiled, its code is emitted
        # here. Later, if there is any code
        # to defer in the current function,
        # funDecl will wrap the function's code
        # inside an implicit try/finally block
        # and add this code in the finally branch.
        # This sequence is emptied each time a
        # fun declaration is compiled and stores only
        # deferred code for the current function (may
        # be empty)
        deferred: seq[uint8]
proc initCompiler*(enableOptimizations: bool = true): Compiler =
    ## Initializes a new Compiler object
    new(result)
    result.ast = @[]
    result.current = 0
    result.file = ""
    result.names = @[]
    result.scopeDepth = 0
    result.currentFunction = nil
    result.enableOptimizations = enableOptimizations
    # NOTE(review): result.chunk is left nil here — presumably
    # assigned by the compile entry point before any emitByte
    # call; confirm
## Forward declarations
proc expression(self: Compiler, node: ASTNode)
proc statement(self: Compiler, node: ASTNode)
proc declaration(self: Compiler, node: ASTNode)
proc peek(self: Compiler, distance: int = 0): ASTNode
## End of forward declarations
## Public getters for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the AST node relevant for error reporting: the
    ## previously consumed node, or the very last node once the
    ## input has been exhausted
    (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1])
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance.
    ## If the distance is out of bounds, the last
    ## AST node in the tree is returned. A negative
    ## distance may be used to retrieve previously
    ## consumed AST nodes
    # NOTE(review): with an empty AST, self.ast[^1] itself raises —
    # callers presumably never compile an empty tree; confirm
    if self.ast.high() == -1 or self.current + distance > self.ast.high() or
            self.current + distance < 0:
        result = self.ast[^1]
    else:
        result = self.ast[self.current + distance]
proc done(self: Compiler): bool =
    ## Returns true if the compiler is done
    ## compiling, false otherwise
    result = self.current > self.ast.high()
proc error(self: Compiler, message: string) =
    ## Raises a formatted CompileError exception pointing
    ## at the token of the node currently being compiled
    var tok = self.getCurrentNode().token
    raise newException(CompileError, &"A fatal error occurred while compiling '{self.file}', line {tok.line} at '{tok.lexeme}' -> {message}")
proc step(self: Compiler): ASTNode =
    ## Steps to the next node and returns
    ## the consumed one
    result = self.peek()
    if not self.done():
        self.current += 1
proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Emits a single byte, writing it to
    ## the current chunk being compiled
    when DEBUG_TRACE_COMPILER:
        # Compile-time gated tracing of every emitted byte
        echo &"DEBUG - Compiler: Emitting {$byt}"
    self.chunk.write(uint8 byt, self.peek().token.line)
proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Emits multiple bytes instead of a single one, this is useful
    ## to emit operators along with their operands or for multi-byte
    ## instructions that are longer than one byte
    self.emitByte(uint8 byt1)
    self.emitByte(uint8 byt2)
proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
    ## Handy helper method to write an array of 2 bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    self.emitBytes(bytarr[0], bytarr[1])
proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
    ## Handy helper method to write an array of 3 bytes into
    ## the current chunk, calling emitByte on each of its
    ## elements
    self.emitBytes(bytarr[0], bytarr[1])
    self.emitByte(bytarr[2])
proc makeConstant(self: Compiler, val: ASTNode): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s
    result = self.chunk.addConstant(val)
proc emitConstant(self: Compiler, obj: ASTNode) =
    ## Emits a LoadConstant instruction along
    ## with its 3-byte constant-table index operand
    self.emitByte(LoadConstant)
    self.emitBytes(self.makeConstant(obj))
proc identifierConstant(self: Compiler, identifier: IdentExpr): array[3, uint8] =
    ## Emits an identifier name as a string in the current chunk's constant
    ## table. This is used to load globals declared as dynamic that cannot
    ## be resolved statically by the compiler
    try:
        result = self.makeConstant(identifier)
    except CompileError:
        # Re-raise with file/line/lexeme context attached
        self.error(getCurrentExceptionMsg())
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits a dummy jump offset to be patched later. Assumes
    ## the largest offset (emits 4 bytes, one for the given jump
    ## opcode, while the other 3 are for the jump offset which is set
    ## to the maximum unsigned 24 bit integer). If the shorter
    ## 16 bit alternative is later found to be better suited, patchJump
    ## will fix this. This function returns the absolute index into the
    ## chunk's bytecode array where the given placeholder instruction was written
    self.emitByte(opcode)
    self.emitBytes((0xffffff).toTriple())
    result = self.chunk.code.len() - 4
proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted jump
    ## using emitJump. Since emitJump assumes
    ## a long jump, this also shrinks the jump
    ## offset and changes the bytecode instruction if possible
    ## (i.e. jump is in 16 bit range), but the converse is also
    ## true (i.e. it might change a regular jump into a long one)
    # Distance from the end of the 4-byte placeholder to the
    # current end of the chunk
    let jump: int = self.chunk.code.len() - offset - 4
    if jump > 16777215:
        # 2^24 - 1 is the largest encodable (3-byte) jump
        self.error("cannot jump more than 16777215 bytecode instructions")
    if jump < uint16.high().int:
        # Fits in 16 bits: downgrade the long opcode to its short form
        case OpCode(self.chunk.code[offset]):
            of LongJumpForwards:
                self.chunk.code[offset] = JumpForwards.uint8()
            of LongJumpBackwards:
                self.chunk.code[offset] = JumpBackwards.uint8()
            of LongJumpIfFalse:
                self.chunk.code[offset] = JumpIfFalse.uint8()
            of LongJumpIfFalsePop:
                self.chunk.code[offset] = JumpIfFalsePop.uint8()
            else:
                self.error(&"invalid opcode {self.chunk.code[offset]} in patchJump (This is an internal error and most likely a bug)")
        # Shrink the operand from 3 bytes to 2 by dropping one byte,
        # then write the 16-bit offset in place
        self.chunk.code.delete(offset + 1) # Discards the 24 bit integer
        let offsetArray = jump.toDouble()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
    else:
        # Needs the full 24 bits: upgrade a short opcode to its long
        # form and fill in the 3-byte offset
        case OpCode(self.chunk.code[offset]):
            of JumpForwards:
                self.chunk.code[offset] = LongJumpForwards.uint8()
            of JumpBackwards:
                self.chunk.code[offset] = LongJumpBackwards.uint8()
            of JumpIfFalse:
                self.chunk.code[offset] = LongJumpIfFalse.uint8()
            of JumpIfFalsePop:
                self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
            else:
                self.error(&"invalid opcode {self.chunk.code[offset]} in patchJump (This is an internal error and most likely a bug)")
        let offsetArray = jump.toTriple()
        self.chunk.code[offset + 1] = offsetArray[0]
        self.chunk.code[offset + 2] = offsetArray[1]
        self.chunk.code[offset + 3] = offsetArray[2]
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such
    ## as singletons, strings, numbers and
    ## collections
    case node.kind:
        of trueExpr:
            self.emitByte(OpCode.True)
        of falseExpr:
            self.emitByte(OpCode.False)
        of nilExpr:
            self.emitByte(OpCode.Nil)
        of infExpr:
            self.emitByte(OpCode.Inf)
        of nanExpr:
            self.emitByte(OpCode.Nan)
        of strExpr:
            self.emitConstant(node)
        # The optimizer will emit warning
        # for overflowing numbers. Here, we
        # treat them as errors
        of intExpr:
            var x: int
            var y = IntExpr(node)
            try:
                # parseInt must consume the whole lexeme, otherwise
                # the literal overflowed or contained junk
                assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(y)
        # Even though most likely the optimizer
        # will collapse all these other literals
        # to nodes of kind intExpr, that can be
        # disabled. This also allows us to catch
        # basic overflow errors before running any code
        of hexExpr:
            var x: int
            var y = HexExpr(node)
            try:
                assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            # Hex/bin/oct literals are normalized to a decimal IntExpr
            # whose token spans the same source position
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
                    pos: (start: y.token.pos.start, stop: y.token.pos.start +
                    len($x)))))
        of binExpr:
            var x: int
            var y = BinExpr(node)
            try:
                assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
                    pos: (start: y.token.pos.start, stop: y.token.pos.start +
                    len($x)))))
        of octExpr:
            var x: int
            var y = OctExpr(node)
            try:
                assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
                    pos: (start: y.token.pos.start, stop: y.token.pos.start +
                    len($x)))))
        of floatExpr:
            var x: float
            var y = FloatExpr(node)
            try:
                assert parseFloat(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(y)
        of listExpr:
            # Collections: push every member, then build in one go
            var y = ListExpr(node)
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildList)
            self.emitBytes(y.members.len().toTriple()) # 24-bit integer, meaning list literals can have up to 2^24 elements
        of tupleExpr:
            var y = TupleExpr(node)
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildTuple)
            self.emitBytes(y.members.len().toTriple())
        of setExpr:
            var y = SetExpr(node)
            for member in y.members:
                self.expression(member)
            self.emitByte(BuildSet)
            self.emitBytes(y.members.len().toTriple())
        of dictExpr:
            var y = DictExpr(node)
            # Keys and values are pushed interleaved, key first
            for (key, value) in zip(y.keys, y.values):
                self.expression(key)
                self.expression(value)
            self.emitByte(BuildDict)
            self.emitBytes(y.keys.len().toTriple())
        of awaitExpr:
            var y = AwaitExpr(node)
            self.expression(y.awaitee)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug)")
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as negation or
    ## bitwise inversion
    self.expression(node.a) # Pushes the operand onto the stack
    case node.operator.kind:
        of Minus:
            self.emitByte(UnaryNegate)
        of Plus:
            discard # Unary + does nothing
        of TokenType.LogicalNot:
            self.emitByte(OpCode.LogicalNot)
        of Tilde:
            self.emitByte(UnaryNot)
        else:
            self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug)")
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expressions
    # Short-circuiting operators (and/or) compile their own operands
    # interleaved with jumps, so the operands must NOT be pushed
    # eagerly for them. The original emitted node.a and node.b
    # unconditionally before the case, then emitted them again in
    # the LogicalAnd/LogicalOr branches: both operands were evaluated
    # twice, extra values were left on the stack, and short-circuit
    # semantics were broken
    if node.operator.kind notin {TokenType.LogicalAnd, TokenType.LogicalOr}:
        # These two lines prepare the stack by pushing the
        # opcode's operands onto it
        self.expression(node.a)
        self.expression(node.b)
    case node.operator.kind:
        of Plus:
            self.emitByte(BinaryAdd)
        of Minus:
            self.emitByte(BinarySubtract)
        of Asterisk:
            self.emitByte(BinaryMultiply)
        of DoubleAsterisk:
            self.emitByte(BinaryPow)
        of Percentage:
            self.emitByte(BinaryMod)
        of FloorDiv:
            self.emitByte(BinaryFloorDiv)
        of Slash:
            self.emitByte(BinaryDivide)
        of Ampersand:
            self.emitByte(BinaryAnd)
        of Caret:
            self.emitByte(BinaryXor)
        of Pipe:
            self.emitByte(BinaryOr)
        of As:
            self.emitByte(BinaryAs)
        of Is:
            self.emitByte(BinaryIs)
        of IsNot:
            self.emitByte(BinaryIsNot)
        of Of:
            self.emitByte(BinaryOf)
        of RightShift:
            self.emitByte(BinaryShiftRight)
        of LeftShift:
            self.emitByte(BinaryShiftLeft)
        of TokenType.LessThan:
            self.emitByte(OpCode.LessThan)
        of TokenType.GreaterThan:
            self.emitByte(OpCode.GreaterThan)
        of TokenType.DoubleEqual:
            self.emitByte(EqualTo)
        of TokenType.LessOrEqual:
            self.emitByte(OpCode.LessOrEqual)
        of TokenType.GreaterOrEqual:
            self.emitByte(OpCode.GreaterOrEqual)
        of TokenType.LogicalAnd:
            # a and b: if a is falsey, skip b and leave a on the stack
            self.expression(node.a)
            let jump = self.emitJump(JumpIfFalse)
            self.emitByte(Pop)
            self.expression(node.b)
            self.patchJump(jump)
        of TokenType.LogicalOr:
            # a or b: if a is truthy, skip b and leave a on the stack
            self.expression(node.a)
            let jump = self.emitJump(JumpIfTrue)
            self.expression(node.b)
            self.patchJump(jump)
        # TODO: In-place operations
        else:
            self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug)")
proc declareName(self: Compiler, node: ASTNode) =
    ## Compiles all name declarations (constants, static,
    ## and dynamic)
    case node.kind:
        of varDecl:
            var node = VarDecl(node)
            if not node.isStatic:
                # This emits code for dynamically-resolved variables (i.e. globals declared as dynamic and unresolvable names)
                self.emitByte(DeclareName)
                self.emitBytes(self.identifierConstant(IdentExpr(node.name)))
            else:
                # Statically resolved variable here. Only creates a new Name entry
                # so that self.identifier emits the proper stack offset
                if self.names.high() > 16777215:
                    # If someone ever hits this limit in real-world scenarios, I swear I'll
                    # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                    self.error("cannot declare more than 16777215 static variables at a time")
                self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name),
                        isPrivate: node.isPrivate,
                        owner: node.owner,
                        isConst: node.isConst))
        else:
            discard # TODO: Classes, functions
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations: the initializer is
    ## evaluated first, then the name is bound to its value
    self.expression(node.value)
    self.declareName(node)
proc resolveStatic(self: Compiler, name: IdentExpr,
                   depth: int = self.scopeDepth): Name =
    ## Walks self.names from the most recently declared entry
    ## backwards and returns the first Name whose lexeme and
    ## scope depth both match. The default depth is the current
    ## one. Returns nil when no match exists
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.name.token.lexeme == name.token.lexeme and
                candidate.depth == depth:
            return candidate
    result = nil
proc deleteStatic(self: Compiler, name: IdentExpr,
                  depth: int = self.scopeDepth) =
    ## Traverses self.names backwards and deletes the name
    ## object(s) matching the given name at the given depth.
    ## The default depth is the current one. Does nothing
    ## when the name can't be found
    # Iterate by index, backwards, so a removal never shifts an
    # entry we have yet to visit. The original indexed into a
    # reversed() copy and passed that reversed index to
    # self.names.del(i), deleting the wrong element — and del()
    # swaps with the last element, scrambling the declaration
    # order that getStaticIndex relies on for stack offsets.
    # seq.delete() preserves order
    for i in countdown(self.names.high(), 0):
        let entry = self.names[i]
        if entry.name.token.lexeme == name.token.lexeme and entry.depth == depth:
            self.names.delete(i)
proc getStaticIndex(self: Compiler, name: IdentExpr): int =
    ## Gets the predicted stack position of the given variable
    ## if it is static, returns -1 if it is to be bound dynamically
    ## or it does not exist at all
    var i: int = self.names.high()
    # Walk backwards so inner declarations shadow outer ones
    for variable in reversed(self.names):
        if name.name.lexeme == variable.name.name.lexeme:
            return i
        dec(i)
    return -1
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers
    let s = self.resolveStatic(node)
    if s != nil and s.isConst:
        # Constants are emitted as, you guessed it, constant instructions
        # no matter the scope depth. Also, name resolution specifiers do not
        # apply to them (because what would it mean for a constant to be dynamic
        # anyway?)
        self.emitConstant(node)
    else:
        let index = self.getStaticIndex(node)
        if index != -1:
            self.emitByte(LoadFast) # Static name resolution, loads value at index in the stack
            self.emitBytes(index.toTriple())
        else:
            self.emitByte(LoadName) # Resolves by name, at runtime, in a global hashmap
            self.emitBytes(self.identifierConstant(node))
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions
    case node.kind:
        of assignExpr:
            var node = AssignExpr(node)
            var name = IdentExpr(node.name)
            let r = self.resolveStatic(name)
            if r != nil and r.isConst:
                self.error("cannot assign to constant")
            self.expression(node.value)
            let index = self.getStaticIndex(name)
            # For in-place operators, the binary opcode is emitted
            # right after the new value is pushed; plain `=` matches
            # none of these branches and emits nothing here
            case node.token.kind:
                of InplaceAdd:
                    self.emitByte(BinaryAdd)
                of InplaceSub:
                    self.emitByte(BinarySubtract)
                of InplaceDiv:
                    self.emitByte(BinaryDivide)
                of InplaceMul:
                    self.emitByte(BinaryMultiply)
                of InplacePow:
                    self.emitByte(BinaryPow)
                of InplaceFloorDiv:
                    self.emitByte(BinaryFloorDiv)
                of InplaceMod:
                    self.emitByte(BinaryMod)
                of InplaceAnd:
                    self.emitByte(BinaryAnd)
                of InplaceXor:
                    self.emitByte(BinaryXor)
                of InplaceRightShift:
                    self.emitByte(BinaryShiftRight)
                of InplaceLeftShift:
                    self.emitByte(BinaryShiftLeft)
                else:
                    discard # Unreachable
            # In-place operators just change
            # what values is set to a given
            # stack offset/name, so we only
            # need to perform the operation
            # as usual and then store it.
            # TODO: A better optimization would
            # be to have everything in one opcode,
            # but that requires variants for stack,
            # heap, and closure variables
            if index != -1:
                self.emitByte(StoreFast)
                self.emitBytes(index.toTriple())
            else:
                # Assignment only encompasses variable assignments
                # so we can ensure the name is a constant (i.e. an
                # IdentExpr) instead of an object (which would be
                # the case with setItemExpr)
                self.emitByte(StoreName)
                self.emitBytes(self.makeConstant(name))
        of setItemExpr:
            discard
            # TODO
        else:
            self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
proc beginScope(self: Compiler) =
    ## Begins a new local scope by incrementing the current
    ## scope's depth
    inc(self.scopeDepth)
proc endScope(self: Compiler) =
    ## Ends the current local scope, emitting Pop/PopN
    ## instructions for every local that goes out of scope
    ## and dropping its Name entry
    if self.scopeDepth < 0:
        self.error("cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug)")
    var popped: int = 0
    for ident in reversed(self.names):
        if ident.depth > self.scopeDepth:
            inc(popped)
            if not self.enableOptimizations:
                # All variables with a scope depth larger than the current one
                # are now out of scope. Begone, you're now homeless!
                self.emitByte(Pop)
    if self.enableOptimizations and popped > 1:
        # If we're popping less than 65535 variables, then
        # we can emit a PopN instruction. This is true for
        # 99.99999% of the use cases of the language (who the
        # hell is going to use 65 THOUSAND local variables?), but
        # if you'll ever use more then JAPL will emit a PopN instruction
        # for the first 65 thousand and change local variables and then
        # emit another batch of plain ol' Pop instructions for the rest
        if popped <= uint16.high().int():
            self.emitByte(PopN)
            self.emitBytes(popped.toTriple())
        else:
            self.emitByte(PopN)
            self.emitBytes(uint16.high().int.toTriple())
            # NOTE(review): this countdown bound (popped - uint16.high())
            # looks off for the overflow case — it does not obviously
            # visit exactly the remaining popped - 65535 entries; confirm
            # against a test with > 65535 locals
            for i in countdown(self.names.high(), popped - uint16.high().int()):
                if self.names[i].depth > self.scopeDepth:
                    self.emitByte(Pop)
    elif popped == 1:
        # We only emit PopN if we're popping more than one value
        self.emitByte(Pop)
    # Drop the Name entries of everything that went out of scope
    for _ in countup(0, popped - 1):
        discard self.names.pop()
    dec(self.scopeDepth)
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles a block statement: the statements
    ## it contains are compiled inside a fresh
    ## local scope of their own
    self.beginScope()
    for child in node.code:
        self.declaration(child)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code.
    ## Bug fix: the original patched the conditional jump
    ## *before* emitting the JumpForwards over the else
    ## branch, so the false path landed on that forward
    ## jump and skipped the else branch entirely (i.e. the
    ## else branch could never execute). The forward jump
    ## must be emitted first and the conditional jump
    ## patched afterwards. The non-optimized path also
    ## needs to pop the condition on the false side
    self.expression(node.condition)
    var jumpCode: OpCode
    if self.enableOptimizations:
        # JumpIfFalsePop removes the condition from the
        # stack by itself, so no explicit Pop is needed
        jumpCode = JumpIfFalsePop
    else:
        jumpCode = JumpIfFalse
    let jump = self.emitJump(jumpCode)
    if not self.enableOptimizations:
        # Pops the condition on the truthy (fallthrough) path
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    if node.elseBranch != nil or not self.enableOptimizations:
        # Once the then branch is done we must skip over the
        # else branch (and/or the falsey-path Pop below)
        let elseJump = self.emitJump(JumpForwards)
        self.patchJump(jump)
        if not self.enableOptimizations:
            # Pops the condition on the falsey (jump target) path
            self.emitByte(Pop)
        if node.elseBranch != nil:
            self.statement(node.elseBranch)
        self.patchJump(elseJump)
    else:
        # No else branch and JumpIfFalsePop already handled
        # the stack: just patch the exit point
        self.patchJump(jump)
proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction with the correct
    ## jump offset
    var offset: int
    # NOTE(review): this assumes self.chunk.code[begin + 1] holds a jump
    # opcode, but whileStmt passes the offset where the *condition* starts,
    # so begin + 1 may point into arbitrary condition bytecode -- confirm
    # what callers are expected to pass here
    case OpCode(self.chunk.code[begin + 1]): # The jump instruction
        of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
           LongJumpIfFalsePop, LongJumpIfTrue:
            # Long jumps carry a wider operand, hence the extra padding
            offset = self.chunk.code.len() - begin + 4
        else:
            offset = self.chunk.code.len() - begin
    if offset > uint16.high().int:
        # Offset doesn't fit in 2 bytes: use the long variant,
        # whose operand is a 3-byte (24-bit) integer
        if offset > 16777215:
            self.error("cannot jump more than 16777215 bytecode instructions")
        self.emitByte(LongJumpBackwards)
        self.emitBytes(offset.toTriple())
    else:
        self.emitByte(JumpBackwards)
        self.emitBytes(offset.toDouble())
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops.
    ## Bug fix: the original patched the exit jump *before*
    ## emitting the backwards jump, which made the exit path
    ## land exactly on the JumpBackwards instruction and loop
    ## right back to the condition instead of leaving the loop.
    ## The backwards jump must be emitted first so the patched
    ## exit jump points past it
    let start = self.chunk.code.len()
    self.expression(node.condition)
    # JumpIfFalsePop also removes the condition from the stack
    let jump = self.emitJump(JumpIfFalsePop)
    self.statement(node.body)
    self.emitLoop(start)
    self.patchJump(jump)
proc expression(self: Compiler, node: ASTNode) =
    ## Compiles all expressions by dispatching on the
    ## node's kind discriminant
    case node.kind:
        of getItemExpr:
            # TODO: Not implemented yet
            discard
        # Note that for setItem and assign we don't convert
        # the node to its true type because that type information
        # would be lost in the call anyway. The differentiation
        # happens in self.assignment
        of setItemExpr, assignExpr:
            self.assignment(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr,
           tupleExpr, setExpr, listExpr, dictExpr:
            # Since all of these AST nodes mostly share
            # the same overall structure, and the kind
            # discriminant is enough to tell one
            # from the other, why bother with
            # specialized cases when one is enough?
            self.literal(node)
        else:
            self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
proc delStmt(self: Compiler, node: ASTNode) =
    ## Compiles del statements, which unbind
    ## a name from the current scope
    case node.kind:
        of identExpr:
            var ident = IdentExpr(node)
            let slot = self.getStaticIndex(ident)
            if slot == -1:
                # Name could not be resolved statically:
                # unbind it by name at runtime
                self.emitByte(DeleteName)
                self.emitBytes(self.identifierConstant(ident))
            else:
                # Statically resolved name: delete it by
                # its stack slot and forget it at compile
                # time as well
                self.emitByte(DeleteFast)
                self.emitBytes(slot.toTriple())
                self.deleteStatic(ident)
        else:
            discard # The parser already handles the other cases
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles standalone await statements. These are
    ## await expressions parsed in statement position
    ## (essentially an await expression followed by a
    ## semicolon), so they can be used on their own.
    ## Await expressions are the only native construct
    ## able to run a coroutine from inside an already
    ## asynchronous context (orchestrated by an event
    ## loop) and they block the caller until the callee
    ## has returned
    self.expression(node.awaitee)
    self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before the function exits
    ## (either because of a return or an exception).
    ## Bug fix: the original called self.chunk.code.del(i)
    ## while iterating over the very same range, which both
    ## shuffles elements around (del is a swap-with-last
    ## delete) and shrinks the sequence mid-iteration,
    ## scrambling the deferred bytecode and eventually
    ## indexing out of bounds. We now copy the compiled
    ## bytes first and truncate the chunk afterwards,
    ## which preserves their order
    let current = self.chunk.code.len
    self.expression(node.deferred)
    # Move everything that was just emitted into self.deferred
    for i in countup(current, self.chunk.code.high()):
        self.deferred.add(self.chunk.code[i])
    # Drop the moved bytes from the chunk in one go
    self.chunk.code.setLen(current)
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. An empty return
    ## implicitly returns nil
    # NOTE(review): node.value is compiled unconditionally, so a bare
    # "return" presumably has its value field populated with a nil
    # literal node by the parser -- confirm, otherwise this would
    # dereference a nil node
    self.expression(node.value)
    self.emitByte(OpCode.Return)
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the attached
    ## expression is compiled first and then
    ## handed over to the Yield opcode
    self.expression(node.expression)
    self.emitByte(OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements (the previous docstring
    ## said "yield statements", a copy-paste leftover):
    ## the exception expression is compiled and then the
    ## Raise opcode is emitted
    self.expression(node.exception)
    self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements, which jump
    ## straight back to the beginning of the
    ## enclosing loop
    let target = self.currentLoop.start
    if target > 65535:
        # Target doesn't fit in 2 bytes: use the
        # long jump with a 3-byte operand
        self.emitByte(LongJump)
        self.emitBytes(target.toTriple())
    else:
        self.emitByte(Jump)
        self.emitBytes(target.toDouble())
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps past the end of the enclosing loop (the
    ## previous docstring described continue, a
    ## copy-paste leftover). The emitted Break opcode
    ## carries a dummy offset that patchBreaks() later
    ## rewrites into a real forward jump
    # Emits dummy jump offset, this is
    # patched later
    discard self.emitJump(OpCode.Break)
    self.currentLoop.breakPos.add(self.chunk.code.high() - 4)
    # NOTE(review): inside a loop body one would expect
    # self.scopeDepth >= self.currentLoop.depth, making this
    # condition look inverted/unreachable -- confirm intent
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Rewrites the dummy Break opcodes emitted by
    ## breakStmt into real forward jumps. This can
    ## only happen after the enclosing loop has been
    ## fully compiled, because only then is the
    ## amount of bytecode to skip over known
    for position in self.currentLoop.breakPos:
        self.chunk.code[position] = uint8(JumpForwards)
        self.patchJump(position)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements: the expression is
    ## evaluated and the Assert opcode raises an
    ## AssertionError at runtime if the result is falsey
    self.expression(node.expression)
    self.emitByte(OpCode.Assert)
proc statement(self: Compiler, node: ASTNode) =
    ## Compiles all statements by dispatching on the
    ## node's kind discriminant; anything that isn't a
    ## statement falls through to expression compilation
    case node.kind:
        of exprStmt:
            self.expression(ExprStmt(node).expression)
            self.emitByte(Pop) # Expression statements discard their value. Their main use case is side effects in function calls
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.delStmt:
            self.delStmt(DelStmt(node).name)
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt:
            # TODO: Not implemented yet
            discard
        of NodeKind.fromImportStmt:
            # TODO: Not implemented yet
            discard
        of NodeKind.whileStmt, NodeKind.forStmt:
            ## Our parser already desugars for loops to
            ## while loops!
            # The current loop info is saved and restored so
            # that nested loops patch their own breaks only
            let loop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth, breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = loop
        of NodeKind.forEachStmt:
            # TODO: Not implemented yet
            discard
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            # TODO: Not implemented yet
            discard
        else:
            self.expression(node)
proc declaration(self: Compiler, node: ASTNode) =
    ## Compiles all declarations; anything that
    ## isn't a declaration is handed over to
    ## statement compilation
    case node.kind:
        of funDecl, classDecl:
            # TODO: Not implemented yet
            discard
        of NodeKind.varDecl:
            self.varDecl(VarDecl(node))
        else:
            self.statement(node)
proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object
    # Reset all compiler state so the same Compiler
    # instance can be reused across runs
    self.chunk = newChunk()
    self.ast = ast
    self.file = file
    self.names = @[]
    self.scopeDepth = 0
    self.currentFunction = nil
    self.current = 0
    while not self.done():
        self.declaration(self.step())
    if self.ast.len() > 0:
        # *Technically* an empty program is a valid program
        self.endScope()
    self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope
    result = self.chunk
    # Sanity check: the single endScope() above should have taken
    # scopeDepth from 0 to -1; anything else means unbalanced
    # beginScope/endScope calls somewhere in the pipeline
    if self.ast.len() > 0 and self.scopeDepth != -1:
        self.error(&"internal error: invalid scopeDepth state (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")

552
src/frontend/lexer.nim Normal file
View File

@ -0,0 +1,552 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## A simple and modular tokenizer implementation with arbitrary lookahead
import strutils
import parseutils
import strformat
import tables
import meta/token
import meta/errors
export token # Makes Token available when importing the lexer module
export errors
# Tables of all character tokens that are not keywords

# Table of all single-character tokens
const tokens = to_table({
    '(': LeftParen, ')': RightParen,
    '{': LeftBrace, '}': RightBrace,
    '.': Dot, ',': Comma, '-': Minus,
    '+': Plus, '*': Asterisk,
    '>': GreaterThan, '<': LessThan, '=': Equal,
    '~': Tilde, '/': Slash, '%': Percentage,
    '[': LeftBracket, ']': RightBracket,
    ':': Colon, '^': Caret, '&': Ampersand,
    '|': Pipe, ';': Semicolon})

# Table of all double-character tokens
const double = to_table({"**": DoubleAsterisk,
                         ">>": RightShift,
                         "<<": LeftShift,
                         "==": DoubleEqual,
                         "!=": NotEqual,
                         ">=": GreaterOrEqual,
                         "<=": LessOrEqual,
                         "//": FloorDiv,
                         "+=": InplaceAdd,
                         "-=": InplaceSub,
                         "/=": InplaceDiv,
                         "*=": InplaceMul,
                         "^=": InplaceXor,
                         "&=": InplaceAnd,
                         "|=": InplaceOr,
                         "%=": InplaceMod,
                         })

# Table of all triple-character tokens
const triple = to_table({"//=": InplaceFloorDiv,
                         "**=": InplacePow,
                         ">>=": InplaceRightShift,
                         "<<=": InplaceLeftShift
                         })

# Constant table storing all the reserved keywords (which are parsed as identifiers).
# Note: the original table listed "async": Async twice; the duplicate
# entry has been removed (to_table silently overwrote it anyway)
const keywords = to_table({
    "fun": Fun, "raise": Raise,
    "if": If, "else": Else,
    "for": For, "while": While,
    "var": Var, "nil": Nil,
    "true": True, "false": False,
    "return": Return, "break": Break,
    "continue": Continue, "inf": Infinity,
    "nan": NotANumber, "is": Is,
    "lambda": Lambda, "class": Class,
    "async": Async, "import": Import,
    "isnot": IsNot, "from": From,
    "const": Const, "not": LogicalNot,
    "assert": Assert, "or": LogicalOr,
    "and": LogicalAnd, "del": Del,
    "await": Await,
    "foreach": Foreach, "yield": Yield,
    "private": Private, "public": Public,
    "static": Static, "dynamic": Dynamic,
    "as": As, "of": Of, "defer": Defer,
    "except": Except, "finally": Finally,
    "try": Try
    })
type
    Lexer* = ref object
        ## A lexer object
        source: string   # The raw source text being tokenized (mutated in place by escape parsing)
        tokens: seq[Token]   # The tokens produced so far
        line: int   # Current line number (starts at 1)
        start: int   # Offset where the token currently being scanned begins
        current: int   # Offset of the next character to be consumed
        file: string   # Name of the file being lexed (used in error messages)
        lines: seq[tuple[start, stop: int]]   # Start/stop offsets of each completed line
        lastLine: int   # Offset at which the current line began
# Simple public getters

# Offset where the token currently being scanned begins
proc getStart*(self: Lexer): int = self.start
# Offset of the next character to be consumed
proc getCurrent*(self: Lexer): int = self.current
# Current line number (1-based)
proc getLine*(self: Lexer): int = self.line
# The raw source being tokenized
proc getSource*(self: Lexer): string = self.source
# Maps a line number to its (start, stop) offsets into the source.
# For line 1 (or anything <= 1) the span from 0 to the current position
# is returned, since a line's metadata is only recorded once the line ends.
# NOTE(review): self.lines[line - 2] is the span recorded for line (line - 1),
# not for the requested line -- this looks off by one, confirm intent
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = (if line > 1: self.lines[line - 2] else: (start: 0, stop: self.current))
proc initLexer*(self: Lexer = nil): Lexer =
    ## Initializes the lexer or resets
    ## the state of an existing one.
    ## Fix: the original unconditionally allocated a fresh
    ## object with new(result) and then immediately threw
    ## it away whenever an existing instance was passed in;
    ## we now only allocate when actually needed
    if self != nil:
        # Reuse (and reset) the caller's instance
        result = self
    else:
        new(result)
    result.source = ""
    result.tokens = @[]
    result.line = 1
    result.start = 0
    result.current = 0
    result.file = ""
    result.lines = @[]
    result.lastLine = 0
proc done(self: Lexer): bool =
    ## Tells whether the whole source
    ## has been consumed (EOF reached)
    self.current >= self.source.len
proc incLine(self: Lexer) =
    ## Bumps the line counter and records the
    ## (start, stop) span of the line that just
    ## ended into the lexer's line metadata
    self.lines.add((start: self.lastLine, stop: self.current))
    inc(self.line)
    self.lastLine = self.current
proc step(self: Lexer, n: int = 1): char =
    ## Consumes n characters from the source
    ## (1 by default) and returns the first one
    ## consumed; the remaining ones are skipped
    ## over. A null terminator is returned when
    ## the lexer is already at EOF
    if self.done():
        return '\0'
    inc(self.current, n)
    return self.source[self.current - n]
proc peek(self: Lexer, distance: int = 0): char =
    ## Looks at the character at the given distance
    ## from the current position without consuming
    ## anything. A negative distance inspects characters
    ## that were already consumed, while the default
    ## (0) is the next character to be consumed. Any
    ## position at or past EOF yields a null terminator
    if not self.done() and self.current + distance <= self.source.high():
        result = self.source[self.current + distance]
    else:
        result = '\0'
proc error(self: Lexer, message: string) =
    ## Raises a LexingError whose message includes the
    ## file name, the current line and the character
    ## the lexer is currently looking at
    raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}")
proc check(self: Lexer, what: char, distance: int = 0): bool =
    ## Behaves like match() but never consumes anything.
    ## Always yields false at EOF, no matter which
    ## character is being tested. The distance argument
    ## is forwarded as-is to self.peek()
    if not self.done():
        result = self.peek(distance) == what
proc check(self: Lexer, what: string): bool =
    ## Checks an entire string of characters in one go
    ## without consuming any of them, by peeking one
    ## position further ahead for each character
    result = true
    for offset, expected in what:
        # The character's index doubles as the peek
        # distance: since nothing is consumed while
        # scanning, we must look further ahead for
        # every subsequent character
        if not self.check(expected, offset):
            return false
proc check(self: Lexer, what: openarray[char]): bool =
    ## Returns true as soon as any one of the given
    ## characters matches the next character in the
    ## source, without consuming it. Handy when only
    ## one of several characters can match at a time
    result = false
    for candidate in what:
        if self.check(candidate):
            return true
proc match(self: Lexer, what: char): bool =
    ## Consumes the next character and returns true
    ## when it equals the given one; otherwise a
    ## lexing error is raised (the false returns
    ## below only matter if error() ever stops raising)
    if self.done():
        self.error("unexpected EOF")
        return false
    if not self.check(what):
        self.error(&"expecting '{what}', got '{self.peek()}' instead")
        return false
    inc(self.current)
    result = true
proc match(self: Lexer, what: string): bool =
    ## Matches (and consumes) every character of the
    ## given string in order, erroring out on the
    ## first mismatch. Useful to match multi-character
    ## sequences in one go
    result = true
    for expected in what:
        if not self.match(expected):
            return false
proc createToken(self: Lexer, tokenType: TokenType) =
    ## Builds a token of the given kind covering the
    ## source span from self.start up to (but excluding)
    ## the current position, then appends it to the
    ## internal token list
    self.tokens.add(Token(kind: tokenType,
                          lexeme: self.source[self.start..<self.current],
                          line: self.line,
                          pos: (start: self.start, stop: self.current)))
proc parseEscape(self: Lexer) =
    # Boring escape sequence parsing. For more info check out
    # https://en.wikipedia.org/wiki/Escape_sequences_in_C.
    # As of now, \u and \U are not supported, but they'll
    # likely be soon. Another notable limitation is that
    # \xhhh and \nnn are limited to the size of a char
    # (i.e. uint8, or 256 values).
    # Fixes: \b previously mapped to 0x7F (DEL) instead of
    # 0x08 (backspace), and the Linux branch of \n mapped to
    # 0x0D (CR) instead of 0x0A (LF); the Windows branch also
    # called insert() with swapped/mistyped arguments
    case self.peek():
        of 'a':
            self.source[self.current] = cast[char](0x07)
        of 'b':
            # Backspace is 0x08 (0x7F is DEL)
            self.source[self.current] = cast[char](0x08)
        of 'e':
            self.source[self.current] = cast[char](0x1B)
        of 'f':
            self.source[self.current] = cast[char](0x0C)
        of 'n':
            when defined(windows):
                # We natively convert LF to CRLF on Windows, and
                # gotta thank Microsoft for the extra boilerplate!
                self.source[self.current] = cast[char](0x0D)
                # insert() takes the item first and the position second
                self.source.insert("\n", self.current + 1)
            when defined(darwin):
                # Thanks apple, lol
                self.source[self.current] = cast[char](0x0A)
            when defined(linux):
                # Linux line terminator is a plain LF (0x0A)
                self.source[self.current] = cast[char](0x0A)
        of 'r':
            self.source[self.current] = cast[char](0x0D)
        of 't':
            self.source[self.current] = cast[char](0x09)
        of 'v':
            self.source[self.current] = cast[char](0x0B)
        of '"':
            self.source[self.current] = '"'
        of '\'':
            self.source[self.current] = '\''
        of '\\':
            self.source[self.current] = cast[char](0x5C)
        of '0'..'9':
            # Octal escape sequence, up to 3 digits
            # NOTE(review): only source[current] is overwritten, so for
            # multi-digit sequences the remaining digits stay in the
            # source as literal characters -- confirm the caller's intent
            var code = ""
            var value = 0
            var i = self.current
            while i < self.source.high() and (let c = self.source[
                    i].toLowerAscii(); c in '0'..'7') and len(code) < 3:
                code &= self.source[i]
                i += 1
            assert parseOct(code, value) == code.len()
            if value > uint8.high().int:
                self.error("escape sequence value too large (> 255)")
            self.source[self.current] = cast[char](value)
        of 'u', 'U':
            self.error("unicode escape sequences are not supported (yet)")
        of 'x':
            # Hexadecimal escape sequence
            # NOTE(review): collection starts at self.current, which holds
            # the 'x' itself (not a hex digit), so code stays empty and the
            # escape resolves to NUL -- the loop should probably start at
            # self.current + 1; confirm before changing consumption logic
            var code = ""
            var value = 0
            var i = self.current
            while i < self.source.high() and (let c = self.source[
                    i].toLowerAscii(); c in 'a'..'f' or c in '0'..'9'):
                code &= self.source[i]
                i += 1
            assert parseHex(code, value) == code.len()
            if value > uint8.high().int:
                self.error("escape sequence value too large (> 255)")
            self.source[self.current] = cast[char](value)
        else:
            self.error(&"invalid escape sequence '\\{self.peek()}'")
proc parseString(self: Lexer, delimiter: char, mode: string = "single") =
    ## Parses string literals. They can be expressed using matching pairs
    ## of either single or double quotes. Most C-style escape sequences are
    ## supported, moreover, a specific prefix may be prepended
    ## to the string to instruct the lexer on how to parse it:
    ## - b -> declares a byte string, where each character is
    ##        interpreted as an integer instead of a character
    ## - r -> declares a raw string literal, where escape sequences
    ##        are not parsed and stay as-is
    ## - f -> declares a format string, where variables may be
    ##        interpolated using curly braces like f"Hello, {name}!".
    ##        Braces may be escaped using a pair of them, so to represent
    ##        a literal "{" in an f-string, one would use {{ instead
    ## Multi-line strings can be declared using matching triplets of
    ## either single or double quotes. They can span across multiple
    ## lines and escape sequences in them are not parsed, like in raw
    ## strings, so a multi-line string prefixed with the "r" modifier
    ## is redundant, although multi-line byte/format strings are supported
    while not self.check(delimiter) and not self.done():
        if self.check('\n'):
            # Only multi-line strings may span lines
            if mode == "multi":
                self.incLine()
            else:
                self.error("unexpected EOL while parsing string literal")
        if mode in ["raw", "multi"]:
            # Raw/multi strings don't interpret escapes, skip ahead
            discard self.step()
        if self.check('\\'):
            # This madness here serves to get rid of the slash, since \x is mapped
            # to a one-byte sequence but the string '\x' actually 2 bytes (or more,
            # depending on the specific escape sequence)
            self.source = self.source[0..<self.current] & self.source[
                self.current + 1..^1]
            self.parseEscape()
        if mode == "format" and self.check('{'):
            discard self.step()
            if self.check('{'):
                # "{{" escapes a literal brace: drop one of the two
                self.source = self.source[0..<self.current] & self.source[
                    self.current + 1..^1]
                continue
            # Skip over the interpolated expression until its closing brace
            # NOTE(review): this only checks for '"', so single-quoted
            # f-strings with an unclosed '{' may not be caught -- confirm
            while not self.check(['}', '"']):
                discard self.step()
            if self.check('"'):
                self.error("unclosed '{' in format string")
        elif mode == "format" and self.check('}'):
            if not self.check('}', 1):
                self.error("unmatched '}' in format string")
            else:
                # "}}" escapes a literal closing brace: drop one
                self.source = self.source[0..<self.current] & self.source[
                    self.current + 1..^1]
        discard self.step()
    if mode == "multi":
        # A multi-line string must be closed by three delimiters
        if not self.match(delimiter.repeat(3)):
            self.error("unexpected EOL while parsing multi-line string literal")
    if self.done():
        self.error("unexpected EOF while parsing string literal")
        return
    else:
        # Consume the closing delimiter
        discard self.step()
    self.createToken(String)
proc parseBinary(self: Lexer) =
    ## Parses binary number literals
    while self.peek().isDigit():
        if self.peek() notin {'0', '1'}:
            self.error(&"invalid digit '{self.peek()}' in binary literal")
        discard self.step()
    self.createToken(Binary)
    # To make our life easier, the literal is padded here already
    # so that the digit count (past the "0b" prefix) is a multiple of 8
    let leftover = (self.tokens[^1].lexeme.len() - 2) mod 8
    if leftover != 0:
        self.tokens[^1].lexeme = "0b" & "0".repeat(8 - leftover) &
            self.tokens[^1].lexeme[2..^1]
proc parseOctal(self: Lexer) =
    ## Parses octal number literals,
    ## erroring out on digits above 7
    while self.peek().isDigit():
        if self.peek() notin {'0'..'7'}:
            self.error(&"invalid digit '{self.peek()}' in octal literal")
        discard self.step()
    self.createToken(Octal)
proc parseHex(self: Lexer) =
    ## Parses hexadecimal number literals,
    ## accepting digits 0-9 and letters a-f
    ## (case-insensitively)
    while self.peek().isAlphaNumeric():
        if self.peek().toLowerAscii() notin {'0'..'9', 'a'..'f'}:
            self.error(&"invalid hexadecimal literal")
        discard self.step()
    self.createToken(Hex)
proc parseNumber(self: Lexer) =
    ## Parses numeric literals, which encompass
    ## integers and floats composed of arabic digits.
    ## Floats also support scientific notation
    ## (i.e. 3e14), while the fractional part
    ## must be separated from the decimal one
    ## using a dot (which acts as a "comma").
    ## Literals such as 32.5e3 are also supported.
    ## The "e" for the scientific notation of floats
    ## is case-insensitive. Binary number literals are
    ## expressed using the prefix 0b, hexadecimal
    ## numbers with the prefix 0x and octal numbers
    ## with the prefix 0o
    # NOTE(review): the b/x/o prefixes are accepted after *any*
    # first digit (the first digit was consumed by next()), not
    # just after '0' -- e.g. "5b01" would lex as binary; confirm
    case self.peek():
        of 'b':
            discard self.step()
            self.parseBinary()
        of 'x':
            discard self.step()
            self.parseHex()
        of 'o':
            discard self.step()
            self.parseOctal()
        else:
            var kind: TokenType = Integer
            # Integral part
            while isDigit(self.peek()):
                discard self.step()
            if self.check(['e', 'E']):
                # Scientific notation without a fractional part (e.g. 3e14)
                kind = Float
                discard self.step()
                while self.peek().isDigit():
                    discard self.step()
            elif self.check('.'):
                # TODO: Is there a better way?
                discard self.step()
                if not isDigit(self.peek()):
                    self.error("invalid float number literal")
                kind = Float
                # Fractional part, optionally followed by an exponent
                while isDigit(self.peek()):
                    discard self.step()
                if self.check(['e', 'E']):
                    discard self.step()
                    while isDigit(self.peek()):
                        discard self.step()
            self.createToken(kind)
proc parseIdentifier(self: Lexer) =
    ## Scans keywords and identifiers.
    ## Multi-byte sequences such as UTF
    ## runes are not supported
    while self.peek().isAlphaNumeric() or self.check('_'):
        discard self.step()
    let name = self.source[self.start..<self.current]
    if name in keywords:
        # Reserved word: emit its dedicated token kind
        self.createToken(keywords[name])
    else:
        # Plain identifier
        self.createToken(Identifier)
proc next(self: Lexer) =
    ## Scans a single token. This method is
    ## called iteratively until the source
    ## file reaches EOF
    if self.done():
        return
    var single = self.step()
    if single in [' ', '\t', '\r', '\f',
                  '\e']: # We skip whitespaces, tabs and other useless characters
        return
    elif single == '\n':
        self.incLine()
    elif single in ['"', '\'']:
        if self.check(single) and self.check(single, 1):
            # Multiline strings start with 3 quotes
            discard self.step(2)
            self.parseString(single, "multi")
        else:
            self.parseString(single)
    elif single.isDigit():
        # The first digit was already consumed above
        self.parseNumber()
    elif single.isAlphaNumeric() and self.check(['"', '\'']):
        # Like Python, we support bytes and raw literals
        case single:
            of 'r':
                self.parseString(self.step(), "raw")
            of 'b':
                self.parseString(self.step(), "bytes")
            of 'f':
                self.parseString(self.step(), "format")
            else:
                self.error(&"unknown string prefix '{single}'")
    elif single.isAlphaNumeric() or single == '_':
        self.parseIdentifier()
    else:
        # Comments are a special case
        if single == '#':
            # Everything up to the end of the line is discarded
            while not (self.check('\n') or self.done()):
                discard self.step()
            return
        # We start by checking for multi-character tokens,
        # in descending length so //= doesn't translate
        # to the pair of tokens (//, =) for example
        for key in triple.keys():
            if key[0] == single and self.check(key[1..^1]):
                discard self.step(2) # We step 2 characters
                self.createToken(triple[key])
                return
        for key in double.keys():
            if key[0] == single and self.check(key[1]):
                discard self.step()
                self.createToken(double[key])
                return
        if single in tokens:
            # Eventually we emit a single token
            self.createToken(tokens[single])
        else:
            self.error(&"unexpected token '{single}'")
proc lex*(self: Lexer, source, file: string): seq[Token] =
    ## Lexes a source file, converting a stream
    ## of characters into a series of tokens
    # Reset all internal state so the lexer can be reused
    discard self.initLexer()
    self.source = source
    self.file = file
    while not self.done():
        self.next()
        # The next token begins where the one just scanned ended
        self.start = self.current
    # An EOF token is always appended so the parser
    # has a definite end-of-stream marker
    self.tokens.add(Token(kind: EndOfFile, lexeme: "",
                          line: self.line))
    return self.tokens

760
src/frontend/meta/ast.nim Normal file
View File

@ -0,0 +1,760 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## An Abstract Syntax Tree (AST) structure for our recursive-descent
## top-down parser. For more info, check out docs/grammar.md
import strformat
import strutils
import token
type
    NodeKind* = enum
        ## Enumeration of the AST
        ## node types, sorted by
        ## precedence

        # Declarations
        classDecl = 0u8,
        funDecl,
        varDecl,
        # Statements
        forStmt, # Unused for now (for loops are compiled to while loops)
        ifStmt,
        returnStmt,
        breakStmt,
        continueStmt,
        whileStmt,
        forEachStmt,
        blockStmt,
        raiseStmt,
        assertStmt,
        delStmt,
        tryStmt,
        yieldStmt,
        awaitStmt,
        fromImportStmt,
        importStmt,
        deferStmt,
        # An expression followed by a semicolon
        exprStmt,
        # Expressions
        assignExpr,
        lambdaExpr,
        awaitExpr,
        yieldExpr,
        setItemExpr, # Set expressions like a.b = "c"
        binaryExpr,
        unaryExpr,
        sliceExpr,
        callExpr,
        getItemExpr, # Get expressions like a.b
        # Primary expressions
        groupingExpr, # Parenthesized expressions such as (true) and (3 + 4)
        trueExpr,
        listExpr,
        tupleExpr,
        dictExpr,
        setExpr,
        falseExpr,
        strExpr,
        intExpr,
        floatExpr,
        hexExpr,
        octExpr,
        binExpr,
        nilExpr,
        nanExpr,
        infExpr,
        identExpr, # Identifier

    ASTNode* = ref object of RootObj
        ## An AST node
        kind*: NodeKind
        # Regardless of the type of node, we keep the token in the AST node for internal usage.
        # This is not shown when the node is printed, but makes it a heck of a lot easier to report
        # errors accurately even deep in the compilation pipeline
        token*: Token
    # Here I would've rather used object variants, and in fact that's what was in
    # place before, but not being able to re-declare a field of the same type in
    # another case branch is kind of a deal breaker long-term, so until that is
    # fixed (check out https://github.com/nim-lang/RFCs/issues/368 for more info)
    # I'll stick to using inheritance instead
    LiteralExpr* = ref object of ASTNode
        # Using a string for literals makes it much easier to handle numeric types, as
        # there is no overflow nor underflow or float precision issues during parsing.
        # Numbers are just serialized as strings and then converted back to numbers
        # before being passed to the VM, which also keeps the door open in the future
        # to implementing bignum arithmetic that can take advantage of natively supported
        # machine types, meaning that if a numeric type fits into a 64 bit signed/unsigned
        # int then it is stored in such a type to save space, otherwise it is just converted
        # to a bigint. Bigfloats with arbitrary-precision arithmetic would also be nice,
        # although arguably less useful (and probably significantly slower than bigints)
        literal*: Token

    IntExpr* = ref object of LiteralExpr
    OctExpr* = ref object of LiteralExpr
    HexExpr* = ref object of LiteralExpr
    BinExpr* = ref object of LiteralExpr
    FloatExpr* = ref object of LiteralExpr
    StrExpr* = ref object of LiteralExpr

    # There are technically keywords, not literals!
    TrueExpr* = ref object of ASTNode
    FalseExpr* = ref object of ASTNode
    NilExpr* = ref object of ASTNode
    NanExpr* = ref object of ASTNode
    InfExpr* = ref object of ASTNode

    # Although this is *technically* a literal, Nim doesn't
    # allow us to redefine fields from supertypes so it's
    # a tough luck for us
    ListExpr* = ref object of ASTNode
        members*: seq[ASTNode]

    SetExpr* = ref object of ListExpr
    TupleExpr* = ref object of ListExpr

    DictExpr* = ref object of ASTNode
        # keys and values map 1:1 by index
        keys*: seq[ASTNode]
        values*: seq[ASTNode]

    IdentExpr* = ref object of ASTNode
        name*: Token

    GroupingExpr* = ref object of ASTNode
        expression*: ASTNode

    GetItemExpr* = ref object of ASTNode
        obj*: ASTNode
        name*: ASTNode

    SetItemExpr* = ref object of GetItemExpr
        # Since a setItem expression is just
        # a getItem one followed by an assignment,
        # inheriting it from getItem makes sense
        value*: ASTNode

    CallExpr* = ref object of ASTNode
        callee*: ASTNode # The thing being called
        arguments*: tuple[positionals: seq[ASTNode], keyword: seq[tuple[
                name: ASTNode, value: ASTNode]]]

    UnaryExpr* = ref object of ASTNode
        operator*: Token
        a*: ASTNode

    BinaryExpr* = ref object of UnaryExpr
        # Binary expressions can be seen here as unary
        # expressions with an extra operand so we just
        # inherit from that and add a second operand
        b*: ASTNode

    YieldExpr* = ref object of ASTNode
        expression*: ASTNode

    AwaitExpr* = ref object of ASTNode
        awaitee*: ASTNode

    LambdaExpr* = ref object of ASTNode
        body*: ASTNode
        arguments*: seq[ASTNode]
        # This is, in order, the list of each default argument
        # the function takes. It maps 1:1 with self.arguments
        # although it may be shorter (in which case this maps
        # 1:1 with what's left of self.arguments after all
        # positional arguments have been consumed)
        defaults*: seq[ASTNode]
        isGenerator*: bool

    SliceExpr* = ref object of ASTNode
        slicee*: ASTNode
        ends*: seq[ASTNode]

    AssignExpr* = ref object of ASTNode
        name*: ASTNode
        value*: ASTNode

    ExprStmt* = ref object of ASTNode
        expression*: ASTNode

    ImportStmt* = ref object of ASTNode
        moduleName*: ASTNode

    FromImportStmt* = ref object of ASTNode
        fromModule*: ASTNode
        fromAttributes*: seq[ASTNode]

    DelStmt* = ref object of ASTNode
        name*: ASTNode

    AssertStmt* = ref object of ASTNode
        expression*: ASTNode

    RaiseStmt* = ref object of ASTNode
        exception*: ASTNode

    BlockStmt* = ref object of ASTNode
        code*: seq[ASTNode]

    ForStmt* = ref object of ASTNode
        discard # Unused

    ForEachStmt* = ref object of ASTNode
        identifier*: ASTNode
        expression*: ASTNode
        body*: ASTNode

    DeferStmt* = ref object of ASTNode
        deferred*: ASTNode

    TryStmt* = ref object of ASTNode
        body*: ASTNode
        # One (body, exception, name) triple per except clause
        handlers*: seq[tuple[body: ASTNode, exc: ASTNode, name: ASTNode]]
        finallyClause*: ASTNode
        elseClause*: ASTNode

    WhileStmt* = ref object of ASTNode
        condition*: ASTNode
        body*: ASTNode

    AwaitStmt* = ref object of ASTNode
        awaitee*: ASTNode

    BreakStmt* = ref object of ASTNode

    ContinueStmt* = ref object of ASTNode

    ReturnStmt* = ref object of ASTNode
        value*: ASTNode

    IfStmt* = ref object of ASTNode
        condition*: ASTNode
        thenBranch*: ASTNode
        elseBranch*: ASTNode

    YieldStmt* = ref object of ASTNode
        expression*: ASTNode

    Declaration* = ref object of ASTNode
        owner*: string # Used for determining if a module can access a given field

    VarDecl* = ref object of Declaration
        name*: ASTNode
        value*: ASTNode
        isConst*: bool
        isStatic*: bool
        isPrivate*: bool

    FunDecl* = ref object of Declaration
        name*: ASTNode
        body*: ASTNode
        arguments*: seq[ASTNode]
        # This is, in order, the list of each default argument
        # the function takes. It maps 1:1 with self.arguments
        # although it may be shorter (in which case this maps
        # 1:1 with what's left of self.arguments after all
        # positional arguments have been consumed)
        defaults*: seq[ASTNode]
        isAsync*: bool
        isGenerator*: bool
        isStatic*: bool
        isPrivate*: bool

    ClassDecl* = ref object of Declaration
        name*: ASTNode
        body*: ASTNode
        parents*: seq[ASTNode]
        isStatic*: bool
        isPrivate*: bool

    # Convenience typeclasses grouping all expression
    # and statement node types respectively
    Expression* = LiteralExpr | ListExpr | GetItemExpr | SetItemExpr | UnaryExpr | BinaryExpr | CallExpr | AssignExpr |
                  GroupingExpr | IdentExpr | DictExpr | TupleExpr | SetExpr |
                  TrueExpr | FalseExpr | NilExpr |
                  NanExpr | InfExpr

    Statement* = ExprStmt | ImportStmt | FromImportStmt | DelStmt | AssertStmt | RaiseStmt | BlockStmt | ForStmt | WhileStmt |
                 ForStmt | BreakStmt | ContinueStmt | ReturnStmt | IfStmt
proc newASTNode*(kind: NodeKind, token: Token): ASTNode =
    ## Initializes a new generic ASTNode object
    ## with the given kind and source token
    ASTNode(kind: kind, token: token)
proc isConst*(self: ASTNode): bool {.inline.} =
    ## Returns true if the node is a primitive
    ## constant literal (numbers, strings and singletons)
    self.kind in {intExpr, hexExpr, binExpr, octExpr, strExpr,
                  falseExpr, trueExpr, infExpr, nanExpr,
                  floatExpr, nilExpr}
proc isLiteral*(self: ASTNode): bool {.inline.} =
    ## Returns true if the node is either a primitive
    ## constant or a collection literal
    self.isConst() or self.kind in {tupleExpr, dictExpr, setExpr, listExpr}
proc newIntExpr*(literal: Token): IntExpr =
    ## Creates a new decimal integer literal node from its token
    IntExpr(kind: intExpr, literal: literal, token: literal)
proc newOctExpr*(literal: Token): OctExpr =
    ## Creates a new octal integer literal node from its token
    OctExpr(kind: octExpr, literal: literal, token: literal)
proc newHexExpr*(literal: Token): HexExpr =
    ## Creates a new hexadecimal integer literal node from its token
    HexExpr(kind: hexExpr, literal: literal, token: literal)
proc newBinExpr*(literal: Token): BinExpr =
    ## Creates a new binary integer literal node from its token
    BinExpr(kind: binExpr, literal: literal, token: literal)
proc newFloatExpr*(literal: Token): FloatExpr =
    ## Creates a new float literal node from its token
    FloatExpr(kind: floatExpr, literal: literal, token: literal)
proc newTrueExpr*(token: Token): LiteralExpr =
    ## Creates a new node for the "true" singleton
    LiteralExpr(kind: trueExpr, token: token)
proc newFalseExpr*(token: Token): LiteralExpr =
    ## Creates a new node for the "false" singleton
    LiteralExpr(kind: falseExpr, token: token)
proc newNaNExpr*(token: Token): LiteralExpr =
    ## Creates a new node for the "nan" singleton
    LiteralExpr(kind: nanExpr, token: token)
proc newNilExpr*(token: Token): LiteralExpr =
    ## Creates a new node for the "nil" singleton
    LiteralExpr(kind: nilExpr, token: token)
proc newInfExpr*(token: Token): LiteralExpr =
    ## Creates a new node for the "inf" singleton
    LiteralExpr(kind: infExpr, token: token)
proc newStrExpr*(literal: Token): StrExpr =
    ## Creates a new string literal node from its token
    StrExpr(kind: strExpr, literal: literal, token: literal)
proc newIdentExpr*(name: Token): IdentExpr =
    ## Creates a new identifier node from its token
    IdentExpr(kind: identExpr, name: name, token: name)
proc newGroupingExpr*(expression: ASTNode, token: Token): GroupingExpr =
    ## Creates a new parenthesized (grouping) expression node
    GroupingExpr(kind: groupingExpr, expression: expression, token: token)
proc newLambdaExpr*(arguments, defaults: seq[ASTNode], body: ASTNode,
                    isGenerator: bool, token: Token): LambdaExpr =
    ## Creates a new lambda (anonymous function) expression node
    LambdaExpr(kind: lambdaExpr, body: body, arguments: arguments,
               defaults: defaults, isGenerator: isGenerator, token: token)
proc newGetItemExpr*(obj: ASTNode, name: ASTNode, token: Token): GetItemExpr =
    ## Creates a new attribute-access (get item) expression node
    GetItemExpr(kind: getItemExpr, obj: obj, name: name, token: token)
proc newListExpr*(members: seq[ASTNode], token: Token): ListExpr =
    ## Creates a new list literal node
    ListExpr(kind: listExpr, members: members, token: token)
proc newSetExpr*(members: seq[ASTNode], token: Token): SetExpr =
    ## Creates a new set literal node
    SetExpr(kind: setExpr, members: members, token: token)
proc newTupleExpr*(members: seq[ASTNode], token: Token): TupleExpr =
    ## Creates a new tuple literal node
    TupleExpr(kind: tupleExpr, members: members, token: token)
proc newDictExpr*(keys, values: seq[ASTNode], token: Token): DictExpr =
    ## Creates a new dictionary literal node; keys and values
    ## map to each other positionally
    DictExpr(kind: dictExpr, keys: keys, values: values, token: token)
proc newSetItemExpr*(obj, name, value: ASTNode, token: Token): SetItemExpr =
    ## Creates a new attribute-assignment (set item) expression node
    SetItemExpr(kind: setItemExpr, obj: obj, name: name,
                value: value, token: token)
proc newCallExpr*(callee: ASTNode, arguments: tuple[positionals: seq[ASTNode],
                  keyword: seq[tuple[name: ASTNode, value: ASTNode]]],
                  token: Token): CallExpr =
    ## Creates a new call expression node with positional
    ## and keyword arguments
    CallExpr(kind: callExpr, callee: callee, arguments: arguments, token: token)
proc newSliceExpr*(slicee: ASTNode, ends: seq[ASTNode],
                   token: Token): SliceExpr =
    ## Creates a new slice expression node
    SliceExpr(kind: sliceExpr, slicee: slicee, ends: ends, token: token)
proc newUnaryExpr*(operator: Token, a: ASTNode): UnaryExpr =
    ## Creates a new unary expression node; the operator
    ## token doubles as the node's source token
    UnaryExpr(kind: unaryExpr, operator: operator, a: a, token: operator)
proc newBinaryExpr*(a: ASTNode, operator: Token, b: ASTNode): BinaryExpr =
    ## Creates a new binary expression node; the operator
    ## token doubles as the node's source token
    BinaryExpr(kind: binaryExpr, operator: operator, a: a, b: b,
               token: operator)
proc newYieldExpr*(expression: ASTNode, token: Token): YieldExpr =
    ## Creates a new yield expression node
    YieldExpr(kind: yieldExpr, expression: expression, token: token)
proc newAssignExpr*(name, value: ASTNode, token: Token): AssignExpr =
    ## Creates a new assignment expression node
    AssignExpr(kind: assignExpr, name: name, value: value, token: token)
proc newAwaitExpr*(awaitee: ASTNode, token: Token): AwaitExpr =
    ## Creates a new await expression node
    AwaitExpr(kind: awaitExpr, awaitee: awaitee, token: token)
proc newExprStmt*(expression: ASTNode, token: Token): ExprStmt =
    ## Creates a new expression statement node
    ExprStmt(kind: exprStmt, expression: expression, token: token)
proc newImportStmt*(moduleName: ASTNode, token: Token): ImportStmt =
    ## Creates a new import statement node
    ImportStmt(kind: importStmt, moduleName: moduleName, token: token)
proc newFromImportStmt*(fromModule: ASTNode, fromAttributes: seq[ASTNode],
                        token: Token): FromImportStmt =
    ## Creates a new from-import statement node
    FromImportStmt(kind: fromImportStmt, fromModule: fromModule,
                   fromAttributes: fromAttributes, token: token)
proc newDelStmt*(name: ASTNode, token: Token): DelStmt =
    ## Creates a new del statement node
    DelStmt(kind: delStmt, name: name, token: token)
proc newYieldStmt*(expression: ASTNode, token: Token): YieldStmt =
    ## Creates a new yield statement node
    YieldStmt(kind: yieldStmt, expression: expression, token: token)
proc newAwaitStmt*(awaitee: ASTNode, token: Token): AwaitStmt =
    ## Creates a new await statement node.
    ## Bugfix: this constructor used to build an AwaitExpr with kind
    ## awaitExpr, so nodes produced here never matched the awaitStmt
    ## branch of `$` (which down-casts to AwaitStmt and would fail
    ## with an invalid object conversion on the old node)
    result = AwaitStmt(kind: awaitStmt)
    result.awaitee = awaitee
    result.token = token
proc newAssertStmt*(expression: ASTNode, token: Token): AssertStmt =
    ## Creates a new assert statement node
    AssertStmt(kind: assertStmt, expression: expression, token: token)
proc newDeferStmt*(deferred: ASTNode, token: Token): DeferStmt =
    ## Creates a new defer statement node
    DeferStmt(kind: deferStmt, deferred: deferred, token: token)
proc newRaiseStmt*(exception: ASTNode, token: Token): RaiseStmt =
    ## Creates a new raise statement node
    RaiseStmt(kind: raiseStmt, exception: exception, token: token)
proc newTryStmt*(body: ASTNode, handlers: seq[tuple[body: ASTNode, exc: ASTNode, name: ASTNode]],
                 finallyClause: ASTNode,
                 elseClause: ASTNode, token: Token): TryStmt =
    ## Creates a new try statement node with its except handlers
    ## and optional finally/else clauses (which may be nil)
    TryStmt(kind: tryStmt, body: body, handlers: handlers,
            finallyClause: finallyClause, elseClause: elseClause,
            token: token)
proc newBlockStmt*(code: seq[ASTNode], token: Token): BlockStmt =
    ## Creates a new block statement node
    BlockStmt(kind: blockStmt, code: code, token: token)
proc newWhileStmt*(condition: ASTNode, body: ASTNode, token: Token): WhileStmt =
    ## Creates a new while statement node
    WhileStmt(kind: whileStmt, condition: condition, body: body, token: token)
proc newForEachStmt*(identifier: ASTNode, expression, body: ASTNode,
                     token: Token): ForEachStmt =
    ## Creates a new foreach statement node
    ForEachStmt(kind: forEachStmt, identifier: identifier,
                expression: expression, body: body, token: token)
proc newBreakStmt*(token: Token): BreakStmt =
    ## Creates a new break statement node
    BreakStmt(kind: breakStmt, token: token)
proc newContinueStmt*(token: Token): ContinueStmt =
    ## Creates a new continue statement node
    ContinueStmt(kind: continueStmt, token: token)
proc newReturnStmt*(value: ASTNode, token: Token): ReturnStmt =
    ## Creates a new return statement node
    ReturnStmt(kind: returnStmt, value: value, token: token)
proc newIfStmt*(condition: ASTNode, thenBranch, elseBranch: ASTNode,
                token: Token): IfStmt =
    ## Creates a new if statement node (elseBranch may be nil)
    IfStmt(kind: ifStmt, condition: condition, thenBranch: thenBranch,
           elseBranch: elseBranch, token: token)
proc newVarDecl*(name: ASTNode, value: ASTNode = newNilExpr(Token()),
                 isStatic: bool = true, isConst: bool = false,
                 isPrivate: bool = true, token: Token, owner: string): VarDecl =
    ## Creates a new variable declaration node; the value
    ## defaults to a nil literal
    VarDecl(kind: varDecl, name: name, value: value, isConst: isConst,
            isStatic: isStatic, isPrivate: isPrivate, token: token,
            owner: owner)
proc newFunDecl*(name: ASTNode, arguments, defaults: seq[ASTNode],
                 body: ASTNode, isStatic: bool = true, isAsync,
                 isGenerator: bool, isPrivate: bool = true, token: Token,
                 owner: string): FunDecl =
    ## Creates a new function declaration node
    FunDecl(kind: funDecl, name: name, arguments: arguments,
            defaults: defaults, body: body, isAsync: isAsync,
            isGenerator: isGenerator, isStatic: isStatic,
            isPrivate: isPrivate, token: token, owner: owner)
proc newClassDecl*(name: ASTNode, body: ASTNode,
                   parents: seq[ASTNode], isStatic: bool = true,
                   isPrivate: bool = true, token: Token,
                   owner: string): ClassDecl =
    ## Creates a new class declaration node
    ClassDecl(kind: classDecl, name: name, body: body, parents: parents,
              isStatic: isStatic, isPrivate: isPrivate, token: token,
              owner: owner)
proc `$`*(self: ASTNode): string =
    ## Returns a string representation of the given AST node,
    ## recursively stringifying its children. Nil nodes render
    ## as "nil".
    ## Bugfix: the setItemExpr branch used to print self.value
    ## for both the name and value fields
    if self == nil:
        return "nil"
    case self.kind:
        of intExpr, floatExpr, hexExpr, binExpr, octExpr, strExpr, trueExpr,
           falseExpr, nanExpr, nilExpr, infExpr:
            if self.kind in {trueExpr, falseExpr, nanExpr, nilExpr, infExpr}:
                # Singletons have no payload: strip the "Expr" suffix from the kind name
                result &= &"Literal({($self.kind)[0..^5]})"
            elif self.kind == strExpr:
                # Drop the surrounding quotes and escape the contents
                result &= &"Literal({LiteralExpr(self).literal.lexeme[1..^2].escape()})"
            else:
                result &= &"Literal({LiteralExpr(self).literal.lexeme})"
        of identExpr:
            result &= &"Identifier('{IdentExpr(self).name.lexeme}')"
        of groupingExpr:
            result &= &"Grouping({GroupingExpr(self).expression})"
        of getItemExpr:
            var self = GetItemExpr(self)
            result &= &"GetItem(obj={self.obj}, name={self.name})"
        of setItemExpr:
            var self = SetItemExpr(self)
            # Fixed: was name={self.value}, printing the value twice
            result &= &"SetItem(obj={self.obj}, name={self.name}, value={self.value})"
        of callExpr:
            var self = CallExpr(self)
            result &= &"""Call({self.callee}, arguments=(positionals=[{self.arguments.positionals.join(", ")}], keyword=[{self.arguments.keyword.join(", ")}]))"""
        of unaryExpr:
            var self = UnaryExpr(self)
            result &= &"Unary(Operator('{self.operator.lexeme}'), {self.a})"
        of binaryExpr:
            var self = BinaryExpr(self)
            result &= &"Binary({self.a}, Operator('{self.operator.lexeme}'), {self.b})"
        of assignExpr:
            var self = AssignExpr(self)
            result &= &"Assign(name={self.name}, value={self.value})"
        of exprStmt:
            var self = ExprStmt(self)
            result &= &"ExpressionStatement({self.expression})"
        of breakStmt:
            result = "Break()"
        of importStmt:
            var self = ImportStmt(self)
            result &= &"Import({self.moduleName})"
        of fromImportStmt:
            var self = FromImportStmt(self)
            result &= &"""FromImport(fromModule={self.fromModule}, fromAttributes=[{self.fromAttributes.join(", ")}])"""
        of delStmt:
            var self = DelStmt(self)
            result &= &"Del({self.name})"
        of assertStmt:
            var self = AssertStmt(self)
            result &= &"Assert({self.expression})"
        of raiseStmt:
            var self = RaiseStmt(self)
            result &= &"Raise({self.exception})"
        of blockStmt:
            var self = BlockStmt(self)
            result &= &"""Block([{self.code.join(", ")}])"""
        of whileStmt:
            var self = WhileStmt(self)
            result &= &"While(condition={self.condition}, body={self.body})"
        of forEachStmt:
            var self = ForEachStmt(self)
            result &= &"ForEach(identifier={self.identifier}, expression={self.expression}, body={self.body})"
        of returnStmt:
            var self = ReturnStmt(self)
            result &= &"Return({self.value})"
        of yieldExpr:
            var self = YieldExpr(self)
            result &= &"Yield({self.expression})"
        of awaitExpr:
            var self = AwaitExpr(self)
            result &= &"Await({self.awaitee})"
        of ifStmt:
            var self = IfStmt(self)
            if self.elseBranch == nil:
                result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch=nil)"
            else:
                result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch={self.elseBranch})"
        of yieldStmt:
            var self = YieldStmt(self)
            result &= &"YieldStmt({self.expression})"
        of awaitStmt:
            var self = AwaitStmt(self)
            result &= &"AwaitStmt({self.awaitee})"
        of varDecl:
            var self = VarDecl(self)
            result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, static={self.isStatic}, private={self.isPrivate})"
        of funDecl:
            var self = FunDecl(self)
            result &= &"""FunDecl(name={self.name}, body={self.body}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, static={self.isStatic}, private={self.isPrivate})"""
        of classDecl:
            var self = ClassDecl(self)
            result &= &"""Class(name={self.name}, body={self.body}, parents=[{self.parents.join(", ")}], static={self.isStatic}, private={self.isPrivate})"""
        of tupleExpr:
            var self = TupleExpr(self)
            result &= &"""Tuple([{self.members.join(", ")}])"""
        of setExpr:
            var self = SetExpr(self)
            result &= &"""Set([{self.members.join(", ")}])"""
        of listExpr:
            var self = ListExpr(self)
            result &= &"""List([{self.members.join(", ")}])"""
        of dictExpr:
            var self = DictExpr(self)
            result &= &"""Dict(keys=[{self.keys.join(", ")}], values=[{self.values.join(", ")}])"""
        of lambdaExpr:
            var self = LambdaExpr(self)
            result &= &"""Lambda(body={self.body}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator})"""
        of deferStmt:
            var self = DeferStmt(self)
            result &= &"Defer({self.deferred})"
        of sliceExpr:
            var self = SliceExpr(self)
            result &= &"""Slice({self.slicee}, ends=[{self.ends.join(", ")}])"""
        of tryStmt:
            var self = TryStmt(self)
            result &= &"TryStmt(body={self.body}, handlers={self.handlers}"
            if self.finallyClause != nil:
                result &= &", finallyClause={self.finallyClause}"
            else:
                result &= ", finallyClause=nil"
            if self.elseClause != nil:
                result &= &", elseClause={self.elseClause}"
            else:
                result &= ", elseClause=nil"
            result &= ")"
        else:
            discard

View File

@ -0,0 +1,286 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ast
import ../../util/multibyte
import errors
import strutils
import strformat
export ast
type
    Chunk* = ref object
        ## A piece of bytecode.
        ## Consts represents the constants table the code is referring to.
        ## Code is the linear sequence of compiled bytecode instructions.
        ## Lines maps bytecode instructions to line numbers using Run
        ## Length Encoding: it is a flat sequence of (line, count) pairs,
        ## meaning "the next count instructions were compiled from the
        ## given line" (this is the layout write() and getLine() below
        ## agree on). For example, [1, 2, 5, 1] means that the first two
        ## instructions come from line 1 and the third one from line 5.
        ## This is more efficient than using the naive approach, which would encode
        ## the same line number multiple times and waste considerable amounts of space.
        consts*: seq[ASTNode]   # Constants table (AST nodes, deduplicated if reuseConsts)
        code*: seq[uint8]       # Linear bytecode stream
        lines*: seq[int]        # RLE-encoded (line, count) pairs, see above
        reuseConsts*: bool      # Whether identical constants share one table slot
    OpCode* {.pure.} = enum
        ## Enum of possible opcodes.
        # Note: x represents the
        # argument to unary opcodes, while
        # a and b represent arguments to binary
        # opcodes. Other variable names may be
        # used for more complex opcodes. All
        # arguments to opcodes (if they take
        # arguments) come from popping off the
        # stack
        LoadConstant = 0u8, # Pushes constant at position x in the constant table onto the stack
        # Binary operators
        UnaryNegate, # Pushes the result of -x onto the stack
        BinaryAdd, # Pushes the result of a + b onto the stack
        BinarySubtract, # Pushes the result of a - b onto the stack
        BinaryDivide, # Pushes the result of a / b onto the stack (true division). The result is a float
        BinaryFloorDiv, # Pushes the result of a // b onto the stack (integer division). The result is always an integer
        BinaryMultiply, # Pushes the result of a * b onto the stack
        BinaryPow, # Pushes the result of a ** b (a to the power of b) onto the stack
        BinaryMod, # Pushes the result of a % b onto the stack (modulo division)
        BinaryShiftRight, # Pushes the result of a >> b (a with bits shifted b times to the right) onto the stack
        BinaryShiftLeft, # Pushes the result of a << b (a with bits shifted b times to the left) onto the stack
        BinaryXor, # Pushes the result of a ^ b (bitwise exclusive or) onto the stack
        BinaryOr, # Pushes the result of a | b (bitwise or) onto the stack
        BinaryAnd, # Pushes the result of a & b (bitwise and) onto the stack
        UnaryNot, # Pushes the result of ~x (bitwise not) onto the stack
        BinaryAs, # Pushes the result of a as b onto the stack (converts a to the type of b. Explicit support from a is required)
        BinaryIs, # Pushes the result of a is b onto the stack (true if a and b point to the same object, false otherwise)
        BinaryIsNot, # Pushes the result of not (a is b). This could be implemented in terms of BinaryIs, but it's more efficient this way
        BinaryOf, # Pushes the result of a of b onto the stack (true if a is a subclass of b, false otherwise)
        BinarySlice, # Perform slicing on supported objects (like "hello"[0:2], which yields "he"). The result is pushed onto the stack
        BinarySubscript, # Subscript operator, like "hello"[0] (which pushes 'h' onto the stack)
        # Binary comparison operators
        GreaterThan, # Pushes the result of a > b onto the stack
        LessThan, # Pushes the result of a < b onto the stack
        EqualTo, # Pushes the result of a == b onto the stack
        NotEqualTo, # Pushes the result of a != b onto the stack (optimization for not (a == b))
        GreaterOrEqual, # Pushes the result of a >= b onto the stack
        LessOrEqual, # Pushes the result of a <= b onto the stack
        # Logical operators
        LogicalNot,
        LogicalAnd,
        LogicalOr,
        # Constants/singletons
        Nil,
        True,
        False,
        Nan,
        Inf,
        # Basic stack operations
        Pop,
        Push,
        PopN, # Pops N elements off the stack (optimization for exiting scopes and returning from functions)
        # Name resolution/handling
        LoadAttribute,
        DeclareName, # Declares a global dynamically bound name in the current scope
        LoadName, # Loads a dynamically bound variable
        LoadFast, # Loads a statically bound variable
        StoreName, # Sets/updates a dynamically bound variable's value
        StoreFast, # Sets/updates a statically bound variable's value
        DeleteName, # Unbinds a dynamically bound variable's name from the current scope
        DeleteFast, # Unbinds a statically bound variable's name from the current scope
        # Looping and jumping
        Jump, # Absolute and unconditional jump into the bytecode
        JumpIfFalse, # Jumps to an absolute index in the bytecode if the value at the top of the stack is falsey
        JumpIfTrue, # Jumps to an absolute index in the bytecode if the value at the top of the stack is truthy
        JumpIfFalsePop, # Like JumpIfFalse, but it also pops off the stack (regardless of truthyness). Optimization for if statements
        JumpForwards, # Relative, unconditional, positive jump in the bytecode
        JumpBackwards, # Relative, unconditional, negative jump into the bytecode
        Break, # Temporary opcode used to signal exiting out of loop
        ## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
        LongJump,
        LongJumpIfFalse,
        LongJumpIfTrue,
        LongJumpIfFalsePop,
        LongJumpForwards,
        LongJumpBackwards,
        # Functions
        MakeFunction,
        Call,
        Return
        # Exception handling
        Raise,
        ReRaise, # Re-raises active exception
        BeginTry,
        FinishTry,
        # Generators
        Yield,
        # Coroutines
        Await,
        # Collection literals
        BuildList,
        BuildDict,
        BuildSet,
        BuildTuple,
        # Misc
        Assert,
# We group instructions by their operation/operand types for easier handling when debugging
# Simple instructions encompass:
# - Instructions that push onto/pop off the stack unconditionally (True, False, PopN, Pop, etc.)
# - Unary and binary operators
const simpleInstructions* = {Return, BinaryAdd, BinaryMultiply,
                             BinaryDivide, BinarySubtract,
                             BinaryMod, BinaryPow, Nil,
                             True, False, OpCode.Nan, OpCode.Inf,
                             BinaryShiftLeft, BinaryShiftRight,
                             BinaryXor, LogicalNot, EqualTo,
                             GreaterThan, LessThan, LoadAttribute,
                             BinarySlice, Pop, UnaryNegate,
                             BinaryIs, BinaryAs, GreaterOrEqual,
                             LessOrEqual, BinaryOr, BinaryAnd,
                             UnaryNot, BinaryFloorDiv, BinaryOf, Raise,
                             ReRaise, BeginTry, FinishTry, Yield, Await}
# Constant instructions are instructions that operate on the bytecode constant table
const constantInstructions* = {LoadConstant, DeclareName, LoadName, StoreName, DeleteName}
# Stack triple instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 24 bit integers
const stackTripleInstructions* = {Call, StoreFast, DeleteFast, LoadFast}
# Stack Double instructions operate on the stack at arbitrary offsets and pop arguments off of it in the form
# of 16 bit integers
# NOTE(review): this set is currently empty — confirm whether any opcode
# is meant to live here or whether the category is reserved for later use
const stackDoubleInstructions* = {}
# Argument double argument instructions take hardcoded arguments on the stack as 16 bit integers
const argumentDoubleInstructions* = {PopN, }
# Jump instructions jump at relative or absolute bytecode offsets
const jumpInstructions* = {JumpIfFalse, JumpIfFalsePop, JumpForwards, JumpBackwards,
                           LongJumpIfFalse, LongJumpIfFalsePop,
                           LongJumpForwards,
                           LongJumpBackwards, JumpIfTrue, LongJumpIfTrue}
# Collection instructions push a built-in collection type onto the stack
const collectionInstructions* = {BuildList, BuildDict, BuildSet, BuildTuple}
proc newChunk*(reuseConsts: bool = true): Chunk =
    ## Initializes a new, empty chunk
    Chunk(consts: @[], code: @[], lines: @[], reuseConsts: reuseConsts)
proc `$`*(self: Chunk): string =
    ## Returns a string representation of the chunk's
    ## constants table, bytecode and line data
    result = &"""Chunk(consts=[{self.consts.join(", ")}], code=[{self.code.join(", ")}], lines=[{self.lines.join(", ")}])"""
proc write*(self: Chunk, newByte: uint8, line: int) =
    ## Adds the given instruction at the provided line number
    ## to the given chunk object
    assert line > 0, "line must be greater than zero"
    # self.lines is RLE-encoded as (line, count) pairs: if the
    # last recorded pair is for this same line, just bump its
    # count instead of appending a new pair
    if self.lines.high() >= 1 and self.lines[^2] == line:
        self.lines[^1] += 1
    else:
        self.lines.add(line)
        self.lines.add(1)
    self.code.add(newByte)
proc write*(self: Chunk, bytes: openarray[uint8], line: int) =
    ## Writes every byte in the given array to the chunk
    ## at the provided line number
    for b in bytes:
        self.write(b, line)
proc write*(self: Chunk, newByte: OpCode, line: int) =
    ## Writes the given opcode to the chunk at the
    ## provided line number
    self.write(newByte.uint8, line)
proc write*(self: Chunk, bytes: openarray[OpCode], line: int) =
    ## Writes every opcode in the given array to the chunk
    ## at the provided line number
    for op in bytes:
        self.write(op.uint8, line)
proc getLine*(self: Chunk, idx: int): int =
    ## Returns the line number associated with the instruction
    ## at the given index, decoding the RLE (line, count) pairs
    ## stored in self.lines.
    ## Fix: the membership test used `current - count..<current + count`,
    ## whose lower bound is too wide; it only worked because earlier
    ## groups are always checked first. The range is now exactly the
    ## indexes covered by the current group
    if self.lines.len < 2:
        raise newException(IndexDefect, "the chunk object is empty")
    var
        count: int
        current: int = 0
    for n in countup(0, self.lines.high(), 2):
        count = self.lines[n + 1]
        # This group covers instruction indexes [current, current + count)
        if idx in current..<current + count:
            return self.lines[n]
        current += count
    raise newException(IndexDefect, "index out of range")
proc findOrAddConstant(self: Chunk, constant: ASTNode): int =
    ## Small optimization function that reuses the same constant
    ## if it's already been written before (only if self.reuseConsts
    ## equals true). Returns the constant's index in the table
    if self.reuseConsts:
        for i, c in self.consts:
            # We cannot use simple equality because the nodes likely have
            # different token objects with different values
            if c.kind != constant.kind:
                continue
            if constant.isConst():
                # Primitive literals compare equal if their lexemes match
                var c = LiteralExpr(c)
                var constant = LiteralExpr(constant)
                if c.literal.lexeme == constant.literal.lexeme:
                    # This wouldn't work for stuff like 2e3 and 2000.0, but those
                    # forms are collapsed in the compiler before being written
                    # to the constants table
                    return i
            elif constant.kind == identExpr:
                # Identifiers compare equal if their names match
                var c = IdentExpr(c)
                var constant = IdentExpr(constant)
                if c.name.lexeme == constant.name.lexeme:
                    return i
            else:
                continue
    # No reusable entry (or reuse disabled): append a fresh one
    self.consts.add(constant)
    result = self.consts.high()
proc addConstant*(self: Chunk, constant: ASTNode): array[3, uint8] =
    ## Writes a constant to a chunk. Returns its index casted to a 3-byte
    ## sequence (array). Constant indexes are reused if a constant is used
    ## more than once and self.reuseConsts equals true
    # NOTE(review): this check fires before the reuse lookup, so it also
    # raises when the constant is already in the table and no growth
    # would occur — confirm whether that is intended
    if self.consts.len() == 16777215:
        # The constant index is a 24 bit unsigned integer, so that's as far
        # as we can index into the constant table (the same applies
        # to our stack by the way). Not that anyone's ever gonna hit this
        # limit in the real world, but you know, just in case
        raise newException(CompileError, "cannot encode more than 16777215 constants")
    result = self.findOrAddConstant(constant).toTriple()

View File

@ -0,0 +1,20 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
type
    # Exception hierarchy shared by the whole frontend pipeline
    NimVMException* = object of CatchableError   # Base type for all frontend errors
    LexingError* = object of NimVMException      # Raised by the lexer
    ParseError* = object of NimVMException       # Raised by the parser
    CompileError* = object of NimVMException     # Raised by the compiler
    SerializationError* = object of NimVMException  # Raised when (de)serializing bytecode

View File

@ -0,0 +1,86 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import strformat
import strutils
type
    TokenType* {.pure.} = enum
        ## Token types enumeration
        # Booleans
        True, False,
        # Other singleton types
        Infinity, NotANumber, Nil
        # Control-flow statements
        If, Else,
        # Looping statements
        While, For,
        # Keywords
        Fun, Break, Lambda,
        Continue, Var, Const, Is,
        Return, Async, Class, Import, From,
        IsNot, Raise, Assert, Del, Await,
        Foreach, Yield, Static, Dynamic,
        Private, Public, As, Of, Defer, Try,
        Except, Finally
        # Basic types
        Integer, Float, String, Identifier,
        Binary, Octal, Hex
        # Brackets, parentheses and other
        # symbols
        LeftParen, RightParen, # ()
        LeftBrace, RightBrace, # {}
        LeftBracket, RightBracket, # []
        Dot, Semicolon, Colon, Comma, # . ; : ,
        Plus, Minus, Slash, Asterisk, # + - / *
        Percentage, DoubleAsterisk, # % **
        Caret, Pipe, Ampersand, Tilde, # ^ | & ~
        Equal, GreaterThan, LessThan, # = > <
        LessOrEqual, GreaterOrEqual, # >= <=
        NotEqual, RightShift, LeftShift, # != >> <<
        LogicalAnd, LogicalOr, LogicalNot, FloorDiv, # and or not //
        InplaceAdd, InplaceSub, InplaceDiv, # += -= /=
        InplaceMod, InplaceMul, InplaceXor, # %= *= ^=
        InplaceAnd, InplaceOr, # &= |=
        DoubleEqual, InplaceFloorDiv, InplacePow, # == //= **=
        InplaceRightShift, InplaceLeftShift
        # Miscellaneous
        EndOfFile
    Token* = ref object
        ## A token object
        kind*: TokenType                 # The token's type
        lexeme*: string                  # The raw source text this token was scanned from
        line*: int                       # 1-based source line the token appears on
        pos*: tuple[start, stop: int]    # Start/stop offsets of the token in the source
proc `$`*(self: Token): string =
    ## Returns a human-readable representation of the
    ## token; nil tokens render as "nil"
    if self == nil:
        return "nil"
    result = &"Token(kind={self.kind}, lexeme={$(self.lexeme)}, line={self.line}, pos=({self.pos.start}, {self.pos.stop}))"

382
src/frontend/optimizer.nim Normal file
View File

@ -0,0 +1,382 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/ast
import meta/token
import parseutils
import strformat
import strutils
import math
type
    WarningKind* = enum
        # Kinds of warnings the optimizer can emit
        unreachableCode,
        nameShadowing,
        isWithALiteral,
        equalityWithSingleton,
        valueOverflow,
        implicitConversion,
        invalidOperation
    Warning* = ref object
        # A single diagnostic: the kind of issue and the node it concerns
        kind*: WarningKind
        node*: ASTNode
    Optimizer* = ref object
        # Warnings collected while optimizing (internal)
        warnings: seq[Warning]
        # Whether constant folding is performed at all
        foldConstants*: bool
proc initOptimizer*(foldConstants: bool = true): Optimizer =
    ## Initializes a new optimizer object
    Optimizer(foldConstants: foldConstants, warnings: @[])
proc newWarning(self: Optimizer, kind: WarningKind, node: ASTNode) =
    ## Records a warning of the given kind for the given node
    self.warnings.add(Warning(kind: kind, node: node))
proc `$`*(self: Warning): string =
    ## Returns a string representation of the warning
    result = &"Warning(kind={self.kind}, node={self.node})"
# Forward declaration: optimizeNode is the main dispatch entry
# point and is mutually recursive with the optimize* procs below
proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode
proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode =
    ## Performs some checks on constant AST nodes such as
    ## integers. This method converts all of the different
    ## integer forms (binary, octal and hexadecimal) to
    ## decimal integers. Overflows are checked here too.
    ## Returns the (possibly rewritten) node unchanged when
    ## constant folding is disabled
    # NOTE(review): the asserts below raise AssertionDefect on a partial
    # parse, which `except ValueError` does not catch — confirm whether
    # both failure modes were meant to produce a valueOverflow warning
    if not self.foldConstants:
        return node
    case node.kind:
        of intExpr:
            # Decimal ints are validated in place; overflow raises
            # ValueError from parseInt and only emits a warning
            var x: int
            var y = IntExpr(node)
            try:
                assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
            result = node
        of hexExpr:
            # Hex literals are rewritten into decimal IntExpr nodes
            var x: int
            var y = HexExpr(node)
            try:
                assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        of binExpr:
            # Binary literals are rewritten into decimal IntExpr nodes
            var x: int
            var y = BinExpr(node)
            try:
                assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        of octExpr:
            # Octal literals are rewritten into decimal IntExpr nodes
            var x: int
            var y = OctExpr(node)
            try:
                assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        of floatExpr:
            # Floats are normalized through parse/stringify
            var x: float
            var y = FloatExpr(node)
            try:
                discard parseFloat(y.literal.lexeme, x)
            except ValueError:
                self.newWarning(valueOverflow, node)
                return node
            result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: y.literal.line, pos: (start: -1, stop: -1)))
        else:
            result = node
proc optimizeUnary(self: Optimizer, node: UnaryExpr): ASTNode =
    ## Attempts to optimize unary expressions by folding
    ## negation/bitwise-not of integer and float literals
    var a = self.optimizeNode(node.a)
    if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a:
        # We can't optimize further, the overflow will be caught in the compiler
        return UnaryExpr(kind: unaryExpr, a: a, operator: node.operator)
    case a.kind:
        of intExpr:
            # Fold -x and ~x on integer literals
            var x: int
            assert parseInt(IntExpr(a).literal.lexeme, x) == len(IntExpr(a).literal.lexeme)
            case node.operator.kind:
                of Tilde:
                    x = not x
                of Minus:
                    x = -x
                else:
                    discard # Unreachable
            result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1)))
        of floatExpr:
            # Fold -x on float literals; ~x is invalid on floats
            # and only produces a warning
            var x: float
            discard parseFloat(FloatExpr(a).literal.lexeme, x)
            case node.operator.kind:
                of Minus:
                    x = -x
                of Tilde:
                    self.newWarning(invalidOperation, node)
                    return node
                else:
                    discard
            result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $x, line: node.operator.line, pos: (start: -1, stop: -1)))
        else:
            # Operand is not a foldable literal: leave the node untouched
            result = node
proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode =
    ## Attempts to constant-fold binary expressions whose operands are
    ## literals (int/int, float or mixed int/float, string concatenation
    ## and string repetition). Also emits lints for suspicious comparisons:
    ## '==' against a singleton and 'is' with a literal. When folding is
    ## not possible, a (possibly partially optimized) BinaryExpr is returned
    var a, b: ASTNode
    a = self.optimizeNode(node.a)
    b = self.optimizeNode(node.b)
    if self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and (self.warnings[^1].node == a or self.warnings[^1].node == b):
        # We can't optimize further, the overflow will be caught in the compiler. We don't return the same node
        # because optimizeNode might've been able to optimize one of the two operands and we don't know which
        return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
    if node.operator.kind == DoubleEqual:
        if a.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}:
            self.newWarning(equalityWithSingleton, a)
        elif b.kind in {trueExpr, falseExpr, nilExpr, nanExpr, infExpr}:
            self.newWarning(equalityWithSingleton, b)
    elif node.operator.kind == Is:
        if a.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}:
            self.newWarning(isWithALiteral, a)
        elif b.kind in {strExpr, intExpr, tupleExpr, dictExpr, listExpr, setExpr}:
            self.newWarning(isWithALiteral, b)
    if a.kind == intExpr and b.kind == intExpr:
        # Optimizes integer operations
        var x, y, z: int
        assert parseInt(IntExpr(a).literal.lexeme, x) == IntExpr(a).literal.lexeme.len()
        assert parseInt(IntExpr(b).literal.lexeme, y) == IntExpr(b).literal.lexeme.len()
        try:
            case node.operator.kind:
                of Plus:
                    z = x + y
                of Minus:
                    z = x - y
                of Asterisk:
                    z = x * y
                of FloorDiv:
                    z = int(x / y)
                of DoubleAsterisk:
                    if y >= 0:
                        z = x ^ y
                    else:
                        # Nim's builtin pow operator can't handle
                        # negative exponents, so we use math's
                        # pow and convert from/to floats instead
                        z = pow(x.float, y.float).int
                of Percentage:
                    z = x mod y
                of Caret:
                    z = x xor y
                of Ampersand:
                    z = x and y
                of Pipe:
                    z = x or y
                of Slash:
                    # Special case, yields a float.
                    # Fix: the node kind was mistakenly intExpr here, which
                    # mismatched both the FloatExpr type and its Float token
                    return FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $(x / y), line: IntExpr(a).literal.line, pos: (start: -1, stop: -1)))
                else:
                    # Not an operator we can fold (e.g. a comparison): return
                    # the rebuilt node immediately. Fix: this previously used
                    # 'result =', which was then overwritten by the IntExpr
                    # assignment after the try block, silently folding the
                    # expression to z's default value of 0
                    return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        except OverflowDefect:
            self.newWarning(valueOverflow, node)
            return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        except RangeDefect:
            # TODO: What warning do we raise here?
            return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        result = IntExpr(kind: intExpr, literal: Token(kind: Integer, lexeme: $z, line: IntExpr(a).literal.line, pos: (start: -1, stop: -1)))
    elif a.kind == floatExpr or b.kind == floatExpr:
        # Mixed int/float operations: integer operands are implicitly
        # promoted to floats (with a warning) and the result is a float
        var x, y, z: float
        if a.kind == intExpr:
            var temp: int
            assert parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len()
            x = float(temp)
            self.newWarning(implicitConversion, a)
        else:
            discard parseFloat(FloatExpr(a).literal.lexeme, x)
        if b.kind == intExpr:
            var temp: int
            assert parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len()
            y = float(temp)
            self.newWarning(implicitConversion, b)
        else:
            discard parseFloat(FloatExpr(b).literal.lexeme, y)
        # Optimizes float operations
        try:
            case node.operator.kind:
                of Plus:
                    z = x + y
                of Minus:
                    z = x - y
                of Asterisk:
                    z = x * y
                of FloorDiv:
                    # NOTE(review): float floor division is plain division here
                    # (no floor applied) -- confirm this matches the language's
                    # intended semantics
                    z = x / y
                of DoubleAsterisk:
                    z = pow(x, y)
                of Percentage:
                    z = x mod y
                of Slash:
                    z = x / y
                else:
                    # Same fix as the integer branch: return immediately so the
                    # FloatExpr assignment below can't clobber the result
                    return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        except OverflowDefect:
            self.newWarning(valueOverflow, node)
            return BinaryExpr(kind: binaryExpr, a: a, b: b, operator: node.operator)
        result = FloatExpr(kind: floatExpr, literal: Token(kind: Float, lexeme: $z, line: LiteralExpr(a).literal.line, pos: (start: -1, stop: -1)))
    elif a.kind == strExpr and b.kind == strExpr:
        # String concatenation (only '+' folds)
        var a = StrExpr(a)
        var b = StrExpr(b)
        case node.operator.kind:
            of Plus:
                # The surrounding quotes are stripped from both operands and
                # re-added around the result. Fix: the folded token now carries
                # line information like every other folded node
                result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)] & b.literal.lexeme[1..<(^1)] & "'", line: a.literal.line, pos: (start: -1, stop: -1)))
            else:
                result = node
    elif a.kind == strExpr and self.optimizeNode(b).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == b):
        # String repetition with the string on the left (only '*' folds)
        var a = StrExpr(a)
        var b = IntExpr(b)
        var bb: int
        assert parseInt(b.literal.lexeme, bb) == b.literal.lexeme.len()
        case node.operator.kind:
            of Asterisk:
                # Fix: line/pos info added to the folded token for consistency
                result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'", line: a.literal.line, pos: (start: -1, stop: -1)))
            else:
                result = node
    elif b.kind == strExpr and self.optimizeNode(a).kind == intExpr and not (self.warnings.len() > 0 and self.warnings[^1].kind == valueOverflow and self.warnings[^1].node == a):
        # String repetition with the string on the right
        var b = StrExpr(b)
        var a = IntExpr(a)
        var aa: int
        assert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len()
        case node.operator.kind:
            of Asterisk:
                # Fix: line/pos info added to the folded token for consistency
                result = StrExpr(kind: strExpr, literal: Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'", line: b.literal.line, pos: (start: -1, stop: -1)))
            else:
                result = node
    else:
        # There's no constant folding we can do!
        result = node
proc optimizeNode(self: Optimizer, node: ASTNode): ASTNode =
    ## Analyzes an AST node and attempts to perform
    ## optimizations on it. If no optimizations can be
    ## applied or self.foldConstants is set to false,
    ## then the same node is returned. Acts as the central
    ## dispatcher: expressions are folded via optimizeConstant/
    ## optimizeUnary/optimizeBinary, while statements and
    ## composite nodes recurse into their children
    if not self.foldConstants:
        return node
    case node.kind:
        of exprStmt:
            result = newExprStmt(self.optimizeNode(ExprStmt(node).expression), ExprStmt(node).token)
        of intExpr, hexExpr, octExpr, binExpr, floatExpr, strExpr:
            # Literal constants (all integer bases are normalized to decimal)
            result = self.optimizeConstant(node)
        of unaryExpr:
            result = self.optimizeUnary(UnaryExpr(node))
        of binaryExpr:
            result = self.optimizeBinary(BinaryExpr(node))
        of groupingExpr:
            # Recursively unnests groups
            result = self.optimizeNode(GroupingExpr(node).expression)
        of callExpr:
            # Optimizes each argument in place (positional and keyword)
            var node = CallExpr(node)
            for i, positional in node.arguments.positionals:
                node.arguments.positionals[i] = self.optimizeNode(positional)
            for i, (key, value) in node.arguments.keyword:
                node.arguments.keyword[i].value = self.optimizeNode(value)
            result = node
        of sliceExpr:
            # Optimizes both the slice bounds and the sliced expression
            var node = SliceExpr(node)
            for i, e in node.ends:
                node.ends[i] = self.optimizeNode(e)
            node.slicee = self.optimizeNode(node.slicee)
            result = node
        of tryStmt:
            # Optimizes the body, optional finally/else clauses
            # and every exception handler's body
            var node = TryStmt(node)
            node.body = self.optimizeNode(node.body)
            if node.finallyClause != nil:
                node.finallyClause = self.optimizeNode(node.finallyClause)
            if node.elseClause != nil:
                node.elseClause = self.optimizeNode(node.elseClause)
            for i, handler in node.handlers:
                node.handlers[i].body = self.optimizeNode(node.handlers[i].body)
            result = node
        of funDecl:
            # Only default argument values can be folded here;
            # the body is compiled lazily elsewhere
            var decl = FunDecl(node)
            for i, node in decl.defaults:
                decl.defaults[i] = self.optimizeNode(node)
            result = decl
        of blockStmt:
            var node = BlockStmt(node)
            for i, n in node.code:
                node.code[i] = self.optimizeNode(n)
            result = node
        of varDecl:
            var decl = VarDecl(node)
            decl.value = self.optimizeNode(decl.value)
            result = decl
        of assignExpr:
            var asgn = AssignExpr(node)
            asgn.value = self.optimizeNode(asgn.value)
            result = asgn
        of listExpr:
            # Members are optimized in place; the original (ref) node
            # is returned since the mutation is visible through it
            var l = ListExpr(node)
            for i, e in l.members:
                l.members[i] = self.optimizeNode(e)
            result = node
        of setExpr:
            var s = SetExpr(node)
            for i, e in s.members:
                s.members[i] = self.optimizeNode(e)
            result = node
        of tupleExpr:
            var t = TupleExpr(node)
            for i, e in t.members:
                t.members[i] = self.optimizeNode(e)
            result = node
        of dictExpr:
            # Keys and values are optimized independently
            var d = DictExpr(node)
            for i, e in d.keys:
                d.keys[i] = self.optimizeNode(e)
            for i, e in d.values:
                d.values[i] = self.optimizeNode(e)
            result = node
        else:
            # Anything else is returned untouched
            result = node
proc optimize*(self: Optimizer, tree: seq[ASTNode]): tuple[tree: seq[ASTNode], warnings: seq[Warning]] =
    ## Runs the optimizer on the given source
    ## tree and returns a new optimized tree
    ## as well as a list of warnings that may
    ## be of interest. The input tree may be
    ## identical to the output tree if no optimization
    ## could be performed. Constant folding can be
    ## turned off by setting foldConstants to false
    ## when initializing the optimizer object
    var optimized = newSeqOfCap[ASTNode](len(tree))
    for node in tree:
        optimized.add(self.optimizeNode(node))
    result = (tree: optimized, warnings: self.warnings)

1078
src/frontend/parser.nim Normal file

File diff suppressed because it is too large Load Diff

273
src/frontend/serializer.nim Normal file
View File

@ -0,0 +1,273 @@
# Copyright 2021 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta/ast
import meta/errors
import meta/bytecode
import meta/token
import ../config
import ../util/multibyte
import strformat
import strutils
import nimSHA2
import times
export ast
type
    Serializer* = ref object
        ## (De)serializes a compiled chunk of
        ## JAPL bytecode to/from a byte stream
        file: string        # The source file's full content (used to compute its SHA256 hash)
        filename: string    # The source file's name (used in error messages)
        chunk: Chunk        # The chunk being written out or filled in
    Serialized* = ref object
        ## Wrapper returned by
        ## the Serializer.read*
        ## procedures to store
        ## metadata
        fileHash*: string                            # Hex-encoded SHA256 of the source file
        japlVer*: tuple[major, minor, patch: int]    # Compiler version the chunk was built with
        japlBranch*: string                          # Git branch of the compiler
        commitHash*: string                          # 40-character git commit hash of the compiler
        compileDate*: int                            # Unix timestamp of when the chunk was compiled
        chunk*: Chunk                                # The deserialized bytecode chunk
proc `$`*(self: Serialized): string =
    ## Returns a human-readable string representation
    ## of a Serialized object for debugging purposes.
    ## Fix: the closing parenthesis was misplaced (it appeared
    ## right after the branch field instead of at the end)
    result = &"Serialized(fileHash={self.fileHash}, version={self.japlVer.major}.{self.japlVer.minor}.{self.japlVer.patch}, branch={self.japlBranch}, commitHash={self.commitHash}, date={self.compileDate}, chunk={self.chunk[]})"
proc error(self: Serializer, message: string) =
    ## Raises a SerializationError with a standard prefix
    ## identifying the file being (de)serialized
    let details = &"A fatal error occurred while (de)serializing '{self.filename}' -> {message}"
    raise newException(SerializationError, details)
proc initSerializer*(self: Serializer = nil): Serializer =
    ## Initializes a Serializer object, resetting its
    ## state. If an existing instance is passed in, it
    ## is reused instead of allocating a new one
    if self.isNil():
        new(result)
    else:
        result = self
    result.file = ""
    result.filename = ""
    result.chunk = nil
## Basic routines and helpers to convert various objects from and to their byte representation


proc toBytes(self: Serializer, s: string): seq[byte] =
    ## Encodes a string as a sequence of its raw bytes
    result = newSeqOfCap[byte](len(s))
    for character in s:
        result.add(byte(character))
proc toBytes(self: Serializer, s: int): array[8, uint8] =
    ## Reinterprets the raw memory of an int as an
    ## array of 8 bytes (native byte order)
    copyMem(result.addr, s.unsafeAddr, sizeof(s))
proc toBytes(self: Serializer, d: SHA256Digest): seq[byte] =
    ## Flattens a SHA256 digest into a plain byte sequence
    for value in d:
        result.add(value)
proc bytesToString(self: Serializer, input: seq[byte]): string =
    ## Decodes a sequence of bytes back into a string
    result = newStringOfCap(len(input))
    for b in input:
        result.add(char(b))
proc bytesToInt(self: Serializer, input: array[8, byte]): int =
    ## Reinterprets 8 raw bytes as an int in native byte
    ## order (inverse of toBytes for integers)
    copyMem(result.addr, input.unsafeAddr, sizeof(int))
proc bytesToInt(self: Serializer, input: array[3, byte]): int =
    ## Decodes a 3-byte value into an int by copying the bytes
    ## into the integer's low-order memory (the rest stays zero,
    ## as result is zero-initialized).
    ## NOTE(review): this assumes a little-endian host -- on a
    ## big-endian machine the bytes would land in the most
    ## significant positions. Confirm if portability matters
    copyMem(result.addr, input.unsafeAddr, sizeof(byte) * 3)
proc extend[T](s: var seq[T], a: openarray[T]) =
    ## Appends all the elements of a to the end of s,
    ## delegating to the system seq append overload
    s.add(a)
proc writeHeaders(self: Serializer, stream: var seq[byte], file: string) =
    ## Writes the JAPL bytecode headers in-place into a byte stream.
    ## Layout (in order): bytecode marker, 3 version bytes, a
    ## length-prefixed branch name, the 40-character commit hash,
    ## an 8-byte compile timestamp and the 32-byte SHA256 of the source
    stream.extend(self.toBytes(BYTECODE_MARKER))
    stream.add(byte(JAPL_VERSION.major))
    stream.add(byte(JAPL_VERSION.minor))
    stream.add(byte(JAPL_VERSION.patch))
    # The branch name is variable-length, so a single size byte precedes it
    stream.add(byte(len(JAPL_BRANCH)))
    stream.extend(self.toBytes(JAPL_BRANCH))
    # The reader relies on the hash being exactly 40 characters, so this
    # is enforced at write time
    if len(JAPL_COMMIT_HASH) != 40:
        self.error("the commit hash must be exactly 40 characters long")
    stream.extend(self.toBytes(JAPL_COMMIT_HASH))
    # Compile date as a unix timestamp (8 bytes via toBytes(int))
    stream.extend(self.toBytes(getTime().toUnixFloat().int()))
    # Raw 32-byte SHA256 digest of the source file's contents
    stream.extend(self.toBytes(computeSHA256(file)))
proc writeConstants(self: Serializer, stream: var seq[byte]) =
    ## Writes the constants table in-place into the given stream.
    ## Each entry starts with a 1-byte type tag (0x0 = identifier,
    ## 0x1 = number, 0x2 = string) followed by a 3-byte length and
    ## the payload; the whole table ends with the 0x59 marker
    for constant in self.chunk.consts:
        case constant.kind:
            of intExpr, floatExpr:
                # Numbers are stored as their textual lexeme; the reader
                # re-parses them and infers int vs float from the payload
                stream.add(0x1)
                stream.extend(len(constant.token.lexeme).toTriple())
                stream.extend(self.toBytes(constant.token.lexeme))
            of strExpr:
                stream.add(0x2)
                var temp: seq[byte] = @[]
                var strip: int = 2
                var offset: int = 1
                # A leading 'f' or 'b' marks format/byte strings: the prefix
                # is dropped from the payload and encoded as a modifier byte
                # instead (0x2 = format, 0x1 = bytes, 0x0 = plain)
                case constant.token.lexeme[0]:
                    of 'f':
                        strip = 3
                        inc(offset)
                        temp.add(0x2)
                    of 'b':
                        strip = 3
                        inc(offset)
                        temp.add(0x1)
                    else:
                        strip = 2
                        temp.add(0x0)
                stream.extend((len(constant.token.lexeme) - strip).toTriple())  # Removes the quotes from the length count as they're not written
                stream.extend(temp)
                # The payload is the lexeme without its prefix and quotes
                stream.add(self.toBytes(constant.token.lexeme[offset..^2]))
            of identExpr:
                stream.add(0x0)
                stream.extend(len(constant.token.lexeme).toTriple())
                stream.add(self.toBytes(constant.token.lexeme))
            else:
                self.error(&"unknown constant kind in chunk table ({constant.kind})")
    stream.add(0x59)  # End marker
proc readConstants(self: Serializer, stream: seq[byte]): int =
    ## Reads the constant table from the given stream and
    ## adds each constant to the chunk object (note: most compile-time
    ## information such as the original token objects and line info is lost when
    ## serializing the data, so those fields are set to nil or some default
    ## value). Returns the number of bytes that were processed in the stream.
    ## The entry format mirrors writeConstants: a 1-byte tag (0x59 ends the
    ## table), a 3-byte length, and the payload (strings carry an extra
    ## modifier byte)
    var stream = stream
    var count: int = 0
    while true:
        case stream[0]:
            of 0x59:
                # End marker: the table is done
                inc(count)
                break
            of 0x2:
                # String constant: 3-byte length, 1 modifier byte, then the payload
                stream = stream[1..^1]
                let size = self.bytesToInt([stream[0], stream[1], stream[2]])
                stream = stream[3..^1]
                var s = newStrExpr(Token(lexeme: ""))
                # The modifier byte restores the 'b'/'f' string prefix
                case stream[0]:
                    of 0x0:
                        discard
                    of 0x1:
                        s.token.lexeme.add("b")
                    of 0x2:
                        s.token.lexeme.add("f")
                    else:
                        # Fix: the error message was missing its closing parenthesis
                        self.error(&"unknown string modifier in chunk table (0x{stream[0].toHex()})")
                stream = stream[1..^1]
                # The quotes were stripped when serializing, so they're re-added here
                s.token.lexeme.add("\"")
                for i in countup(0, size - 1):
                    s.token.lexeme.add(cast[char](stream[i]))
                s.token.lexeme.add("\"")
                stream = stream[size..^1]
                self.chunk.consts.add(s)
                # 1 tag byte + 3 length bytes + 1 modifier byte + payload
                inc(count, size + 5)
            of 0x1:
                # Numeric constant: 3-byte length, then the digits. A decimal
                # point in the payload distinguishes floats from integers
                stream = stream[1..^1]
                inc(count)
                let size = self.bytesToInt([stream[0], stream[1], stream[2]])
                stream = stream[3..^1]
                inc(count, 3)
                var tok: Token = new(Token)
                tok.lexeme = self.bytesToString(stream[0..<size])
                if "." in tok.lexeme:
                    tok.kind = Float
                    self.chunk.consts.add(newFloatExpr(tok))
                else:
                    tok.kind = Integer
                    self.chunk.consts.add(newIntExpr(tok))
                stream = stream[size..^1]
                inc(count, size)
            of 0x0:
                # Identifier constant: 3-byte length, then the name
                stream = stream[1..^1]
                let size = self.bytesToInt([stream[0], stream[1], stream[2]])
                stream = stream[3..^1]
                discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..<size]))))
                stream = stream[size..^1]
                # 1 tag byte + 3 length bytes + payload
                inc(count, size + 4)
            else:
                self.error(&"unknown constant kind in chunk table (0x{stream[0].toHex()})")
    result = count
proc writeCode(self: Serializer, stream: var seq[byte]) =
    ## Appends the chunk's bytecode to the given stream,
    ## prefixed with its size encoded as a 3-byte value
    stream.extend(len(self.chunk.code).toTriple())
    stream.extend(self.chunk.code)
proc readCode(self: Serializer, stream: seq[byte]): int =
    ## Reads the bytecode section (a 3-byte size prefix followed
    ## by the raw instructions) from the given stream into the
    ## chunk and returns the number of instruction bytes consumed
    let size = [stream[0], stream[1], stream[2]].fromTriple()
    let data = stream[3..^1]
    for i in 0..<int(size):
        self.chunk.code.add(data[i])
    assert len(self.chunk.code) == int(size)
    return int(size)
proc dumpBytes*(self: Serializer, chunk: Chunk, file, filename: string): seq[byte] =
    ## Dumps the given bytecode and file to a sequence of bytes and returns it.
    ## The file argument must be the actual file's content and is needed to compute its SHA256 hash.
    self.file = file
    self.filename = filename
    self.chunk = chunk
    # Sections are written in the order loadBytes reads them:
    # headers first, then the constant table, then the bytecode
    self.writeHeaders(result, self.file)
    self.writeConstants(result)
    self.writeCode(result)
proc loadBytes*(self: Serializer, stream: seq[byte]): Serialized =
    ## Loads the result from dumpBytes to a Serializer object
    ## for use in the VM or for inspection. The headers are parsed
    ## first (marker, version, branch, commit hash, compile date,
    ## source hash), then the constant table and the bytecode
    discard self.initSerializer()
    new(result)
    result.chunk = newChunk()
    self.chunk = result.chunk
    var stream = stream
    try:
        if stream[0..<len(BYTECODE_MARKER)] != self.toBytes(BYTECODE_MARKER):
            self.error("malformed bytecode marker")
        stream = stream[len(BYTECODE_MARKER)..^1]
        # Version is three single bytes: major, minor, patch
        result.japlVer = (major: int(stream[0]), minor: int(stream[1]), patch: int(stream[2]))
        stream = stream[3..^1]
        # The branch name is length-prefixed with one byte
        let branchLength = stream[0]
        stream = stream[1..^1]
        result.japlBranch = self.bytesToString(stream[0..<branchLength])
        stream = stream[branchLength..^1]
        # The commit hash is always 40 hex characters
        result.commitHash = self.bytesToString(stream[0..<40]).toLowerAscii()
        stream = stream[40..^1]
        # Compile date is an 8-byte unix timestamp
        result.compileDate = self.bytesToInt([stream[0], stream[1], stream[2], stream[3], stream[4], stream[5], stream[6], stream[7]])
        stream = stream[8..^1]
        # The source hash is a raw 32-byte SHA256 digest, re-encoded as hex
        result.fileHash = self.bytesToString(stream[0..<32]).toHex().toLowerAscii()
        stream = stream[32..^1]
        # Both readers return how many bytes they consumed
        stream = stream[self.readConstants(stream)..^1]
        stream = stream[self.readCode(stream)..^1]
    except IndexDefect:
        # Any out-of-bounds access means the stream ended early.
        # NOTE(review): catching Defects is only reliable when the build
        # allows it (e.g. --panics:off) -- confirm the project's settings
        self.error("truncated bytecode file")
    except AssertionDefect:
        # readCode's size sanity check failed
        self.error("corrupted bytecode file")