2022-04-04 12:29:23 +02:00
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta / token
import meta / ast
import meta / errors
import meta / bytecode
import .. / config
import .. / util / multibyte
import strformat
import algorithm
import parseutils
import sequtils
2022-04-21 15:25:29 +02:00
import strutils
2022-04-04 12:29:23 +02:00
export ast
export bytecode
export token
export multibyte
type
  Name = ref object
    ## Compile-time bookkeeping entry for a
    ## statically resolved name
    # The identifier the name was declared with
    name: IdentExpr
    # The module that declared the name
    owner: string
    # Scope depth the name belongs to; zero
    # means the global scope
    depth: int
    # Private names are only resolved from
    # within the module that owns them.
    # Constants are loaded through constant
    # instructions and can't be assigned to
    isPrivate, isConst: bool
    # The type recorded for the name's value
    valueType: IdentExpr

  Loop = object
    ## Loop bookkeeping used by the compiler
    ## to emit the right jump offsets for
    ## continue and break statements
    start, depth: int
    breakPos: seq[int]

  Compiler* = ref object
    ## Holds the whole state of the compiler
    # Bytecode chunk that receives the emitted code
    chunk: Chunk
    # The parser's output (AST)
    ast: seq[ASTNode]
    # Index of the AST node currently being looked at
    current: int
    # Name of the file being compiled (only used
    # when reporting errors)
    file: string
    # Compile-time "simulation" of the runtime stack,
    # used to load variables with stack behavior
    # more efficiently
    names: seq[Name]
    # Current scope depth: greater than zero inside
    # a local scope, zero in the global one
    scopeDepth: int
    # The function currently being compiled
    currentFunction: FunDecl
    # Whether optimizations are turned on
    enableOptimizations*: bool
    # The loop currently being compiled (keeps
    # track of where to jump)
    currentLoop: Loop
    # The module currently being compiled (restricts
    # access to statically defined variables at
    # compile time)
    currentModule: string
    # Code of every defer statement compiled so far
    # for the current function. If any is present,
    # funDecl wraps the function's code in an implicit
    # try/finally block and places this code in the
    # finally branch. The sequence is emptied every
    # time a fun declaration is compiled, so it only
    # ever holds deferred code for the current
    # function (and may be empty)
    deferred: seq[uint8]
    # List of closed-over variables
    closedOver: seq[IdentExpr]
2022-04-11 14:41:20 +02:00
proc newCompiler*(enableOptimizations: bool = true): Compiler =
  ## Builds a fresh Compiler with empty state. The
  ## enableOptimizations flag is stored unchanged on
  ## the returned object; fields not listed here keep
  ## their default (zero) value
  result = Compiler(
    ast: @[],
    current: 0,
    file: " ",
    names: @[],
    scopeDepth: 0,
    currentFunction: nil,
    currentModule: " ",
    enableOptimizations: enableOptimizations
  )
# Forward declarations: the bodies of these
# procedures are defined further down
proc peek(self: Compiler, distance: int = 0): ASTNode
proc expression(self: Compiler, node: ASTNode)
proc statement(self: Compiler, node: ASTNode)
proc declaration(self: Compiler, node: ASTNode)
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
# End of forward declarations
2022-04-26 09:29:59 +02:00
proc getCurrentNode*(self: Compiler): ASTNode =
  ## Public getter for nicer error formatting. Returns the
  ## AST node right before self.current or, when self.current
  ## has moved past the end of the tree, its last node. If
  ## self.current is still zero the first node is returned.
  ## Indexing an empty tree still raises an IndexDefect
  if self.current >= self.ast.len():
    return self.ast[^1]
  # Clamp the index: self.current - 1 would be -1 (and
  # therefore out of bounds) before the first step
  result = self.ast[max(self.current - 1, 0)]
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
  ## Returns the AST node located distance nodes away
  ## from the current one. Negative distances reach
  ## back to nodes that were already consumed. Any
  ## position outside the tree yields its last node
  ## (which raises an IndexDefect if the tree is empty)
  let target = self.current + distance
  if self.ast.len() > 0 and target >= 0 and target <= self.ast.high():
    return self.ast[target]
  result = self.ast[^1]
proc done(self: Compiler): bool =
  ## Tells whether every AST node has been
  ## consumed, i.e. the current index moved
  ## past the last node
  self.current >= self.ast.len()
2022-04-26 13:04:40 +02:00
proc error(self: Compiler, message: string) {.raises: [CompileError, ValueError].} =
  ## Always raises a CompileError whose text carries the
  ## file, the module, plus the line and lexeme of the
  ## token of the node returned by getCurrentNode,
  ## followed by the given message
  let tok = self.getCurrentNode().token
  let text = &" A fatal error occurred while compiling ' {self.file} ' , module ' {self.currentModule} ' line {tok.line} at ' {tok.lexeme} ' -> {message} "
  raise newException(CompileError, text)
proc step(self: Compiler): ASTNode =
  ## Returns the node currently peeked at and,
  ## unless compilation is already done, advances
  ## to the next one
  result = self.peek()
  if self.done():
    return
  inc(self.current)
proc emitByte(self: Compiler, byt: OpCode | uint8) =
  ## Writes one byte to the chunk being compiled,
  ## tagging it with the line of the token of the
  ## node currently peeked at
  when DEBUG_TRACE_COMPILER:
    echo &" DEBUG - Compiler: Emitting { $byt } "
  let line = self.peek().token.line
  self.chunk.write(uint8(byt), line)
proc emitBytes(self: Compiler, byt1: OpCode | uint8, byt2: OpCode | uint8) =
  ## Emits two bytes in a row, in the given order. Handy
  ## for an operator followed by its operand or for
  ## instructions that span more than one byte
  for raw in [uint8(byt1), uint8(byt2)]:
    self.emitByte(raw)
proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
  ## Writes both elements of a 2-byte array to the
  ## current chunk, first element first
  for item in bytarr:
    self.emitByte(item)
proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
  ## Writes all three elements of a 3-byte array to
  ## the current chunk, first element first
  for item in bytarr:
    self.emitByte(item)
proc makeConstant(self: Compiler, val: ASTNode): array[3, uint8] =
  ## Registers val in the constant table of the
  ## current chunk and hands back the 3-byte array
  ## returned by the chunk's addConstant
  self.chunk.addConstant(val)
proc emitConstant(self: Compiler, obj: ASTNode) =
  ## Emits a LoadConstant instruction followed by
  ## the 3-byte operand produced by makeConstant
  self.emitByte(LoadConstant)
  let operand = self.makeConstant(obj)
  self.emitBytes(operand)
proc emitJump(self: Compiler, opcode: OpCode): int =
  ## Emits the given jump opcode followed by a 3-byte
  ## placeholder offset (the largest unsigned 24 bit
  ## integer), 4 bytes in total, to be fixed up later
  ## by patchJump, which also takes care of switching
  ## to the 16 bit form when that fits. The returned
  ## value is the absolute index of the opcode inside
  ## the chunk's bytecode array
  self.emitByte(opcode)
  let placeholder = (0xffffff).toTriple()
  self.emitBytes(placeholder)
  result = self.chunk.code.len() - 4
proc patchJump(self: Compiler, offset: int) =
  ## Patches the jump previously written by emitJump at
  ## the given offset so that it targets the current end
  ## of the bytecode. emitJump always reserves a 3-byte
  ## operand: when the distance is below the 16 bit limit
  ## one operand byte is deleted and long opcodes are
  ## swapped for their short counterpart; otherwise short
  ## opcodes are promoted to their long counterpart and
  ## the full 3-byte operand is written. Raises a
  ## CompileError when the distance exceeds 24 bits
  let jump: int = self.chunk.code.len() - offset
  if jump > 16777215:
    self.error(" cannot jump more than 16777215 bytecode instructions ")
  if jump < uint16.high().int:
    case OpCode(self.chunk.code[offset]):
      of LongJumpForwards:
        self.chunk.code[offset] = JumpForwards.uint8()
      of LongJumpBackwards:
        self.chunk.code[offset] = JumpBackwards.uint8()
      of LongJumpIfFalse:
        self.chunk.code[offset] = JumpIfFalse.uint8()
      of LongJumpIfFalsePop:
        self.chunk.code[offset] = JumpIfFalsePop.uint8()
      of LongJumpIfFalseOrPop:
        self.chunk.code[offset] = JumpIfFalseOrPop.uint8()
      of LongJumpIfTrue:
        self.chunk.code[offset] = JumpIfTrue.uint8()
      else:
        discard
    # Discards the first 8 bits of the jump offset (which are empty)
    self.chunk.code.delete(offset + 1)
    # -1 since we got rid of 1 byte!
    let offsetArray = (jump - 1).toDouble()
    self.chunk.code[offset + 1] = offsetArray[0]
    self.chunk.code[offset + 2] = offsetArray[1]
  else:
    case OpCode(self.chunk.code[offset]):
      of JumpForwards:
        self.chunk.code[offset] = LongJumpForwards.uint8()
      of JumpBackwards:
        self.chunk.code[offset] = LongJumpBackwards.uint8()
      of JumpIfFalse:
        self.chunk.code[offset] = LongJumpIfFalse.uint8()
      of JumpIfFalsePop:
        self.chunk.code[offset] = LongJumpIfFalsePop.uint8()
      of JumpIfFalseOrPop:
        self.chunk.code[offset] = LongJumpIfFalseOrPop.uint8()
      of JumpIfTrue:
        # Without this promotion a short opcode would be
        # left in front of a 3-byte operand
        self.chunk.code[offset] = LongJumpIfTrue.uint8()
      else:
        discard
    let offsetArray = jump.toTriple()
    self.chunk.code[offset + 1] = offsetArray[0]
    self.chunk.code[offset + 2] = offsetArray[1]
    self.chunk.code[offset + 3] = offsetArray[2]
## End of utility functions
proc emitConvertedInt(self: Compiler, value: int, origin: Token) =
  ## Private helper of literal(): emits value as an integer
  ## constant whose token keeps the line and start position
  ## of the original (hex, binary or octal) token
  let text = $value
  self.emitConstant(newIntExpr(Token(lexeme: text, line: origin.line,
      pos: (start: origin.pos.start, stop: origin.pos.start + len(text)))))


proc literal(self: Compiler, node: ASTNode) =
  ## Emits instructions for literals such as singletons,
  ## strings, numbers and collections (plus await
  ## expressions). Raises a CompileError for out of range
  ## numbers, for collections whose size doesn't fit the
  ## 24-bit operand and for unexpected node kinds

  template emitCollection(members: untyped, builder: OpCode) =
    # Compiles every member, then emits the builder opcode
    # followed by the member count as a 24-bit operand. The
    # largest count that fits in 24 bits is 16777215
    let count = members.len()
    if count > 16777215:
      self.error(" collection literals can ' t have more than 16777215 elements ")
    for member in members:
      self.expression(member)
    self.emitByte(builder)
    self.emitBytes(count.toTriple())

  case node.kind:
    of trueExpr:
      self.emitByte(OpCode.True)
    of falseExpr:
      self.emitByte(OpCode.False)
    of nilExpr:
      self.emitByte(OpCode.Nil)
    of infExpr:
      self.emitByte(OpCode.Inf)
    of nanExpr:
      self.emitByte(OpCode.Nan)
    of strExpr:
      self.emitConstant(node)
    # The optimizer will emit warning
    # for overflowing numbers. Here, we
    # treat them as errors
    of intExpr:
      var x: int
      let y = IntExpr(node)
      try:
        discard parseInt(y.literal.lexeme, x)
      except ValueError:
        self.error(" integer value out of range ")
      self.emitConstant(y)
    # Even though most likely the optimizer
    # will collapse all these other literals
    # to nodes of kind intExpr, that can be
    # disabled. This also allows us to catch
    # basic overflow errors before running any code.
    # NOTE(review): parseutils documents parseHex, parseBin
    # and parseOct as not checking for overflow, so the
    # ValueError handlers below may never fire - confirm
    of hexExpr:
      var x: int
      let y = HexExpr(node)
      try:
        discard parseHex(y.literal.lexeme, x)
      except ValueError:
        self.error(" integer value out of range ")
      self.emitConvertedInt(x, y.token)
    of binExpr:
      var x: int
      let y = BinExpr(node)
      try:
        discard parseBin(y.literal.lexeme, x)
      except ValueError:
        self.error(" integer value out of range ")
      self.emitConvertedInt(x, y.token)
    of octExpr:
      var x: int
      let y = OctExpr(node)
      try:
        discard parseOct(y.literal.lexeme, x)
      except ValueError:
        self.error(" integer value out of range ")
      self.emitConvertedInt(x, y.token)
    of floatExpr:
      var x: float
      let y = FloatExpr(node)
      try:
        discard parseFloat(y.literal.lexeme, x)
      except ValueError:
        self.error(" floating point value out of range ")
      self.emitConstant(y)
    of listExpr:
      emitCollection(ListExpr(node).members, BuildList)
    of tupleExpr:
      emitCollection(TupleExpr(node).members, BuildTuple)
    of setExpr:
      emitCollection(SetExpr(node).members, BuildSet)
    of dictExpr:
      let y = DictExpr(node)
      if y.keys.len() > 16777215:
        self.error(" collection literals can ' t have more than 16777215 elements ")
      # Each key is pushed right before its value
      for (key, value) in zip(y.keys, y.values):
        self.expression(key)
        self.expression(value)
      self.emitByte(BuildDict)
      self.emitBytes(y.keys.len().toTriple())
    of awaitExpr:
      let y = AwaitExpr(node)
      self.expression(y.expression)
      self.emitByte(OpCode.Await)
    else:
      self.error(&" invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug) ")
proc unary(self: Compiler, node: UnaryExpr) =
  ## Compiles unary expressions (negation, unary plus,
  ## logical not and bitwise not): the operand is pushed
  ## first, then the opcode matching the operator is
  ## emitted. Unknown operators raise a CompileError
  self.expression(node.a)
  var opcode: OpCode
  case node.operator.kind:
    of Minus:
      opcode = UnaryNegate
    of Plus:
      opcode = UnaryPlus
    of TokenType.LogicalNot:
      opcode = OpCode.LogicalNot
    of Tilde:
      opcode = UnaryNot
    else:
      self.error(&" invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug) ")
  self.emitByte(opcode)
proc binary(self: Compiler, node: BinaryExpr) =
  ## Compiles all binary expressions. Logical and/or are
  ## short-circuiting: the left operand is compiled, a
  ## conditional jump over the right operand is emitted and
  ## only then the right operand is compiled. Every other
  ## operator pushes both operands and then emits a single
  ## opcode. Unknown operators raise a CompileError
  case node.operator.kind:
    of TokenType.LogicalAnd:
      # Each operand must be compiled exactly once here:
      # pushing both of them up front would evaluate the
      # right-hand side unconditionally
      self.expression(node.a)
      var jump: int
      if self.enableOptimizations:
        jump = self.emitJump(JumpIfFalseOrPop)
      else:
        jump = self.emitJump(JumpIfFalse)
        self.emitByte(Pop)
      self.expression(node.b)
      self.patchJump(jump)
      return
    of TokenType.LogicalOr:
      self.expression(node.a)
      let jump = self.emitJump(JumpIfTrue)
      self.expression(node.b)
      self.patchJump(jump)
      return
    else:
      discard
  # These two lines prepare the stack by pushing the
  # opcode's operands onto it
  self.expression(node.a)
  self.expression(node.b)
  var opcode: OpCode
  case node.operator.kind:
    of Plus:
      opcode = BinaryAdd
    of Minus:
      opcode = BinarySubtract
    of Star:
      opcode = BinaryMultiply
    of DoubleStar:
      opcode = BinaryPow
    of Percentage:
      opcode = BinaryMod
    of FloorDiv:
      opcode = BinaryFloorDiv
    of Slash:
      opcode = BinaryDivide
    of Ampersand:
      opcode = BinaryAnd
    of Caret:
      opcode = BinaryXor
    of Pipe:
      opcode = BinaryOr
    of As:
      opcode = BinaryAs
    of Is:
      opcode = BinaryIs
    of IsNot:
      opcode = BinaryIsNot
    of Of:
      opcode = BinaryOf
    of RightShift:
      opcode = BinaryShiftRight
    of LeftShift:
      opcode = BinaryShiftLeft
    of TokenType.LessThan:
      opcode = OpCode.LessThan
    of TokenType.GreaterThan:
      opcode = OpCode.GreaterThan
    of TokenType.DoubleEqual:
      opcode = EqualTo
    of TokenType.LessOrEqual:
      opcode = OpCode.LessOrEqual
    of TokenType.GreaterOrEqual:
      opcode = OpCode.GreaterOrEqual
    # TODO: In-place operations
    else:
      self.error(&" invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug) ")
  self.emitByte(opcode)
2022-04-26 09:29:59 +02:00
proc declareName(self: Compiler, node: ASTNode, kind: IdentExpr) =
  ## Compiles all name declarations. For variables a new
  ## Name entry typed as kind is appended to self.names and
  ## a StoreVar with the entry's index is emitted. For
  ## functions the arguments are registered one scope deeper
  ## and a LoadVar is emitted for each of them. Other node
  ## kinds are ignored for now. Raises a CompileError when
  ## the index of a new name would not fit in 24 bits
  case node.kind:
    of NodeKind.varDecl:
      let decl = VarDecl(node)
      # Creates a new Name entry so that self.identifier emits the proper stack offset.
      # The entry added below gets index names.high() + 1, which must fit in
      # the 24-bit operand (i.e. be at most 16777215).
      # If someone ever hits this limit in real-world scenarios, I swear I'll
      # slap myself 100 times with a sign saying "I'm dumb". Mark my words
      if self.names.high() >= 16777215:
        self.error(" cannot declare more than 16777216 variables at a time ")
      self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(decl.name),
                          isPrivate: decl.isPrivate,
                          owner: self.currentModule,
                          isConst: decl.isConst,
                          valueType: kind))
      self.emitByte(StoreVar)
      self.emitBytes(self.names.high().toTriple())
    of funDecl:
      let decl = FunDecl(node)
      # Declares the function's name in the
      # current (outer) scope...
      # NOTE(review): decl.name is handed to declareName, which
      # only handles varDecl and funDecl nodes - confirm that the
      # function's own name actually gets registered
      self.declareName(decl.name, IdentExpr(decl.returnType))
      # ... but its arguments in an inner one!
      self.scopeDepth += 1
      # (this ugly part is needed because
      # self.blockStmt() already increments
      # and decrements the scope depth)
      for argument in decl.arguments:
        if self.names.high() >= 16777215:
          self.error(" cannot declare more than 16777216 variables at a time ")
        self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true,
                            owner: self.currentModule, isConst: false,
                            name: IdentExpr(argument.name), valueType: kind))
        self.emitByte(LoadVar)
        self.emitBytes(self.names.high().toTriple())
      self.scopeDepth -= 1
      # TODO: Default arguments and unpacking
    else:
      discard # TODO: Classes
proc resolveStatic(self: Compiler, name: IdentExpr,
                   depth: int = self.scopeDepth): Name =
  ## Walks self.names from the most recent entry to the
  ## oldest one and returns the first entry whose lexeme
  ## matches the given name, or nil when there is none.
  ## Scope depth is not taken into account here (the depth
  ## parameter is unused), getStaticIndex does that job.
  ## If the first match is private and owned by another
  ## module, nil is returned
  for i in countdown(self.names.high(), 0):
    let candidate = self.names[i]
    if candidate.name.token.lexeme != name.token.lexeme:
      continue
    if candidate.isPrivate and candidate.owner != self.currentModule:
      return nil
    return candidate
  return nil
2022-04-26 09:29:59 +02:00
proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
  ## Gets the predicted stack position of the given variable and
  ## returns a tuple (closedOver, pos) that tells the caller whether
  ## the variable is to be emitted as a closure as well as its predicted
  ## stack/closure array position. For stack variables pos is the index
  ## into self.names, for closures it is the index into self.closedOver.
  ## Returns (false, -1) if the variable's location can not be determined
  ## at compile time (this is an error!). Note that private names declared
  ## in other modules will not be resolved!
  for i in countdown(self.names.high(), 0):
    let variable = self.names[i]
    if name.name.lexeme != variable.name.name.lexeme:
      continue
    if variable.isPrivate and variable.owner != self.currentModule:
      return (false, -1)
    if variable.depth == depth or variable.depth == 0:
      # variable.depth == 0 for globals!
      return (false, i)
    elif variable.depth > 0:
      # The most recent closure entry wins. Walking the indices
      # backwards (rather than enumerating a reversed copy) makes
      # the reported position an index into self.closedOver itself
      for j in countdown(self.closedOver.high(), 0):
        if self.closedOver[j].name.lexeme == name.name.lexeme:
          return (true, j)
  return (false, -1)
2022-04-04 12:29:23 +02:00
proc identifier(self: Compiler, node: IdentExpr) =
  ## Compiles access to identifiers. Constants are emitted
  ## with emitConstant, stack variables with LoadVar and
  ## closed-over variables with LoadHeap, the latter two
  ## followed by the position computed by getStaticIndex.
  ## Raises a CompileError when the name can't be resolved
  ## or its position can't be determined
  let s = self.resolveStatic(node)
  if s == nil:
    self.error(&" reference to undeclared name ' {node.token.lexeme} ' ")
  if s.isConst:
    # Constants are emitted as, you guessed it, constant instructions
    # no matter the scope depth. Also, name resolution specifiers do not
    # apply to them (because what would it mean for a constant to be dynamic
    # anyway?)
    self.emitConstant(node)
    return
  let t = self.getStaticIndex(node)
  let index = t.pos
  if index == -1:
    # getStaticIndex could not place the name: report it
    # instead of silently emitting nothing
    self.error(&" reference to undeclared name ' {node.token.lexeme} ' ")
  if not t.closedOver:
    # Static name resolution, loads value at index in the stack. Very fast. Much wow.
    self.emitByte(LoadVar)
    self.emitBytes(index.toTriple())
  else:
    if self.closedOver.len() == 0:
      self.error(" error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug) ")
    if self.closedOver.len() >= 16777216:
      self.error(" too many consecutive closure-over variables (max is 16777216) ")
    # Heap-allocated closure variable. Stored in a separate "closure array" in the VM
    # that does not have stack semantics. The operand is the position resolved for
    # this very name, the same value assignment() passes to StoreHeap
    self.emitByte(LoadHeap)
    self.emitBytes(index.toTriple())
2022-04-04 12:29:23 +02:00
proc assignment(self: Compiler, node: ASTNode) =
  ## Compiles assignment expressions. The assigned value is
  ## compiled first, then the opcode of an in-place operator
  ## (if any) is emitted and finally the result is stored
  ## with StoreVar or StoreHeap at the position computed by
  ## getStaticIndex. Raises a CompileError when assigning to
  ## a constant or to an undeclared name
  case node.kind:
    of assignExpr:
      let assign = AssignExpr(node)
      let name = IdentExpr(assign.name)
      let r = self.resolveStatic(name)
      if r != nil and r.isConst:
        self.error(" cannot assign to constant ")
      self.expression(assign.value)
      let t = self.getStaticIndex(name)
      let index = t.pos
      # NOTE(review): for in-place operators only the right-hand
      # side is pushed before the binary opcode is emitted; confirm
      # the current value of the variable is loaded elsewhere
      case assign.token.kind:
        of InplaceAdd:
          self.emitByte(BinaryAdd)
        of InplaceSub:
          self.emitByte(BinarySubtract)
        of InplaceDiv:
          self.emitByte(BinaryDivide)
        of InplaceMul:
          self.emitByte(BinaryMultiply)
        of InplacePow:
          self.emitByte(BinaryPow)
        of InplaceFloorDiv:
          self.emitByte(BinaryFloorDiv)
        of InplaceMod:
          self.emitByte(BinaryMod)
        of InplaceAnd:
          self.emitByte(BinaryAnd)
        of InplaceXor:
          self.emitByte(BinaryXor)
        of InplaceRightShift:
          self.emitByte(BinaryShiftRight)
        of InplaceLeftShift:
          self.emitByte(BinaryShiftLeft)
        else:
          # Any other assignment token: no extra opcode is emitted
          discard
      # In-place operators just change
      # what values is set to a given
      # stack offset/name, so we only
      # need to perform the operation
      # as usual and then store it.
      # TODO: A better optimization would
      # be to have everything in one opcode,
      # but that requires variants for stack,
      # heap, and closure variables and I cba
      if index != -1:
        if not t.closedOver:
          self.emitByte(StoreVar)
        else:
          self.emitByte(StoreHeap)
        self.emitBytes(index.toTriple())
      else:
        # Report the name being assigned to: the expression's own
        # token is the assignment operator, not the variable
        self.error(&" reference to undeclared name ' {name.token.lexeme} ' ")
    of setItemExpr:
      discard
      # TODO
    else:
      self.error(&" invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug) ")
proc beginScope(self: Compiler) =
  ## Opens a new local scope: the current
  ## scope depth grows by one
  self.scopeDepth += 1
proc endScope(self: Compiler) =
  ## Ends the current local scope: counts the names whose
  ## depth is larger than the current scope depth, emits
  ## the instructions that pop them, removes that many
  ## entries from the end of self.names and finally
  ## decrements the scope depth. Raises a CompileError
  ## if the scope depth is negative
  if self.scopeDepth < 0:
    self.error(" cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug) ")
  # NOTE(review): the depth comparison happens before the
  # scope depth is decremented, so names declared at exactly
  # the current depth are not counted - confirm this is intended
  var popped: int = 0
  for i in countdown(self.names.high(), 0):
    if self.names[i].depth > self.scopeDepth:
      # All variables with a scope depth larger than the current one
      # are now out of scope. Begone, you're now homeless!
      inc(popped)
  if not self.enableOptimizations:
    # One plain Pop per name, no matter how many there are
    for _ in countup(1, popped):
      self.emitByte(Pop)
  elif popped == 1:
    # We only emit PopN if we're popping more than one value
    self.emitByte(Pop)
  elif popped > 1:
    # PopN takes a 16-bit operand, so a single instruction covers
    # at most 65535 names. This is enough for 99.99999% of the use
    # cases of the language (who the hell is going to use 65 THOUSAND
    # local variables?); anything beyond that is popped with a batch
    # of plain ol' Pop instructions, exactly one per remaining name
    let batch = min(popped, uint16.high().int())
    self.emitByte(PopN)
    self.emitBytes(batch.toDouble())
    for _ in countup(1, popped - batch):
      self.emitByte(Pop)
  for _ in countup(1, popped):
    discard self.names.pop()
  dec(self.scopeDepth)
proc blockStmt(self: Compiler, node: BlockStmt) =
  ## Compiles a block statement: its declarations are
  ## compiled one scope deeper than the surrounding
  ## code, then the scope is closed with endScope
  inc(self.scopeDepth) # same effect as beginScope()
  for decl in node.code:
    self.declaration(decl)
  self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
  ## Compiles if/else statements. The condition is
  ## compiled first, followed by a conditional jump over
  ## the then branch (JumpIfFalsePop when optimizations
  ## are enabled, JumpIfFalse plus an explicit Pop
  ## otherwise). When an else branch exists, a forward
  ## jump over it is emitted before it is compiled
  self.expression(node.condition)
  let jumpCode = if self.enableOptimizations: JumpIfFalsePop else: JumpIfFalse
  let thenJump = self.emitJump(jumpCode)
  if not self.enableOptimizations:
    self.emitByte(Pop)
  self.statement(node.thenBranch)
  # NOTE(review): the conditional jump is patched before the
  # forward jump over the else branch is emitted, so it appears
  # to land on that forward jump rather than on the else branch
  # itself - confirm against the VM's jump semantics
  self.patchJump(thenJump)
  if node.elseBranch == nil:
    return
  let elseJump = self.emitJump(JumpForwards)
  self.statement(node.elseBranch)
  self.patchJump(elseJump)
proc emitLoop(self: Compiler, begin: int) =
  ## Emits a backwards jump whose offset is the distance
  ## between begin and the current end of the bytecode.
  ## JumpBackwards with a 2-byte operand is used when the
  ## offset fits in 16 bits, LongJumpBackwards with a
  ## 3-byte operand otherwise. Raises a CompileError
  ## when the offset doesn't fit in 24 bits
  var offset = self.chunk.code.len() - begin
  # The byte at begin + 1 is inspected as the jump instruction:
  # long jump opcodes add 4 to the offset
  case OpCode(self.chunk.code[begin + 1]):
    of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
        LongJumpIfFalsePop, LongJumpIfTrue:
      offset += 4
    else:
      discard
  if offset <= uint16.high().int:
    self.emitByte(JumpBackwards)
    self.emitBytes(offset.toDouble())
    return
  if offset > 16777215:
    self.error(" cannot jump more than 16777215 bytecode instructions ")
  self.emitByte(LongJumpBackwards)
  self.emitBytes(offset.toTriple())
proc whileStmt(self: Compiler, node: WhileStmt) =
  ## Compiles C-style while loops: the condition, a
  ## JumpIfFalsePop past the body, the body itself and
  ## a backwards jump to where the condition starts
  let loopStart = self.chunk.code.len()
  self.expression(node.condition)
  let exitJump = self.emitJump(JumpIfFalsePop)
  self.statement(node.body)
  # NOTE(review): the exit jump is patched before the backwards
  # jump is emitted, so it appears to target the backwards jump
  # itself rather than the code after the loop - confirm against
  # the VM's jump semantics
  self.patchJump(exitJump)
  self.emitLoop(loopStart)
2022-04-21 15:25:29 +02:00
proc inferValueType(self: Compiler, node: ASTNode): ASTNode =
  ## Infers the type of a given literal expression and
  ## returns it as an identifier node. Numeric literals
  ## may carry a size specifier after a single quote
  ## (e.g. 'u8, 'i64, 'f32), which is expanded into the
  ## matching uintN/intN/floatN name; without one, float
  ## literals are typed "float" and every other numeric
  ## literal "int". Returns nil for node kinds that are
  ## not handled yet and raises a CompileError for
  ## malformed or unknown specifiers
  case node.kind:
    of listExpr:
      return ListExpr(node).valueType
    of dictExpr:
      # It's not important that we don't use
      # valueType here, we just need to return
      # a non-nil value so we don't error out
      return DictExpr(node).keyType
    of intExpr, floatExpr, binExpr, octExpr, hexExpr:
      let literal = LiteralExpr(node)
      # The separator and the type names below take part in
      # lexeme comparisons, so they must be spelled exactly
      let size = literal.token.lexeme.split("'")
      if len(size) notin 1 .. 2:
        self.error(" invalid state: inferValueType -> invalid size specifier ")
      elif size.len() == 1:
        # No specifier: fall back to the default type of the literal
        let fallback = if node.kind == floatExpr: "float" else: "int"
        return newIdentExpr(Token(lexeme: fallback))
      elif size[1] in ["u64", "i64", "u32", "i32", "u16", "i16", "u8", "i8",
                       "f64", "f32"]:
        # The first character selects the family, the rest is the width
        let family =
          case size[1][0]
          of 'u': "uint"
          of 'i': "int"
          else: "float"
        return newIdentExpr(Token(lexeme: family & size[1][1 .. ^1]))
      else:
        self.error(&" invalid type specifier ' {size[1]} ' for ' {size[0]} ' ")
    of nilExpr:
      return newIdentExpr(Token(lexeme: "nil"))
    else:
      discard # TODO
proc inferExprType(self: Compiler, node: ASTNode): ASTNode =
  ## Infers the type of a given expression and returns
  ## it, or nil when it can't be determined (which is
  ## also the result for a nil node). Identifiers yield
  ## the type recorded when they were declared, unary and
  ## grouping expressions the type of their operand,
  ## binary expressions the type of their left operand
  ## (provided both operands have one) and literals
  ## whatever inferValueType returns
  if node == nil:
    return nil
  case node.kind:
    of identExpr:
      let name = self.resolveStatic(IdentExpr(node))
      if name == nil:
        return nil
      return name.valueType
    of unaryExpr:
      # The operand can be any expression, not just a literal
      return self.inferExprType(UnaryExpr(node).a)
    of groupingExpr:
      return self.inferExprType(GroupingExpr(node).expression)
    of binaryExpr:
      let binary = BinaryExpr(node)
      let a = self.inferExprType(binary.a)
      let b = self.inferExprType(binary.b)
      if a == nil or b == nil:
        return nil
      return a
    of intExpr, hexExpr, binExpr, octExpr,
       strExpr, falseExpr, trueExpr, infExpr,
       nanExpr, floatExpr, nilExpr, listExpr,
       dictExpr, setExpr, tupleExpr:
      return self.inferValueType(node)
    else:
      discard # Other expression kinds have no inferred type yet
proc inferDeclType(self: Compiler, node: Declaration): ASTNode =
  ## Returns the type of a declaration. Functions yield
  ## their declared return type (nil if there is none),
  ## variables their declared value type or, when that is
  ## missing, the type inferred from their value. Any
  ## other kind of declaration yields nil
  case node.kind:
    of funDecl:
      let fun = FunDecl(node)
      if fun.returnType != nil:
        result = fun.returnType
    of NodeKind.varDecl:
      let variable = VarDecl(node)
      if variable.valueType == nil:
        result = self.inferExprType(variable.value)
      else:
        result = variable.valueType
    else:
      discard # Unreachable
2022-04-04 12:29:23 +02:00
proc expression(self: Compiler, node: ASTNode) =
  ## Compiles all expressions by dispatching on the kind
  ## of the node. A CompileError is raised when no type
  ## can be inferred for the expression or when the node
  ## kind is not an expression this procedure knows about
  let inferred = self.inferExprType(node)
  if inferred == nil:
    self.error(" expression has no type ")
  case node.kind:
    of identExpr:
      self.identifier(IdentExpr(node))
    of groupingExpr:
      # Grouping expressions like (2 + 1)
      self.expression(GroupingExpr(node).expression)
    of unaryExpr:
      # Unary expressions such as ~5 and -3
      self.unary(UnaryExpr(node))
    of binaryExpr:
      # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
      self.binary(BinaryExpr(node))
    of setItemExpr, assignExpr:
      # The node is passed along without being converted to
      # its true type: that information would be lost in the
      # call anyway, self.assignment tells the two apart
      self.assignment(node)
    of getItemExpr:
      discard # TODO
    of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
        infExpr, nanExpr, floatExpr, nilExpr, tupleExpr, setExpr, listExpr,
        dictExpr:
      # All of these nodes mostly share the same overall
      # structure and the kind discriminant is enough to
      # tell them apart, so one case handles them all
      self.literal(node)
    else:
      self.error(&" invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug) ")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
  ## Compiles await statements, the standalone form of
  ## await expressions: basically an await expression
  ## followed by a semicolon, parsed in the context of
  ## statements so it can be used outside expressions.
  ## Await expressions are the only native construct to
  ## run coroutines from within an already asynchronous
  ## context (which should be orchestrated by an event
  ## loop) and block in the caller until the callee
  ## returns. The awaited expression is compiled first,
  ## then the Await opcode is emitted
  let awaited = node.expression
  self.expression(awaited)
  self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
  ## Compiles defer statements. A defer statement
  ## is executed right before its containing function
  ## exits (either because of a return or an exception).
  ## The bytecode of the deferred expression is compiled
  ## into the chunk, moved (in order) to the end of
  ## self.deferred and removed from the chunk again
  let start = self.chunk.code.len()
  self.expression(node.expression)
  if self.chunk.code.len() > start:
    # Copy the freshly emitted bytes in one go and truncate the
    # chunk afterwards: removing elements one at a time while
    # walking the same index range would scramble the bytes and
    # run past the end of the shrinking sequence
    self.deferred.add(self.chunk.code[start .. ^1])
    self.chunk.code.setLen(start)
    # NOTE(review): line information written by emitByte for the
    # moved bytes is not rewound here - confirm whether the chunk
    # needs it truncated as well
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. An empty return
    ## implicitly returns nil. Before any code is
    ## emitted, the inferred type of the returned
    ## value is checked against the return type
    ## declared by the function being compiled
    let returnType = self.inferExprType(node.value)
    let declared = self.currentFunction.returnType
    if declared == nil:
        # No declared return type: only a nil value may be returned
        if node.value.kind != nilExpr:
            self.error(" non-nil return value is not allowed in functions without an explicit return type ")
    elif returnType == nil:
        # A return type is declared, but the value's type is unknown
        self.error(&" expected return value of type ' {self.currentFunction.returnType.token.lexeme} ' , but expression has no type ")
    elif returnType.token.lexeme != declared.token.lexeme:
        # Both types are known: they are compared by name
        self.error(&" expected return value of type ' {self.currentFunction.returnType.token.lexeme} ' , got ' {returnType.token.lexeme} ' instead ")
    self.expression(node.value)
    self.emitByte(OpCode.Return)
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded
    ## expression is compiled first and is then
    ## followed by the Yield opcode
    expression(self, node.expression)
    emitByte(self, OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception
    ## expression is compiled first and is then
    ## followed by the Raise opcode
    expression(self, node.exception)
    emitByte(self, OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the next iteration of a loop, which is done
    ## by emitting a jump to the recorded start of the
    ## current loop
    let target = self.currentLoop.start
    if target > 65535:
        # The target is too large for the short form: use
        # the long jump and its toTriple() operand
        self.emitByte(LongJump)
        self.emitBytes(target.toTriple())
    else:
        self.emitByte(Jump)
        self.emitBytes(target.toDouble())
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## leaves the current loop: since the amount of
    ## code to skip is not known yet, a placeholder
    ## jump is emitted and its position is recorded
    ## in the current loop's breakPos list so that it
    ## can be patched once the loop is fully compiled
    # The position handed back by emitJump is recorded as-is (rather
    # than being recomputed from the chunk's length) because it is
    # later passed to patchJump, which is fed emitJump's result
    # everywhere else
    let placeholder = self.emitJump(OpCode.Break)
    self.currentLoop.breakPos.add(placeholder)
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Turns every "break" placeholder recorded for
    ## the current loop into an actual forward jump.
    ## This has to happen after the loop is fully
    ## compiled, because the size of the code to
    ## skip is not known any earlier
    for pos in self.currentLoop.breakPos:
        # Swap the placeholder opcode for a real jump, then fix its offset
        self.chunk.code[pos] = uint8(JumpForwards)
        self.patchJump(pos)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (which raise
    ## AssertionError if the expression is falsey):
    ## the asserted expression is compiled first and
    ## is then followed by the Assert opcode
    expression(self, node.expression)
    emitByte(self, OpCode.Assert)
proc statement(self: Compiler, node: ASTNode) =
    ## Compiles all statements by dispatching on the
    ## node's kind to the matching routine. Any kind
    ## without a dedicated branch is compiled as an
    ## expression
    case node.kind:
        of NodeKind.exprStmt:
            # Expression statements discard their value. Their main
            # use case is side effects in function calls
            self.expression(ExprStmt(node).expression)
            self.emitByte(Pop)
        of NodeKind.importStmt, NodeKind.forEachStmt, NodeKind.tryStmt:
            # No code is emitted for these statements
            discard
        of NodeKind.whileStmt, NodeKind.forStmt:
            # Our parser already desugars for loops to while loops!
            # The enclosing loop is saved and restored around the
            # body so that nested loops are handled correctly
            let enclosingLoop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth,
                                    breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = enclosingLoop
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        else:
            self.expression(node)
2022-04-12 12:18:25 +02:00
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations: the type of the
    ## declaration is inferred first, then the initializer
    ## is compiled and the name is declared with that type
    let inferred = self.inferDeclType(node)
    if isNil(inferred):
        self.error(&" Cannot determine the type of ' {node.name.token.lexeme} ' ")
    self.expression(node.value)
    self.declareName(node, IdentExpr(inferred))
2022-04-12 12:18:25 +02:00
2022-04-04 12:29:23 +02:00
proc funDecl(self: Compiler, node: FunDecl) =
    ## Compiles function declarations. The function's
    ## code is emitted inline and is preceded by a
    ## forward jump, so that it is skipped over when
    ## execution reaches the declaration itself

    # We store the current function so that it can be
    # restored once this one has been compiled
    let enclosing = self.currentFunction
    self.currentFunction = node
    # A function's code is just compiled linearly
    # and then jumped over
    let jmp = self.emitJump(JumpForwards)
    self.declareName(node, IdentExpr(node.returnType))
    # Since the deferred array is a linear
    # sequence of instructions and we want
    # to keep track of which function each
    # set of deferred instructions belongs to,
    # we record the length of the deferred
    # array before compiling the function
    # and use this info later to compile
    # the try/finally block with the deferred
    # code
    let deferStart = self.deferred.len()
    self.blockStmt(BlockStmt(node.body))
    # Yup, we're done. That was easy, huh?
    # But after all functions are just named
    # scopes, and we compile them just like that:
    # we declare their name and arguments (before
    # their body so recursion works) and then just
    # handle them as a block statement (which takes
    # care of incrementing self.scopeDepth so locals
    # are resolved properly). There's a need for a bit
    # of boilerplate code to make closures work, but
    # that's about it
    self.emitBytes(OpCode.Nil, OpCode.Return)
    # Currently defer is not functional so we just drop
    # the instructions this function added. Truncating
    # back to the recorded length removes exactly those,
    # whereas deleting by index while counting up to
    # len() skips entries and runs out of bounds
    self.deferred.setLen(deferStart)
    self.patchJump(jmp)
    # This makes us compile nested functions correctly
    self.currentFunction = enclosing
proc declaration(self: Compiler, node: ASTNode) =
    ## Compiles all declarations. Variable and
    ## function declarations have their own routine,
    ## any other node is handed over to statement()
    if node.kind == NodeKind.varDecl:
        self.varDecl(VarDecl(node))
    elif node.kind == NodeKind.funDecl:
        self.funDecl(FunDecl(node))
    else:
        self.statement(node)
proc compile * ( self : Compiler , ast : seq [ ASTNode ] , file : string ) : Chunk =
## Compiles a sequence of AST nodes into a chunk
## object
## The compiler's state (chunk, AST, file, names,
## scope depth, current function, current module and
## node cursor) is reset first, then nodes are fed to
## declaration() one at a time until done() is true.
## The chunk that was filled in is what gets returned
# Start from a clean state so the same Compiler object can be reused
self . chunk = newChunk ( )
self . ast = ast
self . file = file
self . names = @ [ ]
self . scopeDepth = 0
self . currentFunction = nil
2022-04-26 09:29:59 +02:00
# The module being compiled is identified by the file's name
self . currentModule = self . file
2022-04-04 12:29:23 +02:00
self . current = 0
# Main loop: every top-level node goes through declaration()
while not self . done ( ) :
self . declaration ( self . step ( ) )
if self . ast . len ( ) > 0 :
# *Technically* an empty program is a valid program
self . endScope ( )
self . emitByte ( OpCode . Return ) # Exits the VM's main loop when used at the global scope
result = self . chunk
# Sanity check for non-empty programs: scopeDepth is expected to be
# exactly -1 here (presumably the effect of the endScope() call above
# on the global scope, confirm against endScope), any other value is
# reported as an internal error
if self . ast . len ( ) > 0 and self . scopeDepth ! = - 1 :
2022-04-07 12:15:34 +02:00
self . error ( & " invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope? " )