2022-04-04 12:29:23 +02:00
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta / token
import meta / ast
import meta / errors
import .. / config
import .. / util / multibyte
import strformat
import algorithm
import parseutils
2022-04-21 15:25:29 +02:00
import strutils
2022-05-07 10:48:01 +02:00
import sequtils
2022-04-04 12:29:23 +02:00
export ast
export token
export multibyte
2022-05-07 10:48:01 +02:00
type
    TypeKind* = enum
        ## Every kind of type the compiler
        ## distinguishes at compile time
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf
    Type* = ref object
        ## Compile-time description
        ## of a type
        # AST node associated with this type
        node*: ASTNode
        case kind*: TypeKind:
            of Function:
                # Only function types carry a return type
                returnType*: Type
            else:
                discard
# This way we don't have recursive dependency issues
import meta / bytecode
export bytecode
2022-04-04 12:29:23 +02:00
type
    Name = ref object
        ## Compile-time record describing a
        ## statically resolved name

        # Identifier the name was declared with
        name: IdentExpr
        # Module owning the identifier
        owner: string
        # Scope depth at which the name was declared
        depth: int
        # Whether the name is private
        isPrivate: bool
        # Whether the name is a constant
        isConst: bool
        # Whether the name was declared as a `let` binding
        isLet: bool
        # Type of the value bound to the name
        valueType: Type
        # For variables: position in the bytecode where
        # the StoreVar instruction was emitted. For
        # functions: position where the function's code
        # begins
        codePos: int
    Loop = object
        ## Bookkeeping for the loop being compiled,
        ## used to emit the jump offsets of continue
        ## and break statements

        # Bytecode position where the loop starts
        start: int
        # Scope depth the loop lives at
        depth: int
        # Absolute offsets into the bytecode that still
        # need patching (one per break statement)
        breakPos: seq[int]
    Compiler* = ref object
        ## Holds the whole state of the Peon compiler

        # Bytecode chunk the compiler writes into
        chunk: Chunk
        # Parser output (AST)
        ast: seq[Declaration]
        # Index of the AST node currently being looked at
        current: int
        # File being compiled (only used when
        # reporting errors)
        file: string
        # Compile-time "simulation" of the runtime
        # stack, used to load variables with stack
        # behavior more efficiently
        names: seq[Name]
        # Current scope depth: 0 means global scope,
        # anything greater is a local scope
        scopeDepth: int
        # Function currently being compiled
        currentFunction: FunDecl
        # Whether optimizations are turned on
        enableOptimizations*: bool
        # Loop currently being compiled (tracks
        # where to jump)
        currentLoop: Loop
        # Module currently being compiled (restricts
        # access to statically defined variables at
        # compile time)
        currentModule: string
        # Code of every defer statement compiled so far
        # in the current function. If it is not empty,
        # funDecl wraps the function's code in an implicit
        # try/finally block and places this code in the
        # finally branch. The sequence is emptied each time
        # a function declaration is compiled, so it only
        # ever stores deferred code for the current
        # function (and may be empty)
        deferred: seq[uint8]
        # Variables that have been closed over
        closedOver: seq[IdentExpr]
2022-04-11 14:41:20 +02:00
proc newCompiler*(enableOptimizations: bool = true): Compiler =
    ## Creates a Compiler in its initial state: empty AST
    ## and name list, global scope, no current function,
    ## empty file and module names. Optimizations are
    ## enabled unless enableOptimizations is false
    result = Compiler(ast: @[],
                      current: 0,
                      file: "",
                      names: @[],
                      scopeDepth: 0,
                      currentFunction: nil,
                      enableOptimizations: enableOptimizations,
                      currentModule: "")
## Forward declarations
2022-05-04 14:27:15 +02:00
# Node compilers that are referenced before their definition
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
# AST cursor helper
proc peek(self: Compiler, distance: int = 0): ASTNode
# Name handling
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
# Type inference
proc inferType(self: Compiler, node: LiteralExpr): Type
proc inferType(self: Compiler, node: Expression): Type
2022-04-04 12:29:23 +02:00
## End of forward declarations
2022-04-26 09:29:59 +02:00
## Public getter for nicer error formatting
2022-04-04 12:29:23 +02:00
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the most recently consumed AST node, or the
    ## last node of the tree once the cursor has moved past
    ## its end. When nothing has been consumed yet the first
    ## node is returned. The AST must not be empty
    if self.current >= self.ast.len():
        result = self.ast[^1]
    else:
        # max() keeps the index at 0 when self.current is 0:
        # without it we would index the sequence at -1
        result = self.ast[max(self.current - 1, 0)]
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Returns the AST node located distance nodes away
    ## from the current one without consuming anything.
    ## Negative distances look at already consumed nodes.
    ## Whenever the requested position falls outside the
    ## tree, the last AST node is returned instead
    let target = self.current + distance
    if self.ast.len() > 0 and target in 0..self.ast.high():
        return self.ast[target]
    return self.ast[^1]
proc done(self: Compiler): bool =
    ## Tells whether every AST node has been
    ## consumed already
    self.current >= self.ast.len()
2022-05-20 15:47:04 +02:00
proc error(self: Compiler, message: string) {.raises: [CompileError, ValueError].} =
    ## Aborts compilation by raising a CompileError whose
    ## message reports the file, module, line and lexeme
    ## of the current node, followed by the given message
    let
        culprit = self.getCurrentNode().token
        report = &"A fatal error occurred while compiling '{self.file}', module '{self.currentModule}' line {culprit.line} at '{culprit.lexeme}' -> {message}"
    raise newException(CompileError, report)
proc step(self: Compiler): ASTNode =
    ## Consumes the current node and returns it. The
    ## cursor is left untouched when there is nothing
    ## left to consume
    result = self.peek()
    if self.done():
        return
    inc(self.current)
proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Writes one byte to the chunk being compiled,
    ## tagging it with the line of the current AST node
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    let line = self.peek().token.line
    self.chunk.write(uint8(byt), line)
proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Writes two bytes in a row, first byt1 and then byt2.
    ## Handy for emitting an opcode together with its
    ## operand
    for current in [uint8(byt1), uint8(byt2)]:
        self.emitByte(current)
proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
    ## Writes both elements of a 2-byte array to the
    ## current chunk, in order
    self.emitByte(bytarr[0])
    self.emitByte(bytarr[1])
2022-05-07 10:48:01 +02:00
proc emitBytes(self: Compiler, bytarr: openarray[uint8]) =
    ## Writes every element of the given byte sequence
    ## (of any length) to the current chunk, in order,
    ## calling emitByte on each of them
    for idx in 0..bytarr.high():
        self.emitByte(bytarr[idx])
2022-04-04 12:29:23 +02:00
2022-05-20 15:47:04 +02:00
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Adds the integer constant held by val to the current
    ## chunk's constant table, encoded with the width dictated
    ## by typ, and returns its index as a 3-byte array of
    ## uint8s. Types other than the 8/16/32/64 bit integers
    ## are not handled yet: nothing is written for them and a
    ## zeroed array is returned.
    ## Raises ValueError if the literal is not a valid integer

    # Integer literals may carry a size specifier after an
    # apostrophe (e.g. 42'i16): only the part that comes
    # before it is numeric, so that is all we hand to parseInt
    let digits = val.token.lexeme.split("'")[0]
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(parseInt(digits))])
        of Int16, UInt16:
            result = self.chunk.writeConstant(parseInt(digits).toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(parseInt(digits).toQuad())
        of Int64, UInt64:
            result = self.chunk.writeConstant(parseInt(digits).toLong())
        else:
            discard
2022-04-04 12:29:23 +02:00
2022-05-07 10:48:01 +02:00
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits the instruction that loads the constant obj,
    ## followed by its operand (the index of the constant
    ## in the chunk's constant table). The opcode is chosen
    ## from the type inferred for obj; when inference yields
    ## nothing, the type supplied by the caller is used
    ## instead. A CompileError is raised if neither of the
    ## two is available
    let inferred = self.inferType(obj)
    # inferType returns nil for expressions it does not
    # handle yet: fall back to what the caller told us
    # instead of dereferencing nil
    let opType = if inferred.isNil(): kind else: inferred
    if opType.isNil():
        self.error("cannot emit constant: its type could not be determined")
    case opType.kind:
        of Int64:
            self.emitByte(LoadInt64)
        else:
            discard # TODO
    self.emitBytes(self.makeConstant(obj, if kind.isNil(): opType else: kind))
2022-04-04 12:29:23 +02:00
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits the given jump opcode followed by a 3-byte
    ## placeholder offset (the largest unsigned 24 bit
    ## integer) that patchJump is expected to fix later,
    ## possibly switching to the shorter 16 bit form.
    ## Returns the absolute index into the chunk's
    ## bytecode where the placeholder instruction starts
    const placeholder = 0xffffff
    self.emitByte(opcode)
    self.emitBytes(placeholder.toTriple())
    # 4 bytes back: one opcode plus a three byte offset
    result = self.chunk.code.len() - 4
proc patchJump(self: Compiler, offset: int) =
    ## Fixes up the relative jump that emitJump wrote at
    ## the given offset so that it lands at the current
    ## end of the bytecode. emitJump always reserves room
    ## for a long jump: when the distance fits the short
    ## form, the instruction is swapped for its short
    ## variant and the spare offset byte is removed.
    ## Otherwise a short opcode found at offset is swapped
    ## for its long variant
    let distance = self.chunk.code.len() - offset
    if distance > 16777215:
        self.error("cannot jump more than 16777216 bytecode instructions")
    let opcode = OpCode(self.chunk.code[offset])
    if distance < int(high(uint16)):
        # Short form: pick the 16 bit counterpart (any other
        # opcode is written back unchanged)
        let shortened =
            case opcode:
                of LongJumpForwards: JumpForwards
                of LongJumpBackwards: JumpBackwards
                of LongJumpIfFalse: JumpIfFalse
                of LongJumpIfFalsePop: JumpIfFalsePop
                of LongJumpIfFalseOrPop: JumpIfFalseOrPop
                else: opcode
        self.chunk.code[offset] = uint8(shortened)
        # Drops the first 8 bits of the jump offset (which are empty)
        self.chunk.code.delete(offset + 1)
        # -1 because we just got rid of one byte
        let encoded = (distance - 1).toDouble()
        self.chunk.code[offset + 1] = encoded[0]
        self.chunk.code[offset + 2] = encoded[1]
    else:
        # Long form: pick the 24 bit counterpart (any other
        # opcode is written back unchanged)
        let widened =
            case opcode:
                of JumpForwards: LongJumpForwards
                of JumpBackwards: LongJumpBackwards
                of JumpIfFalse: LongJumpIfFalse
                of JumpIfFalsePop: LongJumpIfFalsePop
                of JumpIfFalseOrPop: LongJumpIfFalseOrPop
                else: opcode
        self.chunk.code[offset] = uint8(widened)
        let encoded = distance.toTriple()
        self.chunk.code[offset + 1] = encoded[0]
        self.chunk.code[offset + 2] = encoded[1]
        self.chunk.code[offset + 3] = encoded[2]
2022-05-02 17:26:38 +02:00
proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
    ## Looks the given identifier up among the declared
    ## names, most recent declaration first, and returns
    ## the first entry whose lexeme matches, or nil when
    ## there is none. Scope depth plays no role here (the
    ## depth parameter is not consulted). Private names
    ## owned by a module other than the current one are
    ## skipped
    for idx in countdown(self.names.high(), 0):
        let candidate = self.names[idx]
        if candidate.name.token.lexeme != name.token.lexeme:
            continue
        if candidate.isPrivate and candidate.owner != self.currentModule:
            # Not visible from here, but an older entry
            # with the same name still might be
            continue
        return candidate
    return nil
2022-05-18 13:32:32 +02:00
proc getStackPos(self: Compiler, name: IdentExpr,
                 depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
    ## Walks the list of declared names backwards and returns
    ## a tuple (closedOver, pos). closedOver tells the caller
    ## whether the name must be loaded as a closure variable,
    ## while pos is its index: into self.names when closedOver
    ## is false, into self.closedOver when it is true.
    ## Returns (false, -1) if the location of the name can not
    ## be determined at compile time (this is an error!).
    ## Private names declared in other modules are not resolved
    for i in countdown(self.names.high(), 0):
        # Iterating by index keeps the reported position in
        # sync with the entry being examined, even when an
        # entry is skipped
        let variable = self.names[i]
        if name.name.lexeme != variable.name.name.lexeme:
            continue
        if variable.isPrivate and variable.owner != self.currentModule:
            continue
        if variable.depth == depth or variable.depth == 0:
            # variable.depth == 0 for globals!
            return (false, i)
        elif variable.depth > 0:
            # Most recently closed-over entry first. j is the
            # actual index into self.closedOver
            for j in countdown(self.closedOver.high(), 0):
                if self.closedOver[j].name.lexeme == name.name.lexeme:
                    return (true, j)
    return (false, -1)
2022-05-18 13:32:32 +02:00
proc detectClosureVariable(self: Compiler, name: IdentExpr,
                           depth: int = self.scopeDepth) =
    ## Checks whether the given name was declared in a scope
    ## shallower than depth and, if so, records it as closed
    ## over and rewrites the instruction emitted at its
    ## declaration so that it is stored as a closure variable.
    ## Nothing happens when the name can not be resolved (not
    ## declared yet, or private to another module). This is
    ## meant to be called on every reference to a name so that
    ## closed-over variables are emitted properly
    let entry = self.resolve(name)
    if entry.isNil() or entry.depth >= depth:
        return
    # The name is closed over: the StoreVar that created its
    # entry becomes a StoreHeap. Loads need no patching here
    # because self.identifier() emits LoadHeap on its own when
    # it sees that the variable is closed over
    self.closedOver.add(entry.name)
    if self.closedOver.len() >= 16777216:
        self.error("too many consecutive closed-over variables (max is 16777216)")
    let
        encoded = self.closedOver.high().toTriple()
        pos = entry.codePos
    self.chunk.code[pos] = StoreHeap.uint8
    for i in 0..2:
        self.chunk.code[pos + 1 + i] = encoded[i]
2022-05-07 10:48:01 +02:00
proc compareTypes(self: Compiler, a, b: Type): bool =
    ## Tells whether two type objects are equal. nil is
    ## only equal to nil. Function types are equal when
    ## their declarations share the same name, the same
    ## number of arguments, the same return type and, for
    ## each pair of arguments, the same mutable/isRef/isPtr
    ## flags and the same type. Custom types never compare
    ## equal. Any other pair is equal when the kinds match
    if a.isNil() or b.isNil():
        return a.isNil() and b.isNil()
    if a.kind != b.kind:
        return false
    case a.kind:
        of Function:
            let
                funA = FunDecl(a.node)
                funB = FunDecl(b.node)
            if funA.name.token.lexeme != funB.name.token.lexeme:
                return false
            if funA.arguments.len() != funB.arguments.len():
                return false
            if not self.compareTypes(self.inferType(funA.returnType),
                                     self.inferType(funB.returnType)):
                return false
            for (argA, argB) in zip(funA.arguments, funB.arguments):
                if argA.mutable != argB.mutable or
                   argA.isRef != argB.isRef or
                   argA.isPtr != argB.isPtr:
                    return false
                if not self.compareTypes(self.inferType(argA.valueType),
                                         self.inferType(argB.valueType)):
                    return false
            return true
        of CustomType:
            return false
        else:
            # Every remaining kind carries no extra data
            return true
proc toIntrinsic(name: string): Type =
    ## Maps the name of a built-in type (or one of
    ## its aliases) to the matching type object.
    ## Unknown names yield nil
    case name:
        of "int", "int64", "i64":
            result = Type(kind: Int64)
        of "uint64", "u64":
            result = Type(kind: UInt64)
        of "int32", "i32":
            result = Type(kind: Int32)
        of "uint32", "u32":
            result = Type(kind: UInt32)
        of "int16", "i16":
            result = Type(kind: Int16)
        of "uint16", "u16":
            result = Type(kind: UInt16)
        of "int8", "i8":
            result = Type(kind: Int8)
        of "uint8", "u8":
            result = Type(kind: UInt8)
        of "f64", "float", "float64":
            result = Type(kind: Float64)
        of "f32", "float32":
            result = Type(kind: Float32)
        of "byte":
            result = Type(kind: Byte)
        of "char":
            result = Type(kind: Char)
        of "nan":
            result = Type(kind: Nan)
        of "nil":
            result = Type(kind: Nil)
        of "inf":
            result = Type(kind: Inf)
        of "bool":
            result = Type(kind: Bool)
        else:
            result = nil
2022-05-02 17:26:38 +02:00
2022-05-04 14:27:15 +02:00
proc inferType(self: Compiler, node: LiteralExpr): Type =
    ## Works out the type of a literal. Numeric literals
    ## default to Int64/Float64 unless a size specifier
    ## follows an apostrophe in their lexeme, in which
    ## case the specifier decides (an unknown specifier
    ## is a compile error). nil is returned for a nil
    ## node and for literal kinds not handled yet
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let parts = node.token.lexeme.split("'")
            if parts.len() < 1 or parts.len() > 2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if parts.len() == 1:
                return Type(node: node, kind: Int64)
            let explicit = parts[1].toIntrinsic()
            if self.compareTypes(explicit, nil):
                self.error(&"invalid type specifier '{parts[1]}' for int")
            return explicit
        of floatExpr:
            let parts = node.token.lexeme.split("'")
            if parts.len() < 1 or parts.len() > 2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if parts.len() == 1 or parts[1] == "f64":
                return Type(node: node, kind: Float64)
            let explicit = parts[1].toIntrinsic()
            if self.compareTypes(explicit, nil):
                self.error(&"invalid type specifier '{parts[1]}' for float")
            return explicit
        of nilExpr:
            return Type(node: node, kind: Nil)
        of trueExpr, falseExpr:
            return Type(node: node, kind: Bool)
        of nanExpr:
            return Type(node: node, kind: TypeKind.Nan)
        of infExpr:
            return Type(node: node, kind: TypeKind.Inf)
        else:
            discard # TODO
2022-05-02 17:26:38 +02:00
2022-05-18 13:32:32 +02:00
proc toIntrinsic(self: Compiler, typ: Expression): Type =
    ## Tries to turn an expression into an intrinsic type.
    ## Boolean, integer and float literals are looked up by
    ## their lexeme. Identifiers go through type inference
    ## first and fall back to a lookup by lexeme. nil is
    ## returned for a nil expression and for any other kind
    ## of expression
    if typ.isNil():
        return nil
    case typ.kind:
        of trueExpr, falseExpr, intExpr, floatExpr:
            result = typ.token.lexeme.toIntrinsic()
        of identExpr:
            result = self.inferType(typ)
            if result.isNil():
                result = typ.token.lexeme.toIntrinsic()
        else:
            discard
2022-05-04 14:27:15 +02:00
proc inferType(self: Compiler, node: Expression): Type =
    ## Works out the type of an expression and returns it.
    ## Identifiers yield the type of the name they resolve
    ## to, or the intrinsic type they spell when they do not
    ## resolve. Unary expressions take the type of their
    ## operand. Binary expressions take the type of their
    ## operands if both agree and nil otherwise. Literals
    ## are handled by the LiteralExpr overload. nil is
    ## returned for a nil node and for anything else
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let ident = IdentExpr(node)
            let declared = self.resolve(ident)
            if declared.isNil():
                return ident.name.lexeme.toIntrinsic()
            return declared.valueType
        of unaryExpr:
            return self.inferType(UnaryExpr(node).a)
        of binaryExpr:
            let operation = BinaryExpr(node)
            let
                left = self.inferType(operation.a)
                right = self.inferType(operation.b)
            if self.compareTypes(left, right):
                return left
            return nil
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr}:
            return self.inferType(LiteralExpr(node))
        else:
            discard # Unreachable
2022-05-02 17:26:38 +02:00
proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a type object.
    ## Simple types are rendered as the lowercase name of
    ## their kind, functions as
    ## "function(arg: type, ...): returnType". A nil type
    ## (e.g. the return type of a function that declares
    ## none, or a type that could not be inferred) is
    ## rendered as "nil". Kinds not handled here yield an
    ## empty string

    # Type inference returns nil fairly often and this
    # procedure recurses on its results, so nil must be
    # handled before typ.kind is touched
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            return ($typ.kind).toLowerAscii()
        of Function:
            result = "function("
            case typ.node.kind:
                of funDecl:
                    var node = FunDecl(typ.node)
                    for i, argument in node.arguments:
                        result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}"
                        if i < node.arguments.len() - 1:
                            result &= ", "
                    result &= ")"
                of lambdaExpr:
                    var node = LambdaExpr(typ.node)
                    for i, argument in node.arguments:
                        result &= &"{argument.name.token.lexeme}: {argument.valueType}"
                        if i < node.arguments.len() - 1:
                            result &= ", "
                    result &= ")"
                else:
                    discard # Unreachable
            result &= &": {self.typeToStr(typ.returnType)}"
        else:
            discard
2022-05-18 13:32:32 +02:00
2022-05-02 17:26:38 +02:00
2022-05-07 10:48:01 +02:00
proc inferType(self: Compiler, node: Declaration): Type =
    ## Infers the type of a given declaration and returns it.
    ## Function declarations yield the type recorded for
    ## their name, or nil when the name does not resolve.
    ## Variable declarations yield the type recorded for
    ## their name or, when the name does not resolve, the
    ## type inferred from their value. nil is returned for
    ## a nil node and for any other kind of declaration.
    ## This procedure produces no output
    if node == nil:
        return nil
    case node.kind:
        of funDecl:
            var node = FunDecl(node)
            let resolved = self.resolve(node.name)
            if resolved != nil:
                return resolved.valueType
        of NodeKind.varDecl:
            var node = VarDecl(node)
            let resolved = self.resolve(node.name)
            if resolved != nil:
                return resolved.valueType
            else:
                return self.inferType(node.value)
        else:
            return # Unreachable
2022-05-04 14:01:38 +02:00
2022-04-04 12:29:23 +02:00
## End of utility functions
2022-05-07 10:48:01 +02:00
2022-04-04 12:29:23 +02:00
proc literal(self: Compiler, node: ASTNode) =
    ## Emits the instructions that load a literal:
    ## singletons get a dedicated opcode, strings and
    ## numbers are emitted as constants (hexadecimal,
    ## binary and octal integers are rewritten in decimal
    ## first) and await expressions compile their operand
    ## followed by an Await instruction. Any other node is
    ## reported as an internal error

    proc emitDecimal(value: int, source: Token) =
        ## Emits value as an Int64 constant through a new
        ## decimal integer literal that keeps the line and
        ## starting position of the source token
        let digits = $value
        let rewritten = newIntExpr(Token(lexeme: digits, line: source.line,
                                         pos: (start: source.pos.start,
                                               stop: source.pos.start + len(digits))))
        self.emitConstant(rewritten, Type(kind: Int64))

    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue)
        of falseExpr:
            self.emitByte(LoadFalse)
        of nilExpr:
            self.emitByte(LoadNil)
        of infExpr:
            self.emitByte(LoadInf)
        of nanExpr:
            self.emitByte(LoadNan)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        # TODO: Take size specifier into account!
        of intExpr:
            let lit = IntExpr(node)
            var parsed: int
            try:
                # Only parsed to validate the literal
                discard parseInt(lit.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            self.emitConstant(lit, Type(kind: Int64))
        of hexExpr:
            let lit = HexExpr(node)
            var parsed: int
            try:
                discard parseHex(lit.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            emitDecimal(parsed, lit.token)
        of binExpr:
            let lit = BinExpr(node)
            var parsed: int
            try:
                discard parseBin(lit.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            emitDecimal(parsed, lit.token)
        of octExpr:
            let lit = OctExpr(node)
            var parsed: int
            try:
                discard parseOct(lit.literal.lexeme, parsed)
            except ValueError:
                self.error("integer value out of range")
            emitDecimal(parsed, lit.token)
        of floatExpr:
            let lit = FloatExpr(node)
            var parsed: float
            try:
                # Only parsed to validate the literal
                discard parseFloat(lit.literal.lexeme, parsed)
            except ValueError:
                self.error("floating point value out of range")
            self.emitConstant(lit, Type(kind: Float64))
        of awaitExpr:
            let awaited = AwaitExpr(node)
            self.expression(awaited.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&"invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!)")
2022-04-04 12:29:23 +02:00
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles a unary expression (e.g. decimal or
    ## bitwise negation). For now only the operand is
    ## compiled: no instruction is emitted for the
    ## operator itself
    let operand = node.a
    # Pushes the operand onto the stack
    self.expression(operand)
2022-04-04 12:29:23 +02:00
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles a binary expression: both operands are
    ## compiled first, then the operator is handled. Only
    ## two short-circuiting operators are special-cased so
    ## far, anything else is reported as an internal error

    # Prepare the stack: left operand first, then the right one
    self.expression(node.a)
    self.expression(node.b)
    # TODO: Find implementation of
    # the given operator and call it
    case node.operator.kind:
        of NoMatch:
            # a and b
            # NOTE(review): both operands were already compiled
            # above and are compiled again here - confirm this
            # is intended
            self.expression(node.a)
            let skip = self.emitJump(if self.enableOptimizations: JumpIfFalseOrPop
                                     else: JumpIfFalse)
            if not self.enableOptimizations:
                self.emitByte(Pop)
            self.expression(node.b)
            self.patchJump(skip)
        of EndOfFile:
            # a or b
            # NOTE(review): same double compilation as above
            self.expression(node.a)
            let skip = self.emitJump(JumpIfTrue)
            self.expression(node.b)
            self.patchJump(skip)
        else:
            self.error(&"invalid AST node of kind {node.kind} at binary(): {node} (This is an internal error and most likely a bug!)")
2022-04-04 12:29:23 +02:00
2022-05-02 17:26:38 +02:00
proc declareName(self: Compiler, node: Declaration) =
    ## Statically declares a name into the current scope.
    ## Variables get a new entry in self.names and a StoreVar
    ## instruction whose operand is the index of that entry.
    ## Functions get an entry for their own name (no
    ## instruction is emitted for it) plus one entry per
    ## argument, declared one scope deeper and followed by a
    ## LoadVar instruction. Other declarations are ignored.
    ## A CompileError is raised when the index of the new name
    ## would not fit the 3-byte operand or when the type of a
    ## variable or argument can not be determined
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            # Creates a new Name entry so that self.identifier emits the proper stack offset.
            # The index of the new entry equals the current length of the list and has to
            # fit in 24 bits
            if self.names.len() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777216 variables at a time")
            let inferred = self.inferType(node.value)
            if inferred.isNil():
                # inferType returns nil for a missing value and for
                # expressions it can not handle yet
                self.error(&"cannot infer the type of '{node.name.token.lexeme}'")
            self.names.add(Name(depth: self.scopeDepth,
                                name: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: Type(kind: inferred.kind, node: node),
                                codePos: self.chunk.code.len(),
                                isLet: node.isLet))
            self.emitByte(StoreVar)
            self.emitBytes(self.names.high().toTriple())
        of NodeKind.funDecl:
            var node = FunDecl(node)
            # Declares the function's name in the
            # current scope but no StoreVar is emitted
            # because the name is only useful at compile time.
            # TODO: Maybe emit some optional debugging
            # metadata to let the VM know where a function's
            # code begins and ends (similar to what gcc does with
            # CFI in object files) to build stack traces
            self.names.add(Name(depth: self.scopeDepth,
                                isPrivate: node.isPrivate,
                                isConst: false,
                                owner: self.currentModule,
                                valueType: Type(kind: Function, node: node,
                                                returnType: self.inferType(
                                                    node.returnType)),
                                codePos: self.chunk.code.len(),
                                name: node.name,
                                isLet: false))
            for argument in node.arguments:
                if self.names.len() > 16777215:
                    self.error("cannot declare more than 16777216 variables at a time")
                self.names.add(Name(depth: self.scopeDepth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    name: argument.name,
                                    valueType: nil,
                                    codePos: self.chunk.code.len(),
                                    isLet: false))
                let argumentType = self.inferType(argument.valueType)
                if argumentType.isNil():
                    self.error(&"cannot determine the type of argument '{argument.name.token.lexeme}'")
                self.names[^1].valueType = argumentType
                # NOTE(review): when the type comes from an already declared
                # name, this writes to that name's Type object - confirm
                # sharing it is intended
                self.names[^1].valueType.node = argument.name
                self.emitByte(LoadVar)
                self.emitBytes(self.names.high().toTriple())
        else:
            discard # Unreachable
2022-04-27 16:03:48 +02:00
2022-04-04 12:29:23 +02:00
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles a reference to an identifier. Undeclared
    ## names are a compile error, constants are emitted
    ## as constants and everything else is loaded either
    ## from the stack or from the closure array
    let resolved = self.resolve(node)
    if resolved.isNil():
        self.error(&"reference to undeclared name '{node.token.lexeme}'")
        return
    if resolved.isConst:
        # Constants are emitted as, you guessed it, LoadConstant instructions
        # no matter the scope depth. If optimizations are enabled, the compiler
        # will reuse the same constant every time it is referenced instead of
        # allocating a new one each time
        self.emitConstant(node, self.inferType(node))
        return
    self.detectClosureVariable(resolved.name)
    # The position is not checked against -1: if it were,
    # self.resolve() would have returned nil
    let (isClosure, stackIndex) = self.getStackPos(node)
    if isClosure:
        if self.closedOver.len() == 0:
            self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)")
        # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics.
        # This makes closures work as expected and is not comparatively slower than indexing our stack (since they're both
        # dynamic arrays at runtime anyway)
        self.emitByte(LoadHeap)
        self.emitBytes(self.closedOver.high().toTriple())
    else:
        # Static name resolution, loads value at index in the stack. Very fast. Much wow.
        self.emitByte(LoadVar)
        self.emitBytes(stackIndex.toTriple())
2022-04-04 12:29:23 +02:00
2022-05-07 10:48:01 +02:00
proc findByName(self: Compiler, name: string): seq[Name] =
  ## Collects every declared name whose identifier matches
  ## the given string. The most recently declared matches
  ## come first in the returned sequence
  for i in countdown(self.names.high(), 0):
    let candidate = self.names[i]
    if candidate.name.token.lexeme == name:
      result.add(candidate)
proc findByType(self: Compiler, name: string, kind: Type): seq[Name] =
  ## Collects every declared name that matches the given
  ## identifier and whose type compares equal to kind
  ## according to compareTypes
  result = self.findByName(name).filterIt(
    self.compareTypes(it.valueType, kind))
2022-04-04 12:29:23 +02:00
proc assignment(self: Compiler, node: ASTNode) =
  ## Compiles assignment expressions. Plain assignments
  ## (assignExpr) are checked against undeclared, constant
  ## and immutable names, then the value is compiled and a
  ## StoreVar (or StoreHeap, for closed-over names) is emitted
  ## with the name's position. Item assignments (setItemExpr)
  ## are only type-checked for now. Any other node kind is
  ## reported as an internal error
  case node.kind:
    of assignExpr:
      let node = AssignExpr(node)
      let name = IdentExpr(node.name)
      let r = self.resolve(name)
      if r == nil:
        self.error(&" assignment to undeclared name ' {name.token.lexeme} ' ")
      elif r.isConst:
        self.error(&" cannot assign to ' {name.token.lexeme} ' (constant) ")
      elif r.isLet:
        self.error(&" cannot reassign ' {name.token.lexeme} ' ")
      # The right-hand side is compiled before the store is emitted
      self.expression(node.value)
      let t = self.getStackPos(name)
      let index = t.pos
      if index != -1:
        if not t.closedOver:
          self.emitByte(StoreVar)
        else:
          self.emitByte(StoreHeap)
        self.emitBytes(index.toTriple())
      else:
        # Report the identifier being assigned to (the lexeme of the
        # assignment node itself was used here before, which does not
        # name the offending variable)
        self.error(&" reference to undeclared name ' {name.token.lexeme} ' ")
    of setItemExpr:
      let node = SetItemExpr(node)
      let typ = self.inferType(node)
      if typ == nil:
        self.error(&" cannot determine the type of ' {node.name.token.lexeme} ' ")
      # TODO
    else:
      self.error(&" invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug) ")
proc beginScope(self: Compiler) =
  ## Opens a new local scope: the compiler's
  ## scope depth grows by one
  self.scopeDepth += 1
proc endScope(self: Compiler) =
  ## Ends the current local scope. The scope depth is decremented,
  ## every name declared deeper than the new depth is forgotten and
  ## the instructions that pop the corresponding values are emitted.
  ## Calling this with a scope depth of zero is an internal error
  if self.scopeDepth == 0:
    self.error(" cannot call endScope with scopeDepth == 0 (This is an internal error and most likely a bug) ")
  dec(self.scopeDepth)
  var popped: int = 0
  # Names that outlive the scope are gathered in a fresh sequence
  # instead of deleting from self.names by index while walking it:
  # the indices shift after each removal, which made the previous
  # index arithmetic remove the wrong entries (or go out of bounds)
  var survivors: seq[Name] = @[]
  for ident in self.names:
    if ident.depth > self.scopeDepth:
      inc(popped)
      if not self.enableOptimizations:
        # All variables with a scope depth larger than the current one
        # are now out of scope. Without optimizations each of them
        # gets its own Pop
        self.emitByte(Pop)
    else:
      survivors.add(ident)
  self.names = survivors
  if self.enableOptimizations:
    # The unoptimized path has already emitted one Pop per variable
    # above, so nothing else must be emitted in that case
    if popped > 1:
      # PopN takes a 16 bit operand: pop as many values as fit in
      # one instruction and fall back to plain Pop instructions
      # for whatever is left
      let batch = min(popped, uint16.high().int)
      self.emitByte(PopN)
      self.emitBytes(batch.toDouble())
      for _ in 1 .. popped - batch:
        self.emitByte(Pop)
    elif popped == 1:
      # We only emit PopN if we're popping more than one value
      self.emitByte(Pop)
proc blockStmt(self: Compiler, node: BlockStmt) =
  ## Compiles a block statement: every declaration it
  ## contains is compiled inside a fresh local scope,
  ## which is closed again once the block is done
  self.beginScope()
  for declaration in node.code:
    self.declaration(declaration)
  self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
  ## Compiles if/else statements. The condition is compiled
  ## first, followed by a conditional jump over the "then"
  ## branch; an optional "else" branch is preceded by its
  ## own forward jump
  self.expression(node.condition)
  # With optimizations enabled the jump instruction pops the
  # condition by itself, otherwise an explicit Pop follows it
  let jumpCode =
    if self.enableOptimizations: JumpIfFalsePop
    else: JumpIfFalse
  let thenJump = self.emitJump(jumpCode)
  if not self.enableOptimizations:
    self.emitByte(Pop)
  self.statement(node.thenBranch)
  # NOTE(review): the conditional jump is patched before the jump
  # that skips the else branch is emitted -- confirm where it lands
  self.patchJump(thenJump)
  if node.elseBranch != nil:
    let elseJump = self.emitJump(JumpForwards)
    self.statement(node.elseBranch)
    self.patchJump(elseJump)
proc emitLoop(self: Compiler, begin: int) =
  ## Emits a backwards jump whose offset is the distance between
  ## the current end of the bytecode and begin. Offsets that fit
  ## in 16 bits use JumpBackwards, larger ones LongJumpBackwards;
  ## offsets above 16777215 are an error
  let offset =
    case OpCode(self.chunk.code[begin + 1]): # The jump instruction
      of LongJumpForwards, LongJumpBackwards, LongJumpIfFalse,
         LongJumpIfFalsePop, LongJumpIfTrue:
        # Long jumps add 4 to the distance
        self.chunk.code.len() - begin + 4
      else:
        self.chunk.code.len() - begin
  if offset <= uint16.high().int:
    self.emitByte(JumpBackwards)
    self.emitBytes(offset.toDouble())
  else:
    if offset > 16777215:
      self.error(" cannot jump more than 16777215 bytecode instructions ")
    self.emitByte(LongJumpBackwards)
    self.emitBytes(offset.toTriple())
proc whileStmt(self: Compiler, node: WhileStmt) =
  ## Compiles while loops (which, according to the statement
  ## dispatcher, also covers desugared for loops): the condition,
  ## a conditional exit jump, the body and a jump back to the
  ## start of the loop
  let loopStart = self.chunk.code.len()
  self.expression(node.condition)
  # With optimizations enabled the jump instruction pops the
  # condition by itself, otherwise an explicit Pop follows it
  let exitCode =
    if self.enableOptimizations: JumpIfFalsePop
    else: JumpIfFalse
  let exitJump = self.emitJump(exitCode)
  if not self.enableOptimizations:
    self.emitByte(Pop)
  self.statement(node.body)
  # NOTE(review): the exit jump is patched before the backwards
  # jump is emitted -- confirm where it lands
  self.patchJump(exitJump)
  self.emitLoop(loopStart)
2022-05-04 14:27:15 +02:00
proc expression(self: Compiler, node: Expression) =
  ## Compiles an expression by dispatching on the kind of its
  ## AST node. Expressions whose type cannot be inferred are
  ## rejected, and unknown node kinds are reported as internal
  ## errors
  # Untyped identifiers are let through on purpose so that
  # self.identifier() can raise a more appropriate error
  if self.inferType(node) == nil and node.kind != identExpr:
    self.error(" expression has no type ")
  case node.kind:
    of callExpr, getItemExpr:
      discard # TODO
    of setItemExpr, assignExpr:
      # The node is not converted to its concrete type here:
      # self.assignment() takes a generic node and tells the
      # two kinds apart on its own
      self.assignment(node)
    of identExpr:
      self.identifier(IdentExpr(node))
    of unaryExpr:
      # E.g. ~5 and -3
      self.unary(UnaryExpr(node))
    of groupingExpr:
      # E.g. (2 + 1): only the inner expression is compiled
      self.expression(GroupingExpr(node).expression)
    of binaryExpr:
      # E.g. 2 ^ 5 and 0.66 * 3.14
      self.binary(BinaryExpr(node))
    of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
       infExpr, nanExpr, floatExpr, nilExpr:
      # All literals are handled by a single procedure which
      # receives the node as-is
      self.literal(node)
    else:
      self.error(&" invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug) ")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
  ## Compiles await statements, i.e. the standalone
  ## statement form of an await expression. The awaited
  ## expression is compiled first and an Await instruction
  ## is emitted right after it
  let awaited = node.expression
  self.expression(awaited)
  self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
  ## Compiles defer statements. The deferred expression is
  ## compiled as usual, then the bytes it produced are moved,
  ## in order, from the end of the chunk's code to the
  ## compiler's list of deferred instructions
  let start = self.chunk.code.len()
  self.expression(node.expression)
  # Copy first, truncate once afterwards. Removing the bytes one by
  # one with del() while iterating swaps the last byte into the
  # current slot (scrambling the order) and shrinks the sequence
  # under a loop whose upper bound has already been computed
  for i in start ..< self.chunk.code.len():
    self.deferred.add(self.chunk.code[i])
  # NOTE(review): only the code array is truncated; if the chunk keeps
  # other per-instruction metadata it is not adjusted here -- confirm
  self.chunk.code.setLen(start)
proc returnStmt(self: Compiler, node: ReturnStmt) =
  ## Compiles return statements. The type of the returned
  ## value is checked against the return type of the function
  ## being compiled; a return with a value emits ReturnPop
  ## after compiling the value, an empty return emits Return
  let returnType = self.inferType(node.value)
  let typ = self.inferType(self.currentFunction)
  if typ.returnType == nil and returnType != nil:
    # The function declares no return type, yet a value is returned
    # (the message used to say "with an explicit return type", which
    # describes the opposite situation)
    self.error(" non-empty return statement is not allowed in functions without an explicit return type ")
  elif returnType == nil and typ.returnType != nil:
    self.error(&" expected return value of type ' {self.typeToStr(typ.returnType)} ' , but expression has no type ")
  elif not self.compareTypes(returnType, typ.returnType):
    self.error(&" expected return value of type ' {self.typeToStr(typ.returnType)} ' , got ' {self.typeToStr(returnType)} ' instead ")
  if node.value != nil:
    self.expression(node.value)
    self.emitByte(OpCode.ReturnPop)
  else:
    self.emitByte(OpCode.Return)
2022-04-04 12:29:23 +02:00
proc yieldStmt(self: Compiler, node: YieldStmt) =
  ## Compiles yield statements: the yielded expression
  ## is compiled and followed by a Yield instruction
  let yielded = node.expression
  self.expression(yielded)
  self.emitByte(OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
  ## Compiles raise statements: the exception expression
  ## is compiled and followed by a Raise instruction
  let raised = node.exception
  self.expression(raised)
  self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
  ## Compiles continue statements by emitting a jump whose
  ## operand is the start of the current loop. Jump is used
  ## when the operand fits in 16 bits, LongJump otherwise;
  ## values above 16777215 are an error
  let target = self.currentLoop.start
  if target > 65535:
    if target > 16777215:
      self.error(" too much code to jump over in continue statement ")
    self.emitByte(LongJump)
    self.emitBytes(target.toTriple())
  else:
    self.emitByte(Jump)
    self.emitBytes(target.toDouble())
proc breakStmt(self: Compiler, node: BreakStmt) =
  ## Compiles break statements. A placeholder jump is emitted
  ## and its position is recorded in the current loop's breakPos
  ## list, so that it can be patched with the real target once
  ## the whole loop has been compiled
  # The position handed back by emitJump is stored directly: it is
  # the same value every other caller passes on to patchJump, so
  # there is no need to recompute it from the length of the code
  self.currentLoop.breakPos.add(self.emitJump(OpCode.Jump))
  # NOTE(review): the loop's depth is recorded from scopeDepth when
  # the loop starts -- confirm this comparison can ever be true
  if self.currentLoop.depth > self.scopeDepth:
    # Breaking out of a loop closes its scope
    self.endScope()
proc patchBreaks(self: Compiler) =
  ## Turns the placeholders recorded by break statements
  ## into actual jumps. This happens after the loop has
  ## been compiled because the amount of code to skip is
  ## not known before that point
  for position in self.currentLoop.breakPos:
    # The placeholder's opcode becomes a forward jump,
    # then its offset is patched
    self.chunk.code[position] = uint8(JumpForwards)
    self.patchJump(position)
proc assertStmt(self: Compiler, node: AssertStmt) =
  ## Compiles assert statements: the asserted expression
  ## is compiled and followed by an Assert instruction
  let condition = node.expression
  self.expression(condition)
  self.emitByte(OpCode.Assert)
2022-05-04 14:27:15 +02:00
proc statement(self: Compiler, node: Statement) =
  ## Compiles a statement by dispatching on the kind of its
  ## AST node. Node kinds that are not statements are compiled
  ## as expressions
  case node.kind:
    of exprStmt:
      self.expression(ExprStmt(node).expression)
      # The value of an expression statement is discarded: such
      # statements are mostly useful for their side effects
      self.emitByte(Pop)
    of NodeKind.ifStmt:
      self.ifStmt(IfStmt(node))
    of NodeKind.assertStmt:
      self.assertStmt(AssertStmt(node))
    of NodeKind.raiseStmt:
      self.raiseStmt(RaiseStmt(node))
    of NodeKind.breakStmt:
      self.breakStmt(BreakStmt(node))
    of NodeKind.continueStmt:
      self.continueStmt(ContinueStmt(node))
    of NodeKind.returnStmt:
      self.returnStmt(ReturnStmt(node))
    of NodeKind.whileStmt, NodeKind.forStmt:
      # For loops have already been desugared into while loops
      # by the parser. The enclosing loop is saved and restored
      # so that nested loops work
      let enclosingLoop = self.currentLoop
      self.currentLoop = Loop(start: self.chunk.code.len(),
                              depth: self.scopeDepth, breakPos: @[])
      self.whileStmt(WhileStmt(node))
      self.patchBreaks()
      self.currentLoop = enclosingLoop
    of NodeKind.blockStmt:
      self.blockStmt(BlockStmt(node))
    of NodeKind.yieldStmt:
      self.yieldStmt(YieldStmt(node))
    of NodeKind.awaitStmt:
      self.awaitStmt(AwaitStmt(node))
    of NodeKind.deferStmt:
      self.deferStmt(DeferStmt(node))
    of NodeKind.importStmt, NodeKind.forEachStmt, NodeKind.tryStmt:
      # Nothing is emitted for these statements
      discard
    else:
      self.expression(Expression(node))
2022-04-04 12:29:23 +02:00
2022-04-12 12:18:25 +02:00
proc varDecl(self: Compiler, node: VarDecl) =
  ## Compiles variable declarations. The declared type (if any)
  ## is checked against the type inferred for the initializer,
  ## then the initializer is compiled and the name is declared
  let kind = self.toIntrinsic(node.valueType)
  let typ = self.inferType(node.value)
  if kind == nil and typ == nil:
    self.error(&" cannot determine the type of ' {node.name.token.lexeme} ' ")
  elif kind != nil and (typ == nil or not self.compareTypes(typ, kind)):
    # Types are compared with compareTypes, like everywhere else in
    # the compiler. Type is a ref object, so (unless an overload
    # exists elsewhere) != only tells whether the two are the very
    # same instance, which rejects equal types built separately
    self.error(&" expected value of type ' {self.typeToStr(kind)} ' , but ' {node.name.token.lexeme} ' is of type ' {self.typeToStr(typ)} ' ")
  self.expression(node.value)
  self.declareName(node)
2022-04-12 12:18:25 +02:00
2022-04-04 12:29:23 +02:00
proc funDecl(self: Compiler, node: FunDecl) =
  ## Compiles function declarations. A function's code is compiled
  ## linearly, in place, and is preceded by a jump that skips over
  ## it. The function's name is declared before its body is compiled
  ## and the body itself is handled like a block statement
  let jmp = self.emitJump(Jump)
  self.declareName(node)
  # NOTE(review): the nesting below is reconstructed; it assumes that
  # everything up to and including patchJump only runs for functions
  # that have a body -- confirm (a body-less declaration would leave
  # the jump emitted above unpatched)
  if node.body != nil:
    if BlockStmt(node.body).code.len() == 0:
      self.error(" Cannot declare function with empty body ")
    let fnType = self.inferType(node)
    let impl = self.findByType(node.name.token.lexeme, fnType)
    if impl.len() > 1:
      # More than one implementation of the same function
      # with the same type was found: report all of them
      var msg = &" multiple matching implementations of ' {node.name.token.lexeme} ' found: \n "
      for fn in reversed(impl):
        let node = FunDecl(fn.valueType.node)
        msg &= &" - ' {node.name.token.lexeme} ' at line {node.token.line} of type {self.typeToStr(fn.valueType)} \n "
      self.error(msg)
    # The enclosing function is saved so that it can be
    # restored once this one has been compiled
    let enclosing = self.currentFunction
    self.currentFunction = node
    # The deferred array is a linear sequence of instructions: its
    # length before compiling the body marks where the instructions
    # deferred by this function begin
    let deferStart = self.deferred.len()
    self.blockStmt(BlockStmt(node.body))
    case self.currentFunction.kind:
      of NodeKind.funDecl:
        if not self.currentFunction.hasExplicitReturn:
          let typ = self.inferType(self.currentFunction)
          # NOTE(review): typ is the type inferred for the function
          # itself, not for a returned value -- confirm this check
          if self.currentFunction.returnType == nil and typ != nil:
            self.error(" non-empty return statement is not allowed in functions without an explicit return type ")
          if self.currentFunction.returnType != nil:
            self.error(" function has an explicit return type, but no explicit return statement was found ")
          self.emitByte(OpCode.Return)
      of NodeKind.lambdaExpr:
        if not LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn:
          self.emitByte(OpCode.Return)
      else:
        discard # Unreachable
    # Currently defer is not functional, so the instructions deferred
    # by this function are simply dropped. Truncating removes all of
    # them; deleting by increasing index while the sequence shrinks
    # skips every other entry and eventually runs out of bounds
    self.deferred.setLen(deferStart)
    self.patchJump(jmp)
    # This makes us compile nested functions correctly
    self.currentFunction = enclosing
2022-05-04 14:27:15 +02:00
proc declaration(self: Compiler, node: Declaration) =
  ## Compiles a declaration. Variable and function
  ## declarations have dedicated procedures, anything
  ## else is compiled as a statement
  let kind = node.kind
  if kind == NodeKind.varDecl:
    self.varDecl(VarDecl(node))
  elif kind == NodeKind.funDecl:
    self.funDecl(FunDecl(node))
  else:
    self.statement(Statement(node))
2022-04-04 12:29:23 +02:00
2022-05-18 13:32:32 +02:00
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
  ## Compiles a sequence of declarations into a chunk object.
  ## The compiler's state is reset first, so the same compiler
  ## can be used for several compilations. The given file is
  ## also used as the name of the current module
  self.chunk = newChunk()
  self.ast = ast
  self.file = file
  self.currentModule = self.file
  self.names = @[]
  self.scopeDepth = 0
  self.currentFunction = nil
  self.current = 0
  while not self.done():
    self.declaration(Declaration(self.step()))
  let nonEmpty = self.ast.len() > 0
  if nonEmpty:
    # An empty program is technically valid and gets no
    # instructions at all. At the global scope, Return
    # exits the VM's main loop
    self.emitByte(OpCode.Return)
  result = self.chunk
  # Every scope that was opened must have been closed by now
  if nonEmpty and self.scopeDepth != 0:
    self.error(&" invalid state: invalid scopeDepth value (expected 0, got {self.scopeDepth}), did you forget to call endScope/beginScope? ")