2022-04-04 12:29:23 +02:00
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta / token
import meta / ast
import meta / errors
import .. / config
import .. / util / multibyte
2022-06-14 22:45:32 +02:00
import lexer as l
import parser as p
2022-04-04 12:29:23 +02:00
2022-06-14 12:12:56 +02:00
import tables
2022-04-04 12:29:23 +02:00
import strformat
import algorithm
import parseutils
2022-04-21 15:25:29 +02:00
import strutils
2022-05-07 10:48:01 +02:00
import sequtils
2022-05-22 13:02:48 +02:00
import os
2022-04-04 12:29:23 +02:00
export ast
export token
export multibyte
2022-05-22 15:26:12 +02:00
2022-05-07 10:48:01 +02:00
type
    TypeKind = enum
        ## Every kind of type the compiler
        ## can describe at compile time
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf, Typevar, Generic,
        Reference, Pointer,
        Any  # Internal wildcard: used in a few places
             # (e.g. operator lookup) where only the
             # types of the arguments matter

    Type = ref object
        ## Compile-time description of a type.
        ## The fields available depend on `kind`
        # Mutability flag: it is set when inferring
        # `var` expressions and checked by compareTypes
        mutable: bool
        case kind: TypeKind:
            of Function:
                name: string
                # The type system has to carry an AST
                # node around because of the way
                # generics are handled
                funNode: FunDecl
                isLambda: bool
                isGenerator: bool
                isCoroutine: bool
                # Argument names paired with their types
                args: seq[tuple[name: string, kind: Type]]
                # nil is used for functions returning nothing
                returnType: Type
                isBuiltinFunction: bool
                builtinOp: string
            of Reference, Pointer:
                # The type being referenced/pointed to
                value: Type
            of Generic:
                # The identifier naming the generic type
                node: IdentExpr
            else:
                discard
2022-05-22 13:02:48 +02:00
2022-05-07 10:48:01 +02:00
# This way we don't have recursive dependency issues
import meta / bytecode
export bytecode
2022-04-04 12:29:23 +02:00
type
    Name = ref object
        ## Compile-time record of a
        ## statically resolved name
        # The identifier being declared
        name: IdentExpr
        # The module that owns the identifier
        owner: string
        # Scope depth of the declaration
        depth: int
        # Is the name private to its module?
        isPrivate: bool
        # Is the name a constant?
        isConst: bool
        # NOTE(review): presumably set for names declared
        # with `let` (i.e. bindings that cannot be
        # reassigned) - confirm against declareName
        isLet: bool
        # The type of the name's value
        valueType: Type
        # For functions: where the function's code
        # begins. For variables: where their
        # StoreVar/StoreHeap instruction was emitted
        codePos: int
        # Is the name used inside a closure?
        isClosedOver: bool
        # Is the name a function argument?
        isFunctionArgument: bool
        # Line of the file where the name is declared
        line: int

    Loop = object
        ## Bookkeeping for the loop being compiled,
        ## needed to emit the jump offsets of
        ## continue and break statements
        # Bytecode position where the loop begins
        start: int
        # Scope depth the loop lives at
        depth: int
        # Absolute bytecode offsets of the jumps
        # emitted for break statements, which still
        # need to be patched
        breakPos: seq[int]

    Compiler* = ref object
        ## The whole state of the Peon compiler
        # Bytecode chunk that code is written to
        chunk: Chunk
        # The AST produced by the parser
        ast: seq[Declaration]
        # Index of the AST node currently being looked at
        current: int
        # Name of the file being compiled (only
        # used for error reporting)
        file: string
        # Compile-time "simulation" of the runtime
        # stack, so that variables with stack behavior
        # can be loaded more efficiently
        names: seq[Name]
        # Current scope depth: 0 means global
        # scope, anything above is a local scope
        scopeDepth: int
        # The function currently being compiled
        currentFunction: FunDecl
        # Are optimizations enabled?
        enableOptimizations: bool
        # The loop currently being compiled (keeps
        # track of where jumps should land)
        currentLoop: Loop
        # REPL mode flag. When set, Pop instructions for
        # top-level expression statements are replaced
        # by a special PopRepl instruction that prints
        # the value of the expression after evaluating it
        replMode: bool
        # The module currently being compiled (limits
        # access to statically defined variables at
        # compile time)
        currentModule: string
        # Code emitted by defer statements is stored
        # here. If the current function has deferred
        # code, funDecl wraps the function's body in an
        # implicit try/finally block and places this
        # code in the finally branch. The sequence is
        # emptied whenever a function declaration is
        # compiled, so it only ever holds deferred code
        # for the current function (and may be empty)
        deferred: seq[uint8]
        # The closed-over variables
        closedOver: seq[Name]
        # Tracks stack frames
        frames: seq[int]
        # Compiler procedures invoked by pragmas,
        # indexed by pragma name
        compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]
2022-04-04 12:29:23 +02:00
## Forward declarations
2022-06-07 11:23:08 +02:00
# Public entry point
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk
# AST node handlers
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
# AST traversal
proc peek(self: Compiler, distance: int = 0): ASTNode
# Type inference and name lookup
proc inferType(self: Compiler, node: LiteralExpr, strictMutable: bool = true): Type
proc inferType(self: Compiler, node: Expression, strictMutable: bool = true): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, strictMutable: bool = true): seq[Name]
proc compareTypes(self: Compiler, a, b: Type, strictMutable: bool = true): bool
# Code generation helpers
proc patchReturnAddress(self: Compiler, pos: int)
# Pragma handling
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode)
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode)
proc dispatchPragmas(self: Compiler, node: ASTNode)
2022-04-04 12:29:23 +02:00
## End of forward declarations
2022-06-14 12:12:56 +02:00
proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler =
    ## Creates a new Compiler object with empty
    ## state and registers the handlers for the
    ## "magic" and "pure" pragmas
    result = Compiler(
        ast: @[],
        current: 0,
        file: "",
        names: @[],
        scopeDepth: 0,
        currentFunction: nil,
        enableOptimizations: enableOptimizations,
        replMode: replMode,
        currentModule: "",
        compilerProcs: newTable[string, proc (self: Compiler, pragma: Pragma, node: ASTNode)]()
    )
    # Pragma name -> procedure that handles it
    result.compilerProcs["magic"] = handleMagicPragma
    result.compilerProcs["pure"] = handlePurePragma
2022-04-26 09:29:59 +02:00
## Public getters for nicer error formatting
2022-04-04 12:29:23 +02:00
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the AST node that was consumed last
    ## (the one at index current - 1). The index is
    ## clamped to the bounds of the AST: if nothing
    ## has been consumed yet the first node is returned
    ## and if the compiler went past the end the last
    ## node is returned. Returns nil when the AST is
    ## empty
    if self.ast.len() == 0:
        # There is no node to report
        return nil
    # Without the clamp, current == 0 would index the
    # AST at -1 and raise an IndexDefect while an error
    # is being reported
    result = self.ast[clamp(self.current - 1, 0, self.ast.high())]
2022-05-22 11:49:38 +02:00
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the function declaration that
    ## is currently being compiled
    result = self.currentFunction
2022-05-26 18:31:40 +02:00
proc getFile*(self: Compiler): string {.inline.} =
    ## Returns the name of the file
    ## that is being compiled
    result = self.file
proc getModule*(self: Compiler): string {.inline.} =
    ## Returns the name of the module
    ## that is being compiled
    result = self.currentModule
2022-04-04 12:29:23 +02:00
## Utility functions
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Returns the AST node located distance nodes
    ## away from the current one. Negative distances
    ## give access to nodes that were already consumed.
    ## When the resulting index falls outside the AST,
    ## the last node of the tree is returned instead
    let target = self.current + distance
    if self.ast.len() > 0 and target >= 0 and target <= self.ast.high():
        return self.ast[target]
    # NOTE(review): this fallback also runs when the
    # AST is empty, in which case indexing with ^1
    # raises an IndexDefect
    result = self.ast[^1]
2022-06-14 12:12:56 +02:00
proc done(self: Compiler): bool {.inline.} =
    ## Tells whether every node of the AST
    ## has been consumed already
    self.current >= self.ast.len()
2022-06-14 12:12:56 +02:00
proc error(self: Compiler, message: string) {.raises: [CompileError], inline.} =
    ## Raises a CompileError carrying the given message
    ## together with the current node, file and module
    let failure = CompileError(msg: message,
                               node: self.getCurrentNode(),
                               file: self.file,
                               module: self.currentModule)
    raise failure
2022-04-04 12:29:23 +02:00
2022-06-14 12:12:56 +02:00
proc step(self: Compiler): ASTNode {.inline.} =
    ## Consumes the current node and returns it.
    ## The position is not advanced once the
    ## compiler is done
    result = self.peek()
    if self.done():
        return
    inc(self.current)
2022-06-14 12:12:56 +02:00
proc emitByte(self: Compiler, byt: OpCode | uint8) {.inline.} =
    ## Writes one byte to the chunk being compiled,
    ## tagging it with the line of the current node's
    ## token
    when DEBUG_TRACE_COMPILER:
        echo &"DEBUG - Compiler: Emitting {$byt}"
    let line = self.peek().token.line
    self.chunk.write(uint8(byt), line)
2022-06-14 12:12:56 +02:00
proc emitBytes(self: Compiler, bytarr: openarray[OpCode | uint8]) {.inline.} =
    ## Writes a series of bytes to the current
    ## chunk by handing them to emitByte one
    ## after the other, in order
    for idx in 0 .. bytarr.high():
        self.emitByte(bytarr[idx])
2022-04-04 12:29:23 +02:00
2022-05-20 15:47:04 +02:00
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Serializes the literal held by val's token according
    ## to typ, stores it into the current chunk's constant
    ## table and returns what writeConstant returns: the
    ## constant's index as a 3-byte array. Kinds that have
    ## no serialization write nothing and yield an array
    ## of zeroes
    let lexeme = val.token.lexeme
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64:
            # Only integer kinds go through the integer parser.
            # parseutils.parseInt stops at the first character
            # that is not part of the number, so a trailing
            # size specifier does not get in the way
            var v: int
            discard parseInt(lexeme, v)
            case typ.kind:
                of Int8, UInt8:
                    result = self.chunk.writeConstant([uint8(v)])
                of Int16, UInt16:
                    result = self.chunk.writeConstant(v.toDouble())
                of Int32, UInt32:
                    result = self.chunk.writeConstant(v.toQuad())
                else:
                    # Int64 and UInt64
                    result = self.chunk.writeConstant(v.toLong())
        of String:
            # The bytes of the lexeme itself are stored. The previous
            # code stored the result of parsing the lexeme as an integer,
            # which does not match the length operand that emitConstant
            # writes for strings (the length of the lexeme).
            # NOTE(review): the lexeme is written untouched - confirm
            # whether it includes the quotes and whether the VM expects them
            result = self.chunk.writeConstant(lexeme.toOpenArrayByte(0, lexeme.high()))
        of Float32:
            var f: float = 0.0
            discard parseFloat(lexeme, f)
            result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
        of Float64:
            var f: float = 0.0
            discard parseFloat(lexeme, f)
            result = self.chunk.writeConstant(cast[array[8, uint8]](f))
        else:
            discard
2022-04-04 12:29:23 +02:00
2022-05-07 10:48:01 +02:00
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits the load instruction matching kind,
    ## followed by its operand: the index returned
    ## by makeConstant. String constants also get
    ## their length, encoded on 3 bytes, right
    ## before the index
    case kind.kind:
        of Int64: self.emitByte(LoadInt64)
        of UInt64: self.emitByte(LoadUInt64)
        of Int32: self.emitByte(LoadInt32)
        of UInt32: self.emitByte(LoadUInt32)
        of Int16: self.emitByte(LoadInt16)
        of UInt16: self.emitByte(LoadUInt16)
        of Int8: self.emitByte(LoadInt8)
        of UInt8: self.emitByte(LoadUInt8)
        of String:
            self.emitByte(LoadString)
            let size = LiteralExpr(obj).literal.lexeme.len()
            if size >= 16777216:
                self.error("string constants cannot be larger than 16777216 bytes")
            self.emitBytes(size.toTriple())
        of Float32: self.emitByte(LoadFloat32)
        of Float64: self.emitByte(LoadFloat64)
        else:
            discard  # TODO
    # The operand is emitted no matter the kind
    self.emitBytes(self.makeConstant(obj, kind))
2022-04-04 12:29:23 +02:00
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits the given jump instruction followed by a
    ## 3-byte placeholder offset that patchJump fills
    ## in later. Returns the absolute position of the
    ## instruction in the chunk's bytecode
    const emitted = 4  # The opcode plus its 3-byte operand
    self.emitByte(opcode)
    self.emitBytes(0.toTriple())
    result = self.chunk.code.len() - emitted
proc patchJump(self: Compiler, offset: int) =
    ## Fills in the operand of the jump instruction
    ## located at offset (as returned by emitJump) so
    ## that it spans the distance to the current end
    ## of the bytecode
    let jump = self.chunk.code.len() - offset
    if jump > 16777215:
        self.error("cannot jump more than 16777216 bytecode instructions")
    # The 4 bytes of the jump instruction itself
    # do not count towards the distance
    let encoded = (jump - 4).toTriple()
    for i in 0 .. 2:
        self.chunk.code[offset + 1 + i] = encoded[i]
2022-04-04 12:29:23 +02:00
2022-05-02 17:26:38 +02:00
proc resolve(self: Compiler, name: IdentExpr,
             depth: int = self.scopeDepth): Name =
    ## Looks for the most recently declared name
    ## whose identifier matches the given one and
    ## returns it, or nil if there is none. Names
    ## that are private to a module other than the
    ## current one are never returned. The depth
    ## argument is not used by the lookup: scope
    ## depth is handled by getStackPos
    for idx in countdown(self.names.high(), 0):
        let candidate = self.names[idx]
        if candidate.name.token.lexeme != name.token.lexeme:
            continue
        if candidate.isPrivate and candidate.owner != self.currentModule:
            # An older name declared in the current
            # module may still match, so keep going
            continue
        return candidate
    return nil
2022-06-02 01:33:56 +02:00
proc getStackPos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int =
    ## Predicts the position of the given name on the
    ## call stack, relative to the current frame.
    ## Returns -1 if no suitable name is found
    var position = 2  # Counting starts from 2
    for idx in countdown(self.names.high(), 0):
        let variable = self.names[idx]
        if name.name.lexeme == variable.name.name.lexeme:
            if variable.isPrivate and variable.owner != self.currentModule:
                # Names that are private to other modules
                # are skipped without being counted
                continue
            if variable.depth == depth or variable.depth == 0:
                # A depth of 0 means the name is global
                return position
        inc(position)
    return -1
proc getClosurePos(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int =
    ## Walks the list of closed-over names from the
    ## most recent one to the oldest and predicts
    ## the position of the given name in the closure
    ## array. Returns -1 when the name isn't found,
    ## which is also the case for names that are
    ## private to other modules
    var position = self.closedOver.high()
    for idx in countdown(self.closedOver.high(), 0):
        let variable = self.closedOver[idx]
        if name.name.lexeme == variable.name.name.lexeme:
            if variable.isPrivate and variable.owner != self.currentModule:
                # Skipped without touching the counter
                continue
            elif variable.depth == depth:
                return position
        dec(position)
    return -1
proc detectClosureVariable(self: Compiler, name: Name, depth: int = self.scopeDepth) =
    ## Checks whether the given name is being used from
    ## a local scope deeper than the one it was declared
    ## in and, if so, rewrites the code emitted for it so
    ## that it gets stored as a closure variable. Nothing
    ## happens for nil names (e.g. not declared yet, or
    ## private to another module), for globals and for
    ## names that are already closed over. This must be
    ## called every time a name is referenced, otherwise
    ## closed-over variables are not emitted properly and
    ## the runtime may misbehave or crash
    if name.isNil() or name.depth == 0:
        return
    if name.depth >= depth or name.isClosedOver:
        return
    # The name is closed over: the placeholder instruction
    # that self.declareName left at name.codePos becomes a
    # StoreClosure, and the name is flagged so that
    # self.identifier() can emit a LoadClosure in place
    # of a LoadVar
    self.closedOver.add(name)
    let idx = self.closedOver.high().toTriple()
    if self.closedOver.len() >= 16777216:
        self.error("too many consecutive closed-over variables (max is 16777216)")
    self.chunk.code[name.codePos] = StoreClosure.uint8
    for i in 0 .. 2:
        self.chunk.code[name.codePos + 1 + i] = idx[i]
    name.isClosedOver = true
2022-05-02 17:26:38 +02:00
2022-05-22 13:02:48 +02:00
2022-06-19 14:44:14 +02:00
proc compareTypes(self: Compiler, a, b: Type, strictMutable: bool = true): bool =
    ## Compares two type objects for equality and
    ## returns true if they match. Either type can be
    ## nil, which is how the return type of a function
    ## returning nothing is represented. When
    ## strictMutable is true, the two types must also
    ## agree on their mutability: this applies to
    ## nested types (reference/pointer targets, function
    ## arguments and return types) as well
    if a.isNil():
        # nil only matches nil and the Any wildcard
        return b.isNil() or b.kind == Any
    elif b.isNil():
        # a is known not to be nil here
        return a.kind == Any
    elif a.kind == Any or b.kind == Any:
        # Needed internally: user code can't match
        # arbitrary types, but the compiler has to
        # for things like function calls, since peon
        # has no return type inference
        return true
    elif a.kind == Generic or b.kind == Generic:
        # Generic argument types match anything
        return true
    elif a.kind != b.kind:
        # Different discriminants, different types
        return false
    elif a.mutable != b.mutable and strictMutable:
        # One is mutable and the other one is not
        return false
    # From here on, both types are of the same kind
    case a.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, Nan, Bool, Inf:
            # Value types of the same kind are always equal
            return true
        of Reference, Pointer:
            # Equal if what they refer to is equal. strictMutable
            # is forwarded so that the whole comparison follows
            # the same rules
            return self.compareTypes(a.value, b.value, strictMutable)
        of Function:
            # Functions match if their arity, return
            # type and argument types all match
            if a.args.len() != b.args.len():
                return false
            elif not self.compareTypes(a.returnType, b.returnType, strictMutable):
                return false
            for (argA, argB) in zip(a.args, b.args):
                if not self.compareTypes(argA.kind, argB.kind, strictMutable):
                    return false
            return true
        else:
            # The remaining kinds (CustomType and Typevar)
            # are not compared yet and never match
            return false
proc toIntrinsic(name: string): Type =
    ## Maps the name of an intrinsic type to a
    ## new type object of the corresponding kind.
    ## Returns nil if the name does not identify
    ## an intrinsic type
    case name:
        of "int", "int64", "i64":
            result = Type(kind: Int64)
        of "uint64", "u64":
            result = Type(kind: UInt64)
        of "int32", "i32":
            result = Type(kind: Int32)
        of "uint32", "u32":
            result = Type(kind: UInt32)
        of "int16", "i16":
            result = Type(kind: Int16)
        of "uint16", "u16":
            result = Type(kind: UInt16)
        of "int8", "i8":
            result = Type(kind: Int8)
        of "uint8", "u8":
            result = Type(kind: UInt8)
        of "f64", "float", "float64":
            result = Type(kind: Float64)
        of "f32", "float32":
            result = Type(kind: Float32)
        of "byte":
            result = Type(kind: Byte)
        of "char":
            result = Type(kind: Char)
        of "nan":
            result = Type(kind: Nan)
        of "nil":
            result = Type(kind: Nil)
        of "inf":
            result = Type(kind: Inf)
        of "bool":
            result = Type(kind: Bool)
        of "typevar":
            result = Type(kind: Typevar)
        else:
            result = nil
2022-05-02 17:26:38 +02:00
2022-06-19 14:44:14 +02:00
proc inferType(self: Compiler, node: LiteralExpr, strictMutable: bool = true): Type =
    ## Infers the type of a literal expression and returns
    ## it. Returns nil for nil nodes and for literals that
    ## are not handled yet. Numeric literals may carry a
    ## type specifier after a single quote (for example
    ## 1'u8): integers without one are Int64 and floats
    ## without one are Float64. An error is raised if the
    ## specifier does not name an intrinsic type
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                return Type(kind: Int64)
            let typ = size[1].toIntrinsic()
            # toIntrinsic returns nil for unknown names
            if typ.isNil():
                self.error(&"invalid type specifier '{size[1]}' for int")
            return typ
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if len(size) notin 1..2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                return Type(kind: Float64)
            let typ = size[1].toIntrinsic()
            if typ.isNil():
                self.error(&"invalid type specifier '{size[1]}' for float")
            return typ
        of strExpr:
            # String literals used to end up in the else branch
            # and be given no type at all, even though they are
            # compiled as constants of type String
            return Type(kind: String)
        of nilExpr:
            return Type(kind: Nil)
        of trueExpr, falseExpr:
            return Type(kind: Bool)
        of nanExpr:
            return Type(kind: TypeKind.Nan)
        of infExpr:
            return Type(kind: TypeKind.Inf)
        else:
            discard  # TODO
2022-05-02 17:26:38 +02:00
2022-06-19 14:44:14 +02:00
proc inferType(self: Compiler, node: Expression, strictMutable: bool = true): Type =
    ## Infers the type of an expression and returns it.
    ## Returns nil when node is nil or when no type can
    ## be determined. strictMutable is handed down to
    ## every nested inference and type comparison
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            let name = self.resolve(node)
            if not name.isNil():
                result = name.valueType
            else:
                # Not a declared name: it may
                # be the name of an intrinsic type
                result = node.name.lexeme.toIntrinsic()
        of unaryExpr:
            return self.inferType(UnaryExpr(node).a, strictMutable)
        of binaryExpr:
            let node = BinaryExpr(node)
            let a = self.inferType(node.a, strictMutable)
            let b = self.inferType(node.b, strictMutable)
            # Both operands must be of the same type
            if not self.compareTypes(a, b, strictMutable):
                return nil
            return a
        of intExpr, hexExpr, binExpr, octExpr,
           strExpr, falseExpr, trueExpr, infExpr,
           nanExpr, floatExpr, nilExpr:
            return self.inferType(LiteralExpr(node), strictMutable)
        of lambdaExpr:
            let node = LambdaExpr(node)
            result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
            if not node.returnType.isNil():
                result.returnType = self.inferType(node.returnType, strictMutable)
            for argument in node.arguments:
                result.args.add((argument.name.token.lexeme, self.inferType(argument.valueType, strictMutable)))
        of callExpr:
            let node = CallExpr(node)
            case node.callee.kind:
                of identExpr:
                    let resolved = self.resolve(IdentExpr(node.callee))
                    # returnType only exists on function types, so
                    # anything else is treated like an unknown callee
                    if not resolved.isNil() and not resolved.valueType.isNil() and
                       resolved.valueType.kind == Function:
                        result = resolved.valueType.returnType
                        if result.isNil():
                            result = Type(kind: Any)
                    else:
                        result = nil
                of lambdaExpr:
                    result = self.inferType(LambdaExpr(node.callee).returnType, strictMutable)
                else:
                    discard  # Unreachable
        of varExpr:
            let inner = self.inferType(Var(node).value, strictMutable)
            if not inner.isNil():
                # The inferred type may be shared (e.g. the valueType
                # of a declared name), so the mutable flag is set on
                # a copy rather than on the object itself
                new(result)
                result[] = inner[]
                result.mutable = true
        of refExpr:
            result = Type(kind: Reference, value: self.inferType(Ref(node).value, strictMutable))
        of ptrExpr:
            result = Type(kind: Pointer, value: self.inferType(Ptr(node).value, strictMutable))
        else:
            discard  # Unreachable
2022-05-02 17:26:38 +02:00
2022-06-19 14:44:14 +02:00
proc inferType(self: Compiler, node: Declaration, strictMutable: bool = true): Type =
    ## Infers the type of a declaration and returns it.
    ## Function and variable declarations get the type
    ## of the name they resolve to. A variable whose name
    ## can't be resolved gets the type inferred from its
    ## value. Anything else yields nil
    if node.isNil():
        return nil
    case node.kind:
        of funDecl:
            let known = self.resolve(FunDecl(node).name)
            if not known.isNil():
                result = known.valueType
        of NodeKind.varDecl:
            let decl = VarDecl(node)
            let known = self.resolve(decl.name)
            if known.isNil():
                result = self.inferType(decl.value, strictMutable)
            else:
                result = known.valueType
        else:
            discard  # Unreachable
2022-05-04 14:01:38 +02:00
2022-05-29 15:54:01 +02:00
proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a type
    ## object. Kinds that have no representation
    ## yield an empty string.
    ## NOTE(review): typ (and every argument type of
    ## a function type) is dereferenced without a nil
    ## check, so callers must not pass nil types
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            # The lowercased name of the kind
            result &= ($typ.kind).toLowerAscii()
        of Pointer:
            result &= &"ptr {self.typeToStr(typ.value)}"
        of Reference:
            result &= &"ref {self.typeToStr(typ.value)}"
        of Function:
            result &= "fn ("
            for i, (argName, argType) in typ.args:
                result &= &"{argName}: "
                # A debugging echo of the argument type used to
                # be here: it has been removed so that formatting
                # a type does not print anything to stdout
                if argType.mutable:
                    result &= "var "
                result &= self.typeToStr(argType)
                if i < typ.args.len() - 1:
                    result &= ", "
            result &= ")"
            if not typ.returnType.isNil():
                result &= &": {self.typeToStr(typ.returnType)}"
        of Generic:
            result = typ.node.name.lexeme
        else:
            discard
2022-04-04 12:29:23 +02:00
## End of utility functions
2022-05-07 10:48:01 +02:00
2022-04-04 12:29:23 +02:00
proc literal(self: Compiler, node: ASTNode) =
    ## Compiles a literal. Singletons map to their dedicated
    ## load instruction, strings and numbers are emitted as
    ## constants, and await expressions compile their operand
    ## before emitting an Await instruction. Any other kind of
    ## node is reported as an internal error

    template emitAsDecimal(source: untyped, value: int) =
        # Rewrites an integer literal written in a non-decimal base
        # as a decimal IntExpr starting at the same position as the
        # original token, then emits it as a constant using the type
        # inferred from the original literal
        let decimal = newIntExpr(Token(lexeme: $value,
                                       line: source.token.line,
                                       pos: (start: source.token.pos.start,
                                             stop: source.token.pos.start + len($value))))
        self.emitConstant(decimal, self.inferType(source))

    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue)
        of falseExpr:
            self.emitByte(LoadFalse)
        of nilExpr:
            self.emitByte(LoadNil)
        of infExpr:
            self.emitByte(LoadInf)
        of nanExpr:
            self.emitByte(LoadNan)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        # TODO: Take size specifier into account!
        of intExpr:
            let number = IntExpr(node)
            var parsed: int
            # The parsed value is not used: parsing only
            # validates that the literal fits in an int
            try:
                discard parseInt(number.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            self.emitConstant(number, self.inferType(number))
        of hexExpr:
            let number = HexExpr(node)
            var parsed: int
            try:
                discard parseHex(number.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            emitAsDecimal(number, parsed)
        of binExpr:
            let number = BinExpr(node)
            var parsed: int
            try:
                discard parseBin(number.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            emitAsDecimal(number, parsed)
        of octExpr:
            let number = OctExpr(node)
            var parsed: int
            try:
                discard parseOct(number.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            emitAsDecimal(number, parsed)
        of floatExpr:
            let number = FloatExpr(node)
            var parsed: float
            try:
                discard parseFloat(number.literal.lexeme, parsed)
            except ValueError:
                self.error(" floating point value out of range ")
            self.emitConstant(number, self.inferType(number))
        of awaitExpr:
            let awaited = AwaitExpr(node)
            self.expression(awaited.expression)
            self.emitByte(OpCode.Await)
        else:
            self.error(&" invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!) ")
2022-04-04 12:29:23 +02:00
2022-05-30 09:29:03 +02:00
proc findByName(self: Compiler, name: string): seq[Name] =
    ## Collects every declared object whose identifier matches
    ## the given name. The list of declarations is walked from
    ## the last entry to the first, so the most recently declared
    ## match comes first in the result
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.name.token.lexeme == name:
            result.add(candidate)
2022-06-19 14:44:14 +02:00
proc findByType(self: Compiler, name: string, kind: Type, strictMutable: bool = true): seq[Name] =
    ## Narrows the result of findByName down to the objects
    ## whose type compares equal to the given one according to
    ## compareTypes (strictMutable is forwarded to it unchanged)
    result = self.findByName(name).filterIt(self.compareTypes(it.valueType, kind, strictMutable))
2022-06-19 14:44:14 +02:00
proc matchImpl(self: Compiler, name: string, kind: Type, strictMutable: bool = true): Name =
    ## Looks for the one function implementation named `name`
    ## whose type is compatible with `kind` and returns its
    ## name object. `kind` is expected to be a function type:
    ## its argument list is inspected when building diagnostics.
    ##
    ## An error is reported through self.error when no
    ## implementation matches (every object sharing the name is
    ## listed together with the reason it was rejected) or when
    ## more than one does (all of them are listed)
    let impl = self.findByType(name, kind, strictMutable)
    if impl.len() == 0:
        var msg = &" cannot find a suitable implementation for ' {name} ' "
        let names = self.findByName(name)
        if names.len() > 0:
            msg &= &" , found {len(names)} candidate "
            if names.len() > 1:
                msg &= " s "
            msg &= " : "
            for name in names:
                msg &= &" \n - ' {name.name.token.lexeme} ' of type ' {self.typeToStr(name.valueType)} ' "
                if name.valueType.kind != Function:
                    msg &= " , not a callable "
                elif kind.args.len() != name.valueType.args.len():
                    msg &= &" , wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()}) "
                else:
                    # Same arity: only the first offending argument is
                    # reported. No debug output is printed here: the
                    # compiler must only talk to the user via self.error
                    for i, arg in kind.args:
                        if name.valueType.args[i].kind.mutable and not arg.kind.mutable:
                            msg &= &" , first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not ' var ' "
                            break
                        elif not self.compareTypes(arg.kind, name.valueType.args[i].kind):
                            msg &= &" , first mismatch at position {i + 1}: expected argument of type ' {self.typeToStr(name.valueType.args[i].kind)} ' , got ' {self.typeToStr(arg.kind)} ' instead "
                            break
        self.error(msg)
    elif impl.len() > 1:
        var msg = &" multiple matching implementations of ' {name} ' found: \n "
        for fn in reversed(impl):
            msg &= &" - ' {fn.name.token.lexeme} ' at line {fn.line} of type {self.typeToStr(fn.valueType)} \n "
        self.error(msg)
    return impl[0]
2022-05-24 09:55:08 +02:00
2022-06-08 16:07:08 +02:00
proc emitFunction(self: Compiler, name: Name) =
    ## Emits a LoadFunction instruction whose operand is
    ## the code position stored in the given name, encoded
    ## with toTriple
    self.emitByte(LoadFunction)
    let address = name.codePos.toTriple()
    self.emitBytes(address)
2022-06-02 01:33:56 +02:00
2022-06-14 18:10:13 +02:00
proc handleBuiltinFunction(self: Compiler, fn: Name, args: seq[Expression]) =
    ## Compiles a call to a builtin function (such as addition
    ## or subtraction) down to a single dedicated instruction
    ## instead of a regular call. The builtin is selected by
    ## the builtinOp string stored in the function's type.
    ##
    ## Arguments are compiled first, in order, for every builtin
    ## except the two logical operators: those short-circuit, so
    ## their second operand is compiled after a conditional jump
    if fn.valueType.builtinOp notin [" GenericLogicalOr ", " GenericLogicalAnd "]:
        for argument in args:
            self.expression(argument)
    case fn.valueType.builtinOp:
        of " AddInt64 ": self.emitByte(AddInt64)
        of " SubInt64 ": self.emitByte(SubInt64)
        of " DivInt64 ": self.emitByte(DivInt64)
        of " MulInt64 ": self.emitByte(MulInt64)
        of " AddInt32 ": self.emitByte(AddInt32)
        of " SubInt32 ": self.emitByte(SubInt32)
        of " DivInt32 ": self.emitByte(DivInt32)
        of " MulInt32 ": self.emitByte(MulInt32)
        of " AddInt16 ": self.emitByte(AddInt16)
        of " SubInt16 ": self.emitByte(SubInt16)
        of " DivInt16 ": self.emitByte(DivInt16)
        of " MulInt16 ": self.emitByte(MulInt16)
        of " AddInt8 ": self.emitByte(AddInt8)
        of " SubInt8 ": self.emitByte(SubInt8)
        of " DivInt8 ": self.emitByte(DivInt8)
        of " MulInt8 ": self.emitByte(MulInt8)
        of " AddUInt64 ": self.emitByte(AddUInt64)
        of " SubUInt64 ": self.emitByte(SubUInt64)
        of " DivUInt64 ": self.emitByte(DivUInt64)
        of " MulUInt64 ": self.emitByte(MulUInt64)
        of " AddUInt32 ": self.emitByte(AddUInt32)
        of " SubUInt32 ": self.emitByte(SubUInt32)
        of " DivUInt32 ": self.emitByte(DivUInt32)
        of " MulUInt32 ": self.emitByte(MulUInt32)
        of " AddUInt16 ": self.emitByte(AddUInt16)
        of " SubUInt16 ": self.emitByte(SubUInt16)
        of " DivUInt16 ": self.emitByte(DivUInt16)
        of " MulUInt16 ": self.emitByte(MulUInt16)
        of " AddUInt8 ": self.emitByte(AddUInt8)
        of " SubUInt8 ": self.emitByte(SubUInt8)
        of " DivUInt8 ": self.emitByte(DivUInt8)
        of " MulUInt8 ": self.emitByte(MulUInt8)
        # 64-bit floats get their own instructions: they used to be
        # compiled to the 8-bit integer ones by mistake.
        # NOTE(review): assumes the OpCode enum declares the *Float64
        # members next to the *Float32 ones - confirm
        of " AddFloat64 ": self.emitByte(AddFloat64)
        of " SubFloat64 ": self.emitByte(SubFloat64)
        of " DivFloat64 ": self.emitByte(DivFloat64)
        of " MulFloat64 ": self.emitByte(MulFloat64)
        of " AddFloat32 ": self.emitByte(AddFloat32)
        of " SubFloat32 ": self.emitByte(SubFloat32)
        of " DivFloat32 ": self.emitByte(DivFloat32)
        of " MulFloat32 ": self.emitByte(MulFloat32)
        of " GenericLogicalOr ":
            # The right operand is only evaluated when
            # the JumpIfTrue on the left one is not taken
            self.expression(args[0])
            let jump = self.emitJump(JumpIfTrue)
            self.expression(args[1])
            self.patchJump(jump)
        of " GenericLogicalAnd ":
            self.expression(args[0])
            var jump: int
            if self.enableOptimizations:
                jump = self.emitJump(JumpIfFalseOrPop)
            else:
                # Without optimizations the left operand
                # is popped by an explicit instruction
                jump = self.emitJump(JumpIfFalse)
                self.emitByte(Pop)
            self.expression(args[1])
            self.patchJump(jump)
        else:
            discard # Unreachable
2022-05-30 22:06:15 +02:00
proc generateCall ( self : Compiler , fn : Name , args : seq [ Expression ] ) =
## Small wrapper that abstracts emitting a call instruction
## for a given function
##
## Builtin functions are handed over to handleBuiltinFunction
## and nothing else is emitted for them. For any other function
## this emits, in order: the function load (emitFunction), a
## LoadReturnAddress followed by a placeholder operand, the code
## for every argument, a Call instruction and the size of the
## new call frame. The placeholder is finally handed over to
## patchReturnAddress
2022-06-14 23:34:42 +02:00
if fn . valueType . isBuiltinFunction :
2022-06-14 18:10:13 +02:00
self . handleBuiltinFunction ( fn , args )
return
2022-06-20 09:39:54 +02:00
if any ( fn . valueType . args , proc ( arg : tuple [ name : string , kind : Type ] ) : bool = arg [ 1 ] . kind = = Generic ) :
# The function has generic arguments! We need to compile a version
# of it with the right type data
# We don't want to cause *any* interference to
# other objects, so we just play it safe
var node = fn . valueType . funNode . deepCopy ( )
# NOTE(review): no statement that uses `argument` or `node` follows
# this loop header, so the specialisation of generic functions looks
# unfinished. Nim rejects a for loop without a body - confirm what
# the loop is meant to do before relying on this branch
for argument in node . arguments :
2022-06-02 01:33:56 +02:00
self . emitFunction ( fn )
self . emitByte ( LoadReturnAddress )
# Position of the placeholder emitted right below, which
# patchReturnAddress receives once the call has been emitted
let pos = self . chunk . code . len ( )
self . emitBytes ( 0 . toQuad ( ) )
2022-06-02 01:50:06 +02:00
for argument in args :
2022-06-02 01:33:56 +02:00
self . expression ( argument )
self . emitByte ( Call ) # Creates a new call frame
var size = 2 # We start at 2 because each call frame
# contains at least 2 elements (function
# object and return address)
for name in reversed ( self . names ) :
# Then, for each local variable
# we increase the frame size by 1
if name . depth = = self . scopeDepth :
inc ( size )
self . emitBytes ( size . toTriple ( ) )
self . patchReturnAddress ( pos )
2022-05-24 09:55:08 +02:00
2022-06-13 15:04:53 +02:00
proc generateObjCall(self: Compiler, args: seq[Expression]) =
    ## Emits the instructions needed to call a function object
    ## that has already been loaded on the operand stack, passing
    ## it the given arguments
    self.emitByte(PushC) # Pops the function off the operand stack onto the call stack
    self.emitByte(LoadReturnAddress)
    # Where the placeholder for the return address starts
    let returnAddress = self.chunk.code.len()
    self.emitBytes(0.toQuad())
    for arg in args:
        self.expression(arg)
    self.emitByte(Call) # Creates a new call frame
    # Every call frame holds at least 2 elements (the function
    # object and the return address), plus one for each name
    # declared at the current scope depth
    let frameSize = 2 + self.names.countIt(it.depth == self.scopeDepth)
    self.emitBytes(frameSize.toTriple())
    self.patchReturnAddress(returnAddress)
2022-05-30 22:06:15 +02:00
proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
    ## Calls the given implementation of a unary operator,
    ## passing it the operand of the expression
    let operands = @[op.a]
    self.generateCall(fn, operands)
2022-05-24 09:55:08 +02:00
proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
    ## Calls the given implementation of a binary operator,
    ## passing it the left and right operands, in this order
    let operands = @[op.a, op.b]
    self.generateCall(fn, operands)
2022-05-24 09:55:08 +02:00
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles a unary expression by looking up the
    ## implementation of its operator for the type of
    ## the operand and then calling it. Mutability is
    ## not compared strictly during the lookup
    let operand = self.inferType(node.a, strictMutable=false)
    # The return type is irrelevant to the lookup, hence Any
    let signature = Type(kind: Function,
                         returnType: Type(kind: Any),
                         args: @[(" ", operand)])
    let impl = self.matchImpl(node.token.lexeme, signature, strictMutable=false)
    self.callUnaryOp(impl, node)
2022-04-04 12:29:23 +02:00
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles a binary expression by looking up the
    ## implementation of its operator for the types of
    ## both operands and then calling it. Mutability is
    ## not compared strictly during the lookup
    let left = self.inferType(node.a, strictMutable=false)
    let right = self.inferType(node.b, strictMutable=false)
    # The return type is irrelevant to the lookup, hence Any
    let signature = Type(kind: Function,
                         returnType: Type(kind: Any),
                         args: @[(" ", left), (" ", right)])
    let impl = self.matchImpl(node.token.lexeme, signature, strictMutable=false)
    self.callBinaryOp(impl, node)
2022-04-04 12:29:23 +02:00
2022-06-07 11:23:08 +02:00
proc declareName(self: Compiler, node: Declaration, mutable: bool = false) =
    ## Records a declaration in the compiler's list of names
    ## so that later lookups can find it. Variable and function
    ## declarations are handled, anything else is ignored for
    ## now. No instruction creates the name at runtime: its
    ## value is already on the stack. When mutable is true, the
    ## type of a declared variable is flagged as mutable
    case node.kind:
        of NodeKind.varDecl:
            let node = VarDecl(node)
            if self.names.high() > 16777215:
                self.error(" cannot declare more than 16777216 variables at a time ")
            for name in self.findByName(node.name.token.lexeme):
                # Redeclaring a variable in the same scope is an error,
                # unless the existing name is a function, a custom type
                # or a function argument (shadowing an argument is fine,
                # for example to copy a number in order to mutate it)
                if name.depth != self.scopeDepth or name.valueType.kind in {Function, CustomType} or name.isFunctionArgument:
                    continue
                self.error(&" attempt to redeclare ' {node.name.token.lexeme} ' , which was previously defined in ' {name.owner} ' at line {name.line} ")
            # This entry is what lets self.identifier emit the proper stack offset
            let variable = Name(depth: self.scopeDepth,
                                name: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: self.inferType(node.value),
                                codePos: self.chunk.code.len(),
                                isLet: node.isLet,
                                isClosedOver: false,
                                line: node.token.line)
            self.names.add(variable)
            if mutable:
                variable.valueType.mutable = true
            if self.scopeDepth > 0:
                # Closure variables only exist in local scopes. A jump
                # of 0 is emitted as a placeholder because it may later
                # become a StoreHeap instruction: if the variable is never
                # closed over this wastes one VM cycle
                # TODO: Optimize this. It's a bit tricky because deleting
                # bytecode would render all of our jump offsets and other
                # absolute indices in the bytecode wrong
                self.emitByte(JumpForwards)
                self.emitBytes(0.toTriple())
        of NodeKind.funDecl:
            let node = FunDecl(node)
            # Generics go in first, one scope deeper than the
            # function itself, so that they can be referred to
            for gen in node.generics:
                self.names.add(Name(depth: self.scopeDepth + 1,
                                    isPrivate: true,
                                    isConst: false,
                                    owner: self.currentModule,
                                    line: node.token.line,
                                    valueType: Type(kind: Generic, mutable: false, node: gen.name),
                                    name: gen.name))
            # The argument list starts out empty and is
            # filled in below, one argument at a time
            let fn = Name(depth: self.scopeDepth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function,
                                          name: node.name.token.lexeme,
                                          returnType: self.inferType(node.returnType),
                                          args: @[],
                                          funNode: node),
                          codePos: self.chunk.code.len(),
                          name: node.name,
                          isLet: false,
                          isClosedOver: false,
                          line: node.token.line)
            self.names.add(fn)
            for argument in node.arguments:
                if self.names.high() > 16777215:
                    self.error(" cannot declare more than 16777216 variables at a time ")
                # No load instruction is needed here: when a function
                # is called its arguments are already on the stack
                var name = Name(depth: self.scopeDepth + 1,
                                isPrivate: true,
                                owner: self.currentModule,
                                isConst: false,
                                name: argument.name,
                                valueType: nil,
                                codePos: 0,
                                isLet: false,
                                isClosedOver: false,
                                line: argument.name.token.line,
                                isFunctionArgument: true)
                # The argument is registered before its type is inferred
                self.names.add(name)
                name.valueType = self.inferType(argument.valueType)
                # A type that is still nil at this point is an error
                if name.valueType.isNil():
                    self.error(&" cannot determine the type of argument ' {argument.name.token.lexeme} ' ")
                fn.valueType.args.add((argument.name.token.lexeme, name.valueType))
        else:
            discard # TODO: Types, enums
2022-05-18 13:32:32 +02:00
2022-04-27 16:03:48 +02:00
2022-04-04 12:29:23 +02:00
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to an identifier. Referring to a name
    ## that cannot be resolved is an error. Constants are
    ## emitted inline, functions are loaded as function objects
    ## (nil is loaded for builtin ones), and any other name is
    ## read either from the stack or, when it has been closed
    ## over, from the closure array
    let s = self.resolve(node)
    if s.isNil():
        self.error(&" reference to undeclared name ' {node.token.lexeme} ' ")
    elif s.isConst:
        # Constants are always emitted as Load* instructions
        # no matter the scope depth
        self.emitConstant(node, self.inferType(node))
    else:
        self.detectClosureVariable(s)
        if s.valueType.kind == Function:
            if not s.valueType.isBuiltinFunction:
                self.emitByte(LoadFunctionObj)
                self.emitBytes(s.codePos.toTriple())
            else:
                self.emitByte(LoadNil)
        elif not s.isClosedOver:
            # Static name resolution: loads the value at a fixed index in the stack
            self.emitByte(LoadVar)
            # No need to check for -1 here: we already did a nil-check above!
            self.emitBytes(self.getStackPos(s.name).toTriple())
        else:
            # Loads a closure variable, which lives in a separate "closure array"
            # in the VM. The operand is the slot of *this* variable, looked up the
            # same way assignment() does for StoreClosure: using the index of the
            # last closed-over name would load the wrong value as soon as more
            # than one variable has been closed over
            self.emitByte(LoadClosure)
            self.emitBytes(self.getClosurePos(s.name).toTriple())
2022-06-13 15:04:53 +02:00
2022-04-04 12:29:23 +02:00
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignments. Assigning to a name that is
    ## undeclared, constant or declared with let is an error.
    ## The value is compiled first, then stored either on the
    ## stack or, for closed-over names, in the closure array.
    ## Item assignment only has its type checked for now
    case node.kind:
        of assignExpr:
            let node = AssignExpr(node)
            let name = IdentExpr(node.name)
            let target = self.resolve(name)
            if target.isNil():
                self.error(&" assignment to undeclared name ' {name.token.lexeme} ' ")
            elif target.isConst:
                self.error(&" cannot assign to ' {name.token.lexeme} ' (constant) ")
            elif target.isLet:
                self.error(&" cannot reassign ' {name.token.lexeme} ' ")
            self.expression(node.value)
            self.detectClosureVariable(target)
            if target.isClosedOver:
                # Stores into the VM's separate "closure array",
                # whose layout is independent of the call stack
                self.emitByte(StoreClosure)
                self.emitBytes(self.getClosurePos(name).toTriple())
            else:
                self.emitByte(StoreVar)
                self.emitBytes(self.getStackPos(name).toTriple())
        of setItemExpr:
            let node = SetItemExpr(node)
            if self.inferType(node).isNil():
                self.error(&" cannot determine the type of ' {node.name.token.lexeme} ' ")
            # TODO
        else:
            self.error(&" invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug) ")
proc beginScope(self: Compiler) =
    ## Enters a new local scope, which is one
    ## level deeper than the current one
    self.scopeDepth += 1
2022-05-30 09:29:03 +02:00
2022-04-04 12:29:23 +02:00
2022-06-02 01:33:56 +02:00
proc endScope ( self : Compiler , deleteNames : bool = true , fromFunc : bool = false ) =
2022-04-04 12:29:23 +02:00
## Ends the current local scope
##
## The scope depth is decremented and every name declared deeper
## than the new depth is collected. Unless fromFunc is true, pop
## instructions are emitted for the collected names. When deleteNames
## is true the collected names are also removed from the compiler's
## list of names, and closed-over ones from the closure list (emitting
## PopClosure)
2022-05-29 17:04:19 +02:00
# NOTE(review): this guard only fires when the depth is already negative,
# so calling endScope at depth 0 still decrements it to -1 - confirm intended
if self . scopeDepth < 0 :
self . error ( " cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug) " )
2022-05-20 15:47:04 +02:00
dec ( self . scopeDepth )
2022-05-30 09:29:03 +02:00
# Names that go out of scope now that the depth has been decremented
var names : seq [ Name ] = @ [ ]
for name in self . names :
if name . depth > self . scopeDepth :
names . add ( name )
2022-05-30 22:06:15 +02:00
# NOTE(review): with the indentation lost it cannot be told from here whether
# this PopC is emitted once per collected name or once per call - confirm
if not self . enableOptimizations and not fromFunc :
2022-04-04 12:29:23 +02:00
# All variables with a scope depth larger than the current one
# are now out of scope. Begone, you're now homeless!
2022-06-02 01:33:56 +02:00
self . emitByte ( PopC )
2022-05-30 22:06:15 +02:00
if self . enableOptimizations and len ( names ) > 1 and not fromFunc :
2022-04-04 12:29:23 +02:00
# If we're popping less than 65535 variables, then
# we can emit a PopN instruction. This is true for
# 99.99999% of the use cases of the language (who the
# hell is going to use 65 THOUSAND local variables?), but
2022-05-04 14:27:15 +02:00
# if you'll ever use more then Peon will emit a PopN instruction
2022-04-04 12:29:23 +02:00
# for the first 65 thousand and change local variables and then
# emit another batch of plain ol' Pop instructions for the rest
2022-05-30 09:29:03 +02:00
self . emitByte ( PopN )
self . emitBytes ( len ( names ) . toDouble ( ) )
if len ( names ) > uint16 . high ( ) . int ( ) :
for i in countdown ( self . names . high ( ) , len ( names ) - uint16 . high ( ) . int ( ) ) :
2022-04-04 12:29:23 +02:00
if self . names [ i ] . depth > self . scopeDepth :
2022-06-02 01:33:56 +02:00
self . emitByte ( PopC )
2022-05-30 22:06:15 +02:00
elif len ( names ) = = 1 and not fromFunc :
2022-04-04 12:29:23 +02:00
# We only emit PopN if we're popping more than one value
2022-06-02 01:33:56 +02:00
self . emitByte ( PopC )
2022-05-30 09:29:03 +02:00
# This seems *really* slow, but
# what else should I do? Nim doesn't
# allow the removal of items during
# seq iteration so ¯\_(ツ)_/¯
2022-06-02 01:33:56 +02:00
if deleteNames :
# NOTE(review): both loops below delete from a seq while indexing it with
# idx; whether idx can run past the end after a deletion depends on the
# lost nesting of inc(idx) - verify against the properly indented file
var idx = 0
while idx < self . names . len ( ) :
for name in names :
if self . names [ idx ] = = name :
self . names . delete ( idx )
inc ( idx )
idx = 0
while idx < self . closedOver . len ( ) :
for name in names :
# NOTE(review): the entry at idx is deleted without being compared
# against name - presumably it should be; confirm
if name . isClosedOver :
self . closedOver . delete ( idx )
self . emitByte ( PopClosure )
inc ( idx )
2022-04-04 12:29:23 +02:00
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles a block statement: every declaration it
    ## contains is compiled, in order, inside a local
    ## scope of its own
    self.beginScope()
    for declaration in node.code:
        self.declaration(declaration)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements. The condition must
    ## be of type bool, otherwise an error is reported.
    ## A conditional jump skips the then branch when the
    ## condition is false, and a forward jump placed at
    ## the end of the then branch skips the else branch
    let cond = self.inferType(node.condition)
    if not self.compareTypes(cond, Type(kind: Bool)):
        if not cond.isNil():
            self.error(&" expecting value of type ' bool ' , got ' {self.typeToStr(cond)} ' instead ")
        # From here on the condition has no type at all
        elif node.condition.kind == identExpr:
            self.error(&" reference to undeclared identifier ' {IdentExpr(node.condition).name.lexeme} ' ")
        elif node.condition.kind == callExpr and CallExpr(node.condition).callee.kind == identExpr:
            self.error(&" reference to undeclared identifier ' {IdentExpr(CallExpr(node.condition).callee).name.lexeme} ' ")
        else:
            self.error(&" expecting value of type ' bool ' , but expression has no type ")
    self.expression(node.condition)
    let skipThen = self.emitJump(if self.enableOptimizations: JumpIfFalsePop else: JumpIfFalse)
    if not self.enableOptimizations:
        # A plain JumpIfFalse is followed by an explicit Pop
        self.emitByte(Pop)
    self.statement(node.thenBranch)
    let skipElse = self.emitJump(JumpForwards)
    self.patchJump(skipThen)
    if not node.elseBranch.isNil():
        self.statement(node.elseBranch)
    self.patchJump(skipElse)
2022-04-04 12:29:23 +02:00
proc emitLoop(self: Compiler, begin: int) =
    ## Emits a JumpBackwards instruction whose operand is
    ## the distance between the current end of the bytecode
    ## and the given position, plus 4. Distances that do not
    ## fit the operand are reported as an error
    let distance = self.chunk.code.len() - begin + 4
    if distance > 16777215:
        self.error(" cannot jump more than 16777215 bytecode instructions ")
    self.emitByte(JumpBackwards)
    self.emitBytes(distance.toTriple())
2022-04-04 12:29:23 +02:00
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops and
    ## desugared C-style for loops.
    ##
    ## The emitted code is laid out as: condition, conditional
    ## exit jump, body, backwards jump to the condition. The
    ## exit jump lands right after the backwards jump
    let start = self.chunk.code.len()
    self.expression(node.condition)
    var jump: int
    if self.enableOptimizations:
        jump = self.emitJump(JumpIfFalsePop)
    else:
        # A plain JumpIfFalse is followed by an explicit Pop
        jump = self.emitJump(JumpIfFalse)
        self.emitByte(Pop)
    self.statement(node.body)
    # The backwards jump has to be emitted *before* the exit jump is
    # patched. patchJump is used elsewhere in this file to make a jump
    # land on the code emitted after the call, so patching first would
    # send a false condition to the JumpBackwards instruction itself
    # and the loop would never terminate. emitLoop only depends on the
    # length of the bytecode, which patching does not change
    self.emitLoop(start)
    self.patchJump(jump)
2022-06-08 16:07:08 +02:00
proc isPure(self: Compiler, node: ASTNode): bool =
    ## Tells whether the given node carries a pragma
    ## named " pure ". Pragmas are read from the node as
    ## a lambda when its kind is lambdaExpr and as a
    ## declaration otherwise
    let pragmas = if node.kind == lambdaExpr: LambdaExpr(node).pragmas
                  else: Declaration(node).pragmas
    # No pragmas means no match
    result = pragmas.anyIt(it.name.name.lexeme == " pure ")
proc checkCallIsPure(self: Compiler, node: ASTnode): bool =
    ## Checks if a call has any side effects. Returns true
    ## right away when isPure rejects the node, otherwise
    ## looks for a " pure " pragma on it
    # NOTE(review): the scan below repeats the one isPure has just
    # performed successfully, so as written this can only return
    # true - confirm what the intended result is
    if not self.isPure(node):
        return true
    let pragmas = if node.kind == lambdaExpr: LambdaExpr(node).pragmas
                  else: Declaration(node).pragmas
    result = pragmas.anyIt(it.name.name.lexeme == " pure ")
2022-06-13 15:04:53 +02:00
proc callExpr(self: Compiler, node: CallExpr) =
    ## Compiles a function call. The types of the positional
    ## arguments are inferred to build the signature used to
    ## look the callee up by name. When the callee is itself a
    ## call, the chain of nested calls is compiled first and the
    ## call is made on the function object they leave behind
    var
        signature: seq[tuple[name: string, kind: Type]] = @[]
        passed: seq[Expression] = @[]
        strictMutable = true
    # TODO: Keyword arguments
    for i, argument in node.arguments.positionals:
        let kind = self.inferType(argument)
        if kind.isNil():
            if argument.kind == identExpr:
                self.error(&" reference to undeclared identifier ' {IdentExpr(argument).name.lexeme} ' ")
            self.error(&" cannot infer the type of argument {i + 1} in function call ")
        # A single mutable argument relaxes the mutability check
        strictMutable = strictMutable and not kind.mutable
        signature.add((" ", kind))
        passed.add(argument)
    for argument in node.arguments.keyword:
        discard
    if signature.len() >= 16777216:
        self.error(&" cannot pass more than 16777215 arguments ")
    var funct: Name
    case node.callee.kind:
        of identExpr:
            funct = self.matchImpl(IdentExpr(node.callee).name.lexeme,
                                   Type(kind: Function, returnType: Type(kind: Any), args: signature),
                                   strictMutable)
        of NodeKind.callExpr:
            var callee = node.callee
            while callee.kind == NodeKind.callExpr:
                self.callExpr(CallExpr(callee))
                callee = CallExpr(callee).callee
        else:
            discard # TODO: Calling expressions
    if funct.isNil():
        # No named implementation: call whatever is on the operand stack
        self.generateObjCall(passed)
    elif funct.valueType.isBuiltinFunction:
        self.handleBuiltinFunction(funct, passed)
    else:
        self.generateCall(funct, passed)
    if self.scopeDepth > 0 and not self.checkCallIsPure(node.callee):
        if not self.currentFunction.name.isNil():
            self.error(&" cannot make sure that calls to ' {self.currentFunction.name.token.lexeme} ' are side-effect free ")
        else:
            self.error(&" cannot make sure that call is side-effect free ")
2022-05-30 09:29:03 +02:00
2022-05-04 14:27:15 +02:00
proc expression(self: Compiler, node: Expression) =
    ## Compiles any expression by dispatching on the
    ## kind of the node to the matching routine. Kinds
    ## that are not handled are reported as an internal
    ## error
    case node.kind:
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # Literals are all handed over to the same routine,
            # which tells them apart by looking at their kind
            self.literal(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of NodeKind.callExpr:
            self.callExpr(CallExpr(node)) # TODO
        of setItemExpr, assignExpr: # TODO: Get rid of this
            # The node is passed along as-is: self.assignment()
            # is the one telling the two kinds apart
            self.assignment(node)
        of getItemExpr, pragmaExpr:
            discard # TODO
        else:
            self.error(&" invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug) ")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements, the standalone form of
    ## await expressions: the awaited expression is compiled
    ## first and an Await instruction is emitted right after
    let awaited = node.expression
    self.expression(awaited)
    self.emitByte(OpCode.Await)
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception).
    ##
    ## The deferred expression is compiled in place, then the
    ## bytecode it produced is moved from the end of the chunk
    ## to the compiler's list of deferred code
    let current = self.chunk.code.len
    self.expression(node.expression)
    # Copy first and truncate afterwards: deleting bytes one at a
    # time while walking the same range upwards shifts the remaining
    # ones, which skips every other byte and ends up indexing past
    # the end of the bytecode
    for i in countup(current, self.chunk.code.high()):
        self.deferred.add(self.chunk.code[i])
    self.chunk.code.setLen(current) # TODO: Do not change bytecode size
proc endFunctionBeforeReturn(self: Compiler) =
    ## Emits the pop instructions for every
    ## non-function name declared at the current
    ## scope depth. Meant to be called right before
    ## a function's return instruction is emitted
    var toPop = 0
    for name in self.names:
        if name.depth != self.scopeDepth:
            continue
        if name.valueType.kind == Function:
            continue
        toPop += 1
    if self.enableOptimizations and toPop > 1:
        # One PopN replaces a run of single pops. Its operand
        # comes from toDouble() (presumably a 16-bit count), so
        # anything beyond uint16.high() is left to the loop below
        self.emitByte(PopN)
        self.emitBytes(toPop.toDouble())
        toPop -= uint16.high().int
    # Whatever is left (if anything) is popped one at a time
    for _ in 1..toPop:
        self.emitByte(PopC)
2022-04-04 12:29:23 +02:00
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. The type of the returned
    ## expression is checked against the return type of the
    ## function currently being compiled; then the value (if
    ## any) is compiled and followed by SetResult, the frame
    ## cleanup code is emitted and finally a Return instruction
    ## is written. The byte following Return is 1 when the
    ## statement carries a value and 0 when it is empty
    let actual = self.inferType(node.value)
    let expected = self.inferType(self.currentFunction)
    # The declared return type is what the returned expression
    # has to match. (Comparing the expression's type with itself
    # can never fail, which made the mismatch error unreachable)
    let comp: Type = expected.returnType
    if actual.isNil() and not comp.isNil():
        # A value was expected, but the expression has no type:
        # try to give a more specific error first
        if not node.value.isNil():
            if node.value.kind == identExpr:
                self.error(&" reference to undeclared identifier ' {node.value.token.lexeme} ' ")
            elif node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr:
                self.error(&" call to undeclared function ' {CallExpr(node.value).callee.token.lexeme} ' ")
        self.error(&" expected return value of type ' {self.typeToStr(expected.returnType)} ' , but expression has no type ")
    elif comp.isNil() and not actual.isNil():
        self.error(" non-empty return statement is not allowed in void functions ")
    elif not self.compareTypes(actual, comp):
        self.error(&" expected return value of type ' {self.typeToStr(comp)} ' , got ' {self.typeToStr(actual)} ' instead ")
    let hasValue = not node.value.isNil()
    if hasValue:
        self.expression(node.value)
        self.emitByte(OpCode.SetResult)
    self.endFunctionBeforeReturn()
    self.emitByte(OpCode.Return)
    # Operand of Return: whether a value is being returned
    if hasValue:
        self.emitByte(1)
    else:
        self.emitByte(0)
2022-04-04 12:29:23 +02:00
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded
    ## expression's code is emitted first,
    ## followed by the Yield opcode
    let yielded = node.expression
    self.expression(yielded)
    self.emitByte(OpCode.Yield)
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception
    ## expression's code is emitted first,
    ## followed by the Raise opcode
    let exception = node.exception
    self.expression(exception)
    self.emitByte(OpCode.Raise)
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the next iteration of the loop, i.e. to the
    ## start position recorded for the current loop
    let loopStart = self.currentLoop.start
    # 16777215 is 2^24 - 1; the operand is written with
    # toTriple() (presumably 3 bytes), so larger targets
    # are rejected
    if loopStart > 16777215:
        self.error(" too much code to jump over in continue statement ")
    self.emitByte(Jump)
    self.emitBytes(loopStart.toTriple())
2022-04-04 12:29:23 +02:00
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps out of the loop currently being compiled
    # The amount of code to skip is not known yet, so
    # a jump with a dummy offset is emitted and its
    # position is recorded to be patched later
    let placeholder = self.emitJump(OpCode.Jump)
    self.currentLoop.breakPos.add(placeholder)
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc patchBreaks(self: Compiler) =
    ## Turns the placeholder jumps recorded for the
    ## "break" statements of the current loop into
    ## actual forward jumps. This has to happen after
    ## the loop is fully compiled, because the size of
    ## the code to skip is not known before that
    for position in self.currentLoop.breakPos:
        # Rewrite the opcode, then fill in the offset
        self.chunk.code[position] = uint8(JumpForwards)
        self.patchJump(position)
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (which raise
    ## AssertionError if the expression is falsey):
    ## the condition's code is emitted first,
    ## followed by the Assert opcode
    let condition = node.expression
    self.expression(condition)
    self.emitByte(OpCode.Assert)
2022-06-07 11:23:08 +02:00
proc forEachStmt(self: Compiler, node: ForEachStmt) =
    ## Compiles foreach loops. Not implemented
    ## yet: no code is emitted for them
    # TODO
    discard
proc importStmt(self: Compiler, node: ImportStmt) =
    ## Imports a module at compile time. Imports are
    ## only accepted at the top level (scope depth 0).
    ## Locating the module is still a TODO: for now a
    ## placeholder source string is lexed, parsed and
    ## compiled under the module's name, and the
    ## resulting chunk is not used
    if self.scopeDepth > 0:
        self.error(" import statements are only allowed at the top level ")
    var lexer = newLexer()
    var parser = newParser()
    var compiler = newCompiler()
    # TODO: Find module
    let module = node.moduleName.name.lexeme
    let tokens = lexer.lex(" ", module)
    let tree = parser.parse(tokens, module)
    discard compiler.compile(tree, module)
2022-06-07 11:23:08 +02:00
2022-05-04 14:27:15 +02:00
proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements by dispatching on
    ## the node's kind to the matching handler.
    ## Anything that is not a known statement kind
    ## is compiled as an expression
    case node.kind:
        of exprStmt:
            let inner = ExprStmt(node).expression
            self.expression(inner)
            # A call to a function without a return type
            # has no type, so there is nothing to pop.
            # Everything else gets popped
            let isVoidCall = inner.kind == callExpr and self.inferType(CallExpr(inner).callee).returnType.isNil()
            if not isVoidCall:
                if self.replMode:
                    self.emitByte(PopRepl)
                else:
                    self.emitByte(Pop)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.whileStmt, NodeKind.forStmt:
            # Our parser already desugars for loops to
            # while loops! The enclosing loop is saved
            # here and restored once this one is done
            let enclosing = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth,
                                    breakPos: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = enclosing
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.tryStmt:
            # Nothing is emitted for try statements here
            discard
        else:
            self.expression(Expression(node))
2022-04-04 12:29:23 +02:00
2022-04-12 12:18:25 +02:00
proc varDecl(self: Compiler, node: VarDecl) =
    ## Compiles variable declarations: checks the
    ## declared type against the type of the initial
    ## value, compiles the value, declares the name
    ## and emits a StoreVar instruction
    let expected = self.inferType(node.valueType)
    let actual = self.inferType(node.value)
    if expected.isNil() and actual.isNil():
        # Neither a declared type nor an inferable value. If
        # the value is a bare identifier (or a call to one),
        # the likely cause is an undeclared name
        let isCall = node.value.kind == callExpr
        if node.value.kind == identExpr or (isCall and CallExpr(node.value).callee.kind == identExpr):
            let name =
                if isCall: CallExpr(node.value).callee.token.lexeme
                else: node.value.token.lexeme
            self.error(&" reference to undeclared identifier ' {name} ' ")
        self.error(&" ' {node.name.token.lexeme} ' has no type ")
    elif not expected.isNil() and expected.mutable:  # I mean, variables *are* already mutable (some of them anyway)
        self.error(&" invalid type ' {self.typeToStr(expected)} ' for var ")
    elif not self.compareTypes(expected, actual):
        # A mismatch is only reported when a type was declared
        if not expected.isNil():
            self.error(&" expected value of type ' {self.typeToStr(expected)} ' , but ' {node.name.token.lexeme} ' is of type ' {self.typeToStr(actual)} ' ")
    self.expression(node.value)
    let isMutable = node.token.kind == TokenType.Var
    self.declareName(node, mutable=isMutable)
    self.emitByte(StoreVar)
    self.emitBytes(self.names.len().toTriple())
2022-04-12 12:18:25 +02:00
2022-06-07 11:23:08 +02:00
proc typeDecl(self: Compiler, node: TypeDecl) =
    ## Compiles type declarations. Not implemented
    ## yet: no code is emitted for them
    # TODO
    discard
2022-06-07 11:23:08 +02:00
2022-06-14 12:12:56 +02:00
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode) =
    ## Handles the "magic" pragma, which marks a function
    ## declaration as a builtin. The pragma takes exactly
    ## one string argument, which becomes the function's
    ## builtin operation. Assumes the given name is
    ## already declared
    if pragma.args.len() != 1:
        self.error(" ' magic ' pragma: wrong number of arguments ")
    elif pragma.args[0].kind != strExpr:
        self.error(" ' magic ' pragma: wrong type of argument (string expected) ")
    elif node.kind != NodeKind.funDecl:
        self.error(" ' magic ' pragma is not valid in this context ")
    let declaration = FunDecl(node)
    let fn = self.resolve(declaration.name)
    # The first and last characters of the lexeme are
    # dropped (presumably the string's quotes)
    let literal = pragma.args[0].token.lexeme
    fn.valueType.isBuiltinFunction = true
    fn.valueType.builtinOp = literal[1..^2]
2022-06-14 12:12:56 +02:00
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode) =
    ## Handles the "pure" pragma, which sets the
    ## isPure flag of a function declaration or
    ## lambda. Any other kind of node is an error
    if node.kind == NodeKind.funDecl:
        FunDecl(node).isPure = true
    elif node.kind == NodeKind.lambdaExpr:
        LambdaExpr(node).isPure = true
    else:
        self.error(" ' pure ' pragma: invalid usage ")
proc dispatchPragmas(self: Compiler, node: ASTnode) =
    ## Dispatches pragmas bound to objects: each pragma
    ## attached to the node is looked up by name in
    ## self.compilerProcs and its handler is called with
    ## the compiler, the pragma and the node
    var pragmas: seq[Pragma]
    if node.kind in {NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl}:
        pragmas = Declaration(node).pragmas
    elif node.kind == NodeKind.lambdaExpr:
        pragmas = LambdaExpr(node).pragmas
    # Any other node kind is unreachable and is
    # treated as having no pragmas
    for pragma in pragmas:
        let handlerName = pragma.name.token.lexeme
        if handlerName notin self.compilerProcs:
            self.error(&" unknown pragma ' {pragma.name.token.lexeme} ' ")
        self.compilerProcs[handlerName](self, pragma, node)
2022-06-14 12:12:56 +02:00
2022-04-04 12:29:23 +02:00
proc funDecl(self: Compiler, node: FunDecl) =
    ## Compiles function declarations. The function's name
    ## is declared, its pragmas are dispatched and, unless
    ## it has been marked as a builtin, its body is compiled
    ## linearly into the chunk behind a forward jump that
    ## skips over it. Once the body is compiled, an entry
    ## describing the function (start, end, frame, argument
    ## count and name) is appended to the chunk's cfi data.
    ## The function that was being compiled before this one
    ## is restored on exit, so nested declarations work
    let function = self.currentFunction
    self.declareName(node)
    # NOTE(review): a length is never negative, so this
    # condition cannot be true and the early return below
    # is dead code: generic functions are compiled right
    # away like any other. '> 0' looks like the intended
    # check, but nothing visible here compiles a generic
    # function at call time, so confirm before changing it
    if node.generics.len() < 0:
        # We can't know the type of
        # generic arguments yet, so
        # we wait for the function to
        # be called to compile its code
        # or dispatch any pragmas. We
        # still declare its name so that
        # it can be assigned to variables
        # and passed to functions
        return
    self.dispatchPragmas(node)
    # The function's own name sits right before
    # the names of its arguments
    let fn = self.names[^(node.arguments.len() + 1)]
    var jmp: int
    if not fn.valueType.isBuiltinFunction:
        self.frames.add(self.names.high())
        # A function's code is just compiled linearly
        # and then jumped over
        jmp = self.emitJump(JumpForwards)
        # Function's code starts after the jump
        fn.codePos = self.chunk.code.len()
        for argument in node.arguments:
            # Pops off the operand stack onto the
            # call stack
            self.emitByte(LoadArgument)
    if not node.returnType.isNil() and self.inferType(node.returnType).isNil():
        # Are we returning a generic type?
        var isGeneric = false
        if node.returnType.kind == identExpr:
            let name = IdentExpr(node.returnType)
            for g in node.generics:
                if name == g.name:
                    # Yep!
                    isGeneric = true
                    break
        if not isGeneric:
            # Nope
            self.error(&" cannot infer the type of ' {node.returnType.token.lexeme} ' ")
    # TODO: Forward declarations
    if not node.body.isNil():
        if BlockStmt(node.body).code.len() == 0 and not fn.valueType.isBuiltinFunction:
            self.error(" cannot declare function with empty body ")
        let fnType = self.inferType(node)
        let impl = self.findByType(node.name.token.lexeme, fnType)
        if impl.len() > 1:
            # Oh-oh! We found more than one implementation of
            # the same function with the same name! Error!
            var msg = &" multiple matching implementations of ' {node.name.token.lexeme} ' found: \n "
            for fn in reversed(impl):
                msg &= &" - ' {fn.name.token.lexeme} ' at line {fn.line} of type {self.typeToStr(fn.valueType)} \n "
            self.error(msg)
        # We store the current function
        self.currentFunction = node
        if not fn.valueType.isBuiltinFunction:
            # Since the deferred array is a linear
            # sequence of instructions and we want
            # to keep track to whose function's each
            # set of deferred instruction belongs,
            # we record the length of the deferred
            # array before compiling the function
            # and use this info later to compile
            # the try/finally block with the deferred
            # code
            let deferStart = self.deferred.len()
            # We let our debugger know a function is starting
            let start = self.chunk.code.high()
            self.beginScope()
            for decl in BlockStmt(node.body).code:
                self.declaration(decl)
            var typ: Type
            var hasVal: bool = false
            case self.currentFunction.kind:
                of NodeKind.funDecl:
                    typ = self.inferType(self.currentFunction)
                    hasVal = self.currentFunction.hasExplicitReturn
                of NodeKind.lambdaExpr:
                    typ = self.inferType(LambdaExpr(Declaration(self.currentFunction)))
                    hasVal = LambdaExpr(Declaration(self.currentFunction)).hasExplicitReturn
                else:
                    discard  # Unreachable
            if hasVal and self.currentFunction.returnType.isNil() and not typ.returnType.isNil():
                self.error(" non-empty return statement is not allowed in void functions ")
            elif not hasVal and not self.currentFunction.returnType.isNil():
                self.error(" function has an explicit return type, but no return statement was found ")
            self.endFunctionBeforeReturn()
            hasVal = hasVal and not typ.returnType.isNil()
            self.endScope(deleteNames=true, fromFunc=true)
            # Terminates the function's context
            self.emitByte(OpCode.Return)
            if hasVal:
                self.emitByte(1)
            else:
                self.emitByte(0)
            # Function is ending!
            self.chunk.cfi.add(start.toTriple())
            self.chunk.cfi.add(self.chunk.code.high().toTriple())
            self.chunk.cfi.add(self.frames[^1].toTriple())
            self.chunk.cfi.add(uint8(node.arguments.len()))
            if not node.name.isNil():
                # The name is written as a length prefix followed
                # by its bytes. It is truncated *before* the length
                # is emitted, so that the prefix always matches the
                # number of bytes that follow it, and at most
                # uint16.high() characters are kept
                var s = node.name.token.lexeme
                if s.len() > uint16.high().int:
                    s = s[0 ..< uint16.high().int]
                self.chunk.cfi.add(s.len().toDouble())
                self.chunk.cfi.add(s.toBytes())
            else:
                self.chunk.cfi.add(0.toDouble())
            # Currently defer is not functional so we
            # just pop the instructions. Truncating in one
            # step drops everything this function deferred
            # (deleting by index while walking forward would
            # skip entries and index out of bounds)
            self.deferred.setLen(deferStart)
            self.patchJump(jmp)
            # This makes us compile nested functions correctly
            discard self.frames.pop()
    self.currentFunction = function
2022-06-02 01:33:56 +02:00
proc patchReturnAddress(self: Compiler, pos: int) =
    ## Patches the return address of a function
    ## call: the four bytes starting at pos are
    ## overwritten with the current length of the
    ## chunk's code, as encoded by toQuad()
    let address = self.chunk.code.len().toQuad()
    for offset in 0..3:
        self.chunk.code[pos + offset] = address[offset]
2022-05-22 17:23:52 +02:00
2022-05-04 14:27:15 +02:00
proc declaration(self: Compiler, node: Declaration) =
    ## Compiles all declarations. Variable, function
    ## and type declarations go to their own handlers,
    ## anything else is compiled as a statement
    if node.kind == NodeKind.varDecl:
        self.varDecl(VarDecl(node))
    elif node.kind == NodeKind.funDecl:
        self.funDecl(FunDecl(node))
    elif node.kind == NodeKind.typeDecl:
        self.typeDecl(TypeDecl(node))
    else:
        self.statement(Statement(node))
2022-04-04 12:29:23 +02:00
2022-05-18 13:32:32 +02:00
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object. The compiler's state is reset first, so
    ## the same compiler object can be used again
    self.chunk = newChunk()
    self.ast = ast
    self.file = file
    self.names = @[]
    self.scopeDepth = 0
    self.currentFunction = nil
    self.currentModule = self.file.extractFilename()
    self.current = 0
    # Every peon program has a hidden entry point in
    # which user code is wrapped. Think of it as if
    # peon is implicitly writing the main() function
    # of your program and putting all of your code in
    # there. While we call our entry point just like
    # any regular peon function, we can't use our handy
    # helper generateCall() because we need to keep track
    # of where our program ends (which we don't know yet).
    # To fix this, we emit dummy offsets and patch them
    # later, once we know the boundaries of our hidden main()
    let mainType = Type(kind: Function,
                        name: " ",
                        returnType: nil,
                        args: @[])
    # Jump address is hardcoded: presumably the size of the
    # call sequence emitted below (three opcodes plus a
    # triple, a quad and a triple of operands), so that
    # user code starts right after it
    let main = Name(depth: 0,
                    isPrivate: true,
                    isConst: false,
                    isLet: false,
                    isClosedOver: false,
                    owner: self.currentModule,
                    valueType: mainType,
                    codePos: 13,
                    name: newIdentExpr(Token(lexeme: " ", kind: Identifier)),
                    line: -1)
    self.names.add(main)
    self.emitByte(LoadFunction)
    self.emitBytes(main.codePos.toTriple())
    self.emitByte(LoadReturnAddress)
    # Position of the dummy return address, patched
    # once all of the user's code has been compiled
    let returnAddressPos = self.chunk.code.len()
    self.emitBytes(0.toQuad())
    self.emitByte(Call)
    self.emitBytes(2.toTriple())
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.endScope(fromFunc=true)
    self.patchReturnAddress(returnAddressPos)
    # The hidden main() returns no value
    self.emitByte(OpCode.Return)
    self.emitByte(0)
    return self.chunk