2022-04-04 12:29:23 +02:00
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import meta / token
import meta / ast
import meta / errors
import .. / util / multibyte
2022-08-14 18:37:06 +02:00
import .. / util / symbols
2022-06-14 22:45:32 +02:00
import lexer as l
import parser as p
2022-10-17 11:28:00 +02:00
import .. / config
2022-04-04 12:29:23 +02:00
2022-08-17 19:31:27 +02:00
import std / tables
import std / strformat
import std / algorithm
import std / parseutils
import std / strutils
import std / sequtils
import std / sets
import std / os
2022-04-04 12:29:23 +02:00
export ast
export token
export multibyte
2022-05-22 15:26:12 +02:00
2022-05-07 10:48:01 +02:00
type
    TypeKind = enum
        ## Enumerates every kind of type
        ## that exists at compile time
        Int8, UInt8, Int16, UInt16, Int32,
        UInt32, Int64, UInt64, Float32, Float64,
        Char, Byte, String, Function, CustomType,
        Nil, Nan, Bool, Inf, Typevar, Generic,
        Reference, Pointer,
        Any  # Internal-only kind, needed in a few
             # places (operator lookup, for instance)
             # where just the type of the arguments
             # matters
    Type = ref object
        ## Wraps a type as it is known
        ## at compile time
        mutable: bool
        case kind: TypeKind:
            of Function:
                isLambda, isGenerator, isCoroutine, isGeneric: bool
                args: seq[tuple[name: string, kind: Type]]
                returnType: Type
                isBuiltinFunction: bool
                builtinOp: string
                fun: FunDecl
                isClosure: bool
                envLen: int
                children: seq[Type]
                parent: Type
                retJumps: seq[int]
            of CustomType:
                fields: TableRef[string, Type]
            of Reference, Pointer:
                value: Type
            of Generic:
                # A type constraint lives in cond. As an
                # example, fn foo[T: int & !uint](...) {...}
                # is stored as [(true, int), (false, uint)]
                cond: seq[tuple[match: bool, kind: Type]]
                name: string
            else:
                discard
2022-05-22 13:02:48 +02:00
2022-05-07 10:48:01 +02:00
# This way we don't have recursive dependency issues
import meta / bytecode
export bytecode
2022-04-04 12:29:23 +02:00
type
    NameKind {.pure.} = enum
        ## Enumerates the kinds of names
        None, Module, Argument, Var, Function, CustomType, Enum
    Name = ref object
        ## Compile-time wrapper around a
        ## statically resolved name

        # Identifier of the name
        ident: IdentExpr
        # Kind of the identifier (NOT of its value!)
        kind: NameKind
        # Module owning the identifier
        owner: string
        # Depth of the scope
        depth: int
        # Whether the name is private
        isPrivate: bool
        # Whether the name is a constant
        isConst: bool
        # Can this name's value be mutated?
        isLet: bool
        # Type of the name's value
        valueType: Type
        # Functions only: marks where the
        # code of the function begins
        codePos: int
        # Function owning this variable (may be nil!)
        belongsTo: Name
        # Line of the file where the node is declared
        line: int
        # Whether the name has been closed over
        isClosedOver: bool
        # Whether the name was referenced at least once
        resolved: bool
        # AST node associated with this name. Needed
        # because declarations are compiled only when
        # they are actually used
        node: Declaration
        # Whether the name is exported (only meaningful
        # when isPrivate equals false)
        exported: bool
    Loop = object
        ## "Loop object" the compiler relies on
        ## to emit the right jump offsets for
        ## continue and break statements

        # Bytecode position where the loop begins
        start: int
        # Depth of the scope containing the loop
        depth: int
        # Jump offsets into the bytecode that still
        # need patching. Used for break statements
        breakJumps: seq[int]
    Compiler* = ref object
        ## Wraps the state of the Peon compiler

        # Bytecode chunk that code is written to
        chunk: Chunk
        # Output of the parser (AST)
        ast: seq[Declaration]
        # AST node currently being looked at
        current: int
        # File currently being compiled (only
        # used for error reporting)
        file: string
        # Compile-time "simulation" of the runtime
        # stack, so that variables with stack
        # behavior can be loaded more efficiently
        names: seq[Name]
        # Current scope depth. A value > 0 means a
        # local scope, anything else is global
        scopeDepth: int
        # Scope ownership data
        scopeOwners: seq[tuple[owner: Name, depth: int]]
        # Function currently being compiled
        currentFunction: Name
        # Loop currently being compiled (keeps
        # track of where to jump)
        currentLoop: Loop
        # Whether REPL mode is on. If it is, Pop
        # instructions for top-level expression
        # statements are swapped for a special PopRepl
        # instruction that prints the result of the
        # expression once it has been evaluated
        replMode: bool
        # Module currently being compiled (used to
        # restrict access to statically defined
        # variables at compile time)
        currentModule: string
        # Whenever a defer statement is compiled,
        # its code is emitted here. Later, if the
        # current function has any code to defer,
        # funDecl wraps the function's code in an
        # implicit try/finally block and places this
        # code in the finally branch. The sequence is
        # emptied each time a function declaration is
        # compiled and only holds deferred code for the
        # current function (it may be empty)
        deferred: seq[uint8]
        # List of closed-over variables
        closures: seq[Name]
        # Compiler procedures invoked by pragmas
        compilerProcs: TableRef[string, proc (self: Compiler, pragma: Pragma, node: ASTNode, name: Name)]
        # Line data kept for error reporting
        lines: seq[tuple[start, stop: int]]
        # Source of the current module, kept
        # for error reporting
        source: string
        # Modules imported so far
        modules: HashSet[string]
        # Position of every jump
        jumps: seq[tuple[patched: bool, offset: int]]
        # List of CFI start offsets into our CFI data
        cfiOffsets: seq[tuple[start, stop, pos: int, fn: Name]]
        # These objects are kept around to compile modules
        lexer: Lexer
        parser: Parser
        # Whether the main module is being compiled
        isMainModule: bool
    CompileError* = ref object of PeonException
        compiler*: Compiler
        node*: ASTNode
        file*: string
        module*: string
2022-04-04 12:29:23 +02:00
2022-08-30 12:55:14 +02:00
# Forward declarations
2022-08-15 11:46:24 +02:00
proc compile*(self: Compiler, ast: seq[Declaration], file: string,
              lines: seq[tuple[start, stop: int]], source: string,
              chunk: Chunk = nil, incremental: bool = false,
              isMainModule: bool = true): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
proc peek(self: Compiler, distance: int = 0): ASTNode
proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl, name: Name)
proc specialize(self: Compiler, name: Name, args: seq[Expression]): Name
proc matchImpl(self: Compiler, name: string, kind: Type, node: ASTNode = nil): Name
proc infer(self: Compiler, node: LiteralExpr, allowGeneric: bool = false): Type
proc infer(self: Compiler, node: Expression, allowGeneric: bool = false): Type
proc inferOrError[T: LiteralExpr | Expression](self: Compiler, node: T,
                                               allowGeneric: bool = false): Type
proc findByName(self: Compiler, name: string): seq[Name]
proc findByModule(self: Compiler, name: string): seq[Name]
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name]
proc compare(self: Compiler, a, b: Type): bool
proc patchReturnAddress(self: Compiler, pos: int)
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode, name: Name)
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode, name: Name)
proc dispatchPragmas(self: Compiler, node: ASTNode, name: Name)
proc funDecl(self: Compiler, node: FunDecl, name: Name)
proc typeDecl(self: Compiler, node: TypeDecl, name: Name)
proc compileModule(self: Compiler, moduleName: string)
proc generateCall(self: Compiler, fn: Name, args: seq[Expression], line: int)
2022-08-30 12:55:14 +02:00
# End of forward declarations
2022-04-04 12:29:23 +02:00
2022-06-14 12:12:56 +02:00
2022-10-17 11:28:00 +02:00
proc newCompiler*(replMode: bool = false): Compiler =
    ## Builds a fresh Compiler object. The pragma
    ## handlers for "magic" and "pure" are registered
    ## and a lexer (with its symbol table filled) plus
    ## a parser are attached to it
    result = Compiler(
        ast: @[],
        current: 0,
        file: "",
        names: @[],
        scopeDepth: 0,
        lines: @[],
        jumps: @[],
        currentFunction: nil,
        replMode: replMode,
        currentModule: "",
        source: "",
        scopeOwners: @[],
        isMainModule: false,
        closures: @[]
    )
    result.compilerProcs = newTable[string, proc (self: Compiler, pragma: Pragma, node: ASTNode, name: Name)]()
    result.compilerProcs["magic"] = handleMagicPragma
    result.compilerProcs["pure"] = handlePurePragma
    result.lexer = newLexer()
    result.lexer.fillSymbolTable()
    result.parser = newParser()
2022-10-08 15:48:26 +02:00
2022-06-14 12:12:56 +02:00
2022-08-30 12:55:14 +02:00
## Public getters for nicer error formatting
2022-04-04 12:29:23 +02:00
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the AST node the compiler is currently
    ## working on, for error reporting purposes: the
    ## last node of the tree once every node has been
    ## consumed, the most recently consumed node
    ## otherwise. Returns nil when the AST is empty
    if self.ast.len() == 0:
        # Nothing to point at: indexing the sequence
        # here would raise an IndexDefect
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    # When nothing has been consumed yet (current == 0)
    # we clamp to the first node instead of indexing -1
    result = self.ast[max(self.current - 1, 0)]
2022-08-30 12:55:14 +02:00
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the declaration of the function being
    ## compiled, or nil when there is no current function
    if self.currentFunction.isNil():
        return nil
    result = self.currentFunction.valueType.fun
2022-05-26 18:31:40 +02:00
proc getFile*(self: Compiler): string {.inline.} =
    ## Returns the file being compiled
    result = self.file


proc getModule*(self: Compiler): string {.inline.} =
    ## Returns the module being compiled
    result = self.currentModule


proc getLines*(self: Compiler): seq[tuple[start, stop: int]] =
    ## Returns the line data stored by the compiler
    result = self.lines


proc getSource*(self: Compiler): string =
    ## Returns the source of the current module
    result = self.source


proc getRelPos*(self: Compiler, line: int): tuple[start, stop: int] =
    ## Returns the stored (start, stop) pair for the
    ## given line. Lines are counted starting from 1
    result = self.lines[line - 1]
2022-04-04 12:29:23 +02:00
## Utility functions
2022-08-14 18:37:06 +02:00
2022-08-30 12:55:14 +02:00
proc `$`*(self: Name): string =
    ## Stringifies the object a Name refers to
    result = $(self[])


proc `$`(self: Type): string =
    ## Stringifies the object a Type refers to
    result = $(self[])
2022-08-30 12:55:14 +02:00
2022-04-04 12:29:23 +02:00
proc peek(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node located at the given
    ## distance from the current one. When the
    ## resulting position is out of bounds, the last
    ## node of the tree is returned instead. Negative
    ## distances can be used to look at nodes that were
    ## already consumed. Returns nil when the AST is empty
    if self.ast.len() == 0:
        # There is no "last node" to fall back to:
        # self.ast[^1] would raise an IndexDefect
        return nil
    let position = self.current + distance
    if position < 0 or position > self.ast.high():
        result = self.ast[^1]
    else:
        result = self.ast[position]
2022-06-14 12:12:56 +02:00
proc done(self: Compiler): bool {.inline.} =
    ## Tells whether every AST node has
    ## been consumed by the compiler
    self.current >= self.ast.len()
2022-08-01 11:35:45 +02:00
proc error(self: Compiler, message: string, node: ASTNode = nil) {.raises: [CompileError], inline.} =
    ## Raises a CompileError carrying the given message.
    ## When no node is provided, the node returned by
    ## getCurrentNode() is attached to the exception
    var culprit = node
    if culprit.isNil():
        culprit = self.getCurrentNode()
    raise CompileError(msg: message, node: culprit, file: self.file,
                       module: self.currentModule, compiler: self)
2022-04-04 12:29:23 +02:00
2022-06-14 12:12:56 +02:00
proc step(self: Compiler): ASTNode {.inline.} =
    ## Returns the current node and moves on
    ## to the next one, unless the compiler
    ## is already done
    result = self.peek()
    if self.done():
        return
    inc(self.current)
2022-08-30 12:55:14 +02:00
proc emitByte(self: Compiler, byt: OpCode | uint8, line: int) {.inline.} =
    ## Writes one byte (converted to uint8), tagged
    ## with the given line, to the chunk that is
    ## being compiled
    let raw = uint8(byt)
    self.chunk.write(raw, line)
2022-04-04 12:29:23 +02:00
2022-08-30 12:55:14 +02:00
proc emitBytes(self: Compiler, bytarr: openArray[OpCode | uint8], line: int) {.inline.} =
    ## Convenience wrapper that writes a run of bytes
    ## to the current chunk by calling emitByte once
    ## per element, in order
    for idx in 0..bytarr.high():
        self.emitByte(bytarr[idx], line)
2022-04-04 12:29:23 +02:00
2022-05-20 15:47:04 +02:00
proc makeConstant(self: Compiler, val: Expression, typ: Type): array[3, uint8] =
    ## Adds a constant to the constant table of the
    ## current chunk and returns its index as a 3-byte
    ## array of uint8s. Kinds other than the integer,
    ## float and string ones write nothing and yield
    ## an all-zero index
    var lit: string
    if typ.kind in {UInt8, Int8, Int16, UInt16, Int32, UInt32, Int64, UInt64}:
        lit = val.token.lexeme
        # Integer literals may carry a size specifier
        # after a quote (the part infer() splits on):
        # only what precedes the quote is the number
        let marker = lit.rfind('\'')
        if marker != -1:
            lit = lit[0 ..< marker]
    case typ.kind:
        of UInt8, Int8:
            result = self.chunk.writeConstant([uint8(parseInt(lit))])
        of Int16, UInt16:
            result = self.chunk.writeConstant(parseInt(lit).toDouble())
        of Int32, UInt32:
            result = self.chunk.writeConstant(parseInt(lit).toQuad())
        of Int64:
            result = self.chunk.writeConstant(parseInt(lit).toLong())
        of UInt64:
            result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
        of String:
            # Drop both the opening and the closing quote:
            # emitConstant encodes the size as len - 2, so
            # the closing quote must not end up in the table
            result = self.chunk.writeConstant(val.token.lexeme[1..^2].toBytes())
        of Float32:
            var parsed: float = 0.0
            discard parseFloat(val.token.lexeme, parsed)
            result = self.chunk.writeConstant(cast[array[4, uint8]](float32(parsed)))
        of Float64:
            var parsed: float = 0.0
            discard parseFloat(val.token.lexeme, parsed)
            result = self.chunk.writeConstant(cast[array[8, uint8]](parsed))
        else:
            discard
2022-04-04 12:29:23 +02:00
2022-05-07 10:48:01 +02:00
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
    ## Emits the load instruction matching the given
    ## type, followed by its operand(s): for strings
    ## the size of the string (3 bytes) comes first,
    ## then in every case the 3-byte index returned
    ## by makeConstant is written.
    ## Raises a CompileError when a string constant
    ## does not fit in a 3-byte size
    let line = obj.token.line
    case kind.kind:
        of Int64:
            self.emitByte(LoadInt64, line)
        of UInt64:
            self.emitByte(LoadUInt64, line)
        of Int32:
            self.emitByte(LoadInt32, line)
        of UInt32:
            self.emitByte(LoadUInt32, line)
        of Int16:
            self.emitByte(LoadInt16, line)
        of UInt16:
            self.emitByte(LoadUInt16, line)
        of Int8:
            self.emitByte(LoadInt8, line)
        of UInt8:
            self.emitByte(LoadUInt8, line)
        of String:
            self.emitByte(LoadString, line)
            # The lexeme includes the two quotes, which
            # are not part of the encoded size: the limit
            # applies to the size that is actually emitted
            let size = LiteralExpr(obj).literal.lexeme.len() - 2
            if size > 16777215:
                self.error("string constants cannot be larger than 16777215 bytes")
            self.emitBytes(size.toTriple(), line)
        of Float32:
            self.emitByte(LoadFloat32, line)
        of Float64:
            self.emitByte(LoadFloat64, line)
        else:
            discard  # TODO
    self.emitBytes(self.makeConstant(obj, kind), line)
2022-04-04 12:29:23 +02:00
2022-08-30 12:55:14 +02:00
proc setJump(self: Compiler, offset: int, jmp: openArray[uint8]) =
    ## Sets the jump located at the given offset to
    ## the given value: the first three bytes of jmp
    ## are written to the three slots that follow
    ## offset in the chunk's code. Taking an openArray
    ## lets the same procedure serve callers passing
    ## either a 3-byte array or a sequence
    for i in 0..2:
        self.chunk.code[offset + 1 + i] = jmp[i]
2022-04-04 12:29:23 +02:00
proc patchJump(self: Compiler, offset: int) =
    ## Patches a relative jump that was previously
    ## emitted by emitJump. The offset parameter is
    ## the identifier emitJump returned, i.e. an index
    ## into the compiler's list of jumps
    let position = self.jumps[offset].offset
    let distance = self.chunk.code.len() - position
    if distance < 0:
        self.error("invalid jump size (< 0), did the bytecode size change without fixJumps being called?")
    if distance > 16777215:
        # TODO: Emit consecutive jumps?
        self.error("cannot jump more than 16777215 instructions")
    # 4 is subtracted from the distance before it is
    # encoded (emitJump writes 1 opcode + 3 operand bytes)
    self.setJump(position, (distance - 4).toTriple())
    self.jumps[offset].patched = true
proc emitJump(self: Compiler, opcode: OpCode, line: int): int =
    ## Emits the given jump opcode followed by a
    ## placeholder (zero) offset that is meant to be
    ## patched later. The returned value identifies
    ## the jump and is what patchJump expects
    self.emitByte(opcode, line)
    # The opcode is now the last byte of the code
    let opcodePos = self.chunk.code.len() - 1
    self.jumps.add((patched: false, offset: opcodePos))
    self.emitBytes(0.toTriple(), line)
    result = self.jumps.len() - 1
proc fixCFIOffsets(self: Compiler, oldLen: int, modifiedAt: int) =
    ## Fixes CFI offsets after the size of the
    ## bytecode has changed. oldLen is the length the
    ## code had before the change; every CFI entry
    ## whose start is at or past modifiedAt is shifted
    ## by the size difference, both in the chunk's CFI
    ## data and in the compiler's own bookkeeping
    let delta = self.chunk.code.len() - oldLen
    if delta == 0:
        return
    for cfi in self.cfiOffsets.mitems():
        if cfi.start < modifiedAt:
            continue
        # Bytes pos..pos+2 hold the start offset
        let newStart = (cfi.start + delta).toTriple()
        for i in 0..2:
            self.chunk.cfi[cfi.pos + i] = newStart[i]
        # Bytes pos+3..pos+5 hold a second offset, which
        # is read back from the CFI data and shifted too
        let oldStop = [self.chunk.cfi[cfi.pos + 3], self.chunk.cfi[cfi.pos + 4], self.chunk.cfi[cfi.pos + 5]].fromTriple().int
        let newStop = (oldStop + delta).toTriple()
        for i in 0..2:
            self.chunk.cfi[cfi.pos + 3 + i] = newStop[i]
        cfi.fn.codePos += delta
        cfi.start += delta
        cfi.stop += delta
2022-08-30 12:55:14 +02:00
proc fixJumps(self: Compiler, oldLen: int, modifiedAt: int) =
    ## Fixes jump offsets after the size of the
    ## bytecode has changed. oldLen is the length
    ## the code had before the change
    let delta = self.chunk.code.len() - oldLen
    if delta == 0:
        return
    for jump in self.jumps.mitems():
        if jump.offset < modifiedAt:
            continue
        # All jumps that were already patched need
        # their offsets fixed, but the internal list
        # of jumps has to be updated as well whenever
        # the jump instruction itself got shifted
        # into the code!
        jump.offset += delta
        # NOTE(review): the bytes handed to setJump are
        # read from the very slots setJump writes to, so
        # this call leaves the operand as it was - confirm
        # whether it was meant to be recomputed
        let operand = self.chunk.code[jump.offset + 1 .. jump.offset + 3]
        self.setJump(jump.offset, operand)
2022-04-04 12:29:23 +02:00
2022-05-02 17:26:38 +02:00
2022-11-02 12:03:14 +01:00
proc resolve(self: Compiler, name: string): Name =
    ## Walks all known names, most recent first, and
    ## returns the first accessible one with the given
    ## identifier, or nil when there is none. Note that
    ## the first time a declaration is resolved it is
    ## also compiled on-the-fly
    for i in countdown(self.names.high(), 0):
        let obj = self.names[i]
        if obj.ident.token.lexeme != name:
            continue
        if obj.owner != self.currentModule:
            # We don't own this name, but we may
            # still have access to it
            if obj.isPrivate:
                # Private in its owner module: we
                # definitely can't use it
                continue
            if not obj.exported:
                # A name that is public but not exported
                # by its owner module is treated as if it
                # were private. This avoids namespace
                # pollution from imports (i.e. if module A
                # imports modules C and D and module B
                # imports module A, then B might not want
                # to also see C's and D's names, as they
                # might clash with its own stuff)
                continue
            # Public in its owner module and explicitly
            # exported to us: we can use it
        result = obj
        break
    if result.isNil() or result.resolved:
        return
    # There's no reason to compile a declaration unless
    # it is used at least once: not only does this save
    # space when a name is declared but never used, it
    # also makes generics easier to implement
    result.resolved = true
    # Dispatch to the procedure that compiles
    # this kind of declaration
    case result.kind:
        of NameKind.Var:
            self.varDecl(VarDecl(result.node), result)
        of NameKind.CustomType:
            self.typeDecl(TypeDecl(result.node), result)
        of NameKind.Function:
            # Generic functions have to be compiled at the
            # call site, because the type of the arguments
            # must be known, whereas regular functions can
            # be compiled as soon as they are resolved
            if not result.valueType.isGeneric:
                self.funDecl(FunDecl(result.node), result)
        else:
            discard
2022-10-13 16:52:37 +02:00
2022-11-02 12:03:14 +01:00
proc resolve(self: Compiler, name: IdentExpr): Name =
    ## Overload of resolve that accepts an identifier
    ## AST node rather than a string: the lexeme of
    ## the node's token is what gets looked up
    result = self.resolve(name.token.lexeme)
proc resolveOrError[T: IdentExpr | string](self: Compiler, name: T): Name =
    ## Same as self.resolve(), except that an error
    ## with an appropriate message is raised when
    ## the name cannot be found (i.e. resolve()
    ## returned nil)
    result = self.resolve(name)
    if not result.isNil():
        return
    when T is IdentExpr:
        self.error(&"reference to undefined name '{name.token.lexeme}'", name)
    else:
        self.error(&"reference to undefined name '{name}'")
2022-05-02 17:26:38 +02:00
2022-08-17 17:31:15 +02:00
proc getStackPos(self: Compiler, name: Name): int =
    ## Computes the predicted position of the given
    ## name in the call stack, relative to the current
    ## frame. Counting starts at 2 and -1 is returned
    ## when the name is not found
    const skippedKinds = {NameKind.Module, NameKind.CustomType, NameKind.Enum,
                          NameKind.Function, NameKind.None}
    result = 2
    for variable in self.names:
        # Names matching any of these conditions are
        # skipped without advancing the position
        let skip = variable.kind in skippedKinds or
                   (variable.kind == NameKind.Argument and variable.depth > self.scopeDepth) or
                   (not variable.belongsTo.isNil() and variable.belongsTo.valueType.isBuiltinFunction) or
                   (not variable.valueType.isNil() and variable.valueType.kind == Generic) or
                   variable.owner != self.currentModule
        if skip:
            continue
        if name.ident == variable.ident:
            return
        inc(result)
    result = -1
2022-08-01 10:36:06 +02:00
proc getClosurePos(self: Compiler, name: Name): int =
    ## Returns the position of a name in a closure's
    ## environment, i.e. its index in the list of
    ## closed-over variables. Returns -1 when no
    ## function is being compiled, when the current
    ## function is not a closure or when the name
    ## is not in the list
    if self.currentFunction.isNil():
        # currentFunction starts out as nil (see
        # newCompiler): there is no closure to search
        return -1
    if not self.currentFunction.valueType.isClosure:
        return -1
    result = self.closures.find(name)
2022-06-02 01:33:56 +02:00
2022-10-11 10:01:01 +02:00
proc compare(self: Compiler, a, b: Type): bool =
    ## Checks two type objects for
    ## equality (nil is supported!)

    # nil is handled for the sake of void functions
    # (when their return types are compared)
    if a.isNil():
        return b.isNil() or b.kind == Any
    if b.isNil():
        return a.kind == Any
    if a.kind == Any or b.kind == Any:
        # Needed internally: user code cannot generate
        # code that matches arbitrary types, but we do
        # need this for function calls and the like,
        # since peon has no return type inference
        return true
    if a.kind == Generic or b.kind == Generic:
        # Generics are matched through their constraints:
        # the first constraint that compares equal decides
        if a.kind == Generic and b.kind == Generic:
            for c1 in a.cond:
                for c2 in b.cond:
                    if self.compare(c1.kind, c2.kind):
                        return c1.match == c2.match
        elif a.kind == Generic:
            for constraint in a.cond:
                if self.compare(constraint.kind, b):
                    return constraint.match
        else:
            for constraint in b.cond:
                if self.compare(constraint.kind, a):
                    return constraint.match
        return false
    if a.kind != b.kind:
        # Different type discriminants: these
        # cannot possibly be the same type!
        return false
    # From here on both types share the same kind
    case a.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, Nan, Bool, Inf:
            # The type of a value type always
            # equals that of another one
            return true
        of Reference, Pointer:
            # Only the wrapped types are left to compare
            return self.compare(a.value, b.value)
        of Function:
            # Functions are a bit trickier
            if a.args.len() != b.args.len():
                return false
            if a.isCoroutine != b.isCoroutine:
                return false
            if not self.compare(a.returnType, b.returnType):
                return false
            for (argA, argB) in zip(a.args, b.args):
                if not self.compare(argA.kind, argB.kind):
                    return false
            return true
        else:
            # TODO: Custom types
            return false
2022-05-07 10:48:01 +02:00
proc toIntrinsic(name: string): Type =
    ## Maps the name of an intrinsic type to
    ## the matching type object. Names that are
    ## not recognized yield nil
    case name:
        of "all":
            result = Type(kind: Any)
        of "int", "int64", "i64":
            result = Type(kind: Int64)
        of "uint64", "u64", "uint":
            result = Type(kind: UInt64)
        of "int32", "i32":
            result = Type(kind: Int32)
        of "uint32", "u32":
            result = Type(kind: UInt32)
        of "int16", "i16", "short":
            result = Type(kind: Int16)
        of "uint16", "u16":
            result = Type(kind: UInt16)
        of "int8", "i8":
            result = Type(kind: Int8)
        of "uint8", "u8":
            result = Type(kind: UInt8)
        of "f64", "float", "float64":
            result = Type(kind: Float64)
        of "f32", "float32":
            result = Type(kind: Float32)
        of "byte", "b":
            result = Type(kind: Byte)
        of "char", "c":
            result = Type(kind: Char)
        of "nan":
            result = Type(kind: Nan)
        of "nil":
            result = Type(kind: Nil)
        of "inf":
            result = Type(kind: Inf)
        of "bool":
            result = Type(kind: Bool)
        of "typevar":
            result = Type(kind: Typevar)
        of "string":
            result = Type(kind: String)
        else:
            result = nil
2022-05-02 17:26:38 +02:00
2022-11-02 12:03:14 +01:00
proc infer(self: Compiler, node: LiteralExpr, allowGeneric: bool = false): Type =
    ## Works out the type of the given literal
    ## expression (nil is returned for a nil node).
    ## Numeric literals may carry a type specifier
    ## after a quote; an error is raised when that
    ## specifier is not valid
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let size = node.token.lexeme.split("'")
            if size.len() < 1 or size.len() > 2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1:
                # No specifier: integers default to Int64
                return Type(kind: Int64)
            let typ = size[1].toIntrinsic()
            if self.compare(typ, nil):
                self.error(&"invalid type specifier '{size[1]}' for int")
            return typ
        of floatExpr:
            let size = node.token.lexeme.split("'")
            if size.len() < 1 or size.len() > 2:
                self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
            if size.len() == 1 or size[1] == "f64":
                return Type(kind: Float64)
            let typ = size[1].toIntrinsic()
            if typ.isNil():
                self.error(&"invalid type specifier '{size[1]}' for float")
            return typ
        of nilExpr:
            return Type(kind: Nil)
        of trueExpr, falseExpr:
            return Type(kind: Bool)
        of nanExpr:
            return Type(kind: TypeKind.Nan)
        of infExpr:
            return Type(kind: TypeKind.Inf)
        of strExpr:
            return Type(kind: String)
        else:
            discard  # TODO
2022-05-02 17:26:38 +02:00
2022-11-02 12:03:14 +01:00
proc infer(self: Compiler, node: Expression, allowGeneric: bool = false): Type =
    ## Infers the type of a given expression and
    ## returns it. Returns nil when the node is nil
    ## or when no type can be determined (callers
    ## that need a hard failure should use inferOrError).
    ## Generic types found on identifiers and operator
    ## return types are resolved to a concrete type
    ## unless allowGeneric is set to true
    if node.isNil():
        return nil
    case node.kind:
        of identExpr:
            let node = IdentExpr(node)
            var name = self.resolve(node)
            if not name.isNil():
                result = name.valueType
                if not result.isNil() and result.kind == Generic and not allowGeneric:
                    if name.belongsTo.isNil():
                        # Free-standing generic: look up whatever the
                        # generic's own name currently resolves to
                        name = self.resolve(result.name)
                        if not name.isNil():
                            result = name.valueType
                    else:
                        # Generic owned by a function: use the type of
                        # the owner's argument with the same name
                        for arg in name.belongsTo.valueType.args:
                            if node.token.lexeme == arg.name:
                                result = arg.kind
            else:
                # Not a declared name: it may be a built-in type name
                result = node.name.lexeme.toIntrinsic()
        of unaryExpr:
            let node = UnaryExpr(node)
            let impl = self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.infer(node.a))]), node)
            result = impl.valueType.returnType
            # The operator may have no return type at all, so result
            # has to be checked before its kind is inspected
            if not result.isNil() and result.kind == Generic and not allowGeneric:
                result = self.specialize(impl, @[node.a]).valueType.returnType
        of binaryExpr:
            let node = BinaryExpr(node)
            let impl = self.matchImpl(node.operator.lexeme, Type(kind: Function, returnType: Type(kind: Any), args: @[("", self.infer(node.a)), ("", self.infer(node.b))]), node)
            result = impl.valueType.returnType
            if not result.isNil() and result.kind == Generic and not allowGeneric:
                result = self.specialize(impl, @[node.a, node.b]).valueType.returnType
        of {intExpr, hexExpr, binExpr, octExpr,
            strExpr, falseExpr, trueExpr, infExpr,
            nanExpr, floatExpr, nilExpr
            }:
            result = self.infer(LiteralExpr(node))
        of lambdaExpr:
            let node = LambdaExpr(node)
            result = Type(kind: Function, returnType: nil, args: @[], isLambda: true)
            if not node.returnType.isNil():
                result.returnType = self.infer(node.returnType)
            for argument in node.arguments:
                result.args.add((argument.name.token.lexeme, self.infer(argument.valueType)))
        of callExpr:
            let node = CallExpr(node)
            case node.callee.kind:
                of identExpr:
                    let resolved = self.resolve(IdentExpr(node.callee))
                    # Some names carry no type at all, so both the name
                    # and its type are checked before dispatching on kind
                    if not resolved.isNil() and not resolved.valueType.isNil():
                        case resolved.valueType.kind:
                            of Function:
                                result = resolved.valueType.returnType
                            else:
                                result = resolved.valueType
                    else:
                        result = nil
                of lambdaExpr:
                    result = self.infer(LambdaExpr(node.callee).returnType)
                of callExpr:
                    result = self.infer(CallExpr(node.callee))
                    if not result.isNil():
                        # Only function types have a return type: anything
                        # else cannot be called, hence has no type here
                        if result.kind == Function:
                            result = result.returnType
                        else:
                            result = nil
                else:
                    discard  # Unreachable
        of varExpr:
            result = self.infer(Var(node).value)
            # The wrapped expression may have no type
            if not result.isNil():
                result.mutable = true
        of refExpr:
            result = Type(kind: Reference, value: self.infer(Ref(node).value))
        of ptrExpr:
            result = Type(kind: Pointer, value: self.infer(Ptr(node).value))
        of groupingExpr:
            result = self.infer(GroupingExpr(node).expression)
        else:
            discard  # Unreachable
2022-11-02 12:03:14 +01:00
proc inferOrError[T: LiteralExpr | Expression](self: Compiler, node: T, allowGeneric: bool = false): Type =
    ## Attempts to infer the type of the given
    ## expression (forwarding allowGeneric to infer)
    ## and reports an error with an appropriate
    ## message if no type could be determined.
    ## Returns the inferred type on success
    result = self.infer(node, allowGeneric)
    if not result.isNil():
        return
    if node.isNil():
        # infer() returns nil for a nil node: there is no
        # node to inspect or to attach the error to
        self.error("expression has no type")
        return
    case node.kind:
        of identExpr:
            self.error(&"reference to undefined name '{IdentExpr(node).token.lexeme}'", node)
        of callExpr:
            let node = CallExpr(node)
            if node.callee.kind == identExpr:
                self.error(&"call to undefined function '{IdentExpr(node.callee).token.lexeme}'", node)
            else:
                self.error("expression has no type", node)
        else:
            self.error("expression has no type", node)
2022-05-02 17:26:38 +02:00
2022-05-29 15:54:01 +02:00
proc typeToStr(self: Compiler, typ: Type): string =
    ## Returns the string representation of a
    ## type object. A nil type is rendered as
    ## "nil"; kinds without a dedicated branch
    ## produce an empty string
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf:
            # Primitive types are spelled as their lowercased kind
            result &= ($typ.kind).toLowerAscii()
        of Pointer:
            result &= &"ptr {self.typeToStr(typ.value)}"
        of Reference:
            result &= &"ref {self.typeToStr(typ.value)}"
        of Function:
            result &= "fn ("
            for i, (argName, argType) in typ.args:
                result &= &"{argName}: "
                # Argument types come from inference and may be nil,
                # so the mutability flag is only read when a type exists
                if not argType.isNil() and argType.mutable:
                    result &= "var "
                result &= self.typeToStr(argType)
                if i < typ.args.len() - 1:
                    result &= ", "
            result &= ")"
            if not typ.returnType.isNil():
                result &= &": {self.typeToStr(typ.returnType)}"
        of Generic:
            # Constraints are joined with "|" and rejected
            # alternatives are prefixed with "~"
            for i, condition in typ.cond:
                if i > 0:
                    result &= " | "
                if not condition.match:
                    result &= "~"
                result &= self.typeToStr(condition.kind)
        else:
            discard
2022-06-21 20:18:53 +02:00
proc findByName(self: Compiler, name: string): seq[Name] =
    ## Collects every declared object whose identifier
    ## matches the given name, most recent declaration
    ## first. Objects owned by a module other than the
    ## current one are only included when they are
    ## exported and not private
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.ident.token.lexeme != name:
            continue
        let isForeign = candidate.owner != self.currentModule
        if isForeign and (candidate.isPrivate or not candidate.exported):
            continue
        result.add(candidate)
proc findByModule(self: Compiler, name: string): seq[Name] =
    ## Collects every declared object whose owner is the
    ## module with the given name, most recent declaration
    ## first. NOTE(review): no visibility filter is applied
    ## here, private and non-exported names are returned too
    for i in countdown(self.names.high(), 0):
        let candidate = self.names[i]
        if candidate.owner == name:
            result.add(candidate)
2022-07-31 16:09:22 +02:00
proc findByType(self: Compiler, name: string, kind: Type, depth: int = -1): seq[Name] =
    ## Collects the objects returned by findByName whose
    ## type compares equal to the given one. A depth of
    ## -1 means "any scope"; any other value additionally
    ## requires the object's scope depth to match
    let anyDepth = depth == -1
    for candidate in self.findByName(name):
        if not self.compare(candidate.valueType, kind):
            continue
        if anyDepth or candidate.depth == depth:
            result.add(candidate)
2022-08-30 12:55:14 +02:00
proc findAtDepth(self: Compiler, name: string, depth: int): seq[Name] {.used.} =
    ## Collects the objects returned by findByName
    ## that were declared exactly at the given
    ## scope depth
    result = self.findByName(name).filterIt(it.depth == depth)
2022-10-13 13:12:24 +02:00
proc matchImpl(self: Compiler, name: string, kind: Type, node: ASTNode = nil): Name =
    ## Tries to find a matching function implementation
    ## compatible with the given type and returns its
    ## name object. An error is reported (attached to
    ## node, if given) when no implementation matches,
    ## listing every same-named candidate and why it was
    ## rejected, or when more than one matches
    let impl = self.findByType(name, kind)
    if impl.len() == 0:
        var msg = &"cannot find a suitable implementation for '{name}'"
        let names = self.findByName(name)
        if names.len() > 0:
            msg &= &", found {len(names)} potential candidate"
            if names.len() > 1:
                msg &= "s"
            msg &= ": "
            for candidate in names:
                msg &= &"\n - in module '{candidate.owner}' at line {candidate.ident.token.line} of type '{self.typeToStr(candidate.valueType)}'"
                # Some names carry no type at all: those are
                # not callable either and must not be dereferenced
                if candidate.valueType.isNil() or candidate.valueType.kind != Function:
                    msg &= ", not a callable"
                elif kind.args.len() != candidate.valueType.args.len():
                    msg &= &", wrong number of arguments ({candidate.valueType.args.len()} expected, got {kind.args.len()})"
                else:
                    for i, arg in kind.args:
                        let expected = candidate.valueType.args[i]
                        # The provided argument's type comes from inference and
                        # may be nil: in that case only the type comparison applies
                        if not arg.kind.isNil() and expected.kind.mutable and not arg.kind.mutable:
                            msg &= &", first mismatch at position {i + 1}: {expected.name} is immutable, not 'var'"
                            break
                        elif not self.compare(arg.kind, expected.kind):
                            msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(expected.kind)}', got '{self.typeToStr(arg.kind)}' instead"
                            break
        self.error(msg, node)
    elif impl.len() > 1:
        var msg = &"multiple matching implementations of '{name}' found:\n"
        for fn in reversed(impl):
            msg &= &"- in module '{fn.owner}' at line {fn.line} of type {self.typeToStr(fn.valueType)}\n"
        self.error(msg, node)
    result = impl[0]
2022-06-21 20:18:53 +02:00
2022-11-02 12:03:14 +01:00
2022-10-13 13:12:24 +02:00
proc check(self: Compiler, term: Expression, kind: Type, allowAny: bool = false) =
    ## Verifies that the type inferred for term is
    ## compatible with the expected one. Reports an
    ## error if the term has no type, if it is of the
    ## internal Any type (unless allowAny is true) or
    ## if the two types do not match; returns normally
    ## otherwise
    let inferred = self.inferOrError(term)
    let isForbiddenAny = inferred.kind == Any and not allowAny
    if isForbiddenAny:
        # Any is reserved for internal use
        self.error("'all' is not a valid type in this context", term)
    elif not self.compare(inferred, kind):
        self.error(&"expecting value of type '{self.typeToStr(kind)}', got '{self.typeToStr(inferred)}' instead", term)
2022-07-16 13:04:00 +02:00
2022-10-13 13:12:24 +02:00
proc handleBuiltinFunction(self: Compiler, fn: Type, args: seq[Expression], line: int) =
    ## Emits the bytecode for a call to a builtin
    ## function (arithmetic, comparisons, printing
    ## and so on). Operands are compiled first, then
    ## the opcode mapped to fn.builtinOp is emitted.
    ## The logical or/and builtins compile their own
    ## operands around a jump instead
    let op = fn.builtinOp
    if op != "LogicalOr" and op != "LogicalAnd":
        # With two operands the second one is compiled first
        case len(args):
            of 2:
                self.expression(args[1])
                self.expression(args[0])
            of 1:
                self.expression(args[0])
            else:
                discard
    const codes: Table[string, OpCode] = {
        "Negate": Negate,
        "NegateFloat32": NegateFloat32,
        "NegateFloat64": NegateFloat64,
        "Add": Add,
        "Subtract": Subtract,
        "Divide": Divide,
        "Multiply": Multiply,
        "SignedDivide": SignedDivide,
        "AddFloat64": AddFloat64,
        "SubtractFloat64": SubtractFloat64,
        "DivideFloat64": DivideFloat64,
        "MultiplyFloat64": MultiplyFloat64,
        "AddFloat32": AddFloat32,
        "SubtractFloat32": SubtractFloat32,
        "DivideFloat32": DivideFloat32,
        "MultiplyFloat32": MultiplyFloat32,
        "Pow": Pow,
        "SignedPow": SignedPow,
        "PowFloat32": PowFloat32,
        "PowFloat64": PowFloat64,
        "Mod": Mod,
        "SignedMod": SignedMod,
        "ModFloat32": ModFloat32,
        "ModFloat64": ModFloat64,
        "Or": Or,
        "And": And,
        "Xor": Xor,
        "Not": Not,
        "LShift": LShift,
        "RShift": RShift,
        "Equal": Equal,
        "NotEqual": NotEqual,
        "LessThan": LessThan,
        "GreaterThan": GreaterThan,
        "LessOrEqual": LessOrEqual,
        "GreaterOrEqual": GreaterOrEqual,
        "PrintInt64": PrintInt64,
        "PrintUInt64": PrintUInt64,
        "PrintInt32": PrintInt32,
        "PrintUInt32": PrintUInt32,
        "PrintInt16": PrintInt16,
        "PrintUInt16": PrintUInt16,
        "PrintInt8": PrintInt8,
        "PrintUInt8": PrintUInt8,
        "PrintFloat64": PrintFloat64,
        "PrintFloat32": PrintFloat32,
        "PrintHex": PrintHex,
        "PrintBool": PrintBool,
        "PrintNan": PrintNan,
        "PrintInf": PrintInf,
        "PrintString": PrintString,
        "SysClock64": SysClock64,
        "LogicalNot": LogicalNot,
        "NegInf": LoadNInf
    }.toTable()
    if op in codes:
        # Simple builtins map to exactly one instruction
        self.emitByte(codes[op], line)
        return
    # The remaining builtins need a jump so that the
    # right operand sits between the jump and its target
    case op:
        of "LogicalOr":
            self.expression(args[0])
            let skipRight = self.emitJump(JumpIfTrue, line)
            self.expression(args[1])
            self.patchJump(skipRight)
        of "LogicalAnd":
            self.expression(args[0])
            let skipRight = self.emitJump(JumpIfFalseOrPop, line)
            self.expression(args[1])
            self.patchJump(skipRight)
        else:
            self.error(&"unknown built-in: '{fn.builtinOp}'", fn.fun)
2022-05-30 22:06:15 +02:00
2022-08-30 12:55:14 +02:00
proc beginScope(self: Compiler) =
    ## Opens a new local scope: the scope depth is
    ## bumped by one and the current function is
    ## recorded as the owner of the new depth
    self.scopeDepth += 1
    let owner = (self.currentFunction, self.scopeDepth)
    self.scopeOwners.add(owner)
2022-05-24 09:55:08 +02:00
2022-10-07 15:55:41 +02:00
# Turns the nested tree of function types into a
# linear list: each type is listed before its children
proc flattenImpl(self: Type, to: var seq[Type]) =
    ## Appends self, then (recursively) each of its
    ## children, to the given sequence
    to.add(self)
    for nested in self.children:
        nested.flattenImpl(to)


proc flatten(self: Type): seq[Type] =
    ## Returns self followed by all of its direct
    ## and indirect children
    self.flattenImpl(result)
proc endScope ( self : Compiler ) =
## Ends the current local scope
## The latest scope owner is dropped and the scope depth is
## decremented. Names declared deeper than the new depth are
## then collected: instructions are emitted to pop the values
## of variables and of arguments to non-builtin functions, as
## well as closure environments, and finally the collected
## names are removed from the list of declared names
if self . scopeDepth < 0 :
self . error ( " cannot call endScope with scopeDepth < 0 (This is an internal error and most likely a bug) " )
2022-10-08 15:48:26 +02:00
discard self . scopeOwners . pop ( )
2022-10-13 13:12:24 +02:00
dec ( self . scopeDepth )
2022-08-30 12:55:14 +02:00
# names holds what goes out of scope, popCount how
# many stack values have to be popped for them
var names : seq [ Name ] = @ [ ]
var popCount = 0
2022-11-02 12:03:14 +01:00
if self . scopeDepth = = - 1 and not self . isMainModule :
# When we're compiling another module, we don't
# close its global scope because self.compileModule()
# needs access to it
return
2022-08-30 12:55:14 +02:00
for name in self . names :
if name . depth > self . scopeDepth :
2022-11-02 12:03:14 +01:00
# Names owned by a function that is not marked as resolved
# are left alone: they are neither popped nor removed
if not name . belongsTo . isNil ( ) and not name . belongsTo . resolved :
continue
2022-08-30 12:55:14 +02:00
names . add ( name )
2022-10-24 13:53:27 +02:00
#[if not name.resolved:
2022-10-13 16:52:37 +02:00
# TODO: Emit a warning?
2022-10-24 13:53:27 +02:00
continue ] #
if name . owner ! = self . currentModule and self . scopeDepth > - 1 :
2022-11-02 12:03:14 +01:00
# Names coming from other modules only go out of scope
# when the global scope is closed (i.e. at the end of
# the module)
2022-10-13 16:52:37 +02:00
continue
2022-11-02 12:03:14 +01:00
if name . kind = = NameKind . Var :
inc ( popCount )
elif name . kind = = NameKind . Argument :
if not name . belongsTo . valueType . isBuiltinFunction and name . belongsTo . resolved :
2022-10-13 13:12:24 +02:00
# We don't pop arguments to builtin functions because those don't
# actually have scopes: their arguments are temporaries on the stack
inc ( popCount )
elif name . kind = = NameKind . Function and name . valueType . children . len ( ) > 0 and name . depth = = 0 :
2022-10-07 15:55:41 +02:00
# When a closure goes out of scope, its environment is reclaimed.
# This includes the environments of every other closure that may
# have been contained within it, too
2022-08-30 12:55:14 +02:00
var i = 0
2022-11-02 12:03:14 +01:00
var envLen = 0
var lastEnvLen = 0
2022-10-07 15:55:41 +02:00
# Why this? Well, it's simple: if a function returns
# a closure, that function becomes a closure too. The
# environments of closures are aligned one after the
# other, so if a and b are both closures, but only b
# closes over a value, both a and b will have an envLen
# of 1, which would cause us to emit one extra PopClosure
# instruction than what's actually needed. We can account
# for this easily by checking if the contained function's
# environment is larger than the contained one, which will
# guarantee there actually is some value that the contained
# function is closing over
2022-11-02 12:03:14 +01:00
for fn in flatten ( name . valueType ) :
2022-10-07 15:55:41 +02:00
if fn . isClosure and fn . envLen > lastEnvLen :
envLen + = fn . envLen
lastEnvLen = fn . envLen
# Each entry is deleted from self.closures and a PopClosure
# instruction carrying the same index is emitted for it
for y in 0 .. < envLen :
2022-11-02 12:03:14 +01:00
self . closures . delete ( y + i )
2022-10-07 15:55:41 +02:00
self . emitByte ( PopClosure , self . peek ( ) . token . line )
self . emitBytes ( ( y + i ) . toTriple ( ) , self . peek ( ) . token . line )
inc ( i )
2022-08-30 12:55:14 +02:00
if popCount > 1 :
2022-11-02 12:03:14 +01:00
# If we're popping more than one variable,
# we emit a bunch of PopN instructions for
# as long as the pop count is greater than zero
# NOTE(review): presumably toDouble() only encodes part of
# a large count, hence the loop: verify against its definition
while popCount > 0 :
self . emitByte ( PopN , self . peek ( ) . token . line )
self . emitBytes ( popCount . toDouble ( ) , self . peek ( ) . token . line )
popCount - = popCount . toDouble ( ) . fromDouble ( ) . int
2022-08-30 12:55:14 +02:00
elif popCount = = 1 :
# We only emit PopN if we're popping more than one value
self . emitByte ( PopC , self . peek ( ) . token . line )
# This seems *really* slow, but
# what else should I do? Nim doesn't
# allow the removal of items during
# seq iteration so ¯\_(ツ)_/¯
# Every collected name is looked up in self.names
# by index and deleted from it
var idx = 0
while idx < self . names . len ( ) :
for name in names :
if self . names [ idx ] = = name :
self . names . delete ( idx )
inc ( idx )
2022-05-24 09:55:08 +02:00
2022-04-04 12:29:23 +02:00
2022-10-13 13:12:24 +02:00
proc unpackGenerics(self: Compiler, condition: Expression, list: var seq[tuple[match: bool, kind: Type]], accept: bool = true) =
    ## Walks the type constraint of a generic and appends
    ## one (match, type) entry to list for each identifier
    ## found. Alternatives are separated by "|" and a "~"
    ## prefix marks its operand as a type to reject (its
    ## entries get match = false). Anything else is
    ## reported as an invalid constraint
    const invalidConstraint = "invalid type constraint in generic declaration"
    case condition.kind:
        of identExpr:
            list.add((accept, self.inferOrError(condition)))
        of binaryExpr:
            let binary = BinaryExpr(condition)
            if binary.operator.lexeme == "|":
                self.unpackGenerics(binary.a, list)
                self.unpackGenerics(binary.b, list)
            else:
                self.error(invalidConstraint, binary)
        of unaryExpr:
            let unary = UnaryExpr(condition)
            if unary.operator.lexeme == "~":
                self.unpackGenerics(unary.a, list, accept = false)
            else:
                self.error(invalidConstraint, unary)
        else:
            self.error(invalidConstraint, condition)
2022-10-17 11:28:00 +02:00
proc declareName(self: Compiler, node: ASTNode, mutable: bool = false): Name =
    ## Statically declares a name into the current scope.
    ## "Declaring" a name only means updating our internal
    ## list of identifiers so that further calls to resolve()
    ## correctly return them. There is no code to actually
    ## declare a variable at runtime: the value is already
    ## on the stack. Handles variable declarations, function
    ## declarations (along with their generics and arguments)
    ## and import statements; other nodes declare nothing.
    ## Returns the newly declared name and reports an error
    ## if a variable or module name is being redeclared
    var declaredName: string = ""
    case node.kind:
        of NodeKind.varDecl:
            var node = VarDecl(node)
            # Creates a new Name entry so that self.identifier emits the proper stack offset
            if self.names.high() > 16777215:
                # If someone ever hits this limit in real-world scenarios, I swear I'll
                # slap myself 100 times with a sign saying "I'm dumb". Mark my words
                self.error("cannot declare more than 16777215 variables at a time")
            declaredName = node.name.token.lexeme
            self.names.add(Name(depth: self.scopeDepth,
                                ident: node.name,
                                isPrivate: node.isPrivate,
                                owner: self.currentModule,
                                isConst: node.isConst,
                                valueType: self.infer(node.value),
                                isLet: node.isLet,
                                line: node.token.line,
                                belongsTo: self.currentFunction,
                                kind: NameKind.Var,
                                node: node
                                ))
            # The initializer's type may not be inferrable (infer returns
            # nil in that case), so it is checked before being flagged
            if mutable and not self.names[^1].valueType.isNil():
                self.names[^1].valueType.mutable = true
            result = self.names[^1]
        of NodeKind.funDecl:
            var node = FunDecl(node)
            result = Name(depth: self.scopeDepth,
                          isPrivate: node.isPrivate,
                          isConst: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function,
                                          returnType: nil,  # We check it later
                                          args: @[],
                                          fun: node,
                                          children: @[]),
                          ident: node.name,
                          node: node,
                          isLet: false,
                          line: node.token.line,
                          kind: NameKind.Function,
                          belongsTo: self.currentFunction)
            # First we declare the function's generics, if it has any.
            # This is because the function's return type may in itself
            # be a generic, so it needs to exist first
            var constraints: seq[tuple[match: bool, kind: Type]] = @[]
            for gen in node.generics:
                self.unpackGenerics(gen.cond, constraints)
                self.names.add(Name(depth: result.depth + 1,
                                    isPrivate: true,
                                    valueType: Type(kind: Generic, name: gen.name.token.lexeme, mutable: false, cond: constraints),
                                    codePos: 0,
                                    isLet: false,
                                    line: result.node.token.line,
                                    belongsTo: result,
                                    ident: gen.name,
                                    owner: self.currentModule))
                # Each generic gets its own list of constraints
                constraints = @[]
            if not node.returnType.isNil():
                result.valueType.returnType = self.inferOrError(node.returnType, allowGeneric = true)
            self.names.add(result)
            # We now declare and typecheck the function's
            # arguments
            for argument in FunDecl(result.node).arguments:
                if self.names.high() > 16777215:
                    self.error("cannot declare more than 16777215 variables at a time")
                self.names.add(Name(depth: result.depth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    ident: argument.name,
                                    valueType: self.inferOrError(argument.valueType, allowGeneric = true),
                                    codePos: 0,
                                    isLet: false,
                                    line: argument.name.token.line,
                                    belongsTo: result,
                                    kind: NameKind.Argument
                                    ))
                result.valueType.args.add((self.names[^1].ident.token.lexeme, self.names[^1].valueType))
            if node.generics.len() > 0:
                result.valueType.isGeneric = true
        of NodeKind.importStmt:
            var node = ImportStmt(node)
            # The module is declared under its file name, minus the extension
            var name = node.moduleName.token.lexeme.extractFilename().replace(".pn", "")
            declaredName = name
            self.names.add(Name(depth: self.scopeDepth,
                                owner: self.currentModule,
                                ident: newIdentExpr(Token(kind: Identifier, lexeme: name, line: node.moduleName.token.line)),
                                line: node.moduleName.token.line,
                                kind: NameKind.Module,
                                isPrivate: false
                                ))
            result = self.names[^1]
        else:
            discard  # TODO: Types, enums
    # Redeclaration check: a variable cannot be declared twice in the
    # same scope and modules, custom types and enums cannot be shadowed
    for name in self.findByName(declaredName):
        if name == result:
            continue
        elif (name.kind == NameKind.Var and name.depth == self.scopeDepth) or name.kind in [NameKind.Module, NameKind.CustomType, NameKind.Enum]:
            self.error(&"attempt to redeclare '{name.ident.token.lexeme}', which was previously defined in '{name.owner}' at line {name.line}")
2022-05-18 13:32:32 +02:00
2022-04-27 16:03:48 +02:00
2022-08-30 12:55:14 +02:00
proc emitLoop(self: Compiler, begin: int, line: int) =
    ## Emits a JumpBackwards instruction whose operand
    ## is the distance between the end of the code
    ## emitted so far and begin. The distance is encoded
    ## with toTriple(), so it cannot exceed 16777215
    const maxJump = 16777215
    let distance = self.chunk.code.high() - begin + 4
    if distance > maxJump:
        # TODO: Emit consecutive jumps?
        self.error("cannot jump more than 16777215 bytecode instructions")
    self.emitByte(JumpBackwards, line)
    self.emitBytes(distance.toTriple(), line)
2022-06-13 15:04:53 +02:00
2022-04-04 12:29:23 +02:00
2022-08-30 12:55:14 +02:00
proc patchBreaks(self: Compiler) =
    ## Patches every jump recorded in the current
    ## loop's breakJumps list. Those jumps are emitted
    ## by breakStmt before the amount of code to skip
    ## is known, so they can only be fixed up once the
    ## loop has been fully compiled
    let pending = self.currentLoop.breakJumps
    for i in 0..pending.high():
        self.patchJump(pending[i])
2022-04-04 12:29:23 +02:00
2022-10-13 13:12:24 +02:00
proc handleMagicPragma(self: Compiler, pragma: Pragma, node: ASTNode, name: Name) =
    ## Handles the "magic" pragma, which marks a function
    ## as a builtin. The pragma takes exactly one string
    ## literal (the builtin's name) and only applies to
    ## function declarations. The given name is assumed
    ## to be already declared
    if pragma.args.len() != 1:
        self.error("'magic' pragma: wrong number of arguments")
    elif pragma.args[0].kind != strExpr:
        self.error("'magic' pragma: wrong type of argument (constant string expected)")
    elif node.kind != NodeKind.funDecl:
        self.error("'magic' pragma is not valid in this context")
    let declaration = FunDecl(node)
    let quoted = pragma.args[0].token.lexeme
    name.valueType.isBuiltinFunction = true
    # The first and last character of the lexeme are dropped
    name.valueType.builtinOp = quoted[1..^2]
    # The magic pragma ignores the function's body
    declaration.body = nil
2022-05-30 09:29:03 +02:00
2022-04-04 12:29:23 +02:00
2022-10-13 13:12:24 +02:00
proc handlePurePragma(self: Compiler, pragma: Pragma, node: ASTNode, name: Name) =
    ## Handles the "pure" pragma by flagging the
    ## function declaration or lambda it is attached
    ## to as pure. Any other node is an error
    if node.kind == NodeKind.funDecl:
        FunDecl(node).isPure = true
    elif node.kind == lambdaExpr:
        LambdaExpr(node).isPure = true
    else:
        self.error("'pure' pragma is not valid in this context")
2022-10-13 13:12:24 +02:00
proc dispatchPragmas(self: Compiler, node: ASTnode, name: Name) =
    ## Runs the handler registered in compilerProcs
    ## for each pragma attached to the given node
    ## (function, type and variable declarations or
    ## lambdas). Unknown pragmas are reported as errors
    let pragmas: seq[Pragma] =
        case node.kind:
            of NodeKind.funDecl, NodeKind.typeDecl, NodeKind.varDecl:
                Declaration(node).pragmas
            of lambdaExpr:
                LambdaExpr(node).pragmas
            else:
                @[]  # Unreachable
    for pragma in pragmas:
        let pragmaName = pragma.name.token.lexeme
        if pragmaName notin self.compilerProcs:
            self.error(&"unknown pragma '{pragma.name.token.lexeme}'")
        self.compilerProcs[pragmaName](self, pragma, node, name)
2022-08-30 12:55:14 +02:00
proc patchReturnAddress(self: Compiler, pos: int) =
    ## Patches the return address of a function call:
    ## the current length of the code, encoded with
    ## toLong(), overwrites the 8 constant-table
    ## entries starting at pos
    let address = self.chunk.code.len().toLong()
    for offset in 0..7:
        self.chunk.consts[pos + offset] = address[offset]
2022-08-30 12:55:14 +02:00
2022-10-17 11:28:00 +02:00
proc terminateProgram(self: Compiler, pos: int) =
    ## Utility to terminate a peon program: closes the
    ## current scope, emits the entry point's return
    ## and patches the return address located at pos
    self.endScope()
    let line = self.peek().token.line
    self.emitByte(OpCode.Return, line)
    # Entry point has no return value (TODO: Add easter eggs, cuz why not)
    self.emitByte(0, line)
    self.patchReturnAddress(pos)
2022-08-30 12:55:14 +02:00
2022-10-17 11:28:00 +02:00
proc beginProgram(self: Compiler): int =
    ## Utility to begin a peon program's
    ## bytecode. Returns the position, in the
    ## constant table, of the dummy return address
    ## of the program's entry point, which is to be
    ## patched by terminateProgram
    # All user code is wrapped in a hidden entry point,
    # as if peon wrote your program's main() function
    # for you. The entry point is called like any other
    # peon function, but the generateCall() helper can't
    # be used for it because the address where the program
    # ends is not known yet: a placeholder is emitted here
    # and fixed up later, once the boundaries of the hidden
    # main() are known
    let entryPoint = Name(depth: 0,
                          isPrivate: true,
                          isConst: false,
                          isLet: false,
                          owner: self.currentModule,
                          valueType: Type(kind: Function,
                                          returnType: nil,
                                          args: @[],
                                          ),
                          codePos: self.chunk.code.len() + 12,
                          ident: newIdentExpr(Token(lexeme: "", kind: Identifier)),
                          kind: NameKind.Function,
                          line: -1)
    self.names.add(entryPoint)
    self.scopeOwners.add((entryPoint, 0))
    # Address of the entry point
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(entryPoint.codePos.toLong()), 1)
    # Placeholder for the return address
    self.emitByte(LoadUInt64, 1)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), 1)
    # The placeholder is the last 8 entries of the constant table
    result = self.chunk.consts.len() - 8
    self.emitByte(Call, 1)
    self.emitBytes(0.toTriple(), 1)
2022-08-30 12:55:14 +02:00
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
    ## Emits instructions for literals such as
    ## singletons, strings and numbers. Numeric
    ## literals are parsed before being emitted so
    ## that a ValueError from the parser is reported
    ## as a compile error
    let line = node.token.line
    case node.kind:
        of trueExpr:
            self.emitByte(LoadTrue, line)
        of falseExpr:
            self.emitByte(LoadFalse, line)
        of nilExpr:
            self.emitByte(LoadNil, line)
        of infExpr:
            self.emitByte(LoadInf, line)
        of nanExpr:
            self.emitByte(LoadNan, line)
        of strExpr:
            self.emitConstant(LiteralExpr(node), Type(kind: String))
        of intExpr:
            let intNode = IntExpr(node)
            let intType = self.infer(intNode)
            # Signed and unsigned literals go through
            # different parsers
            if intType.kind in [Int64, Int32, Int16, Int8]:
                var parsed: int
                try:
                    discard parseInt(intNode.literal.lexeme, parsed)
                except ValueError:
                    self.error(" integer value out of range ")
            else:
                var parsed: uint64
                try:
                    discard parseBiggestUInt(intNode.literal.lexeme, parsed)
                except ValueError:
                    self.error(" integer value out of range ")
            self.emitConstant(intNode, intType)
        of hexExpr:
            let hexNode = HexExpr(node)
            var parsed: int
            try:
                discard parseHex(hexNode.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            # The literal is re-emitted as a decimal integer node
            let digits = $parsed
            let converted = newIntExpr(Token(lexeme: digits,
                                             line: hexNode.token.line,
                                             pos: (start: hexNode.token.pos.start,
                                                   stop: hexNode.token.pos.start + len(digits))))
            self.emitConstant(converted, self.infer(hexNode))
        of binExpr:
            let binNode = BinExpr(node)
            var parsed: int
            try:
                discard parseBin(binNode.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            # The literal is re-emitted as a decimal integer node
            let digits = $parsed
            let converted = newIntExpr(Token(lexeme: digits,
                                             line: binNode.token.line,
                                             pos: (start: binNode.token.pos.start,
                                                   stop: binNode.token.pos.start + len(digits))))
            self.emitConstant(converted, self.infer(binNode))
        of octExpr:
            let octNode = OctExpr(node)
            var parsed: int
            try:
                discard parseOct(octNode.literal.lexeme, parsed)
            except ValueError:
                self.error(" integer value out of range ")
            # The literal is re-emitted as a decimal integer node
            let digits = $parsed
            let converted = newIntExpr(Token(lexeme: digits,
                                             line: octNode.token.line,
                                             pos: (start: octNode.token.pos.start,
                                                   stop: octNode.token.pos.start + len(digits))))
            self.emitConstant(converted, self.infer(octNode))
        of floatExpr:
            let floatNode = FloatExpr(node)
            var parsed: float
            try:
                discard parseFloat(floatNode.literal.lexeme, parsed)
            except ValueError:
                self.error(" floating point value out of range ")
            self.emitConstant(floatNode, self.infer(floatNode))
        of awaitExpr:
            # Compile the awaited expression, then await it
            let awaited = AwaitExpr(node)
            self.expression(awaited.expression)
            self.emitByte(OpCode.Await, line)
        else:
            self.error(&" invalid AST node of kind {node.kind} at literal(): {node} (This is an internal error and most likely a bug!) ")
proc callUnaryOp(self: Compiler, fn: Name, op: UnaryExpr) =
    ## Emits the code to call a unary operator: fn is
    ## called with the operand of op as its only argument
    let operands = @[op.a]
    self.generateCall(fn, operands, fn.line)
2022-08-30 12:55:14 +02:00
proc callBinaryOp(self: Compiler, fn: Name, op: BinaryExpr) =
    ## Emits the code to call a binary operator: fn is
    ## called with both operands of op, left one first
    let operands = @[op.a, op.b]
    self.generateCall(fn, operands, fn.line)
2022-08-30 12:55:14 +02:00
proc unary(self: Compiler, node: UnaryExpr) =
    ## Compiles unary expressions such as decimal
    ## and bitwise negation. The operator is looked
    ## up as a function taking the operand's type,
    ## then called
    let operandType = self.infer(node.a)
    # Any is used as the return type so that only the
    # argument types take part in the lookup
    let signature = Type(kind: Function,
                         returnType: Type(kind: Any),
                         args: @[(" ", operandType)])
    let impl = self.matchImpl(node.token.lexeme, signature, node)
    self.callUnaryOp(impl, node)
proc binary(self: Compiler, node: BinaryExpr) =
    ## Compiles all binary expressions. The operator
    ## is looked up as a function taking the types of
    ## both operands, then called
    let leftType = self.infer(node.a)
    let rightType = self.infer(node.b)
    # Any is used as the return type so that only the
    # argument types take part in the lookup
    let signature = Type(kind: Function,
                         returnType: Type(kind: Any),
                         args: @[(" ", leftType), (" ", rightType)])
    let impl = self.matchImpl(node.token.lexeme, signature, node)
    self.callBinaryOp(impl, node)
proc identifier(self: Compiler, node: IdentExpr) =
    ## Compiles access to identifiers. Depending on
    ## what the name resolves to, this emits a constant
    ## load, the address of a function, a closure
    ## variable load or a plain stack variable load
    let s = self.resolveOrError(node)
    let line = node.token.line
    if s.isConst:
        # Constants are always emitted as Load* instructions
        # no matter the scope depth
        self.emitConstant(node, self.infer(node))
        return
    if s.kind == NameKind.Function:
        # Functions have no runtime representation, they're just
        # a location to jump to, but we pretend they aren't and
        # resolve them to their address into our bytecode when
        # they're referenced
        self.emitByte(LoadUInt64, line)
        self.emitBytes(self.chunk.writeConstant(s.codePos.toLong()), line)
        return
    let isClosureAccess = s.depth > 0 and self.scopeDepth > 0 and
                          not self.currentFunction.isNil() and
                          s.depth != self.scopeDepth and
                          self.scopeOwners[s.depth].owner != self.currentFunction
    if not isClosureAccess:
        # Static name resolution, loads value at index in the stack. Very fast. Much wow.
        self.emitByte(LoadVar, line)
        # No need to check for -1 here: we already did a nil check above!
        self.emitBytes(self.getStackPos(s).toTriple(), line)
        return
    # Loads a closure variable. Stored in a separate "closure array" in the VM that does not
    # align its semantics with the call stack. This makes closures work as expected and is
    # not much slower than indexing our stack (since they're both dynamic arrays at runtime anyway)
    if not s.isClosedOver:
        # First time this name is captured: the current function
        # and every parent of it are marked as closures and their
        # environments grow by one
        var fn = self.currentFunction.valueType
        while true:
            fn.isClosure = true
            fn.envLen += 1
            if fn.parent.isNil():
                break
            fn = fn.parent
        s.isClosedOver = true
        self.closures.add(s)
        let stackIdx = self.getStackPos(s).toTriple()
        let closeIdx = self.closures.high().toTriple()
        let oldLen = self.chunk.code.len()
        # This madness makes it so that we can insert bytecode
        # at arbitrary offsets into our already compiled code and
        # have our metadata be up to date
        let storeSequence = [StoreClosure.uint8,
                             stackIdx[0], stackIdx[1], stackIdx[2],
                             closeIdx[0], closeIdx[1], closeIdx[2]]
        for offset, b in storeSequence:
            self.chunk.code.insert(b, s.belongsTo.codePos + offset)
        # The 7 inserted bytes are accounted for in the line metadata
        self.chunk.lines[self.chunk.getIdx(self.chunk.getLine(s.belongsTo.codePos)) + 1] += 7
        self.fixJumps(oldLen, s.belongsTo.codePos)
        self.fixCFIOffsets(oldLen, s.belongsTo.codePos)
    let pos = self.getClosurePos(s)
    if pos == -1:
        self.error(&" cannot compute closure offset for ' {s.ident.token.lexeme} ' ", s.ident)
    self.emitByte(LoadClosure, line)
    self.emitBytes(pos.toTriple(), line)
proc assignment(self: Compiler, node: ASTNode) =
    ## Compiles assignment expressions. Assigning to
    ## constants and to immutable (let) names is reported
    ## as an error; item assignment is not implemented yet
    case node.kind:
        of assignExpr:
            let assign = AssignExpr(node)
            let name = IdentExpr(assign.name)
            let target = self.resolveOrError(name)
            if target.isConst:
                self.error(&" cannot assign to ' {name.token.lexeme} ' (value is a constant) ", name)
            elif target.isLet:
                self.error(&" cannot reassign ' {name.token.lexeme} ' (value is immutable) ", name)
            # The new value is compiled first, then stored
            self.expression(assign.value)
            if target.isClosedOver:
                # Stores a closure variable. Those live in a separate "closure array" in the VM that
                # does not align its semantics with the call stack. This makes closures work as expected
                # and is not much slower than indexing our stack (since they're both dynamic arrays at
                # runtime anyway)
                self.emitByte(StoreClosure, assign.token.line)
                self.emitBytes(self.getClosurePos(target).toTriple(), assign.token.line)
            else:
                self.emitByte(StoreVar, assign.token.line)
                self.emitBytes(self.getStackPos(target).toTriple(), assign.token.line)
        of setItemExpr:
            let node = SetItemExpr(node)
            let typ = self.infer(node)
            if typ.isNil():
                self.error(&" cannot determine the type of ' {node.name.token.lexeme} ' ")
            # TODO
        else:
            self.error(&" invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug) ")
2022-07-09 12:47:53 +02:00
2022-04-04 12:29:23 +02:00
proc blockStmt(self: Compiler, node: BlockStmt) =
    ## Compiles block statements. A block opens a
    ## new local scope, compiles every declaration
    ## it contains in order and then closes the scope
    self.beginScope()
    for item in node.code:
        self.declaration(item)
    self.endScope()
proc ifStmt(self: Compiler, node: IfStmt) =
    ## Compiles if/else statements for conditional
    ## execution of code. The condition must be a
    ## boolean
    self.check(node.condition, Type(kind: Bool))
    self.expression(node.condition)
    # Taken when the condition is false: skips the then branch
    let skipThen = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.thenBranch)
    # Taken at the end of the then branch: skips the else branch
    let skipElse = self.emitJump(JumpForwards, node.token.line)
    self.patchJump(skipThen)
    if not node.elseBranch.isNil():
        self.statement(node.elseBranch)
    self.patchJump(skipElse)
2022-04-04 12:29:23 +02:00
proc whileStmt(self: Compiler, node: WhileStmt) =
    ## Compiles C-style while loops and
    ## desugared C-style for loops. The
    ## condition must be a boolean
    self.check(node.condition, Type(kind: Bool))
    # Recorded before the condition is compiled so that
    # the loop jumps back to re-evaluate it
    let loopStart = self.chunk.code.high()
    self.expression(node.condition)
    let exitJump = self.emitJump(JumpIfFalsePop, node.token.line)
    self.statement(node.body)
    self.emitLoop(loopStart, node.token.line)
    self.patchJump(exitJump)
2022-04-04 12:29:23 +02:00
2022-10-13 13:12:24 +02:00
proc generateCall(self: Compiler, fn: Type, args: seq[Expression], line: int) =
    ## Version of generateCall that takes Type objects
    ## instead of Name objects. The function is assumed
    ## to be on the stack
    # A placeholder return address is emitted first
    # and patched once the call has been emitted
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    let pos = self.chunk.consts.len() - 8
    # We pass the arguments in reverse
    # because of how stacks work. They'll
    # be reversed again at runtime
    for idx in countdown(args.high(), 0):
        # Arguments are matched with the function's
        # parameters counting from the end
        self.check(args[idx], fn.args[^(args.len() - idx)].kind)
        self.expression(args[idx])
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    if fn.isClosure:
        self.emitByte(CallClosure, line)
        self.emitBytes(args.len().toTriple(), line)
        # Closures also carry the size of their environment
        self.emitBytes(fn.envLen.toTriple(), line)
    else:
        self.emitByte(Call, line)
        self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(pos)
proc generateCall(self: Compiler, fn: Name, args: seq[Expression], line: int) =
    ## Small wrapper that abstracts emitting a call instruction
    ## for a given function. Builtin functions are handed over
    ## to handleBuiltinFunction and produce no call instruction
    let fnType = fn.valueType
    if fnType.isBuiltinFunction:
        self.handleBuiltinFunction(fnType, args, line)
        return
    # First, what we're calling is loaded
    case fn.kind:
        of NameKind.Var:
            # The function is stored in a variable
            self.identifier(VarDecl(fn.node).name)
        of NameKind.Function:
            # The function's address is known
            self.emitByte(LoadUInt64, line)
            self.emitBytes(self.chunk.writeConstant(fn.codePos.toLong()), line)
        else:
            discard
    # Then comes a placeholder return address,
    # patched once the call has been emitted
    self.emitByte(LoadUInt64, line)
    self.emitBytes(self.chunk.writeConstant(0.toLong()), line)
    let pos = self.chunk.consts.len() - 8
    # Arguments are compiled last to first
    for idx in countdown(args.high(), 0):
        self.expression(args[idx])
    # Creates a new call frame and jumps
    # to the function's first instruction
    # in the code
    if fnType.isClosure:
        self.emitByte(CallClosure, line)
        self.emitBytes(args.len().toTriple(), line)
        # Closures also carry the size of their environment
        self.emitBytes(fnType.envLen.toTriple(), line)
    else:
        self.emitByte(Call, line)
        self.emitBytes(args.len().toTriple(), line)
    self.patchReturnAddress(pos)
2022-10-17 11:28:00 +02:00
proc specialize(self: Compiler, name: Name, args: seq[Expression]): Name =
    ## Specializes a generic type by
    ## instantiating a concrete version
    ## of it. Works on a deep copy of name,
    ## which is what gets returned: name itself
    ## is left untouched. The types of args are
    ## inferred and bound to the generic parameters
    ## they are passed as
    var mapping: TableRef[string, Type] = newTable[string, Type]()
    var kind: Type
    result = deepCopy(name)
    case name.kind:
        of NameKind.Function:
            # This first loop checks if a user tries to reassign a generic's
            # name to a different type
            for i, (argName, typ) in result.valueType.args:
                if typ.kind != Generic:
                    continue
                kind = self.infer(args[i])
                if typ.name in mapping and not self.compare(kind, mapping[typ.name]):
                    self.error(&" expected generic argument ' {typ.name} ' to be of type {self.typeToStr(mapping[typ.name])}, got {self.typeToStr(kind)} instead ")
                mapping[typ.name] = kind
                result.valueType.args[i].kind = kind
            # Each (now concrete) argument is declared as a
            # name owned by the specialized function
            for (argExpr, argName) in zip(args, result.valueType.args):
                if self.names.high() > 16777215:
                    self.error(" cannot declare more than 16777215 variables at a time ")
                self.names.add(Name(depth: name.depth + 1,
                                    isPrivate: true,
                                    owner: self.currentModule,
                                    isConst: false,
                                    ident: newIdentExpr(Token(lexeme: argName.name)),
                                    valueType: argName.kind,
                                    codePos: 0,
                                    isLet: false,
                                    line: name.line,
                                    belongsTo: result,
                                    kind: NameKind.Argument
                                    ))
            # A function may have no return type at all (nil), in
            # which case there is nothing to substitute
            let returnType = result.valueType.returnType
            if not returnType.isNil() and returnType.kind == Generic:
                result.valueType.returnType = mapping[returnType.name]
        else:
            discard  # TODO: Custom user-defined types
2022-10-13 13:12:24 +02:00
2022-07-31 16:09:22 +02:00
proc callExpr(self: Compiler, node: CallExpr): Name {.discardable.} =
    ## Compiles code to call a chain of function calls.
    ## For calls whose callee is an identifier, the matched
    ## (and possibly specialized) function is returned; in
    ## every other case the result is left unset
    var args: seq[tuple[name: string, kind: Type]] = @[]
    var argExpr: seq[Expression] = @[]
    var kind: Type
    # TODO: Keyword arguments
    for i, argument in node.arguments.positionals:
        kind = self.infer(argument)
        if kind.isNil():
            # Arguments are numbered from 1, in the order they
            # appear in the call
            if node.callee.kind != identExpr:
                self.error(&" cannot infer the type of argument {i + 1} in call ")
            else:
                self.error(&" cannot infer the type of argument {i + 1} in call to ' {node.callee.token.lexeme} ' ")
        args.add((" ", kind))
        argExpr.add(argument)
    case node.callee.kind:
        of identExpr:
            # Calls like hi()
            result = self.matchImpl(IdentExpr(node.callee).name.lexeme,
                                    Type(kind: Function, returnType: Type(kind: Any), args: args),
                                    node)
            if result.valueType.isGeneric:
                # We can't instantiate a concrete version
                # of a generic function without the types
                # of its arguments, so we wait until the
                # very last moment to compile it, once
                # that info is available to us
                result = self.specialize(result, argExpr)
            # Now we call it
            self.generateCall(result, argExpr, node.token.line)
        of NodeKind.callExpr:
            # Calling a call expression, like hello()()
            var current: Expression = node
            var chain: seq[CallExpr] = @[]
            # Collects every nested call, outermost first. The callee of
            # node is itself a call here, so the chain is never empty
            while CallExpr(current).callee.kind == NodeKind.callExpr:
                chain.add(CallExpr(CallExpr(current).callee))
                current = CallExpr(current).callee
            # The innermost call is compiled first
            for idx in countdown(chain.high(), 0):
                self.callExpr(chain[idx])
            case chain[^1].callee.kind:
                of identExpr:
                    let fn = self.resolve(IdentExpr(chain[^1].callee))
                    self.generateCall(fn.valueType.returnType, argExpr, fn.line)
                else:
                    discard  # TODO: Lambdas
        # TODO: Calling lambdas on-the-fly (i.e. on the same line)
        else:
            let typ = self.infer(node)
            if typ.isNil():
                self.error(&" expression has no type ")
            else:
                self.error(&" object of type ' {self.typeToStr(typ)} ' is not callable ")
2022-05-30 09:29:03 +02:00
2022-05-04 14:27:15 +02:00
proc expression(self: Compiler, node: Expression) =
    ## Compiles all expressions by dispatching on the
    ## kind of the node to the matching specialized
    ## procedure
    case node.kind:
        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
           infExpr, nanExpr, floatExpr, nilExpr:
            # All of these AST nodes have the same overall
            # structure and can be told apart by their kind
            # field alone, so a single case handles them all
            self.literal(node)
        of identExpr:
            self.identifier(IdentExpr(node))
        of unaryExpr:
            # Unary expressions such as ~5 and -3
            self.unary(UnaryExpr(node))
        of binaryExpr:
            # Binary expressions such as 2 ^ 5 and 0.66 * 3.14
            self.binary(BinaryExpr(node))
        of groupingExpr:
            # Grouping expressions like (2 + 1)
            self.expression(GroupingExpr(node).expression)
        of NodeKind.callExpr:
            self.callExpr(CallExpr(node))  # TODO
        of setItemExpr, assignExpr:  # TODO: Get rid of this
            # The node is not converted to its true type for
            # setItem and assign, because that type information
            # would be lost in the call anyway. self.assignment()
            # tells the two apart
            self.assignment(node)
        of getItemExpr:
            discard  # TODO: Get rid of this
        of pragmaExpr:
            discard  # TODO
        else:
            self.error(&" invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug) ")
proc awaitStmt(self: Compiler, node: AwaitStmt) =
    ## Compiles await statements. An await statement
    ## is the standalone form of an await expression:
    ## it is parsed in the context of statements so it
    ## can be used outside expressions, and is basically
    ## an await expression followed by a semicolon.
    ## Await expressions and statements are the only native
    ## construct to run coroutines from within an already
    ## asynchronous context (which should be orchestrated
    ## by an event loop). They block in the caller until
    ## the callee returns
    let awaited = node.expression
    self.expression(awaited)
    self.emitByte(OpCode.Await, node.token.line)
2022-04-04 12:29:23 +02:00
proc deferStmt(self: Compiler, node: DeferStmt) =
    ## Compiles defer statements. A defer statement
    ## is executed right before its containing function
    ## exits (either because of a return or an exception)
    let enclosing = self.chunk
    # The deferred expression is compiled into a scratch
    # chunk seeded with the current chunk's metadata
    var chunk = newChunk()
    chunk.consts = enclosing.consts
    chunk.lines = enclosing.lines
    chunk.cfi = enclosing.cfi
    self.chunk = chunk
    self.expression(node.expression)
    # The generated bytecode is set aside in the
    # deferred array rather than emitted in place
    for instruction in chunk.code:
        self.deferred.add(instruction)
    self.chunk = enclosing
    # NOTE(review): the scratch chunk's metadata was seeded from
    # the enclosing chunk, so appending it here looks like it
    # duplicates the existing entries - confirm this is intended
    self.chunk.consts &= chunk.consts
    self.chunk.lines &= chunk.lines
    self.chunk.cfi &= chunk.cfi
2022-06-02 01:33:56 +02:00
2022-04-04 12:29:23 +02:00
proc returnStmt(self: Compiler, node: ReturnStmt) =
    ## Compiles return statements. The returned value
    ## is checked against the return type of the current
    ## function
    let fnType = self.currentFunction.valueType
    self.check(node.value, fnType.returnType)
    if not node.value.isNil():
        self.expression(node.value)
        self.emitByte(OpCode.SetResult, node.token.line)
    # "Set result" and "exit the function" are two separate
    # opcodes, so once the former is done we simply jump to the
    # function's last return statement, which funDecl() always
    # emits at the end of the function's lifecycle. This keeps
    # the design simple: there is just one return instruction to
    # jump to instead of many potential points where the function
    # returns from. Note that depending on whether the function
    # has any local variables or not, this jump might be patched
    # to jump to the function's PopN/PopC instruction(s) rather
    # than straight to the return statement
    let exitJump = self.emitJump(JumpForwards, node.token.line)
    self.currentFunction.valueType.retJumps.add(exitJump)
2022-04-04 12:29:23 +02:00
proc yieldStmt(self: Compiler, node: YieldStmt) =
    ## Compiles yield statements: the yielded
    ## expression is compiled first, followed
    ## by a Yield instruction
    let yielded = node.expression
    self.expression(yielded)
    self.emitByte(OpCode.Yield, node.token.line)
2022-04-04 12:29:23 +02:00
proc raiseStmt(self: Compiler, node: RaiseStmt) =
    ## Compiles raise statements: the exception
    ## expression is compiled first, followed by
    ## a Raise instruction
    let raised = node.exception
    self.expression(raised)
    self.emitByte(OpCode.Raise, node.token.line)
2022-04-04 12:29:23 +02:00
proc continueStmt(self: Compiler, node: ContinueStmt) =
    ## Compiles continue statements. A continue statement
    ## jumps to the next iteration in a loop
    let target = self.currentLoop.start
    # The jump offset is encoded with toTriple(),
    # hence the upper bound on the loop's start
    if target > 16777215:
        self.error(" too much code to jump over in continue statement ")
    self.emitByte(Jump, node.token.line)
    self.emitBytes(target.toTriple(), node.token.line)
2022-04-04 12:29:23 +02:00
proc breakStmt(self: Compiler, node: BreakStmt) =
    ## Compiles break statements. A break statement
    ## jumps to the end of the loop
    # The jump is recorded in the current loop's breakJumps
    let exitJump = self.emitJump(OpCode.JumpForwards, node.token.line)
    self.currentLoop.breakJumps.add(exitJump)
    if self.currentLoop.depth > self.scopeDepth:
        # Breaking out of a loop closes its scope
        self.endScope()
proc assertStmt(self: Compiler, node: AssertStmt) =
    ## Compiles assert statements (raise
    ## AssertionError if the expression is falsey).
    ## The asserted expression is compiled first,
    ## followed by an Assert instruction
    let asserted = node.expression
    self.expression(asserted)
    self.emitByte(OpCode.Assert, node.token.line)
2022-04-04 12:29:23 +02:00
2022-06-07 11:23:08 +02:00
proc forEachStmt(self: Compiler, node: ForEachStmt) =
    ## Compiles foreach loops. Not implemented
    ## yet: nothing is emitted
    discard  # TODO
proc importStmt(self: Compiler, node: ImportStmt) =
    ## Imports a module at compile time. The module
    ## is compiled first and the import is declared as
    ## a name afterwards. I/O and OS errors raised while
    ## doing so are reported as compile errors
    let modulePath = node.moduleName.token.lexeme
    # Only the last component of the path shows up in error messages
    let filename = splitPath(modulePath).tail
    try:
        self.compileModule(modulePath)
        discard self.declareName(node)
    except IOError:
        self.error(&""" could not import ' {filename} ' : {getCurrentExceptionMsg()} """)
    except OSError:
        self.error(&""" could not import ' {filename} ' : {getCurrentExceptionMsg()} [errno {osLastError()}] """)
2022-06-07 11:23:08 +02:00
2022-10-17 11:28:00 +02:00
proc exportStmt(self: Compiler, node: ExportStmt) =
    ## Exports a name at compile time to
    ## all modules importing us. Private
    ## names cannot be exported
    let target = self.resolveOrError(node.name)
    if target.isPrivate:
        self.error(" cannot export private names ")
    target.exported = true
    let lexeme = target.ident.token.lexeme
    case target.kind:
        of NameKind.Module:
            # We need to export everything
            # this module defines!
            for entry in self.findByModule(lexeme):
                entry.exported = true
        of NameKind.Function:
            # Every function sharing this name is exported too
            for entry in self.findByName(lexeme):
                if entry.kind == NameKind.Function:
                    entry.exported = true
        else:
            discard
2022-10-13 16:52:37 +02:00
proc printRepl(self: Compiler, typ: Type, node: Expression) =
    ## Emits instruction to print
    ## peon types in REPL mode. The
    ## print opcode is picked from the
    ## kind of typ; kinds without a
    ## dedicated opcode use PrintHex
    let opcode =
        case typ.kind:
            of Int64: PrintInt64
            of UInt64: PrintUInt64
            of Int32: PrintInt32
            # Unsigned values get their own opcode, like
            # every other unsigned width
            of UInt32: PrintUInt32
            of Int16: PrintInt16
            of UInt16: PrintUInt16
            of Int8: PrintInt8
            of UInt8: PrintUInt8
            of Float64: PrintFloat64
            of Float32: PrintFloat32
            of Bool: PrintBool
            of Nan: PrintNan
            of Inf: PrintInf
            of String: PrintString
            else: PrintHex
    self.emitByte(opcode, node.token.line)
2022-05-04 14:27:15 +02:00
proc statement(self: Compiler, node: Statement) =
    ## Compiles all statements by dispatching on the
    ## kind of the node. Anything that is not a known
    ## statement is compiled as an expression
    case node.kind:
        of exprStmt:
            let inner = ExprStmt(node).expression
            let innerType = self.infer(inner)
            self.expression(inner)
            if innerType.isNil():
                # The expression has no type and produces no value,
                # so we don't have to pop anything
                discard
            elif self.replMode:
                # In REPL mode the value is printed
                self.printRepl(innerType, inner)
            else:
                # Otherwise the unused value is popped
                self.emitByte(Pop, node.token.line)
        of NodeKind.ifStmt:
            self.ifStmt(IfStmt(node))
        of NodeKind.whileStmt:
            # Note: Our parser already desugars
            # for loops to while loops
            # The enclosing loop is saved and restored
            # once this loop has been compiled
            let enclosingLoop = self.currentLoop
            self.currentLoop = Loop(start: self.chunk.code.len(),
                                    depth: self.scopeDepth,
                                    breakJumps: @[])
            self.whileStmt(WhileStmt(node))
            self.patchBreaks()
            self.currentLoop = enclosingLoop
        of NodeKind.forEachStmt:
            self.forEachStmt(ForEachStmt(node))
        of NodeKind.breakStmt:
            self.breakStmt(BreakStmt(node))
        of NodeKind.continueStmt:
            self.continueStmt(ContinueStmt(node))
        of NodeKind.returnStmt:
            self.returnStmt(ReturnStmt(node))
        of NodeKind.yieldStmt:
            self.yieldStmt(YieldStmt(node))
        of NodeKind.awaitStmt:
            self.awaitStmt(AwaitStmt(node))
        of NodeKind.deferStmt:
            self.deferStmt(DeferStmt(node))
        of NodeKind.assertStmt:
            self.assertStmt(AssertStmt(node))
        of NodeKind.raiseStmt:
            self.raiseStmt(RaiseStmt(node))
        of NodeKind.importStmt:
            self.importStmt(ImportStmt(node))
        of NodeKind.exportStmt:
            self.exportStmt(ExportStmt(node))
        of NodeKind.blockStmt:
            self.blockStmt(BlockStmt(node))
        of NodeKind.tryStmt:
            discard
        else:
            self.expression(Expression(node))
2022-04-04 12:29:23 +02:00
2022-10-13 13:12:24 +02:00
proc varDecl(self: Compiler, node: VarDecl, name: Name) =
    ## Compiles variable declarations. The type of the
    ## variable is worked out first and stored in name,
    ## then the value is compiled and stored in the
    ## variable's stack slot
    # Our parser guarantees that the variable declaration
    # will have a type declaration or a value (or both)
    name.valueType =
        if node.value.isNil():
            # Variable has no value: the type declaration
            # takes over
            self.inferOrError(node.valueType)
        elif node.valueType.isNil:
            # Variable has no type declaration: the type
            # of its value takes over
            self.inferOrError(node.value)
        else:
            # Variable has both a type declaration and
            # a value: the value's type must match the
            # type declaration
            let expected = self.inferOrError(node.valueType)
            self.check(node.value, expected)
            # If this doesn't fail, then we're good
            expected
    # NOTE(review): node.value may be nil here (see the first
    # branch above) - confirm that expression() copes with that
    self.expression(node.value)
    self.emitByte(StoreVar, node.token.line)
    self.emitBytes(self.getStackPos(name).toTriple(), node.token.line)
2022-04-12 12:18:25 +02:00
2022-10-13 13:12:24 +02:00
proc typeDecl(self: Compiler, node: TypeDecl, name: Name) =
    ## Compiles type declarations. Not
    ## implemented yet: nothing is emitted
    discard  # TODO
2022-06-07 11:23:08 +02:00
2022-10-13 13:12:24 +02:00
proc funDecl(self: Compiler, node: FunDecl, name: Name) =
    ## Compiles function declarations.
    ##
    ## The function's code is emitted inline in the current chunk,
    ## preceded by a forward jump that skips over it. The function's
    ## boundaries, argument count and name are appended to the chunk's
    ## CFI data. Declarations without a body (forward declarations)
    ## currently emit nothing.
    ##
    ## Parameters:
    ## - node: the function declaration being compiled
    ## - name: the name object bound to the declaration; its codePos
    ##   field is set to the position where the function's code starts
    ##
    ## Errors are reported through self.error when the "." operator is
    ## overridden, when the body is empty, or when the function has a
    ## return type but no explicit return statement was found
    if node.token.kind == Operator and node.name.token.lexeme in [".", ]:
        self.error(&"Due to current compiler limitations, the '{node.name.token.lexeme}' operator cannot be overridden", node.name)
    var jmp: int
    # We store the current function so it can be restored later
    let function = self.currentFunction
    if not self.currentFunction.isNil():
        # Link the new function's type with the enclosing one
        self.currentFunction.valueType.children.add(name.valueType)
        name.valueType.parent = function.valueType
    self.currentFunction = name
    if not node.body.isNil():
        # A function's code is just compiled linearly
        # and then jumped over
        jmp = self.emitJump(JumpForwards, node.token.line)
        name.codePos = self.chunk.code.len()
        # We let our debugger know this function's boundaries
        self.chunk.cfi.add(self.chunk.code.high().toTriple())
        self.cfiOffsets.add((start: self.chunk.code.high(), stop: 0, pos: self.chunk.cfi.len() - 3, fn: name))
        # The entries are tuples (value types), so we remember the
        # index of ours instead of copying it: nested functions may
        # append further entries while the body is being compiled
        let cfiIdx = self.cfiOffsets.high()
        let idx = self.chunk.cfi.len()
        self.chunk.cfi.add(0.toTriple())  # Patched later
        self.chunk.cfi.add(uint8(node.arguments.len()))
        if not node.name.isNil():
            # The name's length is encoded with toDouble, so the name
            # is truncated to uint16.high() characters *before* its
            # length is written: this keeps the recorded length and
            # the bytes that follow it consistent
            var s = name.ident.token.lexeme
            if s.len() > uint16.high().int:
                s.setLen(uint16.high().int)
            self.chunk.cfi.add(s.len().toDouble())
            self.chunk.cfi.add(s.toBytes())
        else:
            self.chunk.cfi.add(0.toDouble())
        if BlockStmt(node.body).code.len() == 0:
            self.error("cannot declare function with empty body")
        # Since the deferred array is a linear
        # sequence of instructions and we want
        # to keep track to whose function's each
        # set of deferred instruction belongs,
        # we record the length of the deferred
        # array before compiling the function
        # and use this info later to compile
        # the try/finally block with the deferred
        # code
        let deferStart = self.deferred.len()
        self.beginScope()
        for decl in BlockStmt(node.body).code:
            self.declaration(decl)
        let typ = self.currentFunction.valueType.returnType
        var hasVal: bool = false
        case self.currentFunction.valueType.fun.kind:
            of NodeKind.funDecl:
                hasVal = self.currentFunction.valueType.fun.hasExplicitReturn
            of NodeKind.lambdaExpr:
                hasVal = LambdaExpr(self.currentFunction.node).hasExplicitReturn
            else:
                discard  # Unreachable
        if not hasVal and not typ.isNil():
            # There is no explicit return statement anywhere in the function's
            # body: while this is not a tremendously useful piece of information
            # (since the presence of at least one doesn't mean all control flow
            # cases are covered), it definitely is an error worth reporting
            self.error("function has an explicit return type, but no return statement was found", node)
        hasVal = hasVal and not typ.isNil()
        # All return statements jump here
        for jump in self.currentFunction.valueType.retJumps:
            self.patchJump(jump)
        self.endScope()
        # Terminates the function's context. The Return opcode is
        # followed by 1 if the function returns a value, 0 otherwise
        self.emitByte(OpCode.Return, self.peek().token.line)
        if hasVal:
            self.emitByte(1, self.peek().token.line)
        else:
            self.emitByte(0, self.peek().token.line)
        # Now that the end of the function is known, patch the
        # placeholder we left in the CFI data...
        let stop = self.chunk.code.len().toTriple()
        self.chunk.cfi[idx] = stop[0]
        self.chunk.cfi[idx + 1] = stop[1]
        self.chunk.cfi[idx + 2] = stop[2]
        # ...and update the stored offset entry in place
        self.cfiOffsets[cfiIdx].stop = self.chunk.code.len()
        # Currently defer is not functional, so we
        # just pop the instructions
        for _ in deferStart .. self.deferred.high():
            discard self.deferred.pop()
        # Well, we've compiled everything: time to patch
        # the jump offset
        self.patchJump(jmp)
    else:
        discard  # TODO: Forward declarations
    # Restores the enclosing function (if any).
    # Makes nested calls work (including recursion)
    self.currentFunction = function
2022-06-02 01:33:56 +02:00
2022-05-22 17:23:52 +02:00
2022-05-04 14:27:15 +02:00
proc declaration(self: Compiler, node: Declaration) =
    ## Entry point for all declarations. Variable, function and
    ## type declarations are not compiled right away, but only
    ## when they're first referenced: here their name is declared
    ## and their pragmas are dispatched. Every other node is
    ## handed over to the statement compiler
    const lazyKinds = {NodeKind.varDecl, NodeKind.funDecl, NodeKind.typeDecl}
    if node.kind in lazyKinds:
        let declared = self.declareName(node)
        self.dispatchPragmas(node, declared)
    else:
        self.statement(Statement(node))
2022-04-04 12:29:23 +02:00
2022-08-16 12:20:07 +02:00
proc compile*(self: Compiler, ast: seq[Declaration], file: string, lines: seq[tuple[start, stop: int]], source: string, chunk: Chunk = nil,
              incremental: bool = false, isMainModule: bool = true): Chunk =
    ## Compiles a sequence of AST nodes into a chunk
    ## object, which is returned.
    ##
    ## Parameters:
    ## - ast: the declarations to compile
    ## - file: the name of the file being compiled; the module
    ##   name is derived from it by dropping the directory part
    ##   and a trailing ".pn" extension
    ## - lines, source: stored on the compiler as-is
    ## - chunk: the chunk to emit code into. If nil, a new one
    ##   is created
    ## - incremental: when false, the compiler's list of jumps
    ##   is reset before compiling
    ## - isMainModule: stored on the compiler as-is
    self.chunk = if chunk.isNil(): newChunk() else: chunk
    self.ast = ast
    self.file = file
    self.scopeDepth = 0
    self.currentFunction = nil
    # Only the trailing extension is stripped: other occurrences
    # of ".pn" inside the file name are part of the module name
    var moduleName = self.file.extractFilename()
    moduleName.removeSuffix(".pn")
    self.currentModule = moduleName
    self.current = 0
    self.lines = lines
    self.source = source
    self.isMainModule = isMainModule
    if not incremental:
        self.jumps = @[]
    let pos = self.beginProgram()
    while not self.done():
        self.declaration(Declaration(self.step()))
    self.terminateProgram(pos)
    # TODO: REPL is broken, we need a new way to make
    # incremental compilation resume from where it stopped!
    result = self.chunk
2022-08-15 11:46:24 +02:00
2022-10-17 11:28:00 +02:00
proc compileModule(self: Compiler, moduleName: string) =
    ## Compiles an imported module into an existing chunk
    ## using the compiler's internal parser and lexer objects.
    ##
    ## The module file is searched for in each entry of lookupPaths
    ## (relative to the current working directory), in order, and
    ## the first existing file wins. If none exists, an error is
    ## reported via self.error. Modules whose path is already in
    ## self.modules are not compiled again.
    ##
    ## The compiler state overwritten by the nested call to compile
    ## (current position, AST, file, module name, lines, source and
    ## the main module flag) is saved beforehand and restored once
    ## the module has been compiled
    var path = ""
    var found = false
    for searchPath in lookupPaths:
        path = joinPath(getCurrentDir(), joinPath(searchPath, moduleName))
        if fileExists(path):
            found = true
            break
    if not found:
        # Every lookup path has been tried
        self.error(&"""could not import '{path}': module not found""")
    if self.modules.contains(path):
        return
    let source = readFile(path)
    # Save the state of the module that is doing the import
    let current = self.current
    let ast = self.ast
    let file = self.file
    let module = self.currentModule
    let lines = self.lines
    let src = self.source
    let mainModule = self.isMainModule
    self.isMainModule = false
    discard self.compile(self.parser.parse(self.lexer.lex(source, path),
                                           path, self.lexer.getLines(),
                                           source, persist=true),
                         path, self.lexer.getLines(), source, chunk=self.chunk, incremental=true,
                         isMainModule=false)
    # Restore the importing module's state
    self.scopeDepth = 0
    self.current = current
    self.ast = ast
    self.file = file
    self.currentModule = module
    self.lines = lines
    self.source = src
    self.isMainModule = mainModule
    self.modules.incl(path)