2022-12-02 15:45:34 +01:00
# a new recursive descent parser for nds
# parser: converts a stream of tokens into an AST
import .. / scanner
import .. / chunk
import node
import .. / config
2022-12-02 19:14:41 +01:00
import .. / types / value
2022-12-02 15:45:34 +01:00
import strformat
2022-12-02 19:14:41 +01:00
import strutils
import bitops
2022-12-02 15:45:34 +01:00
import sequtils
import sugar
2022-12-02 20:47:31 +01:00
import options
2022-12-02 15:45:34 +01:00
# TYPEDEF
type
  Parser = ref object
    # Mutable state shared by all parsing procs.
    # NOTE(review): the type itself is not exported although `newParser`
    # and `hadError` are - confirm whether importers need to name it.

    # --- scanning ---
    scanner: Scanner
    source: string
    current: Token
    # `previous` is the token consumed last; `next`, when set, is handed
    # out by `advance` instead of asking the scanner for a fresh token.
    previous, next: Option[Token]
    # temporary hold, used to implement the ampersand operator
    hold: Node

    # --- errors ---
    hadError*: bool
    panicMode: bool
proc newParser*(name: string, source: string): Parser =
  ## Creates a parser over `source` with both error flags cleared and no
  ## previous/next token recorded. `name` is accepted but not used here.
  Parser(
    source: source,
    hadError: false,
    panicMode: false,
    next: none(Token),
    previous: none(Token)
  )
2022-12-02 15:45:34 +01:00
# UTILS
# error handling
proc errorAt(parser: Parser, line: int, msg: string, at: string = "") =
  ## Writes an error report for `line` to stderr and puts the parser into
  ## panic mode.
  ##
  ## `at` is an optional location fragment; it is only printed when it is
  ## non-empty. The default is the empty string so that callers which omit
  ## it do not trigger the `at.len > 0` branch (the previous one-space
  ## default made that guard always true).
  ##
  ## Nothing is printed and no flag is touched while the parser is already
  ## in panic mode.
  if parser.panicMode:
    return # don't display errors if already in panic mode
  stderr.write(&" [line {line}] Error ")
  if at.len > 0:
    stderr.write(&" at {at} ")
  stderr.write(msg)
  stderr.write(" \n ")
  parser.hadError = true
  parser.panicMode = true
proc error(parser: Parser, msg: string) =
  ## Reports `msg` at the line of the previously consumed token.
  ## `get()` fails if no previous token is recorded.
  let lastConsumed = parser.previous.get()
  errorAt(parser, lastConsumed.line, msg)
2022-12-02 15:45:34 +01:00
proc errorAtCurrent(parser: Parser, msg: string) =
  ## Reports `msg` at the line of the current token.
  let lineNo = parser.current.line
  errorAt(parser, lineNo, msg)
# scanning for tokens
proc advance(parser: Parser) =
  ## Moves one token forward: the current token becomes `previous` and a
  ## new current token is fetched. A pending `next` token (left behind by
  ## `backtrack`) is used first; otherwise the scanner is asked. Error
  ## tokens are reported and skipped until a non-error token is found.
  parser.previous = some(parser.current)
  var accepted = false
  while not accepted:
    if parser.next.isNone():
      parser.current = parser.scanner.scanToken()
    else:
      parser.current = parser.next.get()
      parser.next = none(Token)
    when debugScanner:
      parser.current.debugPrint()
    accepted = parser.current.tokenType != tkError
    if not accepted:
      # the text of an error token is used as the error message
      parser.errorAtCurrent(parser.current.text)
2022-12-02 20:47:31 +01:00
proc backtrack(parser: Parser) =
  ## Steps back by one token: the current token is parked in `next` and
  ## the previous token becomes current again.
  parser.next = some(parser.current)
  let rewound = parser.previous.get()
  parser.current = rewound
  # danger - parser.previous is unknown from here on. Because it is an
  # Option, any attempt to read it before the next advance fails loudly
  # instead of yielding a stale token.
  parser.previous = none(Token)
2022-12-02 15:45:34 +01:00
proc match(parser: Parser, tokenType: TokenType): bool =
  ## Consumes the current token and returns true when it has type
  ## `tokenType`; otherwise leaves the parser untouched and returns false.
  result = parser.current.tokenType == tokenType
  if result:
    parser.advance()
2022-12-02 19:14:41 +01:00
proc match(parser: Parser, tokenTypes: set[TokenType]): bool =
  ## Consumes the current token and returns true when its type is a member
  ## of `tokenTypes`; otherwise leaves the parser untouched and returns
  ## false.
  result = parser.current.tokenType in tokenTypes
  if result:
    parser.advance()
proc consume(parser: Parser, tokenType: TokenType | set[TokenType], msg: string) =
  ## Requires the current token to match `tokenType` (a single type or a
  ## set). On a mismatch `msg` is reported at the current token and that
  ## token is skipped anyway.
  if parser.match(tokenType):
    return
  parser.errorAtCurrent(msg)
  parser.advance() # stop infinite loops
2022-12-02 15:45:34 +01:00
2022-12-02 19:14:41 +01:00
proc peek(parser: Parser): Token =
  ## Returns the current token without consuming it.
  result = parser.current
proc peekMatch(parser: Parser, tokenType: TokenType): bool =
  ## True when the current token has type `tokenType`; consumes nothing.
  let upcoming = parser.peek()
  result = upcoming.tokenType == tokenType
2022-12-02 15:45:34 +01:00
proc synchronize(parser: Parser) =
  ## Leaves panic mode and skips tokens until a likely statement boundary:
  ## either the token just consumed closed a statement, or the current
  ## token is one of the listed keywords, or the end of input is reached.
  const
    closers = {tkSemicolon, tkRightBrace}
    starters = {tkProc, tkVar, tkFor, tkIf, tkWhile}
  parser.panicMode = false
  while not (parser.current.tokenType == tkEof):
    let justConsumed = parser.previous.get().tokenType
    if justConsumed in closers or parser.current.tokenType in starters:
      return
    parser.advance()
2022-12-02 19:14:41 +01:00
proc isAtEnd(parser: Parser): bool =
  ## True when the current token is the end-of-file token.
  result = parser.current.tokenType == tkEof
2022-12-02 15:45:34 +01:00
# EXPRESSIONS
2022-12-02 19:14:41 +01:00
# Forward declarations: the collection and primary parsers below recurse
# into these before their bodies are defined.

# any expression
proc expression(parser: Parser): Node

# expressions, but not assignments
proc exprNonAssign(parser: Parser): Node
proc parseList(parser: Parser): Node =
  ## Parses comma separated expressions up to the closing bracket and
  ## returns them as an nkList node. The caller has already consumed the
  ## list opening token.
  var items: seq[Node] = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightBracket)):
    items.add(parser.expression())
    # a comma is required unless the list ends right here
    if not parser.peekMatch(tkRightBracket):
      parser.consume(tkComma, " ' , ' expected after list elements. ")
  parser.consume(tkRightBracket, " ' ] ' expected after list declaration. ")
  result = Node(kind: nkList, elems: items)
proc parseTable(parser: Parser): Node =
  ## Parses `key = value` pairs up to the closing brace and returns them as
  ## an nkTable node with parallel `keys` and `values` sequences. The
  ## caller has already consumed the table opening token.
  ##
  ## Two key forms are accepted: `[expression] = value`, and a bare
  ## identifier whose text becomes a string constant key.
  ##
  ## When no key can be parsed an error is reported. The value is still
  ## parsed so the input keeps being consumed, but it is not stored, which
  ## keeps `keys` and `values` the same length.
  result = Node(kind: nkTable, keys: @[], values: @[])
  while not parser.isAtEnd() and not parser.peekMatch(tkRightBrace):
    var haveKey = true
    if parser.match(tkLeftBracket):
      # [key] = syntax
      result.keys.add(parser.expression())
      parser.consume(tkRightBracket, " ' ] ' expected after table key. ")
    elif parser.match(tkIdentifier):
      # key = syntax
      result.keys.add(Node(kind: nkConst,
          constant: parser.previous.get().text.fromNimString()))
    else:
      haveKey = false
      parser.errorAtCurrent(" Key expected (have you forgotten to put the key in brackets?). ")

    parser.consume(tkEqual, " ' = ' expected after key. ")
    let value = parser.exprNonAssign()
    if haveKey:
      result.values.add(value)

    if parser.peek().tokenType != tkRightBrace:
      parser.consume(tkComma, " ' , ' expected after table key value pair. ")
  parser.consume(tkRightBrace, " ' } ' expected after table declaration. ")
2022-12-02 21:08:52 +01:00
proc parseProcDeclaration(parser: Parser): Node =
  ## Builds an nkProc node. Assumes the left paren has already been
  ## consumed, so what follows is an optional parameter list, a `)` and
  ## then an expression that becomes the procedure body.
  var paramNames: seq[string] = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
    parser.consume(tkIdentifier, " Parameter name expected. ")
    paramNames.add(parser.previous.get().text)
    # a comma is required unless the parameter list ends right here
    if not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
      parser.consume(tkComma, " ' , ' expected between parameters. ")
  parser.consume(tkRightParen, " ' ) ' expected after parameter list. ")
  let bodyExpr = parser.expression()
  result = Node(kind: nkProc, parameters: paramNames, procBody: bodyExpr)
2022-12-02 19:14:41 +01:00
proc primary(parser: Parser): Node =
  ## Parses a primary expression: the literals false/true/nil, a number,
  ## a string, a parenthesised expression, a list, a table, an identifier,
  ## the ampersand placeholder, or a `proc (...)` expression.
  ##
  ## Returns nil after reporting an error when the current token starts
  ## none of these; in that case no token is consumed.
  if parser.match(tkFalse):
    result = Node(kind: nkConst, constant: ndFalse)
  elif parser.match(tkTrue):
    result = Node(kind: nkConst, constant: ndTrue)
  elif parser.match(tkNil):
    result = Node(kind: nkConst, constant: ndNil)
  elif parser.match(tkNumber):
    result = Node(kind: nkConst,
        constant: fromFloat(parseFloat(parser.previous.get().text)))
  elif parser.match(tkString):
    # drop the first and the last character of the token text
    result = Node(kind: nkConst,
        constant: fromNimString(parser.previous.get().text[1 .. ^2]))
  elif parser.match(tkLeftParen):
    let grouped = parser.expression()
    parser.consume(tkRightParen, " Expect ' ) ' after expression. ")
    result = Node(kind: nkExpr, expression: grouped)
  elif parser.match(tkStartList):
    result = parser.parseList()
  elif parser.match(tkStartTable):
    result = parser.parseTable()
  elif parser.match(tkIdentifier):
    result = Node(kind: nkVarGet, gVarName: parser.previous.get().text)
  elif parser.match(tkAmpersand):
    # The ampersand stands for the expression parked in `hold`. Without a
    # parked expression there is nothing to substitute; report it instead
    # of silently returning nil and letting a nil child enter the tree.
    if parser.hold.isNil:
      parser.errorAtCurrent("'&' used without a preceding expression.")
    result = parser.hold
    parser.hold = nil
  elif parser.match(tkProc):
    parser.consume(tkLeftParen, " ' ( ' expected after ' proc ' . ")
    result = parser.parseProcDeclaration()
  else:
    parser.errorAtCurrent(" Primary expected, but something else found. ")
2022-12-02 19:27:02 +01:00
proc parseArgList(parser: Parser): seq[Node] =
  ## Once `(` has been consumed: parses comma separated argument
  ## expressions followed by `)`, or just a `)`.
  result = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
    result.add(parser.expression())
    # a comma is required unless the argument list ends right here
    if not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
      parser.consume(tkComma, " ' , ' expected between arguments. ")
  parser.consume(tkRightParen, " ' ) ' expected after argument list. ")
proc parseIndex(parser: Parser): Node =
  ## Parses a primary followed by any number of index operations:
  ## `[expr]`, `.name`, or `:name` optionally followed by an argument list
  ## (a colon call).
  result = parser.primary()
  while parser.match({tkLeftBracket, tkDot, tkIdentifier}):
    # NOTE: :index is counted as a single identifier, so two identifiers
    # after each other will be handled here
    case parser.previous.get().tokenType
    of tkLeftBracket:
      let indexExpr = parser.expression()
      parser.consume(tkRightBracket, " ' ] ' after index. ")
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: indexExpr)
    of tkIdentifier:
      let rawName = parser.previous.get().text
      if rawName[0] != ':':
        # update this with whatever the original error when two idents
        # follow each other is
        parser.errorAtCurrent(" ' ; ' expected after expression statement. ")
        return
      # the key is the identifier text without its leading ':'
      let key = Node(kind: nkConst, constant: rawName[1 .. ^1].fromNimString())
      var callArgs: seq[Node] = @[]
      if parser.match(tkLeftParen):
        callArgs = parser.parseArgList()
      let target = Node(kind: nkGetIndex, gCollection: result, gIndex: key)
      result = Node(kind: nkColonCall, arguments: callArgs, function: target)
    else:
      # dot
      parser.consume(tkIdentifier, " Identifier expected after ' . ' index operator. ")
      let key = Node(kind: nkConst,
          constant: parser.previous.get().text.fromNimString())
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: key)
2022-12-02 19:14:41 +01:00
proc parseCall(parser: Parser): Node =
  ## Parses an index expression and, when a `(` follows, wraps it in a
  ## single nkCall node with the parsed argument list.
  let callee = parser.parseIndex()
  if not parser.match(tkLeftParen):
    return callee
  let callArgs = parser.parseArgList()
  result = Node(kind: nkCall, arguments: callArgs, function: callee)
proc parseIf(parser: Parser): Node =
  ## Parses `(condition) body [else body]` into an nkIf node; the `if`
  ## keyword has been consumed by the caller. `elseBody` stays nil when no
  ## `else` follows.
  parser.consume(tkLeftParen, " ' ( ' expected after ' if ' . ")
  let condition = parser.expression()
  parser.consume(tkRightParen, " ' ) ' expected after condition. ")
  let thenBranch = parser.expression()
  var elseBranch: Node = nil
  if parser.match(tkElse):
    elseBranch = parser.expression()
  result = Node(kind: nkIf, ifCondition: condition, ifBody: thenBranch,
      elseBody: elseBranch)
proc parseWhile(parser: Parser): Node =
  ## Parses `(condition) body` into an nkWhile node; the `while` keyword
  ## has been consumed by the caller.
  parser.consume(tkLeftParen, " ' ( ' expected after ' while ' . ")
  let condition = parser.expression()
  parser.consume(tkRightParen, " ' ) ' expected after condition. ")
  let loopBody = parser.expression()
  result = Node(kind: nkWhile, whileCondition: condition, whileBody: loopBody)
proc unary(parser: Parser): Node =
  ## Unary level: the prefix operators `!`, `-` and `#`, plus the control
  ## flow forms `if` and `while`. Anything else falls through to the call
  ## level.
  const unaryOps = {tkBang, tkMinus, tkIf, tkWhile, tkHashtag}
  if parser.match(unaryOps):
    # remember the operator before the operand parse overwrites `previous`
    let opKind = parser.previous.get().tokenType
    if opKind == tkBang:
      let operand = parser.unary()
      return Node(kind: nkNot, argument: operand)
    elif opKind == tkMinus:
      let operand = parser.unary()
      return Node(kind: nkNegate, argument: operand)
    elif opKind == tkHashtag:
      let operand = parser.unary()
      return Node(kind: nkLen, argument: operand)
    elif opKind == tkIf:
      return parser.parseIf()
    elif opKind == tkWhile:
      return parser.parseWhile()
    else:
      parser.errorAtCurrent(" Invalid parser state: unaryOps and case statement out of line. ")
  result = parser.parseCall()
proc factor(parser: Parser): Node =
  ## Left-associative chain of `/` and `*` over unary operands.
  result = parser.unary()
  while parser.match({tkSlash, tkStar}):
    # capture the operator before parsing the right operand
    let opKind = parser.previous.get().tokenType
    let rhs = parser.unary()
    case opKind
    of tkSlash:
      result = Node(kind: nkDiv, left: result, right: rhs)
    else:
      result = Node(kind: nkMult, left: result, right: rhs)
proc term(parser: Parser): Node =
  ## Left-associative chain of `-` and `+` over factor operands.
  result = parser.factor()
  while parser.match({tkMinus, tkPlus}):
    # capture the operator before parsing the right operand
    let opKind = parser.previous.get().tokenType
    let rhs = parser.factor()
    case opKind
    of tkMinus:
      result = Node(kind: nkMinus, left: result, right: rhs)
    else:
      result = Node(kind: nkPlus, left: result, right: rhs)
proc comparison(parser: Parser): Node =
  ## Left-associative chain of `>`, `>=`, `<` and `<=` over term operands.
  result = parser.term()
  while parser.match({tkGreater, tkGreaterEqual, tkLess, tkLessEqual}):
    # capture the operator before parsing the right operand
    let opKind = parser.previous.get().tokenType
    let rhs = parser.term()
    if opKind == tkGreater:
      result = Node(kind: nkGreater, left: result, right: rhs)
    elif opKind == tkGreaterEqual:
      result = Node(kind: nkGe, left: result, right: rhs)
    elif opKind == tkLess:
      result = Node(kind: nkLess, left: result, right: rhs)
    elif opKind == tkLessEqual:
      result = Node(kind: nkLe, left: result, right: rhs)
    else:
      parser.errorAtCurrent(" invalid state in comparison: case and set don ' t match up. ")
proc equality(parser: Parser): Node =
  ## Left-associative chain of `!=` and `==` over comparison operands.
  result = parser.comparison()
  while parser.match({tkBangEqual, tkEqualEqual}):
    # capture the operator before parsing the right operand
    let opKind = parser.previous.get().tokenType
    let rhs = parser.comparison()
    case opKind
    of tkBangEqual:
      result = Node(kind: nkNeq, left: result, right: rhs)
    else:
      result = Node(kind: nkEq, left: result, right: rhs)
proc parseAnd(parser: Parser): Node =
  ## Left-associative chain of `and` over equality operands.
  var lhs = parser.equality()
  while parser.match(tkAnd):
    let rhs = parser.equality()
    lhs = Node(kind: nkAnd, left: lhs, right: rhs)
  result = lhs
proc parseOr(parser: Parser): Node =
  ## Left-associative chain of `or` over `and`-level operands.
  var lhs = parser.parseAnd()
  while parser.match(tkOr):
    let rhs = parser.parseAnd()
    lhs = Node(kind: nkOr, left: lhs, right: rhs)
  result = lhs
proc parsePipeCall(parser: Parser): Node =
  ## Parses the pipe operator `::`, which feeds the value on its left into
  ## the call on its right as the first argument.
  ##
  ## - If the topmost node on the right is a call or colon call, the left
  ##   value is inserted as its first argument.
  ## - Otherwise (e.g. `5 :: funcs.double`) the right side is taken to be
  ##   a function and is called with the left value as its only argument.
  ##
  ## Operators with lower precedence than a call on the right are not
  ## looked into; use parentheses for those:
  ## `5 :: (long expression)(arg1, arg2)`.
  ##
  ## The right side can be nil when its parse failed (the error has been
  ## reported by then). It is checked before `kind` is read so that a
  ## malformed input such as `5 :: ;` does not dereference nil.
  result = parser.parseOr()
  while parser.match(tkDoublecolon):
    let right = parser.parseOr()
    if not right.isNil and right.kind in {nkCall, nkColonCall}:
      # case 1: right is already a call or colon call
      right.arguments.insert(result, 0)
      result = right
    else:
      # case 2: right is a function value which we call
      result = Node(kind: nkCall, arguments: @[result], function: right)
proc exprNonAssign(parser: Parser): Node =
  ## Entry point for expressions that may not be assignments; delegates to
  ## the pipe call level.
  result = parsePipeCall(parser)
proc parseAssign(parser: Parser): Node =
  ## Parses a non-assignment expression and, when `=` follows, turns it
  ## into an assignment. Assignment is right-associative: the right side
  ## is parsed by a recursive call.
  ##
  ## Only variable reads (nkVarGet) and index reads (nkGetIndex) are valid
  ## targets; they become nkVarSet and nkSetIndex respectively. For any
  ## other target an error is reported and the unmodified left side is
  ## returned.
  ##
  ## The left side can be nil when its parse failed; it is checked before
  ## `kind` is read so that input such as `= 5;` does not dereference nil.
  result = parser.exprNonAssign()
  if parser.match(tkEqual):
    const assignable = {nkVarGet, nkGetIndex}
    # the right side is parsed first so its tokens are consumed even when
    # the target turns out to be invalid
    let right = parser.parseAssign()
    if result.isNil or result.kind notin assignable:
      parser.errorAtCurrent(" Attempt to assign to invalid target. ")
      return
    if result.kind == nkVarGet:
      result = Node(kind: nkVarSet, sVarName: result.gVarName, newVal: right)
    else:
      # nkGetIndex
      result = Node(kind: nkSetIndex, sCollection: result.gCollection,
          sIndex: result.gIndex, sValue: right)
proc parseAmpersand(parser: Parser): Node =
  ## Parses an assignment-level expression. Each time an `&` follows, the
  ## expression parsed so far is wrapped in nkExpr and parked in `hold`,
  ## the `&` token is un-consumed, and parsing restarts so that the
  ## primary level picks the held node up in place of the `&`.
  result = parser.parseAssign()
  while parser.match(tkAmpersand):
    parser.hold = Node(kind: nkExpr, expression: result)
    parser.backtrack()
    result = parser.parseAssign()
2022-12-02 19:14:41 +01:00
proc expression(parser: Parser): Node =
  ## Entry point for any expression; delegates to the ampersand level.
  result = parseAmpersand(parser)
2022-12-02 15:45:34 +01:00
# STATEMENTS
# forward declaration; the body follows after exprStatement
proc statement(parser: Parser): Node
2022-12-02 21:08:52 +01:00
proc exprStatement(parser: Parser): Node =
  ## Parses an expression followed by `;` into an nkExprStmt node.
  ## When no expression could be parsed an error is reported and the
  ## result stays nil; the `;` is still required either way.
  let parsed = parser.expression()
  if parsed == nil:
    parser.errorAtCurrent(" Expression expected. ")
  else:
    result = Node(kind: nkExprStmt, expression: parsed)
  parser.consume(tkSemicolon, " ' ; ' expected after expression statement. ")
proc statement(parser: Parser): Node =
  ## Parses one statement: a named procedure declaration
  ## (`proc name(...) body;`, returned as an nkVarDecl holding the proc)
  ## or an expression statement. If the parser is in panic mode afterwards
  ## it resynchronizes.
  if not parser.match(tkProc):
    result = parser.exprStatement()
  elif parser.peekMatch(tkLeftParen):
    # `proc (` is a proc expression - un-consume the keyword and let the
    # expression statement path handle it
    parser.backtrack()
    result = parser.exprStatement()
  else:
    # proc definition - var declaration sort of code
    parser.consume(tkIdentifier, " Procedure name expected after ' proc ' . ")
    let procName = parser.previous.get().text
    parser.consume(tkLeftParen, " ' ( ' expected after procedure name. ")
    let procNode = parser.parseProcDeclaration()
    result = Node(kind: nkVarDecl, name: procName, value: procNode)
    parser.consume(tkSemicolon, " ' ; ' expected after procedure declaration. ")

  if parser.panicMode:
    parser.synchronize()
proc parse*(parser: Parser): Node =
  ## Scans and parses the parser's source and returns an nkBlockExpr node
  ## whose children are the parsed statements in source order. A fresh
  ## scanner is created on every call. Whether errors occurred is recorded
  ## in `hadError`.
  parser.scanner = newScanner(parser.source)
  var statements: seq[Node] = @[]
  parser.advance() # load the first token
  while not parser.isAtEnd():
    statements.add(parser.statement())
  result = Node(kind: nkBlockExpr, children: statements)
2022-12-02 15:45:34 +01:00