482 lines
16 KiB
Nim
482 lines
16 KiB
Nim
# a new recursive descent parser for nds
|
|
# parser: converts a stream of tokens into an AST
|
|
|
|
import ../scanner
|
|
import node
|
|
import ../config
|
|
import ../types/value
|
|
|
|
import strformat
|
|
import strutils
|
|
import bitops
|
|
import options
|
|
|
|
# TYPEDEF
|
|
|
|
type
  Parser = ref object
    ## State of the recursive descent parser: the token window it is
    ## currently looking at plus its error flags.

    # --- token stream ---
    scanner: Scanner          # token source, created by `parse`
    source: string            # source text handed to the scanner
    current: Token            # token under inspection
    previous: Option[Token]   # token before `current`; none after a backtrack
    next: Option[Token]       # pushed-back token: while set, `advance`
                              # takes it instead of asking the scanner
    hold: Node                # temporary hold, used to implement ampersand op

    # --- error state ---
    hadError*: bool           # set once any error has been reported
    panicMode: bool           # while set, further error reports are muted
|
|
|
|
proc newParser*(name: string, source: string): Parser =
  ## Builds a parser for `source` with clean error flags and no previous or
  ## pushed-back token. `name` is accepted but not used by this proc; the
  ## scanner field is left unset here.
  Parser(
    source: source,
    hadError: false,
    panicMode: false,
    next: none[Token](),
    previous: none[Token]()
  )
|
|
|
|
# UTILS
|
|
|
|
# error handling
|
|
|
|
proc errorAt(parser: Parser, line: int, msg: string, at: string = "") =
  ## Writes an error report for `line` to stderr and switches the parser
  ## into panic mode. Does nothing if the parser is already in panic mode.
  ## When `at` is non-empty it is included in the report as the location.
  if not parser.panicMode:
    var report = &"[line {line}] Error "
    if at.len > 0:
      report.add(&"at {at} ")
    report.add(msg)
    report.add("\n")
    stderr.write(report)
    parser.hadError = true
    parser.panicMode = true
|
|
|
|
proc errorAtCurrent(parser: Parser, msg: string) =
  ## Reports `msg` as an error on the line of the current token.
  let line = parser.current.line
  errorAt(parser, line, msg)
|
|
|
|
# scanning for tokens
|
|
|
|
proc advance(parser: Parser) =
  ## Moves the token window forward by one: `current` becomes `previous`
  ## and a new `current` is fetched. A pushed-back `next` token is used
  ## first; otherwise the scanner is asked. Error tokens are reported and
  ## skipped until a non-error token is found.
  parser.previous = some(parser.current)
  var accepted = false
  while not accepted:
    if parser.next.isNone():
      parser.current = parser.scanner.scanToken()
    else:
      # consume the pushed-back token instead of scanning
      parser.current = parser.next.get()
      parser.next = none[Token]()
    when debugScanner:
      parser.current.debugPrint()
    accepted = parser.current.tokenType != tkError
    if not accepted:
      # the text of an error token is used as the error message
      parser.errorAtCurrent(parser.current.text)
|
|
|
|
proc backtrack(parser: Parser) =
  ## Steps the token window back by one token: `current` is pushed back
  ## into `next` and `previous` becomes `current` again.
  parser.next = some(parser.current)
  parser.current = parser.previous.get()
  # After stepping back there is no known previous token any more.
  # It is cleared to none, so reading parser.previous before the next
  # advance() crashes on the empty option rather than yielding a stale token.
  parser.previous = none(Token)
|
|
|
|
proc match(parser: Parser, tokenType: TokenType): bool =
  ## If the current token has type `tokenType`, consumes it and returns
  ## true; otherwise leaves the parser untouched and returns false.
  result = parser.current.tokenType == tokenType
  if result:
    parser.advance()
|
|
|
|
proc match(parser: Parser, tokenTypes: set[TokenType]): bool =
  ## If the type of the current token is a member of `tokenTypes`,
  ## consumes it and returns true; otherwise returns false without
  ## consuming anything.
  result = parser.current.tokenType in tokenTypes
  if result:
    parser.advance()
|
|
|
|
proc consume(parser: Parser, tokenType: TokenType | set[TokenType], msg: string): bool =
  ## Like `match`, but reports `msg` as an error at the current token when
  ## the expected token is missing.
  ## The boolean result lets callers leave their loops after a failure
  ## instead of trying to consume more and risking an infinite loop.
  result = parser.match(tokenType)
  if not result:
    parser.errorAtCurrent(msg)
|
|
|
|
|
|
proc peek(parser: Parser): Token =
  ## Returns the current token without consuming it.
  result = parser.current
|
|
|
|
proc peekMatch(parser: Parser, tokenType: TokenType): bool =
  ## Tells whether the current token has type `tokenType`, without
  ## consuming it.
  result = parser.current.tokenType == tokenType
|
|
|
|
proc synchronize(parser: Parser) =
  ## Leaves panic mode and skips tokens until a point where parsing can
  ## resume: just after a semicolon, at one of the tokens listed in
  ## `syncPoints`, or at end of input.
  const syncPoints = {tkProc, tkVar, tkFor, tkIf, tkWhile, tkRightBrace}
  parser.panicMode = false
  while not (parser.current.tokenType == tkEof):
    let afterSemicolon = parser.previous.get().tokenType == tkSemicolon
    if afterSemicolon or parser.current.tokenType in syncPoints:
      break
    parser.advance()
|
|
|
|
proc isAtEnd(parser: Parser): bool =
  ## True once the current token is the end-of-file token.
  parser.peekMatch(tkEof)
|
|
# EXPRESSIONS
|
|
|
|
# Forward declarations: the grammar is mutually recursive, so these entry
# points are declared here and defined further down.

# expression without a top-level assignment
proc exprNonAssign(parser: Parser): Node
# any expression
proc expression(parser: Parser): Node
# a single statement
proc statement(parser: Parser, inBlock: bool = false): Node
|
|
|
|
proc parseList(parser: Parser): Node =
  ## Parses the elements of a list literal up to and including the closing
  ## ']' and returns an nkList node. Elements are separated by commas; a
  ## comma before the closing bracket is accepted.
  result = Node(kind: nkList, elems: @[])

  while not (parser.isAtEnd() or parser.peekMatch(tkRightBracket)):
    let element = parser.expression()
    result.elems.add(element)
    if parser.peekMatch(tkRightBracket):
      continue # the loop condition ends the list
    if not parser.consume(tkComma, "',' expected after list elements."):
      break

  discard parser.consume(tkRightBracket, "']' expected after list declaration.")
|
|
|
|
proc parseTable(parser: Parser): Node =
  ## Parses the entries of a table literal up to and including the closing
  ## '}' and returns an nkTable node whose `keys` and `values` are parallel
  ## sequences.
  ##
  ## Two key forms are accepted: `[expr] = value`, and `name = value` where
  ## the identifier text becomes a string constant key. Entries are
  ## separated by commas; a comma before the closing brace is accepted.
  result = Node(kind: nkTable, keys: @[], values: @[])

  while not parser.isAtEnd() and not parser.peekMatch(tkRightBrace):
    # [key] = syntax
    if parser.match(tkLeftBracket):
      result.keys.add(parser.expression())
      discard parser.consume(tkRightBracket, "']' expected after table key.")
    # key = syntax
    elif parser.match(tkIdentifier):
      result.keys.add(Node(kind: nkConst, constant: parser.previous.get().text.fromNimString()))
    else:
      parser.errorAtCurrent("Key expected (have you forgotten to put the key in brackets?).")
      # No key was recorded for this entry. Stop here: parsing a value
      # anyway would leave `values` one element longer than `keys`.
      break
    discard parser.consume(tkEqual, "'=' expected after key.")

    result.values.add(parser.exprNonAssign())
    if parser.peek().tokenType != tkRightBrace and not parser.consume(tkComma, "',' expected after table key value pair."):
      break

  discard parser.consume(tkRightBrace, "'}' expected after table declaration.")
|
|
|
|
proc parseProcDeclaration(parser: Parser): Node =
  ## Parses the remainder of a procedure after its opening '(': an optional
  ## comma-separated parameter list, the closing ')' and the body
  ## expression. Returns an nkProc node.
  var paramNames = newSeq[string]()

  while not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
    if not parser.consume(tkIdentifier, "Parameter name expected."):
      break
    paramNames.add(parser.previous.get().text)
    # a comma is only required when something other than ')' follows
    let listContinues = not parser.isAtEnd() and not parser.peekMatch(tkRightParen)
    if listContinues and not parser.consume(tkComma, "',' expected between parameters."):
      break
  discard parser.consume(tkRightParen, "')' expected after parameter list.")

  let procBody = parser.expression()
  result = Node(kind: nkProc, parameters: paramNames, procBody: procBody)
|
|
|
|
proc parseBlock(parser: Parser): Node =
  ## Parses a block expression after its opening '{': any leading labels,
  ## then statements up to the closing '}'. Returns an nkBlockExpr node.
  ## The first character of each label token is dropped from its name.
  result = Node(kind: nkBlockExpr, children: @[], labels: @[])

  while parser.match(tkLabel):
    let labelText = parser.previous.get().text
    result.labels.add(labelText[1..^1])

  while not (parser.isAtEnd() or parser.peekMatch(tkRightBrace)):
    let child = parser.statement(true)
    if child.isNil:
      # the statement failed; its own error should already be reported,
      # so this message is muted whenever panic mode is still on
      parser.errorAtCurrent("Internal error: Block got empty expression.")
      return
    result.children.add(child)
    if child.kind == nkExpr:
      # a bare expression ends the statement list
      break

  discard parser.consume(tkRightBrace, "'}' expected after block expression.")
|
|
|
|
proc primary(parser: Parser): Node =
  ## Parses a primary expression: the literals false/true/nil, a number, a
  ## string, a parenthesised expression, a block, a list, a table, an
  ## identifier, the '&' placeholder or a proc expression.
  ##
  ## Returns nil after reporting an error when the current token cannot
  ## start a primary expression; no token is consumed in that case.
  if parser.match(tkFalse):
    return Node(kind: nkConst, constant: ndFalse)
  if parser.match(tkTrue):
    return Node(kind: nkConst, constant: ndTrue)
  if parser.match(tkNil):
    return Node(kind: nkConst, constant: ndNil)
  if parser.match(tkNumber):
    return Node(kind: nkConst, constant: fromFloat(parseFloat(parser.previous.get().text)))
  if parser.match(tkString):
    # [1..^2] drops the first and last character of the token text
    return Node(kind: nkConst, constant: fromNimString(parser.previous.get().text[1..^2]))
  if parser.match(tkLeftParen):
    let grouped = parser.expression()
    discard parser.consume(tkRightParen, "Expect ')' after expression.")
    return Node(kind: nkExpr, expression: grouped)
  if parser.match(tkLeftBrace):
    return parser.parseBlock()
  if parser.match(tkStartList):
    return parser.parseList()
  if parser.match(tkStartTable):
    return parser.parseTable()
  if parser.match(tkIdentifier):
    return Node(kind: nkVarGet, gVarName: parser.previous.get().text)
  if parser.match(tkAmpersand):
    # '&' stands for the node stored in parser.hold. Without a stored node
    # there is nothing to substitute, so report it instead of silently
    # handing a nil node to the caller.
    if parser.hold == nil:
      parser.errorAtCurrent("'&' has no preceding expression to refer to.")
      return nil
    result = parser.hold
    parser.hold = nil # the held node is used exactly once
    return result
  if parser.match(tkProc):
    discard parser.consume(tkLeftParen, "'(' expected after 'proc'.")
    return parser.parseProcDeclaration()

  parser.errorAtCurrent("Primary expected, but something else found.")
|
|
|
|
proc parseArgList(parser: Parser): seq[Node] =
  ## Parses call arguments after the opening '(' has been consumed: either
  ## a comma-separated list of expressions followed by ')', or just ')'.
  ## An argument that is followed by neither ',' nor ')' is reported as an
  ## error and not included in the result.
  result = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
    let argument = parser.expression()
    # a comma is only required when something other than ')' follows
    let moreExpected = not parser.isAtEnd() and not parser.peekMatch(tkRightParen)
    if moreExpected and not parser.consume(tkComma, "',' expected between arguments."):
      break
    result.add(argument)
  discard parser.consume(tkRightParen, "')' expected after argument list.")
|
|
|
|
|
|
proc parseIndex(parser: Parser): Node =
  ## Parses a primary expression followed by any number of index
  ## operations: `[expr]`, `.name`, or a colon call `:name` / `:name(args)`.
  result = parser.primary()

  while parser.match({tkLeftBracket, tkDot, tkIdentifier}):
    # NOTE: :index is counted as a single identifier, so two identifiers
    # after each other will be handled here
    case parser.previous.get().tokenType
    of tkLeftBracket:
      let indexExpr = parser.expression()
      if not parser.consume(tkRightBracket, "']' after index."):
        break
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: indexExpr)
    of tkIdentifier:
      let rawName = parser.previous.get().text
      if rawName[0] != ':':
        # a plain identifier directly after an expression
        parser.errorAtCurrent("';' expected after expression statement.")
        # update this with whatever the original error when two idents
        # follow each other is
        return
      # the member name is the identifier text without its leading ':'
      let memberName = Node(kind: nkConst, constant: rawName[1..^1].fromNimString())
      var callArgs: seq[Node] = @[]
      if parser.match(tkLeftParen):
        callArgs = parser.parseArgList()
      let target = Node(kind: nkGetIndex, gCollection: result, gIndex: memberName)
      result = Node(kind: nkColonCall, arguments: callArgs, function: target)
    else:
      # dot
      if not parser.consume(tkIdentifier, "Identifier expected after '.' index operator."):
        break
      let fieldName = Node(kind: nkConst, constant: parser.previous.get().text.fromNimString())
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: fieldName)
|
|
|
|
proc parseCall(parser: Parser): Node =
  ## Parses an index expression and, when it is directly followed by '(',
  ## wraps it in an nkCall node with the parsed argument list.
  let callee = parser.parseIndex()
  if not parser.match(tkLeftParen):
    return callee
  let callArgs = parser.parseArgList()
  return Node(kind: nkCall, arguments: callArgs, function: callee)
|
|
|
|
|
|
proc parseIf(parser: Parser): Node =
  ## Parses `(condition) body` with an optional `else body`; the 'if'
  ## keyword itself has already been consumed by the caller. Returns an
  ## nkIf node.
  discard parser.consume(tkLeftParen, "'(' expected after 'if'.")
  let condition = parser.expression()
  discard parser.consume(tkRightParen, "')' expected after condition.")
  let thenBranch = parser.expression()

  result = Node(kind: nkIf, ifCondition: condition, ifBody: thenBranch)
  let hasElse = parser.match(tkElse)
  if hasElse:
    result.elseBody = parser.expression()
|
|
|
|
proc parseWhile(parser: Parser): Node =
  ## Parses `(condition) body`; the 'while' keyword itself has already
  ## been consumed by the caller. Returns an nkWhile node.
  discard parser.consume(tkLeftParen, "'(' expected after 'while'.")
  let condition = parser.expression()
  discard parser.consume(tkRightParen, "')' expected after condition.")
  let loopBody = parser.expression()

  return Node(kind: nkWhile, whileCondition: condition, whileBody: loopBody)
|
|
|
|
proc unary(parser: Parser): Node =
  ## Unary precedence level: the prefix operators '!', '-' and '#', plus
  ## the 'if' and 'while' control flow expressions. Anything else is
  ## handed on to `parseCall`.
  const unaryOps = {tkBang, tkMinus, tkIf, tkWhile, tkHashtag}
  if not parser.match(unaryOps):
    return parser.parseCall()

  case parser.previous.get().tokenType
  of tkBang:
    result = Node(kind: nkNot, argument: parser.unary())
  of tkMinus:
    result = Node(kind: nkNegate, argument: parser.unary())
  of tkHashtag:
    result = Node(kind: nkLen, argument: parser.unary())
  of tkIf:
    result = parser.parseIf()
  of tkWhile:
    result = parser.parseWhile()
  else:
    # only reachable if unaryOps and the branches above disagree
    parser.errorAtCurrent("Invalid parser state: unaryOps and case statement out of line.")
    result = parser.parseCall()
|
|
|
|
proc factor(parser: Parser): Node =
  ## Parses a left-associative chain of '/' and '*' over unary operands.
  result = parser.unary()

  while parser.match({tkSlash, tkStar}):
    # remember the operator before the right operand moves the window on
    let isDivision = parser.previous.get().tokenType == tkSlash
    let rhs = parser.unary()
    result =
      if isDivision: Node(kind: nkDiv, left: result, right: rhs)
      else: Node(kind: nkMult, left: result, right: rhs)
|
|
|
|
proc term(parser: Parser): Node =
  ## Parses a left-associative chain of '-' and '+' over factor operands.
  result = parser.factor()

  while parser.match({tkMinus, tkPlus}):
    # remember the operator before the right operand moves the window on
    let isSubtraction = parser.previous.get().tokenType == tkMinus
    let rhs = parser.factor()
    result =
      if isSubtraction: Node(kind: nkMinus, left: result, right: rhs)
      else: Node(kind: nkPlus, left: result, right: rhs)
|
|
|
|
proc comparison(parser: Parser): Node =
  ## Parses a left-associative chain of '>', '>=', '<' and '<=' over term
  ## operands.
  const comparisonOps = {tkGreater, tkGreaterEqual, tkLess, tkLessEqual}
  result = parser.term()

  while parser.match(comparisonOps):
    # remember the operator before the right operand moves the window on
    let opType = parser.previous.get().tokenType
    let rhs = parser.term()
    case opType
    of tkGreater:
      result = Node(kind: nkGreater, left: result, right: rhs)
    of tkGreaterEqual:
      result = Node(kind: nkGe, left: result, right: rhs)
    of tkLess:
      result = Node(kind: nkLess, left: result, right: rhs)
    of tkLessEqual:
      result = Node(kind: nkLe, left: result, right: rhs)
    else:
      # only reachable if comparisonOps and the branches above disagree
      parser.errorAtCurrent("invalid state in comparison: case and set don't match up.")
|
|
|
|
proc equality(parser: Parser): Node =
  ## Parses a left-associative chain of '!=' and '==' over comparison
  ## operands.
  result = parser.comparison()

  while parser.match({tkBangEqual, tkEqualEqual}):
    # remember the operator before the right operand moves the window on
    let isInequality = parser.previous.get().tokenType == tkBangEqual
    let rhs = parser.comparison()
    result =
      if isInequality: Node(kind: nkNeq, left: result, right: rhs)
      else: Node(kind: nkEq, left: result, right: rhs)
|
|
|
|
|
|
proc parseAnd(parser: Parser): Node =
  ## Parses a left-associative chain of 'and' over equality operands.
  var chain = parser.equality()
  while parser.match(tkAnd):
    let rhs = parser.equality()
    chain = Node(kind: nkAnd, left: chain, right: rhs)
  return chain
|
|
|
|
proc parseOr(parser: Parser): Node =
  ## Parses a left-associative chain of 'or' over 'and' operands.
  var chain = parser.parseAnd()
  while parser.match(tkOr):
    let rhs = parser.parseAnd()
    chain = Node(kind: nkOr, left: chain, right: rhs)
  return chain
|
|
|
|
proc parsePipeCall(parser: Parser): Node =
  ## Parses a chain of pipe calls `value :: target`, where the value on
  ## the left becomes the first argument of the call on the right.
  ##
  ## - If the right side is already a call or colon call at its topmost
  ##   level, the left value is inserted as its first argument.
  ## - Otherwise the right side is taken to evaluate to a function and is
  ##   called with the left value as its only argument,
  ##   e.g. `5 :: funcs.double`.
  ##
  ## Operators with lower precedence than a call on the right side are not
  ## inserted into; the right side's result is assumed to be a function.
  ## To pipe into such an expression use parentheses:
  ## `5 :: (long expression)(arg1, arg2)`.
  result = parser.parseOr()

  while parser.match(tkDoublecolon):
    let right = parser.parseOr()

    if right == nil:
      # The right operand failed to parse. Inspecting right.kind would
      # dereference nil, so stop and keep what has been parsed so far.
      # This report is muted if the failure was already reported.
      parser.errorAtCurrent("Expression expected after '::'.")
      break

    # case 1: right is already a call or coloncall
    if right.kind in {nkCall, nkColonCall}:
      right.arguments.insert(result, 0)
      result = right
    # else: right val is a function which we call
    else:
      result = Node(kind: nkCall, arguments: @[result], function: right)
|
|
|
|
|
|
proc exprNonAssign(parser: Parser): Node =
  ## Parses an expression that is not an assignment, starting at the pipe
  ## call precedence level.
  result = parser.parsePipeCall()
|
|
|
|
|
|
proc parseAssign(parser: Parser): Node =
  ## Parses an expression optionally followed by `= value`. Assignment is
  ## right-associative. Only variable reads and index reads are valid
  ## targets; they are rewritten into nkVarSet and nkSetIndex nodes.
  ##
  ## For an invalid target an error is reported and the target expression
  ## is returned unchanged (nil if the target itself failed to parse).
  result = parser.exprNonAssign()

  if parser.match(tkEqual):
    # check if result is assignable
    const assignable = {nkVarGet, nkGetIndex}
    let right = parser.parseAssign()
    # The nil check comes first: the target may have failed to parse, and
    # reading result.kind would then dereference nil.
    if result == nil or result.kind notin assignable:
      parser.errorAtCurrent("Attempt to assign to invalid target.")
      return
    if result.kind == nkVarGet:
      result = Node(kind: nkVarSet, sVarName: result.gVarName, newVal: right)
    else:
      # nkGetIndex
      result = Node(kind: nkSetIndex, sCollection: result.gCollection, sIndex: result.gIndex, sValue: right)
|
|
|
|
proc parseAmpersand(parser: Parser): Node =
  ## Implements the ampersand operator. Each time an expression is
  ## followed by '&', that expression is wrapped in an nkExpr node and
  ## stored in `parser.hold`, the parser steps back so that '&' is the
  ## current token again, and parsing restarts from there.
  result = parser.parseAssign()
  while parser.match(tkAmpersand):
    parser.hold = Node(kind: nkExpr, expression: result)
    parser.backtrack()
    result = parser.parseAssign()
|
|
|
|
proc expression(parser: Parser): Node =
  ## Parses any expression, starting at the ampersand precedence level.
  return parser.parseAmpersand()
|
|
|
|
# STATEMENTS
|
|
proc exprStatement(parser: Parser, inBlock: bool): Node =
  ## Parses an expression used as a statement.
  ##
  ## - Inside a block, an expression directly followed by '}' is returned
  ##   as an nkExpr node and no ';' is required.
  ## - Otherwise the result is an nkExprStmt node and a terminating ';' is
  ##   required.
  ##
  ## Returns nil, after reporting an error, when no expression could be
  ## parsed.
  let inner = parser.expression()
  if inner == nil:
    parser.errorAtCurrent("Expression expected.")

  if inBlock and parser.peekMatch(tkRightBrace):
    # Build the node from the parsed expression directly. Going through
    # `result` would dereference nil when the expression failed to parse.
    if inner != nil:
      result = Node(kind: nkExpr, expression: inner) # block should also check if it is the last expr.
  else:
    if inner != nil:
      result = Node(kind: nkExprStmt, expression: inner)
    discard parser.consume(tkSemicolon, "';' expected after expression statement.")
|
|
|
|
proc statement(parser: Parser, inBlock: bool = false): Node =
  ## Parses one statement: either a named procedure declaration
  ## (`proc name(params) body;`, returned as an nkVarDecl node) or an
  ## expression statement. If the parser is in panic mode afterwards, it
  ## is resynchronized before returning.
  let sawProc = parser.match(tkProc)
  if sawProc and not parser.peekMatch(tkLeftParen):
    # proc definition - var declaration sort of code
    discard parser.consume(tkIdentifier, "Procedure name expected after 'proc'.")
    let procName = parser.previous.get().text
    discard parser.consume(tkLeftParen, "'(' expected after procedure name.")
    let procNode = parser.parseProcDeclaration()
    result = Node(kind: nkVarDecl, name: procName, value: procNode)
    discard parser.consume(tkSemicolon, "';' expected after procedure declaration.")
  else:
    if sawProc:
      # 'proc' directly followed by '(' is a proc expression: step back
      # and let the expression statement parse it
      parser.backtrack()
    result = parser.exprStatement(inBlock)

  if parser.panicMode:
    parser.synchronize()
|
|
|
|
proc parse*(parser: Parser): Node =
  ## Parses the whole source of `parser` and returns an nkBlockExpr node
  ## holding the top-level statements.
  ##
  ## Statements that failed to parse are left out of the result; check
  ## `parser.hadError` to find out whether any error was reported.
  parser.scanner = newScanner(parser.source)
  result = Node(kind: nkBlockExpr, children: @[])

  parser.advance()
  while not parser.isAtEnd():
    let startToken = parser.current
    let statement = parser.statement()
    if statement != nil:
      result.children.add(statement)
    elif parser.current.tokenType == startToken.tokenType and
        parser.current.line == startToken.line and
        parser.current.text == startToken.text:
      # The statement failed and the parser still appears to be on the
      # token it started from (for example a stray '}' at top level, or a
      # bad token right after ';'). Error recovery returns immediately in
      # that situation, so skip one token here; otherwise this loop would
      # never end.
      parser.advance()
|
|
|