nondescript/src/ndspkg/compv2/parser.nim

390 lines
12 KiB
Nim

# a new recursive descent parser for nds
# parser: converts a stream of tokens into an AST
import ../scanner
import ../chunk
import node
import ../config
import ../types/value
import strformat
import strutils
import bitops
import sequtils
import sugar
# TYPEDEF
type
  Parser = ref object
    ## Mutable state shared by every parsing proc in this module.
    # scanning
    scanner: Scanner ## token source; assigned at the start of `parse`
    source: string   ## program text handed to `newScanner` by `parse`
    current: Token   ## lookahead token, not consumed yet
    previous: Token  ## most recently consumed token (set by `advance`)
    # errors
    hadError*: bool  ## set by `errorAt` once any error has been reported
    panicMode: bool  ## while true `errorAt` stays silent; reset by `synchronize`
proc newParser*(name: string, source: string): Parser =
  ## Creates a parser for `source` with both error flags cleared.
  ## The scanner is not created here; `parse` sets it up.
  ## `name` is accepted but not used by this proc.
  Parser(source: source, hadError: false, panicMode: false)
# UTILS
# error handling
proc errorAt(parser: Parser, line: int, msg: string, at: string = "") =
  ## Reports `msg` for `line` on stderr and puts the parser in panic mode.
  ## When `at` is non-empty it is printed as the error location.
  ## Nothing is printed (and no flag changes) while already in panic mode.
  if not parser.panicMode:
    var header = &"[line {line}] Error "
    if at.len > 0:
      header.add(&"at {at} ")
    stderr.write(header, msg, "\n")
    parser.hadError = true
    parser.panicMode = true
proc error(parser: Parser, msg: string) =
  ## Reports `msg` at the line of the most recently consumed token.
  let line = parser.previous.line
  errorAt(parser, line, msg)
proc errorAtCurrent(parser: Parser, msg: string) =
  ## Reports `msg` at the line of the lookahead (not yet consumed) token.
  let line = parser.current.line
  errorAt(parser, line, msg)
# scanning for tokens
proc advance(parser: Parser) =
  ## Consumes the lookahead token and scans the next one.
  ## Error tokens from the scanner are reported (their text is the
  ## message) and skipped, so `current` never ends up as `tkError`.
  parser.previous = parser.current
  while true:
    parser.current = parser.scanner.scanToken()
    when debugScanner:
      parser.current.debugPrint()
    if parser.current.tokenType == tkError:
      parser.errorAtCurrent(parser.current.text)
    else:
      break
proc match(parser: Parser, tokenType: TokenType): bool =
  ## Consumes the lookahead token if it has type `tokenType`.
  ## Returns whether a token was consumed.
  result = parser.current.tokenType == tokenType
  if result:
    parser.advance()
proc match(parser: Parser, tokenTypes: set[TokenType]): bool =
  ## Consumes the lookahead token if its type is a member of `tokenTypes`.
  ## Returns whether a token was consumed.
  result = parser.current.tokenType in tokenTypes
  if result:
    parser.advance()
proc consume(parser: Parser, tokenType: TokenType | set[TokenType], msg: string) =
  ## Consumes the lookahead token when it matches `tokenType` (a single
  ## type or a set of types); otherwise reports `msg` at the lookahead
  ## token and consumes nothing.
  if parser.match(tokenType):
    return
  parser.errorAtCurrent(msg)
proc peek(parser: Parser): Token =
  ## Returns the lookahead token without consuming it.
  result = parser.current
proc peekMatch(parser: Parser, tokenType: TokenType): bool =
  ## True when the lookahead token has type `tokenType`; consumes nothing.
  let upcoming = parser.peek()
  result = upcoming.tokenType == tokenType
proc synchronize(parser: Parser) =
  ## Leaves panic mode and skips tokens up to a likely statement boundary:
  ## just after a `;` or `}`, just before a statement keyword, or at the
  ## end of input.
  const
    boundaryBehind = {tkSemicolon, tkRightBrace}
    boundaryAhead = {tkFunct, tkVar, tkFor, tkIf, tkWhile}
  parser.panicMode = false
  while not parser.peekMatch(tkEof):
    if parser.previous.tokenType in boundaryBehind or
        parser.current.tokenType in boundaryAhead:
      return
    parser.advance()
proc isAtEnd(parser: Parser): bool =
  ## True once the lookahead token is the end-of-file token.
  parser.peekMatch(tkEof)
# EXPRESSIONS
# forward declaration: the lowest-precedence expression entry point is
# defined after the precedence-level procs below, which call it recursively
proc expression(parser: Parser): Node
# expressions, but not assignments (forward declaration, defined further down)
proc exprNonAssign(parser: Parser): Node
proc parseList(parser: Parser): Node =
  ## Parses the elements of a list literal after its opening token has
  ## been consumed, up to and including the closing `]`.
  ## Elements are full expressions separated by `,`; a trailing comma
  ## before `]` is accepted.
  ##
  ## Parsing of elements stops as soon as the parser is in panic mode.
  ## `primary` reports an error without consuming the offending token, so
  ## carrying on after an error could loop forever on the same token.
  result = Node(kind: nkList, elems: @[])
  while not parser.isAtEnd() and not parser.peekMatch(tkRightBracket):
    result.elems.add(parser.expression())
    if not parser.panicMode and not parser.peekMatch(tkRightBracket):
      parser.consume(tkComma, "',' expected after list elements.")
    if parser.panicMode:
      # an error was reported; bail out instead of risking no progress
      break
  parser.consume(tkRightBracket, "']' expected after list declaration.")
proc parseTable(parser: Parser): Node =
  ## Parses the entries of a table literal after its opening token has
  ## been consumed, up to and including the closing `}`.
  ## Each entry is `[keyExpr] = value` or `name = value`, where a bare
  ## name becomes a string constant key. Entries are separated by `,`
  ## and a trailing comma before `}` is accepted.
  ##
  ## `keys` and `values` are kept the same length: an entry with an
  ## invalid key ends the loop before a value is stored. The loop also
  ## ends as soon as the parser is in panic mode, because an erroring
  ## sub-parse may consume nothing and would otherwise repeat forever.
  result = Node(kind: nkTable, keys: @[], values: @[])
  while not parser.isAtEnd() and not parser.peekMatch(tkRightBrace):
    # [key] = syntax
    if parser.match(tkLeftBracket):
      result.keys.add(parser.expression())
      parser.consume(tkRightBracket, "']' expected after table key.")
    # key = syntax
    elif parser.match(tkIdentifier):
      result.keys.add(Node(kind: nkConst,
        constant: parser.previous.text.fromNimString()))
    else:
      parser.errorAtCurrent("Key expected (have you forgotten to put the key in brackets?).")
      # no key was stored and no token consumed: stop here
      break
    parser.consume(tkEqual, "'=' expected after key.")
    result.values.add(parser.exprNonAssign())
    if not parser.panicMode and not parser.peekMatch(tkRightBrace):
      parser.consume(tkComma, "',' expected after table key value pair.")
    if parser.panicMode:
      # an error was reported; bail out instead of risking no progress
      break
  parser.consume(tkRightBrace, "'}' expected after table declaration.")
proc primary(parser: Parser): Node =
  ## Parses the highest-precedence expressions: literals (`false`, `true`,
  ## `nil`, numbers, strings), parenthesised expressions, list and table
  ## literals, and variable reads.
  ## When the lookahead token starts none of these, an error is reported,
  ## no token is consumed and nil is returned.
  case parser.current.tokenType
  of tkFalse:
    parser.advance()
    result = Node(kind: nkConst, constant: ndFalse)
  of tkTrue:
    parser.advance()
    result = Node(kind: nkConst, constant: ndTrue)
  of tkNil:
    parser.advance()
    result = Node(kind: nkConst, constant: ndNil)
  of tkNumber:
    parser.advance()
    let number = parseFloat(parser.previous.text)
    result = Node(kind: nkConst, constant: fromFloat(number))
  of tkString:
    parser.advance()
    # drop the first and last character of the token text
    let contents = parser.previous.text[1..^2]
    result = Node(kind: nkConst, constant: fromNimString(contents))
  of tkLeftParen:
    parser.advance()
    let inner = parser.expression()
    parser.consume(tkRightParen, "Expect ')' after expression.")
    result = Node(kind: nkExpr, expression: inner)
  of tkStartList:
    parser.advance()
    result = parser.parseList()
  of tkStartTable:
    parser.advance()
    result = parser.parseTable()
  of tkIdentifier:
    parser.advance()
    result = Node(kind: nkVarGet, gVarName: parser.previous.text)
  else:
    parser.errorAtCurrent("Primary expected, but something else found.")
proc parseArgList(parser: Parser): seq[Node] =
  ## Parses call arguments once the opening `(` has been consumed:
  ## zero or more `,`-separated expressions followed by `)`.
  ## Returns the argument nodes in source order.
  ##
  ## Parsing of arguments stops as soon as the parser is in panic mode.
  ## `primary` reports an error without consuming the offending token, so
  ## carrying on after an error could loop forever on the same token.
  result = @[]
  while not parser.isAtEnd() and not parser.peekMatch(tkRightParen):
    let arg = parser.expression()
    if not parser.panicMode and not parser.isAtEnd() and
        not parser.peekMatch(tkRightParen):
      parser.consume(tkComma, "',' expected between arguments.")
    result.add(arg)
    if parser.panicMode:
      # an error was reported; bail out instead of risking no progress
      break
  parser.consume(tkRightParen, "')' expected after argument list.")
proc parseIndex(parser: Parser): Node =
  ## Parses a primary followed by any number of postfix accessors:
  ## `[expr]` indexing, `.name` indexing, and `:name` / `:name(args)`
  ## colon calls.
  result = parser.primary()
  while parser.match({tkLeftBracket, tkDot, tkIdentifier}):
    case parser.previous.tokenType
    of tkLeftBracket:
      let key = parser.expression()
      parser.consume(tkRightBracket, "']' after index.")
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: key)
    of tkIdentifier:
      # NOTE: `:index` is scanned as a single identifier token, so an
      # identifier directly following an expression ends up here
      let name = parser.previous.text
      if name[0] != ':':
        # two plain identifiers in a row: same message as a missing ';'
        parser.errorAtCurrent("';' expected after expression statement.")
        return
      # the key is the identifier text without its leading ':'
      let key = Node(kind: nkConst, constant: name[1..^1].fromNimString())
      # the parenthesised argument list is optional
      var callArgs: seq[Node] = @[]
      if parser.match(tkLeftParen):
        callArgs = parser.parseArgList()
      let callee = Node(kind: nkGetIndex, gCollection: result, gIndex: key)
      result = Node(kind: nkColonCall, arguments: callArgs, function: callee)
    else:
      # dot
      parser.consume(tkIdentifier, "Identifier expected after '.' index operator.")
      let key = Node(kind: nkConst,
        constant: parser.previous.text.fromNimString())
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: key)
proc parseCall(parser: Parser): Node =
  ## Parses an index expression optionally followed by one parenthesised
  ## argument list, producing a call node in that case.
  let callee = parser.parseIndex()
  if not parser.match(tkLeftParen):
    return callee
  let callArgs = parser.parseArgList()
  result = Node(kind: nkCall, arguments: callArgs, function: callee)
proc parseIf(parser: Parser): Node =
  ## Parses `(condition) body [else body]` after the `if` keyword has been
  ## consumed. Both bodies are expressions; `elseBody` stays unset when
  ## there is no `else`.
  parser.consume(tkLeftParen, "'(' expected after 'if'.")
  let condition = parser.expression()
  parser.consume(tkRightParen, "')' expected after condition.")
  let thenBranch = parser.expression()
  var elseBranch: Node
  if parser.match(tkElse):
    elseBranch = parser.expression()
  result = Node(kind: nkIf, ifCondition: condition, ifBody: thenBranch,
    elseBody: elseBranch)
proc parseWhile(parser: Parser): Node =
  ## Parses `(condition) body` after the `while` keyword has been
  ## consumed; the body is an expression.
  parser.consume(tkLeftParen, "'(' expected after 'while'.")
  let condition = parser.expression()
  parser.consume(tkRightParen, "')' expected after condition.")
  let loopBody = parser.expression()
  Node(kind: nkWhile, whileCondition: condition, whileBody: loopBody)
proc unary(parser: Parser): Node =
  ## Parses prefix operators (`!`, `-`, `#`) and, at the same level, the
  ## `if` and `while` control-flow expressions. Anything else is handed
  ## to `parseCall`.
  const prefixTokens = {tkBang, tkMinus, tkIf, tkWhile, tkHashtag}
  if not parser.match(prefixTokens):
    return parser.parseCall()
  case parser.previous.tokenType
  of tkBang:
    let operand = parser.unary()
    result = Node(kind: nkNot, argument: operand)
  of tkMinus:
    let operand = parser.unary()
    result = Node(kind: nkNegate, argument: operand)
  of tkHashtag:
    let operand = parser.unary()
    result = Node(kind: nkLen, argument: operand)
  of tkIf:
    result = parser.parseIf()
  of tkWhile:
    result = parser.parseWhile()
  else:
    # only reachable if the set above and this case drift apart
    parser.errorAtCurrent("Invalid parser state: unaryOps and case statement out of line.")
    result = parser.parseCall()
proc factor(parser: Parser): Node =
  ## Parses a left-associative chain of `/` and `*` over unary operands.
  result = parser.unary()
  while parser.match({tkSlash, tkStar}):
    # remember the operator before the operand parse moves `previous` on
    let opKind = parser.previous.tokenType
    let operand = parser.unary()
    case opKind
    of tkSlash:
      result = Node(kind: nkDiv, left: result, right: operand)
    else:
      result = Node(kind: nkMult, left: result, right: operand)
proc term(parser: Parser): Node =
  ## Parses a left-associative chain of `-` and `+` over factors.
  result = parser.factor()
  while parser.match({tkMinus, tkPlus}):
    # remember the operator before the operand parse moves `previous` on
    let opKind = parser.previous.tokenType
    let operand = parser.factor()
    case opKind
    of tkMinus:
      result = Node(kind: nkMinus, left: result, right: operand)
    else:
      result = Node(kind: nkPlus, left: result, right: operand)
proc comparison(parser: Parser): Node =
  ## Parses a left-associative chain of `>`, `>=`, `<` and `<=` over terms.
  const comparators = {tkGreater, tkGreaterEqual, tkLess, tkLessEqual}
  result = parser.term()
  while parser.match(comparators):
    # remember the operator before the operand parse moves `previous` on
    let opKind = parser.previous.tokenType
    let operand = parser.term()
    if opKind == tkGreater:
      result = Node(kind: nkGreater, left: result, right: operand)
    elif opKind == tkGreaterEqual:
      result = Node(kind: nkGe, left: result, right: operand)
    elif opKind == tkLess:
      result = Node(kind: nkLess, left: result, right: operand)
    elif opKind == tkLessEqual:
      result = Node(kind: nkLe, left: result, right: operand)
    else:
      # only reachable if the matched set and the branches drift apart
      parser.errorAtCurrent("invalid state in comparison: case and set don't match up.")
proc equality(parser: Parser): Node =
  ## Parses a left-associative chain of `!=` and `==` over comparisons.
  result = parser.comparison()
  while parser.match({tkBangEqual, tkEqualEqual}):
    # remember the operator before the operand parse moves `previous` on
    let opKind = parser.previous.tokenType
    let operand = parser.comparison()
    case opKind
    of tkBangEqual:
      result = Node(kind: nkNeq, left: result, right: operand)
    else:
      result = Node(kind: nkEq, left: result, right: operand)
proc parseAnd(parser: Parser): Node =
  ## Parses a left-associative chain of `and` over equality expressions.
  var lhs = parser.equality()
  while parser.match(tkAnd):
    let rhs = parser.equality()
    lhs = Node(kind: nkAnd, left: lhs, right: rhs)
  result = lhs
proc parseOr(parser: Parser): Node =
  ## Parses a left-associative chain of `or` over `and` expressions.
  var lhs = parser.parseAnd()
  while parser.match(tkOr):
    let rhs = parser.parseAnd()
    lhs = Node(kind: nkOr, left: lhs, right: rhs)
  result = lhs
proc parsePipeCall(parser: Parser): Node =
  ## Parses the pipe operator `::`, which feeds the value on its left
  ## into the call on its right as the first argument.
  ##
  ## * If the right-hand side is itself a call or colon call at its top
  ##   level, the left value is inserted at the front of its arguments.
  ## * Otherwise (e.g. `5 :: funcs.double`) the right-hand side is taken
  ##   to be a function and is called with the left value as its only
  ##   argument.
  ##
  ## A right-hand side whose top level has lower precedence than a call
  ## is never spliced into; its result is treated as the function. Use
  ## parentheses for that: `5 :: (long expression)(arg1, arg2)`.
  result = parser.parseOr()
  while parser.match(tkDoublecolon):
    let target = parser.parseOr()
    case target.kind
    of nkCall, nkColonCall:
      # already a call: prepend the piped value to its arguments
      target.arguments.insert(result, 0)
      result = target
    else:
      # a function value: call it with the piped value
      result = Node(kind: nkCall, arguments: @[result], function: target)
proc exprNonAssign(parser: Parser): Node =
  ## Parses any expression except an assignment; entry point is the
  ## pipe-call level.
  result = parsePipeCall(parser)
proc parseAssign(parser: Parser): Node =
  ## Parses an optional assignment `target = value`.
  ## Valid targets are variable reads and index reads, which are turned
  ## into the matching set nodes. For any other target an error is
  ## reported (after the value has been parsed) and the target expression
  ## is returned unchanged.
  result = parser.exprNonAssign()
  if not parser.match(tkEqual):
    return
  let value = parser.expression()
  case result.kind
  of nkVarGet:
    result = Node(kind: nkVarSet, sVarName: result.gVarName, newVal: value)
  of nkGetIndex:
    result = Node(kind: nkSetIndex, sCollection: result.gCollection,
      sIndex: result.gIndex, sValue: value)
  else:
    parser.errorAtCurrent("Attempt to assign to invalid target.")
proc parseAmpersand(parser: Parser): Node =
  ## Placeholder precedence level: currently just forwards to
  ## `parseAssign`.
  # TODO
  parseAssign(parser)
proc expression(parser: Parser): Node =
  ## Parses a full expression, starting at the lowest precedence level.
  result = parseAmpersand(parser)
# STATEMENTS
proc statement(parser: Parser): Node
proc statement(parser: Parser): Node =
  ## Parses one expression statement terminated by `;`.
  ## Returns an `nkExprStmt` node, or nil when no expression could be
  ## parsed. After any error the parser is resynchronised so that the
  ## next call starts at a likely statement boundary.
  let parsed = parser.expression()
  if parsed != nil:
    result = Node(kind: nkExprStmt, expression: parsed)
  else:
    parser.errorAtCurrent("Expression expected.")
    # `primary` reports its error without consuming the offending token.
    # `synchronize` may return at once (previous token is `;` or `}`, or
    # the current one is a statement keyword), which would make the
    # caller loop on the same token forever. Skip it to force progress.
    if not parser.isAtEnd() and not parser.peekMatch(tkSemicolon):
      parser.advance()
  parser.consume(tkSemicolon, "; expected after expression statement.")
  if parser.panicMode:
    parser.synchronize()
proc parse*(parser: Parser): Node =
  ## Parses the whole source into a block node with one child per
  ## statement. A fresh scanner is created over the parser's source on
  ## every call. Check `hadError` afterwards to see whether any error was
  ## reported.
  parser.scanner = newScanner(parser.source)
  var statements: seq[Node] = @[]
  parser.advance() # prime the lookahead token
  while not parser.isAtEnd():
    statements.add(parser.statement())
  result = Node(kind: nkBlockExpr, children: statements)