nondescript/src/ndspkg/compv2/parser.nim

499 lines
18 KiB
Nim

# a new recursive descent parser for nds
# parser: converts a stream of tokens into an AST
import ../scanner
import node
import ../config
import ../types/value
import strformat
import strutils
import bitops
import options
# TYPEDEF
type
  Parser = ref object
    ## State of the recursive descent parser: a small window of tokens over
    ## the scanner plus the error-reporting flags.
    # scanning
    scanner: Scanner
    source: string
    current: Token
    previous, next: Option[Token]
    # when `next` is set, advance takes that token instead of asking the
    # scanner for a new one
    line: int
    hold: Node # temporary hold, used to implement ampersand op
    # errors
    hadError*: bool
    panicMode: bool
proc newParser*(name: string, source: string): Parser =
  ## Builds a parser over `source` with no errors recorded and no tokens
  ## buffered. The scanner is not created here; `parse` sets it up.
  ## `name` is accepted but not used by this proc.
  Parser(
    source: source,
    hadError: false,
    panicMode: false,
    next: none(Token),
    previous: none(Token)
  )
# UTILS
# error handling
proc errorAt(parser: Parser, line: int, msg: string, at: string = "") =
  ## Writes one diagnostic line to stderr and puts the parser into panic
  ## mode. While panic mode is already active, nothing is printed and no
  ## flag is touched.
  if not parser.panicMode:
    var report = &"[line {line}] Error "
    if at.len > 0:
      report.add(&"at {at} ")
    report.add(msg)
    report.add("\n")
    stderr.write(report)
    parser.hadError = true
    parser.panicMode = true
proc errorAtCurrent(parser: Parser, msg: string) =
  ## Reports `msg` using the line of the current token.
  let currentLine = parser.current.line
  errorAt(parser, currentLine, msg)
# scanning for tokens
proc advance(parser: Parser) =
  ## Moves one token forward. The old current token becomes `previous` and
  ## its line becomes `parser.line`. A token buffered in `next` is taken
  ## before the scanner is asked. Error tokens are reported and skipped.
  parser.previous = some(parser.current)
  parser.line = parser.current.line
  while true:
    if parser.next.isNone():
      parser.current = parser.scanner.scanToken()
    else:
      parser.current = parser.next.get()
      parser.next = none(Token)
    if parser.current.tokenType == tkError:
      # the text of an error token is used as the error message
      parser.errorAtCurrent(parser.current.text)
    else:
      return
proc backtrack(parser: Parser) =
  ## Steps one token back: the current token is buffered in `next` and the
  ## previous token becomes current again.
  parser.next = parser.current.some
  parser.current = get(parser.previous)
  # After stepping back there is no known previous token. Leaving it as
  # `none` means any attempt to read it before the next advance fails
  # loudly on `get` instead of silently using a stale token.
  parser.previous = none(Token)
proc match(parser: Parser, tokenType: TokenType): bool =
  ## Consumes the current token and returns true when it has type
  ## `tokenType`; otherwise leaves the parser untouched and returns false.
  result = parser.current.tokenType == tokenType
  if result:
    parser.advance()
proc match(parser: Parser, tokenTypes: set[TokenType]): bool =
  ## Consumes the current token and returns true when its type is a member
  ## of `tokenTypes`; otherwise leaves the parser untouched and returns
  ## false.
  result = parser.current.tokenType in tokenTypes
  if result:
    parser.advance()
proc consume(parser: Parser, tokenType: TokenType | set[TokenType], msg: string): bool =
  ## Like `match`, but reports `msg` at the current token when the expected
  ## token is missing. The returned flag lets callers leave loops that
  ## would otherwise keep trying to consume after an error.
  result = parser.match(tokenType)
  if not result:
    parser.errorAtCurrent(msg)
proc peek(parser: Parser): Token =
  ## Returns the current token without consuming it.
  result = parser.current
proc peekMatch(parser: Parser, tokenType: TokenType): bool =
  ## True when the current token has type `tokenType`; consumes nothing.
  let upcoming = parser.peek()
  upcoming.tokenType == tokenType
proc isAtEnd(parser: Parser): bool =
  ## True once the current token is the end-of-file token.
  parser.peekMatch(tkEof)
proc synchronize(parser: Parser) =
  ## Leaves panic mode and skips tokens until a likely statement boundary:
  ## either a semicolon was just passed, or the current token is one of
  ## the listed resume tokens. Stops at end of input as well.
  const resumeTokens = {tkProc, tkVar, tkFor, tkIf, tkWhile, tkRightBrace}
  parser.panicMode = false
  while not parser.isAtEnd():
    let passedSemicolon = parser.previous.get().tokenType == tkSemicolon
    if passedSemicolon or parser.current.tokenType in resumeTokens:
      return
    parser.advance()
# EXPRESSIONS
# Forward declarations: the procs below are called by parsing procs that
# appear before their bodies (the grammar is mutually recursive).
# expression: entry point for any expression, including assignments
proc expression(parser: Parser): Node
# statement: parses one statement; inBlock is passed as true by parseBlock
proc statement(parser: Parser, inBlock: bool = false): Node
# expressions, but not assignments
proc exprNonAssign(parser: Parser): Node
proc parseList(parser: Parser): Node =
  ## Parses the elements of a list literal up to and including the closing
  ## ']'. The opening token has already been consumed by the caller.
  ## Returns an nkList node.
  let startLine = parser.line
  var items: seq[Node] = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightBracket)):
    items.add(parser.expression())
    if parser.peekMatch(tkRightBracket):
      continue # the loop condition ends the loop on ']'
    if not parser.consume(tkComma, "',' expected after list elements."):
      break
  discard parser.consume(tkRightBracket, "']' expected after list declaration.")
  result = Node(kind: nkList, elems: items, line: startLine)
proc parseTable(parser: Parser): Node =
  ## Parses the key/value pairs of a table literal up to and including the
  ## closing '}'. The opening token has already been consumed by the
  ## caller. Keys are either `[expr]` or a bare identifier, which is stored
  ## as a constant; values are non-assignment expressions.
  result = Node(kind: nkTable, keys: @[], values: @[], line: parser.line)
  while not (parser.isAtEnd() or parser.peekMatch(tkRightBrace)):
    if parser.match(tkLeftBracket):
      # [key] = syntax
      let computedKey = parser.expression()
      result.keys.add(computedKey)
      discard parser.consume(tkRightBracket, "']' expected after table key.")
    elif parser.match(tkIdentifier):
      # key = syntax
      let keyName = parser.previous.get().text
      result.keys.add(
        Node(kind: nkConst, constant: fromNimString(keyName), line: parser.line))
    else:
      parser.errorAtCurrent("Key expected (have you forgotten to put the key in brackets?).")
    # both '=' and ':' are accepted between key and value
    discard parser.consume({tkEqual, tkColon}, "'=' expected after key.")
    result.values.add(parser.exprNonAssign())
    if parser.peekMatch(tkRightBrace):
      continue # the loop condition ends the loop on '}'
    if not parser.consume(tkComma, "',' expected after table key value pair."):
      break
  discard parser.consume(tkRightBrace, "'}' expected after table declaration.")
proc parseProcDeclaration(parser: Parser): Node =
  ## Parses the rest of a procedure once '(' has been consumed: an optional
  ## comma-separated parameter list, the ')' and then the body expression.
  ## Returns an nkProc node.
  var paramNames: seq[string] = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
    if not parser.consume(tkIdentifier, "Parameter name expected."):
      break
    paramNames.add(parser.previous.get().text)
    # a comma is only required when more parameters follow
    let listContinues = not parser.isAtEnd() and not parser.peekMatch(tkRightParen)
    if listContinues and not parser.consume(tkComma, "',' expected between parameters."):
      break
  discard parser.consume(tkRightParen, "')' expected after parameter list.")
  let procBody = parser.expression()
  result = Node(kind: nkProc, parameters: paramNames, procBody: procBody,
      line: parser.line)
proc parseBlock(parser: Parser): Node =
  ## Parses a block expression once '{' has been consumed: any leading
  ## labels, then statements until '}'. A child of kind nkExpr ends the
  ## statement loop early. Returns an nkBlockExpr node.
  result = Node(kind: nkBlockExpr, children: @[], labels: @[], line: parser.line)
  while parser.match(tkLabel):
    # the first character of the label token is dropped
    let labelText = parser.previous.get().text
    result.labels.add(labelText[1..^1])
  while not (parser.isAtEnd() or parser.peekMatch(tkRightBrace)):
    let stmt = parser.statement(true)
    if stmt.isNil:
      # there was an error, but it should have already been reported
      # we will be in panic mode anyways
      parser.errorAtCurrent("Internal error: Block got empty expression.")
      return
    result.children.add(stmt)
    if stmt.kind == nkExpr:
      break
  discard parser.consume(tkRightBrace, "'}' expected after block expression.")
proc primary(parser: Parser): Node =
  ## Parses a primary expression: a literal, a parenthesised expression, a
  ## block, a list or table literal, an identifier, the '&' placeholder or
  ## a `proc` expression.
  ##
  ## Returns nil after reporting an error when no primary can be parsed.
  ## In the "unknown token" case the offending token is skipped so that
  ## the parser always makes progress.
  if parser.match(tkFalse):
    return Node(kind: nkFalse, line: parser.line)
  if parser.match(tkTrue):
    return Node(kind: nkTrue, line: parser.line)
  if parser.match(tkNil):
    return Node(kind: nkNil, line: parser.line)
  if parser.match(tkNumber):
    return Node(kind: nkConst,
        constant: fromFloat(parseFloat(parser.previous.get().text)),
        line: parser.line)
  if parser.match(tkString):
    # [1..^2] drops the first and last character of the token text
    return Node(kind: nkConst,
        constant: fromNimString(parser.previous.get().text[1..^2]),
        line: parser.line)
  if parser.match(tkLeftParen):
    let grouped = parser.expression()
    discard parser.consume(tkRightParen, "Expect ')' after expression.")
    return Node(kind: nkExpr, expression: grouped, line: parser.line)
  if parser.match(tkLeftBrace):
    return parser.parseBlock()
  if parser.match(tkStartList):
    return parser.parseList()
  if parser.match(tkStartTable):
    return parser.parseTable()
  if parser.match(tkIdentifier):
    return Node(kind: nkVarGet, gVarName: parser.previous.get().text,
        line: parser.line)
  if parser.match(tkAmpersand):
    # '&' stands for the expression stored in parser.hold by parseAmpersand.
    if parser.hold == nil:
      # Nothing was held: report it instead of silently returning nil.
      parser.errorAtCurrent("'&' used without a preceding expression.")
      return nil
    result = parser.hold
    parser.hold = nil
    return result
  if parser.match(tkProc):
    discard parser.consume(tkLeftParen, "'(' expected after 'proc'.")
    return parser.parseProcDeclaration()
  parser.errorAtCurrent(&"Primary expected, but found: {parser.current.tokenType}.")
  parser.advance() # to always go forward if a completely unknown symbol is found
proc parseArgList(parser: Parser): seq[Node] =
  ## Parses call arguments once '(' has been consumed, up to and including
  ## the closing ')'. An empty list (just ')') is accepted.
  result = @[]
  while not (parser.isAtEnd() or parser.peekMatch(tkRightParen)):
    let parsedArg = parser.expression()
    let listContinues = not parser.isAtEnd() and not parser.peekMatch(tkRightParen)
    # note: when the separating comma is missing, the loop is left before
    # the argument that was just parsed is recorded
    if listContinues and not parser.consume(tkComma, "',' expected between arguments."):
      break
    result.add(parsedArg)
  discard parser.consume(tkRightParen, "')' expected after argument list.")
proc parseIndexOrCall(parser: Parser): Node =
  ## Parses a primary followed by any number of postfix operations:
  ## `[index]`, `.name`, `:name` / `:name(args)` colon calls and
  ## `(args)` calls. Each operation wraps the node built so far.
  result = parser.primary()
  while parser.match({tkLeftBracket, tkDot, tkIdentifier, tkLeftParen}):
    # NOTE: :index is counted as a single identifier, so two identifiers
    # after each other will be handled here
    case parser.previous.get().tokenType
    of tkLeftBracket:
      let index = parser.expression()
      if not parser.consume(tkRightBracket, "']' after index."):
        break
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: index,
          line: parser.line)
    of tkIdentifier:
      let identText = parser.previous.get().text
      if identText[0] != ':':
        # a plain identifier directly after an expression: treated as a
        # missing statement terminator
        parser.errorAtCurrent("';' expected after expression statement.")
        return
      # the constant key drops the leading ':'
      let ident = Node(kind: nkConst,
          constant: identText[1..^1].fromNimString(), line: parser.line)
      var args: seq[Node] = @[]
      if parser.match(tkLeftParen):
        args = parser.parseArgList()
      let funct = Node(kind: nkGetIndex, gCollection: result, gIndex: ident,
          line: parser.line)
      result = Node(kind: nkColonCall, arguments: args, function: funct,
          line: parser.line)
    of tkLeftParen:
      # call
      let args = parser.parseArgList()
      result = Node(kind: nkCall, arguments: args, function: result,
          line: parser.line)
    else:
      # dot
      if not parser.consume(tkIdentifier, "Identifier expected after '.' index operator."):
        break
      # the key node carries a line like the other constant nodes do
      let key = Node(kind: nkConst,
          constant: parser.previous.get().text.fromNimString(),
          line: parser.line)
      result = Node(kind: nkGetIndex, gCollection: result, gIndex: key,
          line: parser.line)
proc parseIf(parser: Parser): Node =
  ## Parses `(condition) body [else body]` after the `if` keyword has been
  ## consumed. Returns an nkIf node; elseBody stays nil without an `else`.
  discard parser.consume(tkLeftParen, "'(' expected after 'if'.")
  let condition = parser.expression()
  discard parser.consume(tkRightParen, "')' expected after condition.")
  let thenBranch = parser.expression()
  # the node's line is taken before any else branch is parsed
  let nodeLine = parser.line
  let elseBranch: Node =
    if parser.match(tkElse): parser.expression()
    else: nil
  result = Node(kind: nkIf, ifCondition: condition, ifBody: thenBranch,
      line: nodeLine)
  if elseBranch != nil:
    result.elseBody = elseBranch
proc parseWhile(parser: Parser): Node =
  ## Parses `(condition) body` after the `while` keyword has been consumed.
  ## Returns an nkWhile node.
  discard parser.consume(tkLeftParen, "'(' expected after 'while'.")
  let condition = parser.expression()
  discard parser.consume(tkRightParen, "')' expected after condition.")
  let loopBody = parser.expression()
  Node(kind: nkWhile, whileCondition: condition, whileBody: loopBody,
      line: parser.line)
proc unary(parser: Parser): Node =
  ## Unary level: handles the prefix operators `!`, `-` and `#`, and also
  ## dispatches `if` and `while`, which live at this precedence level.
  ## Anything else is parsed as an index/call chain.
  const unaryOps = {tkBang, tkMinus, tkIf, tkWhile, tkHashtag}
  if not parser.match(unaryOps):
    return parser.parseIndexOrCall()
  case parser.previous.get().tokenType
  of tkBang:
    let operand = parser.unary()
    result = Node(kind: nkNot, argument: operand, line: parser.line)
  of tkMinus:
    let operand = parser.unary()
    result = Node(kind: nkNegate, argument: operand, line: parser.line)
  of tkHashtag:
    let operand = parser.unary()
    result = Node(kind: nkLen, argument: operand, line: parser.line)
  of tkIf:
    result = parser.parseIf()
  of tkWhile:
    result = parser.parseWhile()
  else:
    # only reachable if unaryOps and the branches above drift apart
    parser.errorAtCurrent("Invalid parser state: unaryOps and case statement out of line.")
    result = parser.parseIndexOrCall()
proc factor(parser: Parser): Node =
  ## Parses a left-associative chain of `/` and `*` over unary operands.
  result = parser.unary()
  while parser.match({tkSlash, tkStar}):
    # remember the operator before the right operand moves `previous`
    let isDivision = parser.previous.get().tokenType == tkSlash
    let rhs = parser.unary()
    result =
      if isDivision:
        Node(kind: nkDiv, left: result, right: rhs, line: parser.line)
      else:
        Node(kind: nkMult, left: result, right: rhs, line: parser.line)
proc term(parser: Parser): Node =
  ## Parses a left-associative chain of `-` and `+` over factors.
  result = parser.factor()
  while parser.match({tkMinus, tkPlus}):
    # remember the operator before the right operand moves `previous`
    let isSubtraction = parser.previous.get().tokenType == tkMinus
    let rhs = parser.factor()
    result =
      if isSubtraction:
        Node(kind: nkMinus, left: result, right: rhs, line: parser.line)
      else:
        Node(kind: nkPlus, left: result, right: rhs, line: parser.line)
proc comparison(parser: Parser): Node =
  ## Parses a left-associative chain of `>`, `>=`, `<` and `<=` over terms.
  const comparisonOps = {tkGreater, tkGreaterEqual, tkLess, tkLessEqual}
  result = parser.term()
  while parser.match(comparisonOps):
    # remember the operator before the right operand moves `previous`
    let opType = parser.previous.get().tokenType
    let rhs = parser.term()
    if opType == tkGreater:
      result = Node(kind: nkGreater, left: result, right: rhs, line: parser.line)
    elif opType == tkGreaterEqual:
      result = Node(kind: nkGe, left: result, right: rhs, line: parser.line)
    elif opType == tkLess:
      result = Node(kind: nkLess, left: result, right: rhs, line: parser.line)
    elif opType == tkLessEqual:
      result = Node(kind: nkLe, left: result, right: rhs, line: parser.line)
    else:
      # only reachable if the matched set and the branches drift apart
      parser.errorAtCurrent("invalid state in comparison: case and set don't match up.")
proc equality(parser: Parser): Node =
  ## Parses a left-associative chain of `!=` and `==` over comparisons.
  result = parser.comparison()
  while parser.match({tkBangEqual, tkEqualEqual}):
    # remember the operator before the right operand moves `previous`
    let isInequality = parser.previous.get().tokenType == tkBangEqual
    let rhs = parser.comparison()
    result =
      if isInequality:
        Node(kind: nkNeq, left: result, right: rhs, line: parser.line)
      else:
        Node(kind: nkEq, left: result, right: rhs, line: parser.line)
proc parseAnd(parser: Parser): Node =
  ## Parses a left-associative chain of `and` over equality expressions.
  var acc = parser.equality()
  while parser.match(tkAnd):
    let rhs = parser.equality()
    acc = Node(kind: nkAnd, left: acc, right: rhs, line: parser.line)
  acc
proc parseOr(parser: Parser): Node =
  ## Parses a left-associative chain of `or` over `and` expressions.
  var acc = parser.parseAnd()
  while parser.match(tkOr):
    let rhs = parser.parseAnd()
    acc = Node(kind: nkOr, left: acc, right: rhs, line: parser.line)
  acc
proc parsePipeCall(parser: Parser): Node =
  ## Parses the pipe operator `::`, which feeds the left value into the
  ## expression on the right as its first argument.
  ##
  ## - If the topmost node on the right is a call or colon call, the left
  ##   value is inserted as argument 0 of that call.
  ## - Otherwise the right side is treated as a function taking one
  ##   argument, e.g. `5 :: funcs.double`.
  ##
  ## Operators with lower precedence than a call are not looked into on
  ## the right side; their result is assumed to be a function. Use parens
  ## to pipe into such an expression: `5 :: (long expression)(arg1, arg2)`.
  result = parser.parseOr()
  while parser.match(tkDoublecolon):
    let right = parser.parseOr()
    if right == nil:
      # The right operand failed to parse. Stop here instead of reading
      # `right.kind` on a nil node.
      parser.errorAtCurrent("Expression expected after '::'.")
      return
    if right.kind in {nkCall, nkColonCall}:
      # case 1: right is already a call or coloncall
      right.arguments.insert(result, 0)
      result = right
    else:
      # else: right val is a function which we call
      result = Node(kind: nkCall, arguments: @[result], function: right,
          line: parser.line)
proc exprNonAssign(parser: Parser): Node =
  ## Parses any expression below assignment level; currently that means
  ## starting at the pipe-call level.
  result = parser.parsePipeCall()
proc parseAssign(parser: Parser): Node =
  ## Parses an optional, right-associative assignment. The left side is
  ## parsed as a normal expression first and converted afterwards:
  ## nkVarGet becomes nkVarSet and nkGetIndex becomes nkSetIndex. Any other
  ## target is reported as an error and returned unchanged.
  result = parser.exprNonAssign()
  if parser.match(tkEqual):
    # check if result is assignable
    const assignable = {nkVarGet, nkGetIndex}
    let right = parser.parseAssign()
    # A nil target means the left side failed to parse; treat it like any
    # other invalid target instead of reading `.kind` on nil.
    if result == nil or result.kind notin assignable:
      parser.errorAtCurrent("Attempt to assign to invalid target.")
      return
    if result.kind == nkVarGet:
      result = Node(kind: nkVarSet, sVarName: result.gVarName, newVal: right,
          line: parser.line)
    else:
      # nkGetIndex
      result = Node(kind: nkSetIndex, sCollection: result.gCollection,
          sIndex: result.gIndex, sValue: right, line: parser.line)
proc parseAmpersand(parser: Parser): Node =
  ## Parses an expression optionally followed by '&'. When '&' follows,
  ## the expression parsed so far is wrapped in nkExpr and stored in
  ## `parser.hold`, the '&' token is un-consumed, and parsing restarts so
  ## that `primary` picks the held node up when it meets the '&'.
  let leading = parser.parseAssign()
  if not parser.match(tkAmpersand):
    return leading
  parser.hold = Node(kind: nkExpr, expression: leading, line: parser.line)
  parser.backtrack()
  result = parser.parseAmpersand()
proc expression(parser: Parser): Node =
  ## Entry point for parsing any expression; starts at the lowest
  ## precedence level, the ampersand operator.
  result = parser.parseAmpersand()
# STATEMENTS
proc exprStatement(parser: Parser, inBlock: bool): Node =
  ## Parses an expression statement.
  ##
  ## Normally the expression must be followed by ';' and an nkExprStmt is
  ## returned. Inside a block (`inBlock`), an expression directly followed
  ## by '}' is returned as nkExpr instead and needs no ';'.
  ##
  ## Returns nil when no expression could be parsed; the error has been
  ## reported by then.
  let parsed = parser.expression()
  if parsed != nil:
    result = Node(kind: nkExprStmt, expression: parsed, line: parser.line)
  else:
    parser.errorAtCurrent("Expression expected.")
  if parser.peekMatch(tkRightBrace) and inBlock:
    # Only wrap when an expression was actually parsed; otherwise stay nil
    # rather than reading a field of a nil node.
    if result != nil:
      # block should also check if it is the last expr.
      result = Node(kind: nkExpr, expression: parsed, line: parser.line)
  else:
    discard parser.consume(tkSemicolon, "';' expected after expression statement.")
proc statement(parser: Parser, inBlock: bool = false): Node =
  ## Parses one statement: a procedure definition, a `break`, a `var`
  ## declaration or an expression statement. If the parser ends up in
  ## panic mode, it is resynchronized before returning.
  if parser.match(tkProc):
    # `proc` can start a named definition or an anonymous proc expression
    if not parser.peekMatch(tkLeftParen):
      # proc definition - handled like a variable declaration
      discard parser.consume(tkIdentifier, "Procedure name expected after 'proc'.")
      let procName = parser.previous.get().text
      discard parser.consume(tkLeftParen, "'(' expected after procedure name.")
      let procNode = parser.parseProcDeclaration()
      result = Node(kind: nkVarDecl, name: procName, value: procNode,
          line: parser.line)
      discard parser.consume(tkSemicolon, "';' expected after procedure declaration.")
    else:
      # proc expression - un-consume `proc` and parse it as an expression
      parser.backtrack()
      result = parser.exprStatement(inBlock)
  elif parser.match(tkBreak):
    # an optional label follows; its first character is dropped
    let target =
      if parser.match(tkLabel): parser.previous.get().text[1..^1]
      else: ""
    result = Node(kind: nkBreak, label: target, line: parser.line)
    discard parser.consume(tkSemicolon, "';' expected after break statement.")
  elif parser.match(tkVar):
    discard parser.consume(tkIdentifier, "Identifier expected after 'var'.")
    let declared = parser.previous.get().text
    if declared[0] == ':':
      parser.errorAtCurrent("Attempt to declare variable with name starting with ':'.")
    # without an initializer the declaration's value is nil
    let initial: Node =
      if parser.match(tkEqual): parser.expression()
      else: nil
    result = Node(kind: nkVarDecl, name: declared, value: initial,
        line: parser.line)
    discard parser.consume(tkSemicolon, "';' expected after variable declaration.")
  else:
    result = parser.exprStatement(inBlock)
  if parser.panicMode:
    parser.synchronize()
proc parse*(parser: Parser): Node =
  ## Parses the whole source into an nkProgram node. Creates the scanner,
  ## primes the first token and then collects statements until end of
  ## input.
  parser.scanner = newScanner(parser.source)
  # the program node's line is taken before the first token is read
  let startLine = parser.line
  parser.advance()
  var topLevel: seq[Node] = @[]
  while not parser.isAtEnd():
    topLevel.add(parser.statement())
  result = Node(kind: nkProgram, pChildren: topLevel, line: startLine)