nondescript/src/ndspkg/scanner.nim

245 lines
7.3 KiB
Nim

import strutils
import tables
import strformat
import unicode
type
  Scanner* = ref object
    ## Mutable cursor over a source string.
    source: string        ## the complete text being tokenized
    start, current: int   ## byte offsets: start of the token being built,
                          ## and the next unread rune
    line: int             ## number of newlines consumed so far

  TokenType* = enum
    ## Every kind of token the scanner can produce. The declaration order
    ## fixes the ordinal values, so new members should be appended with care.

    # The default token type; if it is encountered anywhere, erroring out is
    # the best course of action.
    tkNone,
    # brackets and separators
    tkLeftParen, tkRightParen, tkLeftBrace, tkRightBrace, tkComma, tkDot,
    tkColon, tkDoublecolon, tkArrow,
    # operators
    tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar, tkBang, tkBangEqual,
    tkGreater, tkGreaterEqual, tkLess, tkLessEqual, tkEqual, tkEqualEqual,
    # collection openers and square brackets
    tkStartList, tkStartTable, tkLeftBracket, tkRightBracket,
    tkHashtag, tkAmpersand,
    # literals and names
    tkIdentifier, tkString,
    # numbers, keywords and labels
    tkNumber, tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil,
    tkOr, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    # scanner diagnostics and end of input
    tkError, tkEof

  Token* = object
    ## A single lexeme together with its classification and position.
    tokenType*: TokenType ## what kind of token this is
    text*: string         ## the matched source text, or the message for
                          ## a `tkError` token
    line*: int            ## value of the scanner's line counter when the
                          ## token was created
proc debugPrint*(token: Token) =
  ## Writes a one-line, human-readable description of `token` (its type,
  ## text and line) to standard output.
  stdout.write(&"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n")
proc isAtEnd(scanner: Scanner): bool =
  ## True once the read position has moved past the last byte of the source.
  scanner.current >= scanner.source.len
proc advance(scanner: Scanner): Rune =
  ## Decodes the rune at the read position, returns it, and moves the read
  ## position past it (by however many bytes the rune occupies).
  ## Performs no end-of-input check of its own.
  fastRuneAt(scanner.source, scanner.current, result, true)
proc peek(scanner: Scanner): Rune =
  ## Returns the rune at the read position without consuming it.
  ## At end of input the NUL rune is returned instead.
  if scanner.isAtEnd():
    return Rune(0)
  fastRuneAt(scanner.source, scanner.current, result, false)
proc peekNext(scanner: Scanner): Rune =
  ## Returns the rune that follows the one at the read position, without
  ## consuming anything. Returns the NUL rune when there is no such rune.
  ##
  ## The look-ahead offset is the byte length of the current rune rather
  ## than a fixed 1, so the result is correct even when the current rune is
  ## a multi-byte UTF-8 sequence.
  if scanner.current > scanner.source.high:
    return Rune(0)
  let nextPos = scanner.current + scanner.source.runeLenAt(scanner.current)
  if nextPos > scanner.source.high:
    return Rune(0)
  scanner.source.fastRuneAt(nextPos, result, doInc = false)
template `==`(l: char, r: Rune): bool =
  ## Compares a `char` with a `Rune` by decoding the char's one-character
  ## string form into a rune first.
  runeAt($l, 0) == r

template `==`(l: Rune, r: char): bool =
  ## Mirror image of the `char`/`Rune` comparison: `r` is decoded into a
  ## rune and compared with `l`.
  runeAt($r, 0) == l
proc match(scanner: Scanner, exp: char | Rune): bool =
  ## Conditional advance: when the upcoming rune equals `exp` it is consumed
  ## and `true` is returned; otherwise nothing is consumed and the result is
  ## `false`.
  result = scanner.peek() == exp
  if result:
    discard scanner.advance()
proc newScanner*(source: string): Scanner =
  ## Creates a scanner positioned at the beginning of `source`, with the
  ## line counter at 0.
  new(result)
  result.source = source
  result.current = 0
  result.line = 0
proc makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of kind `tokenType` whose text is the source slice from
  ## the token start up to (but not including) the read position, stamped
  ## with the current line counter.
  Token(
    tokenType: tokenType,
    text: scanner.source[scanner.start ..< scanner.current],
    line: scanner.line
  )
proc errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a `tkError` token that carries `msg` as its text (instead of a
  ## source slice), stamped with the current line counter.
  Token(tokenType: tkError, text: msg, line: scanner.line)
proc toChar(r: Rune): char =
  ## Narrows a rune to a `char`; use only for matching runes in `case`
  ## statements. Runes that need more than one byte are mapped to
  ## `char(255)`, which no branch is expected to match.
  if r.size() <= 1:
    result = ($r)[0]
  else:
    result = char(255) # never match this
proc skipWhitespace(scanner: Scanner) =
  ## Advances past everything that is not part of a token: spaces, tabs,
  ## carriage returns, newlines, `//` line comments and (nestable) `/* */`
  ## block comments. Stops at the first rune that can start a token, or at
  ## end of input. The line counter is incremented for every newline
  ## consumed, including those inside block comments.
  while true:
    let c = scanner.peek().toChar()
    case c
    of {' ', '\r', '\t'}:
      discard scanner.advance()
    of '\n':
      scanner.line.inc
      discard scanner.advance()
    of '/':
      let next = scanner.peekNext()
      if next == '/':
        # Line comment: stop in front of the newline so the '\n' branch
        # above accounts for it on the next iteration.
        while not scanner.isAtEnd() and scanner.peek().toChar() != '\n':
          discard scanner.advance()
      elif next == '*':
        # Consume the whole "/*" opener first, so its '*' can never be
        # mistaken for the start of a "*/" closer (e.g. in "/*/").
        discard scanner.advance()
        discard scanner.advance()
        var depth = 1
        # Inspect every position exactly once; an unterminated comment
        # simply runs to end of input.
        while depth > 0 and not scanner.isAtEnd():
          let cur = scanner.peek().toChar()
          if cur == '/' and scanner.peekNext().toChar() == '*':
            depth.inc
            discard scanner.advance()
            discard scanner.advance()
          elif cur == '*' and scanner.peekNext().toChar() == '/':
            depth.dec
            discard scanner.advance()
            discard scanner.advance()
          else:
            if cur == '\n':
              # keep line numbers accurate across multi-line comments
              scanner.line.inc
            discard scanner.advance()
      else:
        # a lone '/' is the division operator, not whitespace
        return
    else:
      return
proc scanString(scanner: Scanner): Token =
  ## Scans the remainder of a string literal whose opening quote has already
  ## been consumed. Newlines inside the literal bump the line counter.
  ## Returns a `tkString` token (text includes both quotes) or an error
  ## token when the input ends before the closing quote.
  while not scanner.isAtEnd():
    let r = scanner.advance()
    if r.toChar() == '\"':
      # closing quote consumed: the literal is complete
      return scanner.makeToken(tkString)
    if r == '\n':
      scanner.line.inc
  scanner.errorToken("Unterminated string.")
proc scanNumber(scanner: Scanner): Token =
  ## Scans the rest of a number literal (the first digit has already been
  ## consumed): a run of digits, optionally followed by a '.' and a further
  ## run of digits. The '.' is only consumed when a digit follows it.
  template eatDigits() =
    while scanner.peek().toChar() in Digits:
      discard scanner.advance()

  eatDigits()
  let hasFraction =
    scanner.peek().toChar() == '.' and scanner.peekNext().toChar() in Digits
  if hasFraction:
    discard scanner.advance() # the decimal point
    eatDigits()
  scanner.makeToken(tkNumber)
# Reserved words and the token type each one scans to.
const keywords = toTable([
  ("and", tkAnd),
  ("break", tkBreak),
  ("else", tkElse),
  ("false", tkFalse),
  ("for", tkFor),
  # the keyword is spelled `proc`, although the token type is named tkFunct
  ("proc", tkFunct),
  ("goto", tkGoto),
  ("if", tkIf),
  ("nil", tkNil),
  ("or", tkOr),
  ("true", tkTrue),
  ("var", tkVar),
  ("while", tkWhile),
])
proc canStartIdent(chr: Rune): bool =
  ## True for runes that may begin an identifier: an underscore or any
  ## alphabetic rune.
  chr.toChar() == '_' or chr.isAlpha()
proc canContIdent(chr: Rune): bool =
  ## True for runes that may appear after the first rune of an identifier:
  ## ASCII digits plus everything that may start one.
  chr.toChar() in Digits or canStartIdent(chr)
proc scanIdentifier(scanner: Scanner): Token =
  ## Consumes the remaining identifier runes and classifies the lexeme:
  ## a reserved word yields its keyword token type, anything else yields
  ## `tkIdentifier`.
  while canContIdent(scanner.peek()):
    discard scanner.advance()
  let lexeme = scanner.source[scanner.start ..< scanner.current]
  scanner.makeToken(keywords.getOrDefault(lexeme, tkIdentifier))
proc canContLabel(chr: Rune): bool =
  ## True for runes allowed in a label name: an underscore or any
  ## alphabetic rune (digits are not accepted).
  chr.toChar() == '_' or chr.isAlpha()
proc scanLabel(scanner: Scanner): Token =
  ## Scans a label name; the introducing '@' has already been consumed.
  ## Yields a `tkLabel` token, or an error token when no valid label rune
  ## follows.
  let nameStart = scanner.current
  while canContLabel(scanner.peek()):
    discard scanner.advance()
  if scanner.current == nameStart:
    # nothing was consumed, so the name is empty or starts with an
    # illegal rune
    return scanner.errorToken("Labels must only contain letters and underscores.")
  scanner.makeToken(tkLabel)
proc scanToken*(scanner: Scanner): Token =
  ## Skips leading whitespace and comments, then scans and returns the next
  ## token. Returns a `tkEof` token at end of input and a `tkError` token
  ## for a rune that cannot start any token.
  scanner.skipWhitespace()
  scanner.start = scanner.current
  if scanner.isAtEnd():
    return scanner.makeToken(tkEof)

  template oneOrTwo(second: char; paired, alone: TokenType): Token =
    ## Two-character operator when `second` follows, else the one-character
    ## operator.
    scanner.makeToken(if scanner.match(second): paired else: alone)

  let first = scanner.advance()
  result =
    case first.toChar()
    of '(': scanner.makeToken(tkLeftParen)
    of ')': scanner.makeToken(tkRightParen)
    of '{': scanner.makeToken(tkLeftBrace)
    of '}': scanner.makeToken(tkRightBrace)
    of '[': scanner.makeToken(tkLeftBracket)
    of ']': scanner.makeToken(tkRightBracket)
    of ';': scanner.makeToken(tkSemicolon)
    of ',': scanner.makeToken(tkComma)
    of '.': scanner.makeToken(tkDot)
    of '-': oneOrTwo('>', tkArrow, tkMinus)
    of '+': scanner.makeToken(tkPlus)
    of '/': scanner.makeToken(tkSlash)
    of '*': scanner.makeToken(tkStar)
    of '#': scanner.makeToken(tkHashtag)
    of '&': scanner.makeToken(tkAmpersand)
    of '!': oneOrTwo('=', tkBangEqual, tkBang)
    of '=': oneOrTwo('=', tkEqualEqual, tkEqual)
    of '<': oneOrTwo('=', tkLessEqual, tkLess)
    of '>': oneOrTwo('=', tkGreaterEqual, tkGreater)
    of '\"': scanner.scanString()
    of Digits: scanner.scanNumber()
    of '@':
      # "@[" opens a list, "@{" opens a table, anything else is a label
      if scanner.match('['): scanner.makeToken(tkStartList)
      elif scanner.match('{'): scanner.makeToken(tkStartTable)
      else: scanner.scanLabel()
    of ':':
      # "::" is its own token; a ':' directly followed by an identifier
      # rune begins an identifier; otherwise it is a plain colon
      if scanner.match(':'): scanner.makeToken(tkDoublecolon)
      elif scanner.peek().canContIdent(): scanner.scanIdentifier()
      else: scanner.makeToken(tkColon)
    else:
      # ':' can also start an identifier, but that case is handled in its
      # own branch above, not here
      if first.canStartIdent(): scanner.scanIdentifier()
      else: scanner.errorToken("Unexpected character.")