nondescript/src/ndspkg/scanner.nim

205 lines
5.8 KiB
Nim

import strutils
import tables
import strformat
type
  Scanner* = ref object
    ## Cursor over one source string; tokens are cut out of `source`
    ## between `start` and `current`.
    start: int     ## index of the first character of the token in progress
    current: int   ## index of the next character to be consumed
    line: int      ## line counter, incremented for every '\n' consumed
    source: string ## the complete text being tokenized

  TokenType* = enum ## Every kind of token the scanner can produce.
    # Zero value of the enum: a token still carrying it was never filled
    # in, so consumers are best off treating it as an error.
    tkNone,
    # Punctuation and operators.
    tkLeftParen, tkRightParen,
    tkLeftBrace, tkRightBrace,
    tkComma, tkDot, tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar,
    tkBang, tkBangEqual,
    tkGreater, tkGreaterEqual,
    tkLess, tkLessEqual,
    tkEqual, tkEqualEqual,
    # "@[" and "@{" openers, plain brackets, and '#'.
    tkStartList, tkStartTable,
    tkLeftBracket, tkRightBracket,
    tkHashtag,
    # Identifiers and literals.
    tkIdentifier, tkString, tkNumber,
    # Reserved words, plus tkLabel for "@name" labels.
    tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil,
    tkOr, tkPrint, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    # Scanner-generated markers.
    tkError, tkEof

  Token* = object
    ## One lexeme together with its classification and position.
    tokenType*: TokenType ## what kind of token this is
    text*: string         ## the lexeme, or the message for tkError tokens
    line*: int            ## the scanner's line counter at creation time
proc debugPrint*(token: Token) =
  ## Writes a one-line, human-readable description of `token` (its type,
  ## text and line) to standard output.
  let description = &"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n"
  stdout.write(description)
proc isAtEnd(scanner: Scanner): bool =
  ## Reports whether every character of the source has been consumed,
  ## i.e. `current` no longer indexes a valid position in `source`.
  result = scanner.current >= scanner.source.len
proc advance(scanner: Scanner): char =
  ## Consumes one character: moves `current` forward by one and returns
  ## the character that was just stepped over.
  ##
  ## No bounds check is made here; callers look at `peek`/`isAtEnd` first.
  let consumedAt = scanner.current
  inc scanner.current
  result = scanner.source[consumedAt]
proc peek(scanner: Scanner): char =
  ## Returns the next unconsumed character without consuming it, or
  ## '\0' when the scanner has reached the end of the source.
  result = '\0'
  if not scanner.isAtEnd():
    result = scanner.source[scanner.current]
proc peekNext(scanner: Scanner): char =
  ## Returns the character one past the next unconsumed one without
  ## consuming anything, or '\0' when there is no such character.
  let lookahead = scanner.current + 1
  if lookahead > scanner.source.high:
    return '\0'
  result = scanner.source[lookahead]
proc match(scanner: Scanner, exp: char): bool =
  ## Consumes the next character only if it equals `exp`.
  ##
  ## Returns true when the character was consumed and false otherwise;
  ## on false the scanner position is left untouched.
  # `peek` reports end of input as '\0', so without this guard a call
  # with exp == '\0' at the end would "match" the sentinel and make
  # `advance` index past the end of `source`.
  if scanner.isAtEnd() or scanner.peek() != exp:
    return false
  discard scanner.advance()
  true
proc newScanner*(source: string): Scanner =
  ## Creates a scanner over `source`, positioned at its first character
  ## with the line counter at 0.
  new(result)
  result.source = source
  result.line = 0
  result.current = 0
proc makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of kind `tokenType` whose text is the slice of the
  ## source from `start` up to (not including) `current`, stamped with
  ## the scanner's current line counter.
  Token(
    tokenType: tokenType,
    text: scanner.source[scanner.start ..< scanner.current],
    line: scanner.line
  )
proc errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a tkError token that carries `msg` as its text (instead of a
  ## slice of the source) and the scanner's current line counter.
  Token(tokenType: tkError, text: msg, line: scanner.line)
proc skipWhitespace(scanner: Scanner) =
  ## Consumes blanks, newlines and `//` line comments until the next
  ## character that can start a token (or the end of the source).
  ## Every newline consumed here bumps the line counter.
  while true:
    let upcoming = scanner.peek()
    if upcoming in {' ', '\r', '\t'}:
      discard scanner.advance()
    elif upcoming == '\n':
      inc scanner.line
      discard scanner.advance()
    elif upcoming == '/' and scanner.peekNext() == '/':
      # Line comment: eat everything up to, but not including, the
      # newline so the branch above still counts the line.
      while not (scanner.isAtEnd() or scanner.peek() == '\n'):
        discard scanner.advance()
    else:
      # Anything else (including a lone '/') belongs to the next token.
      break
proc scanString(scanner: Scanner): Token =
  ## Scans the remainder of a string literal; the opening quote has
  ## already been consumed by the caller.
  ##
  ## Returns a tkString token spanning both quotes, or a tkError token
  ## when the source ends before a closing quote. Newlines inside the
  ## literal are allowed and counted.
  while true:
    if scanner.isAtEnd():
      return scanner.errorToken("Unterminated string.")
    let ch = scanner.advance()
    if ch == '"':
      return scanner.makeToken(tkString)
    if ch == '\n':
      inc scanner.line
proc scanNumber(scanner: Scanner): Token =
  ## Scans the rest of a number literal (the first digit has already
  ## been consumed): more digits, optionally followed by a '.' and at
  ## least one further digit.
  template skipDigits() =
    while scanner.peek() in Digits:
      discard scanner.advance()

  skipDigits()
  # A '.' is part of the number only when a digit follows it; otherwise
  # it is left for the next token.
  if scanner.peek() == '.' and scanner.peekNext() in Digits:
    discard scanner.advance()
    skipDigits()
  scanner.makeToken(tkNumber)
# Reserved words and the token type each one scans to. Any identifier
# not listed here is an ordinary tkIdentifier.
# Note the spelling `funct`: this table maps it (not `func` or
# `function`) to tkFunct.
const keywords = toTable({
  "and": tkAnd,
  "break": tkBreak,
  "else": tkElse,
  "false": tkFalse,
  "for": tkFor,
  "funct": tkFunct,
  "goto": tkGoto,
  "if": tkIf,
  "nil": tkNil,
  "or": tkOr,
  "print": tkPrint,
  "true": tkTrue,
  "var": tkVar,
  "while": tkWhile
})
proc canStartIdent(chr: char): bool =
  ## True when `chr` may begin an identifier: an ASCII letter, an
  ## underscore or a colon.
  chr in Letters + {'_', ':'}
proc canContIdent(chr: char): bool =
  ## True when `chr` may appear after the first character of an
  ## identifier: a digit, or anything that could also start one.
  if chr in Digits:
    return true
  canStartIdent(chr)
proc scanIdentifier(scanner: Scanner): Token =
  ## Scans the rest of an identifier (its first character has already
  ## been consumed) and classifies it.
  ##
  ## Returns the matching keyword token when the lexeme is listed in
  ## `keywords`, and a tkIdentifier token otherwise.
  while scanner.peek().canContIdent():
    discard scanner.advance()
  let lexeme = scanner.source[scanner.start ..< scanner.current]
  # One table lookup with a fallback replaces the former hasKey + []
  # pair, which hashed the lexeme twice.
  scanner.makeToken(keywords.getOrDefault(lexeme, tkIdentifier))
proc canContLabel(chr: char): bool =
  ## True when `chr` may appear in a label name: an ASCII letter or an
  ## underscore (digits are not accepted).
  case chr
  of 'a'..'z', 'A'..'Z', '_': true
  else: false
proc scanLabel(scanner: Scanner): Token =
  ## Scans a label name; the leading '@' has already been consumed and
  ## is part of the resulting token's text.
  ##
  ## Returns a tkLabel token, or a tkError token when the '@' is not
  ## followed by at least one letter or underscore.
  if scanner.peek().canContLabel():
    while scanner.peek().canContLabel():
      discard scanner.advance()
    result = scanner.makeToken(tkLabel)
  else:
    result = scanner.errorToken(
      "Labels must only contain letters and underscores.")
proc scanToken*(scanner: Scanner): Token =
  ## Scans and returns the next token from the source.
  ##
  ## Leading whitespace and `//` comments are skipped first. At the end
  ## of the source a tkEof token is returned; a character that cannot
  ## start any token yields a tkError token.
  scanner.skipWhitespace()
  scanner.start = scanner.current
  if scanner.isAtEnd():
    return scanner.makeToken(tkEof)

  # Operators that may be followed by '=': picks the two-character
  # token when an '=' is next (consuming it), the plain one otherwise.
  template withOptionalEquals(plain, followedByEquals: TokenType): Token =
    scanner.makeToken(
      if scanner.match('='): followedByEquals else: plain)

  let ch = scanner.advance()
  case ch
  of '(': scanner.makeToken(tkLeftParen)
  of ')': scanner.makeToken(tkRightParen)
  of '{': scanner.makeToken(tkLeftBrace)
  of '}': scanner.makeToken(tkRightBrace)
  of '[': scanner.makeToken(tkLeftBracket)
  of ']': scanner.makeToken(tkRightBracket)
  of ';': scanner.makeToken(tkSemicolon)
  of ',': scanner.makeToken(tkComma)
  of '.': scanner.makeToken(tkDot)
  of '-': scanner.makeToken(tkMinus)
  of '+': scanner.makeToken(tkPlus)
  of '/': scanner.makeToken(tkSlash)
  of '*': scanner.makeToken(tkStar)
  of '#': scanner.makeToken(tkHashtag)
  of '!': withOptionalEquals(tkBang, tkBangEqual)
  of '=': withOptionalEquals(tkEqual, tkEqualEqual)
  of '<': withOptionalEquals(tkLess, tkLessEqual)
  of '>': withOptionalEquals(tkGreater, tkGreaterEqual)
  of '"': scanner.scanString()
  of Digits: scanner.scanNumber()
  of '@':
    # "@[" opens a list, "@{" opens a table, anything else after '@'
    # is scanned as a label.
    if scanner.match('['): scanner.makeToken(tkStartList)
    elif scanner.match('{'): scanner.makeToken(tkStartTable)
    else: scanner.scanLabel()
  else:
    if ch.canStartIdent(): scanner.scanIdentifier()
    else: scanner.errorToken("Unexpected character.")