nondescript/src/ndspkg/scanner.nim

245 lines
7.3 KiB
Nim
Raw Normal View History

2022-01-20 21:54:11 +01:00
import strutils
import tables
import strformat
2022-02-08 10:10:07 +01:00
import unicode
2022-01-20 21:54:11 +01:00
type
  Scanner* = ref object
    ## Cursor over a source string. All positions are byte indices.
    start: int      ## index where the token currently being scanned begins
    current: int    ## index of the next rune to be read
    line: int       ## line counter, bumped for each newline consumed
    source: string  ## the text being tokenized

  TokenType* = enum
    # the default tokentype, if encountered anywhere, erroring out is the
    # best course of action
    tkNone,
    # brackets and punctuation
    tkLeftParen, tkRightParen, tkLeftBrace, tkRightBrace, tkComma, tkDot,
    tkColon, tkDoublecolon, tkArrow,
    # arithmetic, comparison and assignment operators
    tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar, tkBang, tkBangEqual,
    tkGreater, tkGreaterEqual, tkLess, tkLessEqual, tkEqual, tkEqualEqual,
    # `@[`, `@{`, `[`, `]`
    tkStartList, tkStartTable, tkLeftBracket, tkRightBracket,
    tkHashtag, tkAmpersand,
    # literals and names
    tkIdentifier, tkString,
    tkNumber, tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil,
    tkOr, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    # scanner status tokens
    tkError, tkEof

  Token* = object
    ## One lexeme produced by the scanner.
    tokenType*: TokenType  ## kind of token
    text*: string          ## source slice, or the message for `tkError`
    line*: int             ## value of the scanner's line counter at creation
proc debugPrint*(token: Token) =
  ## Writes a single line describing `token` (its type, its text and its
  ## line) to standard output.
  stdout.write(&"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n")
proc isAtEnd(scanner: Scanner): bool =
  ## True once the read position has moved past the last byte of the source.
  result = scanner.current >= scanner.source.len
2022-02-08 10:10:07 +01:00
proc advance(scanner: Scanner): Rune =
  ## Decodes the rune at the read position, returns it, and moves the read
  ## position past it (by however many bytes the rune occupies).
  ## Callers must make sure the scanner is not at the end of the source.
  fastRuneAt(scanner.source, scanner.current, result, doInc = true)
2022-01-20 21:54:11 +01:00
2022-02-08 10:10:07 +01:00
proc peek(scanner: Scanner): Rune =
  ## Returns the rune at the read position without consuming it.
  ## At the end of the source the NUL rune is returned instead.
  if not scanner.isAtEnd():
    fastRuneAt(scanner.source, scanner.current, result, doInc = false)
  else:
    result = Rune(0)
2022-01-20 21:54:11 +01:00
2022-02-08 10:10:07 +01:00
proc peekNext(scanner: Scanner): Rune =
  ## Returns the rune that follows the current one without consuming
  ## anything, or the NUL rune when no such rune exists.
  ##
  ## The look-ahead offset is the byte width of the current rune, not a
  ## fixed 1, so decoding always starts on a rune boundary even when the
  ## current rune is encoded as several bytes. For a one-byte current rune
  ## this is exactly `current + 1`.
  result = Rune(0)
  if scanner.current <= scanner.source.high:
    let next = scanner.current + scanner.source.runeLenAt(scanner.current)
    # Bounds check before decoding: fastRuneAt indexes the string directly.
    if next <= scanner.source.high:
      scanner.source.fastRuneAt(next, result, doInc = false)
2022-01-20 21:54:11 +01:00
2022-02-08 10:10:07 +01:00
template `==`(l: char, r: Rune): bool =
  ## Compares a `char` with a `Rune` by decoding the char's one-byte string
  ## form into a rune first.
  runeAt($l, 0) == r

template `==`(l: Rune, r: char): bool =
  ## Mirror of the overload above, with the operands swapped.
  runeAt($r, 0) == l
proc match(scanner: Scanner, exp: char | Rune): bool =
  ## Consumes the next rune only when it equals `exp`.
  ## Returns whether a rune was consumed.
  result = scanner.peek() == exp
  if result:
    discard scanner.advance()
proc newScanner*(source: string): Scanner =
  ## Creates a scanner over `source`, positioned at its first byte and with
  ## the line counter at 0.
  new(result)
  result.source = source
  result.line = 0
  result.current = 0
proc makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of `tokenType` whose text is the source slice from the
  ## token start up to (not including) the read position.
  Token(
    tokenType: tokenType,
    text: scanner.source[scanner.start ..< scanner.current],
    line: scanner.line)
proc errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a `tkError` token that carries `msg` as its text and the
  ## scanner's current line counter.
  Token(tokenType: tkError, text: msg, line: scanner.line)
2022-02-08 10:10:07 +01:00
proc toChar(r: Rune): char =
  ## use only for matching runes in case statements
  ##
  ## Runes whose UTF-8 form is a single byte map to that byte; every other
  ## rune maps to `char(255)`, which no `of` branch is expected to list.
  if r.size() > 1:
    return char(255) # never match this
  toUTF8(r)[0]
2022-01-20 21:54:11 +01:00
proc skipWhitespace(scanner: Scanner) =
  ## Advances past everything that separates tokens: spaces, carriage
  ## returns, tabs, newlines, `//` line comments and nestable `/* ... */`
  ## block comments. Returns at the first rune that is none of these, or at
  ## the end of the source.
  ##
  ## The line counter is incremented for every newline consumed, including
  ## newlines inside block comments. An unterminated block comment simply
  ## runs to the end of the source.
  while true:
    case scanner.peek().toChar()
    of ' ', '\r', '\t':
      discard scanner.advance()
    of '\n':
      scanner.line.inc
      discard scanner.advance()
    of '/':
      let follower = scanner.peekNext()
      if follower == '/':
        # Line comment: stop *before* the newline so the '\n' branch above
        # stays the only place that counts it.
        while not scanner.isAtEnd() and scanner.peek().toChar() != '\n':
          discard scanner.advance()
      elif follower == '*':
        # Consume the whole "/*" opener first, so its '*' can never be
        # mistaken for the start of a "*/" closer (as in "/*/").
        discard scanner.advance()
        discard scanner.advance()
        var depth = 1
        while depth > 0 and not scanner.isAtEnd():
          let here = scanner.peek().toChar()
          # peekNext is only consulted when `here` is a one-byte '/' or '*'.
          if here == '/' and scanner.peekNext() == '*':
            depth.inc
            discard scanner.advance()
            discard scanner.advance()
          elif here == '*' and scanner.peekNext() == '/':
            depth.dec
            discard scanner.advance()
            discard scanner.advance()
          else:
            if here == '\n':
              scanner.line.inc
            discard scanner.advance()
      else:
        # A lone '/' is a token, not whitespace.
        return
    else:
      return
proc scanString(scanner: Scanner): Token =
  ## Scans the rest of a string literal whose opening quote has already
  ## been consumed. Newlines inside the literal bump the line counter.
  ## Yields a `tkString` token (closing quote included) or an error token
  ## when the source ends before the closing quote.
  while true:
    if scanner.isAtEnd():
      return scanner.errorToken("Unterminated string.")
    let upcoming = scanner.peek()
    if upcoming.toChar() == '\"':
      break
    if upcoming == '\n':
      scanner.line.inc
    discard scanner.advance()
  # step over the closing quote
  discard scanner.advance()
  scanner.makeToken(tkString)
proc scanNumber(scanner: Scanner): Token =
  ## Scans the rest of a number literal: a run of digits, optionally
  ## followed by a '.' and a further run of digits. The '.' is consumed
  ## only when a digit comes directly after it.
  template skipDigits() =
    while scanner.peek().toChar() in Digits:
      discard scanner.advance()

  skipDigits()
  if scanner.peek().toChar() == '.' and scanner.peekNext().toChar() in Digits:
    discard scanner.advance() # the '.'
    skipDigits()
  scanner.makeToken(tkNumber)
# Reserved words and the token type each one scans to.
const keywords = toTable({
  "and": tkAnd,
  "break": tkBreak,
  "else": tkElse,
  "false": tkFalse,
  "for": tkFor,
  # the keyword is spelled `proc`, while its token type is named tkFunct
  "proc": tkFunct,
  "goto": tkGoto,
  "if": tkIf,
  "nil": tkNil,
  "or": tkOr,
  "true": tkTrue,
  "var": tkVar,
  "while": tkWhile,
})
2022-02-08 10:10:07 +01:00
proc canStartIdent(chr: Rune): bool =
  ## Whether `chr` may be the first rune of an identifier: any alphabetic
  ## rune (per `unicode.isAlpha`) or an underscore.
  result = isAlpha(chr) or toChar(chr) == '_'
2022-01-20 21:54:11 +01:00
2022-02-08 10:10:07 +01:00
proc canContIdent(chr: Rune): bool =
  ## Whether `chr` may appear after the first rune of an identifier:
  ## anything that may start one, plus the ASCII digits.
  if canStartIdent(chr):
    return true
  toChar(chr) in Digits
2022-01-20 21:54:11 +01:00
proc scanIdentifier(scanner: Scanner): Token =
  ## Consumes the remaining identifier runes and yields the keyword's token
  ## type when the whole lexeme is a reserved word, `tkIdentifier` otherwise.
  while canContIdent(scanner.peek()):
    discard scanner.advance()
  let lexeme = scanner.source[scanner.start ..< scanner.current]
  # One table lookup: fall back to tkIdentifier for non-keywords.
  scanner.makeToken(keywords.getOrDefault(lexeme, tkIdentifier))
2022-02-08 10:10:07 +01:00
proc canContLabel(chr: Rune): bool =
  ## Whether `chr` may appear in a label name: an alphabetic rune (per
  ## `unicode.isAlpha`) or an underscore. Digits are not accepted.
  result = isAlpha(chr) or toChar(chr) == '_'
2022-01-20 21:54:11 +01:00
proc scanLabel(scanner: Scanner): Token =
  ## Scans a label name after its leading '@' has been consumed.
  ## Yields an error token when no label rune follows, otherwise a
  ## `tkLabel` token covering the '@' and the name.
  if canContLabel(scanner.peek()):
    while canContLabel(scanner.peek()):
      discard scanner.advance()
    result = scanner.makeToken(tkLabel)
  else:
    result = scanner.errorToken("Labels must only contain letters and underscores.")
proc scanToken*(scanner: Scanner): Token =
  ## Skips leading whitespace and comments, then scans and returns the next
  ## token. Yields `tkEof` once the source is exhausted and a `tkError`
  ## token for a rune that cannot begin any token.
  scanner.skipWhitespace()
  scanner.start = scanner.current

  if scanner.isAtEnd():
    return scanner.makeToken(tkEof)

  let first = scanner.advance()

  # Tokens that change type when directly followed by one specific char.
  template withOptional(follow: char, ifPresent, ifAbsent: TokenType): Token =
    scanner.makeToken(if scanner.match(follow): ifPresent else: ifAbsent)

  case first.toChar()
  of '(': scanner.makeToken(tkLeftParen)
  of ')': scanner.makeToken(tkRightParen)
  of '{': scanner.makeToken(tkLeftBrace)
  of '}': scanner.makeToken(tkRightBrace)
  of '[': scanner.makeToken(tkLeftBracket)
  of ']': scanner.makeToken(tkRightBracket)
  of ';': scanner.makeToken(tkSemicolon)
  of ',': scanner.makeToken(tkComma)
  of '.': scanner.makeToken(tkDot)
  of '-': withOptional('>', tkArrow, tkMinus)
  of '+': scanner.makeToken(tkPlus)
  of '/': scanner.makeToken(tkSlash)
  of '*': scanner.makeToken(tkStar)
  of '#': scanner.makeToken(tkHashtag)
  of '&': scanner.makeToken(tkAmpersand)
  of '!': withOptional('=', tkBangEqual, tkBang)
  of '=': withOptional('=', tkEqualEqual, tkEqual)
  of '<': withOptional('=', tkLessEqual, tkLess)
  of '>': withOptional('=', tkGreaterEqual, tkGreater)
  of '\"': scanner.scanString()
  of Digits: scanner.scanNumber()
  of '@':
    # "@[" opens a list, "@{" opens a table, anything else is a label
    if scanner.match('['): scanner.makeToken(tkStartList)
    elif scanner.match('{'): scanner.makeToken(tkStartTable)
    else: scanner.scanLabel()
  of ':':
    # "::" first, then ':'-prefixed identifiers, then a bare colon
    if scanner.match(':'): scanner.makeToken(tkDoublecolon)
    elif scanner.peek().canContIdent(): scanner.scanIdentifier()
    else: scanner.makeToken(tkColon)
  else:
    # ':' can start ident, but is not handled here
    if first.canStartIdent(): scanner.scanIdentifier()
    else: scanner.errorToken("Unexpected character.")