245 lines
7.3 KiB
Nim
245 lines
7.3 KiB
Nim
import strutils
|
|
import tables
|
|
import strformat
|
|
import unicode
|
|
|
|
type
  Scanner* = ref object
    ## Cursor state for lexing a single source string.
    start: int     ## byte index where the token being scanned begins
    current: int   ## byte index of the next rune to be read
    line: int      ## line counter, bumped each time a newline is consumed
    source: string ## the text being scanned

  TokenType* = enum
    ## Every kind of token the scanner can emit.
    # The default token type; if it is ever encountered, erroring out is
    # the best course of action.
    tkNone,
    # Grouping and separators.
    tkLeftParen, tkRightParen, tkLeftBrace, tkRightBrace, tkComma, tkDot,
    tkColon, tkDoublecolon, tkArrow,
    # Arithmetic and comparison operators.
    tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar, tkBang, tkBangEqual,
    tkGreater, tkGreaterEqual, tkLess, tkLessEqual, tkEqual, tkEqualEqual,
    # Collection openers ("@[" and "@{") and plain brackets.
    tkStartList, tkStartTable, tkLeftBracket, tkRightBracket,
    tkHashtag, tkAmpersand,
    # Literals and names.
    tkIdentifier, tkString,
    # Numbers, keywords and labels.
    tkNumber, tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil,
    tkOr, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    # Scanner-generated markers.
    tkError, tkEof

  Token* = object
    ## One lexed unit: its kind, its text, and the line it was produced on.
    tokenType*: TokenType
    text*: string ## source slice for normal tokens, the message for tkError
    line*: int
|
|
|
|
proc debugPrint*(token: Token) =
  ## Writes a one-line, human-readable description of `token` to stdout.
  stdout.write(&"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n")
|
|
|
|
proc isAtEnd(scanner: Scanner): bool =
  ## True once the cursor has moved past the last byte of the source.
  result = scanner.current >= scanner.source.len
|
|
|
|
proc advance(scanner: Scanner): Rune =
  ## Decodes the rune at the cursor, returns it, and moves the cursor past it.
  ## Does not check for end of input itself.
  fastRuneAt(scanner.source, scanner.current, result, true)
|
|
|
|
proc peek(scanner: Scanner): Rune =
  ## Returns the rune at the cursor without consuming it, or the NUL rune
  ## when the cursor is already past the end of the source.
  if not scanner.isAtEnd():
    fastRuneAt(scanner.source, scanner.current, result, false)
  else:
    result = Rune(0)
|
|
|
|
proc peekNext(scanner: Scanner): Rune =
  ## Returns the rune that follows the one at the cursor, without consuming
  ## anything. Yields the NUL rune when there is no such rune (the cursor is
  ## at the end, or the current rune is the last one).
  ##
  ## The look-ahead position is computed from the encoded length of the
  ## current rune rather than a fixed one-byte step, so a multi-byte rune at
  ## the cursor no longer causes decoding to start inside its UTF-8 sequence.
  result = Rune(0)
  if scanner.current > scanner.source.high:
    return
  let nextPos = scanner.current + scanner.source.runeLenAt(scanner.current)
  if nextPos <= scanner.source.high:
    fastRuneAt(scanner.source, nextPos, result, false)
|
|
|
|
template `==`(l: char, r: Rune): bool =
  ## Compares a char with a rune by decoding the char's one-byte string
  ## as a rune first.
  r == runeAt($l, 0)
|
|
|
|
template `==`(l: Rune, r: char): bool =
  ## Compares a rune with a char by decoding the char's one-byte string
  ## as a rune first.
  l == runeAt($r, 0)
|
|
|
|
proc match(scanner: Scanner, exp: char | Rune): bool =
  ## Consumes the rune at the cursor only if it equals `exp`.
  ## Returns whether a rune was consumed.
  result = scanner.peek() == exp
  if result:
    discard scanner.advance()
|
|
|
|
proc newScanner*(source: string): Scanner =
  ## Creates a scanner positioned at the first byte of `source`, with the
  ## line counter starting at 0.
  new(result)
  result.source = source
  result.line = 0
  result.current = 0
|
|
|
|
proc makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of `tokenType` whose text is the source slice from
  ## `start` up to (not including) `current`, tagged with the current line.
  Token(
    tokenType: tokenType,
    text: scanner.source[scanner.start ..< scanner.current],
    line: scanner.line
  )
|
|
|
|
proc errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a tkError token that carries `msg` as its text and the
  ## scanner's current line.
  Token(tokenType: tkError, text: msg, line: scanner.line)
|
|
|
|
|
|
|
|
proc toChar(r: Rune): char =
  ## Use only for matching runes in case statements.
  ## Runes that encode to a single byte map to that byte; every other rune
  ## maps to char(255), which is meant never to match anything.
  const neverMatches = char(255)
  if r.size() == 1:
    result = ($r)[0]
  else:
    result = neverMatches
|
|
|
|
proc skipWhitespace(scanner: Scanner) =
  ## Advances the cursor past whitespace and comments, stopping at the first
  ## rune that can begin a token (or at the end of the source).
  ##
  ## Handles spaces, tabs, carriage returns, newlines (each bumps the line
  ## counter), `//` comments running to the end of the line, and nestable
  ## `/* ... */` comments. An unterminated block comment swallows the rest
  ## of the input.
  while true:
    case scanner.peek().toChar()
    of ' ', '\r', '\t':
      discard scanner.advance()
    of '\n':
      scanner.line.inc
      discard scanner.advance()
    of '/':
      if scanner.peekNext() == '/':
        # Line comment: stop in front of the newline so the branch above
        # consumes it and counts the line.
        while not scanner.isAtEnd() and scanner.peek().toChar() != '\n':
          discard scanner.advance()
      elif scanner.peekNext() == '*':
        # Consume the whole opener first so that its '*' cannot double as
        # the start of a closer (otherwise "/*/" would count as a comment).
        discard scanner.advance()
        discard scanner.advance()
        var depth = 1
        while depth > 0 and not scanner.isAtEnd():
          let ch = scanner.peek().toChar()
          # peekNext is only consulted once the current rune is known to be
          # a single-byte '/' or '*'.
          if ch == '/' and scanner.peekNext() == '*':
            depth.inc
            discard scanner.advance()
            discard scanner.advance()
          elif ch == '*' and scanner.peekNext() == '/':
            depth.dec
            discard scanner.advance()
            discard scanner.advance()
          else:
            # Keep line numbers accurate across multi-line comments.
            if ch == '\n':
              scanner.line.inc
            discard scanner.advance()
      else:
        # A lone '/' is the division operator, not whitespace.
        return
    else:
      return
|
|
|
|
proc scanString(scanner: Scanner): Token =
  ## Scans the remainder of a string literal whose opening quote has already
  ## been consumed. Newlines inside the literal bump the line counter.
  ## Returns a tkString token including both quotes, or a tkError token if
  ## the source ends before the closing quote.
  while true:
    if scanner.isAtEnd():
      return scanner.errorToken("Unterminated string.")
    let ch = scanner.peek().toChar()
    if ch == '"':
      break
    if ch == '\n':
      scanner.line.inc
    discard scanner.advance()

  # Consume the closing quote.
  discard scanner.advance()
  result = scanner.makeToken(tkString)
|
|
|
|
proc scanNumber(scanner: Scanner): Token =
  ## Scans the rest of a number whose first digit has already been consumed:
  ## a run of digits, optionally followed by '.' and at least one more digit.
  template eatDigits() =
    while scanner.peek().toChar() in Digits:
      discard scanner.advance()

  eatDigits()
  # The dot is only part of the number when a digit follows it.
  if (scanner.peek().toChar() == '.') and (scanner.peekNext().toChar() in Digits):
    discard scanner.advance()
    eatDigits()

  result = scanner.makeToken(tkNumber)
|
|
|
|
# Maps each reserved word to the token type it scans as.
const keywords = toTable({
  "and": tkAnd,
  "break": tkBreak,
  "else": tkElse,
  "false": tkFalse,
  "for": tkFor,
  # here's a language that uses funct... still waiting for the day when a
  # good de-funct joke comes to my mind that I can abuse
  "funct": tkFunct,
  "goto": tkGoto,
  "if": tkIf,
  "nil": tkNil,
  "or": tkOr,
  "true": tkTrue,
  "var": tkVar,
  "while": tkWhile
})
|
|
|
|
proc canStartIdent(chr: Rune): bool =
  ## True for an underscore or any rune the unicode module deems alphabetic.
  result = chr.toChar() == '_' or chr.isAlpha()
|
|
|
|
proc canContIdent(chr: Rune): bool =
  ## True for anything that may start an identifier, plus ASCII digits.
  result = (chr.toChar() in Digits) or chr.canStartIdent()
|
|
|
|
proc scanIdentifier(scanner: Scanner): Token =
  ## Consumes the rest of an identifier and returns either the matching
  ## keyword token or, when the text is not a keyword, a tkIdentifier token.
  while canContIdent(scanner.peek()):
    discard scanner.advance()

  let lexeme = scanner.source[scanner.start ..< scanner.current]
  # One lookup: non-keywords fall back to tkIdentifier.
  result = scanner.makeToken(keywords.getOrDefault(lexeme, tkIdentifier))
|
|
|
|
proc canContLabel(chr: Rune): bool =
  ## True for an underscore or any rune the unicode module deems alphabetic;
  ## digits are not accepted in labels.
  result = chr.toChar() == '_' or chr.isAlpha()
|
|
|
|
proc scanLabel(scanner: Scanner): Token =
  ## Scans the name of a label; called after the leading '@' was consumed.
  ## Returns a tkError token when no label character follows, otherwise a
  ## tkLabel token covering the '@' and the run of label characters.
  if scanner.peek().canContLabel():
    while scanner.peek().canContLabel():
      discard scanner.advance()
    result = scanner.makeToken(tkLabel)
  else:
    result = scanner.errorToken("Labels must only contain letters and underscores.")
|
|
|
|
proc scanToken*(scanner: Scanner): Token =
  ## Skips leading whitespace and comments, then scans and returns the next
  ## token. Returns a tkEof token once the source is exhausted and a tkError
  ## token for input that cannot start any token.
  template emit(kind: TokenType): Token =
    scanner.makeToken(kind)

  template pick(follower: char, paired, lone: TokenType): Token =
    # Two-character operator when `follower` comes next, else the short one.
    (if scanner.match(follower): emit(paired) else: emit(lone))

  scanner.skipWhitespace()
  scanner.start = scanner.current

  if scanner.isAtEnd():
    return emit(tkEof)

  let first = scanner.advance()

  result =
    case first.toChar()
    of '(': emit(tkLeftParen)
    of ')': emit(tkRightParen)
    of '{': emit(tkLeftBrace)
    of '}': emit(tkRightBrace)
    of '[': emit(tkLeftBracket)
    of ']': emit(tkRightBracket)
    of ';': emit(tkSemicolon)
    of ',': emit(tkComma)
    of '.': emit(tkDot)
    of '-': pick('>', tkArrow, tkMinus)
    of '+': emit(tkPlus)
    of '/': emit(tkSlash)
    of '*': emit(tkStar)
    of '#': emit(tkHashtag)
    of '&': emit(tkAmpersand)
    of '!': pick('=', tkBangEqual, tkBang)
    of '=': pick('=', tkEqualEqual, tkEqual)
    of '<': pick('=', tkLessEqual, tkLess)
    of '>': pick('=', tkGreaterEqual, tkGreater)
    of '"': scanner.scanString()
    of Digits: scanner.scanNumber()
    of '@':
      # "@[" opens a list, "@{" opens a table, anything else is a label.
      if scanner.match('['): emit(tkStartList)
      elif scanner.match('{'): emit(tkStartTable)
      else: scanner.scanLabel()
    of ':':
      # "::" is its own token; ':' directly followed by an identifier
      # character begins an identifier; otherwise it is a plain colon.
      if scanner.match(':'): emit(tkDoublecolon)
      elif scanner.peek().canContIdent(): scanner.scanIdentifier()
      else: emit(tkColon)
    else:
      # ':' can start an identifier too, but that case is handled above.
      if first.canStartIdent(): scanner.scanIdentifier()
      else: scanner.errorToken("Unexpected character.")
|