2022-01-20 21:54:11 +01:00
|
|
|
import strutils
|
|
|
|
import tables
|
|
|
|
import strformat
|
|
|
|
|
|
|
|
type
  Scanner* = ref object ## Cursor state over a source string being tokenized.
    start: int     ## index at which the token being scanned begins
    current: int   ## index of the next character to consume
    line: int      ## line counter, incremented for every newline consumed
    source: string ## the text being scanned

  TokenType* = enum ## Every kind of token the scanner can produce.
    # Default value of the enum. Meeting it anywhere means something went
    # wrong, and erroring out is the best course of action.
    tkNone,
    # punctuation and operators
    tkLeftParen, tkRightParen,
    tkLeftBrace, tkRightBrace,
    tkComma, tkDot, tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar,
    tkBang, tkBangEqual,
    tkGreater, tkGreaterEqual,
    tkLess, tkLessEqual,
    tkEqual, tkEqualEqual,
    # identifiers and literals
    tkIdentifier, tkString, tkNumber,
    # keywords, plus tkLabel which is produced by `@name` labels
    tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil,
    tkOr, tkPrint, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    # scanner status tokens
    tkError, tkEof

  Token* = object ## One scanned token.
    tokenType*: TokenType ## what kind of token this is
    text*: string         ## source text of the token, or the message of a tkError
    line*: int            ## value of the scanner's line counter at creation
|
|
|
|
|
|
|
|
proc debugPrint*(token: Token) =
  ## Writes a one-line description of `token` (its type, text and line) to
  ## standard output.
  let rendered = &"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n"
  stdout.write(rendered)
|
|
|
|
|
|
|
|
proc isAtEnd(scanner: Scanner): bool =
  ## True once the cursor has moved past the last character of the source.
  result = scanner.current >= scanner.source.len
|
|
|
|
|
|
|
|
proc advance(scanner: Scanner): char =
  ## Moves the cursor one character forward and returns the character that
  ## was stepped over. It does not test for end of input itself.
  inc scanner.current
  result = scanner.source[pred(scanner.current)]
|
|
|
|
|
|
|
|
proc peek(scanner: Scanner): char =
  ## Returns the character under the cursor without consuming it, or '\0'
  ## when the scanner is at the end of the source.
  result = '\0'
  if not scanner.isAtEnd():
    result = scanner.source[scanner.current]
|
|
|
|
|
|
|
|
proc peekNext(scanner: Scanner): char =
  ## Returns the character one position past the cursor without consuming
  ## anything, or '\0' when the source has no such character.
  result = '\0'
  let ahead = scanner.current + 1
  if ahead <= scanner.source.high:
    result = scanner.source[ahead]
|
|
|
|
|
|
|
|
proc match(scanner: Scanner, exp: char): bool =
  ## Consumes the character under the cursor only when it equals `exp`, and
  ## reports whether it was consumed.
  result = scanner.peek() == exp
  if result:
    discard scanner.advance()
|
|
|
|
|
|
|
|
proc newScanner*(source: string): Scanner =
  ## Creates a scanner positioned at the first character of `source`, with
  ## its line counter at 0.
  new(result)
  result.source = source
  result.line = 0
  result.current = 0
|
|
|
|
|
|
|
|
proc makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of kind `tokenType` whose text is the source slice from
  ## `start` up to, but not including, `current`, stamped with the scanner's
  ## current line counter.
  Token(
    tokenType: tokenType,
    text: scanner.source[scanner.start ..< scanner.current],
    line: scanner.line
  )
|
|
|
|
|
|
|
|
proc errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a `tkError` token that carries `msg` as its text and the
  ## scanner's current line counter.
  Token(tokenType: tkError, text: msg, line: scanner.line)
|
|
|
|
|
|
|
|
proc skipWhitespace(scanner: Scanner) =
  ## Consumes spaces, carriage returns, tabs, newlines (bumping the line
  ## counter for each) and `//` comments. Stops at the first character that
  ## is none of these, which includes the '\0' that `peek` reports at end
  ## of input.
  while true:
    let next = scanner.peek()
    if next == '\n':
      inc scanner.line
      discard scanner.advance()
    elif next in {' ', '\r', '\t'}:
      discard scanner.advance()
    elif next == '/' and scanner.peekNext() == '/':
      # Line comment: swallow everything up to the newline. The newline
      # itself is left for the next iteration so the line gets counted.
      while not (scanner.isAtEnd() or scanner.peek() == '\n'):
        discard scanner.advance()
    else:
      # A lone '/' or any other character starts a token.
      break
|
|
|
|
|
|
|
|
proc scanString(scanner: Scanner): Token =
  ## Scans the remainder of a string literal; the caller has already
  ## consumed the opening quote. Newlines inside the literal are allowed and
  ## bump the line counter. Returns a `tkString` token whose text spans both
  ## quotes, or a `tkError` token when the input ends before a closing quote.
  while not scanner.isAtEnd():
    let ch = scanner.advance()
    if ch == '"':
      # Closing quote consumed: the token is complete.
      return scanner.makeToken(tkString)
    if ch == '\n':
      inc scanner.line
  result = scanner.errorToken("Unterminated string.")
|
|
|
|
|
|
|
|
proc scanNumber(scanner: Scanner): Token =
  ## Scans the remainder of a number literal: a run of digits, optionally
  ## followed by a '.' and another run of digits. The dot is only taken when
  ## a digit comes right after it.
  while isDigit(scanner.peek()):
    discard scanner.advance()

  let hasFraction = scanner.peek() == '.' and isDigit(scanner.peekNext())
  if hasFraction:
    discard scanner.advance() # the '.'
    while isDigit(scanner.peek()):
      discard scanner.advance()

  result = scanner.makeToken(tkNumber)
|
|
|
|
|
|
|
|
# Maps every reserved word to the token type it scans as.
const keywords = toTable({
  "and": tkAnd, "break": tkBreak, "else": tkElse, "false": tkFalse,
  "for": tkFor,
  # here's a language that uses funct... still waiting for the day when a
  # good de-funct joke comes to my mind that I can abuse
  "funct": tkFunct,
  "goto": tkGoto, "if": tkIf, "nil": tkNil, "or": tkOr,
  "print": tkPrint, "true": tkTrue, "var": tkVar, "while": tkWhile
})
|
|
|
|
|
|
|
|
func canStartIdent(chr: char): bool =
  ## Reports whether `chr` may be the first character of an identifier:
  ## an ASCII letter, '_' or '^'.
  ##
  ## Declared as `func` so the compiler enforces that this predicate stays
  ## free of side effects.
  chr in Letters + {'_', '^'}
|
|
|
|
|
|
|
|
func canContIdent(chr: char): bool =
  ## Reports whether `chr` may appear after the first character of an
  ## identifier: anything `canStartIdent` accepts, or an ASCII digit.
  ##
  ## Declared as `func` so the compiler enforces that this predicate stays
  ## free of side effects.
  canStartIdent(chr) or chr in Digits
|
|
|
|
|
|
|
|
proc scanIdentifier(scanner: Scanner): Token =
  ## Scans the remainder of an identifier or keyword; the caller has already
  ## consumed its first character. Returns a token of the matching keyword
  ## type when the text is a key of `keywords`, and `tkIdentifier` otherwise.
  while scanner.peek().canContIdent():
    discard scanner.advance()

  # Build the token once and reuse its text for a single table lookup,
  # rather than slicing the source twice and hashing the text twice.
  result = scanner.makeToken(tkIdentifier)
  result.tokenType = keywords.getOrDefault(result.text, tkIdentifier)
|
|
|
|
|
|
|
|
func canContLabel(chr: char): bool =
  ## Reports whether `chr` may appear in a label name: an ASCII letter or
  ## '_'. Digits are not accepted.
  ##
  ## Declared as `func` so the compiler enforces that this predicate stays
  ## free of side effects.
  chr in Letters or chr == '_'
|
|
|
|
|
|
|
|
proc scanLabel(scanner: Scanner): Token =
  ## Scans a label name; the caller has already consumed the leading '@'.
  ## Returns a `tkLabel` token (its text includes the '@') when at least one
  ## label character follows, and a `tkError` token otherwise.
  if scanner.peek().canContLabel():
    while scanner.peek().canContLabel():
      discard scanner.advance()
    result = scanner.makeToken(tkLabel)
  else:
    result = scanner.errorToken("Labels must only contain letters and underscores.")
|
|
|
|
|
|
|
|
proc scanToken*(scanner: Scanner): Token =
  ## Skips whitespace and comments, then scans and returns the next token.
  ## Returns a `tkEof` token once the source is exhausted, and a `tkError`
  ## token for a character that starts no known token.
  scanner.skipWhitespace()
  scanner.start = scanner.current

  if scanner.isAtEnd():
    return scanner.makeToken(tkEof)

  let c = scanner.advance()
  case c
  # single-character tokens
  of '(': result = scanner.makeToken(tkLeftParen)
  of ')': result = scanner.makeToken(tkRightParen)
  of '{': result = scanner.makeToken(tkLeftBrace)
  of '}': result = scanner.makeToken(tkRightBrace)
  of ';': result = scanner.makeToken(tkSemicolon)
  of ',': result = scanner.makeToken(tkComma)
  of '.': result = scanner.makeToken(tkDot)
  of '-': result = scanner.makeToken(tkMinus)
  of '+': result = scanner.makeToken(tkPlus)
  of '/': result = scanner.makeToken(tkSlash)
  of '*': result = scanner.makeToken(tkStar)
  # operators that may be followed by '='
  of '!':
    let kind = if scanner.match('='): tkBangEqual else: tkBang
    result = scanner.makeToken(kind)
  of '=':
    let kind = if scanner.match('='): tkEqualEqual else: tkEqual
    result = scanner.makeToken(kind)
  of '<':
    let kind = if scanner.match('='): tkLessEqual else: tkLess
    result = scanner.makeToken(kind)
  of '>':
    let kind = if scanner.match('='): tkGreaterEqual else: tkGreater
    result = scanner.makeToken(kind)
  # multi-character tokens handled by dedicated scanners
  of '"': result = scanner.scanString()
  of Digits: result = scanner.scanNumber()
  of '@': result = scanner.scanLabel()
  elif c.canStartIdent(): result = scanner.scanIdentifier()
  else: result = scanner.errorToken("Unexpected character.")
|