## Lexical scanner: converts source text into `Token`s, one per call to
## `scanToken`. Positions inside the scanner are byte offsets into the
## UTF-8 encoded source; characters are decoded as `Rune`s.

import strutils
import tables
import strformat
import unicode

type
  Scanner* = ref object
    ## Cursor over the source text being tokenized.
    start: int    ## byte offset where the token being scanned begins
    current: int  ## byte offset of the next unread rune
    line: int     ## newlines consumed so far (the first line is 0)
    source: string

  TokenType* = enum
    tkNone, # the default tokentype, if encountered anywhere, erroring out is the best course of action
    tkLeftParen, tkRightParen, tkLeftBrace, tkRightBrace,
    tkComma, tkDot, tkColon, tkDoublecolon, tkArrow,
    tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar, tkBang,
    tkBangEqual, tkGreater, tkGreaterEqual, tkLess, tkLessEqual,
    tkEqual, tkEqualEqual,
    tkStartList, tkStartTable, tkLeftBracket, tkRightBracket,
    tkHashtag, tkAmpersand,
    tkIdentifier, tkString,
    tkNumber, tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil,
    tkOr, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    tkError, tkEof

  Token* = object
    ## A scanned token. For `tkError` tokens `text` holds the error message
    ## instead of source text.
    tokenType*: TokenType
    text*: string
    line*: int

proc debugPrint*(token: Token) =
  ## Writes a one-line description of `token` to stdout.
  write stdout, &"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n"

proc isAtEnd(scanner: Scanner): bool =
  ## True once every byte of the source has been consumed.
  scanner.current > scanner.source.high

proc advance(scanner: Scanner): Rune =
  ## Decodes the rune at the cursor and moves the cursor past it.
  ## Callers must make sure the scanner is not at the end of input.
  scanner.source.fastRuneAt(scanner.current, result, doInc = true)

proc peek(scanner: Scanner): Rune =
  ## Returns the rune at the cursor without consuming it, or the NUL rune
  ## when the input is exhausted.
  if scanner.isAtEnd():
    return Rune(0)
  scanner.source.fastRuneAt(scanner.current, result, doInc = false)

proc peekNext(scanner: Scanner): Rune =
  ## Returns the rune after the one at the cursor without consuming
  ## anything, or the NUL rune when there is no such rune.
  if scanner.isAtEnd():
    return Rune(0)
  # Step over the whole current rune: it may occupy more than one byte, so
  # `current + 1` could land in the middle of its encoding.
  let next = scanner.current + scanner.source.runeLenAt(scanner.current)
  if next > scanner.source.high:
    return Rune(0)
  scanner.source.fastRuneAt(next, result, doInc = false)

# Mixed char/Rune comparisons, so scanner code can compare against literals.
template `==`(l: char, r: Rune): bool = ($l).runeAt(0) == r
template `==`(l: Rune, r: char): bool = ($r).runeAt(0) == l

proc match(scanner: Scanner, exp: char | Rune): bool =
  ## Consumes the next rune and returns true if it equals `exp`; otherwise
  ## leaves the cursor untouched and returns false.
  if scanner.peek() == exp:
    discard scanner.advance()
    true
  else:
    false

proc newScanner*(source: string): Scanner =
  ## Creates a scanner positioned at the beginning of `source`.
  Scanner(source: source, line: 0, current: 0)

proc makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of `tokenType` whose text is the source consumed since
  ## `scanner.start`.
  result.tokenType = tokenType
  result.text = scanner.source[scanner.start ..< scanner.current]
  result.line = scanner.line

proc errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a `tkError` token carrying `msg` as its text.
  result.tokenType = tkError
  result.text = msg
  result.line = scanner.line

proc toChar(r: Rune): char =
  ## use only for matching runes in case statements
  if r.size() > 1:
    char(255) # never match this
  else:
    ($r)[0]

proc skipBlockComment(scanner: Scanner) =
  ## Consumes a `/* ... */` comment, honouring nested comments. The cursor
  ## must be on the opening `/`. An unterminated comment runs to the end of
  ## the input.
  # Consume the opener up front so the `*` of `/*` can never double as the
  # start of a closing `*/`.
  discard scanner.advance()
  discard scanner.advance()
  var depth = 1
  while depth > 0 and not scanner.isAtEnd():
    let c = scanner.peek().toChar()
    if c == '/' and scanner.peekNext().toChar() == '*':
      depth.inc
      discard scanner.advance()
      discard scanner.advance()
    elif c == '*' and scanner.peekNext().toChar() == '/':
      depth.dec
      discard scanner.advance()
      discard scanner.advance()
    else:
      # Keep line numbers accurate for tokens that follow the comment.
      if c == '\n':
        scanner.line.inc
      discard scanner.advance()

proc skipWhitespace(scanner: Scanner) =
  ## Skips spaces, tabs, carriage returns, newlines (counting lines),
  ## `//` line comments and `/* */` block comments.
  while true:
    case scanner.peek().toChar()
    of ' ', '\r', '\t':
      discard scanner.advance()
    of '\n':
      scanner.line.inc
      discard scanner.advance()
    of '/':
      if scanner.peekNext() == '/':
        # Stop in front of the newline; the next iteration counts it.
        while not scanner.isAtEnd() and scanner.peek().toChar() != '\n':
          discard scanner.advance()
      elif scanner.peekNext() == '*':
        scanner.skipBlockComment()
      else:
        # A lone '/' is an operator, not a comment.
        return
    else:
      return

proc scanString(scanner: Scanner): Token =
  ## Scans the rest of a string literal; the opening quote has already been
  ## consumed. The token text includes both quotes. Returns an error token
  ## if the input ends before the closing quote.
  while not scanner.isAtEnd() and scanner.peek().toChar() != '\"':
    if scanner.peek() == '\n':
      scanner.line.inc
    discard scanner.advance()
  if scanner.isAtEnd():
    return scanner.errorToken("Unterminated string.")
  discard scanner.advance() # closing quote
  scanner.makeToken(tkString)

proc scanNumber(scanner: Scanner): Token =
  ## Scans the rest of a number: digits, optionally followed by a '.' and
  ## more digits. A '.' not followed by a digit is left unconsumed.
  while scanner.peek().toChar() in Digits:
    discard scanner.advance()
  if scanner.peek().toChar() == '.' and scanner.peekNext().toChar() in Digits:
    discard scanner.advance()
    while scanner.peek().toChar() in Digits:
      discard scanner.advance()
  return scanner.makeToken(tkNumber)

const keywords = {
  "and": tkAnd,
  "break": tkBreak,
  "else": tkElse,
  "false": tkFalse,
  "for": tkFor,
  # here's a language that uses funct... still waiting for the day when a
  # good de-funct joke comes to my mind that I can abuse
  "proc": tkFunct,
  "goto": tkGoto,
  "if": tkIf,
  "nil": tkNil,
  "or": tkOr,
  "true": tkTrue,
  "var": tkVar,
  "while": tkWhile,
}.toTable

proc canStartIdent(chr: Rune): bool =
  ## True for runes that may begin an identifier: letters and '_'.
  chr.isAlpha() or chr.toChar() == '_'

proc canContIdent(chr: Rune): bool =
  ## True for runes that may continue an identifier: letters, '_', digits.
  canStartIdent(chr) or chr.toChar() in Digits

proc scanIdentifier(scanner: Scanner): Token =
  ## Scans the rest of an identifier and returns either the matching
  ## keyword token or `tkIdentifier`.
  while scanner.peek().canContIdent():
    discard scanner.advance()
  let text = scanner.source[scanner.start ..< scanner.current]
  # Single lookup: anything that is not a keyword is a plain identifier.
  scanner.makeToken(keywords.getOrDefault(text, tkIdentifier))

proc canContLabel(chr: Rune): bool =
  ## True for runes allowed in a label name: letters and '_'.
  chr.isAlpha() or chr.toChar() == '_'

proc scanLabel(scanner: Scanner): Token =
  ## Scans a label name; the leading '@' has already been consumed.
  ## Returns an error token if no label character follows.
  if not scanner.peek.canContLabel:
    return scanner.errorToken("Labels must only contain letters and underscores.")
  while scanner.peek.canContLabel:
    discard scanner.advance()
  return scanner.makeToken(tkLabel)

proc scanToken*(scanner: Scanner): Token =
  ## Skips whitespace and comments, then scans and returns the next token.
  ## Returns `tkEof` once the input is exhausted and `tkError` (with the
  ## message as text) for malformed input.
  scanner.skipWhitespace()
  scanner.start = scanner.current
  if scanner.isAtEnd():
    return scanner.makeToken(tkEof)

  let rune = scanner.advance()
  let c = rune.toChar()
  case c
  of '(': return scanner.makeToken(tkLeftParen)
  of ')': return scanner.makeToken(tkRightParen)
  of '{': return scanner.makeToken(tkLeftBrace)
  of '}': return scanner.makeToken(tkRightBrace)
  of '[': return scanner.makeToken(tkLeftBracket)
  of ']': return scanner.makeToken(tkRightBracket)
  of ';': return scanner.makeToken(tkSemicolon)
  of ',': return scanner.makeToken(tkComma)
  of '.': return scanner.makeToken(tkDot)
  of '-':
    if scanner.match('>'):
      return scanner.makeToken(tkArrow)
    else:
      return scanner.makeToken(tkMinus)
  of '+': return scanner.makeToken(tkPlus)
  of '/': return scanner.makeToken(tkSlash)
  of '*': return scanner.makeToken(tkStar)
  of '#': return scanner.makeToken(tkHashtag)
  of '&': return scanner.makeToken(tkAmpersand)
  of '!':
    if scanner.match('='):
      return scanner.makeToken(tkBangEqual)
    else:
      return scanner.makeToken(tkBang)
  of '=':
    if scanner.match('='):
      return scanner.makeToken(tkEqualEqual)
    else:
      return scanner.makeToken(tkEqual)
  of '<':
    if scanner.match('='):
      return scanner.makeToken(tkLessEqual)
    else:
      return scanner.makeToken(tkLess)
  of '>':
    if scanner.match('='):
      return scanner.makeToken(tkGreaterEqual)
    else:
      return scanner.makeToken(tkGreater)
  of '\"': return scanner.scanString()
  of Digits: return scanner.scanNumber()
  of '@':
    # '@[' opens a list, '@{' opens a table, anything else is a label.
    if scanner.match('['):
      return scanner.makeToken(tkStartList)
    elif scanner.match('{'):
      return scanner.makeToken(tkStartTable)
    else:
      return scanner.scanLabel()
  of ':':
    if scanner.match(':'):
      return scanner.makeToken(tkDoublecolon)
    elif scanner.peek().canContIdent():
      # ':' directly followed by an identifier character starts an
      # identifier whose text includes the ':'.
      return scanner.scanIdentifier()
    else:
      return scanner.makeToken(tkColon)
  else:
    if rune.canStartIdent():
      # ':' can start ident, but is not handled here
      return scanner.scanIdentifier()
    else:
      return scanner.errorToken("Unexpected character.")