## Hand-written scanner (lexer): turns a source string into `Token`s, one
## token per call to `scanToken`.

import strutils
import tables
import strformat

type
  Scanner* = ref object
    ## Mutable cursor over `source`; create one with `newScanner`.
    start: int      # index of the first character of the token being scanned
    current: int    # index of the next character to be consumed
    line: int       # incremented once for every '\n' consumed
    source: string  # the complete text being scanned

  TokenType* = enum
    tkNone, # the default tokentype, if encountered anywhere, erroring out is the best course of action
    tkLeftParen, tkRightParen, tkLeftBrace, tkRightBrace,
    tkComma, tkDot, tkMinus, tkPlus, tkSemicolon, tkSlash, tkStar,
    tkBang, tkBangEqual, tkGreater, tkGreaterEqual,
    tkLess, tkLessEqual, tkEqual, tkEqualEqual,
    tkIdentifier, tkString, tkNumber,
    tkAnd, tkElse, tkFalse, tkFor, tkFunct, tkGoto, tkIf, tkNil, tkOr,
    tkPrint, tkLabel, tkBreak, tkTrue, tkVar, tkWhile,
    tkError, tkEof

  Token* = object
    ## A single lexeme. For `tkError` tokens `text` holds the error message
    ## instead of a slice of the source.
    tokenType*: TokenType
    text*: string
    line*: int

proc debugPrint*(token: Token) =
  ## Writes a one-line description of `token` to stdout.
  write stdout, &"Token of type {$token.tokenType} [{token.text}] at line {$token.line}\n"

func isAtEnd(scanner: Scanner): bool =
  ## True once every character of the source has been consumed.
  scanner.current > scanner.source.high

proc advance(scanner: Scanner): char =
  ## Consumes the next character and returns it. Callers must make sure the
  ## scanner is not at the end of the source.
  result = scanner.source[scanner.current]
  scanner.current.inc

func peek(scanner: Scanner): char =
  ## Next unconsumed character, or '\0' at the end of the source.
  if scanner.isAtEnd(): '\0'
  else: scanner.source[scanner.current]

func peekNext(scanner: Scanner): char =
  ## Character after the next one, or '\0' if there is none.
  if scanner.current < scanner.source.high:
    scanner.source[scanner.current + 1]
  else:
    '\0'

proc match(scanner: Scanner, exp: char): bool =
  ## Consumes the next character only if it equals `exp`; reports whether it
  ## did so.
  # The explicit end check keeps `exp == '\0'` from matching the sentinel
  # that `peek` returns at the end of input and advancing out of bounds.
  result = not scanner.isAtEnd() and scanner.peek() == exp
  if result:
    discard scanner.advance()

proc newScanner*(source: string): Scanner =
  ## Creates a scanner positioned at the first character of `source`.
  # NOTE(review): line numbering starts at 0 here -- confirm that consumers
  # of `Token.line` expect zero-based lines.
  Scanner(source: source, start: 0, current: 0, line: 0)

func makeToken(scanner: Scanner, tokenType: TokenType): Token =
  ## Builds a token of `tokenType` whose text is the source between `start`
  ## (inclusive) and `current` (exclusive), tagged with the current line.
  Token(
    tokenType: tokenType,
    text: scanner.source[scanner.start ..< scanner.current],
    line: scanner.line)

func errorToken(scanner: Scanner, msg: string): Token =
  ## Builds a `tkError` token carrying `msg` as its text.
  Token(tokenType: tkError, text: msg, line: scanner.line)

proc skipWhitespace(scanner: Scanner) =
  ## Skips spaces, carriage returns, tabs, newlines and `//` line comments,
  ## counting every newline that is consumed.
  while true:
    case scanner.peek()
    of ' ', '\r', '\t':
      discard scanner.advance()
    of '\n':
      scanner.line.inc
      discard scanner.advance()
    of '/':
      # A lone slash is an operator and is left for scanToken.
      if scanner.peekNext() != '/':
        return
      # The terminating newline is left in place so that the '\n' branch
      # above counts it on the next iteration.
      while scanner.peek() != '\n' and not scanner.isAtEnd():
        discard scanner.advance()
    else:
      return

proc scanString(scanner: Scanner): Token =
  ## Scans the rest of a string literal; the opening quote has already been
  ## consumed. The token text includes both quotes. Returns a `tkError`
  ## token if the source ends before the closing quote.
  while scanner.peek() != '"' and not scanner.isAtEnd():
    if scanner.peek() == '\n':
      scanner.line.inc
    discard scanner.advance()

  if scanner.isAtEnd():
    return scanner.errorToken("Unterminated string.")

  discard scanner.advance() # the closing quote
  scanner.makeToken(tkString)

proc scanNumber(scanner: Scanner): Token =
  ## Scans the rest of a number: digits, optionally followed by a '.' and at
  ## least one more digit. A trailing '.' is not part of the number.
  while scanner.peek() in Digits:
    discard scanner.advance()

  if scanner.peek() == '.' and scanner.peekNext() in Digits:
    discard scanner.advance() # the decimal point
    while scanner.peek() in Digits:
      discard scanner.advance()

  scanner.makeToken(tkNumber)

const keywords = {
  "and": tkAnd,
  "break": tkBreak,
  "else": tkElse,
  "false": tkFalse,
  "for": tkFor,
  "funct": tkFunct, # here's a language that uses funct... still waiting for the day when a good de-funct joke comes to my mind that I can abuse
  "goto": tkGoto,
  "if": tkIf,
  "nil": tkNil,
  "or": tkOr,
  "print": tkPrint,
  "true": tkTrue,
  "var": tkVar,
  "while": tkWhile
}.toTable

func canStartIdent(chr: char): bool =
  ## Letters, '_' and '^' may start an identifier.
  chr in Letters or chr in {'_', '^'}

func canContIdent(chr: char): bool =
  ## Identifier start characters and digits may continue an identifier.
  canStartIdent(chr) or chr in Digits

proc scanIdentifier(scanner: Scanner): Token =
  ## Scans the rest of an identifier and returns the matching keyword token,
  ## or `tkIdentifier` when the text is not a keyword.
  while scanner.peek().canContIdent:
    discard scanner.advance()
  let text = scanner.source[scanner.start ..< scanner.current]
  # Single lookup: anything that is not a keyword is a plain identifier.
  scanner.makeToken(keywords.getOrDefault(text, tkIdentifier))

func canContLabel(chr: char): bool =
  ## Only letters and '_' may appear in a label name.
  chr in Letters or chr == '_'

proc scanLabel(scanner: Scanner): Token =
  ## Scans a label name; the leading '@' has already been consumed and is
  ## part of the token text. Returns a `tkError` token when no label
  ## character follows the '@'.
  if not scanner.peek().canContLabel:
    return scanner.errorToken("Labels must only contain letters and underscores.")
  while scanner.peek().canContLabel:
    discard scanner.advance()
  scanner.makeToken(tkLabel)

proc scanToken*(scanner: Scanner): Token =
  ## Skips whitespace and comments, then scans and returns the next token.
  ## Returns `tkEof` once the source is exhausted and a `tkError` token
  ## (message in `text`) for input that cannot be tokenized.
  scanner.skipWhitespace()
  scanner.start = scanner.current

  if scanner.isAtEnd():
    return scanner.makeToken(tkEof)

  let c = scanner.advance()
  result =
    case c
    of '(': scanner.makeToken(tkLeftParen)
    of ')': scanner.makeToken(tkRightParen)
    of '{': scanner.makeToken(tkLeftBrace)
    of '}': scanner.makeToken(tkRightBrace)
    of ';': scanner.makeToken(tkSemicolon)
    of ',': scanner.makeToken(tkComma)
    of '.': scanner.makeToken(tkDot)
    of '-': scanner.makeToken(tkMinus)
    of '+': scanner.makeToken(tkPlus)
    of '/': scanner.makeToken(tkSlash)
    of '*': scanner.makeToken(tkStar)
    of '!':
      if scanner.match('='): scanner.makeToken(tkBangEqual)
      else: scanner.makeToken(tkBang)
    of '=':
      if scanner.match('='): scanner.makeToken(tkEqualEqual)
      else: scanner.makeToken(tkEqual)
    of '<':
      if scanner.match('='): scanner.makeToken(tkLessEqual)
      else: scanner.makeToken(tkLess)
    of '>':
      if scanner.match('='): scanner.makeToken(tkGreaterEqual)
      else: scanner.makeToken(tkGreater)
    of '"': scanner.scanString()
    of Digits: scanner.scanNumber()
    of '@': scanner.scanLabel()
    else:
      if c.canStartIdent(): scanner.scanIdentifier()
      else: scanner.errorToken("Unexpected character.")