# A simple tokenizer implementation with one character of lookahead.
# This module has been designed to be easily extendible in its functionality,
# given that JAPL is in a state of high activity and many features are
# being added along the way. To add support for a new keyword, just create
# an appropriate TokenType entry in the enum in the file at meta/tokentype.nim
# and then add it to the constant RESERVED table. A similar approach applies for
# other tokens, but multi-character ones require more tweaking

import system
import strutils
import strformat
import tables

import common
import meta/tokentype
import meta/tokenobject
import meta/valueobject


# Single-character tokens that can be matched directly
const TOKENS = to_table({
    '(': TokenType.LP, ')': TokenType.RP,
    '{': TokenType.LB, '}': TokenType.RB,
    '.': TokenType.DOT, ',': TokenType.COMMA,
    '-': TokenType.MINUS, '+': TokenType.PLUS,
    ';': TokenType.SEMICOLON, '*': TokenType.STAR,
    '>': TokenType.GT, '<': TokenType.LT,
    '=': TokenType.EQ, '!': TokenType.NEG,
    '/': TokenType.SLASH, '%': TokenType.MOD,
    '[': TokenType.LS, ']': TokenType.RS,
    ':': TokenType.COLON})

# Reserved keywords of the language
const RESERVED = to_table({
    "or": TokenType.OR, "and": TokenType.AND,
    "class": TokenType.CLASS, "fun": TokenType.FUN,
    "if": TokenType.IF, "else": TokenType.ELSE,
    "for": TokenType.FOR, "while": TokenType.WHILE,
    "var": TokenType.VAR, "nil": TokenType.NIL,
    "true": TokenType.TRUE, "false": TokenType.FALSE,
    "return": TokenType.RETURN,
    "this": TokenType.THIS, "super": TokenType.SUPER,
    "del": TokenType.DEL, "break": TokenType.BREAK,
    "continue": TokenType.CONTINUE})


proc initLexer*(source: string): Lexer =
    ## Initializes the lexer over the given source string
    result = Lexer(source: source, tokens: @[], line: 1, start: 0, current: 0, errored: false)


proc done(self: Lexer): bool =
    ## Returns true if the lexer has consumed the whole source
    result = self.current >= self.source.len


proc step(self: var Lexer): char =
    ## Consumes and returns the current character,
    ## or '\0' if the input is exhausted
    if self.done():
        return '\0'
    self.current = self.current + 1
    result = self.source[self.current - 1]


proc peek(self: Lexer): char =
    ## Returns the current character without consuming it
    if self.done():
        result = '\0'
    else:
        result = self.source[self.current]


proc match(self: var Lexer, what: char): bool =
    ## Consumes the current character and returns true if it
    ## equals the given one, otherwise leaves it untouched
    if self.done():
        return false
    elif self.peek() != what:
        return false
    self.current = self.current + 1
    return true


proc peekNext(self: Lexer): char =
    ## Returns the character after the current one without
    ## consuming it (the single character of lookahead)
    if self.current + 1 >= self.source.len:
        result = '\0'
    else:
        result = self.source[self.current + 1]


proc createToken(self: var Lexer, tokenType: TokenType, literal: Value): Token =
    ## Builds a token of the given type out of the lexeme scanned so far
    result = Token(kind: tokenType,
                   lexeme: self.source[self.start..<self.current],
                   literal: literal,
                   line: self.line)


proc parseString(self: var Lexer, delimiter: char) =
    ## Parses a string literal enclosed by the given delimiter
    while self.peek() != delimiter and not self.done():
        if self.peek() == '\n':   # String literals may span multiple lines
            self.line = self.line + 1
        discard self.step()
    if self.done():
        self.errored = true
        echo &"SyntaxError: Unterminated string literal at line {self.line}"
    discard self.step()   # Consume the closing delimiter
    self.tokens.add(self.createToken(STR, self.source[self.start..<self.current].asStr()))


proc parseNumber(self: var Lexer) =
    ## Parses integer and floating point literals.
    ## asInt and asFloat are assumed to be Value constructors
    ## from meta/valueobject, analogous to asStr
    while isDigit(self.peek()):
        discard self.step()
    if self.peek() == '.':
        discard self.step()
        while isDigit(self.peek()):
            discard self.step()
        self.tokens.add(self.createToken(NUMBER, parseFloat(self.source[self.start..<self.current]).asFloat()))
    else:
        self.tokens.add(self.createToken(NUMBER, parseInt(self.source[self.start..<self.current]).asInt()))


proc parseIdentifier(self: var Lexer) =
    ## Parses identifiers and reserved keywords
    while isAlphaNumeric(self.peek()) or self.peek() == '_':
        discard self.step()
    var text: string = self.source[self.start..<self.current]
    var keyword = RESERVED.getOrDefault(text, ID)   # Non-reserved names are plain identifiers
    self.tokens.add(self.createToken(keyword, text.asStr()))


proc scanToken(self: var Lexer) =
    ## Scans a single token from the current position in the source
    var single = self.step()
    if single in [' ', '\t', '\r']:   # Whitespace is not meaningful
        return
    elif single == '\n':
        self.line = self.line + 1
    elif single in ['"', '\'']:
        self.parseString(single)
    elif single.isDigit():
        self.parseNumber()
    elif single.isAlphaNumeric() or single == '_':
        self.parseIdentifier()
    elif single in TOKENS:
        if single == '/' and self.match('/'):
            # A line comment: discard everything until the end of the line
            while self.peek() != '\n' and not self.done():
                discard self.step()
        elif single == '=' and self.match('='):
            self.tokens.add(self.createToken(DEQ, "==".asStr()))
        elif single == '>' and self.match('='):
            self.tokens.add(self.createToken(GE, ">=".asStr()))
        elif single == '<' and self.match('='):
            self.tokens.add(self.createToken(LE, "<=".asStr()))
        elif single == '!' and self.match('='):
            self.tokens.add(self.createToken(NE, "!=".asStr()))
        elif single == '*' and self.match('*'):
            self.tokens.add(self.createToken(POW, "**".asStr()))
        else:
            self.tokens.add(self.createToken(TOKENS[single], asStr(&"{single}")))
    else:
        self.errored = true
        echo &"SyntaxError: Unexpected character '{single}' at line {self.line}"


proc lex*(self: var Lexer): seq[Token] =
    ## Lexes the whole source and returns the token stream,
    ## terminated by an EOF token
    while not self.done():
        self.start = self.current
        self.scanToken()
    self.tokens.add(Token(kind: EOF, lexeme: "EOF", literal: Value(kind: ValueTypes.NIL), line: self.line))
    return self.tokens
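

# A minimal usage sketch, not part of the module's public surface: it
# assumes this file compiles together with the common and meta/* modules
# imported above, and that Token's kind, lexeme and line fields are
# accessible here. The source string is just an arbitrary JAPL snippet.
when isMainModule:
    var lexer = initLexer("var answer = 6 * 7; // the answer")
    let tokens = lexer.lex()
    if not lexer.errored:
        for token in tokens:
            echo &"{token.kind} -> '{token.lexeme}' (line {token.line})"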