# japl/nim/lexer.nim
#
# A tokenizer (lexer) for the JAPL language.
import tables
import meta/tokentype
import meta/tokenobject
import meta/exceptions
import meta/valueobject
import system
import strutils
import strformat
# Single-character lexemes that map directly to a token type.
# (Removed a scraped VCS timestamp line that had been pasted into the table.)
const TOKENS = to_table({
  "(": TokenType.LP, ")": TokenType.RP,
  "{": TokenType.LB, "}": TokenType.RB,
  ".": TokenType.DOT, ",": TokenType.COMMA,
  "-": TokenType.MINUS, "+": TokenType.PLUS,
  ";": TokenType.SEMICOLON, "*": TokenType.STAR,
  ">": TokenType.GT, "<": TokenType.LT,
  "=": TokenType.EQ, "!": TokenType.NEG,
  "/": TokenType.SLASH, "%": TokenType.MOD,
  "[": TokenType.LS, "]": TokenType.RS})
# Keywords reserved by the language, mapped to their token types.
const RESERVED = to_table({
  "and": TokenType.AND, "or": TokenType.OR,
  "if": TokenType.IF, "else": TokenType.ELSE,
  "while": TokenType.WHILE, "for": TokenType.FOR,
  "break": TokenType.BREAK, "return": TokenType.RETURN,
  "fun": TokenType.FUN, "class": TokenType.CLASS,
  "this": TokenType.THIS, "super": TokenType.SUPER,
  "var": TokenType.VAR, "del": TokenType.DEL,
  "nil": TokenType.NIL,
  "true": TokenType.TRUE, "false": TokenType.FALSE})
type
  Lexer* = object
    ## Walks JAPL source text and accumulates the tokens it finds.
    source: string      # the raw program text being scanned
    tokens: seq[Token]  # tokens produced so far
    line: int           # current line number, 1-based
    start: int          # index where the lexeme being scanned starts
    current: int        # index of the next unconsumed character
proc initLexer*(source: string): Lexer =
  ## Creates a new Lexer over `source`, positioned at the first
  ## character of line 1 with no tokens scanned yet.
  ## (Removed a scraped VCS timestamp line that split the proc body.)
  result = Lexer(source: source, tokens: @[], line: 1, start: 0, current: 0)
proc done(self: Lexer): bool =
  ## True once every character of the source has been consumed.
  ## (Removed a scraped VCS timestamp line that split the proc body.)
  result = self.current >= self.source.len
proc step(self: var Lexer): string =
  ## Consumes the current character and returns it as a one-character
  ## string; returns "" when the source is exhausted.
  if self.done():
    return ""
  inc self.current
  result = $self.source[self.current - 1]
proc peek(self: Lexer): string =
  ## Returns the current character without consuming it, or "" at the
  ## end of the source.
  ## (Removed a scraped VCS timestamp line that split the proc body.)
  if self.done():
    result = ""
  else:
    result = &"{self.source[self.current]}"
proc match(self: var Lexer, what: string): bool =
  ## Conditional consume: advances past the current character and
  ## returns true only when it equals `what`; otherwise leaves the
  ## position unchanged and returns false.
  result = not self.done() and self.peek() == what
  if result:
    inc self.current
proc peekNext(self: Lexer): string =
  ## Returns the character one past the current one without consuming
  ## anything, or "" when that position is past the end of the source.
  ## (Removed a scraped VCS timestamp line that split the proc body.)
  if self.current + 1 >= self.source.len:
    result = ""
  else:
    result = &"{self.source[self.current + 1]}"
# Character-class predicates. Callers only ever pass the single-character
# strings produced by peek()/step(), so plain lexicographic comparison is
# sufficient. (Removed a scraped VCS timestamp line inside isDigit.)

proc isDigit(s: string): bool =
  ## True when `s` is a single ASCII digit ("0".."9"); "" is false.
  result = s >= "0" and s <= "9"

proc isAlpha(s: string): bool =
  ## True when `s` is a single ASCII letter or an underscore.
  result = (s >= "a" and s <= "z") or (s >= "A" and s <= "Z") or s == "_"

proc isAlnum(s: string): bool =
  ## True when `s` is a letter, a digit or an underscore.
  result = isDigit(s) or isAlpha(s)
proc createToken(self: var Lexer, tokenType: TokenType, literal: Value): Token =
  ## Builds a Token of the given kind whose lexeme is the source slice
  ## [start, current) and which carries `literal` and the current line.
  ## (Removed scraped VCS timestamp lines interleaved with the body.)
  result = Token(kind: tokenType,
                 lexeme: self.source[self.start..<self.current],
                 literal: literal,
                 line: self.line)
proc parseString(self: var Lexer, delimiter: string) =
  ## Scans a string literal terminated by `delimiter` (the opening quote
  ## has already been consumed by the caller) and appends a STR token.
  ## Raises ParseError when the literal is still open at end of input.
  while self.peek() != delimiter and not self.done():
    if self.peek() == "\n":   # literals may span lines
      self.line = self.line + 1
    discard self.step()
  if self.done():
    raise newException(ParseError, &"Unterminated string literal at {self.line}")
  discard self.step()   # consume the closing quote
  # Get the value between quotes: start sits on the opening quote and
  # current - 1 on the closing one, so slice from start + 1 (the old
  # slice started at `start` and wrongly kept the opening quote).
  let value = StrValue(value: self.source[self.start + 1..<self.current - 1])
  let token = self.createToken(STR, value)
  self.tokens.add(token)
proc parseNumber(self: var Lexer) =
  ## Scans a numeric literal and appends an INT or FLOAT token.
  ## A '.' only belongs to the number when it is followed by a digit,
  ## so input like `123.foo` lexes as INT, DOT, ID instead of feeding
  ## the dangling "123." to parseFloat.
  while isDigit(self.peek()):
    discard self.step()
  if self.peek() == "." and isDigit(self.peekNext()):
    discard self.step()   # consume the '.'
    while isDigit(self.peek()):
      discard self.step()
    let value = FloatValue(value: parseFloat(self.source[self.start..<self.current]))
    self.tokens.add(self.createToken(FLOAT, value))
  else:
    let value = IntValue(value: parseInt(self.source[self.start..<self.current]))
    self.tokens.add(self.createToken(INT, value))
proc parseIdentifier(self: var Lexer) =
  ## Scans an identifier or a reserved keyword and appends the
  ## corresponding token; the literal carries the identifier text.
  while isAlnum(self.peek()):
    discard self.step()
  let text = self.source[self.start..<self.current]
  let kind = if text in RESERVED: RESERVED[text] else: ID
  self.tokens.add(self.createToken(kind, StrValue(value: text)))
proc parseComment(self: var Lexer) =
  ## Skips a /* ... */ block comment, honoring nesting. Assumes the
  ## opening "/*" has already been consumed. Raises ParseError when the
  ## comment is still open at end of input.
  var closed = false
  while not self.done():
    var finish = self.peek() & self.peekNext()
    if finish == "/*":
      # Nested comment: consume its opener and recurse past it. The
      # old code fell through to an unconditional step() afterwards,
      # which swallowed the character right after the nested comment
      # (possibly the '*' of this comment's own terminator).
      discard self.step()
      discard self.step()
      self.parseComment()
    elif finish == "*/":
      closed = true
      discard self.step()   # consume the two closing characters
      discard self.step()
      break
    else:
      if self.peek() == "\n":
        self.line = self.line + 1   # keep line numbers accurate inside comments
      discard self.step()
  if self.done() and not closed:
    raise newException(ParseError, &"Unexpected EOF at line {self.line}")
proc scanToken(self: var Lexer) =
  ## Scans one token starting at the current position and appends it to
  ## the token list. Whitespace, newlines and comments yield no token.
  ## Raises ParseError on a character the language does not know.
  var single = self.step()
  if single in [" ", "\t", "\r"]:
    return   # insignificant whitespace
  elif single == "\n":
    # Count the line; the old code advanced `current` here instead,
    # which skipped the next character and never updated `line`.
    self.line = self.line + 1
  elif single in ["\"", "'"]:
    # Both quote styles open a string literal. The old list was the
    # garbled ["""'""", "'"] (i.e. ["'", "'"]), so double-quoted
    # strings raised "Unexpected character".
    self.parseString(single)
  elif single.isDigit():
    self.parseNumber()
  elif single.isAlnum():
    self.parseIdentifier()
  elif single in TOKENS:
    # Two-character operators are tried before the single-char fallback.
    if single == "/" and self.match("/"):
      while self.peek() != "\n" and not self.done():
        discard self.step()   # line comment: skip to end of line
    elif single == "/" and self.match("*"):
      self.parseComment()
    elif single == "=" and self.match("="):
      self.tokens.add(self.createToken(DEQ, StrValue(value: "==")))
    elif single == ">" and self.match("="):
      self.tokens.add(self.createToken(GE, StrValue(value: ">=")))
    elif single == "<" and self.match("="):
      self.tokens.add(self.createToken(LE, StrValue(value: "<=")))
    elif single == "!" and self.match("="):
      self.tokens.add(self.createToken(NE, StrValue(value: "!=")))
    elif single == "*" and self.match("*"):
      self.tokens.add(self.createToken(POW, StrValue(value: "**")))
    else:
      self.tokens.add(self.createToken(TOKENS[single], StrValue(value: single)))
  else:
    raise newException(ParseError, &"Unexpected character '{single}' at {self.line}")
proc lex*(self: var Lexer): seq[Token] =
  ## Tokenizes the entire source and returns the token list, terminated
  ## by an EOF token carrying the final line number.
  ## (Removed a scraped VCS timestamp line that split the proc body.)
  while not self.done():
    self.start = self.current   # each lexeme starts where the last ended
    self.scanToken()
  self.tokens.add(Token(kind: EOF, lexeme: "", literal: IntValue(value: -1), line: self.line))
  return self.tokens