2020-08-05 16:16:12 +02:00
|
|
|
import tables
|
|
|
|
import meta/tokentype
|
|
|
|
import meta/tokenobject
|
2020-08-05 17:50:29 +02:00
|
|
|
import meta/valueobject
|
2020-08-13 23:39:26 +02:00
|
|
|
import types/objecttype
|
2020-08-05 16:16:12 +02:00
|
|
|
import system
|
2020-08-05 18:45:14 +02:00
|
|
|
import strutils
|
|
|
|
import strformat
|
2020-08-05 16:16:12 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Maps every single-character lexeme to the token kind it produces.
# Two-character operators ('==', '>=', '**', ...) are handled in scanToken.
const TOKENS = to_table({
    # Grouping
    '(': TokenType.LP, ')': TokenType.RP,       # parentheses
    '{': TokenType.LB, '}': TokenType.RB,       # braces
    '[': TokenType.LS, ']': TokenType.RS,       # brackets
    # Punctuation
    '.': TokenType.DOT, ',': TokenType.COMMA,
    ';': TokenType.SEMICOLON, ':': TokenType.COLON,
    # Arithmetic operators
    '+': TokenType.PLUS, '-': TokenType.MINUS,
    '*': TokenType.STAR, '/': TokenType.SLASH,
    '%': TokenType.MOD,
    # Comparison / logic
    '<': TokenType.LT, '>': TokenType.GT,
    '=': TokenType.EQ, '!': TokenType.NEG})
|
2020-08-05 16:16:12 +02:00
|
|
|
|
|
|
|
# Maps reserved keywords to their token kinds; parseIdentifier consults
# this table to decide whether a scanned name is a keyword or a plain ID.
const RESERVED = to_table({
    # Logic
    "and": TokenType.AND, "or": TokenType.OR,
    # Declarations
    "class": TokenType.CLASS, "fun": TokenType.FUN,
    "var": TokenType.VAR,
    # Control flow
    "if": TokenType.IF, "else": TokenType.ELSE,
    "for": TokenType.FOR, "while": TokenType.WHILE,
    "break": TokenType.BREAK, "return": TokenType.RETURN,
    # Literals
    "nil": TokenType.NIL, "true": TokenType.TRUE,
    "false": TokenType.FALSE,
    # Objects
    "this": TokenType.THIS, "super": TokenType.SUPER,
    "del": TokenType.DEL})
|
|
|
|
|
|
|
|
|
|
|
|
type Lexer* = object
    ## Hand-written scanner state: walks `source` once, left to right,
    ## accumulating the produced tokens in `tokens`.
    source: string      # raw program text being scanned
    tokens: seq[Token]  # tokens emitted so far (EOF appended by lex)
    line: int           # current line number, used for error reporting
    start: int          # index of the first character of the current lexeme
    current: int        # index of the next character to consume
    errored*: bool      # set to true once any syntax error has been reported
|
2020-08-05 16:16:12 +02:00
|
|
|
|
|
|
|
|
2020-08-05 17:50:29 +02:00
|
|
|
proc initLexer*(source: string): Lexer =
    ## Creates a lexer positioned at the first character of `source`,
    ## with an empty token list and line counting starting at 1.
    Lexer(source: source,
          tokens: newSeq[Token](),
          line: 1,
          start: 0,
          current: 0,
          errored: false)
|
2020-08-05 16:16:12 +02:00
|
|
|
|
|
|
|
|
2020-08-05 19:01:00 +02:00
|
|
|
proc done(self: Lexer): bool =
    ## True once every character of the source has been consumed.
    self.current >= len(self.source)
|
|
|
|
|
|
|
|
|
2020-08-06 17:28:28 +02:00
|
|
|
proc step(self: var Lexer): char =
    ## Consumes the next character and returns it.
    ## Returns '\0' (without advancing) when the input is exhausted.
    if self.done():
        result = '\0'
    else:
        inc self.current
        result = self.source[self.current - 1]
|
2020-08-06 00:14:26 +02:00
|
|
|
|
|
|
|
|
2020-08-06 17:28:28 +02:00
|
|
|
proc peek(self: Lexer): char =
    ## Returns the next character without consuming it, or '\0' at EOF.
    if self.done(): '\0'
    else: self.source[self.current]
|
2020-08-05 16:16:12 +02:00
|
|
|
|
|
|
|
|
2020-08-06 17:28:28 +02:00
|
|
|
proc match(self: var Lexer, what: char): bool =
    ## Conditionally consumes the next character: advances and returns
    ## true only when it equals `what`; otherwise leaves the position
    ## untouched and returns false. Used to recognize two-char operators.
    result = false
    if not self.done() and self.peek() == what:
        inc self.current
        result = true
|
|
|
|
|
|
|
|
|
2020-08-06 17:28:28 +02:00
|
|
|
proc peekNext(self: Lexer): char =
    ## One-character lookahead past peek(): returns the character after
    ## the next one without consuming anything, or '\0' past the end.
    if self.current + 1 < self.source.len:
        result = self.source[self.current + 1]
    else:
        result = '\0'
|
2020-08-05 19:01:00 +02:00
|
|
|
|
|
|
|
|
|
|
|
proc createToken(self: var Lexer, tokenType: TokenType, literal: Value): Token =
    ## Builds a token of kind `tokenType` whose lexeme is the source slice
    ## between `start` and `current`, tagged with the current line number.
    let lexeme = self.source[self.start..<self.current]
    result = Token(kind: tokenType,
                   lexeme: lexeme,
                   literal: literal,
                   line: self.line)
|
|
|
|
|
|
|
|
|
2020-08-06 17:28:28 +02:00
|
|
|
proc parseString(self: var Lexer, delimiter: char) =
    ## Scans a string literal terminated by `delimiter` (the opening quote
    ## has already been consumed by scanToken), tracking newlines that occur
    ## inside it, and appends a STR token. On an unterminated literal the
    ## error flag is set and no token is emitted.
    while self.peek() != delimiter and not self.done():
        if self.peek() == '\n':
            self.line = self.line + 1
        discard self.step()
    if self.done():
        echo &"SyntaxError: Unterminated string literal at line {self.line}"
        self.errored = true
        # Bug fix: previously execution fell through and still emitted a
        # STR token for the unterminated literal; bail out instead.
        return
    discard self.step()   # consume the closing delimiter
    # Note: the slice deliberately spans start..current, so the stored
    # string still includes the surrounding quote characters.
    let value = Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: self.source[self.start..<self.current]))
    let token = self.createToken(STR, value)
    self.tokens.add(token)
|
2020-08-05 16:16:12 +02:00
|
|
|
|
2020-08-05 17:50:29 +02:00
|
|
|
|
2020-08-05 19:01:00 +02:00
|
|
|
proc parseNumber(self: var Lexer) =
    ## Scans an integer or floating point literal (first digit already
    ## consumed by scanToken) and appends a NUMBER token carrying either
    ## an INTEGER or a DOUBLE value.
    while isDigit(self.peek()):
        discard self.step()
    # Bug fix: only treat '.' as a decimal point when a digit follows it.
    # The old code consumed the dot unconditionally, so input like "1.foo"
    # swallowed the DOT token meant for attribute access.
    if self.peek() == '.' and self.peekNext().isDigit():
        discard self.step()   # consume the '.'
        while self.peek().isDigit():
            discard self.step()
        let value = Value(kind: ValueTypes.DOUBLE, floatValue: parseFloat(self.source[self.start..<self.current]))
        self.tokens.add(self.createToken(TokenType.NUMBER, value))
    else:
        let value = Value(kind: ValueTypes.INTEGER, intValue: parseInt(self.source[self.start..<self.current]))
        self.tokens.add(self.createToken(TokenType.NUMBER, value))
|
2020-08-05 18:45:14 +02:00
|
|
|
|
|
|
|
|
2020-08-05 19:01:00 +02:00
|
|
|
proc parseIdentifier(self: var Lexer) =
    ## Scans an identifier (first character already consumed) and appends
    ## either the matching keyword token or a plain ID token.
    while self.peek().isAlphaNumeric():
        discard self.step()
    let name = self.source[self.start..<self.current]
    let literal = Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: name))
    if name in RESERVED:
        # Reserved word: emit its dedicated token kind
        self.tokens.add(self.createToken(RESERVED[name], literal))
    else:
        self.tokens.add(self.createToken(ID, literal))
|
2020-08-05 19:01:00 +02:00
|
|
|
|
|
|
|
|
2020-08-06 00:14:26 +02:00
|
|
|
proc parseComment(self: var Lexer) =
    ## Consumes a /* ... */ block comment, supporting nesting through
    ## recursion. Assumes the opening "/*" was already consumed by the
    ## caller (scanToken); produces no token.
    ## NOTE(review): newlines inside comments do not advance self.line,
    ## so line numbers after a multi-line comment may be off — confirm.
    var closed = false
    while not self.done():
        # Two-character lookahead to detect open/close markers
        var finish = self.peek() & self.peekNext()
        if finish == "/*": # Nested comments
            discard self.step()
            discard self.step()
            self.parseComment()   # recurse: consumes up to the inner "*/"
        elif finish == "*/":
            closed = true
            discard self.step() # Consume the two ends
            discard self.step()
            break
        discard self.step()
    if self.done() and not closed:
        # Hit EOF before finding the matching "*/"
        self.errored = true
        echo &"SyntaxError: Unexpected EOF at line {self.line}"
|
2020-08-06 00:14:26 +02:00
|
|
|
|
|
|
|
|
|
|
|
proc scanToken(self: var Lexer) =
    ## Scans the single token starting at self.start: skips whitespace and
    ## comments, dispatches to the specialized parsers for strings, numbers
    ## and identifiers, and handles one- and two-character operators.
    var single = self.step()
    if single in [' ', '\t', '\r']:
        return   # insignificant whitespace
    elif single == '\n':
        # Bug fix: a newline must advance the line counter. The previous
        # code did `self.current = self.current + 1`, which never bumped
        # the line number and silently skipped the character after '\n'.
        self.line = self.line + 1
    elif single in ['"', '\'']:
        self.parseString(single)
    elif single.isDigit():
        self.parseNumber()
    elif single.isAlphaNumeric():
        self.parseIdentifier()
    elif single in TOKENS:
        if single == '/' and self.match('/'):
            # Line comment: discard everything up to (not including) '\n'
            while self.peek() != '\n' and not self.done():
                discard self.step()
        elif single == '/' and self.match('*'):
            self.parseComment()
        # Two-character operators, checked before the single-char fallback
        elif single == '=' and self.match('='):
            self.tokens.add(self.createToken(DEQ, Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: "=="))))
        elif single == '>' and self.match('='):
            self.tokens.add(self.createToken(GE, Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: ">="))))
        elif single == '<' and self.match('='):
            self.tokens.add(self.createToken(LE, Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: "<="))))
        elif single == '!' and self.match('='):
            self.tokens.add(self.createToken(NE, Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: "!="))))
        elif single == '*' and self.match('*'):
            self.tokens.add(self.createToken(POW, Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: "**"))))
        else:
            # Plain single-character token
            self.tokens.add(self.createToken(TOKENS[single], Value(kind: ValueTypes.OBJECT, obj: Obj(kind: ObjectTypes.STRING, str: &"{single}"))))
    else:
        self.errored = true
        # Message fix: include the word "line" for consistency with the
        # other error messages in this module.
        echo &"SyntaxError: Unexpected character '{single}' at line {self.line}"
|
2020-08-06 00:14:26 +02:00
|
|
|
|
|
|
|
|
2020-08-06 00:28:32 +02:00
|
|
|
proc lex*(self: var Lexer): seq[Token] =
    ## Runs the scanner over the whole source and returns the token
    ## stream, always terminated by a single EOF token.
    while not self.done():
        self.start = self.current   # mark the beginning of the next lexeme
        self.scanToken()
    let eofToken = Token(kind: EOF, lexeme: "EOF",
                         literal: Value(kind: ValueTypes.NIL),
                         line: self.line)
    self.tokens.add(eofToken)
    result = self.tokens
|
2020-08-06 17:28:28 +02:00
|
|
|
|