127 lines
5.2 KiB
Nim
127 lines
5.2 KiB
Nim
|
import frontend/lexer
|
||
|
|
||
|
|
||
|
proc fillSymbolTable(tokenizer: Lexer) =
  ## Initializes the Lexer's symbol
  ## table with the builtin symbols
  ## and keywords

  # Lexeme -> token kind mapping for all builtin operators,
  # grouped by byte length. The {k: v, ...} literal is a
  # compile-time array of (string, kind) tuples.
  const symbols = {
    # 1-byte symbols
    "`": Backtick,
    "+": Plus,
    "-": Minus,
    "*": Star,
    "/": Slash,
    "{": LeftBrace,
    "}": RightBrace,
    "(": LeftParen,
    ")": RightParen,
    "[": LeftBracket,
    "]": RightBracket,
    ".": Dot,
    ",": Comma,
    ">": GreaterThan,
    "<": LessThan,
    ";": Semicolon,
    "=": Equal,
    "~": Tilde,
    "%": Percentage,
    ":": Colon,
    "&": Ampersand,
    "^": Caret,
    "|": Pipe,
    # 2-byte symbols
    "+=": InplaceAdd,
    "-=": InplaceSub,
    ">=": GreaterOrEqual,
    "<=": LessOrEqual,
    "*=": InplaceMul,
    "/=": InplaceDiv,
    "&=": InplaceAnd,
    "!=": NotEqual,
    "|=": InplaceOr,
    "^=": InplaceXor,
    "%=": InplaceMod,
    "//": FloorDiv,
    "==": DoubleEqual,
    "**": DoubleStar,
    ">>": RightShift,
    "<<": LeftShift,
    # 3-byte symbols
    "//=": InplaceFloorDiv,
    "**=": InplacePow,
    ">>=": InplaceRightShift,
    "<<=": InplaceLeftShift
  }

  # Reserved word -> token kind mapping
  const keywords = {
    "type": Type,
    "enum": Enum,
    "case": Case,
    "operator": Operator,
    "generator": Generator,
    "function": Function,
    "coroutine": Coroutine,
    "break": Break,
    "continue": Continue,
    "while": While,
    "for": For,
    "foreach": Foreach,
    "if": If,
    "else": Else,
    "await": Await,
    "defer": Defer,
    "try": Try,
    "except": Except,
    "finally": Finally,
    "raise": Raise,
    "assert": Assert,
    "const": Const,
    "let": Let,
    "var": Var,
    "lambda": Lambda,
    "import": Import,
    # These are technically more like expressions
    # with a reserved name that produce a value of a
    # builtin type, but we don't need to care about
    # that until we're in the parsing and compilation
    # steps so it's fine
    "nan": NotANumber,
    "inf": Infinity,
    "nil": Nil,
    "true": True,
    "false": False,
    # These are technically operators, but since
    # they fit neatly into the definition for an
    # identifier/keyword we parse them as such
    # and specialize them later
    "isnot": IsNot,
    "is": Is,
    "as": As,
    "of": Of,
    "and": LogicalAnd,
    "or": LogicalOr,
    "not": LogicalNot
  }

  for (lexeme, kind) in symbols:
    tokenizer.symbols.addSymbol(lexeme, kind)
  for (word, kind) in keywords:
    tokenizer.symbols.addKeyword(word, kind)

  # P.S.: There's no reason for the order of addition of
  # symbols to be ascending (the symbol table uses a hashmap
  # internally). You can add/remove symbols (and keywords
  # for that matter) as you like!
|
||
|
|
||
|
|
||
|
when isMainModule:
  # Tiny REPL driver: reads a line from stdin, lexes it and
  # dumps the resulting token stream, until EOF or Ctrl+C.
  setControlCHook(proc () {.noconv.} = quit(0))
  var lexer = newLexer()
  lexer.fillSymbolTable()
  var keepRunning = true
  while keepRunning:
    try:
      stdout.write("> ")
      let source = stdin.readLine()
      for token in lexer.lex(source, "<stdin>"):
        # Reduces clutter in the output
        if token.kind notin [Whitespace, Tab]:
          echo token
    except IOError:
      # EOF (or a closed stream): leave the REPL loop
      keepRunning = false
    except LexingError:
      # Report the lexing failure and keep prompting
      echo getCurrentExceptionMsg()
  echo ""
  quit(0)