Added allTokens test
parent 79f3803328
commit e061bb399b
@@ -15,12 +15,70 @@
 import frontend/parsing/lexer
 
+import std/tables
+
+export tables
+
+
+var tokens* = {"{": TokenType.LeftBrace,
+               "}": TokenType.RightBrace,
+               "(": TokenType.LeftParen,
+               ")": TokenType.RightParen,
+               "[": TokenType.LeftBracket,
+               "]": TokenType.RightBracket,
+               ".": TokenType.Dot,
+               ",": TokenType.Comma,
":": TokenType.Semicolon,
+               "type": TokenType.Type,
+               "enum": TokenType.Enum,
+               "case": TokenType.Case,
+               "operator": TokenType.Operator,
+               "generator": TokenType.Generator,
+               "fn": TokenType.Function,
+               "coroutine": TokenType.Coroutine,
+               "break": TokenType.Break,
+               "continue": TokenType.Continue,
+               "while": TokenType.While,
+               "for": TokenType.For,
+               "foreach": TokenType.Foreach,
+               "if": TokenType.If,
+               "else": TokenType.Else,
+               "await": TokenType.Await,
+               "assert": TokenType.Assert,
+               "const": TokenType.Const,
+               "let": TokenType.Let,
+               "var": TokenType.Var,
+               "import": TokenType.Import,
+               "yield": TokenType.Yield,
+               "return": TokenType.Return,
+               "object": TokenType.Object,
+               "export": TokenType.Export,
+               "block": TokenType.Block,
+               "switch": TokenType.Switch,
+               "lent": TokenType.Lent,
+               "true": TokenType.True,
+               "false": TokenType.False,
"inf": TokenType.Inf,
|
||||
"ptr": TokenType.Ptr,
|
||||
"nan": TokenType.Nan,
|
||||
"inf": TokenType.Inf,
|
||||
}.toTable()
+
+
+for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":", "==", "!=",
+            ">=", "<=", "+=", "-=", "/=", "*=", "**=", "!", "%", "&", "|", "^",
+            ">>", "<<"]:
+    tokens[sym] = TokenType.Symbol
+
+
 proc fillSymbolTable*(tokenizer: Lexer) =
     ## Initializes the Lexer's symbol
     ## table with builtin symbols and
     ## keywords
 
     # 1-byte symbols
 
+    # Specialized symbols for which we need a specific token type
+    # for easier handling in the parser (it's nicer to use enum members
+    # rather than strings whenever possible)
     tokenizer.symbols.addSymbol("{", TokenType.LeftBrace)
     tokenizer.symbols.addSymbol("}", TokenType.RightBrace)
     tokenizer.symbols.addSymbol("(", TokenType.LeftParen)
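
The module-level table above mirrors what fillSymbolTable registers, which is what lets the new test iterate over every known lexeme. The lexer consumes its symbol map by maximal matching: when several symbols share a prefix (">", ">=", ">>"), the longest match wins. A minimal, self-contained sketch of that idea, not peon's actual lexer code (longestMatch is a made-up helper):

    import std/[tables, strutils]

    let symbols = {"=": "Symbol", "==": "Symbol", ">": "Symbol",
                   ">=": "Symbol", ">>": "Symbol"}.toTable()

    proc longestMatch(source: string, start: int): string =
      ## Returns the longest symbol matching at `start` ("" if none does)
      for lexeme in symbols.keys():
        if source.continuesWith(lexeme, start) and lexeme.len() > result.len():
          result = lexeme

    when isMainModule:
      doAssert longestMatch(">= b", 0) == ">="  # ">=" wins over ">"
      doAssert longestMatch("~ b", 0) == ""     # "~" is not in this tiny map
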
@@ -30,7 +88,20 @@ proc fillSymbolTable*(tokenizer: Lexer) =
tokenizer.symbols.addSymbol(".", TokenType.Dot)
|
||||
tokenizer.symbols.addSymbol(",", TokenType.Comma)
|
||||
tokenizer.symbols.addSymbol(";", TokenType.Semicolon)
|
||||
# Keywords
|
||||
|
||||
# Generic symbols avoid us the need to create a gazillion members of the
|
||||
# TokenType enum. These are also not handled directly in the parser, but
|
||||
# rather processed as classes of operators based on precedence, so using
|
||||
# strings is less of a concern
|
||||
+    for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":", "==", "!=",
+                ">=", "<=", "+=", "-=", "/=", "*=", "**=", "!", "%", "&", "|", "^",
+                ">>", "<<"]:
+        tokenizer.symbols.addSymbol(sym, TokenType.Symbol)
+    # Keywords. We differentiate keywords from symbols because they have priority
+    # over the latter, and also because the lexer internally uses the symbol map to do
+    # maximal matching: it's helpful not to increase the number of substrings we
+    # need to check (especially since keywords match exactly and uniquely, while symbols
+    # can share substrings)
     tokenizer.symbols.addKeyword("type", TokenType.Type)
     tokenizer.symbols.addKeyword("enum", TokenType.Enum)
     tokenizer.symbols.addKeyword("case", TokenType.Case)
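
The comment above captures the core design choice: keywords and symbols live in separate maps, so lookup can check for an exact keyword first and only then fall back to substring-based symbol matching. A rough sketch of that split, with made-up names (SymbolMap, lookup) rather than peon's real API:

    import std/tables

    type SymbolMap = object
      keywords: Table[string, string]  # match exactly and uniquely ("let", "fn", ...)
      symbols: Table[string, string]   # may share substrings ("=", "==", ...)

    proc lookup(m: SymbolMap, lexeme: string): string =
      # Keywords take priority over symbols
      if lexeme in m.keywords:
        return m.keywords[lexeme]
      m.symbols.getOrDefault(lexeme, "Identifier")

    when isMainModule:
      let m = SymbolMap(keywords: {"let": "Let"}.toTable(),
                        symbols: {"=": "Symbol"}.toTable())
      doAssert m.lookup("let") == "Let"
      doAssert m.lookup("=") == "Symbol"
      doAssert m.lookup("foo") == "Identifier"
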
@@ -69,7 +140,3 @@ proc fillSymbolTable*(tokenizer: Lexer) =
tokenizer.symbols.addKeyword("ptr", TokenType.Ptr)
|
||||
tokenizer.symbols.addKeyword("nan", TokenType.Nan)
|
||||
tokenizer.symbols.addKeyword("inf", TokenType.Inf)
|
||||
for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":", "==", "!=",
|
||||
">=", "<=", "+=", "-=", "/=", "*=", "**=", "!", "%", "&", "|", "^",
|
||||
">>", "<<"]:
|
||||
tokenizer.symbols.addSymbol(sym, TokenType.Symbol)
|
|
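
With the loop hoisted to module scope (its removal here), the tokens table is fully populated at import time, and export tables re-exports the table API so importers can call keys(), len(), and [] without importing std/tables themselves. A hypothetical caller inside the same repo, assuming the util/symbols path used by the tests below:

    import util/symbols

    # tokens already contains keywords and generic symbols at this point
    echo symbols.tokens.len()
    echo symbols.tokens["fn"]  # TokenType.Function
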
@@ -1,5 +1,6 @@
 import util/testing
 import util/fmterr
+import util/symbols
 import frontend/parsing/lexer
 
 
@@ -27,19 +28,31 @@ when isMainModule:
         testTokenizeFails("illegalTabs", "\t", "tabs are not allowed in peon code, use spaces for indentation instead", line=1, location=(0, 0))
         ]
     )
+    var allTokens = ""
+    var allTokensList = newSeqOfCap[TokenType](symbols.tokens.len())
+    for lexeme in symbols.tokens.keys():
+        allTokens.add(&"{lexeme} ")
+        if lexeme == "_":
+            # Due to how the lexer is designed, a bare underscore is
+            # parsed as an identifier rather than a symbol
+            allTokensList.add(TokenType.Identifier)
+        else:
+            allTokensList.add(symbols.tokens[lexeme])
+    allTokensList.add(TokenType.EndOfFile)
+    suite.addTest(testTokenizeSucceeds("allTokens", allTokens, allTokensList))
     const skippedChars = [';', '\'', '\n', '\\', '\t', '\e', '\a', '\r']
     var
         characters = ""
-        tokens = newSeqOfCap[TokenType](256)
+        charTokens = newSeqOfCap[TokenType](256)
     for value in 0..255:
-        tokens.add(Char)
+        charTokens.add(Char)
         if char(value) in skippedChars:
             # These cases are special and we handle them separately
             continue
         characters.add(&"'{char(value)}'")
-    tokens.add(TokenType.EndOfFile)
+    charTokens.add(TokenType.EndOfFile)
     characters.add("""';' '\'' '\n' '\\' '\t' '\e' '\a' '\r'""")
-    suite.addTest(testTokenizeSucceeds("allCharacters", characters, tokens))
+    suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
     suite.run()
     echo "Tokenization test results: "
     for test in suite.tests:
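
The allTokens test above is table-driven: the source string and the expected token list are built in the same pass over symbols.tokens, so the two can never drift apart. The same pattern boiled down to a standalone sketch with stand-in data (not the real suite):

    import std/[tables, strformat]

    let known = {"fn": "Function", "let": "Let", "+": "Symbol"}.toTable()

    var source = ""
    var expected: seq[string] = @[]
    for lexeme in known.keys():
      source.add(&"{lexeme} ")     # one space-separated program...
      expected.add(known[lexeme])  # ...and its expected tokens, in lockstep
    expected.add("EndOfFile")

    echo source    # e.g. "fn let + "
    echo expected  # e.g. @["Function", "Let", "Symbol", "EndOfFile"]
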
@@ -49,7 +62,7 @@ when isMainModule:
echo &" - Outcome: {test.outcome}"
|
||||
echo &" - Expected state: {test.expected} "
|
||||
echo &" - Expected outcome: {test.getExpectedOutcome()}"
|
||||
echo &"\n The test failed for the following reason: {test.reason}"
|
||||
echo &"\n The test failed for the following reason: {test.reason}\n"
|
||||
if not test.outcome.exc.isNil():
|
||||
echo &"\n Formatted error message follows\n"
|
||||
print(LexingError(test.outcome.exc))
|
||||