# Test suite for the peon tokenizer (lexer)
import util/testing
|
|
import util/fmterr
|
|
import util/symbols
|
|
import frontend/parsing/lexer
|
|
|
|
|
|
import std/strformat
|
|
|
|
|
|
# Utilities to construct tokens for validation purposes
|
|
|
|
proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), lexeme: string,
               spaces: int = 0): Token {.inline.} =
    ## Builds a Token object with the given attributes. Two of the
    ## defaults are sentinels: leaving pos at (0, 0) derives the span
    ## from the lexeme's length, and leaving relPos at (-1, -1) makes
    ## it mirror the (possibly derived) absolute position
    var absolute = pos
    if absolute == (0, 0):
        # max() keeps the stop index at 0 for an empty lexeme
        absolute = (0, max(lexeme.high(), 0))
    var relative = relPos
    if relative == (-1, -1):
        relative = absolute
    result = Token(kind: kind, line: line, pos: absolute, relPos: relative, lexeme: lexeme, spaces: spaces)
|
|
|
|
|
|
proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), spaces: int = 0): Token {.inline.} =
    ## Shorthand for an EndOfFile token (its lexeme is always empty)
    result = makeToken(TokenType.EndOfFile, line=line, pos=pos, relPos=relPos, lexeme="", spaces=spaces)
|
|
|
|
proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
    ## Shorthand for a String token whose lexeme is the given content
    ## wrapped in the given delimiter on both sides
    result = makeToken(TokenType.String, line, pos, relPos, delimiter & content & delimiter, spaces)
|
|
|
|
proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, spaces: int = 0): Token {.inline.} =
    ## Shorthand for a Char token: the lexeme is the content wrapped
    ## in single quotes
    result = makeToken(TokenType.Char, line, pos, relPos, '\'' & content & '\'', spaces)
|
|
|
|
proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, spaces: int = 0): Token {.inline.} =
    ## Shorthand for a Symbol token; the content is used as the lexeme verbatim
    result = makeToken(TokenType.Symbol, line=line, pos=pos, relPos=relPos, lexeme=content, spaces=spaces)
|
|
|
|
proc makeIdent(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), name: string, spaces: int = 0): Token {.inline.} =
    ## Shorthand for an Identifier token; the name is used as the lexeme verbatim
    result = makeToken(TokenType.Identifier, line=line, pos=pos, relPos=relPos, lexeme=name, spaces=spaces)
|
|
|
|
proc makeInteger(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), value: int, spaces: int = 0): Token {.inline.} =
    ## Shorthand for an Integer token; the lexeme is the value's
    ## decimal string representation
    result = makeToken(TokenType.Integer, line=line, pos=pos, relPos=relPos, lexeme= $value, spaces=spaces)
|
|
|
|
|
|
when isMainModule:
    # Test entry point: builds the tokenizer test suite, runs it, and
    # exits 0 on success or -1 (with a detailed report) on failure

    var suite = newTestSuite()
    # Hand-written cases. Successful cases pair a source snippet with the
    # exact token stream (including byte positions) the lexer must produce;
    # failing cases pair a snippet with the expected error message, line
    # and location
    suite.addTests(
        [
            testTokenizeSucceeds("emptyFile", "", @[endOfFile()]),
            testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3))]),
            testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1), relPos=(0, 0))]),
            testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\""), endOfFile(pos=(2, 2))]),
            testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'"), endOfFile(pos=(4, 4))]),
            testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\""), endOfFile(pos=(4, 4))]),
            # Positions are byte offsets, so multi-byte code points (and the
            # zero-width joiners in the family emoji) widen the spans
            testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[makeSymbol(content="🌎", pos=(0, 3)), makeSymbol(content="😂", pos=(5, 8), spaces=1),
                                 makeSymbol(content="👩‍👩‍👦‍👦", pos=(10, 34), spaces=1), endOfFile(pos=(35, 35))]),
            testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩‍👩‍👦‍👦`", @[makeIdent(name="🌎", pos=(0, 5)),
                                 makeIdent(name="😂", pos=(7, 12), spaces=1),
                                 makeIdent(name="👩‍👩‍👦‍👦", pos=(14, 40), spaces=1),
                                 endOfFile(pos=(41, 41))]),
            testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩‍👩‍👦‍👦👩‍👩‍👦‍👦`", @[makeIdent(name="🌎🌎", pos=(0, 9)),
                                 makeIdent(name="😂😂", pos=(11, 20), spaces=1),
                                 makeIdent(name="👩‍👩‍👦‍👦👩‍👩‍👦‍👦", pos=(22, 73), spaces=1),
                                 endOfFile(pos=(74, 74))]),
            # Every integer base the lexer supports gets its own token kind
            testTokenizeSucceeds("allIntegers", "1 0x1 0o1 0b1", @[makeInteger(value=1),
                                 makeToken(TokenType.Hex, pos=(2, 4), lexeme="0x1", spaces=1),
                                 makeToken(TokenType.Octal, pos=(6, 8), lexeme="0o1", spaces=1),
                                 makeToken(TokenType.Binary, pos=(10, 12), lexeme="0b1", spaces=1),
                                 endOfFile(pos=(13, 13))
                                 ]),
            testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[makeToken(TokenType.Float, pos=(0, 2), lexeme="1.0"),
                                 makeToken(TokenType.Float, pos=(4, 6), lexeme="1e5", spaces=1),
                                 makeToken(TokenType.Float, pos=(8, 10), lexeme="1E5", spaces=1),
                                 makeToken(TokenType.Float, pos=(12, 16), lexeme="1.5e4", spaces=1),
                                 makeToken(TokenType.Float, pos=(18, 22), lexeme="1.5E4", spaces=1),
                                 endOfFile(pos=(23, 23))]),
            # Size suffixes ('u8, 'f32, ...) are kept as part of the lexeme
            testTokenizeSucceeds("sizedNumbers", "1'u8 0x1'i8 0o1'i64 0b1'u32 2.0'f32 1e5'f64 1E5'f32 1.5e4'f64 1.5E4'f32",
                                 @[makeToken(TokenType.Integer, lexeme="1'u8"),
                                 makeToken(TokenType.Hex, pos=(5, 10), lexeme="0x1'i8", spaces=1),
                                 makeToken(TokenType.Octal, pos=(12, 18), lexeme="0o1'i64", spaces=1),
                                 makeToken(TokenType.Binary, pos=(20, 26), lexeme="0b1'u32", spaces=1),
                                 makeToken(TokenType.Float, pos=(28, 34), lexeme="2.0'f32", spaces=1),
                                 makeToken(TokenType.Float, pos=(36, 42), lexeme="1e5'f64", spaces=1),
                                 makeToken(TokenType.Float, pos=(44, 50), lexeme="1E5'f32", spaces=1),
                                 makeToken(TokenType.Float, pos=(52, 60), lexeme="1.5e4'f64", spaces=1),
                                 makeToken(TokenType.Float, pos=(62, 70), lexeme="1.5E4'f32", spaces=1),
                                 endOfFile(pos=(71, 71))]),
            testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[makeString(spaces=1, content="""\a \b \f \n \r \t \v \" \' \\ """,
                                 delimiter="\"", pos=(1, 32)),
                                 endOfFile(pos=(34, 34))]),
            # Error cases: message strings must match the lexer's output exactly
            testTokenizeFails("invalidFloatEndsWithDot", "2.", "invalid float number literal", line=1, location=(0, 1)),
            testTokenizeFails("invalidFloatSpuriousChars", "2.f", "invalid float number literal", line=1, location=(0, 1)),
            testTokenizeFails("unterminatedChar", "'", "unexpected EOF while parsing character literal", line=1, location=(0, 0)),
            testTokenizeFails("emptyChar", "''", "character literal cannot be of length zero", line=1, location=(0, 1)),
            testTokenizeFails("charTooLong", "'ab'", "invalid character literal (length must be one!)", line=1, location=(0, 3)),
            testTokenizeFails("unterminatedString", "\"", "unexpected EOF while parsing string literal", line=1, location=(0, 0)),
            testTokenizeFails("unterminatedCharWithExtraContent", "'o;", "unexpected EOF while parsing character literal", line=1, location=(0, 2)),
            testTokenizeFails("unterminatedStringWithExtraContent", "\"o;", "unexpected EOF while parsing string literal", line=1, location=(0, 2)),
            testTokenizeFails("unterminatedCharWithNewline", "'\\n;", "unexpected EOF while parsing character literal", line=1, location=(0, 3)),
            testTokenizeFails("unterminatedStringWithNewline", "\"\\n;", "unexpected EOF while parsing string literal", line=1, location=(0, 3)),
            testTokenizeFails("illegalTabs", "\t", "tabs are not allowed in peon code, use spaces for indentation instead", line=1, location=(0, 0)),
            testTokenizeFails("illegalShortUnicodeEscape", """ "\u123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
            testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
            # Control characters are rejected inside backtick-stropped identifiers
            testTokenizeFails("illegalNLInBacktick", "`test\n`", """unexpected character in stropped identifier: "\x0A"""", line=1, location=(0, 5)),
            testTokenizeFails("illegalTabInBacktick", "`test\t`", """unexpected character in stropped identifier: "\x09"""", line=1, location=(0, 5)),
            testTokenizeFails("illegalEscapeinBacktick", "`test\e`", """unexpected character in stropped identifier: "\x1B"""", line=1, location=(0, 5)),
            testTokenizeFails("illegalCRInBacktick", "`test\r`", """unexpected character in stropped identifier: "\x0D"""", line=1, location=(0, 5)),
            testTokenizeFails("illegalFFInBacktick", "`test\f`", """unexpected character in stropped identifier: "\x0C"""", line=1, location=(0, 5)),
            testTokenizeFails("unterminatedStroppedIdent", "`test", "unexpected EOF while parsing stropped identifier", line=1, location=(0, 4))
        ]
    )
    # Generated case: feed every symbol known to the lexer in one source
    # string. The expected token list is built alongside the string, with
    # i tracking the byte offset of the current lexeme
    var allTokens = ""
    var allTokensList = newSeqOfCap[Token](symbols.tokens.len())
    var i = 0
    for lexeme in symbols.tokens.keys():
        allTokens.add(&"{lexeme} ")
        # Only the very first token has no preceding space
        let spaces = if i == 0: 0 else: 1
        let pos = (i, i + len(lexeme) - 1)
        if lexeme == "_":
            # Due to how the lexer is designed, a bare underscore is
            # parsed as an identifier rather than a symbol
            allTokensList.add(makeIdent(pos=pos, name="_", spaces=spaces))
        else:
            allTokensList.add(makeToken(symbols.tokens[lexeme], pos=pos, lexeme=lexeme, spaces=spaces))
        # Advance past the lexeme plus the separating space
        inc(i, len(lexeme) + 1)
    allTokensList.add(endOfFile(pos=(i, i)))
    suite.addTest(testTokenizeSucceeds("allTokens", allTokens, allTokensList))
    # Generated case: a character literal for every single-byte value,
    # except the ones that must be written as escape sequences
    const skippedChars = ['\'', '\n', '\\', '\t', '\e', '\a', '\r'];
    var
        characters = ""
        charTokens = newSeqOfCap[Token](256)
    # NOTE(review): this reuses (reassigns) the offset counter i declared
    # above rather than shadowing it
    i = 0
    for value in 0..255:
        if char(value) in skippedChars:
            # These cases are special and we handle them separately
            continue
        charTokens.add(makeChar(pos=(i, i + 2), content= &"{char(value)}"))
        characters.add(&"'{char(value)}'")
        # Each plain literal occupies three bytes: quote, char, quote
        inc(i, 3)
    # Escaped forms of the skipped characters: four bytes each ('\x')
    characters.add("""'\'''\n''\\''\t''\e''\a''\r'""")
    charTokens.add(makeChar(pos=(i, i + 3), content="""\'"""))
    inc(i, 4)
    charTokens.add(makeChar(pos=(i, i + 3), content="""\n"""))
    inc(i, 4)
    charTokens.add(makeChar(pos=(i, i + 3), content="""\\"""))
    inc(i, 4)
    charTokens.add(makeChar(pos=(i, i + 3), content="""\t"""))
    inc(i, 4)
    charTokens.add(makeChar(pos=(i, i + 3), content="""\e"""))
    inc(i, 4)
    charTokens.add(makeChar(pos=(i, i + 3), content="""\a"""))
    inc(i, 4)
    charTokens.add(makeChar(pos=(i, i + 3), content="""\r"""))
    inc(i, 4)
    charTokens.add(endOfFile(pos=(i, i)))
    suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
    # Run everything and report the outcome
    echo "Running tokenizer tests"
    suite.run(verbose=true)
    if suite.successful():
        echo "OK: All tokenizer tests were successful"
        quit(0)
    else:
        # Print a detailed report for every test that failed or crashed
        echo "ERR: Not all tests were successful, details below:\n"
        for test in suite.tests:
            if test.status in [Failed, Crashed]:
                echo &"  - {test.name} -> {test.status}"
                echo &"    Details:"
                echo &"     - Outcome -> {test.outcome}"
                echo &"     - Expected state -> {test.expected} "
                echo &"     - Expected outcome -> {test.getExpectedOutcome()}"
                if test.reason.len() > 0:
                    echo &"\n    The test failed for the following reason -> {test.reason}\n"
                else:
                    echo "\n   No further information is available about this failure"
                if not test.outcome.exc.isNil():
                    # Pretty-print the lexer error via util/fmterr's print()
                    echo &"\n   Formatted error message follows\n"
                    print(LexingError(test.outcome.exc))
                    echo "\n   Formatted error message ends here\n"
        quit(-1)
|