Migrate all tokenizer tests to new format
This commit is contained in:
parent
9d9093ca6c
commit
3953751cef
|
@ -274,7 +274,7 @@ proc error(self: Lexer, message: string) =
|
|||
|
||||
|
||||
proc check(self: Lexer, s: string, distance: int = 0): bool =
|
||||
## Behaves like self.match(), without consuming the
|
||||
## Behaves like self.match(), without consuming the
|
||||
## token. False is returned if we're at EOF
|
||||
## regardless of what the token to check is.
|
||||
## The distance is passed directly to self.peek()
|
||||
|
@ -570,7 +570,7 @@ proc parseBackticks(self: Lexer) =
|
|||
if self.match(["\n", "\t", "\e", "\r", "\f"]):
|
||||
self.error(&"unexpected character in stropped identifier: {self.peek(-1).escape()}")
|
||||
discard self.step()
|
||||
if self.done() and not self.match("`"):
|
||||
if self.done() or not self.match("`"):
|
||||
self.error("unexpected EOF while parsing stropped identifier")
|
||||
self.createToken(Identifier)
|
||||
# Strips the backticks
|
||||
|
|
|
@ -120,27 +120,27 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
|
|||
for (provided, expected) in zip(tokens, test.tokens):
|
||||
if provided.kind != expected.kind:
|
||||
test.status = Failed
|
||||
test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}"
|
||||
test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}\n Expected: {expected}\n Got: {provided}"
|
||||
return
|
||||
if provided.lexeme != expected.lexeme:
|
||||
test.status = Failed
|
||||
test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'"
|
||||
test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'\n Expected: {expected}\n Got: {provided}"
|
||||
return
|
||||
if provided.line != expected.line:
|
||||
test.status = Failed
|
||||
test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}"
|
||||
test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}\n Expected: {expected}\n Got: {provided}"
|
||||
return
|
||||
if provided.pos != expected.pos:
|
||||
test.status = Failed
|
||||
test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}"
|
||||
test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}\n Expected: {expected}\n Got: {provided}"
|
||||
return
|
||||
if provided.relPos != expected.relPos:
|
||||
test.status = Failed
|
||||
test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}"
|
||||
test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}\n Expected: {expected}\n Got: {provided}"
|
||||
return
|
||||
if provided.spaces != expected.spaces:
|
||||
test.status = Failed
|
||||
test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}"
|
||||
test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}\n Expected: {expected}\n Got: {provided}"
|
||||
return
|
||||
inc(i)
|
||||
except LexingError:
|
||||
|
@ -220,17 +220,15 @@ proc removeTests*(self: TestSuite, tests: openarray[Test]) =
|
|||
proc testTokenizeSucceeds*(name, source: string, tokens: seq[Token], skip = false): Test =
  ## Builds a tokenizer test that is expected to succeed.
  ## The tokens produced by the tokenizer for `source` are
  ## matched against the given list of tokens: the test
  ## only succeeds if no discrepancies are found
  result = Test(expected: Success, kind: Tokenizer)
  # What the test is called and what it operates on
  result.name = name
  result.source = source
  result.tokens = tokens
  result.skip = skip
  # Bookkeeping: the test has not been run yet and there
  # is no outcome location to report
  result.status = Init
  result.outcome.line = -1
  result.outcome.location = (-1, -1)
  result.runnerFunc = tokenizeSucceedsRunner
|
||||
|
||||
|
@ -238,9 +236,8 @@ proc testTokenizeSucceeds*(name, source: string, tokens: seq[Token], skip = fals
|
|||
proc testTokenizeFails*(name, source: string, message: string, line: int, location: tuple[start, stop: int], skip = false): Test =
|
||||
## Creates a new tokenizer test that is expected to fail with the
|
||||
## given error message and at the given location
|
||||
result = Test(expected: Failed)
|
||||
result = Test(expected: Failed, kind: Tokenizer)
|
||||
result.name = name
|
||||
result.kind = Tokenizer
|
||||
result.status = Init
|
||||
result.source = source
|
||||
result.skip = skip
|
||||
|
@ -250,14 +247,37 @@ proc testTokenizeFails*(name, source: string, message: string, line: int, locati
|
|||
result.line = line
|
||||
|
||||
|
||||
proc run*(self: TestSuite) =
|
||||
proc run*(self: TestSuite, verbose: bool = false) =
  ## Executes every test of the suite, one after the
  ## other. Tests whose `skip` flag is set are marked
  ## as Skipped and their runner is never called. When
  ## `verbose` is true, a progress line is written to
  ## stdout before each test and a summary of the
  ## counters is printed once all tests have been run
  if verbose:
    echo "Starting test suite"
  var ran, failed, crashed, successful, skipped = 0
  for test in self.tests:
    if test.skip:
      test.status = Skipped
      inc(skipped)
    else:
      if verbose:
        # The trailing \r makes the next progress line overwrite this one
        stdout.write(&"Running '{test.name}' ({ran + 1}/{self.tests.len()})\r")
      test.runnerFunc(self, test)
      # Any status other than these three is not counted
      if test.status == Success:
        inc(successful)
      elif test.status == Failed:
        inc(failed)
      elif test.status == Crashed:
        inc(crashed)
      inc(ran)
  if verbose:
    echo &"Ran {ran} tests ({skipped} skipped, {successful} successful, {failed} failed, {crashed} crashed)"
|
||||
|
||||
|
||||
proc successful*(self: TestSuite): bool =
|
||||
|
@ -265,12 +285,11 @@ proc successful*(self: TestSuite): bool =
|
|||
## successfully or not. If called before run(),
|
||||
## this function returns false. Skipped tests
|
||||
## do not affect the outcome of this function
|
||||
result = true
|
||||
for test in self.tests:
|
||||
if test.status in [Skipped, Success]:
|
||||
continue
|
||||
result = false
|
||||
break
|
||||
return false
|
||||
return true
|
||||
|
||||
|
||||
proc getExpectedException(self: Test): ref Exception =
|
||||
|
|
|
@ -9,21 +9,34 @@ import std/strformat
|
|||
|
||||
# Utilities to construct tokens for validation purposes
|
||||
|
||||
proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), lexeme: string,
|
||||
proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), lexeme: string,
               spaces: int = 0): Token {.inline.} =
  ## Builds a token for validation purposes. A `pos` of (0, 0)
  ## is treated as "not given" and replaced with a span that
  ## starts at 0 and ends at the last index of the lexeme (or
  ## at 0 for an empty lexeme). A `relPos` of (-1, -1) is
  ## treated as "not given" and replaced with the final
  ## value of `pos`
  let absolute =
    if pos == (0, 0): (start: 0, stop: max(lexeme.high(), 0))
    else: pos
  let relative =
    if relPos == (-1, -1): absolute
    else: relPos
  Token(kind: kind, line: line, pos: absolute, relPos: relative, lexeme: lexeme, spaces: spaces)
|
||||
|
||||
proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(EndOfFile, line, pos, relPos, "", spaces)
|
||||
|
||||
proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(String, line, pos, relPos, &"{delimiter}{content}{delimiter}", spaces)
|
||||
proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), spaces: int = 0): Token {.inline.} =
  ## Builds an EndOfFile token with an empty lexeme,
  ## forwarding all other arguments to makeToken
  result = makeToken(kind = TokenType.EndOfFile, line = line, pos = pos,
                     relPos = relPos, lexeme = "", spaces = spaces)
|
||||
|
||||
proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(Char, line, pos, relPos, &"'{content}'", spaces)
|
||||
proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
  ## Builds a String token whose lexeme is `content`
  ## wrapped in `delimiter` on both sides
  let quoted = &"{delimiter}{content}{delimiter}"
  result = makeToken(kind = TokenType.String, line = line, pos = pos,
                     relPos = relPos, lexeme = quoted, spaces = spaces)
|
||||
|
||||
proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(Symbol, line, pos, relPos, content, spaces)
|
||||
proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, spaces: int = 0): Token {.inline.} =
  ## Builds a Char token whose lexeme is `content`
  ## enclosed in single quotes
  let quoted = &"'{content}'"
  result = makeToken(kind = TokenType.Char, line = line, pos = pos,
                     relPos = relPos, lexeme = quoted, spaces = spaces)
|
||||
|
||||
proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, spaces: int = 0): Token {.inline.} =
  ## Builds a Symbol token that uses `content`
  ## verbatim as its lexeme
  result = makeToken(kind = TokenType.Symbol, line = line, pos = pos,
                     relPos = relPos, lexeme = content, spaces = spaces)
|
||||
|
||||
proc makeIdent(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), name: string, spaces: int = 0): Token {.inline.} =
  ## Builds an Identifier token that uses `name`
  ## verbatim as its lexeme
  result = makeToken(kind = TokenType.Identifier, line = line, pos = pos,
                     relPos = relPos, lexeme = name, spaces = spaces)
|
||||
|
||||
proc makeInteger(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), value: int, spaces: int = 0): Token {.inline.} =
  ## Builds an Integer token whose lexeme is the
  ## string form of `value` as produced by `$`
  let digits = $value
  result = makeToken(kind = TokenType.Integer, line = line, pos = pos,
                     relPos = relPos, lexeme = digits, spaces = spaces)
|
||||
|
||||
|
||||
when isMainModule:
|
||||
|
@ -31,23 +44,47 @@ when isMainModule:
|
|||
suite.addTests(
|
||||
[
|
||||
testTokenizeSucceeds("emptyFile", "", @[endOfFile()]),
|
||||
testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3), relPos=(3, 3))]),
|
||||
testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1))]),
|
||||
testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\"", pos=(0, 1), relPos=(0, 1)), endOfFile(pos=(2, 2), relPos=(2, 2))]),
|
||||
testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
|
||||
testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\"", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
|
||||
#[testTokenizeSucceeds("bareUnicode", "🌎 😂 👩👩👦👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩👩👦👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩👩👦👦👩👩👦👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("stringWithEscapes", """ "\n\t\r\e\f" """, @[TokenType.String, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("allIntegers", "1 0x1 0o1 0b1", @[TokenType.Integer, TokenType.Hex, TokenType.Octal, TokenType.Binary, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3))]),
|
||||
testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1), relPos=(0, 0))]),
|
||||
testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\""), endOfFile(pos=(2, 2))]),
|
||||
testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'"), endOfFile(pos=(4, 4))]),
|
||||
testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\""), endOfFile(pos=(4, 4))]),
|
||||
testTokenizeSucceeds("bareUnicode", "🌎 😂 👩👩👦👦", @[makeSymbol(content="🌎", pos=(0, 3)), makeSymbol(content="😂", pos=(5, 8), spaces=1),
|
||||
makeSymbol(content="👩👩👦👦", pos=(10, 34), spaces=1), endOfFile(pos=(35, 35))]),
|
||||
testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩👩👦👦`", @[makeIdent(name="🌎", pos=(0, 5)),
|
||||
makeIdent(name="😂", pos=(7, 12), spaces=1),
|
||||
makeIdent(name="👩👩👦👦", pos=(14, 40), spaces=1),
|
||||
endOfFile(pos=(41, 41))]),
|
||||
testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩👩👦👦👩👩👦👦`", @[makeIdent(name="🌎🌎", pos=(0, 9)),
|
||||
makeIdent(name="😂😂", pos=(11, 20), spaces=1),
|
||||
makeIdent(name="👩👩👦👦👩👩👦👦", pos=(22, 73), spaces=1),
|
||||
endOfFile(pos=(74, 74))]),
|
||||
testTokenizeSucceeds("allIntegers", "1 0x1 0o1 0b1", @[makeInteger(value=1),
|
||||
makeToken(TokenType.Hex, pos=(2, 4), lexeme="0x1", spaces=1),
|
||||
makeToken(TokenType.Octal, pos=(6, 8), lexeme="0o1", spaces=1),
|
||||
makeToken(TokenType.Binary, pos=(10, 12), lexeme="0b1", spaces=1),
|
||||
endOfFile(pos=(13, 13))
|
||||
]),
|
||||
testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[makeToken(TokenType.Float, pos=(0, 2), lexeme="1.0"),
|
||||
makeToken(TokenType.Float, pos=(4, 6), lexeme="1e5", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(8, 10), lexeme="1E5", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(12, 16), lexeme="1.5e4", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(18, 22), lexeme="1.5E4", spaces=1),
|
||||
endOfFile(pos=(23, 23))]),
|
||||
testTokenizeSucceeds("sizedNumbers", "1'u8 0x1'i8 0o1'i64 0b1'u32 2.0'f32 1e5'f64 1E5'f32 1.5e4'f64 1.5E4'f32",
|
||||
@[TokenType.Integer, TokenType.Hex, TokenType.Octal, TokenType.Binary,
|
||||
TokenType.Float, TokenType.Float, TokenType.Float, TokenType.Float, TokenType.Float,
|
||||
TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[TokenType.Float, TokenType.Float, TokenType.Float,
|
||||
TokenType.Float, TokenType.Float, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),]#
|
||||
@[makeToken(TokenType.Integer, lexeme="1'u8"),
|
||||
makeToken(TokenType.Hex, pos=(5, 10), lexeme="0x1'i8", spaces=1),
|
||||
makeToken(TokenType.Octal, pos=(12, 18), lexeme="0o1'i64", spaces=1),
|
||||
makeToken(TokenType.Binary, pos=(20, 26), lexeme="0b1'u32", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(28, 34), lexeme="2.0'f32", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(36, 42), lexeme="1e5'f64", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(44, 50), lexeme="1E5'f32", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(52, 60), lexeme="1.5e4'f64", spaces=1),
|
||||
makeToken(TokenType.Float, pos=(62, 70), lexeme="1.5E4'f32", spaces=1),
|
||||
endOfFile(pos=(71, 71))]),
|
||||
testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[makeString(spaces=1, content="""\a \b \f \n \r \t \v \" \' \\ """,
|
||||
delimiter="\"", pos=(1, 32)),
|
||||
endOfFile(pos=(34, 34))]),
|
||||
testTokenizeFails("invalidFloatEndsWithDot", "2.", "invalid float number literal", line=1, location=(0, 1)),
|
||||
testTokenizeFails("invalidFloatSpuriousChars", "2.f", "invalid float number literal", line=1, location=(0, 1)),
|
||||
testTokenizeFails("unterminatedChar", "'", "unexpected EOF while parsing character literal", line=1, location=(0, 0)),
|
||||
|
@ -70,33 +107,53 @@ when isMainModule:
|
|||
|
||||
]
|
||||
)
|
||||
#[var allTokens = ""
|
||||
var allTokensList = newSeqOfCap[TokenType](symbols.tokens.len())
|
||||
var allTokens = ""
|
||||
var allTokensList = newSeqOfCap[Token](symbols.tokens.len())
|
||||
var i = 0
|
||||
for lexeme in symbols.tokens.keys():
|
||||
allTokens.add(&"{lexeme} ")
|
||||
let spaces = if i == 0: 0 else: 1
|
||||
let pos = (i, i + len(lexeme) - 1)
|
||||
if lexeme == "_":
|
||||
# Due to how the lexer is designed, a bare underscore is
|
||||
# parsed as an identifier rather than a symbol
|
||||
allTokensList.add(TokenType.Identifier)
|
||||
allTokensList.add(makeIdent(pos=pos, name="_", spaces=spaces))
|
||||
else:
|
||||
allTokensList.add(symbols.tokens[lexeme])
|
||||
allTokensList.add(TokenType.EndOfFile)
|
||||
allTokensList.add(makeToken(symbols.tokens[lexeme], pos=pos, lexeme=lexeme, spaces=spaces))
|
||||
inc(i, len(lexeme) + 1)
|
||||
allTokensList.add(endOfFile(pos=(i, i)))
|
||||
suite.addTest(testTokenizeSucceeds("allTokens", allTokens, allTokensList))
|
||||
const skippedChars = ['\'', '\n', '\\', '\t', '\e', '\a', '\r'];
|
||||
var
|
||||
characters = ""
|
||||
charTokens = newSeqOfCap[TokenType](256)
|
||||
charTokens = newSeqOfCap[Token](256)
|
||||
i = 0
|
||||
for value in 0..255:
|
||||
charTokens.add(Char)
|
||||
if char(value) in skippedChars:
|
||||
# These cases are special and we handle them separately
|
||||
continue
|
||||
charTokens.add(makeChar(pos=(i, i + 2), content= &"{char(value)}"))
|
||||
characters.add(&"'{char(value)}'")
|
||||
charTokens.add(TokenType.EndOfFile)
|
||||
characters.add("""'\'' '\n' '\\' '\t' '\e' '\a' '\r'""")
|
||||
suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))]#
|
||||
inc(i, 3)
|
||||
characters.add("""'\'''\n''\\''\t''\e''\a''\r'""")
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\'"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\n"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\\"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\t"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\e"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\a"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(makeChar(pos=(i, i + 3), content="""\r"""))
|
||||
inc(i, 4)
|
||||
charTokens.add(endOfFile(pos=(i, i)))
|
||||
suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
|
||||
echo "Running tokenizer tests"
|
||||
suite.run()
|
||||
suite.run(verbose=true)
|
||||
if suite.successful():
|
||||
echo "OK: All tokenizer tests were successful"
|
||||
quit(0)
|
||||
|
|
Loading…
Reference in New Issue