From 3953751cef79c953281e9874b9f425e411f22e84 Mon Sep 17 00:00:00 2001
From: nocturn9x
Date: Fri, 5 Apr 2024 16:24:45 +0200
Subject: [PATCH] Migrate all tokenizer tests to new format

---
 src/frontend/parsing/lexer.nim |   4 +-
 src/util/testing.nim           |  51 +++++++++----
 tests/tokenize.nim             | 129 ++++++++++++++++++++++++---------
 3 files changed, 130 insertions(+), 54 deletions(-)

diff --git a/src/frontend/parsing/lexer.nim b/src/frontend/parsing/lexer.nim
index b3eefc0..da5c9b4 100644
--- a/src/frontend/parsing/lexer.nim
+++ b/src/frontend/parsing/lexer.nim
@@ -274,7 +274,7 @@ proc error(self: Lexer, message: string) =
 
 
 proc check(self: Lexer, s: string, distance: int = 0): bool =
-    ## Behaves like self.match(), without consuming the
+    ## Behaves like self.match(), without consuming the
     ## token. False is returned if we're at EOF
     ## regardless of what the token to check is.
     ## The distance is passed directly to self.peek()
@@ -570,7 +570,7 @@ proc parseBackticks(self: Lexer) =
         if self.match(["\n", "\t", "\e", "\r", "\f"]):
             self.error(&"unexpected character in stropped identifier: {self.peek(-1).escape()}")
         discard self.step()
-    if self.done() and not self.match("`"):
+    if self.done() or not self.match("`"):
         self.error("unexpected EOF while parsing stropped identifier")
     self.createToken(Identifier)
     # Strips the backticks
diff --git a/src/util/testing.nim b/src/util/testing.nim
index 03838ad..b6cf794 100644
--- a/src/util/testing.nim
+++ b/src/util/testing.nim
@@ -120,27 +120,27 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
         for (provided, expected) in zip(tokens, test.tokens):
             if provided.kind != expected.kind:
                 test.status = Failed
-                test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}"
+                test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}\n Expected: {expected}\n Got: {provided}"
                 return
             if provided.lexeme != expected.lexeme:
                 test.status = Failed
-                test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'"
+                test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'\n Expected: {expected}\n Got: {provided}"
                 return
             if provided.line != expected.line:
                 test.status = Failed
-                test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}"
+                test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}\n Expected: {expected}\n Got: {provided}"
                 return
             if provided.pos != expected.pos:
                 test.status = Failed
-                test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}"
+                test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}\n Expected: {expected}\n Got: {provided}"
                 return
             if provided.relPos != expected.relPos:
                 test.status = Failed
-                test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}"
+                test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}\n Expected: {expected}\n Got: {provided}"
                 return
             if provided.spaces != expected.spaces:
                 test.status = Failed
-                test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}"
+                test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}\n Expected: {expected}\n Got: {provided}"
                 return
             inc(i)
     except LexingError:
@@ -220,17 +220,15 @@ proc removeTests*(self: TestSuite, tests: openarray[Test]) =
 proc testTokenizeSucceeds*(name, source: string, tokens: seq[Token], skip = false): Test =
     ## Creates a new tokenizer test that is expected to succeed.
     ## The type of each token returned by the tokenizer is matched
-    ## against the given list of token types: the test only succeeds
+    ## against the given list of tokens: the test only succeeds
     ## if no discrepancies are found
-    result = Test(expected: Success)
+    result = Test(expected: Success, kind: Tokenizer)
     result.outcome.line = -1
     result.outcome.location = (-1, -1)
     result.name = name
-    result.kind = Tokenizer
     result.status = Init
     result.source = source
     result.skip = skip
-    result.expected = Success
     result.runnerFunc = tokenizeSucceedsRunner
     result.tokens = tokens
 
@@ -238,9 +236,8 @@
 proc testTokenizeFails*(name, source: string, message: string, line: int, location: tuple[start, stop: int], skip = false): Test =
     ## Creates a new tokenizer test that is expected to fail with the
     ## given error message and at the given location
-    result = Test(expected: Failed)
+    result = Test(expected: Failed, kind: Tokenizer)
     result.name = name
-    result.kind = Tokenizer
     result.status = Init
     result.source = source
     result.skip = skip
@@ -250,14 +247,37 @@ proc testTokenizeFails*(name, source: string, message: string, line: int, locati
     result.line = line
 
 
-proc run*(self: TestSuite) =
+proc run*(self: TestSuite, verbose: bool = false) =
     ## Runs the test suite to completion,
     ## sequentially
+    if verbose:
+        echo "Starting test suite"
+    var
+        ran = 0
+        failed = 0
+        crashed = 0
+        successful = 0
+        skipped = 0
     for test in self.tests:
         if test.skip:
             test.status = Skipped
+            inc(skipped)
             continue
+        if verbose:
+            stdout.write(&"Running '{test.name}' ({ran + 1}/{self.tests.len()})\r")
         test.runnerFunc(self, test)
+        case test.status:
+            of Success:
+                inc(successful)
+            of Failed:
+                inc(failed)
+            of Crashed:
+                inc(crashed)
+            else:
+                discard
+        inc(ran)
+    if verbose:
+        echo &"Ran {ran} tests ({skipped} skipped, {successful} successful, {failed} failed, {crashed} crashed)"
 
 
 proc successful*(self: TestSuite): bool =
@@ -265,12 +285,11 @@ proc successful*(self: TestSuite): bool =
     ## Returns whether the test suite completed
     ## successfully or not. If called before run(),
     ## this function returns false. Skipped tests
     ## do not affect the outcome of this function
-    result = true
     for test in self.tests:
         if test.status in [Skipped, Success]:
             continue
-        result = false
-        break
+        return false
+    return true
 
 proc getExpectedException(self: Test): ref Exception =
diff --git a/tests/tokenize.nim b/tests/tokenize.nim
index 2a85cdb..35a2c61 100644
--- a/tests/tokenize.nim
+++ b/tests/tokenize.nim
@@ -9,21 +9,34 @@ import std/strformat
 
 
 # Utilities to construct tokens for validation purposes
-proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), lexeme: string,
+proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), lexeme: string,
               spaces: int = 0): Token {.inline.} =
+    var pos = pos
+    if pos == (0, 0):
+        pos = (0, max(lexeme.high(), 0))
+    var relPos = relPos
+    if relPos == (-1, -1):
+        relPos = pos
     return Token(kind: kind, line: line, pos: pos, relPos: relPos, lexeme: lexeme, spaces: spaces)
 
-proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), spaces: int = 0): Token {.inline.} =
-    return makeToken(EndOfFile, line, pos, relPos, "", spaces)
-
-proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
-    return makeToken(String, line, pos, relPos, &"{delimiter}{content}{delimiter}", spaces)
+proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), spaces: int = 0): Token {.inline.} =
+    return makeToken(TokenType.EndOfFile, line, pos, relPos, "", spaces)
 
-proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
-    return makeToken(Char, line, pos, relPos, &"'{content}'", spaces)
+proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(TokenType.String, line, pos, relPos, &"{delimiter}{content}{delimiter}", spaces)
 
-proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
-    return makeToken(Symbol, line, pos, relPos, content, spaces)
+proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(TokenType.Char, line, pos, relPos, &"'{content}'", spaces)
+
+proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), content: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(TokenType.Symbol, line, pos, relPos, content, spaces)
+
+proc makeIdent(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), name: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(TokenType.Identifier, line, pos, relPos, name, spaces)
+
+proc makeInteger(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (-1, -1), value: int, spaces: int = 0): Token {.inline.} =
+    return makeToken(TokenType.Integer, line, pos, relPos, $value, spaces)
 
 
 when isMainModule:
@@ -31,23 +44,47 @@ when isMainModule:
     suite.addTests(
         [
testTokenizeSucceeds("emptyFile", "", @[endOfFile()]), - testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3), relPos=(3, 3))]), - testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1))]), - testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\"", pos=(0, 1), relPos=(0, 1)), endOfFile(pos=(2, 2), relPos=(2, 2))]), - testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]), - testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\"", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]), - #[testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]), - testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩‍👩‍👦‍👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]), - testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩‍👩‍👦‍👦👩‍👩‍👦‍👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]), - testTokenizeSucceeds("stringWithEscapes", """ "\n\t\r\e\f" """, @[TokenType.String, TokenType.EndOfFile]), - testTokenizeSucceeds("allIntegers", "1 0x1 0o1 0b1", @[TokenType.Integer, TokenType.Hex, TokenType.Octal, TokenType.Binary, TokenType.EndOfFile]), + testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3))]), + testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1), relPos=(0, 0))]), + testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\""), endOfFile(pos=(2, 2))]), + testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'"), endOfFile(pos=(4, 4))]), + testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\""), endOfFile(pos=(4, 4))]), + testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[makeSymbol(content="🌎", pos=(0, 3)), makeSymbol(content="😂", pos=(5, 8), spaces=1), + makeSymbol(content="👩‍👩‍👦‍👦", pos=(10, 34), spaces=1), endOfFile(pos=(35, 35))]), + testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩‍👩‍👦‍👦`", @[makeIdent(name="🌎", pos=(0, 5)), + makeIdent(name="😂", pos=(7, 12), spaces=1), + makeIdent(name="👩‍👩‍👦‍👦", pos=(14, 40), spaces=1), + endOfFile(pos=(41, 41))]), + testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩‍👩‍👦‍👦👩‍👩‍👦‍👦`", @[makeIdent(name="🌎🌎", pos=(0, 9)), + makeIdent(name="😂😂", pos=(11, 20), spaces=1), + makeIdent(name="👩‍👩‍👦‍👦👩‍👩‍👦‍👦", pos=(22, 73), spaces=1), + endOfFile(pos=(74, 74))]), + testTokenizeSucceeds("allIntegers", "1 0x1 0o1 0b1", @[makeInteger(value=1), + makeToken(TokenType.Hex, pos=(2, 4), lexeme="0x1", spaces=1), + makeToken(TokenType.Octal, pos=(6, 8), lexeme="0o1", spaces=1), + makeToken(TokenType.Binary, pos=(10, 12), lexeme="0b1", spaces=1), + endOfFile(pos=(13, 13)) + ]), + testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[makeToken(TokenType.Float, pos=(0, 2), lexeme="1.0"), + makeToken(TokenType.Float, pos=(4, 6), lexeme="1e5", spaces=1), + makeToken(TokenType.Float, pos=(8, 10), lexeme="1E5", spaces=1), + makeToken(TokenType.Float, pos=(12, 16), lexeme="1.5e4", spaces=1), + makeToken(TokenType.Float, pos=(18, 22), lexeme="1.5E4", spaces=1), + endOfFile(pos=(23, 23))]), testTokenizeSucceeds("sizedNumbers", "1'u8 0x1'i8 0o1'i64 0b1'u32 2.0'f32 1e5'f64 1E5'f32 1.5e4'f64 1.5E4'f32", - @[TokenType.Integer, TokenType.Hex, TokenType.Octal, 
-                                  TokenType.Float, TokenType.Float, TokenType.Float, TokenType.Float, TokenType.Float,
-                                  TokenType.EndOfFile]),
-            testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[TokenType.Float, TokenType.Float, TokenType.Float,
-                                  TokenType.Float, TokenType.Float, TokenType.EndOfFile]),
-            testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),]#
+                                 @[makeToken(TokenType.Integer, lexeme="1'u8"),
+                                   makeToken(TokenType.Hex, pos=(5, 10), lexeme="0x1'i8", spaces=1),
+                                   makeToken(TokenType.Octal, pos=(12, 18), lexeme="0o1'i64", spaces=1),
+                                   makeToken(TokenType.Binary, pos=(20, 26), lexeme="0b1'u32", spaces=1),
+                                   makeToken(TokenType.Float, pos=(28, 34), lexeme="2.0'f32", spaces=1),
+                                   makeToken(TokenType.Float, pos=(36, 42), lexeme="1e5'f64", spaces=1),
+                                   makeToken(TokenType.Float, pos=(44, 50), lexeme="1E5'f32", spaces=1),
+                                   makeToken(TokenType.Float, pos=(52, 60), lexeme="1.5e4'f64", spaces=1),
+                                   makeToken(TokenType.Float, pos=(62, 70), lexeme="1.5E4'f32", spaces=1),
+                                   endOfFile(pos=(71, 71))]),
+            testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[makeString(spaces=1, content="""\a \b \f \n \r \t \v \" \' \\ """,
+                                  delimiter="\"", pos=(1, 32)),
+                                  endOfFile(pos=(34, 34))]),
             testTokenizeFails("invalidFloatEndsWithDot", "2.", "invalid float number literal", line=1, location=(0, 1)),
             testTokenizeFails("invalidFloatSpuriousChars", "2.f", "invalid float number literal", line=1, location=(0, 1)),
             testTokenizeFails("unterminatedChar", "'", "unexpected EOF while parsing character literal", line=1, location=(0, 0)),
@@ -70,33 +107,53 @@ when isMainModule:
         ]
     )
 
-    #[var allTokens = ""
-    var allTokensList = newSeqOfCap[TokenType](symbols.tokens.len())
+    var allTokens = ""
+    var allTokensList = newSeqOfCap[Token](symbols.tokens.len())
+    var i = 0
     for lexeme in symbols.tokens.keys():
         allTokens.add(&"{lexeme} ")
+        let spaces = if i == 0: 0 else: 1
+        let pos = (i, i + len(lexeme) - 1)
         if lexeme == "_":
             # Due to how the lexer is designed, a bare underscore is
             # parsed as an identifier rather than a symbol
-            allTokensList.add(TokenType.Identifier)
+            allTokensList.add(makeIdent(pos=pos, name="_", spaces=spaces))
         else:
-            allTokensList.add(symbols.tokens[lexeme])
-    allTokensList.add(TokenType.EndOfFile)
+            allTokensList.add(makeToken(symbols.tokens[lexeme], pos=pos, lexeme=lexeme, spaces=spaces))
+        inc(i, len(lexeme) + 1)
+    allTokensList.add(endOfFile(pos=(i, i)))
     suite.addTest(testTokenizeSucceeds("allTokens", allTokens, allTokensList))
     const skippedChars = ['\'', '\n', '\\', '\t', '\e', '\a', '\r'];
     var
         characters = ""
-        charTokens = newSeqOfCap[TokenType](256)
+        charTokens = newSeqOfCap[Token](256)
+    i = 0
     for value in 0..255:
-        charTokens.add(Char)
         if char(value) in skippedChars:
             # These cases are special and we handle them separately
             continue
+        charTokens.add(makeChar(pos=(i, i + 2), content= &"{char(value)}"))
         characters.add(&"'{char(value)}'")
-    charTokens.add(TokenType.EndOfFile)
-    characters.add("""'\'' '\n' '\\' '\t' '\e' '\a' '\r'""")
-    suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))]#
+        inc(i, 3)
+    characters.add("""'\'''\n''\\''\t''\e''\a''\r'""")
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\'"""))
+    inc(i, 4)
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\n"""))
+    inc(i, 4)
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\\"""))
+    inc(i, 4)
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\t"""))
+    inc(i, 4)
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\e"""))
+    inc(i, 4)
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\a"""))
+    inc(i, 4)
+    charTokens.add(makeChar(pos=(i, i + 3), content="""\r"""))
+    inc(i, 4)
+    charTokens.add(endOfFile(pos=(i, i)))
+    suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
     echo "Running tokenizer tests"
-    suite.run()
+    suite.run(verbose=true)
     if suite.successful():
         echo "OK: All tokenizer tests were successful"
         quit(0)