Improve testing capabilities, initial work on parser testing. WIP

This commit is contained in:
Mattia Giambirtone 2024-03-08 11:21:07 +01:00
parent e2b4037b73
commit 9d9093ca6c
6 changed files with 234 additions and 151 deletions

View File

@ -323,19 +323,19 @@ type
default*: BlockStmt
proc isLiteral*(self: ASTNode): bool =
## Returns whether the given AST node
## represents a literal, constant expression
return self.kind in [intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, floatExpr, nanExpr, infExpr]
proc isConst*(self: ASTNode): bool =
## Returns true if the given
## AST node represents a value
## of constant type. All integers,
## of a constant type. All integers,
## strings and singletons count as
## constants
case self.kind:
of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
floatExpr, nanExpr, infExpr:
return true
else:
return false
return self.isLiteral() # TODO
proc isDecl*(self: ASTNode): bool =

View File

@ -41,26 +41,49 @@ type
symbols: TableRef[string, TokenType]
StringParseMode = enum
Default, Raw, Format, Byte
Default, Raw, Format, Byte, Char
Lexer* = ref object
## A lexer object
# Contains all the symbols we recognize
symbols*: SymbolTable
# The peon source code being tokenized
source: string
# This will contain the tokenized output after
# lex()
tokens: seq[Token]
# Number of lines encountered so far
line: int
# start contains the absolute position in the
# source after we last saw a token, while current
# is incremented every time we call step(). This is
# necessary for multi-byte tokens
start: int
current: int
# Current file being tokenized (for error reporting)
file: string
# Tuples of absolute locations where lines start and
# end
lines: seq[tuple[start, stop: int]]
# Location of the last newline
lastLine: int
# Relative position in the current line
linePos: int
# Absolute location in the source code where the current
# line starts
lineCurrent: int
# Number of spaces since we last met a valid token.
# Useful for checking indentation and maybe in the
# future for precedence inference
spaces: int
LexingError* = ref object of PeonException
## A lexing exception
lexer*: Lexer
# Absolute location where the error
# occurred. The rest of the error metadata
# can be pulled out of the lexer object itself
pos*: tuple[start, stop: int]
@ -112,8 +135,7 @@ proc getToken(self: Lexer, lexeme: string): Token =
## string according to the symbol table or
## returns nil if there's no match
let table = self.symbols
var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(
lexeme, NoMatch))
var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(lexeme, NoMatch))
if kind == NoMatch:
return nil
new(result)
@ -171,9 +193,9 @@ proc getLine*(self: Lexer): int = self.line
proc getLines*(self: Lexer): seq[tuple[start, stop: int]] = self.lines
proc getSource*(self: Lexer): string = self.source
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] =
if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
self.incLine()
return self.lines[line - 1]
if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
self.incLine()
return self.lines[line - 1]
proc newLexer*(self: Lexer = nil): Lexer =
@ -397,11 +419,10 @@ proc parseEscape(self: Lexer) =
self.error(&"invalid escape sequence '\\{self.peek()}'")
proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default) =
## Parses string and character literals. They can be expressed using
## matching pairs of double or single quotes respectively. Most C-style
## escape sequences are supported, moreover, a specific prefix may be
## prepended to the string to instruct the lexer on how to parse it:
proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default, multiline: bool = false) =
## Parses string and character literals enclosed within the given delimiter.
## Most C-style escape sequences are supported; moreover, a specific prefix
## may be prepended to the string to instruct the lexer on how to parse it:
## - b -> declares a byte string, where each character is
## interpreted as an integer instead of a character
## - r -> declares a raw string literal, where escape sequences
@ -410,15 +431,19 @@ proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default
## interpolated using curly braces like f"Hello, {name}!".
## Braces may be escaped using a pair of them, so to represent
## a literal "{" in an f-string, one would use {{ instead
## Multi-line strings can be declared using matching triplets of
## either single or double quotes. They can span across multiple
## lines and escape sequences in them are not parsed, like in raw
## strings, so a multi-line string prefixed with the "r" modifier
## is redundant, although multi-line byte/format strings are supported
## Multi-line strings are supported if multiline is true, but note that
## escape sequences in them are not parsed, just like in raw strings, so a multi-line
## string prefixed with the "r" modifier is redundant; multi-line byte/format
## strings are supported, though
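# A few illustrative literal forms, as implied by the modes documented above
# (sketch only, not an exhaustive grammar):
#   "hi\n"           -> Default: escape sequences are parsed
#   r"C:\temp"       -> Raw: backslashes are kept verbatim
#   b"data"          -> Byte: each character is read as an integer
#   f"Hi, {name}!"   -> Format: braces interpolate, {{ escapes a literal brace
#   'x'              -> Char: must be exactly one character long
#   """more than
#   one line"""      -> multiline: escape sequences are not parsed, as with Raw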
var slen = 0
while not self.check(delimiter) and not self.done():
inc(slen)
if mode == Raw:
inc(slen)
if self.match("\n") and not multiline:
if mode == Char:
self.error("unexpected EOL while parsing character literal")
else:
self.error("unexpected EOL while parsing string literal")
elif mode == Raw:
discard self.step()
elif self.match("\\"):
self.parseEscape()
@ -436,20 +461,20 @@ proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default
self.error("unmatched '}' in format string")
discard self.step()
if self.done() and not self.match(delimiter):
if delimiter == "'":
if mode == Char:
self.error("unexpected EOF while parsing character literal")
else:
self.error("unexpected EOF while parsing string literal")
else:
discard self.step()
if delimiter != "'":
if mode != Char:
self.createToken(String)
else:
if slen == 0:
self.error("character literal cannot be of length zero")
elif slen > 1:
self.error("invalid character literal (length must be one!)")
self.createToken(Char)
self.createToken(TokenType.Char)
proc parseBinary(self: Lexer) =
@ -527,8 +552,7 @@ proc parseNumber(self: Lexer) =
discard self.step()
if self.match("'"):
# Could be a size specifier, better catch it
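# e.g. literals such as 2'u8 or 0xFF'u32 would lex as a single token
# (illustrative suffixes; only the quote-then-identifier shape is checked here)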
while (self.peek().isAlphaNumeric() or self.check("_")) and
not self.done():
while (self.peek().isAlphaNumeric() or self.check("_")) and not self.done():
discard self.step()
self.createToken(kind)
@ -542,10 +566,12 @@ proc parseBackticks(self: Lexer) =
## except for newlines, tabs, carriage returns
## and other useless/confusing escape sequences
## like \e and \f
while not self.match("`") and not self.done():
if self.match(["\n", "\t", "\e", "\r", "\e"]):
self.error(&"unexpected character in stropped identifier: '{self.peek()}'")
while not self.check("`") and not self.done():
if self.match(["\n", "\t", "\e", "\r", "\f"]):
self.error(&"unexpected character in stropped identifier: {self.peek(-1).escape()}")
discard self.step()
if self.done() and not self.match("`"):
self.error("unexpected EOF while parsing stropped identifier")
self.createToken(Identifier)
# Strips the backticks
self.tokens[^1].lexeme = self.tokens[^1].lexeme[1..^2]
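# For instance (illustrative, mirroring the stropped-identifier tests below):
# `foo` or `🌎` produce a single Identifier token whose lexeme is "foo" or "🌎"
# once the enclosing backticks have been stripped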
@ -594,12 +620,16 @@ proc next(self: Lexer) =
self.parseBackticks()
elif self.match(["\"", "'"]):
# String or character literal
var mode = Default
var delimiter = self.peek(-1)
if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
self.peek(-1), 1):
var multiline = false
if delimiter == "'":
mode = Char
if mode != Char and self.match("\"\""):
# Multiline strings start with 3 quotes
delimiter.add(self.step(2))
self.parseString(self.peek(-1), Default)
multiline = true
delimiter.add("\"\"")
self.parseString(delimiter, mode, multiline)
elif self.peek().isDigit():
discard self.step() # Needed because parseNumber reads the next
# character to tell the base of the number
@ -608,20 +638,26 @@ proc next(self: Lexer) =
elif self.peek().isAlphaNumeric() and self.check(["\"", "'"], 1):
# Prefixed string literal (i.e. f"Hi {name}!")
var mode = Default
var delimiter = self.step()
if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
self.peek(-1), 1):
# Multiline strings start with 3 quotes
delimiter.add(self.step(2))
case self.step():
of "r":
self.parseString(delimiter, Raw)
mode = Raw
of "b":
self.parseString(self.step(), Byte)
mode = Byte
of "f":
self.parseString(self.step(), Format)
mode = Format
else:
self.error(&"unknown string prefix '{self.peek(-1)}'")
var delimiter = self.peek(-1)
var multiline = false
if delimiter == "'":
if mode == Format:
self.error("interpolation is not available in character literals")
mode = Char
if mode != Char and self.match("\"\""):
# Multiline strings start with 3 quotes
multiline = true
delimiter.add("\"\"")
self.parseString(delimiter, mode, multiline)
elif self.peek().isAlphaNumeric() or self.check("_"):
# Keywords and identifiers
self.parseIdentifier()
@ -679,6 +715,5 @@ proc lex*(self: Lexer, source, file: string): seq[Token] =
self.lineCurrent = self.linePos
self.tokens.add(Token(kind: EndOfFile, lexeme: "",
line: self.line, pos: (self.current, self.current),
relPos: (start: 0, stop: self.linePos - 1)))
self.incLine()
relPos: (start: self.lineCurrent, stop: self.linePos)))
return self.tokens

View File

@ -15,6 +15,7 @@
## A recursive-descent top-down parser implementation
import std/strformat
import std/sequtils
import std/strutils
import std/tables
import std/os
@ -185,7 +186,7 @@ proc endScope(self: Parser) {.inline.} =
dec(self.scopeDepth)
func peek(self: Parser, distance: int = 0): Token {.inline.} =
proc peek(self: Parser, distance: int = 0): Token {.inline.} =
## Peeks at the token at the given distance.
## If the distance is out of bounds, an EOF
## token is returned. A negative distance may
@ -197,7 +198,7 @@ func peek(self: Parser, distance: int = 0): Token {.inline.} =
result = self.tokens[self.current + distance]
func done(self: Parser): bool {.inline.} =
proc done(self: Parser): bool {.inline.} =
## Returns true if we're at the
## end of the file. Note that the
## parser expects an explicit
@ -215,7 +216,7 @@ proc step(self: Parser): Token {.inline.} =
self.current += 1
proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseError].} =
proc error(self: Parser, message: string, token: Token = nil) =
## Raises a ParseError exception
var token = if token.isNil(): self.peek() else: token
if token.kind == EndOfFile:
@ -230,7 +231,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
# tell at tokenization time which of the two contexts we're in, we just treat everything
# as a symbol and in the cases where we need a specific token we just match the string
# directly
func check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
proc check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
## Checks if the given token at the given distance
## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to
@ -238,7 +239,7 @@ func check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
self.peek(distance).kind == kind
func check(self: Parser, kind: string, distance: int = 0): bool {.inline.} =
proc check(self: Parser, kind: string, distance: int = 0): bool {.inline.} =
## Checks if the given token at the given distance
## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to
@ -1321,7 +1322,10 @@ proc findOperators(self: Parser, tokens: seq[Token]) =
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[ASTNode] =
## Parses a sequence of tokens into a sequence of AST nodes
self.tokens = tokens
# I'm way too lazy to figure out a better way to ignore
# comments, so here ya go
self.tokens = tokens.filterIt(it.kind != Comment)
self.file = file
self.source = source
self.lines = lines

View File

@ -42,7 +42,7 @@ proc formatError*(errKind: string = "", outFile = stderr, file, line: string, li
# Print the line where the error occurred and underline the exact node that caused
# the error. Might be inaccurate, but definitely better than nothing
outFile.styledWrite(fgRed, styleBright, "Source line: ", resetStyle, fgDefault, line[0..<pos.start])
outFile.styledWrite(fgRed, styleUnderscore, line[pos.start..pos.stop])
outFile.styledWrite(fgRed, styleUnderscore, line[pos.start..<pos.stop])
if pos.stop + 1 <= line.high():
outFile.styledWriteLine(fgDefault, line[pos.stop + 1..^1])
else:

View File

@ -21,6 +21,7 @@ import std/strutils
import std/sequtils
import frontend/parsing/lexer
import frontend/parsing/parser
import util/symbols
@ -50,37 +51,40 @@ type
skip*: bool # Skip running this test if true
name*: string # Test name. Only useful for displaying purposes
kind*: TestKind # Test kind (tokenizer, parser, compiler, etc.)
source*: string # The source input of the test. Usually peon code
status*: TestStatus # The test's current state
expected*: TestStatus # The test's expected final state after run()
case kind*: TestKind: # Test kind (tokenizer, parser, compiler, etc.)
of Tokenizer:
lexer: Lexer
tokens: seq[Token]
of Parser:
tree: ParseTree
else:
discard
source*: string # The source input of the test. Usually peon code
status*: TestStatus # The test's current state
case expected*: TestStatus: # The test's expected final state after run()
of Failed:
line: int
message: string
location: tuple[start, stop: int]
else:
discard
outcome*: TestOutcome # The test's outcome
runnerFunc: TestRunner # The test's internal runner function
reason*: string # A human-readable reason why the test failed
TokenizerTest* = ref object of Test
## A tokenization test. Allows to specify
## a desired error message and error location
## upon tokenization failure
message: string
location: tuple[start, stop: int]
line: int
lexer: Lexer
tokens: seq[TokenType]
TestSuite* = ref object
## A suite of tests
tests*: seq[Test]
proc `$`(self: tuple[start, stop: int]): string =
proc `$`*(self: tuple[start, stop: int]): string =
if self == (-1, -1):
result = "none"
else:
result = &"(start={self.start}, stop={self.stop})"
proc `$`(self: TestOutcome): string =
proc `$`*(self: TestOutcome): string =
result &= &"Outcome(error={self.error}"
if not self.exc.isNil():
var name = ($self.exc.name).split(":")[0]
@ -93,25 +97,18 @@ proc `$`(self: TestOutcome): string =
proc `$`*(self: Test): string =
proc setup(self: Test) =
case self.kind:
of Tokenizer:
var self = TokenizerTest(self)
return &"TokenizerTest(name='{self.name}', status={self.status}, outcome={self.outcome}, source='{self.source.escape()}', location={self.location}, message='{self.message}')"
self.lexer = newLexer()
self.lexer.fillSymbolTable()
else:
# TODO
return ""
proc setup(self: TokenizerTest) =
self.lexer = newLexer()
self.lexer.fillSymbolTable()
discard # TODO
proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
## Runs a tokenization test that is expected to succeed
## and checks that it returns the tokens we expect
var test = TokenizerTest(test)
test.setup()
try:
let tokens = test.lexer.lex(test.source, test.name)
@ -120,10 +117,30 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
test.reason = &"Number of provided tokens ({test.tokens.len()}) does not match number of returned tokens ({tokens.len()})"
return
var i = 0
for (token, kind) in zip(tokens, test.tokens):
if token.kind != kind:
for (provided, expected) in zip(tokens, test.tokens):
if provided.kind != expected.kind:
test.status = Failed
test.reason = &"Token type mismatch at #{i}: expected {kind}, got {token.kind}"
test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}"
return
if provided.lexeme != expected.lexeme:
test.status = Failed
test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'"
return
if provided.line != expected.line:
test.status = Failed
test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}"
return
if provided.pos != expected.pos:
test.status = Failed
test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}"
return
if provided.relPos != expected.relPos:
test.status = Failed
test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}"
return
if provided.spaces != expected.spaces:
test.status = Failed
test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}"
return
inc(i)
except LexingError:
@ -133,6 +150,7 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
test.status = Failed
test.outcome.error = true
test.outcome.exc = getCurrentException()
test.reason = "Tokenization failed"
return
except CatchableError:
test.status = Crashed
@ -145,7 +163,6 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
proc tokenizeFailsRunner(suite: TestSuite, test: Test) =
## Runs a tokenization test that is expected to fail
## and checks that it does so in the way we expect
var test = TokenizerTest(test)
test.setup()
try:
discard test.lexer.lex(test.source, test.name)
@ -156,6 +173,11 @@ proc tokenizeFailsRunner(suite: TestSuite, test: Test) =
if exc.pos == test.location and exc.line == test.line and exc.msg == test.message:
test.status = Success
else:
if exc.pos != test.location or exc.line != test.line:
test.reason = &"Expecting failure at {test.line}:({test.location.start}, {test.location.stop}), failed at {exc.line}:({exc.pos.start}, {exc.pos.stop})"
else:
# message is wrong
test.reason = &"Expecting error message to be '{test.message}', got '{exc.msg}'"
test.status = Failed
test.outcome.error = true
test.outcome.exc = getCurrentException()
@ -195,43 +217,37 @@ proc removeTests*(self: TestSuite, tests: openarray[Test]) =
self.removeTest(test)
proc newTokenizeTest(name, source: string, skip = false): TokenizerTest =
## Internal helper to initialize a tokenization test
new(result)
proc testTokenizeSucceeds*(name, source: string, tokens: seq[Token], skip = false): Test =
## Creates a new tokenizer test that is expected to succeed.
## Each token returned by the tokenizer is matched against the
## corresponding entry in the given token list (kind, lexeme, line,
## position and spacing): the test only succeeds if no discrepancies are found
result = Test(expected: Success)
result.outcome.line = -1
result.outcome.location = (-1, -1)
result.name = name
result.kind = Tokenizer
result.status = Init
result.source = source
result.skip = skip
result.line = -1
result.outcome.line = -1
result.outcome.location = (-1, -1)
result.location = (-1, -1)
result.message = ""
proc testTokenizeSucceeds*(name, source: string, tokens: seq[TokenType], skip = false): Test =
## Creates a new tokenizer test that is expected to succeed.
## The type of each token returned by the tokenizer is matched
## against the given list of token types: the test only succeeds
## if no discrepancies are found
var test = newTokenizeTest(name, source, skip)
test.runnerFunc = tokenizeSucceedsRunner
test.tokens = tokens
result = Test(test)
result.expected = Success
result.runnerFunc = tokenizeSucceedsRunner
result.tokens = tokens
proc testTokenizeFails*(name, source: string, message: string, line: int, location: tuple[start, stop: int], skip = false): Test =
## Creates a new tokenizer test that is expected to fail with the
## given error message and at the given location
var test = newTokenizeTest(name, source, skip)
test.runnerFunc = tokenizeFailsRunner
test.message = message
test.location = location
test.line = line
result = Test(test)
result.expected = Failed
result = Test(expected: Failed)
result.name = name
result.kind = Tokenizer
result.status = Init
result.source = source
result.skip = skip
result.runnerFunc = tokenizeFailsRunner
result.message = message
result.location = location
result.line = line
proc run*(self: TestSuite) =
@ -257,30 +273,27 @@ proc successful*(self: TestSuite): bool =
break
proc getExpectedException(self: TokenizerTest): ref Exception =
proc getExpectedException(self: Test): ref Exception =
## Gets the exception that we expect to be
## raised by the test. Could be nil if we
## expect no errors
if self.expected == Success:
return nil
return LexingError(msg: self.message, line: self.line, file: self.name, lexer: self.lexer, pos: self.location)
case self.kind:
of Tokenizer:
return LexingError(msg: self.message, line: self.line, file: self.name, lexer: self.lexer, pos: self.location)
else:
discard # TODO
proc getExpectedOutcome(self: TokenizerTest): TestOutcome =
## Gets the expected outcome of a tokenization test
if self.expected == Success:
return (false, self.getExpectedException(), -1, (-1, -1))
else:
return (false, self.getExpectedException, self.line, self.location)
proc getExpectedOutcome*(self: Test): TestOutcome =
## Returns the expected outcome of a test
## Gets the expected outcome of a test
doAssert self.expected in [Success, Failed], "expected outcome is neither Success nor Failed: wtf?"
case self.kind:
of Tokenizer:
return TokenizerTest(self).getExpectedOutcome()
if self.expected == Success:
return (false, self.getExpectedException(), -1, (-1, -1))
else:
return (false, self.getExpectedException(), self.line, self.location)
else:
# TODO
discard

View File

@ -7,17 +7,36 @@ import frontend/parsing/lexer
import std/strformat
# Utilities to construct tokens for validation purposes
proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), lexeme: string,
spaces: int = 0): Token {.inline.} =
return Token(kind: kind, line: line, pos: pos, relPos: relPos, lexeme: lexeme, spaces: spaces)
proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), spaces: int = 0): Token {.inline.} =
return makeToken(EndOfFile, line, pos, relPos, "", spaces)
proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
return makeToken(String, line, pos, relPos, &"{delimiter}{content}{delimiter}", spaces)
proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
return makeToken(Char, line, pos, relPos, &"'{content}'", spaces)
proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
return makeToken(Symbol, line, pos, relPos, content, spaces)
when isMainModule:
var suite = newTestSuite()
suite.addTests(
[
testTokenizeSucceeds("emptyFile", "", @[TokenType.EndOfFile]),
testTokenizeSucceeds("newLine", "\n", @[TokenType.EndOfFile]),
testTokenizeSucceeds("carriageReturn", "\r", @[TokenType.EndOfFile]),
testTokenizeSucceeds("emptyString", "\"\"", @[TokenType.String, TokenType.EndOfFile]),
testTokenizeSucceeds("escapedSingleQuote", "'\\''", @[TokenType.Char, TokenType.EndOfFile]),
testTokenizeSucceeds("escapedDoubleQuote", """ "\"" """, @[TokenType.String, TokenType.EndOfFile]),
testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
testTokenizeSucceeds("emptyFile", "", @[endOfFile()]),
testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3), relPos=(3, 3))]),
testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1))]),
testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\"", pos=(0, 1), relPos=(0, 1)), endOfFile(pos=(2, 2), relPos=(2, 2))]),
testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\"", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
#[testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩‍👩‍👦‍👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩‍👩‍👦‍👦👩‍👩‍👦‍👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
testTokenizeSucceeds("stringWithEscapes", """ "\n\t\r\e\f" """, @[TokenType.String, TokenType.EndOfFile]),
@ -28,7 +47,7 @@ when isMainModule:
TokenType.EndOfFile]),
testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[TokenType.Float, TokenType.Float, TokenType.Float,
TokenType.Float, TokenType.Float, TokenType.EndOfFile]),
testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),
testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),]#
testTokenizeFails("invalidFloatEndsWithDot", "2.", "invalid float number literal", line=1, location=(0, 1)),
testTokenizeFails("invalidFloatSpuriousChars", "2.f", "invalid float number literal", line=1, location=(0, 1)),
testTokenizeFails("unterminatedChar", "'", "unexpected EOF while parsing character literal", line=1, location=(0, 0)),
@ -41,10 +60,17 @@ when isMainModule:
testTokenizeFails("unterminatedStringWithNewline", "\"\\n;", "unexpected EOF while parsing string literal", line=1, location=(0, 3)),
testTokenizeFails("illegalTabs", "\t", "tabs are not allowed in peon code, use spaces for indentation instead", line=1, location=(0, 0)),
testTokenizeFails("illegalShortUnicodeEscape", """ "\u123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2))
testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
testTokenizeFails("illegalNLInBacktick", "`test\n`", """unexpected character in stropped identifier: "\x0A"""", line=1, location=(0, 5)),
testTokenizeFails("illegalTabInBacktick", "`test\t`", """unexpected character in stropped identifier: "\x09"""", line=1, location=(0, 5)),
testTokenizeFails("illegalEscapeinBacktick", "`test\e`", """unexpected character in stropped identifier: "\x1B"""", line=1, location=(0, 5)),
testTokenizeFails("illegalCRInBacktick", "`test\r`", """unexpected character in stropped identifier: "\x0D"""", line=1, location=(0, 5)),
testTokenizeFails("illegalFFInBacktick", "`test\f`", """unexpected character in stropped identifier: "\x0C"""", line=1, location=(0, 5)),
testTokenizeFails("unterminatedStroppedIdent", "`test", "unexpected EOF while parsing stropped identifier", line=1, location=(0, 4))
]
)
var allTokens = ""
#[var allTokens = ""
var allTokensList = newSeqOfCap[TokenType](symbols.tokens.len())
for lexeme in symbols.tokens.keys():
allTokens.add(&"{lexeme} ")
@ -68,22 +94,27 @@ when isMainModule:
characters.add(&"'{char(value)}'")
charTokens.add(TokenType.EndOfFile)
characters.add("""'\'' '\n' '\\' '\t' '\e' '\a' '\r'""")
suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))]#
echo "Running tokenizer tests"
suite.run()
echo "Tokenization test results: "
for test in suite.tests:
echo &" - {test.name} -> {test.status}"
if test.status in [Failed, Crashed]:
echo &" Details:"
echo &" - Outcome: {test.outcome}"
echo &" - Expected state: {test.expected} "
echo &" - Expected outcome: {test.getExpectedOutcome()}"
echo &"\n The test failed for the following reason: {test.reason}\n"
if not test.outcome.exc.isNil():
echo &"\n Formatted error message follows\n"
print(LexingError(test.outcome.exc))
echo "\n Formatted error message ends here\n"
if suite.successful():
echo "OK: All tokenizer tests were successful"
quit(0)
else:
echo "ERR: Not all tests were successful, details below:\n"
for test in suite.tests:
if test.status in [Failed, Crashed]:
echo &" - {test.name} -> {test.status}"
echo &" Details:"
echo &" - Outcome -> {test.outcome}"
echo &" - Expected state -> {test.expected} "
echo &" - Expected outcome -> {test.getExpectedOutcome()}"
if test.reason.len() > 0:
echo &"\n The test failed for the following reason -> {test.reason}\n"
else:
echo "\n No further information is available about this failure"
if not test.outcome.exc.isNil():
echo &"\n Formatted error message follows\n"
print(LexingError(test.outcome.exc))
echo "\n Formatted error message ends here\n"
quit(-1)