Improve testing capabilities, initial work on parser testing. WIP
This commit is contained in:
parent
e2b4037b73
commit
9d9093ca6c
|
@ -323,19 +323,19 @@ type
|
|||
default*: BlockStmt
|
||||
|
||||
|
||||
proc isLiteral*(self: ASTNode): bool =
|
||||
## Returns whether the given AST node
|
||||
## represents a literal, constant expression
|
||||
return self.kind in [intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, floatExpr, nanExpr, infExpr]
|
||||
|
||||
|
||||
proc isConst*(self: ASTNode): bool =
|
||||
## Returns true if the given
|
||||
## AST node represents a value
|
||||
## of constant type. All integers,
|
||||
## of a constant type. All integers,
|
||||
## strings and singletons count as
|
||||
## constants
|
||||
case self.kind:
|
||||
of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
|
||||
floatExpr, nanExpr, infExpr:
|
||||
return true
|
||||
else:
|
||||
return false
|
||||
return self.isLiteral() # TODO
|
||||
|
||||
|
||||
proc isDecl*(self: ASTNode): bool =
|
||||
|
|
|
@ -41,26 +41,49 @@ type
|
|||
symbols: TableRef[string, TokenType]
|
||||
|
||||
StringParseMode = enum
|
||||
Default, Raw, Format, Byte
|
||||
Default, Raw, Format, Byte, Char
|
||||
|
||||
Lexer* = ref object
|
||||
## A lexer object
|
||||
|
||||
# Contains all the symbols we recognize
|
||||
symbols*: SymbolTable
|
||||
# The peon source code being tokenized
|
||||
source: string
|
||||
# This will contain the tokenized output after
|
||||
# lex()
|
||||
tokens: seq[Token]
|
||||
# Number of lines encountered so far
|
||||
line: int
|
||||
# start contains the absolute position in the
|
||||
# source after we last saw a token, while current
|
||||
# is incremented every time we call step(). This is
|
||||
# necessary for multi-byte tokens
|
||||
start: int
|
||||
current: int
|
||||
# Current file being tokenized (for error reporting)
|
||||
file: string
|
||||
# Tuples of absolute locations where lines start and
|
||||
# end
|
||||
lines: seq[tuple[start, stop: int]]
|
||||
# Location of the last newline
|
||||
lastLine: int
|
||||
# Relative position in the current line
|
||||
linePos: int
|
||||
# Absolute location in the source code where the current
|
||||
# line starts
|
||||
lineCurrent: int
|
||||
# Number of spaces since we last met a valid token.
|
||||
# Useful for checking indentation and maybe in the
|
||||
# future for precedence inference
|
||||
spaces: int
|
||||
|
||||
LexingError* = ref object of PeonException
|
||||
## A lexing exception
|
||||
lexer*: Lexer
|
||||
# Absolute location where the error
|
||||
# occurred. The rest of the error metadata
|
||||
# can be pulled out of the lexer object itself
|
||||
pos*: tuple[start, stop: int]
|
||||
|
||||
|
||||
|
@ -112,8 +135,7 @@ proc getToken(self: Lexer, lexeme: string): Token =
|
|||
## string according to the symbol table or
|
||||
## returns nil if there's no match
|
||||
let table = self.symbols
|
||||
var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(
|
||||
lexeme, NoMatch))
|
||||
var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(lexeme, NoMatch))
|
||||
if kind == NoMatch:
|
||||
return nil
|
||||
new(result)
|
||||
|
@ -171,9 +193,9 @@ proc getLine*(self: Lexer): int = self.line
|
|||
proc getLines*(self: Lexer): seq[tuple[start, stop: int]] = self.lines
|
||||
proc getSource*(self: Lexer): string = self.source
|
||||
proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] =
|
||||
if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
|
||||
self.incLine()
|
||||
return self.lines[line - 1]
|
||||
if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
|
||||
self.incLine()
|
||||
return self.lines[line - 1]
|
||||
|
||||
|
||||
proc newLexer*(self: Lexer = nil): Lexer =
|
||||
|
@ -397,11 +419,10 @@ proc parseEscape(self: Lexer) =
|
|||
self.error(&"invalid escape sequence '\\{self.peek()}'")
|
||||
|
||||
|
||||
proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default) =
|
||||
## Parses string and character literals. They can be expressed using
|
||||
## matching pairs of double or single quotes respectively. Most C-style
|
||||
## escape sequences are supported, moreover, a specific prefix may be
|
||||
## prepended to the string to instruct the lexer on how to parse it:
|
||||
proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default, multiline: bool = false) =
|
||||
## Parses string and character literals enclosed within the given delimiter.
|
||||
## Most C-style escape sequences are supported, moreover, a specific prefix
|
||||
## may be prepended to the string to instruct the lexer on how to parse it:
|
||||
## - b -> declares a byte string, where each character is
|
||||
## interpreted as an integer instead of a character
|
||||
## - r -> declares a raw string literal, where escape sequences
|
||||
|
@ -410,15 +431,19 @@ proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default
|
|||
## interpolated using curly braces like f"Hello, {name}!".
|
||||
## Braces may be escaped using a pair of them, so to represent
|
||||
## a literal "{" in an f-string, one would use {{ instead
|
||||
## Multi-line strings can be declared using matching triplets of
|
||||
## either single or double quotes. They can span across multiple
|
||||
## lines and escape sequences in them are not parsed, like in raw
|
||||
## strings, so a multi-line string prefixed with the "r" modifier
|
||||
## is redundant, although multi-line byte/format strings are supported
|
||||
## Multi-line strings are supported if multiline equals true, but note that
|
||||
## escape sequences in them are not parsed, like in raw strings, so a multi-line
|
||||
## string prefixed with the "r" modifier is redundant, although multi-line byte/format
|
||||
## strings are supported
|
||||
var slen = 0
|
||||
while not self.check(delimiter) and not self.done():
|
||||
inc(slen)
|
||||
if mode == Raw:
|
||||
inc(slen)
|
||||
if self.match("\n") and not multiline:
|
||||
if mode == Char:
|
||||
self.error("unexpected EOL while parsing character literal")
|
||||
else:
|
||||
self.error("unexpected EOL while parsing string literal")
|
||||
elif mode == Raw:
|
||||
discard self.step()
|
||||
elif self.match("\\"):
|
||||
self.parseEscape()
|
||||
|
@ -436,20 +461,20 @@ proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default
|
|||
self.error("unmatched '}' in format string")
|
||||
discard self.step()
|
||||
if self.done() and not self.match(delimiter):
|
||||
if delimiter == "'":
|
||||
if mode == Char:
|
||||
self.error("unexpected EOF while parsing character literal")
|
||||
else:
|
||||
self.error("unexpected EOF while parsing string literal")
|
||||
else:
|
||||
discard self.step()
|
||||
if delimiter != "'":
|
||||
if mode != Char:
|
||||
self.createToken(String)
|
||||
else:
|
||||
if slen == 0:
|
||||
self.error("character literal cannot be of length zero")
|
||||
elif slen > 1:
|
||||
self.error("invalid character literal (length must be one!)")
|
||||
self.createToken(Char)
|
||||
self.createToken(TokenType.Char)
|
||||
|
||||
|
||||
proc parseBinary(self: Lexer) =
|
||||
|
@ -527,8 +552,7 @@ proc parseNumber(self: Lexer) =
|
|||
discard self.step()
|
||||
if self.match("'"):
|
||||
# Could be a size specifier, better catch it
|
||||
while (self.peek().isAlphaNumeric() or self.check("_")) and
|
||||
not self.done():
|
||||
while (self.peek().isAlphaNumeric() or self.check("_")) and not self.done():
|
||||
discard self.step()
|
||||
self.createToken(kind)
|
||||
|
||||
|
@ -542,10 +566,12 @@ proc parseBackticks(self: Lexer) =
|
|||
## except for newlines, tabs, carriage returns
|
||||
## and other useless/confusing escape sequences
|
||||
## like \e and \f
|
||||
while not self.match("`") and not self.done():
|
||||
if self.match(["\n", "\t", "\e", "\r", "\e"]):
|
||||
self.error(&"unexpected character in stropped identifier: '{self.peek()}'")
|
||||
while not self.check("`") and not self.done():
|
||||
if self.match(["\n", "\t", "\e", "\r", "\f"]):
|
||||
self.error(&"unexpected character in stropped identifier: {self.peek(-1).escape()}")
|
||||
discard self.step()
|
||||
if self.done() and not self.match("`"):
|
||||
self.error("unexpected EOF while parsing stropped identifier")
|
||||
self.createToken(Identifier)
|
||||
# Strips the backticks
|
||||
self.tokens[^1].lexeme = self.tokens[^1].lexeme[1..^2]
|
||||
|
@ -594,12 +620,16 @@ proc next(self: Lexer) =
|
|||
self.parseBackticks()
|
||||
elif self.match(["\"", "'"]):
|
||||
# String or character literal
|
||||
var mode = Default
|
||||
var delimiter = self.peek(-1)
|
||||
if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
|
||||
self.peek(-1), 1):
|
||||
var multiline = false
|
||||
if delimiter == "'":
|
||||
mode = Char
|
||||
if mode != Char and self.match("\"\""):
|
||||
# Multiline strings start with 3 quotes
|
||||
delimiter.add(self.step(2))
|
||||
self.parseString(self.peek(-1), Default)
|
||||
multiline = true
|
||||
delimiter.add("\"\"")
|
||||
self.parseString(delimiter, mode, multiline)
|
||||
elif self.peek().isDigit():
|
||||
discard self.step() # Needed because parseNumber reads the next
|
||||
# character to tell the base of the number
|
||||
|
@ -608,20 +638,26 @@ proc next(self: Lexer) =
|
|||
elif self.peek().isAlphaNumeric() and self.check(["\"", "'"], 1):
|
||||
# Prefixed string literal (i.e. f"Hi {name}!")
|
||||
var mode = Default
|
||||
var delimiter = self.step()
|
||||
if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
|
||||
self.peek(-1), 1):
|
||||
# Multiline strings start with 3 quotes
|
||||
delimiter.add(self.step(2))
|
||||
case self.step():
|
||||
of "r":
|
||||
self.parseString(delimiter, Raw)
|
||||
mode = Raw
|
||||
of "b":
|
||||
self.parseString(self.step(), Byte)
|
||||
mode = Byte
|
||||
of "f":
|
||||
self.parseString(self.step(), Format)
|
||||
mode = Format
|
||||
else:
|
||||
self.error(&"unknown string prefix '{self.peek(-1)}'")
|
||||
var delimiter = self.peek(-1)
|
||||
var multiline = false
|
||||
if delimiter == "'":
|
||||
if mode == Format:
|
||||
self.error("interpolation is not available in character literals")
|
||||
mode = Char
|
||||
if mode != Char and self.match("\"\""):
|
||||
# Multiline strings start with 3 quotes
|
||||
multiline = true
|
||||
delimiter.add("\"\"")
|
||||
self.parseString(delimiter, mode, multiline)
|
||||
elif self.peek().isAlphaNumeric() or self.check("_"):
|
||||
# Keywords and identifiers
|
||||
self.parseIdentifier()
|
||||
|
@ -679,6 +715,5 @@ proc lex*(self: Lexer, source, file: string): seq[Token] =
|
|||
self.lineCurrent = self.linePos
|
||||
self.tokens.add(Token(kind: EndOfFile, lexeme: "",
|
||||
line: self.line, pos: (self.current, self.current),
|
||||
relPos: (start: 0, stop: self.linePos - 1)))
|
||||
self.incLine()
|
||||
relPos: (start: self.lineCurrent, stop: self.linePos)))
|
||||
return self.tokens
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
## A recursive-descent top-down parser implementation
|
||||
|
||||
import std/strformat
|
||||
import std/sequtils
|
||||
import std/strutils
|
||||
import std/tables
|
||||
import std/os
|
||||
|
@ -185,7 +186,7 @@ proc endScope(self: Parser) {.inline.} =
|
|||
dec(self.scopeDepth)
|
||||
|
||||
|
||||
func peek(self: Parser, distance: int = 0): Token {.inline.} =
|
||||
proc peek(self: Parser, distance: int = 0): Token {.inline.} =
|
||||
## Peeks at the token at the given distance.
|
||||
## If the distance is out of bounds, an EOF
|
||||
## token is returned. A negative distance may
|
||||
|
@ -197,7 +198,7 @@ func peek(self: Parser, distance: int = 0): Token {.inline.} =
|
|||
result = self.tokens[self.current + distance]
|
||||
|
||||
|
||||
func done(self: Parser): bool {.inline.} =
|
||||
proc done(self: Parser): bool {.inline.} =
|
||||
## Returns true if we're at the
|
||||
## end of the file. Note that the
|
||||
## parser expects an explicit
|
||||
|
@ -215,7 +216,7 @@ proc step(self: Parser): Token {.inline.} =
|
|||
self.current += 1
|
||||
|
||||
|
||||
proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseError].} =
|
||||
proc error(self: Parser, message: string, token: Token = nil) =
|
||||
## Raises a ParseError exception
|
||||
var token = if token.isNil(): self.peek() else: token
|
||||
if token.kind == EndOfFile:
|
||||
|
@ -230,7 +231,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
|
|||
# tell at tokenization time which of the two contexts we're in, we just treat everything
|
||||
# as a symbol and in the cases where we need a specific token we just match the string
|
||||
# directly
|
||||
func check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
|
||||
proc check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
|
||||
## Checks if the given token at the given distance
|
||||
## matches the expected kind and returns a boolean.
|
||||
## The distance parameter is passed directly to
|
||||
|
@ -238,7 +239,7 @@ func check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
|
|||
self.peek(distance).kind == kind
|
||||
|
||||
|
||||
func check(self: Parser, kind: string, distance: int = 0): bool {.inline.} =
|
||||
proc check(self: Parser, kind: string, distance: int = 0): bool {.inline.} =
|
||||
## Checks if the given token at the given distance
|
||||
## matches the expected kind and returns a boolean.
|
||||
## The distance parameter is passed directly to
|
||||
|
@ -1321,7 +1322,10 @@ proc findOperators(self: Parser, tokens: seq[Token]) =
|
|||
|
||||
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[ASTNode] =
|
||||
## Parses a sequence of tokens into a sequence of AST nodes
|
||||
self.tokens = tokens
|
||||
|
||||
# I'm way too lazy to figure out a better way to ignore
|
||||
# comments, so here ya go
|
||||
self.tokens = tokens.filterIt(it.kind != Comment)
|
||||
self.file = file
|
||||
self.source = source
|
||||
self.lines = lines
|
||||
|
|
|
@ -42,7 +42,7 @@ proc formatError*(errKind: string = "", outFile = stderr, file, line: string, li
|
|||
# Print the line where the error occurred and underline the exact node that caused
|
||||
# the error. Might be inaccurate, but definitely better than nothing
|
||||
outFile.styledWrite(fgRed, styleBright, "Source line: ", resetStyle, fgDefault, line[0..<pos.start])
|
||||
outFile.styledWrite(fgRed, styleUnderscore, line[pos.start..pos.stop])
|
||||
outFile.styledWrite(fgRed, styleUnderscore, line[pos.start..<pos.stop])
|
||||
if pos.stop + 1 <= line.high():
|
||||
outFile.styledWriteLine(fgDefault, line[pos.stop + 1..^1])
|
||||
else:
|
||||
|
|
|
@ -21,6 +21,7 @@ import std/strutils
|
|||
import std/sequtils
|
||||
|
||||
import frontend/parsing/lexer
|
||||
import frontend/parsing/parser
|
||||
import util/symbols
|
||||
|
||||
|
||||
|
@ -50,37 +51,40 @@ type
|
|||
|
||||
skip*: bool # Skip running this test if true
|
||||
name*: string # Test name. Only useful for displaying purposes
|
||||
kind*: TestKind # Test kind (tokenizer, parser, compiler, etc.)
|
||||
source*: string # The source input of the test. Usually peon code
|
||||
status*: TestStatus # The test's current state
|
||||
expected*: TestStatus # The test's expected final state after run()
|
||||
case kind*: TestKind: # Test kind (tokenizer, parser, compiler, etc.)
|
||||
of Tokenizer:
|
||||
lexer: Lexer
|
||||
tokens: seq[Token]
|
||||
of Parser:
|
||||
tree: ParseTree
|
||||
else:
|
||||
discard
|
||||
source*: string # The source input of the test. Usually peon code
|
||||
status*: TestStatus # The test's current state
|
||||
case expected*: TestStatus: # The test's expected final state after run()
|
||||
of Failed:
|
||||
line: int
|
||||
message: string
|
||||
location: tuple[start, stop: int]
|
||||
else:
|
||||
discard
|
||||
outcome*: TestOutcome # The test's outcome
|
||||
runnerFunc: TestRunner # The test's internal runner function
|
||||
reason*: string # A human readable reason why the test failed
|
||||
reason*: string # A human readable reason why the test failed
|
||||
|
||||
|
||||
TokenizerTest* = ref object of Test
|
||||
## A tokenization test. Allows to specify
|
||||
## a desired error message and error location
|
||||
## upon tokenization failure
|
||||
message: string
|
||||
location: tuple[start, stop: int]
|
||||
line: int
|
||||
lexer: Lexer
|
||||
tokens: seq[TokenType]
|
||||
|
||||
TestSuite* = ref object
|
||||
## A suite of tests
|
||||
tests*: seq[Test]
|
||||
|
||||
proc `$`(self: tuple[start, stop: int]): string =
|
||||
|
||||
proc `$`*(self: tuple[start, stop: int]): string =
|
||||
if self == (-1, -1):
|
||||
result = "none"
|
||||
else:
|
||||
result = &"(start={self.start}, stop={self.stop})"
|
||||
|
||||
|
||||
proc `$`(self: TestOutcome): string =
|
||||
proc `$`*(self: TestOutcome): string =
|
||||
result &= &"Outcome(error={self.error}"
|
||||
if not self.exc.isNil():
|
||||
var name = ($self.exc.name).split(":")[0]
|
||||
|
@ -93,25 +97,18 @@ proc `$`(self: TestOutcome): string =
|
|||
|
||||
|
||||
|
||||
proc `$`*(self: Test): string =
|
||||
proc setup(self: Test) =
|
||||
case self.kind:
|
||||
of Tokenizer:
|
||||
var self = TokenizerTest(self)
|
||||
return &"TokenizerTest(name='{self.name}', status={self.status}, outcome={self.outcome}, source='{self.source.escape()}', location={self.location}, message='{self.message}')"
|
||||
self.lexer = newLexer()
|
||||
self.lexer.fillSymbolTable()
|
||||
else:
|
||||
# TODO
|
||||
return ""
|
||||
|
||||
|
||||
proc setup(self: TokenizerTest) =
|
||||
self.lexer = newLexer()
|
||||
self.lexer.fillSymbolTable()
|
||||
discard # TODO
|
||||
|
||||
|
||||
proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
|
||||
## Runs a tokenization test that is expected to succeed
|
||||
## and checks that it returns the tokens we expect
|
||||
var test = TokenizerTest(test)
|
||||
test.setup()
|
||||
try:
|
||||
let tokens = test.lexer.lex(test.source, test.name)
|
||||
|
@ -120,10 +117,30 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
|
|||
test.reason = &"Number of provided tokens ({test.tokens.len()}) does not match number of returned tokens ({tokens.len()})"
|
||||
return
|
||||
var i = 0
|
||||
for (token, kind) in zip(tokens, test.tokens):
|
||||
if token.kind != kind:
|
||||
for (provided, expected) in zip(tokens, test.tokens):
|
||||
if provided.kind != expected.kind:
|
||||
test.status = Failed
|
||||
test.reason = &"Token type mismatch at #{i}: expected {kind}, got {token.kind}"
|
||||
test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}"
|
||||
return
|
||||
if provided.lexeme != expected.lexeme:
|
||||
test.status = Failed
|
||||
test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'"
|
||||
return
|
||||
if provided.line != expected.line:
|
||||
test.status = Failed
|
||||
test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}"
|
||||
return
|
||||
if provided.pos != expected.pos:
|
||||
test.status = Failed
|
||||
test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}"
|
||||
return
|
||||
if provided.relPos != expected.relPos:
|
||||
test.status = Failed
|
||||
test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}"
|
||||
return
|
||||
if provided.spaces != expected.spaces:
|
||||
test.status = Failed
|
||||
test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}"
|
||||
return
|
||||
inc(i)
|
||||
except LexingError:
|
||||
|
@ -133,6 +150,7 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
|
|||
test.status = Failed
|
||||
test.outcome.error = true
|
||||
test.outcome.exc = getCurrentException()
|
||||
test.reason = "Tokenization failed"
|
||||
return
|
||||
except CatchableError:
|
||||
test.status = Crashed
|
||||
|
@ -145,7 +163,6 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
|
|||
proc tokenizeFailsRunner(suite: TestSuite, test: Test) =
|
||||
## Runs a tokenization test that is expected to fail
|
||||
## and checks that it does so in the way we expect
|
||||
var test = TokenizerTest(test)
|
||||
test.setup()
|
||||
try:
|
||||
discard test.lexer.lex(test.source, test.name)
|
||||
|
@ -156,6 +173,11 @@ proc tokenizeFailsRunner(suite: TestSuite, test: Test) =
|
|||
if exc.pos == test.location and exc.line == test.line and exc.msg == test.message:
|
||||
test.status = Success
|
||||
else:
|
||||
if exc.pos != test.location or exc.line != test.line:
|
||||
test.reason = &"Expecting failure at {test.line}:({test.location.start}, {test.location.stop}), failed at {exc.line}:({exc.pos.start}, {exc.pos.stop})"
|
||||
else:
|
||||
# message is wrong
|
||||
test.reason = &"Expecting error message to be '{test.message}', got '{exc.msg}'"
|
||||
test.status = Failed
|
||||
test.outcome.error = true
|
||||
test.outcome.exc = getCurrentException()
|
||||
|
@ -195,43 +217,37 @@ proc removeTests*(self: TestSuite, tests: openarray[Test]) =
|
|||
self.removeTest(test)
|
||||
|
||||
|
||||
proc newTokenizeTest(name, source: string, skip = false): TokenizerTest =
|
||||
## Internal helper to initialize a tokenization test
|
||||
new(result)
|
||||
proc testTokenizeSucceeds*(name, source: string, tokens: seq[Token], skip = false): Test =
|
||||
## Creates a new tokenizer test that is expected to succeed.
|
||||
## Each token returned by the tokenizer is matched against the given
|
||||
## list of expected tokens (kind, lexeme, line, position, relative
|
||||
## position and spacing): the test only succeeds if no discrepancies are found
|
||||
result = Test(expected: Success)
|
||||
result.outcome.line = -1
|
||||
result.outcome.location = (-1, -1)
|
||||
result.name = name
|
||||
result.kind = Tokenizer
|
||||
result.status = Init
|
||||
result.source = source
|
||||
result.skip = skip
|
||||
result.line = -1
|
||||
result.outcome.line = -1
|
||||
result.outcome.location = (-1, -1)
|
||||
result.location = (-1, -1)
|
||||
result.message = ""
|
||||
|
||||
|
||||
proc testTokenizeSucceeds*(name, source: string, tokens: seq[TokenType], skip = false): Test =
|
||||
## Creates a new tokenizer test that is expected to succeed.
|
||||
## The type of each token returned by the tokenizer is matched
|
||||
## against the given list of token types: the test only succeeds
|
||||
## if no discrepancies are found
|
||||
var test = newTokenizeTest(name, source, skip)
|
||||
test.runnerFunc = tokenizeSucceedsRunner
|
||||
test.tokens = tokens
|
||||
result = Test(test)
|
||||
result.expected = Success
|
||||
result.runnerFunc = tokenizeSucceedsRunner
|
||||
result.tokens = tokens
|
||||
|
||||
|
||||
proc testTokenizeFails*(name, source: string, message: string, line: int, location: tuple[start, stop: int], skip = false): Test =
|
||||
## Creates a new tokenizer test that is expected to fail with the
|
||||
## given error message and at the given location
|
||||
var test = newTokenizeTest(name, source, skip)
|
||||
test.runnerFunc = tokenizeFailsRunner
|
||||
test.message = message
|
||||
test.location = location
|
||||
test.line = line
|
||||
result = Test(test)
|
||||
result.expected = Failed
|
||||
result = Test(expected: Failed)
|
||||
result.name = name
|
||||
result.kind = Tokenizer
|
||||
result.status = Init
|
||||
result.source = source
|
||||
result.skip = skip
|
||||
result.runnerFunc = tokenizeFailsRunner
|
||||
result.message = message
|
||||
result.location = location
|
||||
result.line = line
|
||||
|
||||
|
||||
proc run*(self: TestSuite) =
|
||||
|
@ -257,30 +273,27 @@ proc successful*(self: TestSuite): bool =
|
|||
break
|
||||
|
||||
|
||||
proc getExpectedException(self: TokenizerTest): ref Exception =
|
||||
proc getExpectedException(self: Test): ref Exception =
|
||||
## Gets the exception that we expect to be
|
||||
## raised by the test. Could be nil if we
|
||||
## expect no errors
|
||||
if self.expected == Success:
|
||||
return nil
|
||||
return LexingError(msg: self.message, line: self.line, file: self.name, lexer: self.lexer, pos: self.location)
|
||||
case self.kind:
|
||||
of Tokenizer:
|
||||
return LexingError(msg: self.message, line: self.line, file: self.name, lexer: self.lexer, pos: self.location)
|
||||
else:
|
||||
discard # TODO
|
||||
|
||||
|
||||
|
||||
proc getExpectedOutcome(self: TokenizerTest): TestOutcome =
|
||||
## Gets the expected outcome of a tokenization test
|
||||
if self.expected == Success:
|
||||
return (false, self.getExpectedException(), -1, (-1, -1))
|
||||
else:
|
||||
return (false, self.getExpectedException, self.line, self.location)
|
||||
|
||||
|
||||
proc getExpectedOutcome*(self: Test): TestOutcome =
|
||||
## Returns the expected outcome of a test
|
||||
## Gets the expected outcome of a test
|
||||
doAssert self.expected in [Success, Failed], "expected outcome is neither Success nor Failed: wtf?"
|
||||
case self.kind:
|
||||
of Tokenizer:
|
||||
return TokenizerTest(self).getExpectedOutcome()
|
||||
if self.expected == Success:
|
||||
return (false, self.getExpectedException(), -1, (-1, -1))
|
||||
else:
|
||||
return (false, self.getExpectedException(), self.line, self.location)
|
||||
else:
|
||||
# TODO
|
||||
discard
|
|
@ -7,17 +7,36 @@ import frontend/parsing/lexer
|
|||
import std/strformat
|
||||
|
||||
|
||||
# Utilities to construct tokens for validation purposes
|
||||
|
||||
proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), lexeme: string,
|
||||
spaces: int = 0): Token {.inline.} =
|
||||
return Token(kind: kind, line: line, pos: pos, relPos: relPos, lexeme: lexeme, spaces: spaces)
|
||||
|
||||
proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(EndOfFile, line, pos, relPos, "", spaces)
|
||||
|
||||
proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(String, line, pos, relPos, &"{delimiter}{content}{delimiter}", spaces)
|
||||
|
||||
proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(Char, line, pos, relPos, &"'{content}'", spaces)
|
||||
|
||||
proc makeSymbol(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
|
||||
return makeToken(Symbol, line, pos, relPos, content, spaces)
|
||||
|
||||
|
||||
when isMainModule:
|
||||
var suite = newTestSuite()
|
||||
suite.addTests(
|
||||
[
|
||||
testTokenizeSucceeds("emptyFile", "", @[TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("newLine", "\n", @[TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("carriageReturn", "\r", @[TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("emptyString", "\"\"", @[TokenType.String, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("escapedSingleQuote", "'\\''", @[TokenType.Char, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("escapedDoubleQuote", """ "\"" """, @[TokenType.String, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("bareUnicode", "🌎 😂 👩👩👦👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("emptyFile", "", @[endOfFile()]),
|
||||
testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3), relPos=(3, 3))]),
|
||||
testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1))]),
|
||||
testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\"", pos=(0, 1), relPos=(0, 1)), endOfFile(pos=(2, 2), relPos=(2, 2))]),
|
||||
testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
|
||||
testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\"", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
|
||||
#[testTokenizeSucceeds("bareUnicode", "🌎 😂 👩👩👦👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩👩👦👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩👩👦👦👩👩👦👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("stringWithEscapes", """ "\n\t\r\e\f" """, @[TokenType.String, TokenType.EndOfFile]),
|
||||
|
@ -28,7 +47,7 @@ when isMainModule:
|
|||
TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[TokenType.Float, TokenType.Float, TokenType.Float,
|
||||
TokenType.Float, TokenType.Float, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),
|
||||
testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),]#
|
||||
testTokenizeFails("invalidFloatEndsWithDot", "2.", "invalid float number literal", line=1, location=(0, 1)),
|
||||
testTokenizeFails("invalidFloatSpuriousChars", "2.f", "invalid float number literal", line=1, location=(0, 1)),
|
||||
testTokenizeFails("unterminatedChar", "'", "unexpected EOF while parsing character literal", line=1, location=(0, 0)),
|
||||
|
@ -41,10 +60,17 @@ when isMainModule:
|
|||
testTokenizeFails("unterminatedStringWithNewline", "\"\\n;", "unexpected EOF while parsing string literal", line=1, location=(0, 3)),
|
||||
testTokenizeFails("illegalTabs", "\t", "tabs are not allowed in peon code, use spaces for indentation instead", line=1, location=(0, 0)),
|
||||
testTokenizeFails("illegalShortUnicodeEscape", """ "\u123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
|
||||
testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2))
|
||||
testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
|
||||
testTokenizeFails("illegalNLInBacktick", "`test\n`", """unexpected character in stropped identifier: "\x0A"""", line=1, location=(0, 5)),
|
||||
testTokenizeFails("illegalTabInBacktick", "`test\t`", """unexpected character in stropped identifier: "\x09"""", line=1, location=(0, 5)),
|
||||
testTokenizeFails("illegalEscapeinBacktick", "`test\e`", """unexpected character in stropped identifier: "\x1B"""", line=1, location=(0, 5)),
|
||||
testTokenizeFails("illegalCRInBacktick", "`test\r`", """unexpected character in stropped identifier: "\x0D"""", line=1, location=(0, 5)),
|
||||
testTokenizeFails("illegalFFInBacktick", "`test\f`", """unexpected character in stropped identifier: "\x0C"""", line=1, location=(0, 5)),
|
||||
testTokenizeFails("unterminatedStroppedIdent", "`test", "unexpected EOF while parsing stropped identifier", line=1, location=(0, 4))
|
||||
|
||||
]
|
||||
)
|
||||
var allTokens = ""
|
||||
#[var allTokens = ""
|
||||
var allTokensList = newSeqOfCap[TokenType](symbols.tokens.len())
|
||||
for lexeme in symbols.tokens.keys():
|
||||
allTokens.add(&"{lexeme} ")
|
||||
|
@ -68,22 +94,27 @@ when isMainModule:
|
|||
characters.add(&"'{char(value)}'")
|
||||
charTokens.add(TokenType.EndOfFile)
|
||||
characters.add("""'\'' '\n' '\\' '\t' '\e' '\a' '\r'""")
|
||||
suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
|
||||
suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))]#
|
||||
echo "Running tokenizer tests"
|
||||
suite.run()
|
||||
echo "Tokenization test results: "
|
||||
for test in suite.tests:
|
||||
echo &" - {test.name} -> {test.status}"
|
||||
if test.status in [Failed, Crashed]:
|
||||
echo &" Details:"
|
||||
echo &" - Outcome: {test.outcome}"
|
||||
echo &" - Expected state: {test.expected} "
|
||||
echo &" - Expected outcome: {test.getExpectedOutcome()}"
|
||||
echo &"\n The test failed for the following reason: {test.reason}\n"
|
||||
if not test.outcome.exc.isNil():
|
||||
echo &"\n Formatted error message follows\n"
|
||||
print(LexingError(test.outcome.exc))
|
||||
echo "\n Formatted error message ends here\n"
|
||||
if suite.successful():
|
||||
echo "OK: All tokenizer tests were successful"
|
||||
quit(0)
|
||||
else:
|
||||
echo "ERR: Not all tests were successful, details below:\n"
|
||||
for test in suite.tests:
|
||||
if test.status in [Failed, Crashed]:
|
||||
echo &" - {test.name} -> {test.status}"
|
||||
echo &" Details:"
|
||||
echo &" - Outcome -> {test.outcome}"
|
||||
echo &" - Expected state -> {test.expected} "
|
||||
echo &" - Expected outcome -> {test.getExpectedOutcome()}"
|
||||
if test.reason.len() > 0:
|
||||
echo &"\n The test failed for the following reason -> {test.reason}\n"
|
||||
else:
|
||||
echo "\n No further information is available about this failure"
|
||||
if not test.outcome.exc.isNil():
|
||||
echo &"\n Formatted error message follows\n"
|
||||
print(LexingError(test.outcome.exc))
|
||||
echo "\n Formatted error message ends here\n"
|
||||
quit(-1)
|
||||
|
|
Loading…
Reference in New Issue