Improve testing capabilities, initial work on parser testing. WIP

2024-03-08 11:21:07 +01:00 · 2024-03-08 11:21:07 +01:00 · 9d9093ca6c
parent e2b4037b73
commit 9d9093ca6c
6 changed files with 234 additions and 151 deletions
--- a/src/frontend/parsing/ast.nim
+++ b/src/frontend/parsing/ast.nim
@ -323,19 +323,19 @@ type
        default*: BlockStmt


+proc isLiteral*(self: ASTNode): bool =
+    ## Returns whether the given AST node
+    ## represents a literal, constant expression
+    return self.kind in [intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr, floatExpr, nanExpr, infExpr]
+

 proc isConst*(self: ASTNode): bool =
    ## Returns true if the given
    ## AST node represents a value
-    ## of constant type. All integers,
+    ## of a constant type. All integers,
    ## strings and singletons count as
    ## constants
-    case self.kind:
-        of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
-            floatExpr, nanExpr, infExpr:
-            return true
-        else:
-            return false
+    return self.isLiteral()   # TODO


 proc isDecl*(self: ASTNode): bool =
--- a/src/frontend/parsing/lexer.nim
+++ b/src/frontend/parsing/lexer.nim
@ -41,26 +41,49 @@ type
        symbols: TableRef[string, TokenType]

    StringParseMode = enum
-        Default, Raw, Format, Byte
+        Default, Raw, Format, Byte, Char

    Lexer* = ref object
        ## A lexer object
+        
+        # Contains all the symbols we recognize
        symbols*: SymbolTable
+        # The peon source code being tokenized
        source: string
+        # This will contain the tokenized output after
+        # lex()
        tokens: seq[Token]
+        # Number of lines encountered so far
        line: int
+        # start contains the absolute position in the
+        # source after we last saw a token, while current
+        # is incremented every time we call step(). This is
+        # necessary for multi-byte tokens
        start: int
        current: int
+        # Current file being tokenized (for error reporting)
        file: string
+        # Tuples of absolute locations where lines start and
+        # end
        lines: seq[tuple[start, stop: int]]
+        # Location of the last newline
        lastLine: int
+        # Relative position in the current line
        linePos: int
+        # Absolute location in the source code where the current
+        # line starts
        lineCurrent: int
+        # Number of spaces since we last met a valid token.
+        # Useful for checking indentation and maybe in the
+        # future for precedence inference
        spaces: int

    LexingError* = ref object of PeonException
        ## A lexing exception
        lexer*: Lexer
+        # Absolute location where the error
+        # occurred. The rest of the error metadata
+        # can be pulled out of the lexer object itself
        pos*: tuple[start, stop: int]


@ -112,8 +135,7 @@ proc getToken(self: Lexer, lexeme: string): Token =
    ## string according to the symbol table or
    ## returns nil if there's no match
    let table = self.symbols
-    var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(
-            lexeme, NoMatch))
+    var kind = table.symbols.getOrDefault(lexeme, table.keywords.getOrDefault(lexeme, NoMatch))
    if kind == NoMatch:
        return nil
    new(result)
@ -171,9 +193,9 @@ proc getLine*(self: Lexer): int = self.line
 proc getLines*(self: Lexer): seq[tuple[start, stop: int]] = self.lines
 proc getSource*(self: Lexer): string = self.source
 proc getRelPos*(self: Lexer, line: int): tuple[start, stop: int] = 
-        if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
-            self.incLine()
-        return self.lines[line - 1]
+    if self.tokens.len() == 0 or self.tokens[^1].kind != EndOfFile:
+        self.incLine()
+    return self.lines[line - 1]


 proc newLexer*(self: Lexer = nil): Lexer =
@ -397,11 +419,10 @@ proc parseEscape(self: Lexer) =
            self.error(&"invalid escape sequence '\\{self.peek()}'")


-proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default) =
-    ## Parses string and character literals. They can be expressed using 
-    ## matching pairs of double or single quotes respectively. Most C-style 
-    ## escape sequences are supported, moreover, a specific prefix may be 
-    ## prepended to the string to instruct the lexer on how to parse it:
+proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default, multiline: bool = false) =
+    ## Parses string and character literals enclosed within the given delimiter.
+    ## Most C-style escape sequences are supported, moreover, a specific prefix
+    ## may be prepended to the string to instruct the lexer on how to parse it:
    ## - b -> declares a byte string, where each character is
    ##     interpreted as an integer instead of a character
    ## - r -> declares a raw string literal, where escape sequences
@ -410,15 +431,19 @@ proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default
    ##     interpolated using curly braces like f"Hello, {name}!".
    ##     Braces may be escaped using a pair of them, so to represent
    ##     a literal "{" in an f-string, one would use {{ instead
-    ## Multi-line strings can be declared using matching triplets of
-    ## either single or double quotes. They can span across multiple
-    ## lines and escape sequences in them are not parsed, like in raw
-    ## strings, so a multi-line string prefixed with the "r" modifier
-    ## is redundant, although multi-line byte/format strings are supported
+    ## Multi-line strings are supported if multiline equals true, but note that
+    ## escape sequences in them are not parsed, like in raw strings, so a multi-line
+    ## string prefixed with the "r" modifier is redundant, although multi-line byte/format
+    ## strings are supported
    var slen = 0
    while not self.check(delimiter) and not self.done():  
-        inc(slen)          
-        if mode == Raw:
+        inc(slen)
+        if self.match("\n") and not multiline:
+            if mode == Char:
+                self.error("unexpected EOL while parsing character literal")
+            else:
+                self.error("unexpected EOL while parsing string literal")     
+        elif mode == Raw:
            discard self.step()
        elif self.match("\\"):
            self.parseEscape()
@ -436,20 +461,20 @@ proc parseString(self: Lexer, delimiter: string, mode: StringParseMode = Default
                self.error("unmatched '}' in format string")
        discard self.step()
    if self.done() and not self.match(delimiter):
-        if delimiter == "'":
+        if mode == Char:
            self.error("unexpected EOF while parsing character literal")
        else:
            self.error("unexpected EOF while parsing string literal")
    else:
        discard self.step()
-    if delimiter != "'":
+    if mode != Char:
        self.createToken(String)
    else:
        if slen == 0:
            self.error("character literal cannot be of length zero")
        elif slen > 1:
            self.error("invalid character literal (length must be one!)")
-        self.createToken(Char)
+        self.createToken(TokenType.Char)


 proc parseBinary(self: Lexer) =
@ -527,8 +552,7 @@ proc parseNumber(self: Lexer) =
                    discard self.step()
    if self.match("'"):
        # Could be a size specifier, better catch it
-        while (self.peek().isAlphaNumeric() or self.check("_")) and
-                not self.done():
+        while (self.peek().isAlphaNumeric() or self.check("_")) and not self.done():
            discard self.step()
    self.createToken(kind)

@ -542,10 +566,12 @@ proc parseBackticks(self: Lexer) =
    ## except for newlines, tabs, carriage returns
    ## and other useless/confusing escape sequences
    ## like \e and \f
-    while not self.match("`") and not self.done():
-        if self.match(["\n", "\t", "\e", "\r", "\e"]):
-            self.error(&"unexpected character in stropped identifier: '{self.peek()}'")
+    while not self.check("`") and not self.done():
+        if self.match(["\n", "\t", "\e", "\r", "\f"]):
+            self.error(&"unexpected character in stropped identifier: {self.peek(-1).escape()}")
        discard self.step()
+    if self.done() and not self.match("`"):
+        self.error("unexpected EOF while parsing stropped identifier")
    self.createToken(Identifier)
    # Strips the backticks
    self.tokens[^1].lexeme = self.tokens[^1].lexeme[1..^2]
@ -594,12 +620,16 @@ proc next(self: Lexer) =
        self.parseBackticks()
    elif self.match(["\"", "'"]):
        # String or character literal
+        var mode = Default
        var delimiter = self.peek(-1)
-        if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
-                self.peek(-1), 1):
+        var multiline = false
+        if delimiter == "'":
+            mode = Char
+        if mode != Char and self.match("\"\""):
            # Multiline strings start with 3 quotes
-            delimiter.add(self.step(2))
-        self.parseString(self.peek(-1), Default)
+            multiline = true
+            delimiter.add("\"\"")
+        self.parseString(delimiter, mode, multiline)
    elif self.peek().isDigit():
        discard self.step() # Needed because parseNumber reads the next
                            # character to tell the base of the number
@ -608,20 +638,26 @@ proc next(self: Lexer) =
    elif self.peek().isAlphaNumeric() and self.check(["\"", "'"], 1):
        # Prefixed string literal (i.e. f"Hi {name}!")
        var mode = Default
-        var delimiter = self.step()
-        if self.peek(-1) != "'" and self.check(self.peek(-1)) and self.check(
-                self.peek(-1), 1):
-            # Multiline strings start with 3 quotes
-            delimiter.add(self.step(2))
        case self.step():
            of "r":
-                self.parseString(delimiter, Raw)
+                mode = Raw
            of "b":
-                self.parseString(self.step(), Byte)
+                mode = Byte
            of "f":
-                self.parseString(self.step(), Format)
+                mode = Format
            else:
                self.error(&"unknown string prefix '{self.peek(-1)}'")
+        var delimiter = self.peek(-1)
+        var multiline = false
+        if delimiter == "'":
+            if mode == Format:
+                self.error("interpolation is not available in character literals")
+            mode = Char
+        if mode != Char and self.match("\"\""):
+            # Multiline strings start with 3 quotes
+            multiline = true
+            delimiter.add("\"\"")
+        self.parseString(delimiter, mode, multiline)
    elif self.peek().isAlphaNumeric() or self.check("_"):
        # Keywords and identifiers
        self.parseIdentifier()
@ -679,6 +715,5 @@ proc lex*(self: Lexer, source, file: string): seq[Token] =
        self.lineCurrent = self.linePos
    self.tokens.add(Token(kind: EndOfFile, lexeme: "",
            line: self.line, pos: (self.current, self.current),
-            relPos: (start: 0, stop: self.linePos - 1)))
-    self.incLine()
+            relPos: (start: self.lineCurrent, stop: self.linePos)))
    return self.tokens
--- a/src/frontend/parsing/parser.nim
+++ b/src/frontend/parsing/parser.nim
@ -15,6 +15,7 @@
 ## A recursive-descent top-down parser implementation

 import std/strformat
+import std/sequtils
 import std/strutils
 import std/tables
 import std/os
@ -185,7 +186,7 @@ proc endScope(self: Parser) {.inline.} =
    dec(self.scopeDepth)


-func peek(self: Parser, distance: int = 0): Token {.inline.} =
+proc peek(self: Parser, distance: int = 0): Token {.inline.} =
    ## Peeks at the token at the given distance.
    ## If the distance is out of bounds, an EOF
    ## token is returned. A negative distance may
@ -197,7 +198,7 @@ func peek(self: Parser, distance: int = 0): Token {.inline.} =
        result = self.tokens[self.current + distance]


-func done(self: Parser): bool {.inline.} =
+proc done(self: Parser): bool {.inline.} =
    ## Returns true if we're at the
    ## end of the file. Note that the
    ## parser expects an explicit
@ -215,7 +216,7 @@ proc step(self: Parser): Token {.inline.} =
        self.current += 1


-proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseError].} =
+proc error(self: Parser, message: string, token: Token = nil) =
    ## Raises a ParseError exception
    var token = if token.isNil(): self.peek() else: token
    if token.kind == EndOfFile:
@ -230,7 +231,7 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
 # tell at tokenization time which of the two contexts we're in, we just treat everything
 # as a symbol and in the cases where we need a specific token we just match the string
 # directly
-func check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
+proc check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
    ## Checks if the given token at the given distance
    ## matches the expected kind and returns a boolean.
    ## The distance parameter is passed directly to
@ -238,7 +239,7 @@ func check(self: Parser, kind: TokenType, distance: int = 0): bool {.inline.} =
    self.peek(distance).kind == kind


-func check(self: Parser, kind: string, distance: int = 0): bool {.inline.} =
+proc check(self: Parser, kind: string, distance: int = 0): bool {.inline.} =
    ## Checks if the given token at the given distance
    ## matches the expected kind and returns a boolean.
    ## The distance parameter is passed directly to
@ -1321,7 +1322,10 @@ proc findOperators(self: Parser, tokens: seq[Token]) =

 proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[ASTNode] =
    ## Parses a sequence of tokens into a sequence of AST nodes
-    self.tokens = tokens
+
+    # I'm way too lazy to figure out a better way to ignore
+    # comments, so here ya go
+    self.tokens = tokens.filterIt(it.kind != Comment)
    self.file = file
    self.source = source
    self.lines = lines
--- a/src/util/fmterr.nim
+++ b/src/util/fmterr.nim
@ -42,7 +42,7 @@ proc formatError*(errKind: string = "", outFile = stderr, file, line: string, li
        # Print the line where the error occurred and underline the exact node that caused
        # the error. Might be inaccurate, but definitely better than nothing
        outFile.styledWrite(fgRed, styleBright, "Source line: ", resetStyle, fgDefault, line[0..<pos.start])
-        outFile.styledWrite(fgRed, styleUnderscore, line[pos.start..pos.stop])
+        outFile.styledWrite(fgRed, styleUnderscore, line[pos.start..<pos.stop])
        if pos.stop + 1 <= line.high():
            outFile.styledWriteLine(fgDefault, line[pos.stop + 1..^1])
        else:
--- a/src/util/testing.nim
+++ b/src/util/testing.nim
@ -21,6 +21,7 @@ import std/strutils
 import std/sequtils

 import frontend/parsing/lexer
+import frontend/parsing/parser
 import util/symbols


@ -50,37 +51,40 @@ type
        
        skip*: bool              # Skip running this test if true
        name*: string            # Test name. Only useful for displaying purposes
-        kind*: TestKind          # Test kind (tokenizer, parser, compiler, etc.)
-        source*: string          # The source input of the test. Usually peon code
-        status*: TestStatus      # The test's current state
-        expected*: TestStatus    # The test's expected final state after run()
+        case kind*: TestKind:    # Test kind (tokenizer, parser, compiler, etc.)
+            of Tokenizer:
+                lexer: Lexer
+                tokens: seq[Token]
+            of Parser:
+                tree: ParseTree
+            else:
+                discard 
+        source*: string              # The source input of the test. Usually peon code
+        status*: TestStatus          # The test's current state
+        case expected*: TestStatus:  # The test's expected final state after run()
+            of Failed:
+                line: int
+                message: string
+                location: tuple[start, stop: int]
+            else:
+                discard
        outcome*: TestOutcome    # The test's outcome
        runnerFunc: TestRunner   # The test's internal runner function
-        reason*: string           # A human readable reason why the test failed
+        reason*: string          # A human readable reason why the test failed
    
-
-    TokenizerTest* = ref object of Test
-        ## A tokenization test. Allows to specify
-        ## a desired error message and error location
-        ## upon tokenization failure
-        message: string
-        location: tuple[start, stop: int]
-        line: int
-        lexer: Lexer
-        tokens: seq[TokenType]
-
    TestSuite* = ref object
        ## A suite of tests
        tests*: seq[Test]

-proc `$`(self: tuple[start, stop: int]): string = 
+
+proc `$`*(self: tuple[start, stop: int]): string = 
    if self == (-1, -1):
        result = "none"
    else:
        result = &"(start={self.start}, stop={self.stop})"


-proc `$`(self: TestOutcome): string =
+proc `$`*(self: TestOutcome): string =
    result &= &"Outcome(error={self.error}"
    if not self.exc.isNil():
        var name = ($self.exc.name).split(":")[0]
@ -93,25 +97,18 @@ proc `$`(self: TestOutcome): string =



-proc `$`*(self: Test): string =
+proc setup(self: Test) =
    case self.kind:
        of Tokenizer:
-            var self = TokenizerTest(self)
-            return &"TokenizerTest(name='{self.name}', status={self.status}, outcome={self.outcome}, source='{self.source.escape()}', location={self.location}, message='{self.message}')"
+            self.lexer = newLexer()
+            self.lexer.fillSymbolTable()
        else:
-            # TODO
-            return ""
-
-
-proc setup(self: TokenizerTest) =
-    self.lexer = newLexer()
-    self.lexer.fillSymbolTable()
+            discard   # TODO


 proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
    ## Runs a tokenization test that is expected to succeed
    ## and checks that it returns the tokens we expect
-    var test = TokenizerTest(test)
    test.setup()
    try:
        let tokens = test.lexer.lex(test.source, test.name)
@ -120,10 +117,30 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
            test.reason = &"Number of provided tokens ({test.tokens.len()}) does not match number of returned tokens ({tokens.len()})"
            return
        var i = 0
-        for (token, kind) in zip(tokens, test.tokens):
-            if token.kind != kind:
+        for (provided, expected) in zip(tokens, test.tokens):
+            if provided.kind != expected.kind:
                test.status = Failed
-                test.reason = &"Token type mismatch at #{i}: expected {kind}, got {token.kind}"
+                test.reason = &"Token type mismatch at #{i}: expected {expected.kind}, got {provided.kind}"
+                return
+            if provided.lexeme != expected.lexeme:
+                test.status = Failed
+                test.reason = &"Token lexeme mismatch at #{i}: expected '{expected.lexeme}', got '{provided.lexeme}'"
+                return
+            if provided.line != expected.line:
+                test.status = Failed
+                test.reason = &"Token line mismatch at #{i}: expected {expected.line}, got {provided.line}"
+                return
+            if provided.pos != expected.pos:
+                test.status = Failed
+                test.reason = &"Token position mismatch at #{i}: expected {expected.pos}, got {provided.pos}"
+                return
+            if provided.relPos != expected.relPos:
+                test.status = Failed
+                test.reason = &"Token relative position mismatch at #{i}: expected {expected.relPos}, got {provided.relPos}"
+                return
+            if provided.spaces != expected.spaces:
+                test.status = Failed
+                test.reason = &"Token spacing mismatch at #{i}: expected {expected.spaces}, got {provided.spaces}"
                return
            inc(i)
    except LexingError:
@ -133,6 +150,7 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
        test.status = Failed
        test.outcome.error = true
        test.outcome.exc = getCurrentException()
+        test.reason = "Tokenization failed"
        return
    except CatchableError:
        test.status = Crashed
@ -145,7 +163,6 @@ proc tokenizeSucceedsRunner(suite: TestSuite, test: Test) =
 proc tokenizeFailsRunner(suite: TestSuite, test: Test) =
    ## Runs a tokenization test that is expected to fail
    ## and checks that it does so in the way we expect
-    var test = TokenizerTest(test)
    test.setup()
    try:
        discard test.lexer.lex(test.source, test.name)
@ -156,6 +173,11 @@ proc tokenizeFailsRunner(suite: TestSuite, test: Test) =
        if exc.pos == test.location and exc.line == test.line and exc.msg == test.message:
            test.status = Success
        else:
+            if exc.pos != test.location or exc.line != test.line:
+                test.reason = &"Expecting failure at {test.line}:({test.location.start}, {test.location.stop}), failed at {exc.line}:({exc.pos.start}, {exc.pos.stop})"
+            else:
+                # message is wrong
+                test.reason = &"Expecting error message to be '{test.message}', got '{exc.msg}'"
            test.status = Failed
            test.outcome.error = true
        test.outcome.exc = getCurrentException()
@ -195,43 +217,37 @@ proc removeTests*(self: TestSuite, tests: openarray[Test]) =
        self.removeTest(test)


-proc newTokenizeTest(name, source: string, skip = false): TokenizerTest =
-    ## Internal helper to initialize a tokenization test
-    new(result)
+proc testTokenizeSucceeds*(name, source: string, tokens: seq[Token], skip = false): Test =
+    ## Creates a new tokenizer test that is expected to succeed.
+    ## Each token returned by the tokenizer is matched against the
+    ## corresponding entry in the given list of expected tokens: the
+    ## test only succeeds if no discrepancies are found
+    result = Test(expected: Success)
+    result.outcome.line = -1
+    result.outcome.location = (-1, -1)
    result.name = name
    result.kind = Tokenizer
    result.status = Init
    result.source = source
    result.skip = skip
-    result.line = -1
-    result.outcome.line = -1
-    result.outcome.location = (-1, -1)
-    result.location = (-1, -1)
-    result.message = ""
-
-
-proc testTokenizeSucceeds*(name, source: string, tokens: seq[TokenType], skip = false): Test =
-    ## Creates a new tokenizer test that is expected to succeed.
-    ## The type of each token returned by the tokenizer is matched
-    ## against the given list of token types: the test only succeeds
-    ## if no discrepancies are found
-    var test = newTokenizeTest(name, source, skip)
-    test.runnerFunc = tokenizeSucceedsRunner
-    test.tokens = tokens
-    result = Test(test)
    result.expected = Success
+    result.runnerFunc = tokenizeSucceedsRunner
+    result.tokens = tokens


 proc testTokenizeFails*(name, source: string, message: string, line: int, location: tuple[start, stop: int], skip = false): Test =
    ## Creates a new tokenizer test that is expected to fail with the
    ## given error message and at the given location
-    var test = newTokenizeTest(name, source, skip)
-    test.runnerFunc = tokenizeFailsRunner
-    test.message = message
-    test.location = location
-    test.line = line
-    result = Test(test)
-    result.expected = Failed
+    result = Test(expected: Failed)
+    result.name = name
+    result.kind = Tokenizer
+    result.status = Init
+    result.source = source
+    result.skip = skip
+    result.runnerFunc = tokenizeFailsRunner
+    result.message = message
+    result.location = location
+    result.line = line


 proc run*(self: TestSuite) =
@ -257,30 +273,27 @@ proc successful*(self: TestSuite): bool =
        break


-proc getExpectedException(self: TokenizerTest): ref Exception =
+proc getExpectedException(self: Test): ref Exception =
    ## Gets the exception that we expect to be
    ## raised by the test. Could be nil if we
    ## expect no errors
    if self.expected == Success:
        return nil
-    return LexingError(msg: self.message, line: self.line, file: self.name, lexer: self.lexer, pos: self.location)
+    case self.kind:
+        of Tokenizer:
+            return LexingError(msg: self.message, line: self.line, file: self.name, lexer: self.lexer, pos: self.location)
+        else:
+            discard   # TODO
    
-    
-
-proc getExpectedOutcome(self: TokenizerTest): TestOutcome =
-    ## Gets the expected outcome of a tokenization test
-    if self.expected == Success:
-        return (false, self.getExpectedException(), -1, (-1, -1))
-    else:
-        return (false, self.getExpectedException, self.line, self.location)
-

 proc getExpectedOutcome*(self: Test): TestOutcome =
-    ## Returns the expected outcome of a test
+    ## Gets the expected outcome of a test
    doAssert self.expected in [Success, Failed], "expected outcome is neither Success nor Failed: wtf?"
    case self.kind:
        of Tokenizer:
-            return TokenizerTest(self).getExpectedOutcome()
+            if self.expected == Success:
+                return (false, self.getExpectedException(), -1, (-1, -1))
+            else:
+                return (false, self.getExpectedException(), self.line, self.location)
        else:
-            # TODO
            discard
--- a/tests/tokenize.nim
+++ b/tests/tokenize.nim
@ -7,17 +7,36 @@ import frontend/parsing/lexer
 import std/strformat


+# Utilities to construct tokens for validation purposes
+
+proc makeToken(kind: TokenType, line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), lexeme: string,
+               spaces: int = 0): Token {.inline.} =
+    return Token(kind: kind, line: line, pos: pos, relPos: relPos, lexeme: lexeme, spaces: spaces)
+
+proc endOfFile(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), spaces: int = 0): Token {.inline.} = 
+    return makeToken(EndOfFile, line, pos, relPos, "", spaces)
+
+proc makeString(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, delimiter: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(String, line, pos, relPos, &"{delimiter}{content}{delimiter}", spaces)
+
+proc makeChar(line: int = 1, pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(Char, line, pos, relPos, &"'{content}'", spaces)
+
+proc makeSymbol(line: int = 1,  pos: tuple[start, stop: int] = (0, 0), relPos: tuple[start, stop: int] = (0, 0), content: string, spaces: int = 0): Token {.inline.} =
+    return makeToken(Symbol, line, pos, relPos, content, spaces)
+
+
 when isMainModule:
    var suite = newTestSuite()
    suite.addTests(
        [
-            testTokenizeSucceeds("emptyFile", "", @[TokenType.EndOfFile]),
-            testTokenizeSucceeds("newLine", "\n", @[TokenType.EndOfFile]),
-            testTokenizeSucceeds("carriageReturn", "\r", @[TokenType.EndOfFile]),
-            testTokenizeSucceeds("emptyString", "\"\"", @[TokenType.String, TokenType.EndOfFile]),
-            testTokenizeSucceeds("escapedSingleQuote", "'\\''", @[TokenType.Char, TokenType.EndOfFile]),
-            testTokenizeSucceeds("escapedDoubleQuote", """  "\""  """, @[TokenType.String, TokenType.EndOfFile]),
-            testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
+            testTokenizeSucceeds("emptyFile", "", @[endOfFile()]),
+            testTokenizeSucceeds("ignoredEscapes", "\r\f\e", @[endOfFile(pos=(3, 3), relPos=(3, 3))]),
+            testTokenizeSucceeds("newLine", "\n", @[endOfFile(line=2, pos=(1, 1))]),
+            testTokenizeSucceeds("emptyString", """""""", @[makeString(content="", delimiter="\"", pos=(0, 1), relPos=(0, 1)), endOfFile(pos=(2, 2), relPos=(2, 2))]),
+            testTokenizeSucceeds("escapedDelimChar", """'\''""", @[makeChar(content="\\'", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
+            testTokenizeSucceeds("escapedDelimString", """"\""""", @[makeString(content="\\\"", delimiter="\"", pos=(0, 3), relPos=(0, 3)), endOfFile(pos=(4, 4), relPos=(4, 4))]),
+            #[testTokenizeSucceeds("bareUnicode", "🌎 😂 👩‍👩‍👦‍👦", @[TokenType.Symbol, TokenType.Symbol, TokenType.Symbol, TokenType.EndOfFile]),
            testTokenizeSucceeds("stroppedSingleUnicode", "`🌎` `😂` `👩‍👩‍👦‍👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
            testTokenizeSucceeds("stroppedMultiUnicode", "`🌎🌎` `😂😂` `👩‍👩‍👦‍👦👩‍👩‍👦‍👦`", @[TokenType.Identifier, TokenType.Identifier, TokenType.Identifier, TokenType.EndOfFile]),
            testTokenizeSucceeds("stringWithEscapes", """ "\n\t\r\e\f" """, @[TokenType.String, TokenType.EndOfFile]),
@ -28,7 +47,7 @@ when isMainModule:
                                   TokenType.EndOfFile]),
            testTokenizeSucceeds("allFloats", "1.0 1e5 1E5 1.5e4 1.5E4", @[TokenType.Float, TokenType.Float, TokenType.Float, 
                                                                           TokenType.Float, TokenType.Float, TokenType.EndOfFile]),
-            testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),
+            testTokenizeSucceeds("simpleEscapes", """ "\a \b \f \n \r \t \v \" \' \\ " """, @[TokenType.String, TokenType.EndOfFile]),]#
            testTokenizeFails("invalidFloatEndsWithDot", "2.", "invalid float number literal",  line=1, location=(0, 1)),
            testTokenizeFails("invalidFloatSpuriousChars", "2.f", "invalid float number literal",  line=1, location=(0, 1)),
            testTokenizeFails("unterminatedChar", "'", "unexpected EOF while parsing character literal", line=1, location=(0, 0)),
@ -41,10 +60,17 @@ when isMainModule:
            testTokenizeFails("unterminatedStringWithNewline", "\"\\n;", "unexpected EOF while parsing string literal",  line=1, location=(0, 3)),
            testTokenizeFails("illegalTabs", "\t", "tabs are not allowed in peon code, use spaces for indentation instead", line=1, location=(0, 0)),
            testTokenizeFails("illegalShortUnicodeEscape", """ "\u123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
-            testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2))
+            testTokenizeFails("illegalLongUnicodeEscape", """ "\U123" """, "unicode escape sequences are not supported yet", line=1, location=(1, 2)),
+            testTokenizeFails("illegalNLInBacktick", "`test\n`", """unexpected character in stropped identifier: "\x0A"""", line=1, location=(0, 5)),
+            testTokenizeFails("illegalTabInBacktick", "`test\t`", """unexpected character in stropped identifier: "\x09"""", line=1, location=(0, 5)),
+            testTokenizeFails("illegalEscapeinBacktick", "`test\e`", """unexpected character in stropped identifier: "\x1B"""", line=1, location=(0, 5)),
+            testTokenizeFails("illegalCRInBacktick", "`test\r`", """unexpected character in stropped identifier: "\x0D"""", line=1, location=(0, 5)),
+            testTokenizeFails("illegalFFInBacktick", "`test\f`", """unexpected character in stropped identifier: "\x0C"""", line=1, location=(0, 5)),
+            testTokenizeFails("unterminatedStroppedIdent", "`test", "unexpected EOF while parsing stropped identifier", line=1, location=(0, 4))
+
        ]
    )
-    var allTokens = ""
+    #[var allTokens = ""
    var allTokensList = newSeqOfCap[TokenType](symbols.tokens.len())
    for lexeme in symbols.tokens.keys():
        allTokens.add(&"{lexeme} ")
@ -68,22 +94,27 @@ when isMainModule:
        characters.add(&"'{char(value)}'")
    charTokens.add(TokenType.EndOfFile)
    characters.add("""'\'' '\n' '\\' '\t' '\e' '\a' '\r'""")
-    suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))
+    suite.addTest(testTokenizeSucceeds("allCharacters", characters, charTokens))]#
+    echo "Running tokenizer tests"
    suite.run()
-    echo "Tokenization test results: "
-    for test in suite.tests:
-        echo &"  - {test.name} -> {test.status}"
-        if test.status in [Failed, Crashed]:
-            echo &"    Details:"
-            echo &"      - Outcome:          {test.outcome}"
-            echo &"      - Expected state:   {test.expected} "
-            echo &"      - Expected outcome: {test.getExpectedOutcome()}"
-            echo &"\n  The test failed for the following reason: {test.reason}\n"
-            if not test.outcome.exc.isNil():
-                echo &"\n    Formatted error message follows\n"
-                print(LexingError(test.outcome.exc))
-                echo "\n     Formatted error message ends here\n"
    if suite.successful():
        echo "OK: All tokenizer tests were successful"
        quit(0)
+    else:
+        echo "ERR: Not all tests were successful, details below:\n"
+        for test in suite.tests:
+            if test.status in [Failed, Crashed]:
+                echo &"  - {test.name} -> {test.status}"
+                echo &"    Details:"
+                echo &"      - Outcome             -> {test.outcome}"
+                echo &"      - Expected state      -> {test.expected} "
+                echo &"      - Expected outcome    -> {test.getExpectedOutcome()}"
+                if test.reason.len() > 0:
+                    echo &"\n  The test failed for the following reason -> {test.reason}\n"
+                else:
+                    echo "\n  No further information is available about this failure"
+                if not test.outcome.exc.isNil():
+                    echo &"\n    Formatted error message follows\n"
+                    print(LexingError(test.outcome.exc))
+                    echo "\n     Formatted error message ends here\n"
    quit(-1)