# peon/src/test.nim

# Builtins & external libs
import sequtils
import strformat
import strutils
import nimSHA2
import times
import jale/editor as ed
import jale/templates
import jale/plugin/defaults
import jale/plugin/editor_history
import jale/keycodes
import jale/multiline
# Our stuff
import frontend/lexer as l
import frontend/parser as p
import frontend/compiler as c
import frontend/optimizer as o
import util/serializer as s
import util/debugger
# Forward declarations
proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = true
const debugParser = true
const debugCompiler = true
const debugOptimizer = true
const debugSerializer = true
when isMainModule:
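    # Interactive test driver: read a line from the editor, push it through
    # every stage of the peon frontend (lexer -> parser -> optimizer ->
    # compiler -> serializer) and print debug output for each stage whose
    # debug* switch is enabled above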
    setControlCHook(proc () {.noconv.} = quit(0))
    var
        keep = true
        tokens: seq[Token] = @[]
        tree: seq[ASTNode] = @[]
        compiled: Chunk
        optimized: tuple[tree: seq[ASTNode], warnings: seq[Warning]]
        serialized: Serialized
        serializedRaw: seq[byte]
        tokenizer = newLexer()
        parser = newParser()
        optimizer = newOptimizer()
        compiler = newCompiler()
        serializer = newSerializer()
        editor = getLineEditor()
        input: string
    tokenizer.fillSymbolTable()
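    # Stop the REPL when the editor fires its quit event and bind
    # Ctrl+A/Ctrl+E to jump to the start/end of the current line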
    editor.bindEvent(jeQuit):
        keep = false
    editor.bindKey("ctrl+a"):
        editor.content.home()
    editor.bindKey("ctrl+e"):
        editor.content.`end`()
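    # Main REPL loop: each line of input is pushed through the whole
    # pipeline; errors from any stage are caught below and reported
    # without leaving the loop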
    while keep:
        try:
            input = editor.read()
            if input.len() > 0:
                # Currently the parser doesn't handle these tokens well
                tokens = filter(tokenizer.lex(input, "<stdin>"), proc (x: Token): bool = x.kind notin {TokenType.Whitespace, Tab})
                if tokens.len() > 0:
                    when debugLexer:
                        echo "Tokenization step:"
                        for i, token in tokens:
                            if i == tokens.high():
                                # Who cares about EOF?
                                break
                            echo "\t", token
                        echo ""
                    tree = parser.parse(tokens, "<stdin>")
                    when debugParser:
                        echo "Parsing step:"
                        for node in tree:
                            echo "\t", node
                        echo ""
                    optimized = optimizer.optimize(tree)
                    when debugOptimizer:
                        echo &"Optimization step (constant folding enabled: {optimizer.foldConstants}):"
                        for node in optimized.tree:
                            echo "\t", node
                        echo ""
                        stdout.write(&"Produced warnings: ")
                        if optimized.warnings.len() > 0:
                            echo ""
                            for warning in optimized.warnings:
                                echo "\t", warning
                        else:
                            stdout.write("No warnings produced\n")
                        echo ""
                    compiled = compiler.compile(optimized.tree, "<stdin>")
                    when debugCompiler:
                        echo "Compilation step:"
                        stdout.write("\t")
                        echo &"""Raw byte stream: [{compiled.code.join(", ")}]"""
                        echo "\nBytecode disassembler output below:\n"
                        disassembleChunk(compiled, "<stdin>")
                        echo ""
                    serializedRaw = serializer.dumpBytes(compiled, input, "<stdin>")
                    serialized = serializer.loadBytes(serializedRaw)
                    when debugSerializer:
                        echo "Serialization step: "
                        stdout.write("\t")
                        echo &"""Raw hex output: {serializedRaw.mapIt(toHex(it)).join("").toLowerAscii()}"""
                        echo ""
                        echo "Deserialization step:"
                        echo &"\t- File hash: {serialized.fileHash} (matches: {computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash})"
                        echo &"\t- Peon version: {serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch} (commit {serialized.commitHash[0..8]} on branch {serialized.peonBranch})"
                        stdout.write("\t")
                        echo &"""- Compilation date & time: {fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")}"""
                        stdout.write(&"\t- Reconstructed constants table: [")
                        for i, e in serialized.chunk.consts:
                            stdout.write(e)
                            if i < len(serialized.chunk.consts) - 1:
                                stdout.write(", ")
                        stdout.write("]\n")
                        stdout.write(&"\t- Reconstructed bytecode: [")
                        for i, e in serialized.chunk.code:
                            stdout.write($e)
                            if i < len(serialized.chunk.code) - 1:
                                stdout.write(", ")
                        stdout.write(&"] (matches: {serialized.chunk.code == compiled.code})\n")
        except IOError:
            break
        # TODO: The code for error reporting completely
        # breaks down with multiline input, fix it
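        # Each handler below prints the exception message, the offending source
        # line and a row of carets underneath it marking where the error occurred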
        except LexingError:
            let lineNo = tokenizer.getLine()
            let relPos = tokenizer.getRelPos(lineNo)
            let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
            echo getCurrentExceptionMsg()
            echo &"Source line: {line}"
            echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
        except ParseError:
            let lineNo = parser.getCurrentToken().line
            let relPos = tokenizer.getRelPos(lineNo)
            let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
            echo getCurrentExceptionMsg()
            echo &"Source line: {line}"
            echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len())
        except CompileError:
            let lineNo = compiler.getCurrentNode().token.line
            let relPos = tokenizer.getRelPos(lineNo)
            let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
            echo getCurrentExceptionMsg()
            echo &"Source line: {line}"
            echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len())
    quit(0)


proc fillSymbolTable(tokenizer: Lexer) =
    ## Initializes the Lexer's symbol
    ## table with the builtin symbols
    ## and keywords

    # 1-byte symbols
    tokenizer.symbols.addSymbol("`", Backtick)
    tokenizer.symbols.addSymbol("+", Plus)
    tokenizer.symbols.addSymbol("-", Minus)
    tokenizer.symbols.addSymbol("*", Star)
    tokenizer.symbols.addSymbol("/", Slash)
    tokenizer.symbols.addSymbol("{", LeftBrace)
    tokenizer.symbols.addSymbol("}", RightBrace)
    tokenizer.symbols.addSymbol("(", LeftParen)
    tokenizer.symbols.addSymbol(")", RightParen)
    tokenizer.symbols.addSymbol("[", LeftBracket)
    tokenizer.symbols.addSymbol("]", RightBracket)
    tokenizer.symbols.addSymbol(".", Dot)
    tokenizer.symbols.addSymbol(",", Comma)
    tokenizer.symbols.addSymbol(">", TokenType.GreaterThan)
    tokenizer.symbols.addSymbol("<", TokenType.LessThan)
    tokenizer.symbols.addSymbol(";", Semicolon)
    tokenizer.symbols.addSymbol("=", Equal)
    tokenizer.symbols.addSymbol("~", Tilde)
    tokenizer.symbols.addSymbol("%", Percentage)
    tokenizer.symbols.addSymbol(":", Colon)
    tokenizer.symbols.addSymbol("&", Ampersand)
    tokenizer.symbols.addSymbol("^", Caret)
    tokenizer.symbols.addSymbol("|", Pipe)
    # 2-byte symbols
    tokenizer.symbols.addSymbol("+=", InplaceAdd)
    tokenizer.symbols.addSymbol("-=", InplaceSub)
    tokenizer.symbols.addSymbol(">=", TokenType.GreaterOrEqual)
    tokenizer.symbols.addSymbol("<=", TokenType.LessOrEqual)
    tokenizer.symbols.addSymbol("*=", InplaceMul)
    tokenizer.symbols.addSymbol("/=", InplaceDiv)
    tokenizer.symbols.addSymbol("&=", InplaceAnd)
    tokenizer.symbols.addSymbol("!=", NotEqual)
    tokenizer.symbols.addSymbol("|=", InplaceOr)
    tokenizer.symbols.addSymbol("^=", InplaceXor)
    tokenizer.symbols.addSymbol("%=", InplaceMod)
    tokenizer.symbols.addSymbol("//", FloorDiv)
    tokenizer.symbols.addSymbol("==", DoubleEqual)
    tokenizer.symbols.addSymbol("**", DoubleStar)
    tokenizer.symbols.addSymbol(">>", RightShift)
    tokenizer.symbols.addSymbol("<<", LeftShift)
    # 3-byte symbols
    tokenizer.symbols.addSymbol("//=", InplaceFloorDiv)
    tokenizer.symbols.addSymbol("**=", InplacePow)
    tokenizer.symbols.addSymbol(">>=", InplaceRightShift)
    tokenizer.symbols.addSymbol("<<=", InplaceLeftShift)
    # Keywords
    tokenizer.symbols.addKeyword("type", Type)
    tokenizer.symbols.addKeyword("enum", Enum)
    tokenizer.symbols.addKeyword("case", Case)
    tokenizer.symbols.addKeyword("operator", Operator)
    tokenizer.symbols.addKeyword("generator", Generator)
    tokenizer.symbols.addKeyword("function", Function)
    tokenizer.symbols.addKeyword("coroutine", Coroutine)
    tokenizer.symbols.addKeyword("break", TokenType.Break)
    tokenizer.symbols.addKeyword("continue", Continue)
    tokenizer.symbols.addKeyword("while", While)
    tokenizer.symbols.addKeyword("for", For)
    tokenizer.symbols.addKeyword("foreach", Foreach)
    tokenizer.symbols.addKeyword("if", If)
    tokenizer.symbols.addKeyword("else", Else)
    tokenizer.symbols.addKeyword("await", TokenType.Await)
    tokenizer.symbols.addKeyword("defer", Defer)
    tokenizer.symbols.addKeyword("try", Try)
    tokenizer.symbols.addKeyword("except", Except)
    tokenizer.symbols.addKeyword("finally", Finally)
    tokenizer.symbols.addKeyword("raise", TokenType.Raise)
    tokenizer.symbols.addKeyword("assert", TokenType.Assert)
    tokenizer.symbols.addKeyword("const", Const)
    tokenizer.symbols.addKeyword("let", Let)
    tokenizer.symbols.addKeyword("var", Var)
    tokenizer.symbols.addKeyword("import", Import)
    tokenizer.symbols.addKeyword("yield", TokenType.Yield)
    tokenizer.symbols.addKeyword("return", TokenType.Return)
    # These are technically more like expressions
    # with a reserved name that produce a value of a
    # builtin type, but we don't need to care about
    # that until the parsing and compilation steps,
    # so it's fine
    tokenizer.symbols.addKeyword("nan", NotANumber)
    tokenizer.symbols.addKeyword("inf", Infinity)
    tokenizer.symbols.addKeyword("nil", TokenType.Nil)
    tokenizer.symbols.addKeyword("true", TokenType.True)
    tokenizer.symbols.addKeyword("false", TokenType.False)
    # These are technically operators, but since
    # they fit neatly into the definition of an
    # identifier/keyword we tokenize them as such
    # and specialize them later
    tokenizer.symbols.addKeyword("isnot", IsNot)
    tokenizer.symbols.addKeyword("is", Is)
    tokenizer.symbols.addKeyword("as", As)
    tokenizer.symbols.addKeyword("of", Of)
    tokenizer.symbols.addKeyword("and", TokenType.LogicalAnd)
    tokenizer.symbols.addKeyword("or", TokenType.LogicalOr)
    tokenizer.symbols.addKeyword("not", TokenType.LogicalNot)
    # P.S.: There's no reason for symbols to be added in
    # ascending order of length (the symbol table uses a
    # hashmap internally), so you can add/remove symbols
    # (and keywords, for that matter) as you like!


proc getLineEditor: LineEditor =
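    ## Builds the line editor used to read input: sets the prompt,
    ## loads jale's defaults and plugs in line history support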
    result = newLineEditor()
    result.prompt = "=> "
    result.populateDefaults()
    let history = result.plugHistory()
    result.bindHistory(history)