270 lines
12 KiB
Nim
270 lines
12 KiB
Nim
# Builtins & external libs
|
|
import sequtils
|
|
import strformat
|
|
import strutils
|
|
import nimSHA2
|
|
import times
|
|
import jale/editor as ed
|
|
import jale/templates
|
|
import jale/plugin/defaults
|
|
import jale/plugin/editor_history
|
|
import jale/keycodes
|
|
import jale/multiline
|
|
|
|
|
|
# Our stuff
|
|
import frontend/lexer as l
|
|
import frontend/parser as p
|
|
import frontend/compiler as c
|
|
import frontend/optimizer as o
|
|
import util/serializer as s
|
|
import util/debugger
|
|
|
|
|
|
# Forward declarations: both procedures are used by the main block
# before their bodies appear further down in this file
proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor(): LineEditor
|
|
|
|
# Compile-time switches: each one is checked with `when` in the main
# block and enables the debug output of the matching pipeline stage
const
  debugLexer = true
  debugParser = true
  debugCompiler = true
  debugOptimizer = true
  debugSerializer = true
|
|
|
|
|
|
when isMainModule:
  # Interactive REPL: reads source text from the line editor and pushes
  # it through every stage of the pipeline (lexer, parser, optimizer,
  # compiler, serializer), echoing the result of each stage when the
  # matching debug* constant is enabled

  proc printErrorContext(source: string, lineNo, caretStart, caretLen: int) =
    ## Prints the message of the exception currently being handled,
    ## followed by the offending source line and a row of carets
    ## underneath it. Must be called from inside an except branch.
    ##
    ## - source: the full text that was being processed
    ## - lineNo: 1-based number of the line to display
    ## - caretStart: column (before the "Source line: " prefix is
    ##   accounted for) at which the carets begin
    ## - caretLen: how many carets to print
    const prefix = "Source line: "
    let sourceLines = source.splitLines()
    # An out-of-range line number yields an empty line instead of an
    # IndexDefect, so a bad position cannot take the whole REPL down
    let line =
      if lineNo >= 1 and lineNo <= sourceLines.len():
        sourceLines[lineNo - 1].strip()
      else:
        ""
    echo getCurrentExceptionMsg()
    echo prefix & line
    # repeat() takes a Natural count: clamp to zero so that a negative
    # span cannot raise a RangeDefect while reporting another error
    echo " ".repeat(max(0, caretStart + prefix.len())) & "^".repeat(max(0, caretLen))

  # Exit cleanly when Ctrl+C is pressed
  setControlCHook(proc () {.noconv.} = quit(0))
  var
    keep = true
    tokens: seq[Token] = @[]
    tree: seq[ASTNode] = @[]
    compiled: Chunk
    optimized: tuple[tree: seq[ASTNode], warnings: seq[Warning]]
    serialized: Serialized
    serializedRaw: seq[byte]
    tokenizer = newLexer()
    parser = newParser()
    optimizer = newOptimizer()
    compiler = newCompiler()
    serializer = newSerializer()
    editor = getLineEditor()
    input: string
  tokenizer.fillSymbolTable()
  # The quit event stops the read loop below
  editor.bindEvent(jeQuit):
    keep = false
  # ctrl+a / ctrl+e move the cursor to the start / end of the content
  editor.bindKey("ctrl+a"):
    editor.content.home()
  editor.bindKey("ctrl+e"):
    editor.content.`end`()
  while keep:
    try:
      input = editor.read()
      if input.len() > 0:
        # Currently the parser doesn't handle these tokens well
        tokens = filter(tokenizer.lex(input, "<stdin>"), proc (x: Token): bool = x.kind notin {TokenType.Whitespace, Tab})
        if tokens.len() > 0:
          when debugLexer:
            echo "Tokenization step:"
            for i, token in tokens:
              if i == tokens.high():
                # Who cares about EOF?
                break
              echo "\t", token
            echo ""
          tree = parser.parse(tokens, "<stdin>")
          when debugParser:
            echo "Parsing step:"
            for node in tree:
              echo "\t", node
            echo ""
          optimized = optimizer.optimize(tree)
          when debugOptimizer:
            echo &"Optimization step (constant folding enabled: {optimizer.foldConstants}):"
            for node in optimized.tree:
              echo "\t", node
            echo ""
            stdout.write(&"Produced warnings: ")
            if optimized.warnings.len() > 0:
              echo ""
              for warning in optimized.warnings:
                echo "\t", warning
            else:
              stdout.write("No warnings produced\n")
            echo ""
          compiled = compiler.compile(optimized.tree, "<stdin>")
          when debugCompiler:
            echo "Compilation step:"
            stdout.write("\t")
            echo &"""Raw byte stream: [{compiled.code.join(", ")}]"""
            echo "\nBytecode disassembler output below:\n"
            disassembleChunk(compiled, "<stdin>")
            echo ""
          # Round-trip the chunk through the serializer so that the
          # deserialized result can be compared with the original
          serializedRaw = serializer.dumpBytes(compiled, input, "<stdin>")
          serialized = serializer.loadBytes(serializedRaw)
          when debugSerializer:
            echo "Serialization step: "
            stdout.write("\t")
            echo &"""Raw hex output: {serializedRaw.mapIt(toHex(it)).join("").toLowerAscii()}"""
            echo ""

            echo "Deserialization step:"
            echo &"\t- File hash: {serialized.fileHash} (matches: {computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash})"
            echo &"\t- Peon version: {serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch} (commit {serialized.commitHash[0..8]} on branch {serialized.peonBranch})"
            stdout.write("\t")
            echo &"""- Compilation date & time: {fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")}"""
            stdout.write(&"\t- Reconstructed constants table: [")
            for i, e in serialized.chunk.consts:
              stdout.write(e)
              if i < len(serialized.chunk.consts) - 1:
                stdout.write(", ")
            stdout.write("]\n")
            stdout.write(&"\t- Reconstructed bytecode: [")
            for i, e in serialized.chunk.code:
              stdout.write($e)
              if i < len(serialized.chunk.code) - 1:
                stdout.write(", ")
            stdout.write(&"] (matches: {serialized.chunk.code == compiled.code})\n")
    except IOError:
      # NOTE(review): presumably raised when the input stream is
      # closed; either way the REPL stops reading - confirm
      break
    # TODO: The code for error reporting completely
    # breaks down with multiline input, fix it
    except LexingError:
      let lineNo = tokenizer.getLine()
      let relPos = tokenizer.getRelPos(lineNo)
      printErrorContext(tokenizer.getSource(), lineNo, relPos.start,
                        relPos.stop - relPos.start)
    except ParseError:
      let lineNo = parser.getCurrentToken().line
      let relPos = tokenizer.getRelPos(lineNo)
      printErrorContext(tokenizer.getSource(), lineNo, relPos.start,
                        relPos.stop - parser.getCurrentToken().lexeme.len())
    except CompileError:
      let lineNo = compiler.getCurrentNode().token.line
      let relPos = tokenizer.getRelPos(lineNo)
      printErrorContext(tokenizer.getSource(), lineNo, relPos.start,
                        relPos.stop - compiler.getCurrentNode().token.lexeme.len())

  quit(0)
|
|
|
|
|
|
|
|
proc fillSymbolTable(tokenizer: Lexer) =
  ## Registers all of the builtin symbols and keywords
  ## in the symbol table of the given lexer

  # Local shorthands: they expand to the very same calls on
  # tokenizer.symbols, only with less noise at each entry
  template sym(lexeme: string, kind: TokenType) =
    tokenizer.symbols.addSymbol(lexeme, kind)

  template kw(name: string, kind: TokenType) =
    tokenizer.symbols.addKeyword(name, kind)

  # Symbols that are 1 byte long
  sym("`", Backtick)
  sym("+", Plus)
  sym("-", Minus)
  sym("*", Star)
  sym("/", Slash)
  sym("{", LeftBrace)
  sym("}", RightBrace)
  sym("(", LeftParen)
  sym(")", RightParen)
  sym("[", LeftBracket)
  sym("]", RightBracket)
  sym(".", Dot)
  sym(",", Comma)
  sym(">", TokenType.GreaterThan)
  sym("<", TokenType.LessThan)
  sym(";", Semicolon)
  sym("=", Equal)
  sym("~", Tilde)
  sym("%", Percentage)
  sym(":", Colon)
  sym("&", Ampersand)
  sym("^", Caret)
  sym("|", Pipe)

  # Symbols that are 2 bytes long
  sym("+=", InplaceAdd)
  sym("-=", InplaceSub)
  sym(">=", TokenType.GreaterOrEqual)
  sym("<=", TokenType.LessOrEqual)
  sym("*=", InplaceMul)
  sym("/=", InplaceDiv)
  sym("&=", InplaceAnd)
  sym("!=", NotEqual)
  sym("|=", InplaceOr)
  sym("^=", InplaceXor)
  sym("%=", InplaceMod)
  sym("//", FloorDiv)
  sym("==", DoubleEqual)
  sym("**", DoubleStar)
  sym(">>", RightShift)
  sym("<<", LeftShift)

  # Symbols that are 3 bytes long
  sym("//=", InplaceFloorDiv)
  sym("**=", InplacePow)
  sym(">>=", InplaceRightShift)
  sym("<<=", InplaceLeftShift)

  # Reserved words
  kw("type", Type)
  kw("enum", Enum)
  kw("case", Case)
  kw("operator", Operator)
  kw("generator", Generator)
  kw("function", Function)
  kw("coroutine", Coroutine)
  kw("break", TokenType.Break)
  kw("continue", Continue)
  kw("while", While)
  kw("for", For)
  kw("foreach", Foreach)
  kw("if", If)
  kw("else", Else)
  kw("await", TokenType.Await)
  kw("defer", Defer)
  kw("try", Try)
  kw("except", Except)
  kw("finally", Finally)
  kw("raise", TokenType.Raise)
  kw("assert", TokenType.Assert)
  kw("const", Const)
  kw("let", Let)
  kw("var", Var)
  kw("import", Import)
  kw("yield", TokenType.Yield)
  kw("return", TokenType.Return)

  # Strictly speaking these are expressions with a reserved
  # name that evaluate to a value of a builtin type, but the
  # distinction only matters to the parser and the compiler,
  # so the lexer treats them like any other keyword
  kw("nan", NotANumber)
  kw("inf", Infinity)
  kw("nil", TokenType.Nil)
  kw("true", TokenType.True)
  kw("false", TokenType.False)

  # Strictly speaking these are operators, but they look
  # exactly like identifiers/keywords, so they are lexed as
  # such and given their special meaning at a later stage
  kw("isnot", IsNot)
  kw("is", Is)
  kw("as", As)
  kw("of", Of)
  kw("and", TokenType.LogicalAnd)
  kw("or", TokenType.LogicalOr)
  kw("not", TokenType.LogicalNot)

  # P.S.: Symbols do not have to be added in ascending order
  # of length (the symbol table uses a hashmap internally).
  # Feel free to add or remove symbols (and keywords, for
  # that matter) as you see fit!
|
|
|
|
|
|
proc getLineEditor(): LineEditor =
  ## Creates the line editor used by the REPL: sets its prompt
  ## to "=> ", installs the default bindings and attaches the
  ## history plugin to it
  result = newLineEditor()
  result.prompt = "=> "
  result.populateDefaults()
  # Plug the history into the editor and bind it in a single step
  result.bindHistory(result.plugHistory())