peon/src/main.nim

# Builtins & external libs
import sequtils
import strformat
import strutils
import jale/editor as ed
import jale/templates
import jale/plugin/defaults
import jale/plugin/editor_history
import jale/keycodes
import jale/multiline

# Our stuff
import frontend/lexer as l
import frontend/parser as p
import frontend/compiler as c
import backend/vm as v
import util/serializer as s

# Forward declarations
proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor

# Handy dandy compile-time constants
const debugLexer = true
const debugParser = true
const debugCompiler = true
const debugSerializer = true
const debugRuntime = true
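
# These debug-only imports provide SHA256 hashing, timestamp
# formatting and the bytecode disassembler used by the debug
# output further down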
when debugSerializer:
    import nimSHA2
    import times

when debugCompiler:
    import util/debugger


when isMainModule:
    setControlCHook(proc () {.noconv.} = quit(0))
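    # Every pipeline component is created once, up front, and
    # reused for each line of input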
    var
        keep = true
        tokens: seq[Token] = @[]
        tree: seq[Declaration] = @[]
        compiled: Chunk
        serialized: Serialized
        serializedRaw: seq[byte]
        tokenizer = newLexer()
        parser = newParser()
        compiler = newCompiler()
        serializer = newSerializer()
        vm = newPeonVM()
        editor = getLineEditor()
        input: string
    tokenizer.fillSymbolTable()
    editor.bindEvent(jeQuit):
        keep = false
    editor.bindKey("ctrl+a"):
        editor.content.home()
    editor.bindKey("ctrl+e"):
        editor.content.`end`()
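
    # The REPL loop: read a line of input, lex, parse and compile
    # it, serialize the result and execute the reloaded bytecode
    # in the VM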
    while keep:
        try:
            input = editor.read()
            if input.len() == 0:
                continue
            # Currently the parser doesn't handle these tokens well
            tokens = filter(tokenizer.lex(input, "stdin"), proc (
                x: Token): bool = x.kind notin {TokenType.Whitespace, Tab})
            if tokens.len() == 0:
                continue
            when debugLexer:
                echo "Tokenization step:"
                for i, token in tokens:
                    if i == tokens.high():
                        # Who cares about EOF?
                        break
                    echo "\t", token
                echo ""
            tree = parser.parse(tokens, "stdin")
            if tree.len() == 0:
                continue
            when debugParser:
                echo "Parsing step:"
                for node in tree:
                    echo "\t", node
                echo ""
            compiled = compiler.compile(tree, "stdin")
            when debugCompiler:
                echo "Compilation step:"
                stdout.write("\t")
                echo &"""Raw byte stream: [{compiled.code.join(", ")}]"""
                echo "\nBytecode disassembler output below:\n"
                disassembleChunk(compiled, "stdin")
                echo ""
            serializer.dumpToFile(compiled, input, "stdin", "stdin.pbc")
            serializedRaw = serializer.dumpBytes(compiled, input, "stdin")
            serialized = serializer.loadFile("stdin.pbc")
            when debugSerializer:
                echo "Serialization step: "
                stdout.write("\t")
                echo &"""Raw hex output: {serializedRaw.mapIt(toHex(it)).join("").toLowerAscii()}"""
                echo ""
                echo &"\t- File hash: {serialized.fileHash} (matches: {computeSHA256(input).toHex().toLowerAscii() == serialized.fileHash})"
                echo &"\t- Peon version: {serialized.peonVer.major}.{serialized.peonVer.minor}.{serialized.peonVer.patch} (commit {serialized.commitHash[0..8]} on branch {serialized.peonBranch})"
                stdout.write("\t")
                echo &"""- Compilation date & time: {fromUnix(serialized.compileDate).format("d/M/yyyy HH:mm:ss")}"""
                stdout.write(&"\t- Reconstructed constants table: [")
                for i, e in serialized.chunk.consts:
                    stdout.write(e)
                    if i < len(serialized.chunk.consts) - 1:
                        stdout.write(", ")
                stdout.write(&"] (matches: {serialized.chunk.consts == compiled.consts})\n")
                stdout.write(&"\t- Reconstructed bytecode: [")
                for i, e in serialized.chunk.code:
                    stdout.write($e)
                    if i < len(serialized.chunk.code) - 1:
                        stdout.write(", ")
                stdout.write(&"] (matches: {serialized.chunk.code == compiled.code})\n")
            when debugRuntime:
                echo "Execution step: "
                vm.run(serialized.chunk)
        except IOError:
            break
        # TODO: The code for error reporting completely
        # breaks down with multiline input, fix it
        except LexingError:
            # let lineNo = tokenizer.getLine()
            # let relPos = tokenizer.getRelPos(lineNo)
            # let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
            echo getCurrentExceptionMsg()
            # echo &"Source line: {line}"
            # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
        except ParseError:
            # let lineNo = parser.getCurrentToken().line
            # let relPos = tokenizer.getRelPos(lineNo)
            # let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
            echo getCurrentExceptionMsg()
            # echo &"Source line: {line}"
            # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len())
        except CompileError:
            # let lineNo = compiler.getCurrentNode().token.line
            # let relPos = tokenizer.getRelPos(lineNo)
            # let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
            echo getCurrentExceptionMsg()
            # echo &"Source line: {line}"
            # echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len())
        except SerializationError:
            echo getCurrentExceptionMsg()
    quit(0)


proc fillSymbolTable(tokenizer: Lexer) =
    ## Initializes the Lexer's symbol
    ## table with the builtin symbols
    ## and keywords

    # 1-byte symbols
    tokenizer.symbols.addSymbol("{", LeftBrace)
    tokenizer.symbols.addSymbol("}", RightBrace)
    tokenizer.symbols.addSymbol("(", LeftParen)
    tokenizer.symbols.addSymbol(")", RightParen)
    tokenizer.symbols.addSymbol("[", LeftBracket)
    tokenizer.symbols.addSymbol("]", RightBracket)
    tokenizer.symbols.addSymbol(".", Dot)
    tokenizer.symbols.addSymbol(",", Comma)
    tokenizer.symbols.addSymbol(";", Semicolon)
    # Keywords
    tokenizer.symbols.addKeyword("type", TokenType.Type)
tokenizer.symbols.addKeyword("enum", Enum)
tokenizer.symbols.addKeyword("case", Case)
tokenizer.symbols.addKeyword("operator", Operator)
tokenizer.symbols.addKeyword("generator", Generator)
tokenizer.symbols.addKeyword("fn", TokenType.Function)
tokenizer.symbols.addKeyword("coroutine", Coroutine)
tokenizer.symbols.addKeyword("break", TokenType.Break)
tokenizer.symbols.addKeyword("continue", Continue)
tokenizer.symbols.addKeyword("while", While)
tokenizer.symbols.addKeyword("for", For)
tokenizer.symbols.addKeyword("foreach", Foreach)
tokenizer.symbols.addKeyword("if", If)
tokenizer.symbols.addKeyword("else", Else)
tokenizer.symbols.addKeyword("await", TokenType.Await)
tokenizer.symbols.addKeyword("defer", Defer)
tokenizer.symbols.addKeyword("try", Try)
tokenizer.symbols.addKeyword("except", Except)
tokenizer.symbols.addKeyword("finally", Finally)
tokenizer.symbols.addKeyword("raise", TokenType.Raise)
tokenizer.symbols.addKeyword("assert", TokenType.Assert)
tokenizer.symbols.addKeyword("const", Const)
tokenizer.symbols.addKeyword("let", Let)
tokenizer.symbols.addKeyword("var", Var)
tokenizer.symbols.addKeyword("import", Import)
tokenizer.symbols.addKeyword("yield", TokenType.Yield)
tokenizer.symbols.addKeyword("return", TokenType.Return)
# These are more like expressions with a reserved
# name that produce a value of a builtin type,
# but we don't need to care about that until
# we're in the parsing/ compilation steps so
# it's fine
tokenizer.symbols.addKeyword("nan", NotANumber)
tokenizer.symbols.addKeyword("inf", Infinity)
tokenizer.symbols.addKeyword("nil", TokenType.Nil)
tokenizer.symbols.addKeyword("true", True)
tokenizer.symbols.addKeyword("false", False)
tokenizer.symbols.addKeyword("ref", Ref)
tokenizer.symbols.addKeyword("ptr", Ptr)
    for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]:
        tokenizer.symbols.addSymbol(sym, Symbol)


proc getLineEditor: LineEditor =
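    ## Creates and configures the REPL's line
    ## editor, plugging in the default key
    ## bindings and line editing history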
    result = newLineEditor()
    result.prompt = "=> "
    result.populateDefaults()
    let history = result.plugHistory()
    result.bindHistory(history)