From e6a48ccee6f7b91b357d2e95f6050ecd32dc8f5f Mon Sep 17 00:00:00 2001 From: prod2 <95874442+prod2@users.noreply.github.com> Date: Fri, 2 Dec 2022 15:45:34 +0100 Subject: [PATCH] compiler rewrite start updated readme accordingly created node.nim in compv2 changed nds.nim - setup for debugging the parser for now --- README.md | 23 ++-- main.nim | 52 --------- src/nds.nim | 33 +++--- src/ndspkg/compv2/compiler.nim | 22 ---- src/ndspkg/compv2/emitter/emitter.nim | 0 src/ndspkg/compv2/node.nim | 144 +++++++++++++++++++++++++ src/ndspkg/compv2/parser.nim | 106 ++++++++++++++++++ src/ndspkg/compv2/parser/parser.nim | 14 --- src/ndspkg/compv2/parser/statement.nim | 45 -------- src/ndspkg/compv2/parser/utils.nim | 52 --------- src/ndspkg/compv2/types.nim | 35 ------ src/ndspkg/config.nim | 7 ++ 12 files changed, 285 insertions(+), 248 deletions(-) delete mode 100644 main.nim delete mode 100644 src/ndspkg/compv2/compiler.nim delete mode 100644 src/ndspkg/compv2/emitter/emitter.nim create mode 100644 src/ndspkg/compv2/node.nim create mode 100644 src/ndspkg/compv2/parser.nim delete mode 100644 src/ndspkg/compv2/parser/parser.nim delete mode 100644 src/ndspkg/compv2/parser/statement.nim delete mode 100644 src/ndspkg/compv2/parser/utils.nim delete mode 100644 src/ndspkg/compv2/types.nim diff --git a/README.md b/README.md index 7a7acc7..bcd5d36 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,10 @@ # nondescript -Nondescript started as a nim implementation of clox (see https://craftinginterpreters.com) but has acquired large deviations. +Nondescript is a toy programming language. Currently its compiler is being rewritten, so please check the latest commit from Feb 2022 to see a working version in action. -## Deviations from lox +# Some of nondescript's features -- closures, gc not implemented yet -- classes will not be implemented - - - -- Everything is an expression, except variable declarations and break statements - - hence expressions can contain statements, so the stack is tracked inside the compiler, using compiler.stackCount - - uses different local representation than lox inside the compiler - - compiler.addLocal also has a delta argument - check it out if interested -- 1 file = 1 chunk - - function objects are just a pointer to an instruction to jump to -- constant indexes, local indexes have 2 bytes as arguments - no 256 limit on locals/constants +- Almost everything is an expression - block expressions can be labelled (@label) and the break statement takes a label to break out of - set block expression results using :label - set the return value of functions using :result @@ -23,6 +12,8 @@ Nondescript started as a nim implementation of clox (see https://craftinginterpr - tables (example: @{ "hello" = "world"}) - length operator # - ampersand operator to chain binary operators to finished expressions +- just a little syntax sugar (see sugar.nds in tests) +- and some more (an introduction to nondescript coming soon...) ## Examples @@ -34,8 +25,8 @@ See the following folders in the source tree, look for the extension .nds: ## Building Requirements: -- nim (1.6.2 tested) -- c compiler (gcc, glibc tested) +- nim (1.6.8 tested) and nimble (recommended way to install is choosenim) +- c compiler (gcc tested) The 4 steps to a REPL: ``` diff --git a/main.nim b/main.nim deleted file mode 100644 index 2f2b992..0000000 --- a/main.nim +++ /dev/null @@ -1,52 +0,0 @@ -import vm -import compiler -import os -import config - -type Result = enum - rsOK, rsCompileError, rsRuntimeError - -proc interpret(name: string, source: string): Result = - let compiler = newCompiler(name, source) - compiler.compile() - if compiler.hadError: - return rsCompileError - let vm = newVM(compiler.chunk) - case vm.run(): - of irOK: - rsOK - of irRuntimeError: - rsRuntimeError - - -proc repl = - while true: - try: - let line = konLineEditor() - if line.len > 0: - discard interpret("repl", line) - except ReadlineInterruptedException: - break - -proc runFile(path: string) = - case interpret(path, readFile(path)): - of rsCompileError: - quit 65 - of rsRuntimeError: - quit 70 - of rsOK: - quit 0 - -const hardcodedPath* = "" - -if paramCount() == 0: - if hardcodedPath == "": - repl() - else: - runFile(hardcodedPath) -elif paramCount() == 1: - runFile(paramStr(1)) -else: - echo "Maximum param count is 1" - quit 1 - diff --git a/src/nds.nim b/src/nds.nim index 3fff2c4..7fddc42 100644 --- a/src/nds.nim +++ b/src/nds.nim @@ -1,8 +1,12 @@ import ndspkg/vm -import ndspkg/compiler/compiler -import ndspkg/compiler/types +import ndspkg/compv2/parser +import ndspkg/compv2/node import ndspkg/config +when compilerChoice == cmOne: + import ndspkg/compiler/compiler + import ndspkg/compiler/types + import os import strformat @@ -10,16 +14,21 @@ type Result = enum rsOK, rsCompileError, rsRuntimeError proc interpret(name: string, source: string): Result = - let compiler = newCompiler(name, source) - compiler.compile() - if compiler.hadError: - return rsCompileError - case compiler.chunk.run(): - of irOK: - rsOK - of irRuntimeError: - rsRuntimeError - + when compilerChoice == cmAst: + let parser = newParser(name, source) + let node = parser.parse() + echo $node + elif compilerChoice == cmOne: + let compiler = newCompiler(name, source) + compiler.compile() + if compiler.hadError: + return rsCompileError + case compiler.chunk.run(): + of irOK: + rsOK + of irRuntimeError: + rsRuntimeError + proc repl = while true: diff --git a/src/ndspkg/compv2/compiler.nim b/src/ndspkg/compv2/compiler.nim deleted file mode 100644 index ac92460..0000000 --- a/src/ndspkg/compv2/compiler.nim +++ /dev/null @@ -1,22 +0,0 @@ -import ../scanner -import ../chunk -import ../config - -import parser/parser -import emitter/emitter - -proc compile*(source: string): CompileResult = - result = CompileResult(ok: false) - let scanner = newScanner(source) - let parser = newParser(scanner) - let nodeRoot = parser.parse() - if parser.hadError: - return result - let emitter = newEmitter(nodeRoot) - let chunk = emitter.emit() - if emitter.hadError: - return result - when debugDumpChunk: - if not emitter.hadError: - chunk.disassembleChunk() - return CompileResult(ok: true, chunk: chunk) \ No newline at end of file diff --git a/src/ndspkg/compv2/emitter/emitter.nim b/src/ndspkg/compv2/emitter/emitter.nim deleted file mode 100644 index e69de29..0000000 diff --git a/src/ndspkg/compv2/node.nim b/src/ndspkg/compv2/node.nim new file mode 100644 index 0000000..75e2f06 --- /dev/null +++ b/src/ndspkg/compv2/node.nim @@ -0,0 +1,144 @@ + +import ../types/value + +import strformat +import strutils + +type + NodeKind* = enum + nkBlockExpr, nkExprStmt, nkBreak, + nkConst, nkList, nkTable, nkGetIndex, nkSetIndex, nkLen, + nkIf, nkWhile, nkOr, nkAnd, nkNegate, nkNot, + nkPlus, nkMinus, nkMult, nkDiv, nkEq, nkNeq, nkLess, nkGreater, + nkGe, nkLe, + nkCall, nkVarDecl, nkProc, + nkVarGet, nkVarSet + + Node* = ref object + case kind*: NodeKind: + of nkBlockExpr: + children*: seq[Node] + labels*: seq[string] + of nkExprStmt: # only when there is a ; does it compile to expr stmt; expr used for parentheses, & + expression*: Node + of nkBreak: + label*: string + of nkConst: + constant*: NdValue + of nkList: + elems*: seq[Node] + of nkTable: + keys*: seq[Node] + values*: seq[Node] + of nkGetIndex: + gCollection*: Node + gIndex*: Node + of nkSetIndex: + sCollection*: Node + sIndex*: Node + sValue*: Node + of nkLen, nkNegate, nkNot: # unary ops + argument*: Node + of nkIf: + ifCondition*: Node + ifBody*: Node + elseBody*: Node + of nkWhile: + whileCondition*: Node + whileBody*: Node + of nkAnd, nkOr, nkPlus, nkMinus, nkMult, nkDiv, nkEq, nkNeq, nkLess, nkGreater, nkGe, nkLe: # binary ops + left*: Node + right*: Node + of nkCall: + function*: Node + arguments*: seq[Node] + of nkVarDecl: + name*: string + value*: Node # can be nil + of nkProc: + parameters*: seq[string] + procBody*: Node + of nkVarGet: + gVarName*: string + of nkVarSet: + sVarName*: string + newVal*: Node + +proc `$`*(node: Node): string = + case node.kind: + of nkAnd: + result = &"(and {node.left} {node.right})" + of nkBlockExpr: + let labels = node.labels.join(", ") + result = &"(block labels: {labels} elements: " + for ch in node.children: + result &= $ch & ", " + result[^1] = ')' + of nkBreak: + result = &"(break {node.label})" + of nkCall: + result = &"(call {node.function} " + for ch in node.arguments: + result &= $ch & ", " + result[^1] = ')' + of nkConst: + result = &"(const {node.constant})" + of nkDiv: + result = &"(/ {node.left} {node.right})" + of nkEq: + result = &"(== {node.left} {node.right})" + of nkExprStmt: + result = &"(exprStmt {node.expression})" + of nkGe: + result = &"(ge {node.left} {node.right})" + of nkGetIndex: + result = &"({node.gCollection}[{node.gIndex}])" + of nkGreater: + result = &"(greater {node.left} {node.right})" + of nkIf: + result = &"(if {node.ifCondition}: {node.ifBody} else: {node.elseBody})" + of nkLe: + result = &"(le {node.left} {node.right})" + of nkLen: + result = &"(# {node.argument})" + of nkLess: + result = &"(less {node.left} {node.right})" + of nkList: + result = &"(list " + for ch in node.elems: + result &= &"{ch}, " + result[^1] = ')' + of nkMinus: + result = &"(- {node.left} {node.right})" + of nkMult: + result = &"(* {node.left} {node.right})" + of nkNegate: + result = &"(neg {node.argument})" + of nkNeq: + result = &"(!= {node.left} {node.right})" + of nkNot: + result = &"(! {node.argument})" + of nkOr: + result = &"(or {node.left} {node.right})" + of nkPlus: + result = &"(+ {node.left} {node.right})" + of nkProc: + let params = node.parameters.join(", ") + result = &"(proc params: {params} body: {node.procBody})" + of nkSetIndex: + result = &"({node.sCollection}[{node.sIndex}] = {node.sValue})" + of nkTable: + var keys = "" + var values = "" + for i in 0..node.keys.high: + keys &= &"{node.keys[i]}, " + values &= &"{node.values[i]}, " + result = &"(table keys: {keys}, values: {values})" + of nkVarDecl: + result = &"(varDecl {node.name} = {node.value})" + of nkVarGet: + result = &"(varGet {node.gVarName})" + of nkVarSet: + result = &"(varSet {node.sVarName} = {node.newVal})" + of nkWhile: + result = &"(while {node.whileCondition}: {node.whileBody})" diff --git a/src/ndspkg/compv2/parser.nim b/src/ndspkg/compv2/parser.nim new file mode 100644 index 0000000..e86da52 --- /dev/null +++ b/src/ndspkg/compv2/parser.nim @@ -0,0 +1,106 @@ +# a new recursive descent parser for nds +# parser: converts a stream of tokens into an AST + +import ../scanner +import ../chunk +import node +import ../config + +import strformat +import sequtils +import sugar + +# TYPEDEF + +type + Parser = ref object + # scanning + scanner: Scanner + source: string + current: Token + previous: Token + + # errors + hadError*: bool + panicMode: bool + +proc newParser*(name: string, source: string): Parser = + result = new(Parser) + result.source = source + result.hadError = false + result.panicMode = false + +# UTILS + + +# error handling + +proc errorAt(parser: Parser, line: int, msg: string, at: string = "") = + if parser.panicMode: + return # don't display errors if already in panic mode + write stderr, &"[line {line}] Error " + if at.len > 0: + write stderr, &"at {at} " + write stderr, msg + write stderr, "\n" + parser.hadError = true + parser.panicMode = true + +proc error(parser: Parser, msg: string) = + parser.errorAt(parser.previous.line, msg) + +proc errorAtCurrent(parser: Parser, msg: string) = + parser.errorAt(parser.current.line, msg) + +# scanning for tokens + +proc advance(parser: Parser) = + parser.previous = parser.current + while true: + parser.current = parser.scanner.scanToken() + when debugScanner: + parser.current.debugPrint() + if (parser.current.tokenType != tkError): + break + parser.errorAtCurrent(parser.current.text) + +proc match(parser: Parser, tokenType: TokenType): bool = + if parser.current.tokenType == tokenType: + parser.advance() + true + else: + false + +proc consume(parser: Parser, tokenType: TokenType, msg: string) = + if not parser.match(tokenType): + parser.errorAtCurrent(msg) + +proc synchronize(parser: Parser) = + parser.panicMode = false + while parser.current.tokenType != tkEof: + if parser.previous.tokenType in {tkSemicolon, tkRightBrace}: + return + if parser.current.tokenType in {tkFunct, tkVar, tkFor, tkIf, tkWhile}: + return + parser.advance() + +# EXPRESSIONS + +# STATEMENTS + +proc statement(parser: Parser): Node + + + +proc statement(parser: Parser): Node = + if parser.panicMode: + parser.synchronize() + +proc parse*(parser: Parser): Node = + parser.scanner = newScanner(parser.source) + var result: Node = Node(kind: nkBlockExpr, children: @[]) + + parser.advance() + while parser.current.tokenType != tkEof: + result.children.add(parser.statement()) + diff --git a/src/ndspkg/compv2/parser/parser.nim b/src/ndspkg/compv2/parser/parser.nim deleted file mode 100644 index 0abed99..0000000 --- a/src/ndspkg/compv2/parser/parser.nim +++ /dev/null @@ -1,14 +0,0 @@ -import ../types -import ../../scanner - -import utils -import statements - -proc newParser*(sc: Scanner): Parser = - result.new() - result.scanner = sc - -proc parse*(par: Parser): Node = - result = Node(kind: nkRoot) - while par.current.tokenType != tkEof: - result.children.add(par.statement()) \ No newline at end of file diff --git a/src/ndspkg/compv2/parser/statement.nim b/src/ndspkg/compv2/parser/statement.nim deleted file mode 100644 index 1f7185a..0000000 --- a/src/ndspkg/compv2/parser/statement.nim +++ /dev/null @@ -1,45 +0,0 @@ -import ../types -import utils - -# below are the expressions that can contain statements in some way -# the only expressions that can contain a statement are: - # the block expression -proc statement*(par: Parser) - -proc parseBlock(par: Parser): Node = - ## Despite the name, can be used for statements if the arg statement is true - ## Also can be used for function bodies - result = Node(kind: nkBlockExpr) - while par.current.tokenType != tkRightBrace and par.current.tokenType != tkEof: - result.children.add(par.statement()) - - par.consume(tkRightBrace, "Expect '}' after block.") - -# statements - -proc breakStatement(par: Parser): Node = - if not par.match(tkLabel): - par.error("Label expected after break.") - - let label = comp.previous.text[1..^1] - result = Node(kind: nkBreak, label: label) - - par.consume(tkSemicolon, "Semicolon expected after break statement.") - if par.current.tokenType != tkRightBrace: - par.error("Break statement must be the last element inside the innermost block it is in.") - -proc statement*(par: Parser): Node = - if par.match(tkVar): - result = par.varStatement() - par.consume(tkSemicolon, "Semicolon expected after expression statement.") - elif par.match(tkDef): - result = par.procStatement() - par.consume(tkSemicolon, "Semicolon expected after procedure declaration.") - elif par.match(tkBreak): - result = par.breakStatement() - else: - result = Node(kind: nkExprStmt, expression: par.expression()) - par.consume(tkSemicolon, "Semicolon expected after expression statement.") - - if par.panicMode: - par.synchronize() \ No newline at end of file diff --git a/src/ndspkg/compv2/parser/utils.nim b/src/ndspkg/compv2/parser/utils.nim deleted file mode 100644 index 3566d1d..0000000 --- a/src/ndspkg/compv2/parser/utils.nim +++ /dev/null @@ -1,52 +0,0 @@ -import ../types -import ../../config - -proc errorAt*(par: Parser, line: int, msg: string, at: string = "") = - if par.panicMode: - return - write stderr, &"[line {line}] Error " - if at.len > 0: - write stderr, &"at {at} " - write stderr, msg - write stderr, "\n" - par.hadError = true - par.panicMode = true - -proc error*(par: Parser, msg: string) = - ## create a simple error message - par.errorAt(par.previous.line, msg, par.previous.text) - -proc errorAtCurrent*(par: Parser, msg: string, supress: bool = false) = - par.errorAt(par.current.line, msg, if supress: "" else: par.current.text) - -proc advance*(par: Parser) = - par.previous = par.current - while true: - par.current = par.scanner.scanToken() - when debugScanner: - par.current.debugPrint() - if (par.current.tokenType != tkError): - break - par.errorAtCurrent(par.current.text, true) - -proc match*(par: Parser, tokenType: TokenType): bool = - if par.current.tokenType == tokenType: - par.advance() - true - else: - false - -proc consume*(par: Parser, tokenType: TokenType, msg: string) = - if par.current.tokenType == tokenType: - par.advance() - else: - par.errorAtCurrent(msg) - -proc synchronize*(par: Parser) = - par.panicMode = false - while par.current.tokenType != tkEof: - if par.previous.tokenType in {tkSemicolon, tkRightBrace}: - return - if par.current.tokenType in {tkFunct, tkVar, tkFor, tkIf, tkWhile}: - return - par.advance() diff --git a/src/ndspkg/compv2/types.nim b/src/ndspkg/compv2/types.nim deleted file mode 100644 index 330554d..0000000 --- a/src/ndspkg/compv2/types.nim +++ /dev/null @@ -1,35 +0,0 @@ -import ../scanner -import ../chunk -import ../types/value - -type - Parser* = ref object - scanner*: Scanner - current*: Token - previous*: Token - hadError*: bool - panicMode*: bool - - NodeKind* = enum - nkBlockExpr, nkRoot, - nkExprStmt, - nkBreak, - nkConst - - Node* = ref object - case kind*: NodeKind: - of nkBlockExpr, nkRoot: - children*: seq[Node] - of nkExprStmt: - expression*: Node - of nkBreak: - label*: string - of nkConst: - constant*: NdValue - - CompileResult* = object - case ok*: bool: - of true: - chunk*: Chunk - of false: - discard diff --git a/src/ndspkg/config.nim b/src/ndspkg/config.nim index c5e74e9..dbdb0be 100644 --- a/src/ndspkg/config.nim +++ b/src/ndspkg/config.nim @@ -15,6 +15,13 @@ const boundsChecks* = defined(debug) or defined(release) const profileInstructions* = defined(ndsprofile) # if true, the time spent on every opcode is measured const debugClosures* = defined(debug) # specific closure debug switches +type compMode* = enum + cmOne, cmAst +const compilerChoice* = cmAst +# choose a compiler: cmOne - version 1, deprecated +# cmAst - version 2, but only use parser and print AST produced +# cmOne will be removed once compv2 is done + # choose a line editor for the repl const lineEditor = leRdstdin