compiler rewrite start

updated readme accordingly
created node.nim in compv2
changed nds.nim - setup for debugging the parser for now
This commit is contained in:
prod2 2022-12-02 15:45:34 +01:00
parent 88cc276100
commit e6a48ccee6
12 changed files with 285 additions and 248 deletions

View File

@ -1,21 +1,10 @@
# nondescript
Nondescript started as a nim implementation of clox (see https://craftinginterpreters.com) but has acquired large deviations.
Nondescript is a toy programming language. Currently its compiler is being rewritten, so please check the latest commit from Feb 2022 to see a working version in action.
## Deviations from lox
# Some of nondescript's features
- closures, gc not implemented yet
- classes will not be implemented
- Everything is an expression, except variable declarations and break statements
- hence expressions can contain statements, so the stack is tracked inside the compiler, using compiler.stackCount
- uses different local representation than lox inside the compiler
- compiler.addLocal also has a delta argument - check it out if interested
- 1 file = 1 chunk
- function objects are just a pointer to an instruction to jump to
- constant indexes, local indexes have 2 bytes as arguments - no 256 limit on locals/constants
- Almost everything is an expression
- block expressions can be labelled (@label) and the break statement takes a label to break out of
- set block expression results using :label
- set the return value of functions using :result
@ -23,6 +12,8 @@ Nondescript started as a nim implementation of clox (see https://craftinginterpr
- tables (example: @{ "hello" = "world"})
- length operator #
- ampersand operator to chain binary operators to finished expressions
- just a little syntax sugar (see sugar.nds in tests)
- and some more (an introduction to nondescript coming soon...)
## Examples
@ -34,8 +25,8 @@ See the following folders in the source tree, look for the extension .nds:
## Building
Requirements:
- nim (1.6.2 tested)
- c compiler (gcc, glibc tested)
- nim (1.6.8 tested) and nimble (recommended way to install is choosenim)
- c compiler (gcc tested)
The 4 steps to a REPL:
```

View File

@ -1,52 +0,0 @@
import vm
import compiler
import os
import config
# Outcome of interpreting one piece of source:
#   rsOK           - compiled and ran without error
#   rsCompileError - the compiler reported an error
#   rsRuntimeError - the VM reported a runtime error
type Result = enum
rsOK, rsCompileError, rsRuntimeError
proc interpret(name: string, source: string): Result =
  ## Compiles `source` with a compiler created from `name` and `source`;
  ## when compilation succeeds, runs the resulting chunk in a new VM.
  ##
  ## Returns `rsCompileError` if the compiler flagged an error, otherwise
  ## the VM's verdict translated into a `Result`.
  let comp = newCompiler(name, source)
  comp.compile()
  if comp.hadError:
    result = rsCompileError
  else:
    let machine = newVM(comp.chunk)
    let outcome = machine.run()
    case outcome
    of irOK:
      result = rsOK
    of irRuntimeError:
      result = rsRuntimeError
proc repl =
  ## Interactive loop: reads a line with `konLineEditor` and interprets
  ## every non-empty line under the name "repl". The result of each line is
  ## discarded. The loop ends when `ReadlineInterruptedException` is raised.
  while true:
    try:
      let input = konLineEditor()
      if input.len == 0:
        continue
      discard interpret("repl", input)
    except ReadlineInterruptedException:
      break
proc runFile(path: string) =
  ## Reads the file at `path`, interprets it and terminates the process.
  ##
  ## Exit codes: 0 on success, 65 on a compile error, 70 on a runtime error.
  let outcome = interpret(path, readFile(path))
  let exitCode =
    case outcome
    of rsOK: 0
    of rsCompileError: 65
    of rsRuntimeError: 70
  quit exitCode
# Script to run when no command-line argument is given.
# Leave empty to start the REPL instead.
const hardcodedPath* = ""

# Program entry: dispatch on the number of command-line arguments.
case paramCount()
of 0:
  if hardcodedPath.len > 0:
    runFile(hardcodedPath)
  else:
    repl()
of 1:
  runFile(paramStr(1))
else:
  echo "Maximum param count is 1"
  quit 1

View File

@ -1,8 +1,12 @@
import ndspkg/vm
import ndspkg/compiler/compiler
import ndspkg/compiler/types
import ndspkg/compv2/parser
import ndspkg/compv2/node
import ndspkg/config
when compilerChoice == cmOne:
import ndspkg/compiler/compiler
import ndspkg/compiler/types
import os
import strformat
@ -10,16 +14,21 @@ type Result = enum
rsOK, rsCompileError, rsRuntimeError
# Turns one source string into a Result.
# NOTE(review): this span is a diff hunk without +/- markers; the first
# compile-and-run sequence below is repeated verbatim inside the
# `elif compilerChoice == cmOne` branch, so it is presumably the removed
# pre-change body - confirm against the real file.
proc interpret(name: string, source: string): Result =
let compiler = newCompiler(name, source)
compiler.compile()
if compiler.hadError:
return rsCompileError
case compiler.chunk.run():
of irOK:
rsOK
of irRuntimeError:
rsRuntimeError
# Compile-time selection of the pipeline, driven by config.compilerChoice.
when compilerChoice == cmAst:
# AST mode: only parse and print the tree; nothing is executed and no
# Result is assigned explicitly in this branch.
let parser = newParser(name, source)
let node = parser.parse()
echo $node
elif compilerChoice == cmOne:
# Version-1 compiler: compile, bail out on compile errors, then run the chunk.
let compiler = newCompiler(name, source)
compiler.compile()
if compiler.hadError:
return rsCompileError
# Map the run outcome onto this module's Result enum.
case compiler.chunk.run():
of irOK:
rsOK
of irRuntimeError:
rsRuntimeError
proc repl =
while true:

View File

@ -1,22 +0,0 @@
import ../scanner
import ../chunk
import ../config
import parser/parser
import emitter/emitter
proc compile*(source: string): CompileResult =
  ## Runs the scanner -> parser -> emitter pipeline over `source`.
  ##
  ## Returns `CompileResult(ok: false)` as soon as the parser or the
  ## emitter reports an error; otherwise returns the emitted chunk wrapped
  ## in `CompileResult(ok: true, ...)`.
  let failure = CompileResult(ok: false)

  let parser = newParser(newScanner(source))
  let root = parser.parse()
  if parser.hadError:
    return failure

  let emitter = newEmitter(root)
  let chunk = emitter.emit()
  if emitter.hadError:
    return failure

  when debugDumpChunk:
    # emitter.hadError is already known to be false here
    chunk.disassembleChunk()
  CompileResult(ok: true, chunk: chunk)

144
src/ndspkg/compv2/node.nim Normal file
View File

@ -0,0 +1,144 @@
import ../types/value
import strformat
import strutils
# AST definitions for the compv2 parser.
type
# Discriminator for Node; one value per syntactic construct.
NodeKind* = enum
nkBlockExpr, nkExprStmt, nkBreak,
nkConst, nkList, nkTable, nkGetIndex, nkSetIndex, nkLen,
nkIf, nkWhile, nkOr, nkAnd, nkNegate, nkNot,
nkPlus, nkMinus, nkMult, nkDiv, nkEq, nkNeq, nkLess, nkGreater,
nkGe, nkLe,
nkCall, nkVarDecl, nkProc,
nkVarGet, nkVarSet
# A single AST node. Object variant: the fields available depend on `kind`.
Node* = ref object
case kind*: NodeKind:
of nkBlockExpr:
# child nodes in order, plus the labels attached to the block
children*: seq[Node]
labels*: seq[string]
of nkExprStmt: # only when there is a ; does it compile to expr stmt; expr used for parentheses, &
expression*: Node
of nkBreak:
# label named by the break statement
label*: string
of nkConst:
constant*: NdValue
of nkList:
elems*: seq[Node]
of nkTable:
# `$` pairs keys[i] with values[i], so both seqs are presumably kept
# at the same length - confirm in the parser
keys*: seq[Node]
values*: seq[Node]
of nkGetIndex:
# printed by `$` as gCollection[gIndex]
gCollection*: Node
gIndex*: Node
of nkSetIndex:
# printed by `$` as sCollection[sIndex] = sValue
sCollection*: Node
sIndex*: Node
sValue*: Node
of nkLen, nkNegate, nkNot: # unary ops
argument*: Node
of nkIf:
ifCondition*: Node
ifBody*: Node
# NOTE(review): presumably nil when there is no else branch - confirm
elseBody*: Node
of nkWhile:
whileCondition*: Node
whileBody*: Node
of nkAnd, nkOr, nkPlus, nkMinus, nkMult, nkDiv, nkEq, nkNeq, nkLess, nkGreater, nkGe, nkLe: # binary ops
left*: Node
right*: Node
of nkCall:
# callee expression and its argument expressions
function*: Node
arguments*: seq[Node]
of nkVarDecl:
name*: string
value*: Node # can be nil
of nkProc:
parameters*: seq[string]
procBody*: Node
of nkVarGet:
gVarName*: string
of nkVarSet:
sVarName*: string
newVal*: Node
proc `$`*(node: Node): string =
  ## Renders `node` and its subtree as a parenthesised debug string.
  ##
  ## A nil node (for example the `value` of a declaration without an
  ## initialiser) is rendered as "nil" instead of dereferencing a nil ref.
  ## Child lists are joined with ", " and carry no dangling separator.
  if node.isNil:
    return "nil"

  proc joined(nodes: seq[Node]): string =
    ## Stringifies every node and joins the pieces with ", ".
    for i, child in nodes:
      if i > 0:
        result &= ", "
      result &= $child

  case node.kind:
  of nkAnd:
    result = &"(and {node.left} {node.right})"
  of nkBlockExpr:
    let labels = node.labels.join(", ")
    let children = joined(node.children)
    result = &"(block labels: {labels} elements: {children})"
  of nkBreak:
    result = &"(break {node.label})"
  of nkCall:
    let args = joined(node.arguments)
    result = &"(call {node.function} {args})"
  of nkConst:
    result = &"(const {node.constant})"
  of nkDiv:
    result = &"(/ {node.left} {node.right})"
  of nkEq:
    result = &"(== {node.left} {node.right})"
  of nkExprStmt:
    result = &"(exprStmt {node.expression})"
  of nkGe:
    result = &"(ge {node.left} {node.right})"
  of nkGetIndex:
    result = &"({node.gCollection}[{node.gIndex}])"
  of nkGreater:
    result = &"(greater {node.left} {node.right})"
  of nkIf:
    result = &"(if {node.ifCondition}: {node.ifBody} else: {node.elseBody})"
  of nkLe:
    result = &"(le {node.left} {node.right})"
  of nkLen:
    result = &"(# {node.argument})"
  of nkLess:
    result = &"(less {node.left} {node.right})"
  of nkList:
    let items = joined(node.elems)
    result = &"(list {items})"
  of nkMinus:
    result = &"(- {node.left} {node.right})"
  of nkMult:
    result = &"(* {node.left} {node.right})"
  of nkNegate:
    result = &"(neg {node.argument})"
  of nkNeq:
    result = &"(!= {node.left} {node.right})"
  of nkNot:
    result = &"(! {node.argument})"
  of nkOr:
    result = &"(or {node.left} {node.right})"
  of nkPlus:
    result = &"(+ {node.left} {node.right})"
  of nkProc:
    let params = node.parameters.join(", ")
    result = &"(proc params: {params} body: {node.procBody})"
  of nkSetIndex:
    result = &"({node.sCollection}[{node.sIndex}] = {node.sValue})"
  of nkTable:
    # keys and values are rendered independently, so a length mismatch
    # between the two seqs cannot cause an index error while printing
    let keys = joined(node.keys)
    let values = joined(node.values)
    result = &"(table keys: {keys}, values: {values})"
  of nkVarDecl:
    result = &"(varDecl {node.name} = {node.value})"
  of nkVarGet:
    result = &"(varGet {node.gVarName})"
  of nkVarSet:
    result = &"(varSet {node.sVarName} = {node.newVal})"
  of nkWhile:
    result = &"(while {node.whileCondition}: {node.whileBody})"

View File

@ -0,0 +1,106 @@
# a new recursive descent parser for nds
# parser: converts a stream of tokens into an AST
import ../scanner
import ../chunk
import node
import ../config
import strformat
import sequtils
import sugar
# TYPEDEF
type
# State of the recursive descent parser.
Parser = ref object
# scanning
# `scanner` is created from `source` when parsing starts; `current` is the
# token being looked at, `previous` the one before it (see advance)
scanner: Scanner
source: string
current: Token
previous: Token
# errors
# hadError: set once any error was reported
# panicMode: set by errorAt and cleared by synchronize; while it is set,
# further errors are not printed
hadError*: bool
panicMode: bool
proc newParser*(name: string, source: string): Parser =
  ## Creates a parser for `source` with both error flags cleared.
  ## `name` is accepted but not used by this constructor. No scanner is
  ## attached here; `parse` creates it from the stored source.
  Parser(source: source, hadError: false, panicMode: false)
# UTILS
# error handling
proc errorAt(parser: Parser, line: int, msg: string, at: string = "") =
  ## Prints "[line N] Error [at X ]msg" to stderr, then sets both
  ## `hadError` and `panicMode`. Does nothing while the parser is already
  ## in panic mode, so only the first error of a cascade is shown.
  if not parser.panicMode:
    stderr.write(&"[line {line}] Error ")
    if at.len != 0:
      stderr.write(&"at {at} ")
    stderr.write(msg, "\n")
    parser.hadError = true
    parser.panicMode = true
proc error(parser: Parser, msg: string) =
  ## Reports `msg` at the line of the previous token.
  let line = parser.previous.line
  errorAt(parser, line, msg)
proc errorAtCurrent(parser: Parser, msg: string) =
  ## Reports `msg` at the line of the current token.
  let line = parser.current.line
  errorAt(parser, line, msg)
# scanning for tokens
proc advance(parser: Parser) =
  ## Moves one token forward: `previous` takes the old `current`, and
  ## `current` becomes the next token from the scanner that is not an error
  ## token. Each error token met on the way is reported with its text as
  ## the message.
  parser.previous = parser.current
  while true:
    parser.current = parser.scanner.scanToken()
    when debugScanner:
      parser.current.debugPrint()
    if parser.current.tokenType == tkError:
      parser.errorAtCurrent(parser.current.text)
    else:
      break
proc match(parser: Parser, tokenType: TokenType): bool =
  ## Consumes the current token if it has type `tokenType`.
  ## Returns whether a token was consumed.
  result = parser.current.tokenType == tokenType
  if result:
    parser.advance()
proc consume(parser: Parser, tokenType: TokenType, msg: string) =
  ## Consumes a token of type `tokenType`; if the current token has a
  ## different type, reports `msg` at the current token instead.
  let matched = parser.match(tokenType)
  if matched:
    return
  parser.errorAtCurrent(msg)
proc synchronize(parser: Parser) =
  ## Leaves panic mode and skips tokens until a likely statement boundary:
  ## either the previous token closed something (`;` or `}`) or the current
  ## token is one of the keywords listed below. Stops at end of file.
  const
    closers = {tkSemicolon, tkRightBrace}
    starters = {tkFunct, tkVar, tkFor, tkIf, tkWhile}
  parser.panicMode = false
  while parser.current.tokenType != tkEof:
    if parser.previous.tokenType in closers or
        parser.current.tokenType in starters:
      return
    parser.advance()
# EXPRESSIONS
# STATEMENTS
proc statement(parser: Parser): Node

proc statement(parser: Parser): Node =
  ## Placeholder statement parser: it builds no node, so the implicit
  ## result (nil) is returned. Its only action is to resynchronize when the
  ## parser is in panic mode.
  ## NOTE(review): outside panic mode no token is consumed here, so a
  ## caller looping until tkEof will not make progress - confirm this is
  ## intended scaffolding.
  if not parser.panicMode:
    return
  parser.synchronize()
proc parse*(parser: Parser): Node =
  ## Parses the parser's source into an AST and returns its root, an
  ## `nkBlockExpr` node whose children are the top-level statements.
  ##
  ## A new scanner is created from `parser.source` on every call.
  parser.scanner = newScanner(parser.source)
  # Assign to the implicit `result`; declaring a local `var result` would
  # shadow it and make this proc return nil.
  result = Node(kind: nkBlockExpr, children: @[])
  parser.advance() # load the first token into `current`
  # NOTE(review): termination relies on statement() consuming tokens.
  while parser.current.tokenType != tkEof:
    result.children.add(parser.statement())

View File

@ -1,14 +0,0 @@
import ../types
import ../../scanner
import utils
import statements
proc newParser*(sc: Scanner): Parser =
  ## Allocates a parser that takes its tokens from `sc`; all other fields
  ## keep their default values.
  Parser(scanner: sc)
proc parse*(par: Parser): Node =
  ## Collects statements into an `nkRoot` node until the current token is
  ## the end-of-file token, then returns that node.
  let root = Node(kind: nkRoot)
  while par.current.tokenType != tkEof:
    let stmt = par.statement()
    root.children.add(stmt)
  root

View File

@ -1,45 +0,0 @@
import ../types
import utils
# below are the expressions that can contain statements in some way
# the only expressions that can contain a statement are:
# the block expression
# Forward declaration so that parseBlock, which is defined before statement,
# can call it. The return type must match the definition further down and
# the way parseBlock uses the result (it adds it to a seq[Node]).
proc statement*(par: Parser): Node
proc parseBlock(par: Parser): Node =
  ## Parses statements into an `nkBlockExpr` node until a '}' or the end of
  ## file is reached, then requires the closing '}'.
  let blockNode = Node(kind: nkBlockExpr)
  while par.current.tokenType notin {tkRightBrace, tkEof}:
    blockNode.children.add(par.statement())
  par.consume(tkRightBrace, "Expect '}' after block.")
  blockNode
# statements
proc breakStatement(par: Parser): Node =
  ## Parses the remainder of a break statement (the `break` keyword has
  ## already been consumed by the caller) and returns an `nkBreak` node.
  ##
  ## Reports an error when the label is missing, when the terminating
  ## semicolon is missing, or when the statement is not directly followed
  ## by '}'. On a missing label the node's label is left empty.
  var label = ""
  if par.match(tkLabel):
    # skip the first character of the label token - presumably the '@'
    # sigil; confirm against the scanner
    label = par.previous.text[1..^1]
  else:
    par.error("Label expected after break.")
  result = Node(kind: nkBreak, label: label)
  par.consume(tkSemicolon, "Semicolon expected after break statement.")
  if par.current.tokenType != tkRightBrace:
    par.error("Break statement must be the last element inside the innermost block it is in.")
proc statement*(par: Parser): Node =
  ## Parses one statement, chosen by its leading token:
  ## `var` declaration, `def` procedure declaration, `break`, or otherwise
  ## an expression statement. All but `break` require a terminating
  ## semicolon here (breakStatement consumes its own).
  ##
  ## If an error put the parser into panic mode, it resynchronizes before
  ## returning.
  if par.match(tkVar):
    result = par.varStatement()
    # the message names the construct actually being parsed
    par.consume(tkSemicolon, "Semicolon expected after variable declaration.")
  elif par.match(tkDef):
    result = par.procStatement()
    par.consume(tkSemicolon, "Semicolon expected after procedure declaration.")
  elif par.match(tkBreak):
    result = par.breakStatement()
  else:
    result = Node(kind: nkExprStmt, expression: par.expression())
    par.consume(tkSemicolon, "Semicolon expected after expression statement.")
  if par.panicMode:
    par.synchronize()

View File

@ -1,52 +0,0 @@
import ../types
import ../../config
proc errorAt*(par: Parser, line: int, msg: string, at: string = "") =
  ## Prints "[line N] Error [at X ]msg" to stderr and sets `hadError` and
  ## `panicMode`. Silently returns while the parser is already in panic
  ## mode.
  if not par.panicMode:
    stderr.write(&"[line {line}] Error ")
    if at.len != 0:
      stderr.write(&"at {at} ")
    stderr.write(msg, "\n")
    par.hadError = true
    par.panicMode = true
proc error*(par: Parser, msg: string) =
  ## Reports `msg` at the previous token, quoting that token's text.
  let culprit = par.previous
  errorAt(par, culprit.line, msg, culprit.text)
proc errorAtCurrent*(par: Parser, msg: string, supress: bool = false) =
  ## Reports `msg` at the current token. The token's text is quoted in the
  ## message unless `supress` is true.
  let at =
    if supress: ""
    else: par.current.text
  par.errorAt(par.current.line, msg, at)
proc advance*(par: Parser) =
  ## Shifts `current` into `previous` and scans until a token that is not
  ## an error token becomes `current`. Every error token on the way is
  ## reported with its text as the message and without the "at" part.
  par.previous = par.current
  while true:
    par.current = par.scanner.scanToken()
    when debugScanner:
      par.current.debugPrint()
    if par.current.tokenType == tkError:
      par.errorAtCurrent(par.current.text, true)
    else:
      break
proc match*(par: Parser, tokenType: TokenType): bool =
  ## Consumes the current token when its type equals `tokenType` and
  ## reports whether it did so.
  result = par.current.tokenType == tokenType
  if result:
    par.advance()
proc consume*(par: Parser, tokenType: TokenType, msg: string) =
  ## Consumes a token of type `tokenType`, or reports `msg` at the current
  ## token when the type does not match.
  if par.current.tokenType != tokenType:
    par.errorAtCurrent(msg)
    return
  par.advance()
proc synchronize*(par: Parser) =
  ## Clears panic mode and discards tokens until the previous token is `;`
  ## or `}`, the current token is one of the keywords listed below, or the
  ## end of file is reached.
  const
    closers = {tkSemicolon, tkRightBrace}
    starters = {tkFunct, tkVar, tkFor, tkIf, tkWhile}
  par.panicMode = false
  while par.current.tokenType != tkEof:
    if par.previous.tokenType in closers or
        par.current.tokenType in starters:
      return
    par.advance()

View File

@ -1,35 +0,0 @@
import ../scanner
import ../chunk
import ../types/value
# Shared type definitions: parser state, AST nodes and the compile result.
type
# Parser state: token source, the current/previous token pair and the
# two error flags.
Parser* = ref object
scanner*: Scanner
current*: Token
previous*: Token
hadError*: bool
panicMode*: bool
# Discriminator for Node.
NodeKind* = enum
nkBlockExpr, nkRoot,
nkExprStmt,
nkBreak,
nkConst
# AST node. Object variant: the fields available depend on `kind`.
Node* = ref object
case kind*: NodeKind:
of nkBlockExpr, nkRoot:
# child nodes in order
children*: seq[Node]
of nkExprStmt:
expression*: Node
of nkBreak:
label*: string
of nkConst:
constant*: NdValue
# Result of a compilation; `chunk` exists only when `ok` is true.
CompileResult* = object
case ok*: bool:
of true:
chunk*: Chunk
of false:
discard

View File

@ -15,6 +15,13 @@ const boundsChecks* = defined(debug) or defined(release)
const profileInstructions* = defined(ndsprofile) # if true, the time spent on every opcode is measured
const debugClosures* = defined(debug) # specific closure debug switches
# Which compiler pipeline the interpreter is built with.
type compMode* = enum
cmOne, cmAst
# Compile-time switch, evaluated with `when` by the code that uses it.
const compilerChoice* = cmAst
# choose a compiler:
#   cmOne - version 1 of the compiler, deprecated
#   cmAst - version 2, but only the parser runs and the produced AST is printed
# cmOne will be removed once compv2 is done
# choose a line editor for the repl
const lineEditor = leRdstdin