import strformat
import strutils
import sugar

import scanner
import chunk
import types/value
import bitops # needed for value
import config

type
  Local = object
    name: string # name of this local
    index: int # what is its index in the stack (0 is the stack bottom - nil in the main function)
    depth: int # depth of this local
    # if depth is -1, the variable cannot be referenced yet
    # its depth will be set once its first ever value is determined

  Scope = ref object
    labels: seq[string]
    #depth: int
    goalStackIndex: int # the stack count it started with plus 1
    jumps: seq[int] # jumps to be patched that jump to the end
    function: bool # if true, it is a function

  Compiler = ref object
    #
    # input
    #
    scanner: Scanner
    source: string
    #
    # state
    #
    previous: Token
    current: Token
    canAssign: bool
    locals: seq[Local]
    scopes: seq[Scope]
    stackIndex: int # how large the stack is
    # when there's an error both are set
    # panic mode can be turned off e.g. at block boundaries
    panicMode: bool
    #
    # output
    #
    chunk*: Chunk
    hadError*: bool

  Precedence = enum
    pcNone, pcExprTop, pcAmpersand, pcAssignment, pcNonAssignTop, pcOr, pcAnd,
    pcEquality, pcComparison, pcTerm, pcFactor, pcUnary, pcIndex, pcCall,
    pcPrimary
    # pcUnary applies to all prefix operators regardless of this enum's value
    # changing pcUnary's position can change the priority of all unary ops
    #
    # Note: unary only rules should have precedence pcNone!!!
    # pcExprTop, pcNonAssignTop are special placeholders!

  ParseRule = object
    name: string # debug purposes only
    prefix: (Compiler) -> void
    infix: (Compiler) -> void
    prec: Precedence # only relevant to infix, prefix always has pcUnary

proc newScope(comp: Compiler, function: bool): Scope =
  ## Creates a scope, records the stack index it has to be restored to
  ## (current `stackIndex` plus one) and pushes it onto `comp.scopes`.
  result.new()
  #result.depth = comp.scopes.len + 1
  result.function = function
  result.goalStackIndex = comp.stackIndex + 1
  comp.scopes.add(result)

# HELPERS FOR THE COMPILER TYPE

proc newCompiler*(name: string, source: string): Compiler =
  ## Creates a compiler for `source`; `name` is passed on to `initChunk`.
  ## The scanner itself is only created later, in `compile`.
  result = new(Compiler)
  result.chunk = initChunk(name)
  result.source = source
  result.hadError = false
  result.panicMode = false
  result.canAssign = true
  result.locals = @[]
  result.scopes = @[]

proc errorAt(comp: Compiler, line: int, msg: string, at: string = "") =
  ## Reports `msg` for `line` on stderr and sets both `hadError` and
  ## `panicMode`. While in panic mode, further errors are silently dropped.
  if comp.panicMode:
    return
  write stderr, &"[line {line}] Error "
  if at.len > 0:
    write stderr, &"at {at} "
  write stderr, msg
  write stderr, "\n"
  comp.hadError = true
  comp.panicMode = true

proc error(comp: Compiler, msg: string) =
  ## create a simple error message (reported at the previous token's line)
  comp.errorAt(comp.previous.line, msg)

proc errorAtCurrent(comp: Compiler, msg: string) =
  ## Like `error`, but reported at the current token's line.
  comp.errorAt(comp.current.line, msg)

proc advance(comp: Compiler) =
  ## Moves `current` into `previous` and scans the next token.
  ## `tkError` tokens are reported (their text is the message) and skipped.
  comp.previous = comp.current
  while true:
    comp.current = comp.scanner.scanToken()
    when debugScanner:
      comp.current.debugPrint()
    if (comp.current.tokenType != tkError):
      break
    comp.errorAtCurrent(comp.current.text)

proc match(comp: Compiler, tokenType: TokenType): bool =
  ## Advances and returns true if the current token is of `tokenType`;
  ## otherwise leaves the token stream untouched and returns false.
  if comp.current.tokenType == tokenType:
    comp.advance()
    true
  else:
    false

proc consume(comp: Compiler, tokenType: TokenType, msg: string) =
  ## Advances past the current token if it is of `tokenType`, otherwise
  ## reports `msg` at the current token without advancing.
  if comp.current.tokenType == tokenType:
    comp.advance()
  else:
    comp.errorAtCurrent(msg)

proc synchronize(comp: Compiler) =
  ## Leaves panic mode and skips tokens until a likely statement boundary:
  ## just after a semicolon/right brace, or just before one of the keywords
  ## listed below, or the end of input.
  comp.panicMode = false
  while comp.current.tokenType != tkEof:
    if comp.previous.tokenType in {tkSemicolon, tkRightBrace}:
      return
    if comp.current.tokenType in {tkFunct, tkVar, tkFor, tkIf, tkWhile}:
      return
    comp.advance()

proc writeChunk(comp: Compiler, dStackIndex: int, ch: OpCode | DoubleUint8 | uint8) =
  ## Writes `ch` to the chunk (tagged with the previous token's line) and
  ## adjusts the tracked `stackIndex` by `dStackIndex`.
  comp.stackIndex += dStackIndex
  when debugCompiler:
    debugEcho &"new stackindex: {comp.stackIndex}, delta: {dStackIndex} due to {ch.repr}"
  comp.chunk.writeChunk(ch, comp.previous.line)

proc writePops(comp: Compiler, count: int) =
  ## Emits the instruction(s) that pop `count` values and lowers
  ## `stackIndex` accordingly. Nothing is emitted for a count of 0.
  if count > argMax:
    comp.error("Too many local variables in block.")

  if count == 0:
    return

  when debugCompiler:
    debugEcho &"Emitting {count}xPop."

  if count == 1:
    comp.writeChunk(-1, opPop)
  elif count < shortArgMax:
    # count is written as a single uint8 argument
    comp.writeChunk(-count, opPopSA)
    comp.writeChunk(0, count.uint8)
  else:
    # count is written as a DoubleUint8 argument
    comp.writeChunk(-count, opPopA)
    comp.writeChunk(0, count.toDU8())

proc writeConstant(comp: Compiler, constant: NdValue) =
  ## Writes `constant` through the chunk's `writeConstant` and counts one
  ## more value on the stack. Reports an error if the returned index
  ## reaches `argMax`.
  comp.stackIndex.inc
  let index = comp.chunk.writeConstant(constant, comp.previous.line)
  if index >= argMax:
    comp.error("Too many constants in one chunk.")

proc addLocal(comp: Compiler, name: string, delta: int) =
  ## Registers a local called `name` living at stack slot
  ## `stackIndex + delta`.
  if comp.locals.len >= argMax:
    comp.error("Too many local variables in function.")

  # if delta is 0 or negative, it means that it is already on the stack when addLocal is called
  # if delta is positive, the first ever value of the local is to the right
  comp.locals.add(Local(
    name: name,
    depth: if delta > 0: -1 else: comp.scopes.high,
    index: comp.stackIndex + delta))

proc markInitialized(comp: Compiler) =
  ## Gives the most recently added local the depth of the innermost scope,
  ## which makes it referencable (see `resolveLocal`).
  comp.locals[comp.locals.high].depth = comp.scopes.high

# PARSE RULE/PRECEDENCE MISC

proc nop(comp: Compiler) =
  ## Placeholder meaning "no prefix/infix handler" in the rule table.
  discard

var rules: array[TokenType, ParseRule]

template genRule(ttype: TokenType, tprefix: (Compiler) -> void,
    tinfix: (Compiler) -> void, tprec: Precedence) =
  ## Validates and installs the parse rule for `ttype`.
  ## Raises on precedences that must not appear in the table.
  if tprec == pcUnary:
    raise newException(Exception, "pcUnary cannot be used as a rule precedence! Use pcNone for unary-only rules!")
  elif tprec == pcPrimary:
    raise newException(Exception, "Invalid rule: pcPrimary cannot be used for binary operators, if this rule is for a primary value, use pcNone!")
  elif tprec in {pcNonAssignTop, pcExprTop}:
    raise newException(Exception, "Invalid rule: a top pc is just a placeholder")
  elif tprec == pcNone and tinfix != nop:
    raise newException(Exception, "Invalid rule: pcNone only allowed for unary operators and primary values, not for infix ones!")
  rules[ttype] = ParseRule(name: $ttype, prefix: tprefix, infix: tinfix, prec: tprec)

# every token starts out with an empty rule; real rules are installed below
for i in TokenType:
  genRule(i, nop, nop, pcNone)

proc getRule(opType: TokenType): ParseRule =
  ## Looks up the parse rule registered for `opType`.
  rules[opType]

proc applyRule(rule: ParseRule): Precedence =
  # returns the rule's precedence
  rule.prec

proc increment(prec: Precedence): Precedence =
  # increases precedence by one
  if prec == pcPrimary:
    raise newException(Exception, "Invalid ruletable, pcPrimary precedence increment attempted.")
  else:
    Precedence(int(prec) + 1)

proc `<=`(a, b: Precedence): bool =
  ## Orders precedences by their ordinal value.
  int(a) <= int(b)

# JUMP HELPERS

proc emitJump(comp: Compiler, delta: int, op: OpCode): int =
  ## Emits the jump instruction `op` with a placeholder argument and
  ## returns the offset of that argument, to be handed to `patchJump`.
  # delta -> 0 if the jump does not pop
  # delta -> -1 if the jump pops the condition from the stack
  comp.writeChunk(delta, op)
  comp.writeChunk(0, 0xffffff.toDU8)
  comp.chunk.len - argSize

proc patchJump(comp: Compiler, offset: int) =
  ## Overwrites the placeholder at `offset` (as returned by `emitJump`)
  ## with the distance from just after the argument to the current end of
  ## the chunk.
  let jump = (comp.chunk.len - offset - argSize)
  if (jump > argMax):
    comp.error("Too much code to jump over.")
  let jumpt = jump.toDU8
  comp.chunk.code[offset] = jumpt[0]
  comp.chunk.code[offset + 1] = jumpt[1]

proc emitLoop(comp: Compiler, loopstart: int, delta: int, op: OpCode) =
  ## Emits the backwards jump `op` targeting `loopstart`.
  comp.writeChunk(delta, op)
  let offset = comp.chunk.len - loopstart + argSize
  if offset > argMax:
    comp.error("Loop body too large.")
  comp.writeChunk(0, offset.toDU8)

# SCOPE HELPERS

proc beginScope(comp: Compiler, function: bool = false) =
  ## Opens a new scope. Function scopes get the labels "result" and
  ## "function"; other scopes take any label tokens that follow. Every
  ## label is also registered as a local named ":<label>" that refers to
  ## the slot holding the scope's value.
  let scope = comp.newScope(function)
  when debugCompiler:
    debugEcho &"Begin scope called for depth {comp.scopes.len} function? {function}"
  if function:
    scope.labels.add("result")
    scope.labels.add("function")
  else:
    while comp.match(tkLabel):
      # strip the first character of the label token
      let label = comp.previous.text[1..^1]
      scope.labels.add(label)

  if function:
    # if it's a function scope, the frame will move
    # access to outside locals is also limited to upvalues and closures
    comp.stackIndex = 0
  else:
    # only put the opNil if it's not a function scope, since
    # function scopes are initialized by the caller
    comp.writeChunk(1, opNil)

  for label in scope.labels:
    comp.addLocal(&":{label}", delta = 0)

proc restore(comp: Compiler, scope: Scope) =
  ## Emits the pops that bring the stack back to `scope.goalStackIndex`.
  let delta = comp.stackIndex - scope.goalStackIndex
  comp.writePops(delta)
  when assertionsCompiler:
    # `not a == b` would parse as `(not a) == b`, hence the explicit `!=`
    if comp.stackIndex != scope.goalStackIndex:
      comp.error("Assertion failed in restore")
  when debugCompiler:
    debugEcho &"Restored scope: delta {delta}"

proc restoreInFunct(comp: Compiler, scope: Scope) =
  ## Emits pops for everything above slot 0 of the function's frame, then
  ## resets the tracked `stackIndex` to `scope.goalStackIndex`.
  let pops = comp.stackIndex
  comp.writePops(pops)
  comp.stackIndex = scope.goalStackIndex
  when debugCompiler:
    debugEcho &"Restored function scope: delta {pops}; new stackindex: {comp.stackIndex}"

proc jumpToEnd(comp: Compiler, scope: Scope) =
  ## Jumps to the end of scope, does not affect stackIndex
  let delta =
    if scope.function: comp.stackIndex
    else: comp.stackIndex - scope.goalStackIndex
  comp.writePops(delta)
  # the jump's delta undoes the bookkeeping done by writePops, so the code
  # following the jump is compiled with an unchanged stackIndex
  let jmp = comp.emitJump(delta, opJump)
  scope.jumps.add(jmp)

proc endScope(comp: Compiler) =
  ## Closes the innermost scope: forgets its locals, emits the pops that
  ## restore the stack, patches the jumps targeting the scope's end and,
  ## for function scopes, emits `opReturn`.
  let popped = comp.scopes.pop()
  # remove locals: everything declared in the scope that just ended has a
  # depth greater than the depth of the now-innermost scope
  while comp.locals.len > 0 and comp.locals[^1].depth > comp.scopes.high:
    discard comp.locals.pop()

  let function = popped.function
  when debugCompiler:
    debugEcho &"End scope called for depth {comp.scopes.len} function? {function}"
  if function:
    comp.restoreInFunct(popped)
  else:
    comp.restore(popped)
  # patch jumps to after the scope (such jumps from breaks emit the pops before jumping)
  for jump in popped.jumps:
    comp.patchJump(jump)
  if function:
    comp.writeChunk(0, opReturn)

# EXPRESSIONS

proc parsePrecedence(comp: Compiler, prec: Precedence) =
  ## Pratt parser core: parses a prefix expression and then every infix
  ## operator whose precedence is at least `prec`.
  comp.advance()
  let rule = comp.previous.tokenType.getRule()
  if rule.prefix != nop:
    comp.canAssign = prec <= pcAssignment
    when debugCompiler:
      debugEcho &"parsePrecedence call, valid prefix op found, rule used: {rule.name}, precedence: {prec}"
    rule.prefix(comp)
    while prec <= comp.current.tokenType.getRule().prec:
      comp.advance()
      # checked for isSome in the loop
      # since advance moves current to previous
      if comp.previous.tokenType.getRule().infix == nop:
        # should never happen, as having a precedence set
        # means that it is a binary op
        comp.error("Invalid rule table.")
        return
      else:
        let infixRule = comp.previous.tokenType.getRule().infix
        infixRule(comp)
  else:
    comp.error(&"Expect expression, got {($comp.previous.tokenType)[2..^1]}.")

proc expression(comp: Compiler) =
  ## The lowest precedence among the pratt-parsed expressions
  # DO NOT ADD ANYTHING HERE
  # only use the pratt table for parsing expressions!
  when assertionsVM:
    let oldStackIndex = comp.stackIndex
  comp.parsePrecedence(pcExprTop)
  when assertionsVM:
    let diff = comp.stackIndex - oldStackIndex
    if diff != 1:
      comp.error(&"Assertion failed: expression increased ({oldStackIndex} -> {comp.stackIndex}) the stack index by {diff} (should be 1).")

proc number(comp: Compiler) =
  ## Compiles a number literal into a constant.
  # assume the number is already advanced through
  let value = comp.previous.text.parseFloat.fromFloat()
  comp.writeConstant(value)
  when debugCompiler:
    debugEcho &"Written constant (type: {value.ndType}, str repr: {$value}) to chunk"

tkNumber.genRule(number, nop, pcNone)

proc expFalse(comp: Compiler) =
  ## Compiles the `false` literal.
  comp.writeChunk(1, opFalse)

tkFalse.genRule(expFalse, nop, pcNone)

proc expTrue(comp: Compiler) =
  ## Compiles the `true` literal.
  comp.writeChunk(1, opTrue)

tkTrue.genRule(expTrue, nop, pcNone)

proc expNil(comp: Compiler) =
  ## Compiles the `nil` literal.
  comp.writeChunk(1, opNil)

tkNil.genRule(expNil, nop, pcNone)

proc expString(comp: Compiler) =
  ## Compiles a string literal into a constant; the first and last
  ## character of the token text are dropped.
  let value = comp.previous.text[1..^2].fromNimString()
  comp.writeConstant(value)
  when debugCompiler:
    debugEcho &"Written constant (type: {value.ndType}, str repr: {$value}) to chunk"

tkString.genRule(expString, nop, pcNone)

proc resolveLocal(comp: Compiler, name: string): int =
  ## returns the stack index of the local of the name
  ## (the most recently declared one wins), or -1 if there is none
  for i in countdown(comp.locals.high, 0):
    let local = comp.locals[i]
    if local.name == name:
      if local.depth == -1:
        comp.error("Can't read local variable in its own initializer.")
      return local.index
  return -1

proc variable(comp: Compiler) =
  ## Compiles a read of, or an assignment to, a named variable.
  # named variable
  var getOp = opGetGlobal
  var setOp = opSetGlobal
  let name = comp.previous.text

  # try resolving local, set arg to the index on the stack
  var arg = comp.resolveLocal(name)
  if arg != -1:
    # local
    getOp = opGetLocal
    setOp = opSetLocal
  else:
    # global
    arg = comp.chunk.addConstant(name.fromNimString())

  if comp.match(tkEqual):
    # assignment (global/local)
    if not comp.canAssign:
      comp.error("Invalid assignment target.")
      return
    comp.parsePrecedence(pcAssignment)
    comp.writeChunk(0, setOp)
  else:
    # get (global/local)
    comp.writeChunk(1, getOp)
  comp.writeChunk(0, arg.toDU8)

tkIdentifier.genRule(variable, nop, pcNone)

proc grouping(comp: Compiler) =
  ## Compiles a parenthesized expression.
  # assume initial '(' is already consumed
  comp.expression()
  comp.consume(tkRightParen, "Expect ')' after expression.")

proc parseCall(comp: Compiler) =
  ## Compiles a call: every argument is compiled as an expression, then
  ## `opCall` is emitted with the argument count as a uint8 argument.
  # ( consumed
  var argcount = 0
  # put args on stack
  while comp.current.tokenType notin {tkRightParen, tkEof}:
    comp.expression()
    inc argcount
    if comp.current.tokenType != tkRightParen:
      comp.consume(tkComma, "Expected ',' between arguments in function calls.")
  comp.consume(tkRightParen, "Expected ')' after arguments in function calls.")

  # the count is narrowed to uint8 below; same limit as for parameters
  if argcount > shortArgMax:
    comp.error("Too many arguments.")
    return

  # emit call
  comp.writeChunk(-argcount, opCall)
  comp.writeChunk(0, argcount.uint8)

tkLeftParen.genRule(grouping, parseCall, pcCall)

proc unary(comp: Compiler) =
  ## Compiles a prefix operator: first its operand, then the operator's
  ## instruction.
  let opType = comp.previous.tokenType
  comp.parsePrecedence(pcUnary)
  case opType:
    of tkMinus:
      comp.writeChunk(0, opNegate)
    of tkBang:
      comp.writeChunk(0, opNot)
    of tkInt:
      comp.writeChunk(0, opInt)
    of tkChr:
      comp.writeChunk(0, opChr)
    of tkPutch:
      comp.writeChunk(0, opPutchar)
    else:
      discard # unreachable

tkBang.genRule(unary, nop, pcNone)
tkInt.genRule(unary, nop, pcNone)
tkChr.genRule(unary, nop, pcNone)
tkPutch.genRule(unary, nop, pcNone)

proc binary(comp: Compiler) =
  ## Compiles the right operand of a binary operator and then the
  ## operator's instruction(s). `!=`, `>=` and `<=` are emitted as the
  ## opposite comparison followed by `opNot`.
  let opType = comp.previous.tokenType
  # safety checked in parsePrecedence
  let rule = opType.getRule()
  comp.parsePrecedence(rule.applyRule.increment)
  case opType:
    of tkPlus:
      comp.writeChunk(-1, opAdd)
    of tkMinus:
      comp.writeChunk(-1, opSubtract)
    of tkStar:
      comp.writeChunk(-1, opMultiply)
    of tkSlash:
      comp.writeChunk(-1, opDivide)
    of tkEqualEqual:
      comp.writeChunk(-1, opEqual)
    of tkBangEqual:
      comp.writeChunk(-1, opEqual)
      comp.writeChunk(0, opNot)
    of tkGreater:
      comp.writeChunk(-1, opGreater)
    of tkLess:
      comp.writeChunk(-1, opLess)
    of tkGreaterEqual:
      comp.writeChunk(-1, opLess)
      comp.writeChunk(0, opNot)
    of tkLessEqual:
      comp.writeChunk(-1, opGreater)
      comp.writeChunk(0, opNot)
    else:
      return # unreachable

tkMinus.genRule(unary, binary, pcTerm)
tkPlus.genRule(nop, binary, pcTerm)
tkSlash.genRule(nop, binary, pcFactor)
tkStar.genRule(nop, binary, pcFactor)
tkEqualEqual.genRule(nop, binary, pcEquality)
tkBangEqual.genRule(nop, binary, pcEquality)
tkGreater.genRule(nop, binary, pcComparison)
tkGreaterEqual.genRule(nop, binary, pcComparison)
tkLess.genRule(nop, binary, pcComparison)
tkLessEqual.genRule(nop, binary, pcComparison)

proc ifExpr(comp: Compiler) =
  ## Compiles an `if (cond) body [else body]` expression.
  # if expressions return the body if condition is truthy,
  # the else expression otherwise, unless there is no else:
  # if there is no else, it returns the condition if it is falsey
  comp.consume(tkLeftParen, "Expect '(' after 'if'.")
  comp.expression()
  comp.consume(tkRightParen, "Expect ')' after condition.")

  let thenJump = comp.emitJump(0, opJumpIfFalse)
  # conditional code that can be jumped over must leave the stack intact!
  comp.writeChunk(-1, opPop)
  comp.expression()
  # net change to stack: -1 + 1 = 0

  let elseJump = comp.emitJump(0, opJump)
  comp.patchJump(thenJump)

  if comp.match(tkElse):
    comp.writeChunk(-1, opPop)
    comp.expression()

  comp.patchJump(elseJump)

tkIf.genRule(ifExpr, nop, pcNone)

proc andExpr(comp: Compiler) =
  ## Compiles the right side of `and`; it is jumped over by
  ## `opJumpIfFalse`, otherwise it replaces the left value.
  let endJump = comp.emitJump(0, opJumpIfFalse)
  comp.writeChunk(-1, opPop)
  comp.parsePrecedence(pcAnd)
  # net effect on stack: -1 + 1 = 0
  comp.patchJump(endJump)

tkAnd.genRule(nop, andExpr, pcAnd)

proc orExpr(comp: Compiler) =
  ## Compiles the right side of `or`; `opJumpIfFalse` leads into it,
  ## otherwise the unconditional jump skips it.
  let elseJump = comp.emitJump(0, opJumpIfFalse)
  let endJump = comp.emitJump(0, opJump)
  comp.patchJump(elseJump)
  comp.writeChunk(-1, opPop)
  comp.parsePrecedence(pcOr)
  # net effect on stack: -1 + 1 = 0
  comp.patchJump(endJump)

tkOr.genRule(nop, orExpr, pcOr)

proc debugExpr(comp: Compiler) =
  ## Compiles the operand of the print keyword followed by `opPrint`.
  comp.expression()
  when debugCompiler:
    debugEcho &"debug expression, current stackindex: {comp.stackIndex}"
  comp.writeChunk(0, opPrint)

tkPrint.genRule(debugExpr, nop, pcNone)

proc parseWhile(comp: Compiler) =
  ## Compiles a `while (cond) body` expression. A nil is pushed first as
  ## the loop's value and is replaced by the body's value each iteration.
  comp.writeChunk(1, opNil) # return value
  let loopStart = comp.chunk.len
  comp.consume(tkLeftParen, "Expect '(' after 'while'.")
  # condition
  comp.expression()
  comp.consume(tkRightParen, "Expect ')' after condition.")

  let exitJump = comp.emitJump(-1, opJumpIfFalsePop)
  # this cannot be handled with just opPop, since the net change in the
  # stack size inside code that is conditional must be 0!

  # body
  comp.writeChunk(-1, opPop) # pop the old return value
  comp.expression()
  # net stack change: 1 + -1 = 0

  comp.emitLoop(loopstart = loopStart, delta = 0, op = opLoop)
  comp.patchJump(exitJump)

tkWhile.genRule(parseWhile, nop, pcNone)

proc parseFunct(comp: Compiler) =
  ## Compiles a function expression: `opFunctionDef` (whose argument is
  ## patched to jump over the body), a runtime arity check, and the body
  ## expression inside a function scope.
  # jump over
  let jumpOverBody = comp.emitJump(1, opFunctionDef)

  comp.consume(tkLeftParen, "Expected '(' after keyword 'funct'.")

  var params: seq[string]
  # parameters
  while comp.current.tokenType == tkIdentifier:
    comp.advance()
    params.add(comp.previous.text)
    if comp.current.tokenType == tkRightParen:
      break
    comp.consume(tkComma, "Expected ',' to separate items in the parameter list.")

  comp.consume(tkRightParen, "Expected ')' after parameter list.")

  # function body:
  comp.beginScope(function = true)
  # this saves the old stackindex, sets it to 0, :function and :result at index 0

  # assumption:
  # the caller will create the following stack for the function to run in:
  # [0] = return value placeholder
  # [1] = arg #1
  # [2] = arg #2
  # [3] = arg #3
  if params.len > shortArgMax:
    comp.error("Too many parameters.")
  comp.writeChunk(0, opCheckArity) # runtime arity check
  comp.writeChunk(0, params.len.uint8)
  for i in countup(1, params.len):
    comp.stackIndex = i
    comp.addLocal(params[i-1], 0)

  comp.expression()
  when assertionsCompiler:
    let shouldbeStackIndex = params.len + 1
    if shouldbeStackIndex != comp.stackIndex:
      comp.error(&"Assertion failed: wrong stackindex ({comp.stackIndex}) in function declaration (should be {shouldbeStackIndex}).")

  comp.endScope()
  dec comp.stackIndex # the previous end scope did not put anything on the stack, it is jumped over

  # end of function declaration:
  comp.patchJump(jumpOverBody)

tkFunct.genRule(parseFunct, nop, pcNone)

# lists

proc parseList(comp: Compiler) =
  ## Compiles a list literal: its members, then `opCreateList` with the
  ## member count.
  var count: int
  # the tkEof check keeps an unterminated literal from looping forever
  while comp.current.tokenType notin {tkRightBracket, tkEof}:
    comp.expression()
    count.inc()
    if comp.current.tokenType != tkRightBracket:
      comp.consume(tkComma, "Comma expected after list member.")
  comp.consume(tkRightBracket, "Right bracket expected after list members.")
  if count > argMax:
    comp.error("Maximum list length exceeded.")
  comp.writeChunk(1 - count, opCreateList)
  comp.writeChunk(0, count.toDU8())

tkStartList.genRule(parseList, nop, pcNone)

# tables

proc parseTable(comp: Compiler) =
  ## Compiles a table literal: `key = value` pairs, then `opCreateTable`
  ## with the pair count.
  var count: int
  # the tkEof check keeps an unterminated literal from looping forever
  while comp.current.tokenType notin {tkRightBrace, tkEof}:
    comp.expression()
    comp.consume(tkEqual, "Equal sign expected after key.")
    comp.expression()
    count.inc()
    if comp.current.tokenType != tkRightBrace:
      comp.consume(tkComma, "Comma expected after key-value pair.")
  comp.consume(tkRightBrace, "Right brace expected after table members.")
  if count > argMax:
    comp.error("Maximum table length exceeded.")
  comp.writeChunk(1 - 2 * count, opCreateTable)
  comp.writeChunk(0, count.toDU8())

tkStartTable.genRule(parseTable, nop, pcNone)

# len op
proc parseLen(comp: Compiler) =
  ## Compiles the operand of the length operator followed by `opLen`.
  comp.expression()
  comp.writeChunk(0, opLen)

tkHashtag.genRule(parseLen, nop, pcNone)

# get/set index
proc parseIndex(comp: Compiler) =
  ## Compiles `[index]` (get) or `[index] = value` (set) applied to the
  ## value compiled before it.
  # the index
  comp.expression()
  comp.consume(tkRightBracket, "Right bracket expected after index.")
  if comp.match(tkEqual):
    comp.parsePrecedence(pcNonAssignTop)
    comp.writeChunk(-2, opSetIndex)
  else:
    comp.writeChunk(-1, opGetIndex)

tkLeftBracket.genRule(nop, parseIndex, pcIndex)

proc parseAmpersand(comp: Compiler) =
  # just a simple expression separator
  discard

tkAmpersand.genRule(nop, parseAmpersand, pcAmpersand)

# below are the expressions that can contain statements in some way
# the only expressions that can contain a statement are:
# the block expression

proc statement(comp: Compiler)

proc parseBlock(comp: Compiler) =
  ## Compiles a block expression: opens a scope, compiles statements until
  ## the closing brace (or the end of input) and closes the scope again.
  comp.beginScope()
  while comp.current.tokenType != tkRightBrace and comp.current.tokenType != tkEof:
    comp.statement()
  comp.endScope()
  comp.consume(tkRightBrace, "Expect '}' after block.")

tkLeftBrace.genRule(parseBlock, nop, pcNone)

# statements

proc parseVariable(comp: Compiler, msg: string): int =
  ## Parses variable declarations
  ## During manipulation with variables:
  ## if global:
  ## consume the identifier and return index to work with
  ## if local:
  ## register the name with the vm
  comp.consume(tkIdentifier, msg)
  let name = comp.previous.text
  if name[0] in {':'}:
    comp.error("Illegal variable name.")
  if comp.scopes.len > 0:
    # declareVariable
    # local
    # check if name exists already within scope
    for i in countdown(comp.locals.high, 0):
      let local = comp.locals[i]
      # initialized locals of the innermost scope have depth
      # comp.scopes.high; anything shallower belongs to an outer scope
      if local.depth != -1 and local.depth < comp.scopes.high:
        break
      if name == local.name:
        comp.error("Already a variable with this name in this scope.")
        break
    comp.addLocal(name, 1)
    0 # index to the constant is irrelevant if the var is local
  else:
    # global
    comp.chunk.addConstant(name.fromNimString())

proc defineVariable(comp: Compiler, index: int) =
  ## Generate code that moves the variable on the stack
  ## to a variable at the right place in memory
  ## the right place is defined by the argument following the op
  ## the thing to move is the item below it
  if comp.scopes.len > 0:
    # local variable: it's already on the right place
    # but we need to mark initialized
    comp.markInitialized()
  else:
    comp.writeChunk(-1, opDefineGlobal)
    comp.writeChunk(0, index.toDU8)

proc varStatement(comp: Compiler) =
  ## Compiles `var name [= initializer]`; a missing initializer is
  ## compiled as nil.
  let globalIndex = comp.parseVariable("Expect variable name.")

  if comp.match(tkEqual):
    comp.expression()
  else:
    comp.writeChunk(1, opNil)

  comp.defineVariable(globalIndex)

proc breakStatement(comp: Compiler) =
  ## Compiles `break <label>;` as a jump to the end of the innermost
  ## enclosing scope carrying that label.
  if not comp.match(tkLabel):
    comp.error("Label expected after break.")
  let label = comp.previous.text[1..^1]
  var found = false
  for i in countdown(comp.scopes.high, 0):
    let scope = comp.scopes[i]
    if scope.labels.contains(label):
      comp.jumpToEnd(scope)
      found = true
      break
  if not found:
    # without this, a break to a non-existent label compiled to nothing
    comp.error(&"Unknown label '{label}' in break statement.")
  comp.consume(tkSemicolon, "Semicolon expected after break statement.")
  if comp.current.tokenType != tkRightBrace:
    comp.error("Break statement must be the last element inside the innermost block it is in.")

proc statement(comp: Compiler) =
  ## Compiles one statement: a variable declaration, a break, or an
  ## expression statement (whose value is popped). Resynchronizes
  ## afterwards if an error put the compiler into panic mode.
  if comp.match(tkVar):
    comp.varStatement()
    comp.consume(tkSemicolon, "Semicolon expected after expression statement.")
  elif comp.match(tkBreak):
    comp.breakStatement()
  else:
    comp.expression()
    comp.writeChunk(-1, opPop)
    comp.consume(tkSemicolon, "Semicolon expected after expression statement.")
  if comp.panicMode:
    comp.synchronize()

proc compile*(comp: Compiler) =
  ## Compiles `comp.source` into `comp.chunk`. Check `comp.hadError`
  ## afterwards to find out whether compilation succeeded.
  comp.scanner = newScanner(comp.source)
  comp.writeChunk(0, opNil)
  # the starting stackIndex is 0, which points to this nil
  # it is correctly set to delta = 0!!!
  comp.advance()
  while comp.current.tokenType != tkEof:
    comp.statement()
  comp.writeChunk(-1, opPop)
  comp.writeChunk(0, opReturn)
  when debugDumpChunk:
    if not comp.hadError:
      comp.chunk.disassembleChunk()