# Copyright 2020 Mattia Giambirtone # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ## The JAPL bytecode compiler import strutils import sequtils import algorithm import strformat import tables import multibyte import lexer import meta/opcode import meta/token import meta/looptype import types/baseObject import types/function import types/numbers import types/japlString import types/iterable import types/arrayList import config when isMainModule: import util/debug import types/methods when DEBUG_TRACE_COMPILER: import terminal type Compiler* = ref object ## The state of the compiler enclosing*: Compiler function*: ptr Function context*: FunctionType locals*: seq[Local] localCount*: int scopeDepth*: int parser*: Parser loop*: Loop objects*: ptr ArrayList[ptr Obj] file*: ptr String interned*: Table[string, ptr Obj] afterReturn: bool Local* = ref object # A local variable name*: Token depth*: int Parser* = ref object # A Parser object current*: int tokens*: ptr ArrayList[Token] hadError*: bool panicMode*: bool file*: ptr String Precedence {.pure.} = enum None, Assign, Or, And, Eq, Comp, As, Is, Term, Factor, Unary, Exp, Call, Primary ParseFn = proc(self: Compiler, canAssign: bool): void ParseRule = ref object prefix, infix: ParseFn precedence: Precedence proc makeRule(prefix, infix: ParseFn, precedence: Precedence): ParseRule = ## Creates a new rule for parsing result = ParseRule(prefix: prefix, infix: infix, precedence: precedence) proc advance(self: Parser): Token = ## Steps forward by one in the tokens' list and ## increments the current token index result = self.tokens[self.current] inc(self.current) proc peek(self: Parser): Token = ## Returns the current token without consuming it return self.tokens[self.current] proc peekNext(self: Parser): Token = ## Returns the next token without consuming it ## or an EOF token if we're at the end of the file if self.current <= len(self.tokens) - 1: return self.tokens[self.current + 1] return Token(kind: EOF, lexeme: "") proc previous(self: Parser): Token = ## Returns the previously consumed token return self.tokens[self.current - 1] proc check(self: Parser, kind: TokenType): bool = ## Checks if the current token is of the expected type ## without consuming it return self.peek().kind == kind proc checkNext(self: Parser, kind: TokenType): bool = ## Checks if the next token is of the expected type ## without consuming it return self.peekNext().kind == kind proc match(self: Parser, kind: TokenType): bool = ## Calls self.check() and consumes a token if the expected ## token type is encountered, in which case true ## is returned. False is returned otherwise if not self.check(kind): return false discard self.advance() return true proc synchronize(self: Parser) = ## Synchronizes the parser's state. This is useful when ## dealing with parsing errors. When an error occurs, we ## note it with our nice panicMode and hadError fields, but ## that in itself doesn't allow the parser to go forward ## in the code and report other possible errors. On the ## other hand, attempting to start parsing the source ## right after an error has occurred could lead to a ## cascade of unhelpful error messages that complicate ## debugging issues. So, when an error occurs, we try ## to get back into a state that at least allows us to keep ## parsing and pretend the error never happened (the code ## would not be compiled anyway so we might as well tell the ## user if anything else is wrong with their code). The parser ## will skip to the next valid token for a statement, like an ## if or a for loop or a class declaration, and then keep ## parsing from there. Note that hadError is never reset, but ## panicMode is self.panicMode = false while self.peek().kind != TokenType.EOF: # Infinite loops are bad, so we must take EOF into account if self.previous().kind == TokenType.SEMICOLON: return case self.peek().kind: of TokenType.CLASS, TokenType.FUN, TokenType.VAR, TokenType.FOR, TokenType.IF, TokenType.WHILE, TokenType.RETURN: # We found a statement boundary, so the parser bails out return else: discard discard self.advance() proc parseError(self: Parser, token: Token, message: string) = ## Notifies the user about parsing errors, writing them to ## the standard error file. This parser is designed to report ## all syntatical errors inside a file in one go, rather than ## stopping at the first error occurrence. This allows a user ## to identify and fix multiple errors without running the parser ## multiple times if self.panicMode: # This serves to identify wheter an error already occurred, in which case we return return self.panicMode = true self.hadError = true stderr.write(&"A fatal error occurred while parsing '{self.file}', line {token.line}, at '{token.lexeme}' -> {message}\n") self.synchronize() proc consume(self: Parser, expected: TokenType, message: string) = ## Attempts to consume a token if it is of the expected type ## or raises a parsing error with the given message otherwise if self.check(expected): discard self.advance() return self.parseError(self.peek(), message) proc currentChunk(self: Compiler): var Chunk = ## Returns the current chunk being compiled result = self.function.chunk proc compileError(self: Compiler, message: string) = ## Notifies the user about an error occurred during ## compilation, writing to the standard error file stderr.write(&"A fatal error occurred while compiling '{self.file}', line {self.parser.peek().line}, at '{self.parser.peek().lexeme}' -> {message}\n") self.parser.hadError = true self.parser.panicMode = true proc emitByte(self: Compiler, byt: OpCode|uint8) = ## Emits a single bytecode instruction and writes it ## to the current chunk being compiled when DEBUG_TRACE_COMPILER: write stdout, &"DEBUG - Compiler: Emitting {$byt} (uint8 value of {$(uint8 byt)}" if byt.int() <= OpCode.high().int(): write stdout, &"; opcode value of {$byt.OpCode}" write stdout, ")\n" self.currentChunk.writeChunk(uint8 byt, self.parser.previous.line) proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) = ## Emits multiple bytes instead of a single one, this is useful ## to emit operators along with their operands or for multi-byte ## instructions that are longer than one byte self.emitByte(uint8 byt1) self.emitByte(uint8 byt2) proc emitBytes(self: Compiler, bytarr: array[3, uint8]) = ## Handy helper method to write an array of 3 bytes into ## the current chunk, calling emiteByte(s) on each of its ## elements self.emitBytes(bytarr[0], bytarr[1]) self.emitByte(bytarr[2]) proc makeConstant(self: Compiler, val: ptr Obj): array[3, uint8] = ## Does the same as makeConstant(), but encodes the index in the ## chunk's constant table as an array (which is later reconstructed ## into an integer at runtime) to store more than 256 constants in the table result = self.currentChunk.addConstant(val) proc emitConstant(self: Compiler, obj: ptr Obj) = ## Emits a Constant instruction along ## with its operand self.emitByte(OpCode.Constant) self.emitBytes(self.makeConstant(obj)) proc initParser*(tokens: seq[Token], file: string): Parser proc getRule(kind: TokenType): ParseRule # Forward declarations for later use proc statement(self: Compiler) proc declaration(self: Compiler) proc initCompiler*(context: FunctionType, enclosing: Compiler = nil, parser: Parser = initParser(@[], ""), file: string): Compiler proc parsePrecedence(self: Compiler, precedence: Precedence) = ## Parses expressions using pratt's elegant algorithm to precedence parsing if self.parser.peek().kind == TokenType.EOF: self.parser.parseError(self.parser.peek(), "Expecting expression") return else: discard self.parser.advance() var prefixRule = getRule(self.parser.previous.kind).prefix if prefixRule == nil: # If there is no prefix rule then an expression is expected self.parser.parseError(self.parser.previous, "Expecting expression") return var canAssign = precedence <= Precedence.Assign # This is used to detect invalid assignment targets # such as "hello" = 3; self.prefixRule(canAssign) # otherwise call the prefix rule (e.g. for binary negation) if self.parser.previous.kind == EOF: self.parser.current -= 1 # If we're at EOF, we bail out and restore the EOF terminator so that # the parser behaves accordingly later on return while precedence <= (getRule(self.parser.peek.kind).precedence): # This will parse all expressions with the same precedence # or lower to the current expression var infixRule = getRule(self.parser.advance.kind).infix if self.parser.peek().kind != EOF: self.infixRule(canAssign) else: self.parser.parseError(self.parser.previous, "Expecting expression, got EOF") if canAssign and self.parser.match(TokenType.EQ): self.parser.parseError(self.parser.peek, "Invalid assignment target") proc expression(self: Compiler) = ## Parses expressions self.parsePrecedence(Precedence.Assign) # The highest-level expression is assignment proc binary(self: Compiler, canAssign: bool) = ## Parses binary operators var operator = self.parser.previous().kind var rule = getRule(operator) self.parsePrecedence(Precedence((int rule.precedence) + 1)) case operator: of TokenType.PLUS: self.emitByte(OpCode.Add) of TokenType.MINUS: self.emitByte(OpCode.Subtract) of TokenType.SLASH: self.emitByte(OpCode.Divide) of TokenType.STAR: self.emitByte(OpCode.Multiply) of TokenType.MOD: self.emitByte(OpCode.Mod) of TokenType.POW: self.emitByte(OpCode.Pow) of TokenType.NE: self.emitBytes(OpCode.Equal, OpCode.Not) of TokenType.DEQ: self.emitByte(OpCode.Equal) of TokenType.GT: # To allow for chaining of greater/less comparisons in the future (without doing # weird stuff such as allowing false with the greater/less than operators) # we need to move their logic in another function. This will # also allow for a sort of short-circuiting control flow like # for logical ands and ors, because why not? self.emitByte(OpCode.Greater) of TokenType.GE: self.emitByte(OpCode.GreaterOrEqual) of TokenType.LT: self.emitByte(OpCode.Less) of TokenType.LE: self.emitByte(OpCode.LessOrEqual) of TokenType.CARET: self.emitByte(OpCode.Xor) of TokenType.SHL: self.emitByte(OpCode.Shl) of TokenType.SHR: self.emitByte(OpCode.Shr) of TokenType.BOR: self.emitByte(OpCode.Bor) of TokenType.BAND: self.emitByte(OpCode.Band) of TokenType.IS: self.emitByte(OpCode.Is) of TokenType.ISNOT: self.emitBytes(OpCode.Is, Opcode.Not) of TokenType.AS: self.emitByte(OpCode.As) else: discard # Unreachable proc unary(self: Compiler, canAssign: bool) = ## Parses unary expressions such as negation or ## binary inversion var operator = self.parser.previous().kind if self.parser.peek().kind != EOF: self.parsePrecedence(Precedence.Unary) else: self.parser.parseError(self.parser.previous, "Expecting expression, got EOF") return case operator: of MINUS: self.emitByte(OpCode.Negate) of NEG: self.emitByte(OpCode.Not) of TILDE: self.emitByte(OpCode.Bnot) of PLUS: discard # Unary + does nothing anyway else: return template markObject*(self: Compiler, obj: ptr Obj): untyped = ## Marks compile-time objects (since those take up memory as well) ## for the VM to reclaim space later on let temp = obj self.objects.append(temp) temp proc strVal(self: Compiler, canAssign: bool) = ## Parses string literals var str = self.parser.previous().lexeme var delimiter = &"{str[0]}" # TODO: Add proper escape sequences support str = str.unescape(delimiter, delimiter) if str notin self.interned: self.interned[str] = str.asStr() self.emitConstant(self.markObject(self.interned[str])) else: # We intern only constant strings! # Note that we don't call self.markObject on an already # interned string because that has already been marked self.emitConstant(self.interned[str]) proc bracketAssign(self: Compiler, canAssign: bool) = ## Parses assignments such as a[0] = "something" discard # TODO -> Implement this proc bracket(self: Compiler, canAssign: bool) = ## Parses getitem/slice expressions, such as "hello"[0:1] ## or someList[5]. Slices can take up to two arguments, a start ## and an end index in the chosen iterable. ## Both arguments are optional, so doing "hi"[::] ## will basically copy the string (gets everything from ## start to end of the iterable). ## Indexes start from 0, and while the start index is ## inclusive, the end index is not. If an end index is ## not specified--like in "hello"[0:]--, then the it is ## assumed to be the length of the iterable. Likewise, ## if the start index is missing, it is assumed to be 0. ## Like in Python, using an end index that's out of bounds ## will not raise an error. Doing "hello"[0:999] will just ## return the whole string instead. ## It has to be noted that negative indexes are allowed: -1 ## means the last element in the iterable, -2 the element ## before that and so on, but that if a negative index's value ## goes back too far it does NOT loop back to the end of the ## iterable and will cause an IndexError at runtime instead if self.parser.peek.kind == TokenType.COLON: self.emitByte(OpCode.Nil) discard self.parser.advance() if self.parser.peek().kind == TokenType.RS: self.emitByte(OpCode.Nil) else: self.parsePrecedence(Precedence.Term) self.emitByte(OpCode.Slice) else: self.parsePrecedence(Precedence.Term) if self.parser.peek().kind == TokenType.RS: self.emitByte(OpCode.GetItem) elif self.parser.peek().kind == TokenType.COLON: discard self.parser.advance() if self.parser.peek().kind == TokenType.RS: self.emitByte(OpCode.Nil) else: self.parsePrecedence(Precedence.Term) self.emitByte(OpCode.Slice) if self.parser.peek().kind == TokenType.EQ: discard self.parser.advance() self.parsePrecedence(Precedence.Term) self.parser.consume(TokenType.RS, "Expecting ']' after slice expression") proc literal(self: Compiler, canAssign: bool) = ## Parses literal values such as true, nan and inf case self.parser.previous().kind: of TokenType.TRUE: self.emitByte(OpCode.True) of TokenType.FALSE: self.emitByte(OpCode.False) of TokenType.NIL: self.emitByte(OpCode.Nil) of TokenType.INF: self.emitByte(OpCode.Inf) of TokenType.NAN: self.emitByte(OpCode.Nan) else: discard # Unreachable proc number(self: Compiler, canAssign: bool) = ## Parses numerical constants var value = self.parser.previous().lexeme try: if "." in value: self.emitConstant(self.markObject(parseFloat(value).asFloat())) else: self.emitConstant(self.markObject(parseInt(value).asInt())) except ValueError: self.compileError("number literal is too big") proc grouping(self: Compiler, canAssign: bool) = ## Parses parenthesized expressions. The only interesting ## semantic about parentheses is that they allow lower-precedence ## expressions where a higher precedence one is expected if self.parser.match(TokenType.EOF): self.parser.parseError(self.parser.previous, "Expecting ')'") elif self.parser.match(RP): self.emitByte(OpCode.Nil) else: self.expression() self.parser.consume(TokenType.RP, "Expecting ')' after parentheszed expression") proc identifierConstant(self: Compiler, tok: Token): array[3, uint8] = ## Emits instructions for identifiers return self.makeConstant(self.markObject(asStr(tok.lexeme))) proc addLocal(self: Compiler, name: Token) = ## Stores a local variable. Local name resolution ## happens at compile time rather than runtime, ## unlike global variables which are treated differently. ## Note that at first, a local is in a special "uninitialized" ## state, this is useful to detect errors such as var a = a; ## inside local scopes var local = Local(name: name, depth: -1) inc(self.localCount) self.locals.add(local) proc declareVariable(self: Compiler) = ## Declares a variable, this is only useful ## for local variables, there is no way to ## "declare" a global at compile time. This ## assumption works because locals ## and temporaries have stack semantics inside ## local scopes if self.scopeDepth == 0: return var name = self.parser.previous() self.addLocal(name) proc parseVariable(self: Compiler, message: string): array[3, uint8] = ## Parses variables and declares them self.parser.consume(TokenType.ID, message) self.declareVariable() if self.scopeDepth > 0: return [uint8 0, uint8 0, uint8 0] return self.identifierConstant(self.parser.previous()) proc markInitialized(self: Compiler) = ## Marks the latest defined global as ## initialized and ready for use if self.scopeDepth == 0: return self.locals[self.localCount - 1].depth = self.scopeDepth proc defineVariable(self: Compiler, idx: array[3, uint8]) = ## Same as defineVariable, but this is used when ## there's more than 255 locals in the chunk's table if self.scopeDepth > 0: self.markInitialized() return self.emitByte(OpCode.DefineGlobal) self.emitBytes(idx) proc resolveLocal(self: Compiler, name: Token): int = ## Resolves a local variable and catches errors such as ## var a = a var i = self.localCount - 1 for local in reversed(self.locals): if local.name.lexeme == name.lexeme: if local.depth == -1: self.compileError("cannot read local variable in its own initializer") return i i = i - 1 return -1 proc namedVariable(self: Compiler, tok: Token, canAssign: bool) = ## Handles local and global variables assignment, as well ## as variable resolution var arg = self.resolveLocal(tok) casted = cast[array[3, uint8]](arg) get: OpCode set: OpCode if arg != -1: get = OpCode.GetLocal set = OpCode.SetLocal else: get = OpCode.GetGlobal set = OpCode.SetGlobal casted = self.identifierConstant(tok) if self.parser.match(TokenType.EQ) and canAssign: self.expression() self.emitByte(set) self.emitBytes(casted) else: self.emitByte(get) self.emitBytes(casted) proc variable(self: Compiler, canAssign: bool) = ## Emits the code to declare a variable, ## both locally and globally self.namedVariable(self.parser.previous(), canAssign) proc varDeclaration(self: Compiler) = ## Parses a variable declaration var name: array[3, uint8] = self.parseVariable("Expecting variable name") if self.parser.match(TokenType.EQ): self.expression() else: self.emitByte(OpCode.Nil) self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after var declaration") self.defineVariable(name) proc expressionStatement(self: Compiler) = ## Parses an expression statement, which is ## an expression followed by a semicolon. It then ## emits a pop instruction self.expression() self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after expression") self.emitByte(OpCode.Pop) proc delStatement(self: Compiler) = self.expression() # TODO: isLiteral? if self.parser.previous().kind in {TokenType.NUMBER, TokenType.STR}: self.compileError("cannot delete a literal") var code: OpCode if self.scopeDepth == 0: code = OpCode.DeleteGlobal else: code = OpCode.DeleteLocal self.localCount = self.localCount - 1 var name = self.identifierConstant(self.parser.previous()) self.emitBytes(code, name[0]) self.emitBytes(name[1], name[2]) self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after del statement") proc parseBlock(self: Compiler) = ## Parses a block statement, which is basically ## a list of other statements while not self.parser.check(TokenType.RB) and not self.parser.check(TokenType.EOF): self.declaration() self.parser.consume(TokenType.RB, "Expecting '}' after block statement") proc beginScope(self: Compiler) = ## Begins a scope by increasing the ## current scope depth. This is literally ## all it takes to create a scope, since the ## only semantically interesting behavior of ## scopes is a change in names resolution inc(self.scopeDepth) proc endScope(self: Compiler) = ## Ends a scope, popping off any local that ## was created inside it along the way self.scopeDepth = self.scopeDepth - 1 var start: Natural = self.localCount while self.localCount > 0 and self.locals[self.localCount - 1].depth > self.scopeDepth: self.emitByte(OpCode.Pop) self.localCount = self.localCount - 1 if start >= self.localCount: self.locals.delete(self.localCount, start) proc emitJump(self: Compiler, opcode: OpCode): int = ## Emits a jump instruction with a placeholder offset ## that is later patched, check patchJump for more info ## about how jumps work self.emitByte(opcode) self.emitByte(0xff) self.emitByte(0xff) when DEBUG_TRACE_COMPILER: setForegroundColor(fgYellow) write stdout, &"DEBUG - Compiler: emit jump @ {self.currentChunk.code.len-2}\n" setForegroundColor(fgDefault) return self.currentChunk.code.len - 2 proc patchJump(self: Compiler, offset: int) = ## Patches a previously emitted jump instruction. ## Since it's impossible to know how much code ## needs to be jumped before compiling the code ## itself, jumps are first encoded with a placeholder ## offset. Then, after the code that has to be jumped ## over has been compiled, its size is known and the ## previously emitted offset is replaced with the actual ## jump size. ## Note that, due to how the language is designed, ## only up to 2^16 bytecode instructions can ## be jumped over, so the size of the if/else conditions ## or loops is limited (hopefully 65 thousands and change ## instructions are enough for everyone) when DEBUG_TRACE_COMPILER: setForegroundColor(fgYellow) write stdout, &"DEBUG - Compiler: patching jump @ {offset}" let jump = self.currentChunk.code.len - offset - 2 if jump > (int uint16.high): when DEBUG_TRACE_COMPILER: setForegroundColor(fgDefault) write stdout, "\n" self.compileError("too much code to jump over") else: let casted = toDouble(jump) self.currentChunk.code[offset] = casted[0] self.currentChunk.code[offset + 1] = casted[1] when DEBUG_TRACE_COMPILER: write stdout, &" points to {casted[0]}, {casted[1]} = {jump}\n" setForegroundColor(fgDefault) proc ifStatement(self: Compiler) = ## Parses if statements in a C-style fashion self.parser.consume(TokenType.LP, "The if condition must be parenthesized") if self.parser.peek.kind != TokenType.EOF: self.expression() if self.parser.peek.kind != TokenType.EOF: self.parser.consume(TokenType.RP, "The if condition must be parenthesized") if self.parser.peek.kind != TokenType.EOF: var jump: int = self.emitJump(OpCode.JumpIfFalse) self.emitByte(OpCode.Pop) self.statement() var elseJump = self.emitJump(OpCode.Jump) self.patchJump(jump) self.emitByte(OpCode.Pop) if self.parser.match(TokenType.ELSE): self.statement() self.patchJump(elseJump) else: self.parser.parseError(self.parser.previous(), "Invalid syntax") else: self.parser.parseError(self.parser.previous(), "The if condition must be parenthesized") proc emitLoop(self: Compiler, start: int) = ## Creates a loop and emits related instructions. when DEBUG_TRACE_COMPILER: setForegroundColor(fgYellow) write stdout, &"DEBUG - Compiler: emitting loop at start {start} " self.emitByte(OpCode.Loop) var offset = self.currentChunk.code.len - start + 2 if offset > (int uint16.high): when DEBUG_TRACE_COMPILER: setForegroundColor(fgDefault) write stdout, "\n" self.compileError("loop body is too large") else: let offsetBytes = toDouble(offset) self.emitByte(offsetBytes[0]) self.emitByte(offsetBytes[1]) when DEBUG_TRACE_COMPILER: write stdout, &"pointing to {offsetBytes[0]}, {offsetBytes[1]} = {offset}\n" proc endLooping(self: Compiler) = ## This method is used to make ## the break statement work and patch ## it with a jump instruction if self.loop.loopEnd != -1: self.patchJump(self.loop.loopEnd) self.emitByte(OpCode.Pop) for brk in self.loop.breaks: when DEBUG_TRACE_COMPILER: setForegroundColor(fgYellow) write stdout, &"DEBUG - Compiler: patching break at {brk}\n" setForegroundColor(fgDefault) self.currentChunk.code[brk] = OpCode.Jump.uint8 self.patchJump(brk + 1) self.loop = self.loop.outer proc whileStatement(self: Compiler) = ## Parses while loops in a C-style fashion let loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1) self.loop = loop self.parser.consume(TokenType.LP, "The loop condition must be parenthesized") if self.parser.peek.kind != TokenType.EOF: self.expression() if self.parser.peek.kind != TokenType.EOF: self.parser.consume(TokenType.RP, "The loop condition must be parenthesized") if self.parser.peek.kind != TokenType.EOF: self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse) self.emitByte(OpCode.Pop) self.loop.body = self.currentChunk.code.len self.statement() self.emitLoop(self.loop.start) #self.patchJump(self.loop.loopEnd) # Prod2: imo will get patched over by endLooping anyways #self.emitByte(OpCode.Pop) else: self.parser.parseError(self.parser.previous(), "Invalid syntax") else: self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized") self.endLooping() proc forStatement(self: Compiler) = ## Parses for loops in a C-style fashion self.beginScope() self.parser.consume(TokenType.LP, "The loop condition must be parenthesized") if self.parser.peek.kind != TokenType.EOF: if self.parser.match(TokenType.SEMICOLON): discard elif self.parser.match(TokenType.VAR): self.varDeclaration() else: self.expressionStatement() var loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1) self.loop = loop if not self.parser.match(TokenType.SEMICOLON): self.expression() if self.parser.previous.kind != TokenType.EOF: self.parser.consume(TokenType.SEMICOLON, "Expecting ';'") self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse) self.emitByte(OpCode.Pop) else: self.parser.current -= 1 self.parser.parseError(self.parser.previous(), "Invalid syntax") if not self.parser.match(RP): var bodyJump = self.emitJump(OpCode.Jump) var incrementStart = self.currentChunk.code.len if self.parser.peek.kind != TokenType.EOF: self.expression() self.emitByte(OpCode.Pop) self.parser.consume(TokenType.RP, "The loop condition must be parenthesized") self.emitLoop(self.loop.start) self.loop.start = incrementStart self.patchJump(bodyJump) if self.parser.peek.kind != TokenType.EOF: self.loop.body = self.currentChunk.code.len self.statement() self.emitLoop(self.loop.start) else: self.parser.current -= 1 self.parser.parseError(self.parser.previous(), "Invalid syntax") if self.loop.loopEnd != -1: self.patchJump(self.loop.loopEnd) self.emitByte(OpCode.Pop) else: self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized") self.endLooping() self.endScope() proc parseBreak(self: Compiler) = ## Parses break statements. A break ## statement causes the current loop ## to exit and jump to its end if not self.loop.alive: self.parser.parseError(self.parser.previous, "'break' outside loop") else: self.parser.consume(TokenType.SEMICOLON, "missing semicolon after break statement") var i = self.localCount - 1 while i >= 0 and self.locals[i].depth > self.loop.depth: self.emitByte(OpCode.Pop) i -= 1 discard self.emitJump(OpCode.Break) self.loop.breaks.add(self.currentChunk.code.len() - 3) proc parseAnd(self: Compiler, canAssign: bool) = ## Parses expressions such as a and b var jump = self.emitJump(OpCode.JumpIfFalse) self.emitByte(OpCode.Pop) self.parsePrecedence(Precedence.And) self.patchJump(jump) proc parseOr(self: Compiler, canAssign: bool) = ## Parses expressions such as a or b var elseJump = self.emitJump(OpCode.JumpIfFalse) var endJump = self.emitJump(OpCode.Jump) self.patchJump(elseJump) self.emitByte(OpCode.Pop) self.parsePrecedence(Precedence.Or) self.patchJump(endJump) proc continueStatement(self: Compiler) = ## Parses continue statements inside loops. ## The continue statement causes the loop to skip ## to the next iteration if not self.loop.alive: self.parser.parseError(self.parser.previous, "'continue' outside loop") else: self.parser.consume(TokenType.SEMICOLON, "missing semicolon after continue statement") var i = self.localCount - 1 while i >= 0 and self.locals[i].depth > self.loop.depth: self.emitByte(OpCode.Pop) i -= 1 self.emitLoop(self.loop.start) proc endCompiler(self: Compiler): ptr Function = ## Ends the current compiler instance and returns its ## compiled bytecode wrapped around a function object, ## also emitting a return instruction with nil as operand. ## Because of this, all functions implicitly return nil ## if no return statement is supplied self.emitByte(OpCode.Nil) self.emitByte(OpCode.Return) return self.function proc parseFunction(self: Compiler, funType: FunctionType) = ## Parses function declarations. Functions can have ## keyword arguments (WIP), but once a parameter is declared ## as a keyword one, all subsequent parameters must be ## keyword ones as well var self = initCompiler(funType, self, self.parser, self.file.toStr()) self.beginScope() if self.parser.check(LB): self.parser.consume(LB, "Expecting '{' before function body") self.parseBlock() var fun = self.endCompiler() self = self.enclosing self.emitByte(OpCode.Constant) self.emitBytes(self.makeConstant(fun)) return self.parser.consume(LP, "Expecting '('") if self.parser.hadError: return var paramNames: seq[string] = @[] var defaultFollows: bool = false if not self.parser.check(RP): while true: self.function.arity += 1 if self.function.arity + self.function.optionals > 255: self.compileError("functions cannot have more than 255 arguments") break var paramIdx = self.parseVariable("expecting parameter name") if self.parser.hadError: return if self.parser.previous.lexeme in paramNames: self.compileError("duplicate parameter name in function declaration") return paramNames.add(self.parser.previous.lexeme) self.defineVariable(paramIdx) if self.parser.match(TokenType.EQ): if self.parser.peek.kind == EOF: self.compileError("Unexpected EOF") return self.function.arity -= 1 self.function.optionals += 1 self.expression() self.function.defaults.append(self.parser.previous.lexeme.asStr()) defaultFollows = true elif defaultFollows: self.compileError("non-default argument follows default argument") return if not self.parser.match(COMMA): break self.parser.consume(RP, "Expecting ')' after parameters") self.parser.consume(LB, "Expecting '{' before function body") self.parseBlock() var fun = self.endCompiler() self = self.enclosing self.emitByte(OpCode.Constant) self.emitBytes(self.makeConstant(fun)) proc parseLambda(self: Compiler, canAssign: bool) = ## Parses lambda expressions of the form => (params) {code} self.parseFunction(FunctionType.LAMBDA) proc funDeclaration(self: Compiler) = ## Parses function declarations and declares ## them in the current scope var funName = self.parseVariable("expecting function name") self.markInitialized() self.parseFunction(FunctionType.FUNC) self.defineVariable(funName) proc argumentList(self: Compiler): tuple[pos: uint8, kw: uint8] = ## Parses arguments passed to function calls result.pos = 0 result.kw = 0 if not self.parser.check(RP): while true: if self.parser.check(ID) and self.parser.checkNext(TokenType.EQ): discard self.parser.advance() discard self.parser.advance() if self.parser.check(EOF): self.parser.parseError(self.parser.previous, "Unexpected EOF") return else: self.expression() if result.pos + result.kw == 255: self.compileError("cannot pass more than 255 arguments") return if not self.parser.match(COMMA): break result.kw += 1 else: if self.parser.check(EOF): self.parser.parseError(self.parser.previous, "Unexpected EOF") return if result.kw > 0: self.parser.parseError(self.parser.peek, "positional argument follows default argument") return self.expression() if result.pos == 255: self.compileError("cannot pass more than 255 arguments") return result.pos += 1 if not self.parser.match(COMMA): break self.parser.consume(RP, "Expecting ')' after arguments") proc call(self: Compiler, canAssign: bool) = ## Emits appropriate bytecode to call ## a function with its arguments # TODO -> Keyword arguments let args = self.argumentList() self.emitBytes(OpCode.Call, args.pos) proc returnStatement(self: Compiler) = ## Parses return statements and emits ## appropriate bytecode instructions ## for them if self.context == SCRIPT: self.compileError("'return' outside function") self.afterReturn = true if self.parser.match(TokenType.SEMICOLON): # Empty return self.emitByte(OpCode.Nil) self.emitByte(OpCode.Return) else: self.expression() self.parser.consume(TokenType.SEMICOLON, "missing semicolon after return statement") self.emitByte(OpCode.Return) proc statement(self: Compiler) = ## Parses statements if self.parser.match(TokenType.FOR): self.forStatement() elif self.parser.match(TokenType.IF): self.ifStatement() elif self.parser.match(TokenType.WHILE): self.whileStatement() elif self.parser.match(TokenType.RETURN): self.returnStatement() elif self.parser.match(TokenType.CONTINUE): self.continueStatement() elif self.parser.match(TokenType.BREAK): self.parseBreak() elif self.parser.match(TokenType.DEL): self.delStatement() elif self.parser.match(TokenType.LB): self.beginScope() self.parseBlock() self.endScope() else: self.expressionStatement() proc declaration(self: Compiler) = ## Parses declarations # TODO -> Fix this # if self.afterReturn: # self.compileError("dead code after return statement") # self.parser.tokens.append(Token(kind: TokenType.EOF, lexeme: "")) if self.parser.match(FUN): self.funDeclaration() elif self.parser.match(VAR): self.varDeclaration() else: self.statement() proc freeCompiler*(self: Compiler) = ## Frees all the allocated objects ## from the compiler when DEBUG_TRACE_ALLOCATION: var objCount = len(self.objects) var objFreed = 0 for obj in reversed(self.objects): freeObject(obj) discard self.objects.pop() when DEBUG_TRACE_ALLOCATION: objFreed += 1 when DEBUG_TRACE_ALLOCATION: echo &"DEBUG - Compiler: Freed {objFreed} objects out of {objCount} compile-time objects" # The array of all parse rules. # This array instructs our Pratt parser on how # to parse expressions and statements. # makeRule defines rules for unary and binary # operators as well as the token's precedence var rules: array[TokenType, ParseRule] = [ makeRule(nil, binary, Precedence.Term), # PLUS makeRule(unary, binary, Precedence.Term), # MINUS makeRule(nil, binary, Precedence.Factor), # SLASH makeRule(nil, binary, Precedence.Factor), # STAR makeRule(unary, nil, Precedence.None), # NEG makeRule(nil, binary, Precedence.Eq), # NE makeRule(nil, nil, Precedence.None), # EQ makeRule(nil, binary, Precedence.Comp), # DEQ makeRule(nil, binary, Precedence.Comp), # LT makeRule(nil, binary, Precedence.Comp), # GE makeRule(nil, binary, Precedence.Comp), # LE makeRule(nil, binary, Precedence.Factor), # MOD makeRule(nil, binary, Precedence.Exp), # POW makeRule(nil, binary, Precedence.Comp), # GT makeRule(grouping, call, Precedence.Call), # LP makeRule(nil, nil, Precedence.None), # RP makeRule(nil, bracket, Precedence.Call), # LS makeRule(nil, nil, Precedence.None), # LB makeRule(nil, nil, Precedence.None), # RB makeRule(nil, nil, Precedence.None), # COMMA makeRule(nil, nil, Precedence.None), # DOT makeRule(variable, nil, Precedence.None), # ID makeRule(nil, nil, Precedence.None), # RS makeRule(number, nil, Precedence.None), # NUMBER makeRule(strVal, nil, Precedence.None), # STR makeRule(nil, nil, Precedence.None), # SEMICOLON makeRule(nil, parseAnd, Precedence.And), # AND makeRule(nil, nil, Precedence.None), # CLASS makeRule(nil, nil, Precedence.None), # ELSE makeRule(nil, nil, Precedence.None), # FOR makeRule(nil, nil, Precedence.None), # FUN makeRule(literal, nil, Precedence.None), # FALSE makeRule(nil, nil, Precedence.None), # IF makeRule(literal, nil, Precedence.None), # NIL makeRule(nil, nil, Precedence.None), # RETURN makeRule(nil, nil, Precedence.None), # SUPER makeRule(nil, nil, Precedence.None), # THIS makeRule(nil, parseOr, Precedence.Or), # OR makeRule(literal, nil, Precedence.None), # TRUE makeRule(nil, nil, Precedence.None), # VAR makeRule(nil, nil, Precedence.None), # WHILE makeRule(nil, nil, Precedence.None), # DEL makeRule(nil, nil, Precedence.None), # BREAK makeRule(nil, nil, Precedence.None), # EOF makeRule(nil, nil, Precedence.None), # COLON makeRule(nil, nil, Precedence.None), # CONTINUE makeRule(nil, binary, Precedence.Term), # CARET makeRule(nil, binary, Precedence.Term), # SHL makeRule(nil, binary, Precedence.Term), # SHR makeRule(literal, nil, Precedence.Term), # INF makeRule(literal, nil, Precedence.Term), # NAN makeRule(nil, binary, Precedence.Term), # BAND makeRule(nil, binary, Precedence.Term), # BOR makeRule(unary, nil, Precedence.None), # TILDE makeRule(nil, binary, Precedence.Is), # IS makeRule(nil, binary, Precedence.As), # AS makeRule(parseLambda, nil, Precedence.None), # LAMBDA makeRule(nil, binary, Precedence.Is), # ISNOT ] proc getRule(kind: TokenType): ParseRule = ## Returns an appropriate precedence rule ## object for a given token type result = rules[kind] proc compile*(self: Compiler, source: string): ptr Function = ## Compiles a source string into a function ## object. This wires up all the code ## inside the parser and the lexer var scanner = initLexer(source, self.file.toStr()) var tokens = scanner.lex() if not scanner.errored: self.parser = initParser(tokens, self.file.toStr()) while not self.parser.match(EOF): self.declaration() var function = self.endCompiler() if not self.parser.hadError: when DEBUG_TRACE_COMPILER: echo "DEBUG - Compiler: Result -> Ok" return function else: when DEBUG_TRACE_COMPILER: echo "DEBUG - Compiler: Result -> ParseError" return nil else: when DEBUG_TRACE_COMPILER: echo "DEBUG - Compiler: Result -> LexingError" return nil proc initParser*(tokens: seq[Token], file: string): Parser = ## Initializes a new Parser obvject and returns a reference ## to it # TODO -> Make the parser independent of the compiler. As # of now, the compiler is what drives the parser and while # that might be easier for us it is not an ideal design. # We'll have to devise a standard interface for other people # to try and hook their parsers into JAPL with ease (pretty # much like our lexer now has the sole requirement of # having a lex() procedure that returns a list of tokens) result = Parser(current: 0, tokens: newArrayList[Token](), hadError: false, panicMode: false, file: file.asStr()) result.tokens.extend[:Token](tokens) proc initCompiler*(context: FunctionType, enclosing: Compiler = nil, parser: Parser = initParser(@[], ""), file: string): Compiler = ## Initializes a new Compiler object and returns a reference ## to it result = new(Compiler) result.parser = parser result.function = nil # Garbage collection paranoia result.locals = @[] result.scopeDepth = 0 result.localCount = 0 result.loop = Loop(alive: false, loopEnd: -1) result.objects = newArrayList[ptr Obj]() result.context = context result.enclosing = enclosing result.file = file.asStr() result.objects.append(result.file) result.parser.file = result.file result.locals.add(Local(depth: 0, name: Token(kind: EOF, lexeme: ""))) inc(result.localCount) result.afterReturn = false case context: of FunctionType.Func: result.function = result.markObject(newFunction(enclosing.parser.previous().lexeme, newChunk())) of FunctionType.Lambda: result.function = result.markObject(newLambda(newChunk())) else: # Script result.function = result.markObject(newFunction("", newChunk())) result.function.name = nil # This way the compiler can be executed on its own # without the VM when isMainModule: echo "JAPL Compiler REPL" while true: try: var compiler: Compiler = initCompiler(SCRIPT, file="test") stdout.write("=> ") var compiled = compiler.compile(stdin.readLine()) if compiled != nil: disassembleChunk(compiled.chunk, "test") except IOError: echo "" break