japl/src/compiler.nim

# Copyright 2020 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## The JAPL bytecode compiler


import strutils
import sequtils
import algorithm
import strformat
import tables

import multibyte
import lexer
import meta/opcode
import meta/token
import meta/looptype
import types/baseObject
import types/function
import types/numbers
import types/japlString
import types/iterable
import types/arrayList
import config
when isMainModule:
    import util/debug
import types/methods
when DEBUG_TRACE_COMPILER:
    import terminal


type
    Compiler* = ref object
        ## The state of the compiler
        enclosing*: Compiler
        function*: ptr Function
        context*: FunctionType
        locals*: seq[Local]
        localCount*: int
        scopeDepth*: int
        parser*: Parser
        loop*: Loop
        objects*: ptr ArrayList[ptr Obj]
        file*: ptr String
        interned*: Table[string, ptr Obj]
        afterReturn: bool

    Local* = ref object   # A local variable
       name*: Token
       depth*: int

    Parser* = ref object  # A Parser object
        current*: int
        tokens*: ptr ArrayList[Token]
        hadError*: bool
        panicMode*: bool
        file*: ptr String

    Precedence {.pure.} = enum
        None,
        Assign,
        Or,
        And,
        Eq,
        Comp,
        As,
        Is,
        Term,
        Factor,
        Unary,
        Exp,
        Call,
        Primary

    ParseFn = proc(self: Compiler, canAssign: bool): void

    ParseRule = ref object
        prefix, infix: ParseFn
        precedence: Precedence


proc makeRule(prefix, infix: ParseFn, precedence: Precedence): ParseRule =
    ## Creates a new rule for parsing
    result = ParseRule(prefix: prefix, infix: infix, precedence: precedence)


proc advance(self: Parser): Token =
    ## Steps forward by one in the tokens' list and
    ## increments the current token index
    result = self.tokens[self.current]
    inc(self.current)


proc peek(self: Parser): Token =
    ## Returns the current token without consuming it
    return self.tokens[self.current]


proc peekNext(self: Parser): Token =
    ## Returns the next token without consuming it
    ## or an EOF token if we're at the end of the file
    if self.current <= len(self.tokens) - 1:
        return self.tokens[self.current + 1]
    return Token(kind: EOF, lexeme: "")


proc previous(self: Parser): Token =
    ## Returns the previously consumed token
    return self.tokens[self.current - 1]


proc check(self: Parser, kind: TokenType): bool =
    ## Checks if the current token is of the expected type
    ## without consuming it
    return self.peek().kind == kind


proc checkNext(self: Parser, kind: TokenType): bool =
    ## Checks if the next token is of the expected type
    ## without consuming it
    return self.peekNext().kind == kind


proc match(self: Parser, kind: TokenType): bool =
    ## Calls self.check() and consumes a token if the expected
    ## token type is encountered, in which case true
    ## is returned. False is returned otherwise
    if not self.check(kind): return false
    discard self.advance()
    return true


proc synchronize(self: Parser) =
    ## Synchronizes the parser's state. This is useful when
    ## dealing with parsing errors. When an error occurs, we
    ## note it with our nice panicMode and hadError fields, but
    ## that in itself doesn't allow the parser to go forward
    ## in the code and report other possible errors. On the
    ## other hand, attempting to start parsing the source
    ## right after an error has occurred could lead to a
    ## cascade of unhelpful error messages that complicate
    ## debugging issues. So, when an error occurs, we try
    ## to get back into a state that at least allows us to keep
    ## parsing and pretend the error never happened (the code
    ## would not be compiled anyway so we might as well tell the
    ## user if anything else is wrong with their code). The parser
    ## will skip to the next valid token for a statement, like an
    ## if or a for loop or a class declaration, and then keep
    ## parsing from there. Note that hadError is never reset, but
    ## panicMode is
    self.panicMode = false
    while self.peek().kind != TokenType.EOF:   # Infinite loops are bad, so we must take EOF into account
        if self.previous().kind == TokenType.SEMICOLON:
            return
        case self.peek().kind:
            of TokenType.CLASS, TokenType.FUN, TokenType.VAR,
                TokenType.FOR, TokenType.IF, TokenType.WHILE,
                TokenType.RETURN:   # We found a statement boundary, so the parser bails out
                return
            else:
                discard
        discard self.advance()


proc parseError(self: Parser, token: Token, message: string) =
    ## Notifies the user about parsing errors, writing them to
    ## the standard error file. This parser is designed to report
    ## all syntatical errors inside a file in one go, rather than
    ## stopping at the first error occurrence. This allows a user
    ## to identify and fix multiple errors without running the parser
    ## multiple times
    if self.panicMode:    # This serves to identify wheter an error already occurred, in which case we return
        return
    self.panicMode = true
    self.hadError = true
    stderr.write(&"A fatal error occurred while parsing '{self.file}', line {token.line}, at '{token.lexeme}' -> {message}\n")
    self.synchronize()


proc consume(self: Parser, expected: TokenType, message: string) =
    ## Attempts to consume a token if it is of the expected type
    ## or raises a parsing error with the given message otherwise
    if self.check(expected):
        discard self.advance()
        return
    self.parseError(self.peek(), message)


proc currentChunk(self: Compiler): var Chunk =
    ## Returns the current chunk being compiled
    result = self.function.chunk


proc compileError(self: Compiler, message: string) =
    ## Notifies the user about an error occurred during
    ## compilation, writing to the standard error file
    stderr.write(&"A fatal error occurred while compiling '{self.file}', line {self.parser.peek().line}, at '{self.parser.peek().lexeme}' -> {message}\n")
    self.parser.hadError = true
    self.parser.panicMode = true


proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Emits a single bytecode instruction and writes it
    ## to the current chunk being compiled
    when DEBUG_TRACE_COMPILER:
        write stdout, &"DEBUG - Compiler: Emitting {$byt} (uint8 value of  {$(uint8 byt)}"
        if byt.int() <= OpCode.high().int():
          write stdout, &"; opcode value of {$byt.OpCode}"
        write stdout, ")\n"

    self.currentChunk.writeChunk(uint8 byt, self.parser.previous.line)


proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Emits multiple bytes instead of a single one, this is useful
    ## to emit operators along with their operands or for multi-byte
    ## instructions that are longer than one byte
    self.emitByte(uint8 byt1)
    self.emitByte(uint8 byt2)


proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
    ## Handy helper method to write an array of 3 bytes into
    ## the current chunk, calling emiteByte(s) on each of its
    ## elements
    self.emitBytes(bytarr[0], bytarr[1])
    self.emitByte(bytarr[2])


proc makeConstant(self: Compiler, val: ptr Obj): array[3, uint8] =
    ## Does the same as makeConstant(), but encodes the index in the
    ## chunk's constant table as an array (which is later reconstructed
    ## into an integer at runtime) to store more than 256 constants in the table
    result = self.currentChunk.addConstant(val)


proc emitConstant(self: Compiler, obj: ptr Obj) =
    ## Emits a Constant instruction along
    ## with its operand
    self.emitByte(OpCode.Constant)
    self.emitBytes(self.makeConstant(obj))


proc initParser*(tokens: seq[Token], file: string): Parser
proc getRule(kind: TokenType): ParseRule  # Forward declarations for later use
proc statement(self: Compiler)
proc declaration(self: Compiler)
proc initCompiler*(context: FunctionType, enclosing: Compiler = nil, parser: Parser = initParser(@[], ""), file: string): Compiler


proc parsePrecedence(self: Compiler, precedence: Precedence) =
    ## Parses expressions using pratt's elegant algorithm to precedence parsing
    if self.parser.peek().kind == TokenType.EOF:
        self.parser.parseError(self.parser.peek(), "Expecting expression")
        return
    else:
        discard self.parser.advance()
    var prefixRule = getRule(self.parser.previous.kind).prefix
    if prefixRule == nil:   # If there is no prefix rule then an expression is expected
        self.parser.parseError(self.parser.previous, "Expecting expression")
        return
    var canAssign = precedence <= Precedence.Assign   # This is used to detect invalid assignment targets
    # such as "hello" = 3;
    self.prefixRule(canAssign)   # otherwise call the prefix rule (e.g. for binary negation)
    if self.parser.previous.kind == EOF:
        self.parser.current -= 1    # If we're at EOF, we bail out and restore the EOF terminator so that
        # the parser behaves accordingly later on
        return
    while precedence <= (getRule(self.parser.peek.kind).precedence):  # This will parse all expressions with the same precedence
    # or lower to the current expression
        var infixRule = getRule(self.parser.advance.kind).infix
        if self.parser.peek().kind != EOF:
            self.infixRule(canAssign)
        else:
            self.parser.parseError(self.parser.previous, "Expecting expression, got EOF")
    if canAssign and self.parser.match(TokenType.EQ):
        self.parser.parseError(self.parser.peek, "Invalid assignment target")


proc expression(self: Compiler) =
    ## Parses expressions
    self.parsePrecedence(Precedence.Assign)  # The highest-level expression is assignment


proc binary(self: Compiler, canAssign: bool) =
    ## Parses binary operators
    var operator = self.parser.previous().kind
    var rule = getRule(operator)
    self.parsePrecedence(Precedence((int rule.precedence) + 1))
    case operator:
        of TokenType.PLUS:
            self.emitByte(OpCode.Add)
        of TokenType.MINUS:
            self.emitByte(OpCode.Subtract)
        of TokenType.SLASH:
            self.emitByte(OpCode.Divide)
        of TokenType.STAR:
            self.emitByte(OpCode.Multiply)
        of TokenType.MOD:
            self.emitByte(OpCode.Mod)
        of TokenType.POW:
            self.emitByte(OpCode.Pow)
        of TokenType.NE:
            self.emitBytes(OpCode.Equal, OpCode.Not)
        of TokenType.DEQ:
            self.emitByte(OpCode.Equal)
        of TokenType.GT:
            # To allow for chaining of greater/less comparisons in the future (without doing
            # weird stuff such as allowing false with the greater/less than operators)
            # we need to move their logic in another function. This will
            # also allow for a sort of short-circuiting control flow like
            # for logical ands and ors, because why not?
            self.emitByte(OpCode.Greater)
        of TokenType.GE:
            self.emitByte(OpCode.GreaterOrEqual)
        of TokenType.LT:
            self.emitByte(OpCode.Less)
        of TokenType.LE:
            self.emitByte(OpCode.LessOrEqual)
        of TokenType.CARET:
           self.emitByte(OpCode.Xor)
        of TokenType.SHL:
            self.emitByte(OpCode.Shl)
        of TokenType.SHR:
            self.emitByte(OpCode.Shr)
        of TokenType.BOR:
            self.emitByte(OpCode.Bor)
        of TokenType.BAND:
            self.emitByte(OpCode.Band)
        of TokenType.IS:
            self.emitByte(OpCode.Is)
        of TokenType.ISNOT:
            self.emitBytes(OpCode.Is, Opcode.Not)
        of TokenType.AS:
            self.emitByte(OpCode.As)
        else:
            discard # Unreachable


proc unary(self: Compiler, canAssign: bool) =
    ## Parses unary expressions such as negation or
    ## binary inversion
    var operator = self.parser.previous().kind
    if self.parser.peek().kind != EOF:
        self.parsePrecedence(Precedence.Unary)
    else:
        self.parser.parseError(self.parser.previous, "Expecting expression, got EOF")
        return
    case operator:
        of MINUS:
            self.emitByte(OpCode.Negate)
        of NEG:
            self.emitByte(OpCode.Not)
        of TILDE:
            self.emitByte(OpCode.Bnot)
        of PLUS:
            discard   # Unary + does nothing anyway
        else:
            return


template markObject*(self: Compiler, obj: ptr Obj): untyped =
    ## Marks compile-time objects (since those take up memory as well)
    ## for the VM to reclaim space later on
    let temp = obj
    self.objects.append(temp)
    temp


proc strVal(self: Compiler, canAssign: bool) =
    ## Parses string literals
    var str = self.parser.previous().lexeme
    var delimiter = &"{str[0]}"    # TODO: Add proper escape sequences support
    str = str.unescape(delimiter, delimiter)
    if str notin self.interned:
        self.interned[str] = str.asStr()
        self.emitConstant(self.markObject(self.interned[str]))
    else:
        # We intern only constant strings!
        # Note that we don't call self.markObject on an already
        # interned string because that has already been marked
        self.emitConstant(self.interned[str])


proc bracketAssign(self: Compiler, canAssign: bool) =
    ## Parses assignments such as a[0] = "something"
    discard # TODO -> Implement this


proc bracket(self: Compiler, canAssign: bool) =
    ## Parses getitem/slice expressions, such as "hello"[0:1]
    ## or someList[5]. Slices can take up to two arguments, a start
    ## and an end index in the chosen iterable.
    ## Both arguments are optional, so doing "hi"[::]
    ## will basically copy the string (gets everything from
    ## start to end of the iterable).
    ## Indexes start from 0, and while the start index is
    ## inclusive, the end index is not. If an end index is
    ## not specified--like in "hello"[0:]--, then the it is
    ## assumed to be the length of the iterable. Likewise,
    ## if the start index is missing, it is assumed to be 0.
    ## Like in Python, using an end index that's out of bounds
    ## will not raise an error. Doing "hello"[0:999] will just
    ## return the whole string instead.
    ## It has to be noted that negative indexes are allowed: -1
    ## means the last element in the iterable, -2 the element
    ## before that and so on, but that if a negative index's value
    ## goes back too far it does NOT loop back to the end of the
    ## iterable and will cause an IndexError at runtime instead
    if self.parser.peek.kind == TokenType.COLON:
        self.emitByte(OpCode.Nil)
        discard self.parser.advance()
        if self.parser.peek().kind == TokenType.RS:
            self.emitByte(OpCode.Nil)
        else:
            self.parsePrecedence(Precedence.Term)
        self.emitByte(OpCode.Slice)
    else:
        self.parsePrecedence(Precedence.Term)
        if self.parser.peek().kind == TokenType.RS:
            self.emitByte(OpCode.GetItem)
        elif self.parser.peek().kind == TokenType.COLON:
            discard self.parser.advance()
            if self.parser.peek().kind == TokenType.RS:
                self.emitByte(OpCode.Nil)
            else:
                self.parsePrecedence(Precedence.Term)
            self.emitByte(OpCode.Slice)
    if self.parser.peek().kind == TokenType.EQ:
        discard self.parser.advance()
        self.parsePrecedence(Precedence.Term)
    self.parser.consume(TokenType.RS, "Expecting ']' after slice expression")


proc literal(self: Compiler, canAssign: bool) =
    ## Parses literal values such as true, nan and inf
    case self.parser.previous().kind:
        of TokenType.TRUE:
            self.emitByte(OpCode.True)
        of TokenType.FALSE:
            self.emitByte(OpCode.False)
        of TokenType.NIL:
            self.emitByte(OpCode.Nil)
        of TokenType.INF:
            self.emitByte(OpCode.Inf)
        of TokenType.NAN:
            self.emitByte(OpCode.Nan)
        else:
            discard  # Unreachable


proc number(self: Compiler, canAssign: bool) =
    ## Parses numerical constants
    var value = self.parser.previous().lexeme
    try:
        if "." in value:
            self.emitConstant(self.markObject(parseFloat(value).asFloat()))
        else:
            self.emitConstant(self.markObject(parseInt(value).asInt()))
    except ValueError:
        self.compileError("number literal is too big")


proc grouping(self: Compiler, canAssign: bool) =
    ## Parses parenthesized expressions. The only interesting
    ## semantic about parentheses is that they allow lower-precedence
    ## expressions where a higher precedence one is expected
    if self.parser.match(TokenType.EOF):
        self.parser.parseError(self.parser.previous, "Expecting ')'")
    elif self.parser.match(RP):
        self.emitByte(OpCode.Nil)
    else:
        self.expression()
        self.parser.consume(TokenType.RP, "Expecting ')' after parentheszed expression")


proc identifierConstant(self: Compiler, tok: Token): array[3, uint8] =
    ## Emits instructions for identifiers
    return self.makeConstant(self.markObject(asStr(tok.lexeme)))


proc addLocal(self: Compiler, name: Token) =
    ## Stores a local variable. Local name resolution
    ## happens at compile time rather than runtime,
    ## unlike global variables which are treated differently.
    ## Note that at first, a local is in a special "uninitialized"
    ## state, this is useful to detect errors such as var a = a;
    ## inside local scopes
    var local = Local(name: name, depth: -1)
    inc(self.localCount)
    self.locals.add(local)


proc declareVariable(self: Compiler) =
    ## Declares a variable, this is only useful
    ## for local variables, there is no way to
    ## "declare" a global at compile time. This
    ## assumption works because locals
    ## and temporaries have stack semantics inside
    ## local scopes
    if self.scopeDepth == 0:
        return
    var name = self.parser.previous()
    self.addLocal(name)


proc parseVariable(self: Compiler, message: string): array[3, uint8] =
    ## Parses variables and declares them
    self.parser.consume(TokenType.ID, message)
    self.declareVariable()
    if self.scopeDepth > 0:
        return [uint8 0, uint8 0, uint8 0]
    return self.identifierConstant(self.parser.previous())


proc markInitialized(self: Compiler) =
    ## Marks the latest defined global as
    ## initialized and ready for use
    if self.scopeDepth == 0:
        return
    self.locals[self.localCount - 1].depth = self.scopeDepth


proc defineVariable(self: Compiler, idx: array[3, uint8]) =
    ## Same as defineVariable, but this is used when
    ## there's more than 255 locals in the chunk's table
    if self.scopeDepth > 0:
        self.markInitialized()
        return
    self.emitByte(OpCode.DefineGlobal)
    self.emitBytes(idx)


proc resolveLocal(self: Compiler, name: Token): int =
    ## Resolves a local variable and catches errors such as
    ## var a = a
    var i = self.localCount - 1
    for local in reversed(self.locals):
        if local.name.lexeme == name.lexeme:
            if local.depth == -1:
                self.compileError("cannot read local variable in its own initializer")
            return i
        i = i - 1
    return -1


proc namedVariable(self: Compiler, tok: Token, canAssign: bool) =
    ## Handles local and global variables assignment, as well
    ## as variable resolution
    var
        arg = self.resolveLocal(tok)
        casted = cast[array[3, uint8]](arg)
        get: OpCode
        set: OpCode
    if arg != -1:
        get = OpCode.GetLocal
        set = OpCode.SetLocal
    else:
        get = OpCode.GetGlobal
        set = OpCode.SetGlobal
        casted = self.identifierConstant(tok)
    if self.parser.match(TokenType.EQ) and canAssign:
        self.expression()
        self.emitByte(set)
        self.emitBytes(casted)
    else:
        self.emitByte(get)
        self.emitBytes(casted)


proc variable(self: Compiler, canAssign: bool) =
    ## Emits the code to declare a variable,
    ## both locally and globally
    self.namedVariable(self.parser.previous(), canAssign)


proc varDeclaration(self: Compiler) =
    ## Parses a variable declaration
    var name: array[3, uint8] = self.parseVariable("Expecting variable name")
    if self.parser.match(TokenType.EQ):
        self.expression()
    else:
        self.emitByte(OpCode.Nil)
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after var declaration")
    self.defineVariable(name)


proc expressionStatement(self: Compiler) =
    ## Parses an expression statement, which is
    ## an expression followed by a semicolon. It then
    ## emits a pop instruction
    self.expression()
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after expression")
    self.emitByte(OpCode.Pop)


proc delStatement(self: Compiler) =
    self.expression()
    # TODO: isLiteral?
    if self.parser.previous().kind in {TokenType.NUMBER, TokenType.STR}:
        self.compileError("cannot delete a literal")
    var code: OpCode
    if self.scopeDepth == 0:
        code = OpCode.DeleteGlobal
    else:
        code = OpCode.DeleteLocal
        self.localCount = self.localCount - 1
    var name = self.identifierConstant(self.parser.previous())
    self.emitBytes(code, name[0])
    self.emitBytes(name[1], name[2])
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after del statement")


proc parseBlock(self: Compiler) =
    ## Parses a block statement, which is basically
    ## a list of other statements
    while not self.parser.check(TokenType.RB) and not self.parser.check(TokenType.EOF):
        self.declaration()
    self.parser.consume(TokenType.RB, "Expecting '}' after block statement")


proc beginScope(self: Compiler) =
    ## Begins a scope by increasing the
    ## current scope depth. This is literally
    ## all it takes to create a scope, since the
    ## only semantically interesting behavior of
    ## scopes is a change in names resolution
    inc(self.scopeDepth)


proc endScope(self: Compiler) =
    ## Ends a scope, popping off any local that
    ## was created inside it along the way
    self.scopeDepth = self.scopeDepth - 1
    var start: Natural = self.localCount
    while self.localCount > 0 and self.locals[self.localCount - 1].depth > self.scopeDepth:
        self.emitByte(OpCode.Pop)
        self.localCount = self.localCount - 1
    if start >= self.localCount:
        self.locals.delete(self.localCount, start)


proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits a jump instruction with a placeholder offset
    ## that is later patched, check patchJump for more info
    ## about how jumps work
    self.emitByte(opcode)
    self.emitByte(0xff)
    self.emitByte(0xff)
    when DEBUG_TRACE_COMPILER:
        setForegroundColor(fgYellow)
        write stdout, &"DEBUG - Compiler: emit jump @ {self.currentChunk.code.len-2}\n"
        setForegroundColor(fgDefault)
    return self.currentChunk.code.len - 2


proc patchJump(self: Compiler, offset: int) =
    ## Patches a previously emitted jump instruction.
    ## Since it's impossible to know how much code
    ## needs to be jumped before compiling the code
    ## itself, jumps are first encoded with a placeholder
    ## offset. Then, after the code that has to be jumped
    ## over has been compiled, its size is known and the
    ## previously emitted offset is replaced with the actual
    ## jump size.
    ## Note that, due to how the language is designed,
    ## only up to 2^16 bytecode instructions can
    ## be jumped over, so the size of the if/else conditions
    ## or loops is limited (hopefully 65 thousands and change
    ## instructions are enough for everyone)

    when DEBUG_TRACE_COMPILER:
        setForegroundColor(fgYellow)
        write stdout, &"DEBUG - Compiler: patching jump @ {offset}"
    let jump = self.currentChunk.code.len - offset - 2
    if jump > (int uint16.high):
        when DEBUG_TRACE_COMPILER:
            setForegroundColor(fgDefault)
            write stdout, "\n"
        self.compileError("too much code to jump over")
    else:
        let casted = toDouble(jump)
        self.currentChunk.code[offset] = casted[0]
        self.currentChunk.code[offset + 1] = casted[1]
        when DEBUG_TRACE_COMPILER:
            write stdout, &" points to {casted[0]}, {casted[1]} = {jump}\n"
            setForegroundColor(fgDefault)

proc ifStatement(self: Compiler) =
    ## Parses if statements in a C-style fashion
    self.parser.consume(TokenType.LP, "The if condition must be parenthesized")
    if self.parser.peek.kind != TokenType.EOF:
        self.expression()
        if self.parser.peek.kind != TokenType.EOF:
            self.parser.consume(TokenType.RP, "The if condition must be parenthesized")
        if self.parser.peek.kind != TokenType.EOF:
            var jump: int = self.emitJump(OpCode.JumpIfFalse)
            self.emitByte(OpCode.Pop)
            self.statement()
            var elseJump = self.emitJump(OpCode.Jump)
            self.patchJump(jump)
            self.emitByte(OpCode.Pop)
            if self.parser.match(TokenType.ELSE):
                self.statement()
            self.patchJump(elseJump)
        else:
            self.parser.parseError(self.parser.previous(), "Invalid syntax")
    else:
        self.parser.parseError(self.parser.previous(), "The if condition must be parenthesized")


proc emitLoop(self: Compiler, start: int) =
    ## Creates a loop and emits related instructions.
    when DEBUG_TRACE_COMPILER:
        setForegroundColor(fgYellow)
        write stdout, &"DEBUG - Compiler: emitting loop at start {start} "
    self.emitByte(OpCode.Loop)
    var offset = self.currentChunk.code.len - start + 2
    if offset > (int uint16.high):
        when DEBUG_TRACE_COMPILER:
            setForegroundColor(fgDefault)
            write stdout, "\n"
        self.compileError("loop body is too large")
    else:
        let offsetBytes = toDouble(offset)
        self.emitByte(offsetBytes[0])
        self.emitByte(offsetBytes[1])
        when DEBUG_TRACE_COMPILER:
            write stdout, &"pointing to {offsetBytes[0]}, {offsetBytes[1]} = {offset}\n"


proc endLooping(self: Compiler) =
    ## This method is used to make
    ## the break statement work and patch
    ## it with a jump instruction
    if self.loop.loopEnd != -1:
        self.patchJump(self.loop.loopEnd)
        self.emitByte(OpCode.Pop)

    for brk in self.loop.breaks:
        when DEBUG_TRACE_COMPILER:
            setForegroundColor(fgYellow)
            write stdout, &"DEBUG - Compiler: patching break at {brk}\n"
            setForegroundColor(fgDefault)
        self.currentChunk.code[brk] = OpCode.Jump.uint8
        self.patchJump(brk + 1)

    self.loop = self.loop.outer


proc whileStatement(self: Compiler) =
    ## Parses while loops in a C-style fashion
    let loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
    self.loop = loop
    self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
    if self.parser.peek.kind != TokenType.EOF:
        self.expression()
        if self.parser.peek.kind != TokenType.EOF:
            self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
        if self.parser.peek.kind != TokenType.EOF:
            self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
            self.emitByte(OpCode.Pop)
            self.loop.body = self.currentChunk.code.len
            self.statement()
            self.emitLoop(self.loop.start)
            #self.patchJump(self.loop.loopEnd) # Prod2: imo will get patched over by endLooping anyways
            #self.emitByte(OpCode.Pop)
        else:
            self.parser.parseError(self.parser.previous(), "Invalid syntax")
    else:
        self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
    self.endLooping()


proc forStatement(self: Compiler) =
    ## Parses for loops in a C-style fashion
    self.beginScope()
    self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
    if self.parser.peek.kind != TokenType.EOF:
        if self.parser.match(TokenType.SEMICOLON):
            discard
        elif self.parser.match(TokenType.VAR):
            self.varDeclaration()
        else:
            self.expressionStatement()
        var loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
        self.loop = loop
        if not self.parser.match(TokenType.SEMICOLON):
            self.expression()
            if self.parser.previous.kind != TokenType.EOF:
                self.parser.consume(TokenType.SEMICOLON, "Expecting ';'")
                self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
                self.emitByte(OpCode.Pop)
            else:
                self.parser.current -= 1
                self.parser.parseError(self.parser.previous(), "Invalid syntax")
        if not self.parser.match(RP):
            var bodyJump = self.emitJump(OpCode.Jump)
            var incrementStart = self.currentChunk.code.len
            if self.parser.peek.kind != TokenType.EOF:
                self.expression()
                self.emitByte(OpCode.Pop)
                self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
                self.emitLoop(self.loop.start)
                self.loop.start = incrementStart
                self.patchJump(bodyJump)
        if self.parser.peek.kind != TokenType.EOF:
            self.loop.body = self.currentChunk.code.len
            self.statement()
            self.emitLoop(self.loop.start)
        else:
            self.parser.current -= 1
            self.parser.parseError(self.parser.previous(), "Invalid syntax")
        if self.loop.loopEnd != -1:
            self.patchJump(self.loop.loopEnd)
            self.emitByte(OpCode.Pop)
    else:
        self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
    self.endLooping()
    self.endScope()


proc parseBreak(self: Compiler) =
    ## Parses break statements. A break
    ## statement causes the current loop
    ## to exit and jump to its end
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous, "'break' outside loop")
    else:
        self.parser.consume(TokenType.SEMICOLON, "missing semicolon after break statement")
        var i = self.localCount - 1
        while i >= 0 and self.locals[i].depth > self.loop.depth:
            self.emitByte(OpCode.Pop)
            i -= 1
        discard self.emitJump(OpCode.Break)
        self.loop.breaks.add(self.currentChunk.code.len() - 3)


proc parseAnd(self: Compiler, canAssign: bool) =
    ## Parses expressions such as a and b
    var jump = self.emitJump(OpCode.JumpIfFalse)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.And)
    self.patchJump(jump)


proc parseOr(self: Compiler, canAssign: bool) =
    ## Parses expressions such as a or b
    var elseJump = self.emitJump(OpCode.JumpIfFalse)
    var endJump = self.emitJump(OpCode.Jump)
    self.patchJump(elseJump)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.Or)
    self.patchJump(endJump)


proc continueStatement(self: Compiler) =
    ## Parses continue statements inside loops.
    ## The continue statement causes the loop to skip
    ## to the next iteration
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous, "'continue' outside loop")
    else:
        self.parser.consume(TokenType.SEMICOLON, "missing semicolon after continue statement")
        var i = self.localCount - 1
        while i >= 0 and self.locals[i].depth > self.loop.depth:
            self.emitByte(OpCode.Pop)
            i -= 1
        self.emitLoop(self.loop.start)


proc endCompiler(self: Compiler): ptr Function =
    ## Ends the current compiler instance and returns its
    ## compiled bytecode wrapped around a function object,
    ## also emitting a return instruction with nil as operand.
    ## Because of this, all functions implicitly return nil
    ## if no return statement is supplied
    self.emitByte(OpCode.Nil)
    self.emitByte(OpCode.Return)
    return self.function


proc parseFunction(self: Compiler, funType: FunctionType) =
    ## Parses function declarations. Functions can have
    ## keyword arguments (WIP), but once a parameter is declared
    ## as a keyword one, all subsequent parameters must be
    ## keyword ones as well
    var self = initCompiler(funType, self, self.parser, self.file.toStr())
    self.beginScope()
    if self.parser.check(LB):
        self.parser.consume(LB, "Expecting '{' before function body")
        self.parseBlock()
        var fun = self.endCompiler()
        self = self.enclosing
        self.emitByte(OpCode.Constant)
        self.emitBytes(self.makeConstant(fun))
        return
    self.parser.consume(LP, "Expecting '('")
    if self.parser.hadError:
        return
    var paramNames: seq[string] = @[]
    var defaultFollows: bool = false
    if not self.parser.check(RP):
        while true:
            self.function.arity += 1
            if self.function.arity + self.function.optionals > 255:
                self.compileError("functions cannot have more than 255 arguments")
                break
            var paramIdx = self.parseVariable("expecting parameter name")
            if self.parser.hadError:
                return
            if self.parser.previous.lexeme in paramNames:
                self.compileError("duplicate parameter name in function declaration")
                return
            paramNames.add(self.parser.previous.lexeme)
            self.defineVariable(paramIdx)
            if self.parser.match(TokenType.EQ):
                if self.parser.peek.kind == EOF:
                    self.compileError("Unexpected EOF")
                    return
                self.function.arity -= 1
                self.function.optionals += 1
                self.expression()
                self.function.defaults.append(self.parser.previous.lexeme.asStr())
                defaultFollows = true
            elif defaultFollows:
                self.compileError("non-default argument follows default argument")
                return
            if not self.parser.match(COMMA):
                break
    self.parser.consume(RP, "Expecting ')' after parameters")
    self.parser.consume(LB, "Expecting '{' before function body")
    self.parseBlock()
    var fun = self.endCompiler()
    self = self.enclosing
    self.emitByte(OpCode.Constant)
    self.emitBytes(self.makeConstant(fun))


proc parseLambda(self: Compiler, canAssign: bool) =
    ## Parses lambda expressions of the form => (params) {code}
    self.parseFunction(FunctionType.LAMBDA)


proc funDeclaration(self: Compiler) =
    ## Parses function declarations and declares
    ## them in the current scope
    var funName = self.parseVariable("expecting function name")
    self.markInitialized()
    self.parseFunction(FunctionType.FUNC)
    self.defineVariable(funName)


proc argumentList(self: Compiler): tuple[pos: uint8, kw: uint8] =
    ## Parses arguments passed to function calls
    result.pos = 0
    result.kw = 0
    if not self.parser.check(RP):
        while true:
            if self.parser.check(ID) and self.parser.checkNext(TokenType.EQ):
                discard self.parser.advance()
                discard self.parser.advance()
                if self.parser.check(EOF):
                    self.parser.parseError(self.parser.previous, "Unexpected EOF")
                    return
                else:
                    self.expression()
                    if result.pos + result.kw == 255:
                        self.compileError("cannot pass more than 255 arguments")
                        return
                    if not self.parser.match(COMMA):
                        break
                    result.kw += 1
            else:
                if self.parser.check(EOF):
                    self.parser.parseError(self.parser.previous, "Unexpected EOF")
                    return
                if result.kw > 0:
                    self.parser.parseError(self.parser.peek, "positional argument follows default argument")
                    return
                self.expression()
                if result.pos == 255:
                    self.compileError("cannot pass more than 255 arguments")
                    return
                result.pos += 1
                if not self.parser.match(COMMA):
                    break
    self.parser.consume(RP, "Expecting ')' after arguments")


proc call(self: Compiler, canAssign: bool) =
    ## Emits appropriate bytecode to call
    ## a function with its arguments
    # TODO -> Keyword arguments
    let args = self.argumentList()
    self.emitBytes(OpCode.Call, args.pos)


proc returnStatement(self: Compiler) =
    ## Parses return statements and emits
    ## appropriate bytecode instructions
    ## for them
    if self.context == SCRIPT:
        self.compileError("'return' outside function")
    self.afterReturn = true
    if self.parser.match(TokenType.SEMICOLON):   # Empty return
        self.emitByte(OpCode.Nil)
        self.emitByte(OpCode.Return)
    else:
        self.expression()
        self.parser.consume(TokenType.SEMICOLON, "missing semicolon after return statement")
        self.emitByte(OpCode.Return)


proc statement(self: Compiler) =
    ## Parses statements
    if self.parser.match(TokenType.FOR):
        self.forStatement()
    elif self.parser.match(TokenType.IF):
        self.ifStatement()
    elif self.parser.match(TokenType.WHILE):
        self.whileStatement()
    elif self.parser.match(TokenType.RETURN):
        self.returnStatement()
    elif self.parser.match(TokenType.CONTINUE):
        self.continueStatement()
    elif self.parser.match(TokenType.BREAK):
        self.parseBreak()
    elif self.parser.match(TokenType.DEL):
        self.delStatement()
    elif self.parser.match(TokenType.LB):
        self.beginScope()
        self.parseBlock()
        self.endScope()
    else:
        self.expressionStatement()


proc declaration(self: Compiler) =
    ## Parses declarations
    # TODO -> Fix this
#    if self.afterReturn:
 #       self.compileError("dead code after return statement")
  #      self.parser.tokens.append(Token(kind: TokenType.EOF, lexeme: ""))
    if self.parser.match(FUN):
        self.funDeclaration()
    elif self.parser.match(VAR):
        self.varDeclaration()
    else:
        self.statement()


proc freeCompiler*(self: Compiler) =
    ## Frees all the allocated objects
    ## from the compiler
    when DEBUG_TRACE_ALLOCATION:
        var objCount = len(self.objects)
        var objFreed = 0
    for obj in reversed(self.objects):
        freeObject(obj)
        discard self.objects.pop()
        when DEBUG_TRACE_ALLOCATION:
            objFreed += 1
    when DEBUG_TRACE_ALLOCATION:
        echo &"DEBUG - Compiler: Freed {objFreed} objects out of {objCount} compile-time objects"


# The array of all parse rules.
# This array instructs our Pratt parser on how
# to parse expressions and statements.
# makeRule defines rules for unary and binary
# operators as well as the token's precedence
var rules: array[TokenType, ParseRule] = [
    makeRule(nil, binary, Precedence.Term), # PLUS
    makeRule(unary, binary, Precedence.Term), # MINUS
    makeRule(nil, binary, Precedence.Factor), # SLASH
    makeRule(nil, binary, Precedence.Factor), # STAR
    makeRule(unary, nil, Precedence.None), # NEG
    makeRule(nil, binary, Precedence.Eq), # NE
    makeRule(nil, nil, Precedence.None), # EQ
    makeRule(nil, binary, Precedence.Comp), # DEQ
    makeRule(nil, binary, Precedence.Comp), # LT
    makeRule(nil, binary, Precedence.Comp), # GE
    makeRule(nil, binary, Precedence.Comp), # LE
    makeRule(nil, binary, Precedence.Factor), # MOD
    makeRule(nil, binary, Precedence.Exp), # POW
    makeRule(nil, binary, Precedence.Comp), # GT
    makeRule(grouping, call, Precedence.Call), # LP
    makeRule(nil, nil, Precedence.None), # RP
    makeRule(nil, bracket, Precedence.Call), # LS
    makeRule(nil, nil, Precedence.None), # LB
    makeRule(nil, nil, Precedence.None), # RB
    makeRule(nil, nil, Precedence.None), # COMMA
    makeRule(nil, nil, Precedence.None), # DOT
    makeRule(variable, nil, Precedence.None), # ID
    makeRule(nil, nil, Precedence.None), # RS
    makeRule(number, nil, Precedence.None), # NUMBER
    makeRule(strVal, nil, Precedence.None), # STR
    makeRule(nil, nil, Precedence.None), # SEMICOLON
    makeRule(nil, parseAnd, Precedence.And), # AND
    makeRule(nil, nil, Precedence.None), # CLASS
    makeRule(nil, nil, Precedence.None), # ELSE
    makeRule(nil, nil, Precedence.None), # FOR
    makeRule(nil, nil, Precedence.None), # FUN
    makeRule(literal, nil, Precedence.None), # FALSE
    makeRule(nil, nil, Precedence.None), # IF
    makeRule(literal, nil, Precedence.None), # NIL
    makeRule(nil, nil, Precedence.None), # RETURN
    makeRule(nil, nil, Precedence.None), # SUPER
    makeRule(nil, nil, Precedence.None), # THIS
    makeRule(nil, parseOr, Precedence.Or), # OR
    makeRule(literal, nil, Precedence.None), # TRUE
    makeRule(nil, nil, Precedence.None), # VAR
    makeRule(nil, nil, Precedence.None), # WHILE
    makeRule(nil, nil, Precedence.None), # DEL
    makeRule(nil, nil, Precedence.None), # BREAK
    makeRule(nil, nil, Precedence.None), # EOF
    makeRule(nil, nil, Precedence.None), # COLON
    makeRule(nil, nil, Precedence.None), # CONTINUE
    makeRule(nil, binary, Precedence.Term), # CARET
    makeRule(nil, binary, Precedence.Term), # SHL
    makeRule(nil, binary, Precedence.Term), # SHR
    makeRule(literal, nil, Precedence.Term), # INF
    makeRule(literal, nil, Precedence.Term), # NAN
    makeRule(nil, binary, Precedence.Term), # BAND
    makeRule(nil, binary, Precedence.Term), # BOR
    makeRule(unary, nil, Precedence.None), # TILDE
    makeRule(nil, binary, Precedence.Is),   # IS
    makeRule(nil, binary, Precedence.As),   # AS
    makeRule(parseLambda, nil, Precedence.None), # LAMBDA
    makeRule(nil, binary, Precedence.Is),   # ISNOT

]


proc getRule(kind: TokenType): ParseRule =
    ## Returns an appropriate precedence rule
    ## object for a given token type
    result = rules[kind]


proc compile*(self: Compiler, source: string): ptr Function =
    ## Compiles a source string into a function
    ## object. This wires up all the code
    ## inside the parser and the lexer
    var scanner = initLexer(source, self.file.toStr())
    var tokens = scanner.lex()
    if not scanner.errored:
        self.parser = initParser(tokens, self.file.toStr())
        while not self.parser.match(EOF):
            self.declaration()
        var function = self.endCompiler()
        if not self.parser.hadError:
            when DEBUG_TRACE_COMPILER:
                echo "DEBUG - Compiler: Result -> Ok"
            return function
        else:
            when DEBUG_TRACE_COMPILER:
                echo "DEBUG - Compiler: Result -> ParseError"
            return nil
    else:
        when DEBUG_TRACE_COMPILER:
            echo "DEBUG - Compiler: Result -> LexingError"
        return nil


proc initParser*(tokens: seq[Token], file: string): Parser =
    ## Initializes a new Parser obvject and returns a reference
    ## to it
    # TODO -> Make the parser independent of the compiler. As
    # of now, the compiler is what drives the parser and while
    # that might be easier for us it is not an ideal design.
    # We'll have to devise a standard interface for other people
    # to try and hook their parsers into JAPL with ease (pretty
    # much like our lexer now has the sole requirement of
    # having a lex() procedure that returns a list of tokens)
    result = Parser(current: 0, tokens: newArrayList[Token](), hadError: false, panicMode: false, file: file.asStr())
    result.tokens.extend[:Token](tokens)


proc initCompiler*(context: FunctionType, enclosing: Compiler = nil, parser: Parser = initParser(@[], ""), file: string): Compiler =
    ## Initializes a new Compiler object and returns a reference
    ## to it
    result = new(Compiler)
    result.parser = parser
    result.function = nil   # Garbage collection paranoia
    result.locals = @[]
    result.scopeDepth = 0
    result.localCount = 0
    result.loop = Loop(alive: false, loopEnd: -1)
    result.objects = newArrayList[ptr Obj]()
    result.context = context
    result.enclosing = enclosing
    result.file = file.asStr()
    result.objects.append(result.file)
    result.parser.file = result.file
    result.locals.add(Local(depth: 0, name: Token(kind: EOF, lexeme: "")))
    inc(result.localCount)
    result.afterReturn = false
    case context:
        of FunctionType.Func:
            result.function = result.markObject(newFunction(enclosing.parser.previous().lexeme, newChunk()))
        of FunctionType.Lambda:
            result.function = result.markObject(newLambda(newChunk()))
        else:  # Script
            result.function = result.markObject(newFunction("", newChunk()))
            result.function.name = nil


# This way the compiler can be executed on its own
# without the VM
when isMainModule:
    echo "JAPL Compiler REPL"
    while true:
        try:
            var compiler: Compiler = initCompiler(SCRIPT, file="test")
            stdout.write("=> ")
            var compiled = compiler.compile(stdin.readLine())
            if compiled != nil:
                disassembleChunk(compiled.chunk, "test")
        except IOError:
            echo ""
            break