# peon-rewrite/src/backend/bytecode/codegen/generator.nim

import frontend/compiler/typechecker
import backend/bytecode/opcodes
import backend/bytecode/tooling/multibyte
import errors


import std/strutils
import std/parseutils
import std/tables
import std/strformat


type

    FunctionWrapper = ref object
        ## A wrapper around a typed function
        ## declaration. This is necessary to
        ## carry bytecode-specific information
        ## regarding this function along with
        ## the typed declaration itself

        # The typed declaration being wrapped
        decl: TypedFunDecl
        # Printed in hexadecimal as the function's "address" by the
        # built-in print. Presumably the function's position in the
        # generated bytecode: nothing in this section assigns it
        # (TODO confirm)
        location: int

    BytecodeGenerator* = ref object
        ## A bytecode generator

        # The piece of code we compile into
        chunk: Chunk
        # The current size of the call
        # stack (which is always known
        # statically)
        # NOTE(review): not read or written anywhere in this section
        stackSize: int
        # Stores the position of all jumps: offset is the index, in the
        # chunk's code, of the jump's opcode, while patched records whether
        # patchJump has filled in its operand. The index of an entry in this
        # sequence is the identifier that emitJump returns
        jumps: seq[tuple[patched: bool, offset: int]]
        # Metadata regarding function locations (used to construct
        # the debugging fields in the resulting bytecode)
        # NOTE(review): not populated anywhere in this section
        functions: seq[tuple[start, stop, pos: int, fn: Name]]
        # Used for error reporting: generate() updates both while it walks
        # the top-level nodes and error() falls back to them when it is
        # not given an explicit node
        currentFile: string
        currentNode: TypedNode
        # The typechecker used to validate the peon code we're generating
        # bytecode for
        typeChecker: TypeChecker


proc newBytecodeGenerator*: BytecodeGenerator =
    ## Creates a fresh bytecode generator whose
    ## fields are all left at their default values.
    ## The chunk and the typechecker are only
    ## assigned once generate() is called
    new(result)


# Forward declaration: handleBuiltinFunction() calls generateExpression()
# before the latter's body is defined further down in this file
proc generateExpression(self: BytecodeGenerator, expression: TypedExpr)


proc error(self: BytecodeGenerator, msg: string, typedNode: TypedNode = nil)  =
    ## Raises a CodeGenError carrying the given message.
    ## The line number comes from typedNode or, when that
    ## is nil, from the node currently being compiled. The
    ## file name is the generator's current file; if that
    ## is empty and the node is a declaration, the lexeme
    ## of the identifier of the declaration's owner is used
    ## instead
    let culprit = if typedNode.isNil(): self.currentNode else: typedNode
    var fileName = self.currentFile
    if fileName.len() == 0 and culprit.node.isDecl():
        # NOTE(review): generate() reads the file name from name.module
        # rather than name.owner — confirm both are meant to agree
        fileName = TypedDecl(culprit).name.owner.ident.token.lexeme
    raise CodeGenError(msg: msg, line: culprit.node.token.line, file: fileName)


proc emitByte(self: BytecodeGenerator, byt: OpCode | uint8, line: int) {.inline.} =
    ## Writes one byte (an opcode or a raw operand
    ## byte) to the chunk being compiled, tagging
    ## it with the given source line
    let raw = byt.uint8
    self.chunk.write(raw, line)


proc emitBytes(self: BytecodeGenerator, bytarr: openarray[OpCode | uint8], line: int) {.inline.} =
    ## Writes every element of bytarr, in order, to
    ## the chunk being compiled by handing each one
    ## to emitByte. All bytes are tagged with the same
    ## source line
    for index in 0..bytarr.high():
        self.emitByte(bytarr[index], line)


proc makeConstant(self: BytecodeGenerator, value: TypedExpr): array[3, uint8] =
    ## Adds a constant to the current chunk's constant table
    ## and returns its index as a 3-byte array of uint8s.
    ## Integers are encoded according to their size, floats
    ## according to their width and strings as their raw bytes
    ## without the enclosing quotes. For any other type nothing
    ## is written and the returned array is left zeroed
    case value.kind.kind:
        of Integer:
            var lit = value.node.token.lexeme
            # Drop everything from the last single quote onwards
            # (presumably a size specifier such as 'u8 — TODO confirm)
            # so that only the digits are handed to the parser
            let quote = lit.rfind('\'')
            if quote >= 0:
                lit = lit[0..<quote]
            case value.kind.size:
                of Tiny:
                    result = self.chunk.writeConstant([uint8(parseInt(lit))])
                of Short:
                    result = self.chunk.writeConstant(parseInt(lit).toDouble())
                of Long:
                    result = self.chunk.writeConstant(parseInt(lit).toQuad())
                of LongLong:
                    if value.kind.signed:
                        result = self.chunk.writeConstant(parseInt(lit).toLong())
                    else:
                        # Unsigned 64-bit literals can exceed high(int),
                        # which parseInt rejects: they need the unsigned
                        # parser instead
                        result = self.chunk.writeConstant(parseBiggestUInt(lit).toLong())
        of String:
            # Strip both enclosing quotes: emitConstant() announces a
            # length of lexeme.len() - 2 bytes for string constants
            result = self.chunk.writeConstant(value.node.token.lexeme[1..^2].toBytes())
        of Float:
            # parseutils.parseFloat returns how many characters it
            # consumed: that count is ignored here, so a lexeme that
            # fails to parse leaves f at 0.0
            case value.kind.width:
                of Half:
                    var f: float = 0.0
                    discard parseFloat(value.node.token.lexeme, f)
                    result = self.chunk.writeConstant(cast[array[4, uint8]](float32(f)))
                of Full:
                    var f: float = 0.0
                    discard parseFloat(value.node.token.lexeme, f)
                    result = self.chunk.writeConstant(cast[array[8, uint8]](f))
        else:
            discard


proc emitConstant(self: BytecodeGenerator, expression: TypedExpr) =
    ## Emits a constant instruction along with its operand(s).
    ## Integers and floats get a load opcode selected by their
    ## size or width (and, for integers, their signedness);
    ## strings get LoadString followed by their length as a
    ## 3-byte operand. In every case the 3-byte index returned
    ## by makeConstant() is emitted last. Raises a CodeGenError
    ## when a string's length does not fit in 3 bytes
    let
        typ = expression.kind
        node = expression.node
        line = node.token.line
    case typ.kind:
        of Integer:
            case typ.size:
                of LongLong:
                    if typ.signed:
                        self.emitByte(LoadInt64, line)
                    else:
                        self.emitByte(LoadUInt64, line)
                of Long:
                    if typ.signed:
                        self.emitByte(LoadInt32, line)
                    else:
                        self.emitByte(LoadUInt32, line)
                of Short:
                    if typ.signed:
                        self.emitByte(LoadInt16, line)
                    else:
                        self.emitByte(LoadUInt16, line)
                of Tiny:
                    if typ.signed:
                        self.emitByte(LoadInt8, line)
                    else:
                        self.emitByte(LoadUInt8, line)
        of String:
            self.emitByte(LoadString, line)
            # The lexeme is assumed to still include its two enclosing
            # quotes, which are not part of the string's contents: the
            # limit has to be checked against the length that is
            # actually emitted, not against the raw lexeme
            let size = LiteralExpr(node).literal.lexeme.len() - 2
            if size > 16777215:
                self.error("string constants cannot be larger than 16777215 bytes", expression)
            self.emitBytes(size.toTriple(), line)
        of Float:
            case typ.width:
                of Half:
                    self.emitByte(LoadFloat32, line)
                of Full:
                    self.emitByte(LoadFloat64, line)
        else:
            # No load instruction is emitted for other types yet, but
            # the constant index below still is
            discard # TODO
    self.emitBytes(self.makeConstant(expression), line)


proc setJump(self: BytecodeGenerator, offset: int, jmp: array[3, uint8]) =
    ## Overwrites the three bytes that follow the
    ## position offset in the chunk's code with the
    ## contents of jmp
    for i, operand in jmp:
        self.chunk.code[offset + 1 + i] = operand


proc setJump(self: BytecodeGenerator, offset: int, jmp: seq[uint8]) =
    ## Overwrites the three bytes that follow the
    ## position offset in the chunk's code with the
    ## first three elements of jmp
    for i in 0..2:
        self.chunk.code[offset + 1 + i] = jmp[i]


proc emitJump(self: BytecodeGenerator, opcode: OpCode, line: int): int =
    ## Emits the given jump opcode followed by a zeroed
    ## 3-byte operand that is meant to be filled in later.
    ## Returns the identifier of the jump (its index in the
    ## generator's list of jumps), to be passed to patchJump
    self.emitByte(opcode, line)
    # The last byte of the code is, at this point, the opcode itself
    let opcodeIndex = self.chunk.code.high()
    self.jumps.add((patched: false, offset: opcodeIndex))
    self.emitBytes(0.toTriple(), line)
    return self.jumps.high()


proc patchJump(self: BytecodeGenerator, offset: int) =
    ## Patches a relative jump previously emitted by
    ## emitJump so that it lands at the current end of
    ## the chunk's code. Despite its name, offset is the
    ## identifier returned by emitJump. Raises a
    ## CodeGenError if the jump distance is negative or
    ## greater than 16777215
    let
        origin = self.jumps[offset].offset
        distance = self.chunk.code.len() - origin
    if distance < 0:
        self.error("jump size cannot be negative (This is an internal error and most likely a bug)")
    if distance > 16777215:
        # TODO: Emit consecutive jumps using insertAt
        self.error("cannot jump more than 16777215 instructions")
    if distance > 0:
        # The distance is measured from the jump's opcode: the 4 bytes
        # taken up by the jump instruction itself (opcode plus 3-byte
        # operand) are subtracted from it
        self.setJump(origin, (distance - 4).toTriple())
        self.jumps[offset].patched = true


proc handleBuiltinFunction(self: BytecodeGenerator, fn: FunctionWrapper, args: seq[TypedExpr], line: int) =
    ## Emits instructions for builtin functions
    ## such as addition or subtraction. The builtin
    ## is identified by the first argument of the
    ## "magic" pragma attached to fn's declaration,
    ## args are the arguments of the call and line
    ## is the source line every emitted byte is tagged
    ## with. Raises a CodeGenError for unknown builtins
    ## and for types that print cannot handle
    var builtinOp: string
    # There is no break in this loop: if the declaration carries
    # several "magic" pragmas, the last one wins
    for pragma in FunDecl(fn.decl.node).pragmas:
        if pragma.name.token.lexeme == "magic":
            builtinOp = pragma.args[0].token.lexeme
    # The logical operators generate their own operands further
    # down, around a conditional jump. For every other builtin the
    # operands are generated right away: note that, with two arguments,
    # the code for the second one is emitted before the first
    if builtinOp notin ["LogicalOr", "LogicalAnd"]:
        if len(args) == 2:
            self.generateExpression(args[1])
            self.generateExpression(args[0])
        elif len(args) == 1:
            self.generateExpression(args[0])
    # Builtins that translate to exactly one opcode
    const codes: Table[string, OpCode] = {"Negate": Negate,
                                          "NegateFloat32": NegateFloat32,
                                          "NegateFloat64": NegateFloat64,
                                          "Add": Add,
                                          "Subtract": Subtract,
                                          "Divide": Divide,
                                          "Multiply": Multiply,
                                          "SignedDivide": SignedDivide,
                                          "AddFloat64": AddFloat64,
                                          "SubtractFloat64": SubtractFloat64,
                                          "DivideFloat64": DivideFloat64,
                                          "MultiplyFloat64": MultiplyFloat64,
                                          "AddFloat32": AddFloat32,
                                          "SubtractFloat32": SubtractFloat32,
                                          "DivideFloat32": DivideFloat32,
                                          "MultiplyFloat32": MultiplyFloat32,
                                          "Pow": Pow,
                                          "SignedPow": SignedPow,
                                          "PowFloat32": PowFloat32,
                                          "PowFloat64": PowFloat64,
                                          "Mod": Mod,
                                          "SignedMod": SignedMod,
                                          "ModFloat32": ModFloat32,
                                          "ModFloat64": ModFloat64,
                                          "Or": Or,
                                          "And": And,
                                          "Xor": Xor,
                                          "Not": Not,
                                          "LShift": LShift,
                                          "RShift": RShift,
                                          "Equal": Equal,
                                          "NotEqual": NotEqual,
                                          "LessThan": LessThan,
                                          "GreaterThan": GreaterThan,
                                          "LessOrEqual": LessOrEqual,
                                          "GreaterOrEqual": GreaterOrEqual,
                                          "SignedLessThan": SignedLessThan,
                                          "SignedGreaterThan": SignedGreaterThan,
                                          "SignedLessOrEqual": SignedLessOrEqual,
                                          "SignedGreaterOrEqual": SignedGreaterOrEqual,
                                          "Float32LessThan": Float32LessThan,
                                          "Float32GreaterThan": Float32GreaterThan,
                                          "Float32LessOrEqual": Float32LessOrEqual,
                                          "Float32GreaterOrEqual": Float32GreaterOrEqual,
                                          "Float64LessThan": Float64LessThan,
                                          "Float64GreaterThan": Float64GreaterThan,
                                          "Float64LessOrEqual": Float64LessOrEqual,
                                          "Float64GreaterOrEqual": Float64GreaterOrEqual,
                                          "PrintString": PrintString,
                                          "SysClock64": SysClock64,
                                          "LogicalNot": LogicalNot,
                                          "NegInf": LoadNInf,
                                          "Identity": Identity
                                          }.toTable()
    # print is resolved here, from the static type of its first
    # argument, because each printable type has its own opcode
    if builtinOp == "print":
        let typ = args[0].kind
        case typ.kind:
            of Integer:
                case typ.size:
                    of LongLong:
                        if typ.signed:
                            self.emitByte(PrintInt64, line)
                        else:
                            self.emitByte(PrintUInt64, line)
                    of Long:
                        if typ.signed:
                            self.emitByte(PrintInt32, line)
                        else:
                            self.emitByte(PrintUInt32, line)
                    of Short:
                        if typ.signed:
                            self.emitByte(PrintInt16, line)
                        else:
                            self.emitByte(PrintUInt16, line)
                    of Tiny:
                        if typ.signed:
                            self.emitByte(PrintInt8, line)
                        else:
                            self.emitByte(PrintUInt8, line)
            of Float:
                case typ.width:
                    of Full:
                        self.emitByte(PrintFloat64, line)
                    of Half:
                        self.emitByte(PrintFloat32, line)
            of String:
                self.emitByte(PrintString, line)
            of Boolean:
                self.emitByte(PrintBool, line)
            of TypeKind.Nan:
                self.emitByte(PrintNan, line)
            of TypeKind.Infinity:
                self.emitByte(PrintInf, line)
            of Function:
                # Functions are printed as a description built at compile
                # time, which is stored as a string constant and then
                # printed like any other string.
                # NOTE(review): fn is the wrapper of the builtin being
                # handled, not of the function passed as argument, so the
                # name and location shown below come from the builtin's own
                # declaration — confirm that this is intended.
                # NOTE(review): the code for args[0] has already been
                # emitted above and nothing here removes its value — verify
                # that the stack stays balanced
                self.emitByte(LoadString, line)
                var loc: string = fn.location.toHex()
                # Strip leading zeroes, keeping at least one digit
                while loc[0] == '0' and loc.len() > 1:
                    loc = loc[1..^1]
                var str: string
                if typ.isLambda:
                    str = &"anonymous function at 0x{loc}"
                else:
                    str = &"function '{FunDecl(fn.decl.node).name.token.lexeme}' at 0x{loc}"
                self.emitBytes(str.len().toTriple(), line)
                self.emitBytes(self.chunk.writeConstant(str.toBytes()), line)
                self.emitByte(PrintString, line)
            else:
                self.error(&"invalid type {self.typechecker.stringify(typ)} for built-in 'print'", args[0])
        return
    if builtinOp in codes:
        self.emitByte(codes[builtinOp], line)
        return
    # Some builtin operations are slightly more complex
    # so we handle them separately
    case builtinOp:
        of "LogicalOr":
            # The code for the second operand sits between the conditional
            # jump and its target, so the jump skips over it when taken
            self.generateExpression(args[0])
            let jump = self.emitJump(JumpIfTrue, line)
            self.generateExpression(args[1])
            self.patchJump(jump)
        of "LogicalAnd":
            # Same layout as LogicalOr, with a different jump opcode
            self.generateExpression(args[0])
            let jump = self.emitJump(JumpIfFalseOrPop, line)
            self.generateExpression(args[1])
            self.patchJump(jump)
        of "cast":
            # Type casts are a merely compile-time construct:
            # they don't produce any code at runtime because
            # the underlying data representation does not change!
            # The only reason why there's a "cast" pragma is to
            # make it so that the peon stub can have no body
            discard
        else:
            # Also reached when no "magic" pragma was found, in which
            # case builtinOp is still the empty string
            self.error(&"unknown built-in: '{builtinOp}'")


proc patchReturnAddress(self: BytecodeGenerator, pos: int) =
    ## Overwrites the 8 bytes starting at index pos in
    ## the chunk's constant table with the current length
    ## of the chunk's code, as encoded by toLong(). This
    ## is how the return address of a function call gets
    ## filled in once it is known
    let encoded = self.chunk.code.len().toLong()
    for i in 0..7:
        self.chunk.consts[pos + i] = encoded[i]


proc generateLiteral(self: BytecodeGenerator, literal: TypedExpr) =
    ## Emits the code that loads a literal value.
    ## Integers and floats go through the constant
    ## table, while infinities and NaN have dedicated
    ## opcodes. Raises a CodeGenError for any other
    ## type of literal
    let typ = literal.kind
    case typ.kind:
        of Integer, Float:
            # The typechecker is trusted to have validated the
            # literal already, so it is emitted without further checks
            self.emitConstant(literal)
        of Infinity:
            let opcode = if typ.positive: LoadInf else: LoadNInf
            self.emitByte(opcode, literal.node.token.line)
        of NaN:
            self.emitByte(LoadNaN, literal.node.token.line)
        else:
            # NOTE(review): string literals end up here even though
            # emitConstant() knows how to emit them — confirm whether
            # that is intended
            self.error(&"Unknown typed node of type {literal.node.kind} at generateLiteral()")


proc generateUnary(self: BytecodeGenerator, expression: TypedExpr) =
    ## Placeholder for the generation of unary
    ## expressions: no bytecode is emitted yet, the
    ## contents of the typed expression are merely
    ## printed to standard output
    echo(expression[])


proc generateExpression(self: BytecodeGenerator, expression: TypedExpr) =
    ## Emits code for expressions. Constant
    ## expressions are handed to generateLiteral,
    ## anything else is dispatched on the kind of
    ## its node. Raises a CodeGenError for kinds
    ## that are not handled
    if expression.node.isConst():
        self.generateLiteral(expression)
        return
    let node = expression.node
    case node.kind
    of binaryExpr:
        # NOTE(review): binary expressions are routed to generateUnary()
        # while unary ones emit nothing — this looks like unfinished
        # work, confirm before relying on either branch
        self.generateUnary(expression)
    of unaryExpr:
        discard
    else:
        self.error(&"Unknown typed node of type {node.kind} at generateExpression()")


proc beginProgram(self: BytecodeGenerator): int =
    ## Emits the boilerplate that sets up a peon
    ## program: two 64-bit constant loads (the address
    ## to jump to and a placeholder return address)
    ## followed by a call. Returns the position, in the
    ## chunk's constant table, of the placeholder return
    ## address so that it can be patched later
    const prologueLine = 1
    self.emitByte(LoadUInt64, prologueLine)
    # The initial jump address is always the same: each of the three
    # instructions emitted here is one opcode plus a 3-byte operand,
    # which adds up to the 12 written below
    let entryPoint = self.chunk.writeConstant(12.toLong())
    self.emitBytes(entryPoint, prologueLine)
    self.emitByte(LoadUInt64, prologueLine)
    # Dummy return address: the real one is only known once the
    # whole program has been generated
    let placeholder = self.chunk.writeConstant(0.toLong())
    self.emitBytes(placeholder, prologueLine)
    # The placeholder is the last 8 bytes written to the constant table
    result = self.chunk.consts.len() - 8
    self.emitByte(Call, prologueLine)
    self.emitBytes(0.toTriple(), prologueLine)


proc endProgram(self: BytecodeGenerator, pos: int) =
    ## Emits boilerplate code to tear down
    ## a peon program: a return instruction
    ## followed by a zero byte, after which the
    ## return address stored at position pos of
    ## the chunk's constant table is patched.
    ## The emitted bytes are tagged with the line
    ## of the last node that was compiled, or with
    ## line 1 if no node was compiled at all
    # When the program is empty generate() never sets currentNode,
    # so it must not be dereferenced: line 1 is what beginProgram()
    # uses for its own boilerplate
    let line =
        if not self.currentNode.isNil():
            self.currentNode.node.token.line
        else:
            1
    self.emitByte(OpCode.Return, line)
    # Entry point has no return value
    self.emitByte(0, line)
    # Patch the return address now that we know the boundaries
    # of the function
    self.patchReturnAddress(pos)


proc generate*(self: BytecodeGenerator, compiled: seq[TypedNode], typeChecker: TypeChecker): Chunk =
    ## Turns the output of the typechecker into a
    ## bytecode chunk. A new chunk is created on every
    ## call, the program's setup and teardown boilerplate
    ## is wrapped around the code generated for each
    ## top-level node and the resulting chunk is returned.
    ## Raises a CodeGenError when a node cannot be compiled
    self.chunk = newChunk()
    self.typeChecker = typeChecker
    let returnAddressPos = self.beginProgram()
    for typedNode in compiled:
        self.currentNode = typedNode
        # Remember the file in use so that it can be put
        # back once this node has been dealt with
        let previousFile = self.currentFile
        if typedNode.node.isDecl():
            self.currentFile = TypedDecl(typedNode).name.module.ident.token.lexeme
        case typedNode.node.kind
        of exprStmt:
            # The value of an expression statement is
            # not used by anything: it gets popped right away
            self.generateExpression(TypedExprStmt(typedNode).expression)
            self.emitByte(Pop, typedNode.node.token.line)
        else:
            self.error(&"Unknown typed node of type {typedNode.node.kind} at generate()")
        self.currentFile = previousFile
    self.endProgram(returnAddressPos)
    return self.chunk