japl/src/compiler.nim

1134 lines
41 KiB
Nim

# Copyright 2020 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## The JAPL bytecode compiler
import strutils
import algorithm
import strformat
import lexer
import meta/opcode
import meta/token
import meta/looptype
import types/jobject
import types/jstring
import types/function
import tables
when isMainModule:
import util/debug
type
  Compiler* = object
    ## The state of the compiler
    enclosing*: ref Compiler  # Compiler that parseFunction switches back to after a nested function is compiled
    function*: ptr Function   # Function object whose chunk receives the emitted bytecode
    context*: FunctionType    # Kind of code being compiled (returnStatement rejects 'return' when this is SCRIPT)
    locals*: seq[Local]       # Local variables declared so far (appended by addLocal)
    localCount*: int          # Number of live locals (incremented by addLocal, decremented by endScope)
    scopeDepth*: int          # Current nesting level, 0 means global scope
    parser*: Parser           # Token stream and error state
    loop*: Loop               # Innermost loop being compiled (linked to its outer loop)
    objects*: seq[ptr Obj]    # Objects allocated at compile time (collected by markObject)
    file*: string             # File name shown in compile error messages
  Local* = ref object # A local variable
    name*: Token              # Identifier token that named the variable
    depth*: int               # Scope depth of the variable, -1 until it is marked as initialized
  Parser* = ref object # A Parser object
    current*: int             # Index of the next token to consume
    tokens*: seq[Token]       # Tokens to parse
    hadError*: bool           # Set once any error has been reported, never reset in this module's visible code
    panicMode*: bool          # Set while recovering from an error, reset by synchronize
    file*: string             # File name shown in parse error messages
  Precedence {.pure.} = enum
    # Operator precedence levels, listed from lowest to highest. They are
    # compared with <= by parsePrecedence and incremented by binary, so
    # the declaration order is significant
    None,
    Assignment,
    Or,
    And,
    Equality,
    Comparison,
    Term,
    Factor,
    Unary,
    Call,
    Primary
  # Signature shared by all prefix/infix parsing procedures
  ParseFn = proc(self: ref Compiler, canAssign: bool): void
  ParseRule = ref object
    # Pairs a token type's prefix and infix handlers (either may be nil)
    # with the precedence of its infix form
    prefix, infix: ParseFn
    precedence: Precedence
proc makeRule(prefix, infix: ParseFn, precedence: Precedence): ParseRule =
  ## Builds a parse rule out of a prefix handler, an infix
  ## handler and the precedence of the infix form
  ParseRule(prefix: prefix, infix: infix, precedence: precedence)
proc advance(self: var Parser): Token =
  ## Returns the token at the current index and then
  ## moves the index forward by one
  let tok = self.tokens[self.current]
  self.current += 1
  return tok
proc peek(self: Parser): Token =
  ## Looks at the current token, leaving the parser untouched
  self.tokens[self.current]
proc previous(self: Parser): Token =
  ## Looks at the token right before the current one,
  ## i.e. the one that was consumed last
  let idx = self.current - 1
  result = self.tokens[idx]
proc check(self: Parser, kind: TokenType): bool =
  ## Tells whether the current token has the given type.
  ## The token is not consumed
  self.peek().kind == kind
proc match(self: var Parser, kind: TokenType): bool =
  ## Consumes the current token and returns true when it has
  ## the given type. Returns false, consuming nothing, otherwise
  result = self.check(kind)
  if result:
    discard self.advance()
proc parseError(self: var Parser, token: Token, message: string) =
  ## Reports a parsing error on standard error and flags the
  ## parser as being in an error state. While panicMode is set
  ## (i.e. an error was already reported and the parser has not
  ## resynchronized yet) further errors are silently dropped, so
  ## that one mistake does not produce a cascade of messages
  if not self.panicMode:
    self.panicMode = true
    self.hadError = true
    stderr.write(&"A fatal error occurred while parsing '{self.file}', line {token.line}, at '{token.lexeme}' -> {message}\n")
proc consume(self: var Parser, expected: TokenType, message: string) =
  ## Consumes the current token when it has the expected type,
  ## reports a parsing error at that token with the given
  ## message otherwise
  if not self.check(expected):
    self.parseError(self.peek(), message)
  else:
    discard self.advance()
proc currentChunk(self: ref Compiler): var Chunk =
  ## Gives mutable access to the chunk of the function
  ## that is currently being compiled
  return self.function.chunk
proc compileError(self: ref Compiler, message: string) =
  ## Reports a compilation error on standard error, pointing
  ## at the parser's current token, then flags the parser as
  ## failed and in panic mode
  stderr.write(&"A fatal error occurred while compiling '{self.file}', line {self.parser.peek().line}, at '{self.parser.peek().lexeme}' -> {message}\n")
  self.parser.panicMode = true
  self.parser.hadError = true
proc emitByte(self: ref Compiler, byt: OpCode|uint8) =
  ## Appends one byte (an opcode or an operand) to the current
  ## chunk, tagging it with the line of the last consumed token
  let line = self.parser.previous().line
  self.currentChunk().writeChunk(uint8(byt), line)
proc emitBytes(self: ref Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
  ## Appends two bytes in a row to the current chunk. Handy for
  ## an opcode followed by its one-byte operand, or for a pair
  ## of opcodes
  emitByte(self, uint8(byt1))
  emitByte(self, uint8(byt2))
proc emitBytes(self: ref Compiler, bytarr: array[3, uint8]) =
  ## Appends the three bytes of the given array, in order,
  ## to the current chunk
  for item in bytarr:
    self.emitByte(item)
proc makeConstant(self: ref Compiler, obj: ptr Obj): uint8 =
  ## Stores the object in the constants table of the current
  ## chunk and returns the value given back by addConstant,
  ## converted to a single byte
  uint8(self.currentChunk().addConstant(obj))
proc makeLongConstant(self: ref Compiler, val: ptr Obj): array[3, uint8] =
  ## Like makeConstant(), but the index into the constants table
  ## comes back as a 3-byte array (rebuilt into an integer at
  ## runtime), which lets the table hold more than 256 entries
  return self.currentChunk().writeConstant(val)
proc emitConstant(self: ref Compiler, obj: ptr Obj) =
  ## Emits the instruction that loads a constant, followed by
  ## its operand: Constant with a 1-byte operand while the table
  ## holds at most 255 entries, ConstantLong with a 3-byte
  ## operand after that
  if self.currentChunk().consts.len <= 255:
    self.emitBytes(OpCode.Constant, self.makeConstant(obj))
  else:
    self.emitByte(OpCode.ConstantLong)
    self.emitBytes(self.makeLongConstant(obj))
# Forward declarations for later use: these procedures are referenced
# by the code below before their bodies appear in the file
proc initParser*(tokens: seq[Token], file: string): Parser
proc getRule(kind: TokenType): ParseRule
proc statement(self: ref Compiler)
proc declaration(self: ref Compiler)
proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil, parser: Parser = initParser(@[], ""), file: string): ref Compiler
proc endCompiler(self: ref Compiler): ptr Function =
  ## Finishes the compilation of the current function and hands
  ## back the function object holding its bytecode. A Nil
  ## instruction followed by a Return one is always appended, so
  ## a function without an explicit return statement yields nil
  self.emitBytes(OpCode.Nil, OpCode.Return)
  result = self.function
proc parsePrecedence(self: ref Compiler, precedence: Precedence) =
  ## Parses expressions using Pratt's elegant algorithm for precedence parsing.
  ## The next token is consumed and its prefix rule is run, then infix rules
  ## are applied for as long as the upcoming token's precedence is at least
  ## the requested one
  discard self.parser.advance()
  var prefixRule = getRule(self.parser.previous.kind).prefix
  if prefixRule == nil:  # A token without a prefix rule cannot start an expression
    self.parser.parseError(self.parser.previous, "Expecting expression")
    return
  # This is used to detect invalid assignment targets such as "hello" = 3;
  var canAssign = precedence <= Precedence.Assignment
  self.prefixRule(canAssign)  # Otherwise call the prefix rule (e.g. for unary negation)
  if self.parser.previous.kind == EOF:
    # If we're at EOF, we bail out and restore the EOF terminator so that
    # the parser behaves accordingly later on
    self.parser.current -= 1
    return
  # This will parse every following operator whose precedence is the same
  # as, or higher than, the requested one
  while precedence <= (getRule(self.parser.peek.kind).precedence):
    var infixRule = getRule(self.parser.advance.kind).infix
    if self.parser.peek().kind != EOF:
      self.infixRule(canAssign)
    else:
      self.parser.parseError(self.parser.previous, "Expecting expression, got EOF")
  # An '=' left over at this point follows something that cannot be assigned to
  if canAssign and self.parser.match(EQ):
    self.parser.parseError(self.parser.peek, "Invalid assignment target")
proc expression(self: ref Compiler) =
  ## Parses a full expression. Assignment is the lowest
  ## precedence level that still forms an expression, so
  ## parsing starts from there
  parsePrecedence(self, Precedence.Assignment)
proc binary(self: ref Compiler, canAssign: bool) =
  ## Infix rule for binary operators. The operator has already
  ## been consumed: its right operand is parsed one precedence
  ## level above the operator's own, then the instruction(s)
  ## for the operator are emitted
  let operator = self.parser.previous().kind
  let nextLevel = Precedence(ord(getRule(operator).precedence) + 1)
  self.parsePrecedence(nextLevel)
  case operator
  of TokenType.PLUS: self.emitByte(OpCode.Add)
  of TokenType.MINUS: self.emitByte(OpCode.Subtract)
  of TokenType.SLASH: self.emitByte(OpCode.Divide)
  of TokenType.STAR: self.emitByte(OpCode.Multiply)
  of TokenType.MOD: self.emitByte(OpCode.Mod)
  of TokenType.POW: self.emitByte(OpCode.Pow)
  # Operators without an instruction of their own are
  # compiled as the negation of the opposite comparison
  of TokenType.NE: self.emitBytes(OpCode.Equal, OpCode.Not)
  of TokenType.DEQ: self.emitByte(OpCode.Equal)
  of TokenType.GT: self.emitByte(OpCode.Greater)
  of TokenType.GE: self.emitBytes(OpCode.Less, OpCode.Not)
  of TokenType.LT: self.emitByte(OpCode.Less)
  of TokenType.LE: self.emitBytes(OpCode.Greater, OpCode.Not)
  of TokenType.CARET: self.emitByte(OpCode.Xor)
  of TokenType.SHL: self.emitByte(OpCode.Shl)
  of TokenType.SHR: self.emitByte(OpCode.Shr)
  of TokenType.BOR: self.emitByte(OpCode.Bor)
  of TokenType.BAND: self.emitByte(OpCode.Band)
  else: discard  # Unreachable
proc unary(self: ref Compiler, canAssign: bool) =
  ## Prefix rule for unary operators (arithmetic negation,
  ## logical negation and bitwise inversion). The operand is
  ## compiled first, then the operator's instruction follows
  let operator = self.parser.previous().kind
  if self.parser.peek().kind == EOF:
    self.parser.parseError(self.parser.previous, "Expecting expression, got EOF")
    return
  self.parsePrecedence(Precedence.Unary)
  case operator
  of MINUS: self.emitByte(OpCode.Negate)
  of NEG: self.emitByte(OpCode.Not)
  of TILDE: self.emitByte(OpCode.Bnot)
  else: discard
template markObject*(self: ref Compiler, obj: ptr Obj): untyped =
  ## Marks compile-time objects (since those take up memory as well)
  ## for the VM to reclaim space later on. The object is appended to
  ## the compiler's objects list and then returned, so the template
  ## can wrap an allocation inline
  let temp = obj  # Evaluate the argument expression only once
  self.objects.add(temp)
  temp
proc strVal(self: ref Compiler, canAssign: bool) =
  ## Prefix rule for string literals: strips the delimiters off
  ## the lexeme, resolves its escape sequences and emits the
  ## resulting string as a constant
  let lexeme = self.parser.previous().lexeme
  # The first character of the lexeme is the quote that delimits it
  # TODO: Add proper escape sequences support
  let quote = $lexeme[0]
  let contents = lexeme.unescape(quote, quote)
  self.emitConstant(self.markObject(jstring.newString(contents)))
proc bracketAssign(self: ref Compiler, canAssign: bool) =
  ## Parses assignments such as a[0] = "something".
  ## Currently a placeholder that does nothing
  discard # TODO -> Implement this
proc bracket(self: ref Compiler, canAssign: bool) =
  ## Parses slice expressions, such as "hello"[0].
  ## Slice can take up to two arguments, a start
  ## and an end index in the chosen iterable.
  ## Both arguments are optional, so doing "hi"[::]
  ## will basically copy the string into a new object.
  ## Indexes start from 0, and while the start index is
  ## inclusive, the end index is not. If an end index is
  ## not specified like this "hello"[0:], then it is
  ## assumed to be the length of the iterable. Likewise,
  ## if the start index is missing, it is assumed to be 0.
  ## Like in Python, using an end index that's out of bounds
  ## will not raise an error. Doing "hello"[0:999] will just
  ## return the whole string instead
  if self.parser.peek.kind == TokenType.COLON:
    # Missing start index: a Nil is emitted in its place
    self.emitByte(OpCode.Nil)
    discard self.parser.advance()
    if self.parser.peek().kind == TokenType.RS:
      # Missing end index as well
      self.emitByte(OpCode.Nil)
    else:
      self.parsePrecedence(Precedence.Term)
    self.emitByte(OpCode.SliceRange)
  else:
    self.parsePrecedence(Precedence.Term)
    if self.parser.peek().kind == TokenType.RS:
      # A single index
      self.emitByte(OpCode.Slice)
    elif self.parser.peek().kind == TokenType.COLON:
      discard self.parser.advance()
      if self.parser.peek().kind == TokenType.RS:
        # Missing end index: a Nil is emitted in its place
        self.emitByte(OpCode.Nil)
      else:
        self.parsePrecedence(Precedence.Term)
      self.emitByte(OpCode.SliceRange)
  if self.parser.peek().kind == TokenType.EQ:
    # NOTE(review): the expression after '=' is compiled here, but no
    # store instruction is emitted in this procedure - confirm intent
    discard self.parser.advance()
    self.parsePrecedence(Precedence.Term)
  self.parser.consume(TokenType.RS, "Expecting ']' after slice expression")
proc literal(self: ref Compiler, canAssign: bool) =
  ## Prefix rule for keyword literals (true, false, nil,
  ## inf and nan): each maps to a dedicated instruction
  let code =
    case self.parser.previous().kind
    of TokenType.TRUE: OpCode.True
    of TokenType.FALSE: OpCode.False
    of TokenType.NIL: OpCode.Nil
    of TokenType.INF: OpCode.Inf
    of TokenType.NAN: OpCode.Nan
    else: return  # Unreachable
  self.emitByte(code)
proc number(self: ref Compiler, canAssign: bool) =
  ## Parses numerical constants. Lexemes containing a dot are
  ## parsed as floats, everything else as integers, and the
  ## resulting object is emitted as a constant. A literal that
  ## cannot be converted is reported as a compile error and no
  ## constant is emitted for it
  let value = self.parser.previous().lexeme
  var obj: ptr Obj
  try:
    if "." in value:
      obj = parseFloat(value).asFloat()
    else:
      obj = parseInt(value).asInt()
  except OverflowError, ValueError:
    # strutils.parseInt reports out-of-range literals through
    # ValueError, so catching OverflowError alone would let the
    # exception escape the compiler
    self.compileError("number literal is too big")
    # obj is still nil here: it must not end up in the constants table
    return
  self.emitConstant(obj)
proc grouping(self: ref Compiler, canAssign: bool) =
  ## Prefix rule for parenthesized expressions. Parentheses only
  ## serve to allow a lower-precedence expression where a higher
  ## precedence one is expected. An empty pair of parentheses
  ## compiles to nil
  if self.parser.match(TokenType.EOF):
    self.parser.parseError(self.parser.previous, "Expecting ')'")
    return
  if self.parser.match(RP):
    self.emitByte(OpCode.Nil)
    return
  self.expression()
  self.parser.consume(TokenType.RP, "Expecting ')' after parentheszed expression")
proc synchronize(self: ref Compiler) =
  ## Brings the parser back to a sane state after an error.
  ## Flagging the error is not enough to keep going: resuming
  ## right where the error happened would likely produce a
  ## cascade of unhelpful messages. Instead, panic mode is
  ## cleared and tokens are skipped until a likely statement
  ## boundary is found: either a semicolon was just consumed,
  ## or the upcoming token is a keyword that starts a statement
  ## or a declaration. The code won't be compiled anyway, so we
  ## might as well report whatever else is wrong with it. Note
  ## that hadError is never reset, while panicMode is
  const boundaries = {TokenType.CLASS, TokenType.FUN, TokenType.VAR,
                      TokenType.FOR, TokenType.IF, TokenType.WHILE,
                      TokenType.RETURN}
  self.parser.panicMode = false
  # Stopping at EOF keeps the loop from running forever
  while self.parser.peek().kind != TokenType.EOF:
    if self.parser.previous().kind == TokenType.SEMICOLON:
      return
    if self.parser.peek().kind in boundaries:
      return
    discard self.parser.advance()
proc identifierConstant(self: ref Compiler, tok: Token): uint8 =
  ## Stores the identifier's name as a string constant in the
  ## current chunk and returns its one-byte index
  let name = self.markObject(jstring.newString(tok.lexeme))
  result = self.makeConstant(name)
proc identifierLongConstant(self: ref Compiler, tok: Token): array[3, uint8] =
  ## Like identifierConstant, but the index is returned as a
  ## 3-byte array. Meant for constant tables holding more
  ## than 255 elements
  let name = self.markObject(jstring.newString(tok.lexeme))
  result = self.makeLongConstant(name)
proc addLocal(self: ref Compiler, name: Token) =
  ## Registers a new local variable. Locals are resolved at
  ## compile time, unlike globals. A new local starts with a
  ## depth of -1, which marks it as "uninitialized": this is
  ## what allows to catch mistakes such as var a = a; inside
  ## local scopes
  self.locals.add(Local(name: name, depth: -1))
  self.localCount += 1
proc declareVariable(self: ref Compiler) =
  ## Declares the variable named by the last consumed token
  ## as a local. Nothing happens at global scope, as globals
  ## are not declared at compile time
  if self.scopeDepth != 0:
    self.addLocal(self.parser.previous())
proc parseVariable(self: ref Compiler, message: string): uint8 =
  ## Consumes an identifier (reporting the given message if
  ## there is none) and declares it. Inside a local scope 0 is
  ## returned, at global scope the result is the index of the
  ## variable's name in the constants table
  self.parser.consume(TokenType.ID, message)
  self.declareVariable()
  if self.scopeDepth <= 0:
    result = self.identifierConstant(self.parser.previous())
  else:
    result = uint8 0
proc parseLongVariable(self: ref Compiler, message: string): array[3, uint8] =
  ## Counterpart of parseVariable that yields a 3-byte index,
  ## used once the chunk's constants table holds more than
  ## 255 entries. Inside a local scope all three bytes are 0
  self.parser.consume(TokenType.ID, message)
  self.declareVariable()
  if self.scopeDepth <= 0:
    result = self.identifierLongConstant(self.parser.previous())
  else:
    result = [uint8 0, uint8 0, uint8 0]
proc markInitialized(self: ref Compiler) =
  ## Marks the local at index localCount - 1 as initialized
  ## and ready for use, by giving it the current scope depth.
  ## Does nothing at global scope
  if self.scopeDepth != 0:
    let last = self.localCount - 1
    self.locals[last].depth = self.scopeDepth
proc defineVariable(self: ref Compiler, idx: uint8) =
  ## Defines a variable. At global scope a DefineGlobal
  ## instruction is emitted with the given index as operand,
  ## inside a local scope the last local is simply marked
  ## as initialized
  if self.scopeDepth <= 0:
    self.emitBytes(OpCode.DefineGlobal, idx)
  else:
    self.markInitialized()
proc defineVariable(self: ref Compiler, idx: array[3, uint8]) =
  ## Same as the one-byte defineVariable, but the operand
  ## of DefineGlobal is a 3-byte index
  if self.scopeDepth <= 0:
    self.emitByte(OpCode.DefineGlobal)
    self.emitBytes(idx)
  else:
    self.markInitialized()
proc resolveLocal(self: ref Compiler, name: Token): int =
  ## Looks for a local variable with the given name, going
  ## from the most recently declared one backwards, and returns
  ## its index, or -1 when no local matches. Reading a local
  ## that is still uninitialized (as in var a = a) is reported
  ## as a compile error
  let top = self.localCount - 1
  for offset, local in reversed(self.locals):
    if local.name.lexeme != name.lexeme:
      continue
    if local.depth == -1:
      self.compileError("cannot read local variable in its own initializer")
    return top - offset
  result = -1
proc namedVariable(self: ref Compiler, tok: Token, canAssign: bool) =
  ## Handles local and global variables assignment, as well
  ## as variable resolution. The name is first looked up among
  ## the locals: if that fails it is treated as a global and its
  ## name is stored in the constants table. An '=' following the
  ## name turns the access into an assignment, but only when
  ## canAssign is true
  var arg = self.resolveLocal(tok)
  var
    get: OpCode
    set: OpCode
  if arg != -1:
    get = OpCode.GetLocal
    set = OpCode.SetLocal
  else:
    get = OpCode.GetGlobal
    set = OpCode.SetGlobal
    arg = int self.identifierConstant(tok)
  # canAssign must be tested first: match() consumes the '=' as a side
  # effect, and when assignment is not allowed the token has to stay
  # in place so that parsePrecedence can report the invalid target
  if canAssign and self.parser.match(EQ):
    self.expression()
    self.emitBytes(set, uint8 arg)
  else:
    self.emitBytes(get, uint8 arg)
proc namedLongVariable(self: ref Compiler, tok: Token, canAssign: bool) =
  ## Handles local and global variables assignment, as well
  ## as variable resolution. This is only called when the constants
  ## table's length exceeds 255, and emits a 3-byte operand instead
  ## of a 1-byte one. An '=' following the name turns the access
  ## into an assignment, but only when canAssign is true
  var arg = self.resolveLocal(tok)
  # NOTE(review): this reinterprets the raw bytes of an int as a 3-byte
  # array, which presumably does not match the encoding produced by
  # makeLongConstant - confirm against the VM's decoding of local slots
  var casted = cast[array[3, uint8]](arg)
  var
    get: OpCode
    set: OpCode
  if arg != -1:
    get = OpCode.GetLocal
    set = OpCode.SetLocal
  else:
    get = OpCode.GetGlobal
    set = OpCode.SetGlobal
    casted = self.identifierLongConstant(tok)
  # canAssign must be tested first: match() consumes the '=' as a side
  # effect, and when assignment is not allowed the token has to stay
  # in place so that parsePrecedence can report the invalid target
  if canAssign and self.parser.match(EQ):
    self.expression()
    self.emitByte(set)
    self.emitBytes(casted)
  else:
    self.emitByte(get)
    self.emitBytes(casted)
proc variable(self: ref Compiler, canAssign: bool) =
  ## Prefix rule for identifiers: compiles a variable access
  ## or assignment, picking the short or the long operand form
  # NOTE(review): the choice depends on the number of locals, while
  # namedLongVariable is documented as depending on the size of the
  # constants table - confirm which one is intended
  let tok = self.parser.previous()
  if self.locals.len >= 255:
    self.namedLongVariable(tok, canAssign)
  else:
    self.namedVariable(tok, canAssign)
proc varDeclaration(self: ref Compiler) =
  ## Parses a variable declaration: a name, an optional
  ## initializer (nil when missing) and a closing semicolon.
  ## The short form of the name's index is used while the
  ## chunk's constants table holds less than 255 elements,
  ## the long one afterwards
  let useShort = self.currentChunk.consts.len < 255
  var shortName: uint8
  var longName: array[3, uint8]
  if useShort:
    shortName = self.parseVariable("Expecting variable name")
  else:
    longName = self.parseLongVariable("Expecting variable name")
  if not self.parser.match(EQ):
    self.emitByte(OpCode.Nil)
  else:
    self.expression()
  self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after var declaration")
  if not useShort:
    self.defineVariable(longName)
  else:
    self.defineVariable(shortName)
proc expressionStatement(self: ref Compiler) =
  ## Parses an expression used as a statement, i.e. followed
  ## by a semicolon. A Pop instruction is emitted right after
  ## the expression's code
  expression(self)
  self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after expression")
  emitByte(self, OpCode.Pop)
# TODO: This code will not be used right now as it might clash with the future GC, fix this to make it GC aware!
proc deleteVariable(self: ref Compiler, canAssign: bool) =
  ## Parses the operand of a del statement and emits the
  ## instruction that deletes it: DeleteGlobal at global
  ## scope, DeleteLocal inside a local scope
  self.expression()
  if self.parser.previous().kind in [TokenType.NUMBER, TokenType.STR]:
    self.compileError("cannot delete a literal")
  var code: OpCode
  if self.scopeDepth == 0:
    code = OpCode.DeleteGlobal
  else:
    code = OpCode.DeleteLocal
    self.localCount = self.localCount - 1
  # The name of the deleted variable is the operand, in short or long
  # form depending on the size of the constants table
  if self.currentChunk.consts.len < 255:
    var name = self.identifierConstant(self.parser.previous())
    self.emitBytes(code, name)
  else:
    var name = self.identifierLongConstant(self.parser.previous())
    self.emitBytes(code, name[0])
    self.emitBytes(name[1], name[2])
proc parseBlock(self: ref Compiler) =
  ## Parses the body of a block statement: declarations are
  ## compiled one after the other until the closing brace (or
  ## the end of the file) shows up, then the brace is consumed
  while not (self.parser.check(TokenType.RB) or self.parser.check(TokenType.EOF)):
    self.declaration()
  self.parser.consume(TokenType.RB, "Expecting '}' after block statement")
proc beginScope(self: ref Compiler) =
  ## Opens a new scope. Bumping the scope depth is all that
  ## is needed, since the only thing a scope changes is how
  ## names are resolved
  self.scopeDepth += 1
proc endScope(self: ref Compiler) =
  ## Ends a scope, popping off any local that
  ## was created inside it along the way. A Pop
  ## instruction is emitted for each of those locals
  ## and their entries are dropped from the locals list
  self.scopeDepth = self.scopeDepth - 1
  while self.localCount > 0 and self.locals[self.localCount - 1].depth > self.scopeDepth:
    self.emitByte(OpCode.Pop)
    self.localCount = self.localCount - 1
  # addLocal appends to the end of the list while markInitialized and
  # resolveLocal index it through localCount: entries of dead locals
  # must go away, or the next declared local ends up at the wrong index
  if self.localCount >= 0 and self.localCount < self.locals.len:
    self.locals.setLen(self.localCount)
proc emitJump(self: ref Compiler, opcode: OpCode): int =
  ## Emits the given jump instruction followed by a 2-byte
  ## placeholder offset, and returns the position of the
  ## placeholder's first byte so that patchJump can fill in
  ## the real offset later
  self.emitByte(opcode)
  for _ in 1..2:
    self.emitByte(0xff)
  result = self.currentChunk.code.len - 2
proc patchJump(self: ref Compiler, offset: int) =
  ## Patches a previously emitted jump instruction.
  ## Since it's impossible to know how much code
  ## needs to be jumped before compiling the code
  ## itself, jumps are first encoded with a placeholder
  ## offset. Then, after the code that has to be jumped
  ## over has been compiled, its size is known and the
  ## previously emitted offset is replaced with the actual
  ## jump size.
  ## Note that, due to how the language is designed,
  ## only up to 2^16 bytecode instructions can
  ## be jumped over, so the size of the if/else conditions
  ## or loops is limited (hopefully 65 thousand and change
  ## instructions are enough for everyone).
  ## The offset parameter is the position of the first
  ## placeholder byte, as returned by emitJump
  # Distance between the end of the 2-byte operand and the end of the chunk
  var jump = self.currentChunk.code.len - offset - 2
  if jump > (int uint16.high):
    self.compileError("too much code to jump over")
  else:
    # The jump size is stored as two bytes, most significant one first
    self.currentChunk.code[offset] = uint8 (jump shr 8) and 0xff
    self.currentChunk.code[offset + 1] = uint8 jump and 0xff
proc ifStatement(self: ref Compiler) =
  ## Parses if statements in a C-style fashion. The condition
  ## is compiled first, then a conditional jump skips the "then"
  ## branch when it is false, and an unconditional jump at the
  ## end of the "then" branch skips the optional else branch
  self.parser.consume(TokenType.LP, "The if condition must be parenthesized")
  # The EOF checks keep the parser from running past the end of the tokens
  if self.parser.peek.kind != TokenType.EOF:
    self.expression()
    if self.parser.peek.kind != TokenType.EOF:
      self.parser.consume(TokenType.RP, "The if condition must be parenthesized")
      if self.parser.peek.kind != TokenType.EOF:
        var jump: int = self.emitJump(OpCode.JumpIfFalse)
        self.emitByte(OpCode.Pop)  # Pop emitted on the path taken when the condition is true
        self.statement()
        var elseJump = self.emitJump(OpCode.Jump)
        self.patchJump(jump)
        self.emitByte(OpCode.Pop)  # Pop emitted on the path taken when the condition is false
        if self.parser.match(TokenType.ELSE):
          self.statement()
        self.patchJump(elseJump)
      else:
        self.parser.parseError(self.parser.previous(), "Invalid syntax")
    else:
      self.parser.parseError(self.parser.previous(), "The if condition must be parenthesized")
proc emitLoop(self: ref Compiler, start: int) =
  ## Creates a loop and emits related instructions: a Loop
  ## opcode followed by a 2-byte offset. The start parameter
  ## is the position in the chunk where the loop begins
  self.emitByte(OpCode.Loop)
  # Distance from the loop's start to the current end of the chunk; the
  # + 2 presumably accounts for the two operand bytes emitted below
  var offset = self.currentChunk.code.len - start + 2
  if offset > (int uint16.high):
    self.compileError("loop body is too large")
  else:
    # The offset is stored as two bytes, most significant one first
    self.emitByte(uint8 (offset shr 8) and 0xff)
    self.emitByte(uint8 offset and 0xff)
proc endLooping(self: ref Compiler) =
  ## This method is used to make
  ## the break statement work and patch
  ## it with a jump instruction. It also
  ## patches the loop's exit jump, if any,
  ## and restores the enclosing loop
  if self.loop.loopEnd != -1:
    self.patchJump(self.loop.loopEnd)
    self.emitByte(OpCode.Pop)
  # Scan the loop's body byte by byte, turning every Break placeholder
  # into a Jump that lands at the current end of the chunk
  # NOTE(review): operand bytes are scanned too, so an operand equal to
  # the Break opcode would presumably be rewritten as well - confirm
  var i = self.loop.body
  while i < self.currentChunk.code.len:
    if self.currentChunk.code[i] == uint OpCode.Break:
      self.currentChunk.code[i] = uint8 OpCode.Jump
      self.patchJump(i + 1)
      i += 3  # Skip the opcode and its 2-byte operand
    else:
      i += 1
  self.loop = self.loop.outer
proc whileStatement(self: ref Compiler) =
  ## Parses while loops in a C-style fashion. The condition is
  ## compiled at the loop's start, a conditional jump leaves the
  ## loop when it is false and a Loop instruction at the end of
  ## the body goes back to the condition
  # The new loop is linked to the enclosing one, which endLooping restores
  var loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
  self.loop = loop
  self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
  # The EOF checks keep the parser from running past the end of the tokens
  if self.parser.peek.kind != TokenType.EOF:
    self.expression()
    if self.parser.peek.kind != TokenType.EOF:
      self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
      if self.parser.peek.kind != TokenType.EOF:
        self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
        self.emitByte(OpCode.Pop)
        self.loop.body = self.currentChunk.code.len
        self.statement()
        self.emitLoop(self.loop.start)
        self.patchJump(self.loop.loopEnd)
        self.emitByte(OpCode.Pop)
      else:
        self.parser.parseError(self.parser.previous(), "Invalid syntax")
    else:
      self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
  # Patches break statements (and the exit jump once more) and restores the outer loop
  self.endLooping()
proc forStatement(self: ref Compiler) =
  ## Parses for loops in a C-style fashion. The three clauses
  ## (initializer, condition and increment) are all optional.
  ## The whole loop lives in a scope of its own, so a variable
  ## declared in the initializer is local to the loop
  self.beginScope()
  self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
  if self.parser.peek.kind != TokenType.EOF:
    # Initializer clause: empty, a variable declaration or an expression
    if self.parser.match(TokenType.SEMICOLON):
      discard
    elif self.parser.match(TokenType.VAR):
      self.varDeclaration()
    else:
      self.expressionStatement()
    # The loop starts after the initializer, which runs only once
    var loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
    self.loop = loop
    # Condition clause: when missing, no exit jump is emitted
    if not self.parser.match(TokenType.SEMICOLON):
      self.expression()
      if self.parser.previous.kind != TokenType.EOF:
        self.parser.consume(TokenType.SEMICOLON, "Expecting ';'")
        self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
        self.emitByte(OpCode.Pop)
      else:
        self.parser.current -= 1
        self.parser.parseError(self.parser.previous(), "Invalid syntax")
    # Increment clause: it is compiled before the body, so a jump skips
    # over it on the way in and the body loops back to it instead of
    # the condition
    if not self.parser.match(RP):
      var bodyJump = self.emitJump(OpCode.Jump)
      var incrementStart = self.currentChunk.code.len
      if self.parser.peek.kind != TokenType.EOF:
        self.expression()
        self.emitByte(OpCode.Pop)
        self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
        self.emitLoop(self.loop.start)
        self.loop.start = incrementStart
        self.patchJump(bodyJump)
    # Loop body
    if self.parser.peek.kind != TokenType.EOF:
      self.loop.body = self.currentChunk.code.len
      self.statement()
      self.emitLoop(self.loop.start)
    else:
      self.parser.current -= 1
      self.parser.parseError(self.parser.previous(), "Invalid syntax")
    if self.loop.loopEnd != -1:
      self.patchJump(self.loop.loopEnd)
      self.emitByte(OpCode.Pop)
  else:
    self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
  # Patches break statements (and the exit jump once more) and restores the outer loop
  self.endLooping()
  self.endScope()
proc parseBreak(self: ref Compiler) =
  ## Parses break statements, which leave the current loop.
  ## A Pop is emitted for every local declared deeper than the
  ## loop itself, followed by a Break placeholder that gets
  ## turned into a real jump once the loop ends
  if not self.loop.alive:
    self.parser.parseError(self.parser.previous, "'break' outside loop")
    return
  self.parser.consume(TokenType.SEMICOLON, "missing semicolon after statement")
  var slot = self.localCount - 1
  while slot >= 0 and self.locals[slot].depth > self.loop.depth:
    self.emitByte(OpCode.Pop)
    dec(slot)
  discard self.emitJump(OpCode.Break)
proc parseAnd(self: ref Compiler, canAssign: bool) =
  ## Infix rule for expressions such as a and b. When the left
  ## operand is false the right one is jumped over, otherwise
  ## the left operand is popped and the right one is compiled
  let skipRight = self.emitJump(OpCode.JumpIfFalse)
  self.emitByte(OpCode.Pop)
  self.parsePrecedence(Precedence.And)
  self.patchJump(skipRight)
proc parseOr(self: ref Compiler, canAssign: bool) =
  ## Infix rule for expressions such as a or b. When the left
  ## operand is false execution hops over the unconditional
  ## jump, pops the operand and evaluates the right one. In the
  ## other case the unconditional jump skips the right operand
  let toRight = self.emitJump(OpCode.JumpIfFalse)
  let toEnd = self.emitJump(OpCode.Jump)
  self.patchJump(toRight)
  self.emitByte(OpCode.Pop)
  self.parsePrecedence(Precedence.Or)
  self.patchJump(toEnd)
proc continueStatement(self: ref Compiler) =
  ## Parses continue statements, which skip to the next
  ## iteration of the current loop. A Pop is emitted for every
  ## local declared deeper than the loop itself, followed by a
  ## Loop instruction going back to the loop's start
  if not self.loop.alive:
    self.parser.parseError(self.parser.previous, "'continue' outside loop")
    return
  self.parser.consume(TokenType.SEMICOLON, "missing semicolon after statement")
  var slot = self.localCount - 1
  while slot >= 0 and self.locals[slot].depth > self.loop.depth:
    self.emitByte(OpCode.Pop)
    dec(slot)
  self.emitLoop(self.loop.start)
proc parseFunction(self: ref Compiler, funType: FunctionType) =
  ## Parses function declarations. Functions can have
  ## keyword arguments (WIP), but once a parameter is declared
  ## as a keyword one, all subsequent parameters must be
  ## keyword ones as well. The function is compiled by a new
  ## compiler whose enclosing compiler is the current one, and
  ## the resulting function object is emitted as a constant
  ## in the enclosing compiler's chunk
  # From here on, self is the nested compiler
  var self = initCompiler(funType, self, self.parser, self.file)
  self.beginScope()
  self.parser.consume(LP, "Expecting '(' after function name")
  if self.parser.hadError:
    return
  var paramNames: seq[string] = @[]  # Names seen so far, to detect duplicates
  var defaultFollows: bool = false   # Set once a parameter with a default value is found
  if not self.parser.check(RP):
    while true:
      self.function.arity += 1
      if self.function.arity + self.function.optionals > 255:
        self.compileError("cannot have more than 255 arguments")
        break
      var paramIdx = self.parseVariable("expecting parameter name")
      if self.parser.hadError:
        return
      if self.parser.previous.lexeme in paramNames:
        self.compileError("duplicate parameter name in function declaration")
        return
      paramNames.add(self.parser.previous.lexeme)
      self.defineVariable(paramIdx)
      if self.parser.match(EQ):
        if self.parser.peek.kind == EOF:
          self.compileError("Unexpected EOF")
          return
        # A parameter with a default value counts as optional instead
        self.function.arity -= 1
        self.function.optionals += 1
        self.expression()
        # self.function.defaults.add(self.parser.previous.lexeme) # TODO
        defaultFollows = true
      elif defaultFollows:
        self.compileError("non-default argument follows default argument")
        return
      if not self.parser.match(COMMA):
        break
  self.parser.consume(RP, "Expecting ')' after parameters")
  self.parser.consume(LB, "Expecting '{' before function body")
  self.parseBlock()
  var fun = self.endCompiler()
  # Back to the enclosing compiler, which loads the function as a constant
  self = self.enclosing
  if self.currentChunk.consts.len < 255:
    self.emitBytes(OpCode.Constant, self.makeConstant(fun))
  else:
    self.emitByte(OpCode.ConstantLong)
    self.emitBytes(self.makeLongConstant(fun))
proc funDeclaration(self: ref Compiler) =
  ## Parses a function declaration and binds the function
  ## to its name in the current scope. The name is marked as
  ## initialized before the function itself is compiled
  let nameIdx = self.parseVariable("expecting function name")
  self.markInitialized()
  self.parseFunction(FunctionType.FUNC)
  self.defineVariable(nameIdx)
proc argumentList(self: ref Compiler): uint8 =
  ## Compiles the comma-separated arguments of a function
  ## call, up to the closing parenthesis, and returns how
  ## many of them there are. More than 255 arguments are
  ## reported as a compile error
  var count: uint8 = 0
  if not self.parser.check(RP):
    while true:
      self.expression()
      if count == 255:
        self.compileError("cannot have more than 255 arguments")
        return count
      inc(count)
      if not self.parser.match(COMMA):
        break
  self.parser.consume(RP, "Expecting ')' after arguments")
  return count
proc call(self: ref Compiler, canAssign: bool) =
  ## Infix rule for function calls: compiles the arguments
  ## and emits a Call instruction whose operand is their count
  let nargs = self.argumentList()
  self.emitBytes(OpCode.Call, nargs)
proc returnStatement(self: ref Compiler) =
  ## Parses return statements and emits the bytecode for
  ## them. A return without a value yields nil. Using return
  ## in a script context is reported as a compile error
  if self.context == SCRIPT:
    self.compileError("'return' outside function")
  if self.parser.match(TokenType.SEMICOLON):
    # Empty return
    self.emitByte(OpCode.Nil)
  else:
    self.expression()
    self.parser.consume(TokenType.SEMICOLON, "missing semicolon after return statement")
  self.emitByte(OpCode.Return)
proc statement(self: ref Compiler) =
  ## Parses a statement, dispatching on its first token.
  ## Anything that does not start with a statement keyword
  ## or an opening brace is an expression statement
  if self.parser.match(LB):
    # A block gets a scope of its own
    self.beginScope()
    self.parseBlock()
    self.endScope()
  elif self.parser.match(IF):
    self.ifStatement()
  elif self.parser.match(TokenType.WHILE):
    self.whileStatement()
  elif self.parser.match(TokenType.FOR):
    self.forStatement()
  elif self.parser.match(TokenType.BREAK):
    self.parseBreak()
  elif self.parser.match(TokenType.CONTINUE):
    self.continueStatement()
  elif self.parser.match(TokenType.RETURN):
    self.returnStatement()
  else:
    self.expressionStatement()
proc declaration(self: ref Compiler) =
  ## Parses a declaration: a function, a variable or, failing
  ## those, a plain statement. If the parser ends up in panic
  ## mode it is resynchronized before returning
  if self.parser.match(VAR):
    self.varDeclaration()
  elif self.parser.match(FUN):
    self.funDeclaration()
  else:
    self.statement()
  if self.parser.panicMode:
    self.synchronize()
# The array of all parse rules.
#
# The array is indexed by TokenType, so the entries MUST appear in
# the same order as the members of that enum; the trailing comment
# on each line names the token kind the entry is meant for.
# Each entry is built as makeRule(prefix, infix, precedence), where
# nil means the token has no handler for that position
var rules: array[TokenType, ParseRule] = [
    makeRule(nil, binary, Precedence.Term), # PLUS
    makeRule(unary, binary, Precedence.Term), # MINUS
    makeRule(nil, binary, Precedence.Factor), # SLASH
    makeRule(nil, binary, Precedence.Factor), # STAR
    makeRule(unary, nil, Precedence.None), # NEG
    makeRule(nil, binary, Precedence.Equality), # NE
    makeRule(nil, nil, Precedence.None), # EQ
    # NOTE(review): DEQ is given Comparison precedence while NE uses
    # Equality - confirm whether this asymmetry is intended
    makeRule(nil, binary, Precedence.Comparison), # DEQ
    makeRule(nil, binary, Precedence.Comparison), # LT
    makeRule(nil, binary, Precedence.Comparison), # GE
    makeRule(nil, binary, Precedence.Comparison), # LE
    makeRule(nil, binary, Precedence.Factor), # MOD
    makeRule(nil, binary, Precedence.Factor), # POW
    makeRule(nil, binary, Precedence.Comparison), # GT
    makeRule(grouping, call, Precedence.Call), # LP
    makeRule(nil, nil, Precedence.None), # RP
    makeRule(nil, bracket, Precedence.Call), # LS
    makeRule(nil, nil, Precedence.None), # LB
    makeRule(nil, nil, Precedence.None), # RB
    makeRule(nil, nil, Precedence.None), # COMMA
    makeRule(nil, nil, Precedence.None), # DOT
    makeRule(variable, nil, Precedence.None), # ID
    makeRule(nil, nil, Precedence.None), # RS
    makeRule(number, nil, Precedence.None), # NUMBER
    makeRule(strVal, nil, Precedence.None), # STR
    makeRule(nil, nil, Precedence.None), # SEMICOLON
    makeRule(nil, parseAnd, Precedence.And), # AND
    makeRule(nil, nil, Precedence.None), # CLASS
    makeRule(nil, nil, Precedence.None), # ELSE
    makeRule(nil, nil, Precedence.None), # FOR
    makeRule(nil, nil, Precedence.None), # FUN
    makeRule(literal, nil, Precedence.None), # FALSE
    makeRule(nil, nil, Precedence.None), # IF
    makeRule(literal, nil, Precedence.None), # NIL
    makeRule(nil, nil, Precedence.None), # RETURN
    makeRule(nil, nil, Precedence.None), # SUPER
    makeRule(nil, nil, Precedence.None), # THIS
    makeRule(nil, parseOr, Precedence.Or), # OR
    makeRule(literal, nil, Precedence.None), # TRUE
    makeRule(nil, nil, Precedence.None), # VAR
    makeRule(nil, nil, Precedence.None), # WHILE
    makeRule(nil, nil, Precedence.None), # DEL # TODO: Fix del statement to make it GC-aware
    makeRule(nil, nil, Precedence.None), # BREAK
    makeRule(nil, nil, Precedence.None), # EOF
    makeRule(nil, nil, Precedence.None), # COLON
    makeRule(nil, nil, Precedence.None), # CONTINUE
    makeRule(nil, binary, Precedence.Term), # CARET
    makeRule(nil, binary, Precedence.Term), # SHL
    makeRule(nil, binary, Precedence.Term), # SHR
    # NOTE(review): INF, NAN and TILDE below carry Term precedence but
    # have no infix handler, unlike the other prefix-only entries
    # (which use None) - confirm the expression parser copes with a
    # nil infix rule at a non-None precedence
    makeRule(literal, nil, Precedence.Term), # INF
    makeRule(literal, nil, Precedence.Term), # NAN
    makeRule(nil, binary, Precedence.Term), # BAND
    makeRule(nil, binary, Precedence.Term), # BOR
    makeRule(unary, nil, Precedence.Term), # TILDE
]
proc getRule(kind: TokenType): ParseRule =
  ## Looks up the entry of the rules table that is
  ## stored for the given token type
  rules[kind]
proc compile*(self: ref Compiler, source: string): ptr Function =
  ## Lexes and compiles a source string, returning the resulting
  ## function object. nil is returned instead when the lexer
  ## reports an error or when the parser recorded an error while
  ## compiling.
  ##
  ## A fresh parser is installed on the compiler for every call
  var scanner = initLexer(source, self.file)
  let tokens = scanner.lex()
  if scanner.errored:
    return nil

  self.parser = initParser(tokens, self.file)
  # Compile declarations until the end-of-file token is matched
  while not self.parser.match(EOF):
    self.declaration()

  # The compiler is always finalized, even if parsing failed
  let compiled = self.endCompiler()
  if self.parser.hadError:
    return nil
  return compiled
proc initParser*(tokens: seq[Token], file: string): Parser =
  ## Creates a parser over the given tokens for the given file
  ## name. The parser starts at index 0 with both its error flag
  ## and its panic-mode flag cleared
  new(result)
  result.tokens = tokens
  result.file = file
  result.current = 0
  result.hadError = false
  result.panicMode = false
proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil,
                   parser: Parser = initParser(@[], ""),
                   file: string): ref Compiler =
  ## Builds a fresh compiler for the given context and returns a
  ## reference to it.
  ##
  ## - The given parser is adopted as-is and its file field is
  ##   overwritten with `file`
  ## - The locals list starts out with one entry at depth 0 whose
  ##   name is an EOF token with an empty lexeme, and localCount
  ##   starts at 1 accordingly
  ## - A new function object is created and registered through
  ##   markObject
  ## - When context is not SCRIPT the function is named after the
  ##   lexeme of the enclosing compiler's previous token, so
  ##   `enclosing` must not be nil in that case
  result = (ref Compiler)(
    enclosing: enclosing,
    function: nil, # Garbage collection paranoia
    context: context,
    locals: @[Local(depth: 0, name: Token(kind: EOF, lexeme: ""))],
    localCount: 1,
    scopeDepth: 0,
    parser: parser,
    loop: Loop(alive: false, loopEnd: -1),
    objects: @[],
    file: file
  )
  result.parser.file = file
  result.function = result.markObject(newFunction())
  if context != SCRIPT:
    # If we're compiling a function, we give it its name
    result.function.name = jstring.newString(enclosing.parser.previous().lexeme)
# Standalone entry point: when this module is the main module,
# the compiler can be driven without the VM. Each line read from
# stdin is compiled by the same compiler instance and, if
# compilation succeeds, the resulting chunk is disassembled
when isMainModule:
  let compiler = initCompiler(SCRIPT, file="test")
  echo "JAPL Compiler REPL"
  while true:
    try:
      stdout.write("=> ")
      let line = stdin.readLine()
      let compiled = compiler.compile(line)
      if compiled != nil:
        disassembleChunk(compiled.chunk, "test")
    except IOError:
      # Reading from stdin failed (e.g. end of input): leave the REPL
      echo ""
      break