# Mirror of https://github.com/japl-lang/japl.git (1134 lines, 41 KiB, Nim)
# Copyright 2020 Mattia Giambirtone
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
## The JAPL bytecode compiler
|
|
|
|
|
|
import strutils
|
|
import algorithm
|
|
import strformat
|
|
import lexer
|
|
import meta/opcode
|
|
import meta/token
|
|
import meta/looptype
|
|
import types/jobject
|
|
import types/jstring
|
|
import types/function
|
|
import tables
|
|
when isMainModule:
|
|
import util/debug
|
|
|
|
type
    Compiler* = object
        ## The state of the compiler
        enclosing*: ref Compiler   # Compiler this one was created from (see parseFunction)
        function*: ptr Function    # Function object whose chunk receives the bytecode
        context*: FunctionType     # Kind of function being compiled
        locals*: seq[Local]        # Local variables declared so far
        localCount*: int           # Number of live locals
        scopeDepth*: int           # Nesting level of the current scope, 0 is the global scope
        parser*: Parser            # Token stream and error flags
        loop*: Loop                # Innermost loop being compiled
        objects*: seq[ptr Obj]     # Objects created at compile time (see markObject)
        file*: string              # Name of the file being compiled, used in error messages

    Local* = ref object
        ## A local variable
        name*: Token   # Identifier token that declared the local
        depth*: int    # Scope depth of the local, -1 while it is still uninitialized

    Parser* = ref object
        ## A Parser object
        current*: int        # Index of the next token to consume
        tokens*: seq[Token]  # Tokens to parse
        hadError*: bool      # Set when an error is reported
        panicMode*: bool     # Set after an error, cleared by synchronize()
        file*: string        # Name of the file being parsed, used in error messages

    Precedence {.pure.} = enum
        ## Operator precedence levels, listed from the lowest to the highest.
        ## The declaration order matters: binary() computes the next level
        ## by adding one to the ordinal value
        None, Assignment, Or, And,
        Equality, Comparison, Term, Factor,
        Unary, Call, Primary

    ParseFn = proc(self: ref Compiler, canAssign: bool): void
        ## Signature shared by all prefix and infix parsing handlers

    ParseRule = ref object
        ## Handlers and precedence associated with a token type
        prefix, infix: ParseFn
        precedence: Precedence
|
|
|
|
|
|
proc makeRule(prefix, infix: ParseFn, precedence: Precedence): ParseRule =
    ## Builds a parsing rule out of a prefix handler,
    ## an infix handler and a precedence level
    new(result)
    result.prefix = prefix
    result.infix = infix
    result.precedence = precedence
|
|
|
|
|
|
proc advance(self: var Parser): Token =
    ## Returns the token at the current index and then
    ## moves the index one position forward
    let consumed = self.tokens[self.current]
    self.current += 1
    return consumed
|
|
|
|
|
|
proc peek(self: Parser): Token =
    ## Looks at the token at the current index,
    ## leaving the index untouched
    result = self.tokens[self.current]
|
|
|
|
|
|
proc previous(self: Parser): Token =
    ## Returns the token located right before the current index,
    ## that is the one that was consumed last
    let lastConsumed = self.current - 1
    result = self.tokens[lastConsumed]
|
|
|
|
|
|
proc check(self: Parser, kind: TokenType): bool =
    ## Tells whether the current token has the given type.
    ## The token is not consumed
    let upcoming = self.peek()
    result = upcoming.kind == kind
|
|
|
|
|
|
proc match(self: var Parser, kind: TokenType): bool =
    ## Consumes the current token only when it has the given
    ## type. Returns true if a token was consumed and false
    ## otherwise
    result = self.check(kind)
    if result:
        discard self.advance()
|
|
|
|
|
|
proc parseError(self: var Parser, token: Token, message: string) =
    ## Reports a parsing error on the standard error file.
    ## The parser is meant to report as many syntactical errors
    ## as possible in one go instead of stopping at the first one,
    ## so that several mistakes can be fixed without running the
    ## parser multiple times. To avoid cascades of messages, nothing
    ## is reported while the parser is already in panic mode
    if not self.panicMode:
        self.panicMode = true
        self.hadError = true
        stderr.write(&"A fatal error occurred while parsing '{self.file}', line {token.line}, at '{token.lexeme}' -> {message}\n")
|
|
|
|
|
|
proc consume(self: var Parser, expected: TokenType, message: string) =
    ## Consumes the current token when it has the expected type.
    ## Otherwise a parsing error carrying the given message is
    ## reported at the current token, which is left in place
    if not self.check(expected):
        self.parseError(self.peek(), message)
    else:
        discard self.advance()
|
|
|
|
|
|
proc currentChunk(self: ref Compiler): var Chunk =
    ## Gives mutable access to the chunk of the function
    ## that is currently being compiled
    result = self.function[].chunk
|
|
|
|
|
|
proc compileError(self: ref Compiler, message: string) =
    ## Reports a compilation error on the standard error file,
    ## using the parser's current token for the location, and
    ## then flags the parser as errored and in panic mode
    let report = &"A fatal error occurred while compiling '{self.file}', line {self.parser.peek().line}, at '{self.parser.peek().lexeme}' -> {message}\n"
    stderr.write(report)
    self.parser.hadError = true
    self.parser.panicMode = true
|
|
|
|
|
|
proc emitByte(self: ref Compiler, byt: OpCode|uint8) =
    ## Writes one byte (an opcode or an operand) into the
    ## chunk being compiled, tagging it with the line of
    ## the last consumed token
    let line = self.parser.previous().line
    self.currentChunk().writeChunk(uint8(byt), line)
|
|
|
|
|
|
proc emitBytes(self: ref Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Writes two bytes in a row, handy to emit an instruction
    ## together with its operand or two consecutive instructions
    for raw in [uint8(byt1), uint8(byt2)]:
        self.emitByte(raw)
|
|
|
|
|
|
proc emitBytes(self: ref Compiler, bytarr: array[3, uint8]) =
    ## Writes the three bytes of the given array into the
    ## current chunk, in order, by calling emitByte on each
    ## of them
    for raw in bytarr:
        self.emitByte(raw)
|
|
|
|
|
|
proc makeConstant(self: ref Compiler, obj: ptr Obj): uint8 =
    ## Stores a constant (literal) in the constants table
    ## of the current chunk and returns the value given back
    ## by addConstant converted to a single byte
    return uint8(self.currentChunk().addConstant(obj))
|
|
|
|
|
|
proc makeLongConstant(self: ref Compiler, val: ptr Obj): array[3, uint8] =
    ## Like makeConstant(), but the index in the chunk's constants
    ## table is encoded as an array of 3 bytes (reconstructed into
    ## an integer at runtime), so that the table can hold more
    ## than 256 constants
    return self.currentChunk().writeConstant(val)
|
|
|
|
|
|
proc emitConstant(self: ref Compiler, obj: ptr Obj) =
    ## Emits a Constant instruction with a one-byte operand while
    ## the constants table holds at most 255 entries, and a
    ## ConstantLong instruction with a three-byte operand afterwards
    if self.currentChunk().consts.len <= 255:
        self.emitBytes(OpCode.Constant, self.makeConstant(obj))
    else:
        self.emitByte(OpCode.ConstantLong)
        self.emitBytes(self.makeLongConstant(obj))
|
|
|
|
|
|
# Forward declarations for later use: the procedures below are
# referenced in this part of the file before their bodies appear
proc initParser*(tokens: seq[Token], file: string): Parser
proc getRule(kind: TokenType): ParseRule
proc statement(self: ref Compiler)
proc declaration(self: ref Compiler)
proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil,
                   parser: Parser = initParser(@[], ""),
                   file: string): ref Compiler
|
|
|
|
|
|
proc endCompiler(self: ref Compiler): ptr Function =
    ## Finishes the current compiler instance and hands back the
    ## function object wrapping the compiled bytecode. A Nil
    ## instruction followed by a Return instruction is appended
    ## first, which is why every function implicitly returns nil
    ## when no return statement is supplied
    self.emitBytes(OpCode.Nil, OpCode.Return)
    result = self.function
|
|
|
|
|
|
proc parsePrecedence(self: ref Compiler, precedence: Precedence) =
    ## Parses an expression of the given precedence level or higher,
    ## using Pratt's algorithm for precedence parsing
    discard self.parser.advance()
    let prefixHandler = getRule(self.parser.previous().kind).prefix
    if prefixHandler == nil:
        # No prefix rule for the token: it cannot start an expression
        self.parser.parseError(self.parser.previous(), "Expecting expression")
        return
    # Assignment is only allowed when parsing at the lowest levels, this
    # is what allows detecting invalid targets such as "hello" = 3;
    let canAssign = precedence <= Precedence.Assignment
    prefixHandler(self, canAssign)
    if self.parser.previous().kind == EOF:
        # EOF was consumed: step back so that the terminator is still
        # there for whoever inspects the parser later on
        dec(self.parser.current)
        return
    # Keeps folding infix operators as long as they bind at least as
    # tightly as the requested precedence
    while getRule(self.parser.peek().kind).precedence >= precedence:
        let infixHandler = getRule(self.parser.advance().kind).infix
        if self.parser.peek().kind == EOF:
            self.parser.parseError(self.parser.previous(), "Expecting expression, got EOF")
        else:
            infixHandler(self, canAssign)
    if canAssign and self.parser.match(EQ):
        self.parser.parseError(self.parser.peek(), "Invalid assignment target")
|
|
|
|
|
|
proc expression(self: ref Compiler) =
    ## Parses a full expression. Assignment is the lowest
    ## precedence level an expression can have, so parsing
    ## starts from there
    parsePrecedence(self, Precedence.Assignment)
|
|
|
|
|
|
proc binary(self: ref Compiler, canAssign: bool) =
    ## Parses the right operand of a binary operator and then
    ## emits the instruction(s) implementing the operator itself.
    ## The operator is the last consumed token
    let operator = self.parser.previous().kind
    # The right operand is parsed one precedence level above the operator's own
    let operandLevel = Precedence(ord(getRule(operator).precedence) + 1)
    self.parsePrecedence(operandLevel)
    case operator
    of TokenType.PLUS: self.emitByte(OpCode.Add)
    of TokenType.MINUS: self.emitByte(OpCode.Subtract)
    of TokenType.SLASH: self.emitByte(OpCode.Divide)
    of TokenType.STAR: self.emitByte(OpCode.Multiply)
    of TokenType.MOD: self.emitByte(OpCode.Mod)
    of TokenType.POW: self.emitByte(OpCode.Pow)
    of TokenType.NE: self.emitBytes(OpCode.Equal, OpCode.Not)
    of TokenType.DEQ: self.emitByte(OpCode.Equal)
    of TokenType.GT: self.emitByte(OpCode.Greater)
    of TokenType.GE: self.emitBytes(OpCode.Less, OpCode.Not)
    of TokenType.LT: self.emitByte(OpCode.Less)
    of TokenType.LE: self.emitBytes(OpCode.Greater, OpCode.Not)
    of TokenType.CARET: self.emitByte(OpCode.Xor)
    of TokenType.SHL: self.emitByte(OpCode.Shl)
    of TokenType.SHR: self.emitByte(OpCode.Shr)
    of TokenType.BOR: self.emitByte(OpCode.Bor)
    of TokenType.BAND: self.emitByte(OpCode.Band)
    else: discard   # Unreachable
|
|
|
|
|
|
proc unary(self: ref Compiler, canAssign: bool) =
    ## Parses the operand of a unary operator (negation, logical
    ## not, binary inversion) and emits the matching instruction.
    ## The operator is the last consumed token
    let operator = self.parser.previous().kind
    if self.parser.peek().kind == EOF:
        self.parser.parseError(self.parser.previous(), "Expecting expression, got EOF")
        return
    self.parsePrecedence(Precedence.Unary)
    case operator
    of MINUS: self.emitByte(OpCode.Negate)
    of NEG: self.emitByte(OpCode.Not)
    of TILDE: self.emitByte(OpCode.Bnot)
    else: discard
|
|
|
|
|
|
template markObject*(self: ref Compiler, obj: ptr Obj): untyped =
    ## Records an object created at compile time in the compiler's
    ## objects list (those take up memory as well) so that the VM
    ## can reclaim the space later on. Evaluates to the object itself
    let marked = obj   # obj is evaluated exactly once
    self.objects.add(marked)
    marked
|
|
|
|
|
|
proc strVal(self: ref Compiler, canAssign: bool) =
    ## Parses a string literal: the delimiters are stripped from
    ## the lexeme of the last consumed token and the result is
    ## emitted as a constant
    let raw = self.parser.previous().lexeme
    let delimiter = $raw[0]   # TODO: Add proper escape sequences support
    let content = raw.unescape(delimiter, delimiter)
    self.emitConstant(self.markObject(jstring.newString(content)))
|
|
|
|
|
|
proc bracketAssign(self: ref Compiler, canAssign: bool) =
    ## Placeholder for assignments to a subscript,
    ## such as a[0] = "something". Does nothing for now
    discard   # TODO -> Implement this
|
|
|
|
|
|
proc bracket(self: ref Compiler, canAssign: bool) =
|
|
## Parses slice expressions, such as "hello"[0].
|
|
## A slice can take up to two arguments, a start
|
|
## and an end index in the chosen iterable.
|
|
## Both arguments are optional, so doing "hi"[::]
|
|
## will basically copy the string into a new object.
|
|
## Indexes start from 0, and while the start index is
|
|
## inclusive, the end index is not. If an end index is
|
|
## not specified, like in "hello"[0:], then it is
|
|
## assumed to be the length of the iterable. Likewise,
|
|
## if the start index is missing, it is assumed to be 0.
|
|
## Like in Python, using an end index that's out of bounds
|
|
## will not raise an error. Doing "hello"[0:999] will just
|
|
## return the whole string instead.
## A missing index is emitted as a Nil instruction; a single
## index emits Slice, while a range emits SliceRange.
## NOTE(review): the indentation of this listing was lost, so the
## nesting of the branches below (in particular of the check for
## '=') must be confirmed against the upstream file
|
|
if self.parser.peek.kind == TokenType.COLON:
|
|
self.emitByte(OpCode.Nil)
|
|
discard self.parser.advance()
|
|
if self.parser.peek().kind == TokenType.RS:
|
|
self.emitByte(OpCode.Nil)
|
|
else:
|
|
self.parsePrecedence(Precedence.Term)
|
|
self.emitByte(OpCode.SliceRange)
|
|
else:
|
|
self.parsePrecedence(Precedence.Term)
|
|
if self.parser.peek().kind == TokenType.RS:
|
|
self.emitByte(OpCode.Slice)
|
|
elif self.parser.peek().kind == TokenType.COLON:
|
|
discard self.parser.advance()
|
|
if self.parser.peek().kind == TokenType.RS:
|
|
self.emitByte(OpCode.Nil)
|
|
else:
|
|
self.parsePrecedence(Precedence.Term)
|
|
self.emitByte(OpCode.SliceRange)
|
|
if self.parser.peek().kind == TokenType.EQ:
|
|
discard self.parser.advance()
|
|
self.parsePrecedence(Precedence.Term)
|
|
self.parser.consume(TokenType.RS, "Expecting ']' after slice expression")
|
|
|
|
|
|
proc literal(self: ref Compiler, canAssign: bool) =
    ## Emits the instruction for a literal keyword (true, false,
    ## nil, inf, nan), which is the last consumed token
    let keyword = self.parser.previous().kind
    case keyword
    of TokenType.TRUE: self.emitByte(OpCode.True)
    of TokenType.FALSE: self.emitByte(OpCode.False)
    of TokenType.NIL: self.emitByte(OpCode.Nil)
    of TokenType.INF: self.emitByte(OpCode.Inf)
    of TokenType.NAN: self.emitByte(OpCode.Nan)
    else: discard   # Unreachable
|
|
|
|
|
|
proc number(self: ref Compiler, canAssign: bool) =
    ## Parses numerical constants. The lexeme of the last consumed
    ## token is converted to a float when it contains a dot and to
    ## an integer otherwise, then emitted as a constant. When the
    ## conversion fails a compile error is reported and nothing
    ## is emitted
    let value = self.parser.previous().lexeme
    var obj: ptr Obj
    try:
        if "." in value:
            obj = parseFloat(value).asFloat()
        else:
            obj = parseInt(value).asInt()
    except ValueError, OverflowError:
        # strutils reports out-of-range input with ValueError;
        # OverflowError is kept for the toolchains that raise it
        self.compileError("number literal is too big")
        # obj is still nil here: bail out instead of storing
        # a nil pointer in the constants table
        return
    self.emitConstant(obj)
|
|
|
|
|
|
proc grouping(self: ref Compiler, canAssign: bool) =
    ## Parses parenthesized expressions. Parentheses have no
    ## semantics of their own besides allowing a lower-precedence
    ## expression where a higher-precedence one is expected.
    ## An empty pair of parentheses emits a Nil instruction
    if self.parser.match(TokenType.EOF):
        self.parser.parseError(self.parser.previous(), "Expecting ')'")
        return
    if self.parser.match(RP):
        self.emitByte(OpCode.Nil)
        return
    self.expression()
    self.parser.consume(TokenType.RP, "Expecting ')' after parentheszed expression")
|
|
|
|
|
|
proc synchronize(self: ref Compiler) =
    ## Brings the parser back to a sane state after an error.
    ## Noting the error in panicMode and hadError is not enough
    ## to keep going and report further problems, while resuming
    ## right where the error occurred could produce a cascade of
    ## unhelpful messages. So the parser leaves panic mode and
    ## skips tokens until it is right past a semicolon or in front
    ## of a token that starts a statement (class, fun, var, for,
    ## if, while, return), then parsing resumes from there as if
    ## nothing happened: the code will not be compiled anyway, so
    ## we might as well tell the user what else is wrong with it.
    ## Note that hadError is never reset, but panicMode is
    self.parser.panicMode = false
    # Infinite loops are bad, so EOF must be taken into account
    while self.parser.peek().kind != TokenType.EOF:
        if self.parser.previous().kind == TokenType.SEMICOLON:
            return
        # A statement boundary was found, so the parser bails out
        if self.parser.peek().kind in [TokenType.CLASS, TokenType.FUN,
                                       TokenType.VAR, TokenType.FOR,
                                       TokenType.IF, TokenType.WHILE,
                                       TokenType.RETURN]:
            return
        discard self.parser.advance()
|
|
|
|
|
|
proc identifierConstant(self: ref Compiler, tok: Token): uint8 =
    ## Stores the name of an identifier as a string constant
    ## and returns its one-byte index in the constants table
    let nameObj = self.markObject(jstring.newString(tok.lexeme))
    result = self.makeConstant(nameObj)
|
|
|
|
|
|
proc identifierLongConstant(self: ref Compiler, tok: Token): array[3, uint8] =
    ## Same as identifierConstant, but the index is encoded
    ## on three bytes. Used when the constants table is longer
    ## than 255 elements
    let nameObj = self.markObject(jstring.newString(tok.lexeme))
    result = self.makeLongConstant(nameObj)
|
|
|
|
|
|
proc addLocal(self: ref Compiler, name: Token) =
    ## Registers a new local variable. Locals are resolved at
    ## compile time, unlike globals which are treated differently.
    ## A new local starts in a special "uninitialized" state
    ## (depth -1), which is what allows detecting mistakes such
    ## as var a = a; inside local scopes
    self.locals.add(Local(name: name, depth: -1))
    self.localCount += 1
|
|
|
|
|
|
proc declareVariable(self: ref Compiler) =
    ## Declares the variable named by the last consumed token.
    ## Only locals are declared at compile time, so nothing
    ## happens in the global scope. This works because locals
    ## and temporaries have stack semantics inside local scopes
    if self.scopeDepth != 0:
        self.addLocal(self.parser.previous())
|
|
|
|
|
|
proc parseVariable(self: ref Compiler, message: string): uint8 =
    ## Consumes an identifier (reporting the given message when
    ## there is none) and declares it. In the global scope the
    ## index of the name in the constants table is returned,
    ## inside local scopes the result is always 0
    self.parser.consume(TokenType.ID, message)
    self.declareVariable()
    if self.scopeDepth <= 0:
        result = self.identifierConstant(self.parser.previous())
    else:
        result = uint8 0
|
|
|
|
|
|
proc parseLongVariable(self: ref Compiler, message: string): array[3, uint8] =
    ## Counterpart of parseVariable that returns a three-byte
    ## index, used when the chunk's constants table holds more
    ## than 255 elements. Inside local scopes the result is
    ## always three zero bytes
    self.parser.consume(TokenType.ID, message)
    self.declareVariable()
    if self.scopeDepth <= 0:
        result = self.identifierLongConstant(self.parser.previous())
    else:
        result = [uint8 0, uint8 0, uint8 0]
|
|
|
|
|
|
proc markInitialized(self: ref Compiler) =
    ## Marks the most recently declared local as initialized
    ## and ready for use, by giving it the current scope depth.
    ## Nothing happens in the global scope
    if self.scopeDepth != 0:
        let newest = self.locals[self.localCount - 1]
        newest.depth = self.scopeDepth
|
|
|
|
|
|
proc defineVariable(self: ref Compiler, idx: uint8) =
    ## Defines a variable: in the global scope a DefineGlobal
    ## instruction is emitted with the given index as operand,
    ## inside local scopes the last declared local is simply
    ## marked as initialized
    if self.scopeDepth <= 0:
        self.emitBytes(OpCode.DefineGlobal, idx)
    else:
        self.markInitialized()
|
|
|
|
|
|
proc defineVariable(self: ref Compiler, idx: array[3, uint8]) =
    ## Same as the one-byte defineVariable, but the operand of
    ## DefineGlobal is a three-byte index. Used when the index
    ## of the name does not fit in a single byte
    if self.scopeDepth <= 0:
        self.emitByte(OpCode.DefineGlobal)
        self.emitBytes(idx)
    else:
        self.markInitialized()
|
|
|
|
|
|
proc resolveLocal(self: ref Compiler, name: Token): int =
    ## Looks for a local with the given name, scanning from the
    ## most recently declared one, and returns its slot or -1 if
    ## there is none. Reading a local that is still uninitialized,
    ## like in var a = a, is reported as a compile error
    # Slots are counted down from localCount - 1 for the last entry of locals
    let shift = self.localCount - self.locals.len
    for position in countdown(self.locals.high, 0):
        let candidate = self.locals[position]
        if candidate.name.lexeme != name.lexeme:
            continue
        if candidate.depth == -1:
            self.compileError("cannot read local variable in its own initializer")
        return position + shift
    return -1
|
|
|
|
|
|
proc namedVariable(self: ref Compiler, tok: Token, canAssign: bool) =
    ## Handles local and global variables assignment, as well
    ## as variable resolution. The name is first looked up among
    ## the locals and treated as a global when it is not found.
    ## An '=' following the name is only consumed when assignment
    ## is allowed in the current context
    var arg = self.resolveLocal(tok)
    var getOp, setOp: OpCode
    if arg != -1:
        getOp = OpCode.GetLocal
        setOp = OpCode.SetLocal
    else:
        getOp = OpCode.GetGlobal
        setOp = OpCode.SetGlobal
        arg = int self.identifierConstant(tok)
    # canAssign is tested first on purpose: when assignment is not allowed
    # the '=' must stay in the token stream so that parsePrecedence can
    # report the invalid assignment target
    if canAssign and self.parser.match(EQ):
        self.expression()
        self.emitBytes(setOp, uint8 arg)
    else:
        self.emitBytes(getOp, uint8 arg)
|
|
|
|
|
|
proc namedLongVariable(self: ref Compiler, tok: Token, canAssign: bool) =
    ## Handles local and global variables assignment, as well
    ## as variable resolution, emitting a three-byte operand.
    ## This is only called when a one-byte operand is not enough.
    ## An '=' following the name is only consumed when assignment
    ## is allowed in the current context
    let arg = self.resolveLocal(tok)
    # For locals the operand is the slot reinterpreted as three raw bytes
    var operand = cast[array[3, uint8]](arg)
    var getOp, setOp: OpCode
    if arg != -1:
        getOp = OpCode.GetLocal
        setOp = OpCode.SetLocal
    else:
        getOp = OpCode.GetGlobal
        setOp = OpCode.SetGlobal
        operand = self.identifierLongConstant(tok)
    # canAssign is tested first on purpose: when assignment is not allowed
    # the '=' must stay in the token stream so that parsePrecedence can
    # report the invalid assignment target
    if canAssign and self.parser.match(EQ):
        self.expression()
        self.emitByte(setOp)
    else:
        self.emitByte(getOp)
    self.emitBytes(operand)
|
|
|
|
|
|
|
|
proc variable(self: ref Compiler, canAssign: bool) =
    ## Emits the code to read or assign the variable named by
    ## the last consumed token, both for locals and globals.
    ## The long form is used once 255 or more locals are stored
    let nameToken = self.parser.previous()
    if self.locals.len >= 255:
        self.namedLongVariable(nameToken, canAssign)
    else:
        self.namedVariable(nameToken, canAssign)
|
|
|
|
|
|
proc varDeclaration(self: ref Compiler) =
    ## Parses a variable declaration: a name, an optional
    ## initializer (nil is used when it is missing) and a
    ## semicolon. The name is encoded on one byte or on three
    ## depending on how big the chunk's constants table already is
    let useShort = self.currentChunk().consts.len < 255
    var shortName: uint8
    var longName: array[3, uint8]
    if useShort:
        shortName = self.parseVariable("Expecting variable name")
    else:
        longName = self.parseLongVariable("Expecting variable name")
    if not self.parser.match(EQ):
        self.emitByte(OpCode.Nil)
    else:
        self.expression()
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after var declaration")
    if useShort:
        self.defineVariable(shortName)
    else:
        self.defineVariable(longName)
|
|
|
|
|
|
proc expressionStatement(self: ref Compiler) =
    ## Parses an expression used as a statement, that is an
    ## expression followed by a semicolon, and emits a Pop
    ## instruction right after it
    expression(self)
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after expression")
    emitByte(self, OpCode.Pop)
|
|
|
|
|
|
# TODO: This code will not be used right now as it might clash with the future GC, fix this to make it GC aware!
|
|
proc deleteVariable(self: ref Compiler, canAssign: bool) =
## Parses the operand of a delete operation and emits a DeleteGlobal
## or DeleteLocal instruction, depending on the current scope depth,
## followed by the index of the name in the constants table (one byte,
## or three when the table holds 255 or more elements). Deleting a
## number or string literal is reported as a compile error.
## NOTE(review): the indentation of this listing was lost, so whether
## the localCount decrement belongs to the else branch only must be
## confirmed against the upstream file
|
|
self.expression()
|
|
if self.parser.previous().kind in [TokenType.NUMBER, TokenType.STR]:
|
|
self.compileError("cannot delete a literal")
|
|
var code: OpCode
|
|
if self.scopeDepth == 0:
|
|
code = OpCode.DeleteGlobal
|
|
else:
|
|
code = OpCode.DeleteLocal
|
|
self.localCount = self.localCount - 1
|
|
if self.currentChunk.consts.len < 255:
|
|
var name = self.identifierConstant(self.parser.previous())
|
|
self.emitBytes(code, name)
|
|
else:
|
|
var name = self.identifierLongConstant(self.parser.previous())
|
|
self.emitBytes(code, name[0])
|
|
self.emitBytes(name[1], name[2])
|
|
|
|
|
|
proc parseBlock(self: ref Compiler) =
    ## Parses a block statement, that is a list of declarations
    ## running up to the closing brace (or to the end of file,
    ## in which case the missing brace is reported)
    while not (self.parser.check(TokenType.RB) or self.parser.check(TokenType.EOF)):
        self.declaration()
    self.parser.consume(TokenType.RB, "Expecting '}' after block statement")
|
|
|
|
|
|
proc beginScope(self: ref Compiler) =
    ## Opens a new scope. Increasing the scope depth is all it
    ## takes, since the only semantically interesting behavior
    ## of scopes is a change in names resolution
    self.scopeDepth += 1
|
|
|
|
|
|
proc endScope(self: ref Compiler) =
    ## Ends a scope, popping off any local that was created
    ## inside it along the way: a Pop instruction is emitted for
    ## each of them and their entries are dropped from the
    ## compiler's bookkeeping
    dec(self.scopeDepth)
    while self.localCount > 0 and self.locals[self.localCount - 1].depth > self.scopeDepth:
        self.emitByte(OpCode.Pop)
        dec(self.localCount)
    # addLocal appends to locals, while markInitialized and resolveLocal
    # index it through localCount: stale entries left behind here would
    # put the two out of step as soon as another local is declared
    if self.locals.len > self.localCount:
        self.locals.setLen(self.localCount)
|
|
|
|
|
|
proc emitJump(self: ref Compiler, opcode: OpCode): int =
    ## Emits a jump instruction followed by a two-byte placeholder
    ## offset and returns the position of the placeholder, which
    ## is meant to be handed to patchJump later (check patchJump
    ## for more info about how jumps work)
    self.emitByte(opcode)
    self.emitBytes(uint8 0xff, uint8 0xff)
    result = self.currentChunk().code.len - 2
|
|
|
|
|
|
proc patchJump(self: ref Compiler, offset: int) =
    ## Patches a previously emitted jump instruction.
    ## It is impossible to know how much code has to be jumped
    ## over before compiling that code, so jumps are first emitted
    ## with a placeholder offset. Once the code to skip has been
    ## compiled its size is known, and the placeholder located at
    ## the given position is overwritten with the real distance,
    ## stored on two bytes with the most significant one first.
    ## Because of that no more than 2^16 - 1 bytes of bytecode can
    ## be jumped over, which limits the size of if/else branches
    ## and loops (hopefully 65 thousands and change are enough for
    ## everyone). Longer jumps are reported as a compile error
    let distance = self.currentChunk().code.len - offset - 2
    if distance > int(uint16.high):
        self.compileError("too much code to jump over")
        return
    self.currentChunk().code[offset] = uint8((distance shr 8) and 0xff)
    self.currentChunk().code[offset + 1] = uint8(distance and 0xff)
|
|
|
|
|
|
proc ifStatement(self: ref Compiler) =
    ## Parses if statements in a C-style fashion: a parenthesized
    ## condition, a statement and an optional else branch. Running
    ## into the end of file halfway through is reported as an error
    self.parser.consume(TokenType.LP, "The if condition must be parenthesized")
    if self.parser.peek().kind == TokenType.EOF:
        return
    self.expression()
    if self.parser.peek().kind == TokenType.EOF:
        self.parser.parseError(self.parser.previous(), "The if condition must be parenthesized")
        return
    self.parser.consume(TokenType.RP, "The if condition must be parenthesized")
    if self.parser.peek().kind == TokenType.EOF:
        self.parser.parseError(self.parser.previous(), "Invalid syntax")
        return
    # Skips the "then" branch when the condition is false
    let thenJump = self.emitJump(OpCode.JumpIfFalse)
    self.emitByte(OpCode.Pop)
    self.statement()
    # Skips the "else" branch once the "then" branch has run
    let elseJump = self.emitJump(OpCode.Jump)
    self.patchJump(thenJump)
    self.emitByte(OpCode.Pop)
    if self.parser.match(TokenType.ELSE):
        self.statement()
    self.patchJump(elseJump)
|
|
|
|
|
|
proc emitLoop(self: ref Compiler, start: int) =
    ## Emits a Loop instruction that jumps backwards to the given
    ## position in the chunk. The distance is stored on two bytes,
    ## most significant one first, so a loop whose distance does
    ## not fit is reported as a compile error
    self.emitByte(OpCode.Loop)
    # The two operand bytes about to be written are part of the distance
    let distance = self.currentChunk().code.len - start + 2
    if distance > int(uint16.high):
        self.compileError("loop body is too large")
        return
    self.emitByte(uint8((distance shr 8) and 0xff))
    self.emitByte(uint8(distance and 0xff))
|
|
|
|
|
|
proc endLooping(self: ref Compiler) =
    ## Closes the innermost loop. The exit jump of the loop, if
    ## any, is patched and followed by a Pop instruction, then
    ## every Break placeholder found from the start of the loop
    ## body onwards is turned into a Jump landing at the current
    ## end of the chunk, which is what makes the break statement
    ## work. Finally the enclosing loop becomes the current one
    if self.loop.loopEnd != -1:
        self.patchJump(self.loop.loopEnd)
        self.emitByte(OpCode.Pop)
    var cursor = self.loop.body
    while cursor < self.currentChunk().code.len:
        if self.currentChunk().code[cursor] != uint(OpCode.Break):
            inc(cursor)
            continue
        self.currentChunk().code[cursor] = uint8(OpCode.Jump)
        self.patchJump(cursor + 1)
        cursor += 3   # Steps over the opcode and its two operand bytes
    self.loop = self.loop.outer
|
|
|
|
|
|
proc whileStatement(self: ref Compiler) =
    ## Parses while loops in a C-style fashion: a parenthesized
    ## condition followed by a statement. Running into the end
    ## of file halfway through is reported as an error
    self.loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk().code.len, alive: true, loopEnd: -1)
    self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
    block parsing:
        if self.parser.peek().kind == TokenType.EOF:
            break parsing
        self.expression()
        if self.parser.peek().kind == TokenType.EOF:
            self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
            break parsing
        self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
        if self.parser.peek().kind == TokenType.EOF:
            self.parser.parseError(self.parser.previous(), "Invalid syntax")
            break parsing
        # Leaves the loop when the condition is false
        self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
        self.emitByte(OpCode.Pop)
        self.loop.body = self.currentChunk().code.len
        self.statement()
        self.emitLoop(self.loop.start)
        self.patchJump(self.loop.loopEnd)
        self.emitByte(OpCode.Pop)
    # Runs on every path, so that the enclosing loop is always restored
    self.endLooping()
|
|
|
|
|
|
proc forStatement(self: ref Compiler) =
|
|
## Parses for loops in a C-style fashion. The loop gets a scope
## of its own, and its header is made of three optional clauses:
## an initializer (a var declaration or an expression statement),
## a condition and an increment expression. When an increment is
## present the body is jumped to first and the loop start is moved
## to the increment, so that the increment runs after the body.
## NOTE(review): the indentation of this listing was lost, so the
## nesting of the branches below (mostly the ones dealing with an
## unexpected end of file) must be confirmed against the upstream file
|
|
self.beginScope()
|
|
self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
|
|
if self.parser.peek.kind != TokenType.EOF:
|
|
if self.parser.match(TokenType.SEMICOLON):
|
|
discard
|
|
elif self.parser.match(TokenType.VAR):
|
|
self.varDeclaration()
|
|
else:
|
|
self.expressionStatement()
|
|
var loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
|
|
self.loop = loop
|
|
if not self.parser.match(TokenType.SEMICOLON):
|
|
self.expression()
|
|
if self.parser.previous.kind != TokenType.EOF:
|
|
self.parser.consume(TokenType.SEMICOLON, "Expecting ';'")
|
|
self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
|
|
self.emitByte(OpCode.Pop)
|
|
else:
|
|
self.parser.current -= 1
|
|
self.parser.parseError(self.parser.previous(), "Invalid syntax")
|
|
if not self.parser.match(RP):
|
|
var bodyJump = self.emitJump(OpCode.Jump)
|
|
var incrementStart = self.currentChunk.code.len
|
|
if self.parser.peek.kind != TokenType.EOF:
|
|
self.expression()
|
|
self.emitByte(OpCode.Pop)
|
|
self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
|
|
self.emitLoop(self.loop.start)
|
|
self.loop.start = incrementStart
|
|
self.patchJump(bodyJump)
|
|
if self.parser.peek.kind != TokenType.EOF:
|
|
self.loop.body = self.currentChunk.code.len
|
|
self.statement()
|
|
self.emitLoop(self.loop.start)
|
|
else:
|
|
self.parser.current -= 1
|
|
self.parser.parseError(self.parser.previous(), "Invalid syntax")
|
|
if self.loop.loopEnd != -1:
|
|
self.patchJump(self.loop.loopEnd)
|
|
self.emitByte(OpCode.Pop)
|
|
else:
|
|
self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
|
|
self.endLooping()
|
|
self.endScope()
|
|
|
|
|
|
proc parseBreak(self: ref Compiler) =
    ## Parses break statements. A break statement causes the
    ## current loop to exit and jump to its end: the locals
    ## declared inside the loop are popped and a Break placeholder
    ## is emitted, to be patched once the loop has been compiled.
    ## Using break outside of a loop is an error
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous(), "'break' outside loop")
        return
    self.parser.consume(TokenType.SEMICOLON, "missing semicolon after statement")
    # Only Pop instructions are emitted, the locals stay registered
    for slot in countdown(self.localCount - 1, 0):
        if self.locals[slot].depth <= self.loop.depth:
            break
        self.emitByte(OpCode.Pop)
    discard self.emitJump(OpCode.Break)
|
|
|
|
|
|
proc parseAnd(self: ref Compiler, canAssign: bool) =
    ## Parses the right side of expressions such as a and b.
    ## The right operand is jumped over when the left one
    ## is false
    let shortCircuit = self.emitJump(OpCode.JumpIfFalse)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.And)
    self.patchJump(shortCircuit)
|
|
|
|
|
|
proc parseOr(self: ref Compiler, canAssign: bool) =
    ## Parses the right side of expressions such as a or b.
    ## When the left operand is false execution hops over the
    ## unconditional jump and evaluates the right operand,
    ## otherwise the right operand is skipped altogether
    let toRightOperand = self.emitJump(OpCode.JumpIfFalse)
    let toEnd = self.emitJump(OpCode.Jump)
    self.patchJump(toRightOperand)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.Or)
    self.patchJump(toEnd)
|
|
|
|
|
|
proc continueStatement(self: ref Compiler) =
    ## Parses continue statements inside loops. The continue
    ## statement causes the loop to skip to the next iteration:
    ## the locals declared inside the loop are popped and a jump
    ## back to the start of the loop is emitted. Using continue
    ## outside of a loop is an error
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous(), "'continue' outside loop")
        return
    self.parser.consume(TokenType.SEMICOLON, "missing semicolon after statement")
    # Only Pop instructions are emitted, the locals stay registered
    for slot in countdown(self.localCount - 1, 0):
        if self.locals[slot].depth <= self.loop.depth:
            break
        self.emitByte(OpCode.Pop)
    self.emitLoop(self.loop.start)
|
|
|
|
|
|
proc parseFunction(self: ref Compiler, funType: FunctionType) =
    ## Parses function declarations (parameters and body) with
    ## a dedicated compiler instance and emits the resulting
    ## function object as a constant of the enclosing compiler.
    ## Functions can have keyword arguments (WIP), but once a
    ## parameter is declared as a keyword one, all subsequent
    ## parameters must be keyword ones as well
    let fnCompiler = initCompiler(funType, self, self.parser, self.file)
    fnCompiler.beginScope()
    fnCompiler.parser.consume(LP, "Expecting '(' after function name")
    if fnCompiler.parser.hadError:
        return
    var seenParams: seq[string] = @[]
    var defaultFollows = false
    if not fnCompiler.parser.check(RP):
        while true:
            fnCompiler.function.arity += 1
            if fnCompiler.function.arity + fnCompiler.function.optionals > 255:
                fnCompiler.compileError("cannot have more than 255 arguments")
                break
            let paramIdx = fnCompiler.parseVariable("expecting parameter name")
            if fnCompiler.parser.hadError:
                return
            let paramName = fnCompiler.parser.previous().lexeme
            if paramName in seenParams:
                fnCompiler.compileError("duplicate parameter name in function declaration")
                return
            seenParams.add(paramName)
            fnCompiler.defineVariable(paramIdx)
            if fnCompiler.parser.match(EQ):
                if fnCompiler.parser.peek().kind == EOF:
                    fnCompiler.compileError("Unexpected EOF")
                    return
                # The parameter has a default value: it is counted
                # as an optional one rather than as a mandatory one
                fnCompiler.function.arity -= 1
                fnCompiler.function.optionals += 1
                fnCompiler.expression()
                # self.function.defaults.add(self.parser.previous.lexeme)    # TODO
                defaultFollows = true
            elif defaultFollows:
                fnCompiler.compileError("non-default argument follows default argument")
                return
            if not fnCompiler.parser.match(COMMA):
                break
    fnCompiler.parser.consume(RP, "Expecting ')' after parameters")
    fnCompiler.parser.consume(LB, "Expecting '{' before function body")
    fnCompiler.parseBlock()
    let fun = fnCompiler.endCompiler()
    # The function object is a constant of the enclosing compiler's chunk
    let parent = fnCompiler.enclosing
    if parent.currentChunk().consts.len < 255:
        parent.emitBytes(OpCode.Constant, parent.makeConstant(fun))
    else:
        parent.emitByte(OpCode.ConstantLong)
        parent.emitBytes(parent.makeLongConstant(fun))
|
|
|
|
|
|
proc funDeclaration(self: ref Compiler) =
    ## Parses a function declaration: the name is parsed as a
    ## variable and marked as initialized before the function
    ## itself is parsed, and the variable is defined afterwards
    let nameSlot = self.parseVariable("expecting function name")
    self.markInitialized()
    self.parseFunction(FunctionType.FUNC)
    self.defineVariable(nameSlot)
|
|
|
|
|
|
proc argumentList(self: ref Compiler): uint8 =
    ## Parses the comma-separated arguments of a function call up
    ## to the closing parenthesis and returns how many there were.
    ## A compile error is reported when one more argument is parsed
    ## after 255 have already been counted; in that case the proc
    ## returns right away, without consuming the closing parenthesis
    if not self.parser.check(RP):
        var moreArguments = true
        while moreArguments:
            self.expression()
            if result == high(uint8):
                self.compileError("cannot have more than 255 arguments")
                return
            inc(result)
            moreArguments = self.parser.match(COMMA)
    self.parser.consume(RP, "Expecting ')' after arguments")
|
|
|
|
|
|
proc call(self: ref Compiler, canAssign: bool) =
    ## Infix handler for function calls: parses the argument
    ## list and emits a Call instruction whose operand is the
    ## number of arguments that were parsed
    self.emitBytes(OpCode.Call, self.argumentList())
|
|
|
|
|
|
proc returnStatement(self: ref Compiler) =
    ## Parses a return statement and emits the bytecode for it.
    ## A compile error is reported when the statement appears in
    ## the SCRIPT context; parsing carries on after the error
    if self.context == SCRIPT:
        self.compileError("'return' outside function")
    if self.parser.match(TokenType.SEMICOLON):
        # Empty return: a Nil is emitted in place of the value
        self.emitByte(OpCode.Nil)
    else:
        self.expression()
        self.parser.consume(TokenType.SEMICOLON, "missing semicolon after return statement")
    self.emitByte(OpCode.Return)
|
|
|
|
proc statement(self: ref Compiler) =
    ## Parses a single statement by dispatching on its leading
    ## token. Anything that does not start with a statement
    ## keyword or an opening brace is an expression statement
    if self.parser.match(TokenType.FOR):
        self.forStatement()
    elif self.parser.match(TokenType.IF):
        self.ifStatement()
    elif self.parser.match(TokenType.WHILE):
        self.whileStatement()
    elif self.parser.match(TokenType.RETURN):
        self.returnStatement()
    elif self.parser.match(TokenType.CONTINUE):
        self.continueStatement()
    elif self.parser.match(TokenType.BREAK):
        self.parseBreak()
    elif self.parser.match(TokenType.LB):
        # A bare block gets a scope of its own
        self.beginScope()
        self.parseBlock()
        self.endScope()
    else:
        self.expressionStatement()
|
|
|
|
|
|
proc declaration(self: ref Compiler) =
    ## Parses a declaration: a function declaration, a variable
    ## declaration or, failing both, a plain statement. If the
    ## parser is in panic mode afterwards, it is synchronized
    if self.parser.match(TokenType.FUN):
        self.funDeclaration()
    elif self.parser.match(TokenType.VAR):
        self.varDeclaration()
    else:
        self.statement()
    if self.parser.panicMode:
        self.synchronize()
|
|
|
|
|
|
# The array of all parse rules
|
|
# The table is indexed by TokenType and filled positionally, so the rows
# must stay in the declaration order of that enum. The trailing comment
# on each row is the token the row is meant for.
# NOTE(review): DEQ sits at Comparison while NE sits at Equality - confirm
# this asymmetry is intended.
# NOTE(review): INF, NAN and TILDE carry Precedence.Term although they have
# no infix handler - confirm parsePrecedence copes with a nil infix rule.
var rules: array[TokenType, ParseRule] = [
    makeRule(nil,      binary,   Precedence.Term),        # PLUS
    makeRule(unary,    binary,   Precedence.Term),        # MINUS
    makeRule(nil,      binary,   Precedence.Factor),      # SLASH
    makeRule(nil,      binary,   Precedence.Factor),      # STAR
    makeRule(unary,    nil,      Precedence.None),        # NEG
    makeRule(nil,      binary,   Precedence.Equality),    # NE
    makeRule(nil,      nil,      Precedence.None),        # EQ
    makeRule(nil,      binary,   Precedence.Comparison),  # DEQ
    makeRule(nil,      binary,   Precedence.Comparison),  # LT
    makeRule(nil,      binary,   Precedence.Comparison),  # GE
    makeRule(nil,      binary,   Precedence.Comparison),  # LE
    makeRule(nil,      binary,   Precedence.Factor),      # MOD
    makeRule(nil,      binary,   Precedence.Factor),      # POW
    makeRule(nil,      binary,   Precedence.Comparison),  # GT
    makeRule(grouping, call,     Precedence.Call),        # LP
    makeRule(nil,      nil,      Precedence.None),        # RP
    makeRule(nil,      bracket,  Precedence.Call),        # LS
    makeRule(nil,      nil,      Precedence.None),        # LB
    makeRule(nil,      nil,      Precedence.None),        # RB
    makeRule(nil,      nil,      Precedence.None),        # COMMA
    makeRule(nil,      nil,      Precedence.None),        # DOT
    makeRule(variable, nil,      Precedence.None),        # ID
    makeRule(nil,      nil,      Precedence.None),        # RS
    makeRule(number,   nil,      Precedence.None),        # NUMBER
    makeRule(strVal,   nil,      Precedence.None),        # STR
    makeRule(nil,      nil,      Precedence.None),        # SEMICOLON
    makeRule(nil,      parseAnd, Precedence.And),         # AND
    makeRule(nil,      nil,      Precedence.None),        # CLASS
    makeRule(nil,      nil,      Precedence.None),        # ELSE
    makeRule(nil,      nil,      Precedence.None),        # FOR
    makeRule(nil,      nil,      Precedence.None),        # FUN
    makeRule(literal,  nil,      Precedence.None),        # FALSE
    makeRule(nil,      nil,      Precedence.None),        # IF
    makeRule(literal,  nil,      Precedence.None),        # NIL
    makeRule(nil,      nil,      Precedence.None),        # RETURN
    makeRule(nil,      nil,      Precedence.None),        # SUPER
    makeRule(nil,      nil,      Precedence.None),        # THIS
    makeRule(nil,      parseOr,  Precedence.Or),          # OR
    makeRule(literal,  nil,      Precedence.None),        # TRUE
    makeRule(nil,      nil,      Precedence.None),        # VAR
    makeRule(nil,      nil,      Precedence.None),        # WHILE
    makeRule(nil,      nil,      Precedence.None),        # DEL # TODO: Fix del statement to make it GC-aware
    makeRule(nil,      nil,      Precedence.None),        # BREAK
    makeRule(nil,      nil,      Precedence.None),        # EOF
    makeRule(nil,      nil,      Precedence.None),        # COLON
    makeRule(nil,      nil,      Precedence.None),        # CONTINUE
    makeRule(nil,      binary,   Precedence.Term),        # CARET
    makeRule(nil,      binary,   Precedence.Term),        # SHL
    makeRule(nil,      binary,   Precedence.Term),        # SHR
    makeRule(literal,  nil,      Precedence.Term),        # INF
    makeRule(literal,  nil,      Precedence.Term),        # NAN
    makeRule(nil,      binary,   Precedence.Term),        # BAND
    makeRule(nil,      binary,   Precedence.Term),        # BOR
    makeRule(unary,    nil,      Precedence.Term),        # TILDE
]
|
|
|
|
|
|
proc getRule(kind: TokenType): ParseRule =
    ## Looks up the parse rule (prefix handler, infix handler
    ## and precedence) registered for the given token type
    rules[kind]
|
|
|
|
|
|
proc compile*(self: ref Compiler, source: string): ptr Function =
    ## Compiles a source string into a function object by wiring
    ## the lexer, the parser and the compiler together.
    ## Returns nil if the lexer errored or if the parser recorded
    ## an error while compiling
    var scanner = initLexer(source, self.file)
    let tokens = scanner.lex()
    if scanner.errored:
        return nil
    self.parser = initParser(tokens, self.file)
    while not self.parser.match(EOF):
        self.declaration()
    # The compiler is ended even when parsing failed
    let compiled = self.endCompiler()
    if self.parser.hadError:
        return nil
    result = compiled
|
|
|
|
|
|
proc initParser*(tokens: seq[Token], file: string): Parser =
    ## Creates a parser positioned at the first of the given
    ## tokens, with both its error and panic flags cleared
    Parser(
        current: 0,
        tokens: tokens,
        hadError: false,
        panicMode: false,
        file: file
    )
|
|
|
|
proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil, parser: Parser = initParser(@[], ""), file: string): ref Compiler =
    ## Initializes a new compiler object and returns a reference
    ## to it.
    ##
    ## - context: the kind of code being compiled (SCRIPT or a function)
    ## - enclosing: the compiler this one is nested in, or nil
    ## - parser: the parser to use; note that its file field is
    ##   overwritten with the file argument
    ## - file: the name of the file being compiled
    ##
    ## When context is not SCRIPT and an enclosing compiler is given,
    ## the new function is named after the lexeme of the token the
    ## enclosing compiler's parser consumed last
    result = new(Compiler)
    result.parser = parser
    result.function = nil  # Garbage collection paranoia
    result.locals = @[]
    result.scopeDepth = 0
    result.localCount = 0
    result.loop = Loop(alive: false, loopEnd: -1)
    result.objects = @[]
    result.context = context
    result.enclosing = enclosing
    result.file = file
    result.parser.file = file
    # The first local slot is taken by a placeholder entry with an empty name
    result.locals.add(Local(depth: 0, name: Token(kind: EOF, lexeme: "")))
    inc(result.localCount)
    result.function = result.markObject(newFunction())
    # If we're compiling a function, we give it its name. The enclosing
    # compiler defaults to nil, so it must be checked before it is
    # dereferenced: without one there is no token to take the name from
    if context != SCRIPT and enclosing != nil:
        result.function.name = jstring.newString(enclosing.parser.previous().lexeme)
|
|
|
|
# This way the compiler can be executed on its own
|
|
# without the VM
|
|
when isMainModule:
    # Minimal REPL: every line read from stdin is compiled and, when
    # compilation succeeds, the resulting chunk is disassembled.
    # The loop ends when reading from stdin raises an IOError
    let compiler: ref Compiler = initCompiler(SCRIPT, file="test")
    echo "JAPL Compiler REPL"
    while true:
        try:
            stdout.write("=> ")
            let compiled = compiler.compile(stdin.readLine())
            if not compiled.isNil:
                disassembleChunk(compiled.chunk, "test")
        except IOError:
            echo ""
            break
|
|
|