2020-10-21 22:49:08 +02:00
|
|
|
# Copyright 2020 Mattia Giambirtone
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2020-10-21 22:34:04 +02:00
|
|
|
|
2020-10-19 12:32:08 +02:00
|
|
|
## The JAPL bytecode compiler
|
2020-10-18 18:09:12 +02:00
|
|
|
|
|
|
|
|
2020-08-19 12:40:01 +02:00
|
|
|
import strutils
|
2020-10-25 15:08:26 +01:00
|
|
|
import sequtils
|
2020-08-19 12:40:01 +02:00
|
|
|
import algorithm
|
2020-08-08 16:19:44 +02:00
|
|
|
import strformat
|
2021-01-17 16:54:55 +01:00
|
|
|
import tables
|
|
|
|
|
|
|
|
import multibyte
|
2021-02-19 16:54:19 +01:00
|
|
|
import lexer
|
2020-10-23 17:14:55 +02:00
|
|
|
import meta/opcode
|
2020-10-25 12:45:03 +01:00
|
|
|
import meta/token
|
2020-08-20 22:51:56 +02:00
|
|
|
import meta/looptype
|
2020-12-26 17:01:03 +01:00
|
|
|
import types/baseObject
|
|
|
|
import types/function
|
|
|
|
import types/numbers
|
|
|
|
import types/japlString
|
2021-01-17 16:54:55 +01:00
|
|
|
import types/iterable
|
2021-03-18 15:09:36 +01:00
|
|
|
import types/arrayList
|
2020-10-25 15:08:26 +01:00
|
|
|
import config
|
2020-10-17 16:23:59 +02:00
|
|
|
when isMainModule:
|
|
|
|
import util/debug
|
2021-01-17 16:54:55 +01:00
|
|
|
import types/methods
|
2021-02-28 18:09:19 +01:00
|
|
|
when DEBUG_TRACE_COMPILER:
|
|
|
|
import terminal
|
2020-08-08 16:19:44 +02:00
|
|
|
|
2020-12-26 17:01:03 +01:00
|
|
|
|
2020-08-09 10:09:03 +02:00
|
|
|
type
    Compiler* = ref object
        ## The state of the compiler
        enclosing*: Compiler               # Not used in this part of the file; presumably the outer compiler for nested functions -- TODO confirm
        function*: ptr Function            # Function being compiled; currentChunk() returns its chunk
        context*: FunctionType             # Not used in this part of the file -- TODO confirm meaning
        locals*: seq[Local]                # Local variables, appended to by addLocal()
        localCount*: int                   # Counter incremented by addLocal(); used as the index base when resolving locals
        scopeDepth*: int                   # 0 is treated as the global scope, anything greater as a local scope
        parser*: Parser                    # Token cursor the compiler reads from
        loop*: Loop                        # Not used in this part of the file -- TODO confirm meaning
        objects*: ptr ArrayList[ptr Obj]   # Compile-time objects recorded by markObject()
        file*: ptr String                  # File name shown in compile error messages
        interned*: Table[string, ptr Obj]  # String constants already created by strVal(), keyed by their text
        afterReturn: bool                  # Not used in this part of the file -- TODO confirm meaning

    Local* = ref object
        ## A local variable
        name*: Token   # Token holding the variable's name
        depth*: int    # -1 while uninitialized, then the scope depth set by markInitialized()

    Parser* = ref object
        ## A Parser object
        current*: int                  # Index of the token returned by peek()
        tokens*: ptr ArrayList[Token]  # The token list being walked
        hadError*: bool                # Set on the first error and never reset
        panicMode*: bool               # Set while an error is being handled; cleared by synchronize()
        file*: ptr String              # File name shown in parse error messages

    Precedence {.pure.} = enum
        ## Precedence levels for the Pratt parser. The declaration
        ## order matters: parsePrecedence() compares levels with `<=`
        ## and binary() asks for the level right after an operator's own
        None,
        Assign,
        Or,
        And,
        Eq,
        Comp,
        As,
        Is,
        Term,
        Factor,
        Unary,
        Exp,
        Call,
        Primary

    ParseFn = proc(self: Compiler, canAssign: bool): void
        ## Signature shared by all prefix and infix parsing handlers

    ParseRule = ref object
        ## The handlers and the precedence tied to a token type
        prefix: ParseFn
        infix: ParseFn
        precedence: Precedence
|
|
|
|
|
|
|
|
|
|
|
|
proc makeRule(prefix, infix: ParseFn, precedence: Precedence): ParseRule =
    ## Builds a new parsing rule out of a prefix handler, an
    ## infix handler and a precedence level
    new(result)
    result.prefix = prefix
    result.infix = infix
    result.precedence = precedence
|
2020-08-08 16:19:44 +02:00
|
|
|
|
|
|
|
|
2021-01-11 08:06:55 +01:00
|
|
|
proc advance(self: Parser): Token =
    ## Returns the token at the current index and then
    ## moves the index forward by one
    let consumed = self.tokens[self.current]
    self.current += 1
    return consumed
|
|
|
|
|
|
|
|
|
|
|
|
proc peek(self: Parser): Token =
    ## Looks at the current token, leaving the parser's
    ## position untouched
    result = self.tokens[self.current]
|
|
|
|
|
|
|
|
|
2021-01-16 18:14:22 +01:00
|
|
|
proc peekNext(self: Parser): Token =
    ## Returns the token right after the current one without
    ## consuming anything, or a synthetic EOF token (with an
    ## empty lexeme) when there is no such token
    let nextIndex = self.current + 1
    # The index that is actually read must be in bounds: comparing
    # self.current against the last valid index (as was done before)
    # still allowed a read one element past the end of the list
    if nextIndex < len(self.tokens):
        return self.tokens[nextIndex]
    return Token(kind: EOF, lexeme: "")
|
|
|
|
|
|
|
|
|
2020-08-08 16:19:44 +02:00
|
|
|
proc previous(self: Parser): Token =
    ## Returns the token located just before the current
    ## index, i.e. the one that was consumed last
    result = self.tokens[self.current - 1]
|
|
|
|
|
|
|
|
|
2020-08-15 08:52:58 +02:00
|
|
|
proc check(self: Parser, kind: TokenType): bool =
    ## Tells whether the current token is of the given
    ## type. The token is not consumed
    let current = self.peek()
    result = current.kind == kind
|
|
|
|
|
|
|
|
|
2021-01-16 18:14:22 +01:00
|
|
|
proc checkNext(self: Parser, kind: TokenType): bool =
    ## Tells whether the token after the current one is
    ## of the given type. Nothing is consumed
    let upcoming = self.peekNext()
    result = upcoming.kind == kind
|
|
|
|
|
|
|
|
|
2021-01-11 08:06:55 +01:00
|
|
|
proc match(self: Parser, kind: TokenType): bool =
    ## Consumes the current token and returns true if
    ## it is of the given type. Otherwise the parser is
    ## left untouched and false is returned
    result = self.check(kind)
    if result:
        discard self.advance()
|
|
|
|
|
|
|
|
|
2021-01-11 08:06:55 +01:00
|
|
|
proc synchronize(self: Parser) =
    ## Brings the parser back to a sane state after an error.
    ## Errors are recorded in the panicMode and hadError fields,
    ## but that alone does not let the parser move on and report
    ## further problems, while resuming right at the faulty token
    ## tends to produce a cascade of unhelpful messages. So
    ## panicMode is cleared (hadError never is) and tokens are
    ## skipped until the token just consumed is a semicolon, or
    ## the current token starts a statement (class, fun, var,
    ## for, if, while, return), or EOF is reached. The code will
    ## not be compiled anyway, so we might as well tell the user
    ## about anything else that is wrong with it
    const statementStarts = {TokenType.CLASS, TokenType.FUN, TokenType.VAR,
                             TokenType.FOR, TokenType.IF, TokenType.WHILE,
                             TokenType.RETURN}
    self.panicMode = false
    # Stopping at EOF is what prevents an endless loop here
    while self.peek().kind != TokenType.EOF:
        if self.previous().kind == TokenType.SEMICOLON:
            break   # A statement just ended
        if self.peek().kind in statementStarts:
            break   # A new statement begins right here
        discard self.advance()
|
|
|
|
|
|
|
|
|
|
|
|
proc parseError(self: Parser, token: Token, message: string) =
    ## Reports a parsing error on the standard error file,
    ## mentioning the file, the line and the lexeme of the
    ## given token. The parser is designed to report all the
    ## syntactical errors of a file in one go instead of
    ## stopping at the first one, so after the message has
    ## been written both error flags are raised and the parser
    ## is synchronized (which lowers panicMode again)
    if not self.panicMode:   # While panicking, further errors are silently dropped
        self.panicMode = true
        self.hadError = true
        write(stderr, &"A fatal error occurred while parsing '{self.file}', line {token.line}, at '{token.lexeme}' -> {message}\n")
        self.synchronize()
|
2020-08-08 16:19:44 +02:00
|
|
|
|
|
|
|
|
2021-01-11 08:06:55 +01:00
|
|
|
proc consume(self: Parser, expected: TokenType, message: string) =
    ## Consumes the current token if it is of the expected
    ## type. If it is not, a parsing error carrying the given
    ## message is reported at the current token instead
    if not self.check(expected):
        self.parseError(self.peek(), message)
        return
    discard self.advance()
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc currentChunk(self: Compiler): var Chunk =
    ## Gives mutable access to the chunk of the function
    ## that is currently being compiled
    return self.function.chunk
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc compileError(self: Compiler, message: string) =
    ## Reports a compilation error on the standard error
    ## file, using the parser's current token for the line
    ## and lexeme shown to the user, then raises both the
    ## hadError and panicMode flags of the parser
    write(stderr, &"A fatal error occurred while compiling '{self.file}', line {self.parser.peek().line}, at '{self.parser.peek().lexeme}' -> {message}\n")
    let parser = self.parser
    parser.hadError = true
    parser.panicMode = true
|
2020-08-19 12:40:01 +02:00
|
|
|
|
2020-08-30 12:35:37 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc emitByte(self: Compiler, byt: OpCode|uint8) =
    ## Writes a single byte (an opcode or a raw operand) to
    ## the chunk being compiled, tagging it with the line of
    ## the token that was consumed last
    when DEBUG_TRACE_COMPILER:
        # Compile-time switch: traces every emitted byte on stdout
        stdout.write(&"DEBUG - Compiler: Emitting {$byt} (uint8 value of {$(uint8 byt)}")
        if byt.int() <= OpCode.high().int():
            # The value is small enough to be shown as an opcode, too
            stdout.write(&"; opcode value of {$byt.OpCode}")
        stdout.write(")\n")

    self.currentChunk().writeChunk(uint8(byt), self.parser.previous().line)
|
2020-08-08 19:08:09 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
    ## Emits two bytes, one after the other. Handy to write an
    ## operator along with its operand, or for instructions
    ## that take up more than one byte. Both values are
    ## converted to uint8 before being handed to emitByte
    for value in [uint8(byt1), uint8(byt2)]:
        self.emitByte(value)
|
2020-08-08 19:08:09 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
    ## Helper that writes an array of 3 bytes to the current
    ## chunk, emitting its elements in order
    for value in bytarr:
        self.emitByte(value)
|
|
|
|
|
|
|
|
|
2021-01-16 11:47:01 +01:00
|
|
|
proc makeConstant(self: Compiler, val: ptr Obj): array[3, uint8] =
    ## Adds the given object to the constant table of the
    ## current chunk and returns the 3-byte array produced by
    ## addConstant. The index in the table is encoded as an
    ## array (which is later reconstructed into an integer at
    ## runtime) to store more than 256 constants in the table
    return self.currentChunk().addConstant(val)
|
2020-08-08 19:08:09 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc emitConstant(self: Compiler, obj: ptr Obj) =
    ## Emits a Constant instruction followed by its 3-byte
    ## operand, as returned by makeConstant for the object
    emitByte(self, OpCode.Constant)
    let operand = self.makeConstant(obj)
    self.emitBytes(operand)
|
2020-08-08 19:08:09 +02:00
|
|
|
|
2020-08-09 21:45:50 +02:00
|
|
|
|
2020-10-25 12:45:03 +01:00
|
|
|
# Forward declarations for later use: the routines below are
# referenced before their implementation appears in the file
proc initParser*(tokens: seq[Token], file: string): Parser
proc getRule(kind: TokenType): ParseRule
proc statement(self: Compiler)
proc declaration(self: Compiler)
proc initCompiler*(context: FunctionType, enclosing: Compiler = nil,
                   parser: Parser = initParser(@[], ""),
                   file: string): Compiler
|
2020-08-09 10:09:03 +02:00
|
|
|
|
2020-08-08 19:08:09 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc parsePrecedence(self: Compiler, precedence: Precedence) =
    ## Core of the Pratt parser: parses an expression whose
    ## operators all have at least the given precedence. The
    ## first token is handled by its prefix rule, then infix
    ## rules are applied for as long as the upcoming token's
    ## precedence is not lower than the requested one
    if self.parser.peek().kind == TokenType.EOF:
        self.parser.parseError(self.parser.peek(), "Expecting expression")
        return
    discard self.parser.advance()
    let prefixRule = getRule(self.parser.previous().kind).prefix
    if prefixRule.isNil:
        # A token without a prefix rule cannot start an expression
        self.parser.parseError(self.parser.previous(), "Expecting expression")
        return
    # Assignment is only allowed at the lowest levels: this is what
    # makes it possible to detect invalid targets such as "hello" = 3;
    let canAssign = precedence <= Precedence.Assign
    prefixRule(self, canAssign)
    if self.parser.previous().kind == EOF:
        # The prefix rule swallowed the EOF terminator: step back so
        # that it is still there for the rest of the parser, and bail out
        dec(self.parser.current)
        return
    # Keeps consuming operators whose precedence is not lower than
    # the one that was asked for
    while precedence <= getRule(self.parser.peek().kind).precedence:
        let infixRule = getRule(self.parser.advance().kind).infix
        if self.parser.peek().kind == EOF:
            self.parser.parseError(self.parser.previous(), "Expecting expression, got EOF")
        else:
            infixRule(self, canAssign)
    # An '=' that nobody consumed at this point has no valid target
    if canAssign and self.parser.match(TokenType.EQ):
        self.parser.parseError(self.parser.peek(), "Invalid assignment target")
|
2020-08-09 10:09:03 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc expression(self: Compiler) =
    ## Parses a full expression, starting from the assignment
    ## precedence level (the highest-level kind of expression)
    parsePrecedence(self, Precedence.Assign)
|
2020-08-09 10:09:03 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc binary(self: Compiler, canAssign: bool) =
    ## Infix handler for binary operators. The operator is the
    ## token that was consumed last: its right operand is parsed
    ## first, then the matching instruction(s) are emitted
    let operator = self.parser.previous().kind
    let rule = getRule(operator)
    # The right operand is parsed one precedence level above the operator's own
    self.parsePrecedence(Precedence(ord(rule.precedence) + 1))
    case operator
    of TokenType.PLUS: self.emitByte(OpCode.Add)
    of TokenType.MINUS: self.emitByte(OpCode.Subtract)
    of TokenType.SLASH: self.emitByte(OpCode.Divide)
    of TokenType.STAR: self.emitByte(OpCode.Multiply)
    of TokenType.MOD: self.emitByte(OpCode.Mod)
    of TokenType.POW: self.emitByte(OpCode.Pow)
    of TokenType.NE: self.emitBytes(OpCode.Equal, OpCode.Not)   # a != b is compiled as not (a == b)
    of TokenType.DEQ: self.emitByte(OpCode.Equal)
    of TokenType.GT:
        # To allow for chaining of greater/less comparisons in the future (without doing
        # weird stuff such as allowing false with the greater/less than operators)
        # we need to move their logic in another function. This will
        # also allow for a sort of short-circuiting control flow like
        # for logical ands and ors, because why not?
        self.emitByte(OpCode.Greater)
    of TokenType.GE: self.emitByte(OpCode.GreaterOrEqual)
    of TokenType.LT: self.emitByte(OpCode.Less)
    of TokenType.LE: self.emitByte(OpCode.LessOrEqual)
    of TokenType.CARET: self.emitByte(OpCode.Xor)
    of TokenType.SHL: self.emitByte(OpCode.Shl)
    of TokenType.SHR: self.emitByte(OpCode.Shr)
    of TokenType.BOR: self.emitByte(OpCode.Bor)
    of TokenType.BAND: self.emitByte(OpCode.Band)
    of TokenType.IS: self.emitByte(OpCode.Is)
    of TokenType.ISNOT: self.emitBytes(OpCode.Is, OpCode.Not)   # a isnot b is compiled as not (a is b)
    of TokenType.AS: self.emitByte(OpCode.As)
    else: discard   # Unreachable
|
2020-08-09 10:09:03 +02:00
|
|
|
|
2020-08-10 10:48:21 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc unary(self: Compiler, canAssign: bool) =
    ## Prefix handler for unary operators such as negation
    ## or binary inversion. The operand is parsed first, then
    ## the instruction for the operator is emitted
    let operator = self.parser.previous().kind
    if self.parser.peek().kind == EOF:
        # There is no operand to apply the operator to
        self.parser.parseError(self.parser.previous(), "Expecting expression, got EOF")
        return
    self.parsePrecedence(Precedence.Unary)
    case operator
    of MINUS: self.emitByte(OpCode.Negate)
    of NEG: self.emitByte(OpCode.Not)
    of TILDE: self.emitByte(OpCode.Bnot)
    of PLUS: discard   # Unary + does nothing anyway
    else: return
|
|
|
|
|
2020-08-13 17:53:23 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
template markObject*(self: Compiler, obj: ptr Obj): untyped =
    ## Records a compile-time object in the compiler's list of
    ## objects (those take up memory as well), so that the VM
    ## can reclaim the space later on. Evaluates to the object
    ## itself, which makes it usable inside expressions
    let tracked = obj   # Makes sure the argument is evaluated only once
    self.objects.append(tracked)
    tracked
|
2020-08-27 18:15:45 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc strVal(self: Compiler, canAssign: bool) =
    ## Prefix handler for string literals. The delimiters are
    ## stripped from the lexeme, then a Constant instruction
    ## is emitted for the resulting string. Constant strings
    ## are interned: equal literals share the same object
    let lexeme = self.parser.previous().lexeme
    let delimiter = $lexeme[0]   # TODO: Add proper escape sequences support
    let text = lexeme.unescape(delimiter, delimiter)
    if text in self.interned:
        # We intern only constant strings!
        # Note that we don't call self.markObject on an already
        # interned string because that has already been marked
        self.emitConstant(self.interned[text])
    else:
        self.interned[text] = text.asStr()
        self.emitConstant(self.markObject(self.interned[text]))
|
2020-08-09 10:09:03 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc bracketAssign(self: Compiler, canAssign: bool) =
    ## Meant to parse assignments such as a[0] = "something".
    ## Currently a placeholder that does nothing
    # TODO -> Implement this
    discard
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc bracket(self: Compiler, canAssign: bool) =
    ## Parses getitem/slice expressions, such as "hello"[0:1]
    ## or someList[5]. A slice takes up to two arguments, a
    ## start and an end index into the chosen iterable, and
    ## both are optional: a missing index is compiled as nil.
    ## The intended semantics are the following: indexes start
    ## from 0, the start index is inclusive while the end index
    ## is not. A missing start defaults to 0 and a missing
    ## end--like in "hello"[0:]--to the length of the iterable,
    ## so that "hi"[:] basically copies the string. Like in
    ## Python, an end index that's out of bounds is not an
    ## error: "hello"[0:999] just returns the whole string.
    ## Negative indexes are allowed: -1 means the last element,
    ## -2 the one before that and so on, but an index that goes
    ## back too far does NOT loop back to the end of the
    ## iterable and causes an IndexError at runtime instead

    template sliceEnd() =
        # Compiles the end index of a slice (nil if it is
        # missing), followed by the Slice instruction
        if self.parser.peek().kind == TokenType.RS:
            self.emitByte(OpCode.Nil)
        else:
            self.parsePrecedence(Precedence.Term)
        self.emitByte(OpCode.Slice)

    if self.parser.peek().kind == TokenType.COLON:
        # The start index is missing
        self.emitByte(OpCode.Nil)
        discard self.parser.advance()
        sliceEnd()
    else:
        self.parsePrecedence(Precedence.Term)
        let following = self.parser.peek().kind
        if following == TokenType.RS:
            self.emitByte(OpCode.GetItem)
        elif following == TokenType.COLON:
            discard self.parser.advance()
            sliceEnd()
    if self.parser.peek().kind == TokenType.EQ:
        discard self.parser.advance()
        self.parsePrecedence(Precedence.Term)
    self.parser.consume(TokenType.RS, "Expecting ']' after slice expression")
|
2020-08-10 10:48:21 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc literal(self: Compiler, canAssign: bool) =
    ## Prefix handler for the literals that have an instruction
    ## of their own: true, false, nil, inf and nan
    let kind = self.parser.previous().kind
    case kind
    of TokenType.TRUE: self.emitByte(OpCode.True)
    of TokenType.FALSE: self.emitByte(OpCode.False)
    of TokenType.NIL: self.emitByte(OpCode.Nil)
    of TokenType.INF: self.emitByte(OpCode.Inf)
    of TokenType.NAN: self.emitByte(OpCode.Nan)
    else: discard   # Unreachable
|
2020-08-09 10:09:03 +02:00
|
|
|
|
2020-08-10 10:48:21 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc number(self: Compiler, canAssign: bool) =
    ## Prefix handler for numerical constants. A lexeme that
    ## contains a dot is compiled as a float, anything else
    ## as an integer. A ValueError raised along the way is
    ## reported as a compile error
    let lexeme = self.parser.previous().lexeme
    try:
        if lexeme.contains("."):
            self.emitConstant(self.markObject(parseFloat(lexeme).asFloat()))
        else:
            self.emitConstant(self.markObject(parseInt(lexeme).asInt()))
    except ValueError:
        self.compileError("number literal is too big")
|
2020-08-08 19:08:09 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc grouping(self: Compiler, canAssign: bool) =
    ## Parses parenthesized expressions. The only interesting
    ## semantic about parentheses is that they allow lower-precedence
    ## expressions where a higher precedence one is expected.
    ## An empty pair of parentheses is compiled as nil, while
    ## hitting the end of the file right after the opening
    ## parenthesis is reported as an error
    if self.parser.check(TokenType.EOF):
        # EOF is only inspected, never consumed: parseError() ends up
        # calling synchronize(), which peeks at the current token, and
        # stepping over the EOF terminator would make that read land
        # past the end of the token list. The error is still reported
        # at the EOF token, just like before
        self.parser.parseError(self.parser.peek(), "Expecting ')'")
    elif self.parser.match(RP):
        # Empty parentheses
        self.emitByte(OpCode.Nil)
    else:
        self.expression()
        self.parser.consume(TokenType.RP, "Expecting ')' after parentheszed expression")
|
2020-08-09 10:09:03 +02:00
|
|
|
|
|
|
|
|
2021-01-16 11:47:01 +01:00
|
|
|
proc identifierConstant(self: Compiler, tok: Token): array[3, uint8] =
    ## Turns the lexeme of the given token into a string
    ## object, marks it and adds it to the constant table.
    ## Returns the 3 bytes obtained from makeConstant
    let nameObject = self.markObject(asStr(tok.lexeme))
    result = self.makeConstant(nameObject)
|
2020-08-15 08:52:58 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc addLocal(self: Compiler, name: Token) =
    ## Stores a new local variable. Locals are resolved at
    ## compile time rather than at runtime, unlike global
    ## variables which are treated differently. A local starts
    ## off in a special "uninitialized" state (a depth of -1),
    ## which is useful to detect errors such as var a = a;
    ## inside local scopes
    self.localCount += 1
    self.locals.add(Local(name: name, depth: -1))
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc declareVariable(self: Compiler) =
    ## Declares the variable named by the token consumed last.
    ## This only matters for local variables: there is no way
    ## to "declare" a global at compile time, so nothing is
    ## done at scope depth 0. The assumption works because
    ## locals and temporaries have stack semantics inside
    ## local scopes
    if self.scopeDepth != 0:
        self.addLocal(self.parser.previous())
|
|
|
|
|
|
|
|
|
2021-01-16 11:47:01 +01:00
|
|
|
proc parseVariable(self: Compiler, message: string): array[3, uint8] =
    ## Consumes an identifier (reporting the given message if
    ## there is none) and declares it. Inside a local scope
    ## three zero bytes are returned, otherwise the result
    ## comes from identifierConstant for the name's token
    self.parser.consume(TokenType.ID, message)
    self.declareVariable()
    result =
        if self.scopeDepth > 0:
            [0'u8, 0'u8, 0'u8]
        else:
            self.identifierConstant(self.parser.previous())
|
2020-08-17 08:17:27 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc markInitialized(self: Compiler) =
    ## Marks the local at index localCount - 1 as initialized
    ## and ready for use, by setting its depth to the current
    ## scope depth. Does nothing at scope depth 0
    if self.scopeDepth != 0:
        self.locals[self.localCount - 1].depth = self.scopeDepth
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc defineVariable(self: Compiler, idx: array[3, uint8]) =
    ## Defines a variable. Inside a local scope it is simply
    ## marked as initialized, while at scope depth 0 a
    ## DefineGlobal instruction is emitted with the given 3
    ## bytes as its operand
    if self.scopeDepth > 0:
        self.markInitialized()
    else:
        self.emitByte(OpCode.DefineGlobal)
        self.emitBytes(idx)
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc resolveLocal(self: Compiler, name: Token): int =
    ## Looks for a local variable with the same lexeme as the
    ## given token, from the most recently added one backwards.
    ## Returns a slot number that starts from localCount - 1
    ## and decreases by one at each step of the search, or -1
    ## if there is no match. A match that is still uninitialized
    ## (as in var a = a) is reported as a compile error, but
    ## its slot is returned anyway
    result = -1
    var slot = self.localCount - 1
    for position in countdown(self.locals.high, 0):
        let candidate = self.locals[position]
        if candidate.name.lexeme == name.lexeme:
            if candidate.depth == -1:
                self.compileError("cannot read local variable in its own initializer")
            return slot
        dec(slot)
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc namedVariable(self: Compiler, tok: Token, canAssign: bool) =
    ## Handles local and global variables assignment, as well
    ## as variable resolution. If the name resolves to a local,
    ## the local instructions are used and the operand is the
    ## local's slot, otherwise the global ones are used and the
    ## operand comes from identifierConstant. A set instruction
    ## is emitted when assignment is allowed and an '=' follows
    ## (after compiling the expression on its right), a get
    ## instruction in every other case
    var
        arg = self.resolveLocal(tok)
        # NOTE(review): reinterprets the int as 3 bytes, which presumably
        # relies on the platform's byte order -- confirm against the VM
        casted = cast[array[3, uint8]](arg)
        get: OpCode
        set: OpCode
    if arg != -1:
        get = OpCode.GetLocal
        set = OpCode.SetLocal
    else:
        get = OpCode.GetGlobal
        set = OpCode.SetGlobal
        casted = self.identifierConstant(tok)
    # canAssign must be tested first: match() consumes the '=' as a
    # side effect, and doing that when assignment is not allowed would
    # hide the token from the "Invalid assignment target" check that
    # parsePrecedence performs (e.g. in a + b = 3;)
    if canAssign and self.parser.match(TokenType.EQ):
        self.expression()
        self.emitByte(set)
        self.emitBytes(casted)
    else:
        self.emitByte(get)
        self.emitBytes(casted)
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc variable(self: Compiler, canAssign: bool) =
    ## Prefix handler for identifiers: hands the token that
    ## was consumed last over to namedVariable, which deals
    ## with both local and global variables
    namedVariable(self, self.parser.previous(), canAssign)
|
2020-08-15 11:27:04 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc varDeclaration(self: Compiler) =
    ## Parses a variable declaration: a name, an optional
    ## initializer introduced by '=' and a closing semicolon.
    ## Without an initializer the value compiled is nil
    let name = self.parseVariable("Expecting variable name")
    if not self.parser.match(TokenType.EQ):
        self.emitByte(OpCode.Nil)
    else:
        self.expression()
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after var declaration")
    self.defineVariable(name)
|
2020-08-15 08:52:58 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc expressionStatement(self: Compiler) =
    ## Compiles an expression used as a statement: the
    ## expression itself, the mandatory trailing semicolon
    ## and, last, a Pop instruction
    self.expression()
    consume(self.parser, TokenType.SEMICOLON, "Missing semicolon after expression")
    emitByte(self, OpCode.Pop)
|
2020-08-15 08:52:58 +02:00
|
|
|
|
|
|
|
|
2021-01-05 14:55:37 +01:00
|
|
|
proc delStatement(self: Compiler) =
    ## Parses a del statement. The operand is compiled as an
    ## expression; if its last token is a number or a string an
    ## error is reported. A DeleteGlobal (scope depth 0) or a
    ## DeleteLocal instruction is then emitted, followed by the
    ## 3 bytes that identifierConstant returns for that last token
    self.expression()
    # TODO: isLiteral?
    if self.parser.previous().kind in {TokenType.NUMBER, TokenType.STR}:
        self.compileError("cannot delete a literal")
    let atTopLevel = self.scopeDepth == 0
    let code = if atTopLevel: OpCode.DeleteGlobal else: OpCode.DeleteLocal
    if not atTopLevel:
        # NOTE(review): only the counter is decreased here, self.locals
        # itself is left untouched -- confirm this is intended
        dec(self.localCount)
    let name = self.identifierConstant(self.parser.previous())
    self.emitBytes(code, name[0])
    self.emitBytes(name[1], name[2])
    self.parser.consume(TokenType.SEMICOLON, "Missing semicolon after del statement")
|
2020-08-17 08:17:27 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc parseBlock(self: Compiler) =
    ## Parses the body of a block statement: declarations are
    ## compiled one after the other until a '}' or the end of
    ## the token stream is met, then the closing '}' is consumed
    while not (self.parser.check(TokenType.RB) or self.parser.check(TokenType.EOF)):
        self.declaration()
    self.parser.consume(TokenType.RB, "Expecting '}' after block statement")
|
2020-08-17 08:17:27 +02:00
|
|
|
|
2020-08-15 11:38:36 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc beginScope(self: Compiler) =
    ## Opens a new scope. The only bookkeeping needed is to
    ## bump the current scope depth by one: endScope later uses
    ## that depth to tell which locals belong to the scope
    self.scopeDepth += 1
|
2020-08-15 11:38:36 +02:00
|
|
|
|
2020-08-15 08:52:58 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc endScope(self: Compiler) =
    ## Closes the current scope: the scope depth is decreased
    ## and a Pop instruction is emitted for every trailing local
    ## whose depth is greater than the new scope depth. The
    ## corresponding entries are then removed from self.locals
    dec(self.scopeDepth)
    let start: Natural = self.localCount
    while self.localCount > 0 and self.locals[self.localCount - 1].depth > self.scopeDepth:
        self.emitByte(OpCode.Pop)
        dec(self.localCount)
    if self.localCount <= start:
        # NOTE(review): `start` is the old local count, i.e. possibly one
        # past the last valid index -- presumably this relies on delete()
        # tolerating an out-of-range upper bound, confirm
        self.locals.delete(self.localCount, start)
|
2020-08-17 08:17:27 +02:00
|
|
|
|
2020-12-26 17:01:03 +01:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc emitJump(self: Compiler, opcode: OpCode): int =
    ## Emits the given jump opcode followed by two placeholder
    ## bytes (0xff) that patchJump overwrites once the size of
    ## the code to skip is known. Returns the index of the first
    ## placeholder byte inside the current chunk
    self.emitByte(opcode)
    for _ in 1..2:
        self.emitByte(0xff)
    result = self.currentChunk.code.len - 2
    when DEBUG_TRACE_COMPILER:
        setForegroundColor(fgYellow)
        write stdout, &"DEBUG - Compiler: emit jump @ {self.currentChunk.code.len-2}\n"
        setForegroundColor(fgDefault)
|
2020-08-19 12:40:01 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc patchJump(self: Compiler, offset: int) =
    ## Fills in the operand of a jump emitted earlier on.
    ## The amount of code to skip is unknown when the jump is
    ## emitted, so emitJump writes a 2-byte placeholder; once the
    ## code to jump over has been compiled this procedure computes
    ## the distance between `offset` (the index of the first
    ## placeholder byte) and the end of the chunk and stores it
    ## in place of the placeholder.
    ## The operand is 2 bytes wide, so a distance greater than
    ## uint16.high cannot be encoded and is reported as a
    ## compile error
    when DEBUG_TRACE_COMPILER:
        setForegroundColor(fgYellow)
        write stdout, &"DEBUG - Compiler: patching jump @ {offset}"
    let jump = self.currentChunk.code.len - offset - 2
    if jump > (int uint16.high):
        when DEBUG_TRACE_COMPILER:
            setForegroundColor(fgDefault)
            write stdout, "\n"
        self.compileError("too much code to jump over")
        return
    let casted = toDouble(jump)
    self.currentChunk.code[offset] = casted[0]
    self.currentChunk.code[offset + 1] = casted[1]
    when DEBUG_TRACE_COMPILER:
        write stdout, &" points to {casted[0]}, {casted[1]} = {jump}\n"
        setForegroundColor(fgDefault)
|
2020-08-19 12:40:01 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc ifStatement(self: Compiler) =
    ## Parses C-style if statements: a parenthesized condition,
    ## the statement to run and an optional else branch. The
    ## token stream is checked for a premature end after each
    ## piece and a parse error is reported in that case
    self.parser.consume(TokenType.LP, "The if condition must be parenthesized")
    if self.parser.peek.kind != TokenType.EOF:
        self.expression()
        if self.parser.peek.kind != TokenType.EOF:
            self.parser.consume(TokenType.RP, "The if condition must be parenthesized")
            if self.parser.peek.kind != TokenType.EOF:
                # Jump taken when the condition is false: it lands
                # right after the 'then' branch
                let thenJump: int = self.emitJump(OpCode.JumpIfFalse)
                self.emitByte(OpCode.Pop)
                self.statement()
                # Jump emitted at the end of the 'then' branch: it
                # skips the (possibly empty) else branch
                let elseJump = self.emitJump(OpCode.Jump)
                self.patchJump(thenJump)
                self.emitByte(OpCode.Pop)
                if self.parser.match(TokenType.ELSE):
                    self.statement()
                self.patchJump(elseJump)
            else:
                self.parser.parseError(self.parser.previous(), "Invalid syntax")
        else:
            self.parser.parseError(self.parser.previous(), "The if condition must be parenthesized")
|
2020-08-19 14:19:01 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc emitLoop(self: Compiler, start: int) =
    ## Emits a Loop instruction followed by its 2-byte operand.
    ## `start` is the chunk index that the loop refers to; the
    ## operand is the distance between the current end of the
    ## chunk and `start`, plus 2, encoded through toDouble.
    ## A distance greater than uint16.high does not fit the
    ## operand and is reported as a compile error
    when DEBUG_TRACE_COMPILER:
        setForegroundColor(fgYellow)
        write stdout, &"DEBUG - Compiler: emitting loop at start {start} "
    self.emitByte(OpCode.Loop)
    let offset = self.currentChunk.code.len - start + 2
    if offset > (int uint16.high):
        when DEBUG_TRACE_COMPILER:
            setForegroundColor(fgDefault)
            write stdout, "\n"
        self.compileError("loop body is too large")
    else:
        let offsetBytes = toDouble(offset)
        self.emitByte(offsetBytes[0])
        self.emitByte(offsetBytes[1])
        when DEBUG_TRACE_COMPILER:
            write stdout, &"pointing to {offsetBytes[0]}, {offsetBytes[1]} = {offset}\n"
            # Restore the terminal color on the success path too, so the
            # yellow set at the top does not leak into later output
            setForegroundColor(fgDefault)
|
2020-08-19 14:19:01 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc endLooping(self: Compiler) =
    ## Finalizes the loop currently being compiled. If an exit
    ## jump was recorded (loopEnd != -1) it is patched and a Pop
    ## is emitted; then every recorded break position gets its
    ## opcode rewritten to Jump and its operand patched so that
    ## it lands here. Finally the enclosing loop becomes the
    ## current one again
    let exitJump = self.loop.loopEnd
    if exitJump != -1:
        self.patchJump(exitJump)
        self.emitByte(OpCode.Pop)
    for brk in self.loop.breaks:
        when DEBUG_TRACE_COMPILER:
            setForegroundColor(fgYellow)
            write stdout, &"DEBUG - Compiler: patching break at {brk}\n"
            setForegroundColor(fgDefault)
        # brk is the index of the opcode, its operand starts one byte later
        self.currentChunk.code[brk] = OpCode.Jump.uint8
        self.patchJump(brk + 1)
    self.loop = self.loop.outer
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc whileStatement(self: Compiler) =
    ## Parses C-style while loops: a parenthesized condition
    ## followed by the loop body. A new Loop record starting at
    ## the current end of the chunk is installed before parsing
    ## and is closed by endLooping no matter how parsing went
    self.loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
    self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
    if self.parser.peek.kind != TokenType.EOF:
        self.expression()
        if self.parser.peek.kind != TokenType.EOF:
            self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
            if self.parser.peek.kind != TokenType.EOF:
                # The exit jump is only recorded here: endLooping patches it
                self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
                self.emitByte(OpCode.Pop)
                self.loop.body = self.currentChunk.code.len
                self.statement()
                self.emitLoop(self.loop.start)
                #self.patchJump(self.loop.loopEnd) # Prod2: imo will get patched over by endLooping anyways
                #self.emitByte(OpCode.Pop)
            else:
                self.parser.parseError(self.parser.previous(), "Invalid syntax")
        else:
            self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
    self.endLooping()
|
2020-08-19 12:40:01 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc forStatement(self: Compiler) =
    ## Parses C-style for loops, made of three optional clauses
    ## (initializer; condition; increment) between parentheses
    ## and a body. The whole statement lives in its own scope so
    ## that a variable declared in the initializer is dropped
    ## when the loop ends
    self.beginScope()
    self.parser.consume(TokenType.LP, "The loop condition must be parenthesized")
    if self.parser.peek.kind != TokenType.EOF:
        # Initializer clause: empty, a var declaration or an expression
        if self.parser.match(TokenType.SEMICOLON):
            discard
        elif self.parser.match(TokenType.VAR):
            self.varDeclaration()
        else:
            self.expressionStatement()
        self.loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
        # Condition clause
        if not self.parser.match(TokenType.SEMICOLON):
            self.expression()
            if self.parser.previous.kind != TokenType.EOF:
                self.parser.consume(TokenType.SEMICOLON, "Expecting ';'")
                self.loop.loopEnd = self.emitJump(OpCode.JumpIfFalse)
                self.emitByte(OpCode.Pop)
            else:
                self.parser.current -= 1
                self.parser.parseError(self.parser.previous(), "Invalid syntax")
        # Increment clause: it is compiled before the body, so a jump
        # over it is emitted first and the loop start is moved onto it
        if not self.parser.match(RP):
            let bodyJump = self.emitJump(OpCode.Jump)
            let incrementStart = self.currentChunk.code.len
            if self.parser.peek.kind != TokenType.EOF:
                self.expression()
                self.emitByte(OpCode.Pop)
                self.parser.consume(TokenType.RP, "The loop condition must be parenthesized")
                self.emitLoop(self.loop.start)
                self.loop.start = incrementStart
                self.patchJump(bodyJump)
        # Loop body
        if self.parser.peek.kind != TokenType.EOF:
            self.loop.body = self.currentChunk.code.len
            self.statement()
            self.emitLoop(self.loop.start)
        else:
            self.parser.current -= 1
            self.parser.parseError(self.parser.previous(), "Invalid syntax")
        if self.loop.loopEnd != -1:
            self.patchJump(self.loop.loopEnd)
            self.emitByte(OpCode.Pop)
    else:
        self.parser.parseError(self.parser.previous(), "The loop condition must be parenthesized")
    self.endLooping()
    self.endScope()
|
2020-08-19 16:19:10 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc parseBreak(self: Compiler) =
    ## Parses break statements. Outside of a live loop a parse
    ## error is reported. Otherwise a Pop is emitted for every
    ## trailing local deeper than the loop, followed by a Break
    ## instruction with a placeholder operand whose position is
    ## recorded in the loop so that endLooping can patch it
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous, "'break' outside loop")
        return
    self.parser.consume(TokenType.SEMICOLON, "missing semicolon after break statement")
    var i = self.localCount - 1
    while i >= 0 and self.locals[i].depth > self.loop.depth:
        self.emitByte(OpCode.Pop)
        dec(i)
    discard self.emitJump(OpCode.Break)
    # The instruction is 3 bytes long (opcode + 2-byte operand), so this
    # is the index of the opcode that was just emitted
    let breakPos = self.currentChunk.code.len() - 3
    self.loop.breaks.add(breakPos)
|
2020-08-19 16:19:10 +02:00
|
|
|
|
2020-10-25 12:45:03 +01:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc parseAnd(self: Compiler, canAssign: bool) =
    ## Parses the right-hand side of expressions such as a and b.
    ## A JumpIfFalse is emitted ahead of the right operand and is
    ## patched to land right after it
    let skipRight = self.emitJump(OpCode.JumpIfFalse)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.And)
    self.patchJump(skipRight)
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc parseOr(self: Compiler, canAssign: bool) =
    ## Parses the right-hand side of expressions such as a or b.
    ## Two jumps are emitted ahead of the right operand: a
    ## JumpIfFalse that lands on the code evaluating it and a
    ## plain Jump that skips it altogether
    let toRightOperand = self.emitJump(OpCode.JumpIfFalse)
    let pastRightOperand = self.emitJump(OpCode.Jump)
    self.patchJump(toRightOperand)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.Or)
    self.patchJump(pastRightOperand)
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc continueStatement(self: Compiler) =
    ## Parses continue statements. Outside of a live loop a
    ## parse error is reported. Otherwise a Pop is emitted for
    ## every trailing local deeper than the loop and a Loop
    ## instruction targeting the start of the current loop follows
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous, "'continue' outside loop")
        return
    self.parser.consume(TokenType.SEMICOLON, "missing semicolon after continue statement")
    var i = self.localCount - 1
    while i >= 0 and self.locals[i].depth > self.loop.depth:
        self.emitByte(OpCode.Pop)
        dec(i)
    self.emitLoop(self.loop.start)
|
2020-08-20 11:23:49 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc endCompiler(self: Compiler): ptr Function =
    ## Terminates the current compiler instance and hands back
    ## the function object it has been compiling into. A Nil
    ## and a Return instruction are appended first, which is why
    ## a function without an explicit return statement
    ## implicitly returns nil
    self.emitByte(OpCode.Nil)
    self.emitByte(OpCode.Return)
    result = self.function
|
2020-12-21 22:43:40 +01:00
|
|
|
|
|
|
|
|
|
|
|
proc parseFunction(self: Compiler, funType: FunctionType) =
    ## Parses a function: an optional parenthesized parameter
    ## list followed by a braced body. The function is compiled
    ## by a fresh compiler whose enclosing compiler is `self`;
    ## once done, the resulting function object is stored as a
    ## constant and a Constant instruction is emitted through
    ## the enclosing compiler.
    ## Parameters may have a default value (WIP), but once one
    ## of them does, all the subsequent ones must have it as well
    var ctx = initCompiler(funType, self, self.parser, self.file.toStr())
    ctx.beginScope()
    # When the body follows straight away there is no parameter list
    if not ctx.parser.check(LB):
        ctx.parser.consume(LP, "Expecting '('")
        if ctx.parser.hadError:
            return
        var paramNames: seq[string] = @[]
        var defaultFollows = false
        if not ctx.parser.check(RP):
            while true:
                ctx.function.arity += 1
                if ctx.function.arity + ctx.function.optionals > 255:
                    ctx.compileError("functions cannot have more than 255 arguments")
                    break
                let paramIdx = ctx.parseVariable("expecting parameter name")
                if ctx.parser.hadError:
                    return
                let paramName = ctx.parser.previous.lexeme
                if paramName in paramNames:
                    ctx.compileError("duplicate parameter name in function declaration")
                    return
                paramNames.add(paramName)
                ctx.defineVariable(paramIdx)
                if ctx.parser.match(TokenType.EQ):
                    if ctx.parser.peek.kind == EOF:
                        ctx.compileError("Unexpected EOF")
                        return
                    # The parameter turned out to be optional, move it
                    # from one counter to the other
                    ctx.function.arity -= 1
                    ctx.function.optionals += 1
                    ctx.expression()
                    ctx.function.defaults.append(ctx.parser.previous.lexeme.asStr())
                    defaultFollows = true
                elif defaultFollows:
                    ctx.compileError("non-default argument follows default argument")
                    return
                if not ctx.parser.match(COMMA):
                    break
        ctx.parser.consume(RP, "Expecting ')' after parameters")
    ctx.parser.consume(LB, "Expecting '{' before function body")
    ctx.parseBlock()
    let fun = ctx.endCompiler()
    # Switch back to the enclosing compiler to emit the function constant
    ctx = ctx.enclosing
    ctx.emitByte(OpCode.Constant)
    ctx.emitBytes(ctx.makeConstant(fun))
|
2020-08-30 10:36:13 +02:00
|
|
|
|
2021-01-14 12:55:51 +01:00
|
|
|
|
2021-02-19 16:54:19 +01:00
|
|
|
proc parseLambda(self: Compiler, canAssign: bool) =
    ## Parses lambda expressions of the form => (params) {code}
    ## by delegating to parseFunction with the LAMBDA function
    ## type. `canAssign` is not used
    parseFunction(self, FunctionType.LAMBDA)
|
|
|
|
|
|
|
|
|
2021-02-19 16:54:19 +01:00
|
|
|
proc funDeclaration(self: Compiler) =
    ## Parses a function declaration and defines the function
    ## under its name in the current scope. markInitialized is
    ## called before the function itself is parsed
    let funName = self.parseVariable("expecting function name")
    self.markInitialized()
    self.parseFunction(FunctionType.FUNC)
    self.defineVariable(funName)
|
2020-08-30 10:36:13 +02:00
|
|
|
|
|
|
|
|
2021-01-16 18:14:22 +01:00
|
|
|
proc argumentList(self: Compiler): tuple[pos: uint8, kw: uint8] =
    ## Parses the arguments passed to a function call, up to
    ## and including the closing ')'.
    ##
    ## Returns the number of positional arguments (`pos`) and of
    ## keyword arguments (`kw`, of the form name=value) that were
    ## compiled. Positional arguments cannot follow keyword ones
    ## and no more than 255 arguments can be passed in total. On
    ## error the procedure returns early with the counts gathered
    ## up to that point
    result.pos = 0
    result.kw = 0
    if not self.parser.check(RP):
        while true:
            if self.parser.check(ID) and self.parser.checkNext(TokenType.EQ):
                # Keyword argument: skip the name and the '=' sign
                discard self.parser.advance()
                discard self.parser.advance()
                if self.parser.check(EOF):
                    self.parser.parseError(self.parser.previous, "Unexpected EOF")
                    return
                self.expression()
                if result.pos + result.kw == 255:
                    self.compileError("cannot pass more than 255 arguments")
                    return
                # Count the argument before looking for a comma, exactly
                # like the positional branch does: otherwise the last
                # keyword argument of the list would never be counted
                result.kw += 1
                if not self.parser.match(COMMA):
                    break
            else:
                if self.parser.check(EOF):
                    self.parser.parseError(self.parser.previous, "Unexpected EOF")
                    return
                if result.kw > 0:
                    self.parser.parseError(self.parser.peek, "positional argument follows default argument")
                    return
                self.expression()
                if result.pos == 255:
                    self.compileError("cannot pass more than 255 arguments")
                    return
                result.pos += 1
                if not self.parser.match(COMMA):
                    break
    self.parser.consume(RP, "Expecting ')' after arguments")
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc call(self: Compiler, canAssign: bool) =
    ## Compiles a function call: the argument list is parsed
    ## and a Call instruction is emitted with the number of
    ## positional arguments as its operand
    # TODO -> Keyword arguments
    let (positional, _) = self.argumentList()
    self.emitBytes(OpCode.Call, positional)
|
2020-08-30 10:36:13 +02:00
|
|
|
|
2020-08-29 09:07:32 +02:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc returnStatement(self: Compiler) =
    ## Parses return statements and emits the bytecode for
    ## them. A compile error is reported when the statement is
    ## met in the SCRIPT context. A bare `return;` emits Nil as
    ## the value to return
    if self.context == SCRIPT:
        self.compileError("'return' outside function")
    self.afterReturn = true
    if self.parser.match(TokenType.SEMICOLON):   # Empty return
        self.emitByte(OpCode.Nil)
    else:
        self.expression()
        self.parser.consume(TokenType.SEMICOLON, "missing semicolon after return statement")
    # Both forms end with the same instruction
    self.emitByte(OpCode.Return)
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
|
|
|
|
proc statement(self: Compiler) =
    ## Parses a single statement. The leading token selects the
    ## dedicated procedure; anything that does not start with a
    ## statement keyword or a '{' is compiled as an expression
    ## statement
    if self.parser.match(TokenType.FOR):
        self.forStatement()
    elif self.parser.match(TokenType.IF):
        self.ifStatement()
    elif self.parser.match(TokenType.WHILE):
        self.whileStatement()
    elif self.parser.match(TokenType.RETURN):
        self.returnStatement()
    elif self.parser.match(TokenType.CONTINUE):
        self.continueStatement()
    elif self.parser.match(TokenType.BREAK):
        self.parseBreak()
    elif self.parser.match(TokenType.DEL):
        self.delStatement()
    elif self.parser.match(TokenType.LB):
        # A block statement gets a scope of its own
        self.beginScope()
        self.parseBlock()
        self.endScope()
    else:
        self.expressionStatement()
|
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc declaration(self: Compiler) =
    ## Parses a declaration: a function declaration, a variable
    ## declaration or, failing those, a plain statement
    # TODO -> Fix this
    # if self.afterReturn:
    #     self.compileError("dead code after return statement")
    #     self.parser.tokens.append(Token(kind: TokenType.EOF, lexeme: ""))
    if self.parser.match(FUN):
        self.funDeclaration()
    elif self.parser.match(VAR):
        self.varDeclaration()
    else:
        self.statement()
|
2020-08-15 08:52:58 +02:00
|
|
|
|
2021-01-09 18:03:47 +01:00
|
|
|
|
2020-12-28 10:09:52 +01:00
|
|
|
proc freeCompiler*(self: Compiler) =
    ## Frees every object allocated by the compiler, walking
    ## self.objects from the last one to the first one and
    ## popping each object off the list after freeing it
    when DEBUG_TRACE_ALLOCATION:
        let objCount = len(self.objects)
        var objFreed = 0
    for obj in reversed(self.objects):
        freeObject(obj)
        discard self.objects.pop()
        when DEBUG_TRACE_ALLOCATION:
            inc(objFreed)
    when DEBUG_TRACE_ALLOCATION:
        echo &"DEBUG - Compiler: Freed {objFreed} objects out of {objCount} compile-time objects"
|
|
|
|
|
2020-08-15 08:52:58 +02:00
|
|
|
|
2021-01-12 09:55:41 +01:00
|
|
|
# The table of all parse rules, indexed by token type.
# It drives the Pratt parser: for each token type, makeRule
# bundles the handler used for unary operators, the handler
# used for binary operators and the precedence of the token.
# The entries must be listed in the same order as the members
# of TokenType
var rules: array[TokenType, ParseRule] = [
    makeRule(nil, binary, Precedence.Term),        # PLUS
    makeRule(unary, binary, Precedence.Term),      # MINUS
    makeRule(nil, binary, Precedence.Factor),      # SLASH
    makeRule(nil, binary, Precedence.Factor),      # STAR
    makeRule(unary, nil, Precedence.None),         # NEG
    makeRule(nil, binary, Precedence.Eq),          # NE
    makeRule(nil, nil, Precedence.None),           # EQ
    makeRule(nil, binary, Precedence.Comp),        # DEQ
    makeRule(nil, binary, Precedence.Comp),        # LT
    makeRule(nil, binary, Precedence.Comp),        # GE
    makeRule(nil, binary, Precedence.Comp),        # LE
    makeRule(nil, binary, Precedence.Factor),      # MOD
    makeRule(nil, binary, Precedence.Exp),         # POW
    makeRule(nil, binary, Precedence.Comp),        # GT
    makeRule(grouping, call, Precedence.Call),     # LP
    makeRule(nil, nil, Precedence.None),           # RP
    makeRule(nil, bracket, Precedence.Call),       # LS
    makeRule(nil, nil, Precedence.None),           # LB
    makeRule(nil, nil, Precedence.None),           # RB
    makeRule(nil, nil, Precedence.None),           # COMMA
    makeRule(nil, nil, Precedence.None),           # DOT
    makeRule(variable, nil, Precedence.None),      # ID
    makeRule(nil, nil, Precedence.None),           # RS
    makeRule(number, nil, Precedence.None),        # NUMBER
    makeRule(strVal, nil, Precedence.None),        # STR
    makeRule(nil, nil, Precedence.None),           # SEMICOLON
    makeRule(nil, parseAnd, Precedence.And),       # AND
    makeRule(nil, nil, Precedence.None),           # CLASS
    makeRule(nil, nil, Precedence.None),           # ELSE
    makeRule(nil, nil, Precedence.None),           # FOR
    makeRule(nil, nil, Precedence.None),           # FUN
    makeRule(literal, nil, Precedence.None),       # FALSE
    makeRule(nil, nil, Precedence.None),           # IF
    makeRule(literal, nil, Precedence.None),       # NIL
    makeRule(nil, nil, Precedence.None),           # RETURN
    makeRule(nil, nil, Precedence.None),           # SUPER
    makeRule(nil, nil, Precedence.None),           # THIS
    makeRule(nil, parseOr, Precedence.Or),         # OR
    makeRule(literal, nil, Precedence.None),       # TRUE
    makeRule(nil, nil, Precedence.None),           # VAR
    makeRule(nil, nil, Precedence.None),           # WHILE
    makeRule(nil, nil, Precedence.None),           # DEL
    makeRule(nil, nil, Precedence.None),           # BREAK
    makeRule(nil, nil, Precedence.None),           # EOF
    makeRule(nil, nil, Precedence.None),           # COLON
    makeRule(nil, nil, Precedence.None),           # CONTINUE
    makeRule(nil, binary, Precedence.Term),        # CARET
    makeRule(nil, binary, Precedence.Term),        # SHL
    makeRule(nil, binary, Precedence.Term),        # SHR
    makeRule(literal, nil, Precedence.Term),       # INF
    makeRule(literal, nil, Precedence.Term),       # NAN
    makeRule(nil, binary, Precedence.Term),        # BAND
    makeRule(nil, binary, Precedence.Term),        # BOR
    makeRule(unary, nil, Precedence.None),         # TILDE
    makeRule(nil, binary, Precedence.Is),          # IS
    makeRule(nil, binary, Precedence.As),          # AS
    makeRule(parseLambda, nil, Precedence.None),   # LAMBDA
    makeRule(nil, binary, Precedence.Is),          # ISNOT
]
|
|
|
|
|
|
|
|
|
|
|
|
proc getRule(kind: TokenType): ParseRule =
    ## Looks up the parse rule associated with the given
    ## token type in the rules table
    rules[kind]
|
2020-08-09 10:09:03 +02:00
|
|
|
|
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc compile*(self: Compiler, source: string): ptr Function =
    ## Compiles a source string into a function object, wiring
    ## together the lexer, the parser and the compiler itself.
    ## Returns nil if either the lexer or the parser reported
    ## an error
    var scanner = initLexer(source, self.file.toStr())
    let tokens = scanner.lex()
    if scanner.errored:
        when DEBUG_TRACE_COMPILER:
            echo "DEBUG - Compiler: Result -> LexingError"
        return nil
    self.parser = initParser(tokens, self.file.toStr())
    while not self.parser.match(EOF):
        self.declaration()
    # The compiler is always ended, even when parsing failed
    let function = self.endCompiler()
    if self.parser.hadError:
        when DEBUG_TRACE_COMPILER:
            echo "DEBUG - Compiler: Result -> ParseError"
        return nil
    when DEBUG_TRACE_COMPILER:
        echo "DEBUG - Compiler: Result -> Ok"
    return function
|
|
|
|
|
2020-08-28 00:05:56 +02:00
|
|
|
|
2020-10-25 12:45:03 +01:00
|
|
|
proc initParser*(tokens: seq[Token], file: string): Parser =
    ## Builds a fresh Parser positioned at the first token,
    ## with both error flags cleared, and returns it. The
    ## given tokens are copied into the parser's own token
    ## list and the file name is stored via asStr()
    # TODO -> Decouple the parser from the compiler. Right now
    # the compiler is the one driving the parser: convenient for
    # us, but not an ideal design. A standard interface should be
    # defined so that other people can plug their own parsers
    # into JAPL with ease (much like the lexer, whose only
    # requirement today is a lex() procedure returning a list
    # of tokens)
    result = Parser(
        current: 0,
        tokens: newArrayList[Token](),
        hadError: false,
        panicMode: false,
        file: file.asStr()
    )
    # The list starts out empty and is filled from the input seq
    result.tokens.extend[:Token](tokens)
|
2020-10-25 12:45:03 +01:00
|
|
|
|
2020-12-28 10:09:52 +01:00
|
|
|
|
2020-12-21 22:43:40 +01:00
|
|
|
proc initCompiler*(context: FunctionType, enclosing: Compiler = nil, parser: Parser = initParser(@[], ""), file: string): Compiler =
    ## Allocates a new Compiler, sets up its initial state for
    ## the given context and returns it. The file name is
    ## converted with asStr(), recorded in the compiler's object
    ## list and also assigned to the parser.
    ##
    ## When context is FunctionType.Func the function's name is
    ## read from the previous token of enclosing's parser, so
    ## enclosing must not be nil in that case
    new(result)
    result.context = context
    result.enclosing = enclosing
    result.parser = parser
    result.function = nil  # Garbage collection paranoia
    result.scopeDepth = 0
    result.afterReturn = false
    result.loop = Loop(alive: false, loopEnd: -1)
    # The object list has to exist before anything is appended
    # to it or marked through it below
    result.objects = newArrayList[ptr Obj]()
    result.file = file.asStr()
    result.objects.append(result.file)
    result.parser.file = result.file
    # The locals start out with a single entry: an EOF token
    # with an empty lexeme at depth 0
    result.locals = @[Local(depth: 0, name: Token(kind: EOF, lexeme: ""))]
    result.localCount = 1
    case context
    of FunctionType.Func:
        let funcName = enclosing.parser.previous().lexeme
        result.function = result.markObject(newFunction(funcName, newChunk()))
    of FunctionType.Lambda:
        result.function = result.markObject(newLambda(newChunk()))
    else:
        # Script: created with an empty name, which is then cleared
        result.function = result.markObject(newFunction("", newChunk()))
        result.function.name = nil
|
|
|
|
|
2020-09-03 19:24:18 +02:00
|
|
|
|
2020-10-19 12:32:08 +02:00
|
|
|
# Lets the compiler be run as a standalone program,
# without the VM
when isMainModule:
    echo "JAPL Compiler REPL"
    while true:
        try:
            # A brand new compiler is created for every line read
            let compiler = initCompiler(SCRIPT, file="test")
            stdout.write("=> ")
            let compiled = compiler.compile(stdin.readLine())
            # compile() yields nil on failure: only successful
            # results are disassembled
            if compiled != nil:
                disassembleChunk(compiled.chunk, "test")
        except IOError:
            # An IOError (e.g. no more input to read) ends the REPL
            echo ""
            break
|
|
|
|
|