Added proper documentation for the compiler and the parser. Minor fixes to other docstrings as well

2020-10-18 18:09:12 +02:00 · 2020-10-18 18:09:12 +02:00 · 4728928903
parent 93a3cea8b5
commit 4728928903
5 changed files with 285 additions and 72 deletions
--- a/nim/compiler.exe
+++ b/nim/compiler.exe
--- a/nim/compiler.nim
+++ b/nim/compiler.nim
@ -1,3 +1,5 @@
+
+
 import strutils
 import algorithm
 import strformat
@ -49,43 +51,59 @@ type


 proc makeRule(prefix, infix: ParseFn, precedence: Precedence): ParseRule =
+    ## Creates a new rule for parsing
    return ParseRule(prefix: prefix, infix: infix, precedence: precedence)


 proc advance(self: var Parser): Token =
+    ## Steps forward by one in the tokens' list and
+    ## increments the current token index
    result = self.tokens[self.current]
    inc(self.current)


 proc peek(self: Parser): Token =
+    ## Returns the next token without consuming it
    return self.tokens[self.current]


 proc previous(self: Parser): Token =
+    ## Returns the previously consumed token
    return self.tokens[self.current - 1]


 proc check(self: Parser, kind: TokenType): bool =
+    ## Checks if the next token is of the expected type
+    ## without consuming it
    return self.peek().kind == kind


 proc match(self: var Parser, kind: TokenType): bool =
+    ## Calls self.check() and advances consumes a token if 
+    ## the expected token is encountered, in which case true
+    ## is returned. False is returned otherwise
    if not self.check(kind): return false
    discard self.advance()
    return true


 proc parseError(self: var Parser, token: Token, message: string) =
-    if self.panicMode:
+    ## Notifies the user about parsing errors, writing them to 
+    ## the standard error file. This parser is designed to report
+    ## all syntatical errors inside a file in one go, rather than
+    ## stopping at the first error occurrence. This allows a user
+    ## to identify and fix multiple errors without running the parser
+    ## multiple times
+    if self.panicMode:    # This serves to identify wheter an error already occurred, in which case we return
        return
    self.panicMode = true
    self.hadError = true
-    echo &"Traceback (most recent call last):"
-    echo &"  File '{self.file}', line {token.line}, at '{token.lexeme}'"
-    echo &"ParseError: {message}"
+    stderr.write(&"A fatal error occurred while parsing '{self.file}', line {token.line}, at '{token.lexeme}' -> {message}")


 proc consume(self: var Parser, expected: TokenType, message: string) =
+    ## Attempts to consume a token if it is of the expected type
+    ## or raises a parsing error with the given message otherwise
    if self.peek().kind == expected:
        discard self.advance()
        return
@ -93,40 +111,57 @@ proc consume(self: var Parser, expected: TokenType, message: string) =


 proc currentChunk(self: ref Compiler): var Chunk =
+    ## Returns the current chunk being compiled
    result = self.function.chunk


 proc compileError(self: ref Compiler, message: string) =
-    echo &"Traceback (most recent call last):"
-    echo &"  File '{self.file}', line {self.parser.peek.line}, at '{self.parser.peek.lexeme}'"
-    echo &"CompileError: {message}"
+    ## Notifies the user about an error occurred during
+    ## compilation, writing to the standard error file
+    stderr.write(&"A fatal error occurred while compiling '{self.file}', line {self.parser.peek().line}, at '{self.parser.peek().lexeme}' -> {message}")
    self.parser.hadError = true
    self.parser.panicMode = true


 proc emitByte(self: ref Compiler, byt: OpCode|uint8) =
+    ## Emits a single bytecode instruction and writes it
+    ## to the current chunk being compiled
+
    self.currentChunk.writeChunk(uint8 byt, self.parser.previous.line)


 proc emitBytes(self: ref Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
+    ## Emits multiple bytes instead of a single one, this is useful
+    ## to emit operators along with their operands or for multi-byte
+    ## instructions that are longer than one byte
    self.emitByte(uint8 byt1)
    self.emitByte(uint8 byt2)


 proc emitBytes(self: ref Compiler, bytarr: array[3, uint8]) =
+    ## Handy helper method to write an array of 3 bytes into
+    ## the current chunk, calling emiteByte(s) on each of its
+    ## elements
    self.emitBytes(bytarr[0], bytarr[1])
    self.emitByte(bytarr[2])


 proc makeConstant(self: ref Compiler, val: Value): uint8 =
+    ## Adds a constant (literal) to the current chunk's
+    ## constants table
    result = uint8 self.currentChunk.addConstant(val)


 proc makeLongConstant(self: ref Compiler, val: Value): array[3, uint8] =
+    ## Does the same as makeConstant(), but encodes the index in the
+    ## chunk's constant table as an array (which is later reconstructed
+    ## into an integer at runtime) to store more than 256 constants in the table
    result = self.currentChunk.writeConstant(val)


 proc emitConstant(self: ref Compiler, value: Value) =
+    ## Emits a Constant or ConstantLong instruction along
+    ## with its operand
    if self.currentChunk().consts.values.len > 255:
        self.emitByte(OpCode.ConstantLong)
        self.emitBytes(self.makeLongConstant(value))
@ -134,32 +169,41 @@ proc emitConstant(self: ref Compiler, value: Value) =
        self.emitBytes(OpCode.Constant, self.makeConstant(value))


-proc getRule(kind: TokenType): ParseRule  # Forward declarations
+proc getRule(kind: TokenType): ParseRule  # Forward declarations for later use
 proc statement(self: ref Compiler)
 proc declaration(self: ref Compiler)
 proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil, parser: Parser = initParser(@[], ""), file: string): ref Compiler


 proc endCompiler(self: ref Compiler): ptr Function =
+    ## Ends the current compiler instance and returns its
+    ## compiled bytecode wrapped around a function object, 
+    ## also emitting a return instruction with nil as operand.
+    ## Because of this, all functions implicitly return nil
+    ## if no return statement is supplied
    self.emitByte(OpCode.Nil)
    self.emitByte(OpCode.Return)
    return self.function


 proc parsePrecedence(self: ref Compiler, precedence: Precedence) =
+    ## Parses expressions using pratt's elegant algorithm to precedence parsing
    discard self.parser.advance()
    var prefixRule = getRule(self.parser.previous.kind).prefix
-    if prefixRule == nil:
+    if prefixRule == nil:   # If there is no prefix rule than an expression is expected
        self.parser.parseError(self.parser.previous, "Expecting expression")
        return
-    var canAssign = precedence <= Precedence.Assignment
-    self.prefixRule(canAssign)
+    var canAssign = precedence <= Precedence.Assignment   # This is used to detect invalid assignment targets
+    # such as "hello" = 3;
+    self.prefixRule(canAssign)   # otherwise call the prefix rule (e.g. for binary negation)
    if self.parser.previous.kind == EOF:
-        self.parser.current -= 1
+        self.parser.current -= 1    # If we're at EOF, we bail out and restore the EOF terminator so that
+        # the parser behaves accordingly later on
        return
-    while precedence <= (getRule(self.parser.peek.kind).precedence):
+    while precedence <= (getRule(self.parser.peek.kind).precedence):  # This will parse all expressions with the same precedence
+    # or lower to the current expression
        var infixRule = getRule(self.parser.advance.kind).infix
-        if self.parser.peek.kind != EOF:
+        if self.parser.peek().kind != EOF:
            self.infixRule(canAssign)
        else:
            self.parser.parseError(self.parser.previous, "Expecting expression, got EOF")
@ -168,12 +212,14 @@ proc parsePrecedence(self: ref Compiler, precedence: Precedence) =


 proc expression(self: ref Compiler) =
-    self.parsePrecedence(Precedence.Assignment)
+    ## Parses expressions
+    self.parsePrecedence(Precedence.Assignment)  # The highest-level expression is assignment


 proc binary(self: ref Compiler, canAssign: bool) =
-    var operator = self.parser.previous.kind
-    var rule = getRule(operator)
+    ## Parses binary operators
+    var operator = self.parser.previous().kind
+    var rule = getRule(operator)  
    self.parsePrecedence(Precedence((int rule.precedence) + 1))
    case operator:
        of PLUS:
@ -211,10 +257,12 @@ proc binary(self: ref Compiler, canAssign: bool) =
        of BAND:
            self.emitByte(OpCode.Band)
        else:
-            return
+            discard # Unreachable


 proc unary(self: ref Compiler, canAssign: bool) =
+    ## Parses unary expressions such as negation or
+    ## binary inversion
    var operator = self.parser.previous().kind
    if self.parser.peek().kind != EOF:
        self.parsePrecedence(Precedence.Unary)
@ -233,48 +281,66 @@ proc unary(self: ref Compiler, canAssign: bool) =


 template markObject*(self: ref Compiler, obj: untyped): untyped =
+    ## Marks compile-time objects (since those take up memory as well)
+    ## for the VM to reclaim space later on
    self.objects.add(obj)
    obj


 proc strVal(self: ref Compiler, canAssign: bool) =
+    ## Parses string literals
    var str = self.parser.previous().lexeme
-    var delimiter = &"{str[0]}"
+    var delimiter = &"{str[0]}"    # TODO: Add proper escape sequences support
    str = str.unescape(delimiter, delimiter)
    self.emitConstant(Value(kind: OBJECT, obj: self.markObject(newString(str))))


 proc bracketAssign(self: ref Compiler, canAssign: bool) =
+    ## Parses assignments such as a[0] = "something"
    discard # TODO -> Implement this


 proc bracket(self: ref Compiler, canAssign: bool) =
+    ## Parses slice expressions, such as "hello"[0]
+    ## Slice can take up to two arguments, a start
+    ## and an end index in the chosen iterable.
+    ## Both arguments are optional, so doing "hi"[::]
+    ## will basically copy the string into a new object.
+    ## Indexes start from 0, and while the start index is
+    ## inclusive, the end index is not. If an end index is 
+    ## not specified like this "hello"[0:], then the it is
+    ## assumed to be the length of the iterable. Likewise,
+    ## if the start index is missing, it is assumed to be 0.
+    ## Like in Python, using an end index that's out of bounds
+    ## will not raise an error. Doing "hello"[0:999] will just
+    ## return the whole string instead 
    if self.parser.peek.kind == COLON:
        self.emitByte(OpCode.Nil)
        discard self.parser.advance()
-        if self.parser.peek.kind == RS:
+        if self.parser.peek().kind == RS:
            self.emitByte(OpCode.Nil)
        else:
            self.parsePrecedence(Precedence.Term)
        self.emitByte(OpCode.SliceRange)
    else:
        self.parsePrecedence(Precedence.Term)
-        if self.parser.peek.kind == RS:
+        if self.parser.peek().kind == RS:
            self.emitByte(OpCode.Slice)
-        elif self.parser.peek.kind == COLON:
+        elif self.parser.peek().kind == COLON:
            discard self.parser.advance()
-            if self.parser.peek.kind == RS:
+            if self.parser.peek().kind == RS:
                self.emitByte(OpCode.Nil)
            else:
                self.parsePrecedence(Precedence.Term)
            self.emitByte(OpCode.SliceRange)
-    if self.parser.peek.kind == EQ:
+    if self.parser.peek().kind == EQ:
        discard self.parser.advance()
        self.parsePrecedence(Precedence.Term)
    self.parser.consume(TokenType.RS, "Expecting ']' after slice expression")


 proc literal(self: ref Compiler, canAssign: bool) =
+    ## Parses literal values such as true, nan and inf
    case self.parser.previous().kind:
        of TRUE:
            self.emitByte(OpCode.True)
@ -291,11 +357,15 @@ proc literal(self: ref Compiler, canAssign: bool) =


 proc number(self: ref Compiler, canAssign: bool) =
+    ## Parses numerical constants
    var value = self.parser.previous().literal
    self.emitConstant(value)


 proc grouping(self: ref Compiler, canAssign: bool) =
+    ## Parses parenthesized expressions. The only interesting
+    ## semantic about parentheses is that they allow lower-precedence
+    ## expressions where a higher precedence one is expected
    if self.parser.match(EOF):
        self.parser.parseError(self.parser.previous, "Expecting ')'")
    elif self.parser.match(RP):
@ -306,12 +376,29 @@ proc grouping(self: ref Compiler, canAssign: bool) =


 proc synchronize(self: ref Compiler) =
+    ## Synchronizes the parser's state. This is useful when
+    ## dealing with parsing errors. When an error occurs, we
+    ## note it with our nice panicMode and hadError fields, but
+    ## that in itself doesn't allow the parser to go forward
+    ## in the code and report other possible errors. On the 
+    ## other hand, attempting to start parsing the source
+    ## right after an error has occurred could lead to a 
+    ## cascade of unhelpful error messages that complicate
+    ## debugging issues. So, when an error occurs, we try
+    ## to get back into a state that at least allows us to keep
+    ## parsing and pretend the error never happened (the code
+    ## would not be compiled anyway so we might as well tell the
+    ## user if anything else is wrong with their code). The parser
+    ## will skip to the next valid token for a statement, like an
+    ## if or a for loop or a class declaration, and then keep
+    ## parsing from there. Note that hadError is never reset, but
+    ## panidMode is
    self.parser.panicMode = false
-    while self.parser.peek.kind != EOF:
+    while self.parser.peek().kind != EOF:   # Infinite loops are bad, so we must take EOF into account
        if self.parser.previous().kind == SEMICOLON:
            return
-        case self.parser.peek.kind:
-            of TokenType.CLASS, FUN, VAR, TokenType.FOR, IF, TokenType.WHILE, RETURN:
+        case self.parser.peek().kind:
+            of TokenType.CLASS, FUN, VAR, TokenType.FOR, IF, TokenType.WHILE, RETURN:   # We found a statement boundary, so the parser bails out
                return
            else:
                discard
@ -319,20 +406,35 @@ proc synchronize(self: ref Compiler) =


 proc identifierConstant(self: ref Compiler, tok: Token): uint8 =
+    ## Emits instructions for identifiers
    return self.makeConstant(Value(kind: OBJECT, obj: self.markObject(newString(tok.lexeme))))


 proc identifierLongConstant(self: ref Compiler, tok: Token): array[3, uint8] =
+    ## Same as identifierConstant, but this is used when the constant table is longer
+    ## than 255 elements
    return self.makeLongConstant(Value(kind: OBJECT, obj: self.markObject(newString(tok.lexeme))))


 proc addLocal(self: ref Compiler, name: Token) =
+    ## Stores a local variable. Local name resolution
+    ## happens at compile time rather than runtime,
+    ## unlike global variables which are treated differently.
+    ## Note that at first, a local is in a special "uninitialized"
+    ## state, this is useful to detect errors such as var a = a; 
+    ## inside local scopes
    var local = Local(name: name, depth: -1)
    inc(self.localCount)
    self.locals.add(local)


 proc declareVariable(self: ref Compiler) =
+    ## Declares a variable, this is only useful
+    ## for local variables, there is no way to
+    ## "declare" a global at compile time. This
+    ## assumption works because locals
+    ## and temporaries have stack semantics inside
+    ## local scopes
    if self.scopeDepth == 0:
        return
    var name = self.parser.previous()
@ -340,6 +442,7 @@ proc declareVariable(self: ref Compiler) =


 proc parseVariable(self: ref Compiler, message: string): uint8 =
+    ## Parses variables and declares them
    self.parser.consume(ID, message)
    self.declareVariable()
    if self.scopeDepth > 0:
@ -348,6 +451,9 @@ proc parseVariable(self: ref Compiler, message: string): uint8 =


 proc parseLongVariable(self: ref Compiler, message: string): array[3, uint8] =
+    ## Parses variables and declares them. This is used in place
+    ## of parseVariable when there's more than 255 constants
+    ## in the chunk table
    self.parser.consume(ID, message)
    self.declareVariable()
    if self.scopeDepth > 0:
@ -356,12 +462,18 @@ proc parseLongVariable(self: ref Compiler, message: string): array[3, uint8] =


 proc markInitialized(self: ref Compiler) =
+    ## Marks the latest defined global as 
+    ## initialized and ready for use
    if self.scopeDepth == 0:
        return
    self.locals[self.localCount - 1].depth = self.scopeDepth


 proc defineVariable(self: ref Compiler, idx: uint8) =
+    ## Defines a variable, emitting appropriate
+    ## instructions if we're in the local scope
+    ## or marking the last local as initialized
+    ## otherwise
    if self.scopeDepth > 0:
        self.markInitialized()
        return
@ -369,6 +481,8 @@ proc defineVariable(self: ref Compiler, idx: uint8) =


 proc defineVariable(self: ref Compiler, idx: array[3, uint8]) =
+    ## Same as defineVariable, but this is used when
+    ## there's more than 255 locals in the chunk's table
    if self.scopeDepth > 0:
        self.markInitialized()
        return
@ -377,6 +491,8 @@ proc defineVariable(self: ref Compiler, idx: array[3, uint8]) =


 proc resolveLocal(self: ref Compiler, name: Token): int =
+    ## Resolves a local variable and catches errors such as
+    ## var a = a
    var i = self.localCount - 1
    for local in reversed(self.locals):
        if local.name.lexeme == name.lexeme:
@ -388,6 +504,8 @@ proc resolveLocal(self: ref Compiler, name: Token): int =


 proc namedVariable(self: ref Compiler, tok: Token, canAssign: bool) =
+    ## Handles local and global variables assignment, as well
+    ## as variable resolution.
    var arg = self.resolveLocal(tok)
    var
        get: OpCode
@ -407,6 +525,9 @@ proc namedVariable(self: ref Compiler, tok: Token, canAssign: bool) =


 proc namedLongVariable(self: ref Compiler, tok: Token, canAssign: bool) =
+    ## Handles local and global variables assignment, as well
+    ## as variable resolution. This is only called when the constants
+    ## table's length exceeds 255
    var arg = self.resolveLocal(tok)
    var casted = cast[array[3, uint8]](arg)
    var
@ -430,6 +551,8 @@ proc namedLongVariable(self: ref Compiler, tok: Token, canAssign: bool) =


 proc variable(self: ref Compiler, canAssign: bool) =
+    ## Emits the code to declare a variable,
+    ## both locally and globally
    if self.locals.len < 255:
        self.namedVariable(self.parser.previous(), canAssign)
    else:
@ -437,6 +560,9 @@ proc variable(self: ref Compiler, canAssign: bool) =


 proc varDeclaration(self: ref Compiler) =
+    ## Parses a variable declaration, taking into account
+    ## the possibility that the chunk table could already
+    ## be bigger than 255 elements
    var shortName: uint8
    var longName: array[3, uint8]
    var useShort: bool = true
@ -457,6 +583,9 @@ proc varDeclaration(self: ref Compiler) =


 proc expressionStatement(self: ref Compiler) =
+    ## Parses an expression statement, which is
+    ## an expression followed by a semicolon. It then
+    ## emits a pop instruction 
    self.expression()
    self.parser.consume(SEMICOLON, "Missing semicolon after expression")
    self.emitByte(OpCode.Pop)
@ -484,16 +613,25 @@ proc deleteVariable(self: ref Compiler, canAssign: bool) =


 proc parseBlock(self: ref Compiler) =
+    ## Parses a block statement, which is basically
+    ## a list of other statements
    while not self.parser.check(RB) and not self.parser.check(EOF):
        self.declaration()
    self.parser.consume(RB, "Expecting '}' after block statement")


 proc beginScope(self: ref Compiler) =
+    ## Begins a scope by increasing the
+    ## current scope depth. This is literally 
+    ## all it takes to create a scope, since the
+    ## only semantically interesting behavior of
+    ## scopes is a change in names resolution
    inc(self.scopeDepth)


 proc endScope(self: ref Compiler) =
+    ## Ends a scope, popping off any local that
+    ## was created inside it along the way
    self.scopeDepth = self.scopeDepth - 1
    while self.localCount > 0 and self.locals[self.localCount - 1].depth > self.scopeDepth:
        self.emitByte(OpCode.Pop)
@ -501,6 +639,9 @@ proc endScope(self: ref Compiler) =


 proc emitJump(self: ref Compiler, opcode: OpCode): int =
+    ## Emits a jump instruction with a placeholder offset
+    ## that is later patched, check patchJump for more info
+    ## about how jumps work
    self.emitByte(opcode)
    self.emitByte(0xff)
    self.emitByte(0xff)
@ -508,6 +649,19 @@ proc emitJump(self: ref Compiler, opcode: OpCode): int =


 proc patchJump(self: ref Compiler, offset: int) =
+    ## Patches a previously emitted jump instruction.
+    ## Since it's impossible to know how much code
+    ## needs to be jumped before compiling the code
+    ## itself, jumps are first encoded with a placeholder
+    ## offset. Then, after the code that has to be jumped
+    ## over has been compiled, its size is known and the
+    ## previously emitted offset is replaced with the actual
+    ## jump size.
+    ## Note that, due to how the language is designed,
+    ## only up to 2^16 bytecode instructions can
+    ## be jumped over, so the size of the if/else conditions
+    ## or loops is limited (hopefully 65 thousands and change
+    ## instructions are enough for everyone)
    var jump = self.currentChunk.code.len - offset - 2
    if jump > (int uint16.high):
        self.compileError("too much code to jump over")
@ -517,6 +671,7 @@ proc patchJump(self: ref Compiler, offset: int) =


 proc ifStatement(self: ref Compiler) =
+    ## Parses if statements in a C-style fashion
    self.parser.consume(LP, "The if condition must be parenthesized")
    if self.parser.peek.kind != EOF:
        self.expression()
@ -539,6 +694,7 @@ proc ifStatement(self: ref Compiler) =


 proc emitLoop(self: ref Compiler, start: int) =
+    ## Creates a loop and emits related instructions.
    self.emitByte(OpCode.Loop)
    var offset = self.currentChunk.code.len - start + 2
    if offset > (int uint16.high):
@ -549,6 +705,9 @@ proc emitLoop(self: ref Compiler, start: int) =


 proc endLooping(self: ref Compiler) =
+    ## This method is used to make
+    ## the break statement work and patch
+    ## it with a jump instruction
    if self.loop.loopEnd != -1:
        self.patchJump(self.loop.loopEnd)
        self.emitByte(OpCode.Pop)
@ -564,6 +723,7 @@ proc endLooping(self: ref Compiler) =


 proc whileStatement(self: ref Compiler) =
+    ## Parses while loops in a C-style fashion
    var loop = Loop(depth: self.scopeDepth, outer: self.loop, start: self.currentChunk.code.len, alive: true, loopEnd: -1)
    self.loop = loop
    self.parser.consume(LP, "The loop condition must be parenthesized")
@ -587,6 +747,7 @@ proc whileStatement(self: ref Compiler) =


 proc forStatement(self: ref Compiler) =
+    ## Parses for loops in a C-style fashion
    self.beginScope()
    self.parser.consume(LP, "The loop condition must be parenthesized")
    if self.parser.peek.kind != EOF:
@ -634,6 +795,9 @@ proc forStatement(self: ref Compiler) =


 proc parseBreak(self: ref Compiler) =
+    ## Parses break statements. A break
+    ## statement causes the current loop
+    ## to exit and jump to its end
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous, "'break' outside loop")
    else:
@ -645,6 +809,7 @@ proc parseBreak(self: ref Compiler) =
        discard self.emitJump(OpCode.Break)

 proc parseAnd(self: ref Compiler, canAssign: bool) =
+    ## Parses expressions such as a and b
    var jump = self.emitJump(OpCode.JumpIfFalse)
    self.emitByte(OpCode.Pop)
    self.parsePrecedence(Precedence.And)
@ -652,6 +817,7 @@ proc parseAnd(self: ref Compiler, canAssign: bool) =


 proc parseOr(self: ref Compiler, canAssign: bool) =
+    ## Parses expressions such as a or b
    var elseJump = self.emitJump(OpCode.JumpIfFalse)
    var endJump = self.emitJump(OpCode.Jump)
    self.patchJump(elseJump)
@ -661,6 +827,9 @@ proc parseOr(self: ref Compiler, canAssign: bool) =


 proc continueStatement(self: ref Compiler) =
+    ## Parses continue statements inside loops.
+    ## The continue statements causes the loop to skip
+    ## to the next iteration
    if not self.loop.alive:
        self.parser.parseError(self.parser.previous, "'continue' outside loop")
    else:
@ -673,6 +842,10 @@ proc continueStatement(self: ref Compiler) =


 proc parseFunction(self: ref Compiler, funType: FunctionType) =
+    ## Parses function declarations. Functions can have
+    ## keyword arguments (WIP), but once a parameter is declared
+    ## as a keyword one, all subsequent parameters must be 
+    ## keyword ones as well
    var self = initCompiler(funType, self, self.parser, self.file)
    self.beginScope()
    self.parser.consume(LP, "Expecting '(' after function name")
@ -721,6 +894,8 @@ proc parseFunction(self: ref Compiler, funType: FunctionType) =


 proc funDeclaration(self: ref Compiler) =
+    ## Parses function declarations and declares
+    ## them in the current scope
    var funName = self.parseVariable("expecting function name")
    self.markInitialized()
    self.parseFunction(FunctionType.FUNC)
@ -728,6 +903,7 @@ proc funDeclaration(self: ref Compiler) =


 proc argumentList(self: ref Compiler): uint8 =
+    ## Parses arguments passed to function calls
    result = 0
    if not self.parser.check(RP):
        while true:
@ -742,11 +918,14 @@ proc argumentList(self: ref Compiler): uint8 =


 proc call(self: ref Compiler, canAssign: bool) =
+    ## Emits appropriate bytecode to call
+    ## a function
    var argCount = self.argumentList()
    self.emitBytes(OpCode.Call, argCount)


 proc statement(self: ref Compiler) =
+    ## Parses statements
    if self.parser.match(TokenType.FOR):
        self.forStatement()
    elif self.parser.match(IF):
@ -766,6 +945,7 @@ proc statement(self: ref Compiler) =


 proc declaration(self: ref Compiler) =
+    ## Parses declarations
    if self.parser.match(FUN):
        self.funDeclaration()
    elif self.parser.match(VAR):
@ -776,7 +956,8 @@ proc declaration(self: ref Compiler) =
        self.synchronize()


-var rules: array[TokenType, ParseRule] = [
+# The array of all parse rules
+const rules: array[TokenType, ParseRule] = [
    makeRule(nil, binary, Precedence.Term), # PLUS
    makeRule(unary, binary, Precedence.Term), # MINUS
    makeRule(nil, binary, Precedence.Factor), # SLASH
@ -835,10 +1016,15 @@ var rules: array[TokenType, ParseRule] = [


 proc getRule(kind: TokenType): ParseRule =
+    ## Returns an appropriate precedence rule
+    ## object for a given token type
    result = rules[kind]


 proc compile*(self: ref Compiler, source: string): ptr Function =
+    ## Compiles a source string into a function
+    ## object. This wires up all the code
+    ## inside the parser and the lexer
    var scanner = initLexer(source, self.file)
    var tokens = scanner.lex()
    if not scanner.errored:
@ -855,9 +1041,11 @@ proc compile*(self: ref Compiler, source: string): ptr Function =


 proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil, parser: Parser = initParser(@[], ""), file: string): ref Compiler =
+    ## Initializes a new compiler object and returns a reference
+    ## to it
    result = new(Compiler)
    result.parser = parser
-    result.function = nil
+    result.function = nil   # Garbage collection paranoia
    result.locals = @[]
    result.scopeDepth = 0
    result.localCount = 0
@ -870,10 +1058,10 @@ proc initCompiler*(context: FunctionType, enclosing: ref Compiler = nil, parser:
    result.locals.add(Local(depth: 0, name: Token(kind: EOF, lexeme: "")))
    inc(result.localCount)
    result.function = result.markObject(newFunction())
-    if context != SCRIPT:
+    if context != SCRIPT:   # If we're compiling a function, we give it its name
        result.function.name = newString(enclosing.parser.previous().lexeme)

-# so that the compiler can be executed on its own
+# This wat the compiler can be executed on its own
 # without the VM
 when isMainModule:
    var compiler: ref Compiler = initCompiler(SCRIPT, file="test")
--- a/nim/lexer.nim
+++ b/nim/lexer.nim
@ -1,10 +1,12 @@
-# A simple tokenizer implementation with one character of lookahead.
-# This module has been designed to be easily extendible in its functionality
-# given that JAPL is in a state of high activity and many features are
-# being added along the way. To add support for a new keyword, just create
-# an appropriate TokenType entry in the enum in the file at meta/tokenotype.nim
-# and then add it to the constant RESERVED table. A similar approach applies for
-# other tokens, but multi-character ones require more tweaking
+## A simple tokenizer implementation with one character of lookahead.
+## This module has been designed to be easily extendible in its functionality
+## given that JAPL is in a state of high activity and many features are
+## being added along the way. To add support for a new keyword, just create
+## an appropriate TokenType entry in the enum in the file at meta/tokentype.nim
+## and then add it to the constant RESERVED table. A similar approach applies for
+## other tokens, but multi-character ones require more tweaking.
+## Since this lexer scans the given source string character by character, unicode
+## identifiers are not supported

 import system
 import strutils
@ -14,7 +16,7 @@ import meta/tokentype
 import meta/tokenobject
 import meta/valueobject

-
+# Table of all tokens except reserved keywords
 const TOKENS = to_table({
              '(': TokenType.LP, ')': TokenType.RP,
              '{': TokenType.LB, '}': TokenType.RB,
@ -29,6 +31,7 @@ const TOKENS = to_table({
              '&': TokenType.BAND, '|': TokenType.BOR,
              '~': TokenType.TILDE})

+# Constant table storing all the reserved keywords for JAPL
 const RESERVED = to_table({
                "or": TokenType.OR, "and": TokenType.AND,
                "class": TokenType.CLASS, "fun": TokenType.FUN,
@ -51,15 +54,21 @@ type
        errored*: bool
        file*: string

+
 func initLexer*(source: string, file: string): Lexer =
-  result = Lexer(source: source, tokens: @[], line: 1, start: 0, current: 0, errored: false, file: file)
+    ## Initializes the lexer
+    result = Lexer(source: source, tokens: @[], line: 1, start: 0, current: 0, errored: false, file: file)


 proc done(self: Lexer): bool =
+    ## Returns true if we reached EOF
    result = self.current >= self.source.len


 proc step(self: var Lexer): char =
+    ## Steps one character forward in the
+    ## source file. A null terminator is returned
+    ## if the lexer is at EOF
    if self.done():
        return '\0'
    self.current = self.current + 1
@ -67,6 +76,10 @@ proc step(self: var Lexer): char =


 proc peek(self: Lexer): char =
+    ## Returns the current character in the
+    ## source file without consuming it.
+    ## A null terminator is returned
+    ## if the lexer is at EOF
    if self.done():
        result = '\0'
    else:
@ -74,15 +87,23 @@ proc peek(self: Lexer): char =


 proc match(self: var Lexer, what: char): bool =
+    ## Returns true if the next character matches
+    ## the given character, and consumes it.
+    ## Otherwise, false is returned
    if self.done():
        return false
    elif self.peek() != what:
        return false
-    self.current = self.current + 1
+    self.current += 1
    return true


 proc peekNext(self: Lexer): char =
+    ## Returns the next character
+    ## in the source file without
+    ## consuming it.
+    ## A null terminator is returned
+    ## if the lexer is at EOF
    if self.current + 1 >= self.source.len:
        result = '\0'
    else:
@ -90,6 +111,7 @@ proc peekNext(self: Lexer): char =


 proc createToken(self: var Lexer, tokenType: TokenType, literal: Value): Token =
+    ## Creates a token object for later use in the parser
    result = Token(kind: tokenType,
                   lexeme: self.source[self.start..<self.current],
                   literal: literal,
@ -98,14 +120,13 @@ proc createToken(self: var Lexer, tokenType: TokenType, literal: Value): Token =


 proc parseString(self: var Lexer, delimiter: char) =
+    ## Parses string literals
    while self.peek() != delimiter and not self.done():
        if self.peek() == '\n':
            self.line = self.line + 1
        discard self.step()
    if self.done():
-        echo "Traceback (most recent call last):"
-        echo &"  File '{self.file}', line {self.line} at '{self.peek()}'"
-        echo &"SyntaxError: Unterminated string literal"
+        stderr.write(&"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> Unterminated string literal")
        self.errored = true
    discard self.step()
    let value = self.source[self.start..<self.current].asStr() # Get the value between quotes
@ -114,6 +135,10 @@ proc parseString(self: var Lexer, delimiter: char) =


 proc parseNumber(self: var Lexer) =
+    ## Parses numeric literals
+    # TODO: Move the integer conversion to the
+    # compiler for finer error reporting and
+    # handling
    while isDigit(self.peek()):
        discard self.step()
    try:
@ -127,13 +152,14 @@ proc parseNumber(self: var Lexer) =
            var value = parseInt(self.source[self.start..<self.current]).asInt()
            self.tokens.add(self.createToken(TokenType.NUMBER, value))
    except ValueError:
-        echo "Traceback (most recent call last):"
-        echo &"  File '{self.file}', line {self.line} at '{self.peek()}'"
-        echo "OverflowError: integer is too big to convert to int64"
+        stderr.write(&"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> integer is too big to convert to int64")
        self.errored = true


 proc parseIdentifier(self: var Lexer) =
+    ## Parses identifiers, note that
+    ## multi-character tokens such as 
+    ## UTF runes are not supported
    while self.peek().isAlphaNumeric():
        discard self.step()
    var text: string = self.source[self.start..<self.current]
@ -145,13 +171,17 @@ proc parseIdentifier(self: var Lexer) =


 proc parseComment(self: var Lexer) =
+    ## Parses multi-line comments. They start
+    ## with /* and end with */, and can be nested.
+    ## A missing comment terminator will raise an
+    ## error
    var closed = false
    while not self.done():
        var finish = self.peek() & self.peekNext()
        if finish == "/*":   # Nested comments
            discard self.step()
            discard self.step()
-            self.parseComment()
+            self.parseComment()   # Recursively parse any other enclosing comments
        elif finish == "*/":
            closed = true
            discard self.step()   # Consume the two ends
@ -160,14 +190,15 @@ proc parseComment(self: var Lexer) =
        discard self.step()
    if self.done() and not closed:
        self.errored = true
-        echo "Traceback (most recent call last):"
-        echo &"  File '{self.file}', line {self.line} at '{self.peek()}'"
-        echo &"SyntaxError: Unexpected EOF"
+        stderr.write(&"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> Unexpected EOF")


 proc scanToken(self: var Lexer) =
+    ## Scans a single token. This method is
+    ## called iteratively until the source
+    ## file reaches EOF
    var single = self.step()
-    if single in [' ', '\t', '\r']:
+    if single in [' ', '\t', '\r']:  # We skip whitespaces, tabs and 
        return
    elif single == '\n':
        self.line += 1
@ -201,12 +232,12 @@ proc scanToken(self: var Lexer) =
            self.tokens.add(self.createToken(TOKENS[single], asStr(&"{single}")))
    else:
        self.errored = true
-        echo "Traceback (most recent call last):"
-        echo &"  File '{self.file}', line {self.line} at '{self.peek()}'"
-        echo &"SyntaxError: Unexpected character '{single}'"
+        stderr.write(&"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> Unexpected token '{single}'")


 proc lex*(self: var Lexer): seq[Token] =
+    ## Lexes a source file, converting a stream
+    ## of characters into a series of tokens
    while not self.done():
        self.start = self.current
        self.scanToken()
--- a/nim/memory.nim
+++ b/nim/memory.nim
@ -1,10 +1,10 @@
-# This module handles all memory allocation and deallocation for the entire
-# JAPL runtime. Forcing the entire language to route memory allocation
-# into a single module makes it easy to track how much memory we have allocated
-# and simplifies the implementation of a garbage collector.
-# To also make our life as language implementers easier, the internals of the
-# interpreter (which means the tokenizer, the compiler the debugger,
-# and some parts of the Virtual Machine) will use nim's GC
+## This module handles all memory allocation and deallocation for the entire
+## JAPL runtime. Forcing the entire language to route memory allocation
+## into a single module makes it easy to track how much memory we have allocated
+## and simplifies the implementation of a garbage collector.
+## To also make our life as language implementers easier, the internals of the
+## interpreter (which means the tokenizer, the compiler the debugger,
+## and some parts of the Virtual Machine) will use nim's GC


 import segfaults
@ -18,7 +18,7 @@ proc reallocate*(pointer: pointer, oldSize: int, newSize: int): pointer =
            return nil
        result = realloc(pointer, newSize)
    except NilAccessError:
-        echo "MemoryError: could not allocate memory, segmentation fault"
+        echo "A fatal error occurred -> could not allocate memory, segmentation fault"
        quit(71)


--- a/nim/vm.nim
+++ b/nim/vm.nim
@ -1,28 +1,22 @@
+## The JAPL runtime environment, or virtual machine. This is
+## a stack-based bytecode VM.
+
 import algorithm
-import strutils
 import strformat
 import math
 import lenientops
 import common
 import compiler
 import tables
-import util/debug
 import meta/chunk
 import meta/valueobject
 import types/exceptions
 import types/objecttype
 import types/stringtype
 import types/functiontype
-import bitops
 import memory


-proc `**`(a, b: int): int = pow(a.float, b.float).int
-
-
-proc `**`(a, b: float): float = pow(a, b)
-
-
 type
    KeyboardInterrupt* = object of CatchableError