Implemented jump opcodes and LoadUInt64 in the VM; made some style and proc changes in the compiler; chunk constants are now type-aware; fixed various inheritance mistakes in the AST nodes; refined the type inference system; moved types back to the compiler; and completely redesigned operator parsing to be more modular

This commit is contained in:
Mattia Giambirtone 2022-05-07 10:48:01 +02:00
parent 4af22621ea
commit e823a459c8
10 changed files with 718 additions and 724 deletions

View File

@ -86,6 +86,13 @@ proc pop(self: PeonVM): PeonObject =
return self.stack[self.sp]
proc peek(self: PeonVM): PeonObject =
## Returns the element at the top
## of the stack without consuming
## it
return self.stack[self.sp]
proc readByte(self: PeonVM): uint8 =
## Reads a single byte from the
## bytecode and returns it as an
@ -99,7 +106,7 @@ proc readShort(self: PeonVM): uint16 =
## bytecode and returns them
## as an unsigned 16 bit
## integer
var arr: array[2, uint8]
var arr: array[2, uint8] = [self.readByte(), self.readByte()]
copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
@ -110,29 +117,28 @@ proc readLong(self: PeonVM): uint32 =
## integer. Note however that
## the boundary is capped at
## 24 bits instead of 32
var arr: array[3, uint8]
var arr: array[3, uint8] = [self.readByte(), self.readByte(), self.readByte()]
copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
proc readLongLong(self: PeonVM): uint64 =
## Reads 4 bytes from the
## bytecode and returns them
## as an unsigned 64 bit
## integer
var arr: array[4, uint8]
copyMem(result.addr, unsafeAddr(arr), sizeof(arr))
proc readInt64(self: PeonVM): PeonObject =
proc readInt64(self: PeonVM, idx: int): PeonObject =
## Reads a constant from the
## chunk's constant table and
## returns a Peon object. Assumes
## the constant's type is an Int64
var arr = [self.readByte(), self.readByte(), self.readByte()]
var idx: int
copyMem(idx.addr, arr.addr, sizeof(arr))
# TODO
# result = PeonObject()
## the constant is an Int64
var arr = [self.chunk.byteConsts[idx], self.chunk.byteConsts[idx + 1], self.chunk.byteConsts[idx + 2], self.chunk.byteConsts[idx + 3]]
result = PeonObject(kind: Int64)
copyMem(result.long.addr, arr.addr, sizeof(arr))
proc readUInt64(self: PeonVM, idx: int): PeonObject =
## Reads a constant from the
## chunk's constant table and
## returns a Peon object. Assumes
## the constant is an UInt64
var arr = [self.chunk.byteConsts[idx], self.chunk.byteConsts[idx + 1], self.chunk.byteConsts[idx + 2], self.chunk.byteConsts[idx + 3]]
result = PeonObject(kind: UInt64)
copyMem(result.uLong.addr, arr.addr, sizeof(arr))
proc dispatch*(self: PeonVM) =
@ -141,25 +147,66 @@ proc dispatch*(self: PeonVM) =
while true:
instruction = OpCode(self.readByte())
case instruction:
of OpCode.True:
of LoadTrue:
self.push(self.getBool(true))
of OpCode.False:
of LoadFalse:
self.push(self.getBool(false))
of OpCode.Nan:
of LoadNan:
self.push(self.getNan())
of OpCode.Nil:
of LoadNil:
self.push(self.getNil())
of OpCode.Inf:
of LoadInf:
self.push(self.getInf(true))
of LoadInt64:
self.push(self.readInt64(int(self.readLong())))
of LoadUInt64:
self.push(self.readUInt64(int(self.readLong())))
of OpCode.Return:
# TODO
return
of OpCode.NoOp:
of NoOp:
continue
of OpCode.Pop:
of Pop:
discard self.pop()
of OpCode.Jump:
of Jump:
self.ip = int(self.readShort())
of JumpForwards:
self.ip += int(self.readShort())
of JumpBackwards:
self.ip -= int(self.readShort())
of JumpIfFalse:
if not self.peek().boolean:
self.ip += int(self.readShort())
of JumpIfTrue:
if self.peek().boolean:
self.ip += int(self.readShort())
of JumpIfFalsePop:
if not self.peek().boolean:
self.ip += int(self.readShort())
discard self.pop()
of JumpIfFalseOrPop:
if not self.peek().boolean:
self.ip += int(self.readShort())
else:
discard self.pop()
of LongJumpIfFalse:
if not self.peek().boolean:
self.ip += int(self.readLong())
of LongJumpIfFalsePop:
if not self.peek().boolean:
self.ip += int(self.readLong())
discard self.pop()
of LongJumpForwards:
self.ip += int(self.readLong())
of LongJumpBackwards:
self.ip -= int(self.readLong())
of LongJump:
self.ip = int(self.readLong())
of LongJumpIfFalseOrPop:
if not self.peek().boolean:
self.ip += int(self.readLong())
else:
discard self.pop()
else:
discard

View File

@ -14,8 +14,6 @@
import meta/token
import meta/ast
import meta/errors
import meta/bytecode
import meta/typing
import ../config
import ../util/multibyte
@ -24,41 +22,73 @@ import strformat
import algorithm
import parseutils
import strutils
import sequtils
export ast
export bytecode
export token
export multibyte
type
TypeKind* = enum
## An enumeration of compile-time
## types
Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Function, CustomType,
Nil, Nan, Bool, Inf
Type* = ref object
## A wrapper around
## compile-time types
node*: ASTNode
case kind*: TypeKind:
of Function:
returnType*: Type
else:
discard
# This way we don't have recursive dependency issues
import meta/bytecode
export bytecode
type
Name = ref object
## A compile-time wrapper around
## statically resolved names
name: IdentExpr # Name of the identifier
owner: string # Owner of the identifier (module)
depth: int # Scope depth
isPrivate: bool # Is this name private?
isConst: bool # Is this a constant?
isLet: bool # Can this name's value be mutated?
valueType: Type # The name's type
codePos: int # The position in the bytecode
# where this name's StoreVar
# instruction was emitted. This
# is kept so that once we detect
# this name as a closed-over variable
# we can change the StoreVar into a StoreHeap
# Name of the identifier
name: IdentExpr
# Owner of the identifier (module)
owner: string
# Scope depth
depth: int
# Is this name private?
isPrivate: bool
# Is this a constant?
isConst: bool
# Can this name's value be mutated?
isLet: bool
# The name's type
valueType: Type
# For variables, the position in the bytecode
# where its StoreVar instruction was emitted.
# For functions, this marks where the function's
# code begins
codePos: int
Loop = object
## A "loop object" used
## by the compiler to emit
## appropriate jump offsets
## for continue and break
## statements
start: int # Position in the bytecode where the loop starts
depth: int # Scope depth where the loop is located
breakPos: seq[int] # List of positions into our bytecode where we need to
# patch jumps. Used for break statements
# Position in the bytecode where the loop starts
start: int
# Scope depth where the loop is located
depth: int
# Absolute jump offsets into our bytecode that we need to
# patch. Used for break statements
breakPos: seq[int]
Compiler* = ref object
## A wrapper around the Peon compiler's state
@ -192,22 +222,21 @@ proc emitBytes(self: Compiler, bytarr: array[2, uint8]) =
self.emitBytes(bytarr[0], bytarr[1])
proc emitBytes(self: Compiler, bytarr: array[3, uint8]) =
proc emitBytes(self: Compiler, bytarr: openarray[uint8]) =
## Handy helper method to write an array of 3 bytes into
## the current chunk, calling emitByte on each of its
## elements
self.emitBytes(bytarr[0], bytarr[1])
self.emitByte(bytarr[2])
for b in bytarr:
self.emitByte(b)
proc makeConstant(self: Compiler, val: LiteralExpr): array[3, uint8] =
proc makeConstant(self: Compiler, val: Expression, kind: Type): array[3, uint8] =
## Adds a constant to the current chunk's constant table
## and returns its index as a 3-byte array of uint8s
result = self.chunk.addConstant(val)
result = self.chunk.addConstant(val, kind)
proc emitConstant(self: Compiler, obj: LiteralExpr) =
proc emitConstant(self: Compiler, obj: Expression, kind: Type) =
## Emits a LoadConstant instruction along
## with its operand
case self.inferType(obj).kind:
@ -215,7 +244,7 @@ proc emitConstant(self: Compiler, obj: LiteralExpr) =
self.emitByte(LoadInt64)
else:
discard # TODO
self.emitBytes(self.makeConstant(obj))
self.emitBytes(self.makeConstant(obj, kind))
proc emitJump(self: Compiler, opcode: OpCode): int =
@ -348,6 +377,83 @@ proc detectClosureVariable(self: Compiler, name: IdentExpr, depth: int = self.sc
self.chunk.code[entry.codePos + 3] = idx[2]
proc compareTypes(self: Compiler, a, b: Type): bool =
## Compares two type objects
## for equality (works with nil!)
if a == nil:
return b == nil
elif b == nil:
return a == nil
if a.kind != b.kind:
return false
case a.kind:
of Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Nil, Nan, Bool, Inf:
return true
of Function:
let
a = FunDecl(a.node)
b = FunDecl(b.node)
if a.name.token.lexeme != b.name.token.lexeme:
return false
elif a.arguments.len() != b.arguments.len():
return false
elif not self.compareTypes(self.inferType(a.returnType), self.inferType(b.returnType)):
return false
for (argA, argB) in zip(a.arguments, b.arguments):
if argA.mutable != argB.mutable:
return false
elif argA.isRef != argB.isRef:
return false
elif argA.isPtr != argB.isPtr:
return false
elif not self.compareTypes(self.inferType(argA.valueType), self.inferType(argB.valueType)):
return false
return true
else:
discard
proc toIntrinsic(name: string): Type =
## Converts a string to an intrinsic
## type if it is valid and returns nil
## otherwise
if name in ["int", "int64", "i64"]:
return Type(kind: Int64)
elif name in ["uint64", "u64"]:
return Type(kind: UInt64)
elif name in ["int32", "i32"]:
return Type(kind: Int32)
elif name in ["uint32", "u32"]:
return Type(kind: UInt32)
elif name in ["int16", "i16"]:
return Type(kind: Int16)
elif name in ["uint16", "u16"]:
return Type(kind: UInt16)
elif name in ["int8", "i8"]:
return Type(kind: Int8)
elif name in ["uint8", "u8"]:
return Type(kind: UInt8)
elif name in ["f64", "float", "float64"]:
return Type(kind: Float64)
elif name in ["f32", "float32"]:
return Type(kind: Float32)
elif name == "byte":
return Type(kind: Byte)
elif name == "char":
return Type(kind: Char)
elif name == "nan":
return Type(kind: Nan)
elif name == "nil":
return Type(kind: Nil)
elif name == "inf":
return Type(kind: Inf)
elif name == "bool":
return Type(kind: Bool)
else:
return nil
proc inferType(self: Compiler, node: LiteralExpr): Type =
## Infers the type of a given literal expression
@ -357,9 +463,9 @@ proc inferType(self: Compiler, node: LiteralExpr): Type =
if len(size) notin 1..2:
self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
if size.len() == 1:
return Type(kind: Int64)
return Type(node: node, kind: Int64)
let typ = size[1].toIntrinsic()
if typ != nil:
if not self.compareTypes(typ, nil):
return typ
else:
self.error(&"invalid type specifier '{size[1]}' for int")
@ -368,41 +474,61 @@ proc inferType(self: Compiler, node: LiteralExpr): Type =
if len(size) notin 1..2:
self.error("invalid state: inferValueType -> invalid size specifier (This is an internal error and most likely a bug!)")
if size.len() == 1 or size[1] == "f64":
return Type(kind: Float64)
return Type(node: node, kind: Float64)
let typ = size[1].toIntrinsic()
if typ != nil:
if not self.compareTypes(typ, nil):
return typ
else:
self.error(&"invalid type specifier '{size[1]}' for float")
of nilExpr:
return Type(kind: Nil)
return Type(node: node, kind: Nil)
of trueExpr:
return Type(kind: Bool)
return Type(node: node, kind: Bool)
of falseExpr:
return Type(kind: Bool)
return Type(node: node, kind: Bool)
of nanExpr:
return Type(kind: TypeKind.Nan)
return Type(node: node, kind: TypeKind.Nan)
of infExpr:
return Type(kind: TypeKind.Inf)
return Type(node: node, kind: TypeKind.Inf)
else:
discard # TODO
proc toIntrinsic(self: Compiler, typ: Expression): Type =
## Gets an expression's
## intrinsic type, if possible
if typ == nil:
return nil
case typ.kind:
of trueExpr, falseExpr, intExpr, floatExpr:
return typ.token.lexeme.toIntrinsic()
of identExpr:
let inferred = self.inferType(typ)
if inferred == nil:
return typ.token.lexeme.toIntrinsic()
return inferred
else:
discard
proc inferType(self: Compiler, node: Expression): Type =
## Infers the type of a given expression and
## returns it
case node.kind:
of identExpr:
let name = self.resolve(IdentExpr(node))
let node = IdentExpr(node)
let name = self.resolve(node)
if name != nil:
return name.valueType
else:
return node.name.lexeme.toIntrinsic()
of unaryExpr:
return self.inferType(UnaryExpr(node).a)
of binaryExpr:
let node = BinaryExpr(node)
var a = self.inferType(node.a)
var b = self.inferType(node.b)
if a != b:
if not self.compareTypes(a, b):
return nil
return a
of {intExpr, hexExpr, binExpr, octExpr,
@ -412,26 +538,6 @@ proc inferType(self: Compiler, node: Expression): Type =
return self.inferType(LiteralExpr(node))
else:
discard # Unreachable
proc inferType(self: Compiler, node: Declaration): Type =
## Infers the type of a given declaration if it's
## not already defined and returns it
case node.kind:
of funDecl:
var node = FunDecl(node)
let resolved = self.resolve(node.name)
if resolved != nil:
return resolved.valueType
of NodeKind.varDecl:
var node = VarDecl(node)
let resolved = self.resolve(node.name)
if resolved != nil:
return resolved.valueType
else:
return self.inferType(node.value)
else:
return # Unreachable
proc typeToStr(self: Compiler, typ: Type): string =
@ -450,58 +556,63 @@ proc typeToStr(self: Compiler, typ: Type): string =
var node = FunDecl(typ.node)
for i, argument in node.arguments:
result &= &"{argument.name.token.lexeme}: {self.typeToStr(self.inferType(argument.name))}"
if i < node.arguments.len():
if i < node.arguments.len() - 1:
result &= ", "
result &= ")"
of lambdaExpr:
var node = LambdaExpr(typ.node)
for i, argument in node.arguments:
result &= &"{argument.name.token.lexeme}: {argument.valueType}"
if i < node.arguments.len():
if i < node.arguments.len() - 1:
result &= ", "
result &= ")"
else:
discard # Unreachable
result &= &": {self.typeToStr(typ.returnType)}"
else:
discard
proc toIntrinsic(self: Compiler, typ: Expression): Type =
## Gets an expression's
## intrinsic type, if possible
if typ == nil:
return nil
case typ.kind:
of trueExpr, falseExpr, intExpr, floatExpr:
return typ.token.lexeme.toIntrinsic()
of identExpr:
let inferred = self.inferType(typ)
if inferred != nil:
return
proc inferType(self: Compiler, node: Declaration): Type =
## Infers the type of a given declaration
## and returns it
case node.kind:
of funDecl:
var node = FunDecl(node)
let resolved = self.resolve(node.name)
if resolved != nil:
return resolved.valueType
of NodeKind.varDecl:
var node = VarDecl(node)
let resolved = self.resolve(node.name)
if resolved != nil:
return resolved.valueType
else:
return self.inferType(node.value)
else:
discard
return # Unreachable
## End of utility functions
proc literal(self: Compiler, node: ASTNode) =
## Emits instructions for literals such
## as singletons, strings, numbers and
## collections
case node.kind:
of trueExpr:
self.emitByte(OpCode.True)
self.emitByte(LoadTrue)
of falseExpr:
self.emitByte(OpCode.False)
self.emitByte(LoadFalse)
of nilExpr:
self.emitByte(OpCode.Nil)
self.emitByte(LoadNil)
of infExpr:
self.emitByte(OpCode.Inf)
self.emitByte(LoadInf)
of nanExpr:
self.emitByte(OpCode.Nan)
self.emitByte(LoadNan)
of strExpr:
self.emitConstant(LiteralExpr(node))
self.emitConstant(LiteralExpr(node), Type(kind: String))
# TODO: Take size specifier into account!
of intExpr:
var x: int
var y = IntExpr(node)
@ -509,7 +620,7 @@ proc literal(self: Compiler, node: ASTNode) =
discard parseInt(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(y)
self.emitConstant(y, Type(kind: Int64))
of hexExpr:
var x: int
var y = HexExpr(node)
@ -517,9 +628,12 @@ proc literal(self: Compiler, node: ASTNode) =
discard parseHex(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
pos: (start: y.token.pos.start, stop: y.token.pos.start +
len($x)))))
let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
pos: (start: y.token.pos.start,
stop: y.token.pos.start + len($x))
)
)
self.emitConstant(node, Type(kind: Int64))
of binExpr:
var x: int
var y = BinExpr(node)
@ -527,9 +641,12 @@ proc literal(self: Compiler, node: ASTNode) =
discard parseBin(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
pos: (start: y.token.pos.start, stop: y.token.pos.start +
len($x)))))
let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
pos: (start: y.token.pos.start,
stop: y.token.pos.start + len($x))
)
)
self.emitConstant(node, Type(kind: Int64))
of octExpr:
var x: int
var y = OctExpr(node)
@ -537,9 +654,12 @@ proc literal(self: Compiler, node: ASTNode) =
discard parseOct(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
pos: (start: y.token.pos.start, stop: y.token.pos.start +
len($x)))))
let node = newIntExpr(Token(lexeme: $x, line: y.token.line,
pos: (start: y.token.pos.start,
stop: y.token.pos.start + len($x))
)
)
self.emitConstant(node, Type(kind: Int64))
of floatExpr:
var x: float
var y = FloatExpr(node)
@ -547,7 +667,7 @@ proc literal(self: Compiler, node: ASTNode) =
discard parseFloat(y.literal.lexeme, x)
except ValueError:
self.error("floating point value out of range")
self.emitConstant(y)
self.emitConstant(y, Type(kind: Float64))
of awaitExpr:
var y = AwaitExpr(node)
self.expression(y.expression)
@ -557,20 +677,11 @@ proc literal(self: Compiler, node: ASTNode) =
proc unary(self: Compiler, node: UnaryExpr) =
## Compiles unary expressions such as decimal or
## bitwise negation
## Compiles unary expressions such as decimal
## and bitwise negation
self.expression(node.a) # Pushes the operand onto the stack
case node.operator.kind:
of Minus:
self.emitByte(NoOp)
of Plus:
self.emitByte(NoOp)
of TokenType.LogicalNot:
self.emitByte(NoOp)
of Tilde:
self.emitByte(NoOp)
else:
self.error(&"invalid AST node of kind {node.kind} at unary(): {node} (This is an internal error and most likely a bug!)")
# TODO: Find implementation of
# the given operator and call it
proc binary(self: Compiler, node: BinaryExpr) =
@ -583,70 +694,7 @@ proc binary(self: Compiler, node: BinaryExpr) =
# TODO: Find implementation of
# the given operator and call it
case node.operator.kind:
of Plus:
# a + b
self.emitByte(NoOp)
of Minus:
# a - b
self.emitByte(NoOp)
of Star:
# a * b
self.emitByte(NoOp)
of DoubleStar:
# a ** b
self.emitByte(NoOp)
of Percentage:
# a % b
self.emitByte(NoOp)
of FloorDiv:
# a // b
self.emitByte(NoOp)
of Slash:
# a / b
self.emitByte(NoOp)
of Ampersand:
# a & b
self.emitByte(NoOp)
of Caret:
# a ^ b
self.emitByte(NoOp)
of Pipe:
# a | b
self.emitByte(NoOp)
of Is:
# a is b
self.emitByte(NoOp)
of IsNot:
# a isnot b
self.emitByte(NoOp)
of Of:
# a of b
self.emitByte(NoOp)
of As:
# a as b
self.emitByte(NoOp)
of RightShift:
# a >> b
self.emitByte(NoOp)
of LeftShift:
# a << b
self.emitByte(NoOp)
of LessThan:
# a < b
self.emitByte(NoOp)
of GreaterThan:
# a > b
self.emitByte(NoOp)
of DoubleEqual:
# a == b
self.emitByte(NoOp)
of LessOrEqual:
# a <= b
self.emitByte(NoOp)
of GreaterOrEqual:
# a >= b
self.emitByte(NoOp)
of LogicalAnd:
of NoMatch:
# a and b
self.expression(node.a)
var jump: int
@ -657,7 +705,7 @@ proc binary(self: Compiler, node: BinaryExpr) =
self.emitByte(Pop)
self.expression(node.b)
self.patchJump(jump)
of LogicalOr:
of EndOfFile:
# a or b
self.expression(node.a)
let jump = self.emitJump(JumpIfTrue)
@ -691,14 +739,17 @@ proc declareName(self: Compiler, node: Declaration) =
var node = FunDecl(node)
# Declares the function's name in the
# current scope but no StoreVar is emitted
# because a function's name is only useful
# at compile time
# because the name is only useful at compile time.
# TODO: Maybe emit some optional debugging
# metadata to let the VM know where a function's
# code begins and ends (similar to what gcc does with
# CFI in object files) to build stack traces
self.names.add(Name(depth: self.scopeDepth,
isPrivate: node.isPrivate,
isConst: false,
owner: self.currentModule,
valueType: Type(kind: Function, node: node),
codePos: -1,
valueType: Type(kind: Function, node: node, returnType: self.inferType(node.returnType)),
codePos: self.chunk.code.len(),
name: node.name,
isLet: false))
for argument in node.arguments:
@ -709,12 +760,13 @@ proc declareName(self: Compiler, node: Declaration) =
owner: self.currentModule,
isConst: false,
name: argument.name,
valueType: self.inferType(argument.name),
valueType: nil,
codePos: self.chunk.code.len(),
isLet: false))
self.names[^1].valueType = self.inferType(argument.valueType)
self.names[^1].valueType.node = argument.name
self.emitByte(StoreVar)
self.emitBytes(self.names.high().toTriple())
# TODO: Default arguments and unpacking
else:
discard # Unreachable
@ -729,7 +781,7 @@ proc identifier(self: Compiler, node: IdentExpr) =
# no matter the scope depth. If optimizations are enabled, the compiler
# will reuse the same constant every time it is referenced instead of
# allocating a new one each time
self.emitConstant(node)
self.emitConstant(node, self.inferType(node))
else:
self.detectClosureVariable(s.name)
let t = self.getStackPos(node)
@ -750,6 +802,36 @@ proc identifier(self: Compiler, node: IdentExpr) =
self.emitBytes(self.closedOver.high().toTriple())
proc findImpl(self: Compiler, node: FunDecl): seq[Name] =
## Looks for functions matching the given declaration
## in the code that has been compiled so far.
## Returns a list of each matching name object
for obj in reversed(self.names):
# Scopes are indexed backwards!
case obj.valueType.kind:
of Function:
if self.compareTypes(obj.valueType, self.inferType(node)):
result.add(obj)
else:
continue
proc findByName(self: Compiler, name: string): seq[Name] =
## Looks for objects that have been already declared
## with the given name
for obj in reversed(self.names):
if obj.name.token.lexeme == name:
result.add(obj)
proc findByType(self: Compiler, name: string, kind: Type): seq[Name] =
## Looks for objects that have already been declared
## with the given name and type
for obj in self.findByName(name):
if self.compareTypes(obj.valueType, kind):
result.add(obj)
proc assignment(self: Compiler, node: ASTNode) =
## Compiles assignment expressions
case node.kind:
@ -760,42 +842,12 @@ proc assignment(self: Compiler, node: ASTNode) =
if r == nil:
self.error(&"assignment to undeclared name '{name.token.lexeme}'")
elif r.isConst:
self.error(&"cannot assign to '{name.token.lexeme}'")
self.error(&"cannot assign to '{name.token.lexeme}' (constant)")
elif r.isLet:
self.error(&"cannot reassign '{name.token.lexeme}'")
self.expression(node.value)
let t = self.getStackPos(name)
let index = t.pos
case node.token.kind:
of InplaceAdd:
self.emitByte(NoOp)
of InplaceSub:
self.emitByte(NoOp)
of InplaceDiv:
self.emitByte(NoOp)
of InplaceMul:
self.emitByte(NoOp)
of InplacePow:
self.emitByte(NoOp)
of InplaceFloorDiv:
self.emitByte(NoOp)
of InplaceMod:
self.emitByte(NoOp)
of InplaceAnd:
self.emitByte(NoOp)
of InplaceXor:
self.emitByte(NoOp)
of InplaceRightShift:
self.emitByte(NoOp)
of InplaceLeftShift:
self.emitByte(NoOp)
else:
discard # Unreachable
# In-place operators just change
# what values is set to a given
# offset in a dynamic array, so we only
# need to perform the operation as usual
# and then store it again
if index != -1:
if not t.closedOver:
self.emitByte(StoreVar)
@ -805,7 +857,10 @@ proc assignment(self: Compiler, node: ASTNode) =
else:
self.error(&"reference to undeclared name '{node.token.lexeme}'")
of setItemExpr:
let typ = self.inferType(SetItemExpr(node))
let node = SetItemExpr(node)
let typ = self.inferType(node)
if typ == nil:
self.error(&"cannot determine the type of '{node.name.token.lexeme}'")
# TODO
else:
self.error(&"invalid AST node of kind {node.kind} at assignment(): {node} (This is an internal error and most likely a bug)")
@ -904,10 +959,16 @@ proc emitLoop(self: Compiler, begin: int) =
proc whileStmt(self: Compiler, node: WhileStmt) =
## Compiles C-style while loops
## Compiles C-style while loops and
## desugared C-style for loops
let start = self.chunk.code.len()
self.expression(node.condition)
let jump = self.emitJump(JumpIfFalsePop)
var jump: int
if self.enableOptimizations:
jump = self.emitJump(JumpIfFalsePop)
else:
jump = self.emitJump(JumpIfFalse)
self.emitByte(Pop)
self.statement(node.body)
self.patchJump(jump)
self.emitLoop(start)
@ -926,7 +987,7 @@ proc expression(self: Compiler, node: Expression) =
# Note that for setItem and assign we don't convert
# the node to its true type because that type information
# would be lost in the call anyway. The differentiation
# happens in self.assignment
# happens in self.assignment()
of setItemExpr, assignExpr:
self.assignment(node)
of identExpr:
@ -942,11 +1003,11 @@ proc expression(self: Compiler, node: Expression) =
self.binary(BinaryExpr(node))
of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
infExpr, nanExpr, floatExpr, nilExpr:
# Since all of these AST nodes mostly share
# the same overall structure, and the kind
# discriminant is enough to tell one
# from the other, why bother with
# specialized cases when one is enough?
# Since all of these AST nodes share the
# same overall structure and the kind
# field is enough to tell one from the
# other, why bother with specialized
# cases when one is enough?
self.literal(node)
else:
self.error(&"invalid AST node of kind {node.kind} at expression(): {node} (This is an internal error and most likely a bug)")
@ -984,12 +1045,10 @@ proc returnStmt(self: Compiler, node: ReturnStmt) =
let typ = self.inferType(self.currentFunction)
if returnType == nil and self.currentFunction.returnType != nil:
self.error(&"expected return value of type '{self.currentFunction.returnType.token.lexeme}', but expression has no type")
elif self.currentFunction.returnType == nil:
if node.value.kind != nilExpr:
self.error("non-nil return value is not allowed in functions without an explicit return type")
else:
if returnType != typ:
self.error(&"expected return value of type '{self.typeToStr(typ)}', got '{self.typeToStr(returnType)}' instead")
elif self.currentFunction.returnType == nil and node.value.kind != nilExpr:
self.error("non-nil return value is not allowed in functions without an explicit return type")
elif not self.compareTypes(returnType, typ.returnType):
self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead")
self.expression(node.value)
self.emitByte(OpCode.Return)
@ -1013,6 +1072,8 @@ proc continueStmt(self: Compiler, node: ContinueStmt) =
self.emitByte(Jump)
self.emitBytes(self.currentLoop.start.toDouble())
else:
if self.currentLoop.start > 16777215:
self.error("too much code to jump over in continue statement")
self.emitByte(LongJump)
self.emitBytes(self.currentLoop.start.toTriple())
@ -1074,7 +1135,7 @@ proc statement(self: Compiler, node: Statement) =
## while loops!
let loop = self.currentLoop
self.currentLoop = Loop(start: self.chunk.code.len(),
depth: self.scopeDepth, breakPos: @[])
depth: self.scopeDepth, breakPos: @[])
self.whileStmt(WhileStmt(node))
self.patchBreaks()
self.currentLoop = loop
@ -1108,14 +1169,25 @@ proc varDecl(self: Compiler, node: VarDecl) =
proc funDecl(self: Compiler, node: FunDecl) =
## Compiles function declarations
self.declareName(node)
if node.body != nil:
let fnType = self.inferType(node)
let impl = self.findByType(node.name.token.lexeme, fnType)
if impl.len() > 1:
# Oh-oh! We found more than one implementation of
# the same function! Error!
var msg = &"multiple matching implementations of '{node.name.token.lexeme}' found:\n"
for fn in reversed(impl):
var node = Declaration(fn.valueType.node)
discard self.typeToStr(fn.valueType)
msg &= &"- '{node.name.lexeme}' at line {node.token.line} of type {self.typeToStr(fn.valueType)}\n"
self.error(msg)
# We store the current function
var function = self.currentFunction
self.currentFunction = node
# A function's code is just compiled linearly
# and then jumped over
let jmp = self.emitJump(JumpForwards)
self.declareName(node)
# Since the deferred array is a linear
# sequence of instructions and we want
@ -1139,11 +1211,11 @@ proc funDecl(self: Compiler, node: FunDecl) =
# are resolved properly). There's a need for a bit
# of boilerplate code to make closures work, but
# that's about it
self.emitBytes(OpCode.Nil, OpCode.Return)
self.emitBytes(LoadNil, OpCode.Return)
# Currently defer is not functional so we
# just pop the instructions
for i in countup(deferStart, self.deferred.len(), 1):
for i in countup(deferStart, self.deferred.len() - 1, 1):
self.deferred.delete(i)
self.patchJump(jmp)
@ -1182,4 +1254,4 @@ proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk =
self.emitByte(OpCode.Return) # Exits the VM's main loop when used at the global scope
result = self.chunk
if self.ast.len() > 0 and self.scopeDepth != -1:
self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")
self.error(&"invalid state: invalid scopeDepth value (expected -1, got {self.scopeDepth}), did you forget to call endScope/beginScope?")

View File

@ -583,7 +583,7 @@ proc next(self: Lexer) =
# Keywords and identifiers
self.parseIdentifier()
elif self.match("#"):
# Inline comments
# Inline comments, pragmas, etc.
while not (self.check("\n") or self.done()):
discard self.step()
self.createToken(Comment)
@ -606,10 +606,10 @@ proc next(self: Lexer) =
self.tokens.add(self.getToken(symbol))
return
dec(n)
# None of our conditions matched: we don't know
# what's sitting in front of us, but it definitely
# isn't something we can parse, so it's an error
self.error("invalid syntax")
# We just assume what we have in front of us
# is a symbol
discard self.step()
self.createToken(Symbol)
proc lex*(self: Lexer, source, file: string): seq[Token] =

View File

@ -127,7 +127,7 @@ type
NanExpr* = ref object of LiteralExpr
InfExpr* = ref object of LiteralExpr
IdentExpr* = ref object of LiteralExpr
IdentExpr* = ref object of Expression
name*: Token
GroupingExpr* = ref object of Expression
@ -169,6 +169,7 @@ type
defaults*: seq[Expression]
isGenerator*: bool
isAsync*: bool
isPure*: bool
returnType*: Expression
SliceExpr* = ref object of Expression
@ -207,7 +208,7 @@ type
TryStmt* = ref object of Statement
body*: Statement
handlers*: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]]
handlers*: seq[tuple[body: Statement, exc: IdentExpr]]
finallyClause*: Statement
elseClause*: Statement
@ -249,6 +250,7 @@ type
isAsync*: bool
isGenerator*: bool
isPrivate*: bool
isPure*: bool
returnType*: Expression
@ -333,7 +335,6 @@ proc newIdentExpr*(name: Token): IdentExpr =
result = IdentExpr(kind: identExpr)
result.name = name
result.token = name
result.literal = name
proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr =
@ -352,6 +353,7 @@ proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression,
result.isAsync = isAsync
result.token = token
result.returnType = returnType
result.isPure = false
proc newGetItemExpr*(obj: Expression, name: IdentExpr, token: Token): GetItemExpr =
@ -462,7 +464,7 @@ proc newRaiseStmt*(exception: Expression, token: Token): RaiseStmt =
result.token = token
proc newTryStmt*(body: Statement, handlers: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]],
proc newTryStmt*(body: Statement, handlers: seq[tuple[body: Statement, exc: IdentExpr]],
finallyClause: Statement,
elseClause: Statement, token: Token): TryStmt =
result = TryStmt(kind: tryStmt)
@ -549,6 +551,7 @@ proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueTyp
result.token = token
result.pragmas = pragmas
result.returnType = returnType
result.isPure = false
@ -659,3 +662,6 @@ proc `$`*(self: ASTNode): string =
result &= ")"
else:
discard
proc `==`*(self, other: IdentExpr): bool {.inline.} = self.token == other.token

View File

@ -14,13 +14,13 @@
## Low level bytecode implementation details
import ast
import typing
import ../../util/multibyte
import errors
import strutils
import strformat
import ../../util/multibyte
import ../compiler
export ast
@ -45,7 +45,7 @@ type
## are 3 and 4"
## This is more efficient than using the naive approach, which would encode
## the same line number multiple times and waste considerable amounts of space.
consts*: seq[LiteralExpr]
consts*: seq[Expression]
byteConsts*: seq[uint8]
code*: seq[uint8]
lines*: seq[int]
@ -79,11 +79,11 @@ type
LoadFloat32,
LoadString,
## Singleton opcodes (each of them pushes a constant singleton on the stack)
Nil,
True,
False,
Nan,
Inf,
LoadNil,
LoadTrue,
LoadFalse,
LoadNan,
LoadInf,
## Basic stack operations
Pop, # Pops an element off the stack and discards it
Push, # Pushes x onto the stack
@ -98,10 +98,10 @@ type
Jump, # Absolute, unconditional jump into the bytecode
JumpForwards, # Relative, unconditional, positive jump in the bytecode
JumpBackwards, # Relative, unconditional, negative jump in the bytecode
JumpIfFalse, # Jumps to an absolute index in the bytecode if x is true
JumpIfTrue, # Jumps to an absolute index in the bytecode if x is false
JumpIfFalse, # Jumps to a relative index in the bytecode if x is false
JumpIfTrue, # Jumps to a relative index in the bytecode if x is true
JumpIfFalsePop, # Like JumpIfFalse, but also pops off the stack (regardless of truthyness). Optimization for if statements
JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops it otherwise (used for logical and)
JumpIfFalseOrPop, # Jumps to an absolute index in the bytecode if x is false and pops otherwise (used for logical and)
## Long variants of jumps (they use a 24-bit operand instead of a 16-bit one)
LongJump,
LongJumpIfFalse,
@ -129,9 +129,9 @@ type
# We group instructions by their operation/operand types for easier handling when debugging
# Simple instructions encompass instructions that push onto/pop off the stack unconditionally (True, False, Pop, etc.)
const simpleInstructions* = {OpCode.Return, OpCode.Nil,
OpCode.True, OpCode.False,
OpCode.Nan, OpCode.Inf,
const simpleInstructions* = {OpCode.Return, LoadNil,
LoadTrue, LoadFalse,
LoadNan, LoadInf,
Pop, OpCode.Raise,
BeginTry, FinishTry,
OpCode.Yield, OpCode.Await,
@ -220,7 +220,7 @@ proc getLine*(self: Chunk, idx: int): int =
raise newException(IndexDefect, "index out of range")
proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int =
proc findOrAddConstant(self: Chunk, constant: Expression, kind: Type): int =
## Small optimization function that reuses the same constant
## if it's already been written before (only if self.reuseConsts
## equals true)
@ -232,15 +232,13 @@ proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int =
if c.kind != constant.kind:
continue
if constant.isConst():
if c.literal.lexeme == constant.literal.lexeme:
if LiteralExpr(c).literal.lexeme == LiteralExpr(constant).literal.lexeme:
# This wouldn't work for stuff like 2e3 and 2000.0, but those
# forms are collapsed in the compiler before being written
# to the constants table
return i
elif constant.kind == identExpr:
var c = IdentExpr(c)
var constant = IdentExpr(constant)
if c.name.lexeme == constant.name.lexeme:
if IdentExpr(c).name.lexeme == IdentExpr(constant).name.lexeme:
return i
else:
continue
@ -248,14 +246,15 @@ proc findOrAddConstant(self: Chunk, constant: LiteralExpr): int =
result = self.consts.high()
proc addConstant*(self: Chunk, constant: LiteralExpr): array[3, uint8] =
## Writes a constant to a chunk. Returns its index casted to a 3-byte
## sequence (array). Constant indexes are reused if a constant is used
## more than once and self.reuseConsts equals true
proc addConstant*(self: Chunk, constant: Expression, kind: Type): array[3, uint8] =
## Writes a constant of the given type in the chunk's constant
## table. Returns its index as an array of 3 unsigned 8 bit integers.
## Constant indexes are reused if a constant is used more than once
## and self.reuseConsts equals true
if self.consts.high() == 16777215:
# The constant index is a 24 bit unsigned integer, so that's as far
# as we can index into the constant table (the same applies
# to our stack by the way). Not that anyone's ever gonna hit this
# limit in the real world, but you know, just in case
raise newException(CompileError, "cannot encode more than 16777216 constants")
result = self.findOrAddConstant(constant).toTriple()
result = self.findOrAddConstant(constant, kind).toTriple()

View File

@ -16,79 +16,74 @@ import strformat
type
TokenType* {.pure.} = enum
## Token types enumeration
## Token types enumeration
# Booleans
True, False,
# Booleans
True, False,
# Other singleton types
Infinity, NotANumber, Nil
# Other singleton types
Infinity, NotANumber, Nil
# Control flow statements
If, Else,
# Control flow statements
If, Else,
# Looping statements
While, For,
# Looping statements
While, For,
# Keywords
Function, Break, Continue,
Var, Let, Const, Is, Return,
Coroutine, Generator, Import,
IsNot, Raise, Assert, Await,
Foreach, Yield, Of, Defer,
Try, Except, Finally, Type,
Operator, Case, Enum, From,
Emit, As, Ptr, Ref
# Keywords
Function, Break, Continue,
Var, Let, Const, Return,
Coroutine, Generator, Import,
Raise, Assert, Await, Foreach,
Yield, Defer, Try, Except,
Finally, Type, Operator, Case,
Enum, From, Ptr, Ref
# Literal types
Integer, Float, String, Identifier,
Binary, Octal, Hex, Char
# Literal types
Integer, Float, String, Identifier,
Binary, Octal, Hex, Char
# Brackets, parentheses,
# operators and others
# Brackets, parentheses,
# operators and others
LeftParen, RightParen, # ()
LeftBrace, RightBrace, # {}
LeftBracket, RightBracket, # []
Dot, Semicolon, Colon, Comma, # . ; : ,
Plus, Minus, Slash, Star, # + - / *
Percentage, DoubleStar, # % **
Caret, Pipe, Ampersand, Tilde, # ^ | & ~
Equal, GreaterThan, LessThan, # = > <
LessOrEqual, GreaterOrEqual, # >= <=
NotEqual, RightShift, LeftShift, # != >> <<
LogicalAnd, LogicalOr, LogicalNot, # and or not
InplaceAdd, InplaceSub, InplaceDiv, # += -= /=
InplaceMod, InplaceMul, InplaceXor, # %= *= ^=
InplaceAnd, InplaceOr, FloorDiv, # &= |= //
DoubleEqual, InplaceFloorDiv, InplacePow, # == //= **=
InplaceRightShift, InplaceLeftShift, # >>= <<=
LeftParen, RightParen, # ()
LeftBrace, RightBrace, # {}
LeftBracket, RightBracket, # []
Dot, Semicolon, Comma, # . ; ,
# Miscellaneous
# Miscellaneous
EndOfFile, # Marks the end of the token stream
NoMatch, # Used internally by the symbol table
Comment, # Useful for documentation comments, pragmas, etc.
# These are not used at the moment but may be
# employed to enforce indentation or other neat
# stuff I haven't thought about yet
Whitespace,
Tab,
EndOfFile, # Marks the end of the token stream
NoMatch, # Used internally by the symbol table
Comment, # Useful for documentation comments, pragmas, etc.
Symbol, # A generic symbol
# These are not used at the moment but may be
# employed to enforce indentation or other neat
# stuff I haven't thought about yet
Whitespace,
Tab,
Token* = ref object
## A token object
kind*: TokenType # Type of the token
lexeme*: string # The lexeme associated to the token
line*: int # The line where the token appears
pos*: tuple[start, stop: int] # The absolute position in the source file
# (0-indexed and inclusive at the beginning)
## A token object
kind*: TokenType # Type of the token
lexeme*: string # The lexeme associated to the token
line*: int # The line where the token appears
pos*: tuple[start, stop: int] # The absolute position in the source file
# (0-indexed and inclusive at the beginning)
proc `$`*(self: Token): string =
if self != nil:
result = &"Token(kind={self.kind}, lexeme='{$(self.lexeme)}', line={self.line}, pos=({self.pos.start}, {self.pos.stop}))"
else:
result = "nil"
## Strinfifies
if self != nil:
result = &"Token(kind={self.kind}, lexeme='{$(self.lexeme)}', line={self.line}, pos=({self.pos.start}, {self.pos.stop}))"
else:
result = "nil"
proc `==`*(self, other: Token): bool =
## Returns self == other
return self.kind == other.kind and self.lexeme == other.lexeme

View File

@ -1,96 +0,0 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Peon's type system
import ast
export ast
type
TypeKind* = enum
## An enumeration of compile-time
## types
Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Function, CustomType,
Nil, Nan, Bool, Inf
Type* = ref object
## A wrapper around
## compile-time types
node*: ASTNode
case kind*: TypeKind:
of Function:
returnType*: Type
else:
discard
proc `==`*(self, other: Type): bool =
## Compares two type objects
## for equality
if system.`==`(self, nil):
return system.`==`(other, nil)
elif system.`==`(other, nil):
return system.`==`(self, nil)
if self.kind != other.kind:
return false
case self.kind:
of {Int8, UInt8, Int16, UInt16, Int32,
UInt32, Int64, UInt64, Float32, Float64,
Char, Byte, String, Nil, Nan, Bool, Inf}:
return true
of Function:
discard # TODO
else:
discard
proc toIntrinsic*(name: string): Type =
## Converts a string to an intrinsic
## type if it is valid and returns nil
## otherwise
if name in ["int", "int64", "i64"]:
return Type(kind: Int64)
elif name in ["uint64", "u64"]:
return Type(kind: UInt64)
elif name in ["int32", "i32"]:
return Type(kind: Int32)
elif name in ["uint32", "u32"]:
return Type(kind: UInt32)
elif name in ["int16", "i16"]:
return Type(kind: Int16)
elif name in ["uint16", "u16"]:
return Type(kind: UInt16)
elif name in ["int8", "i8"]:
return Type(kind: Int8)
elif name in ["uint8", "u8"]:
return Type(kind: UInt8)
elif name in ["f64", "float", "float64"]:
return Type(kind: Float64)
elif name in ["f32", "float32"]:
return Type(kind: Float32)
elif name == "byte":
return Type(kind: Byte)
elif name == "char":
return Type(kind: Char)
elif name == "nan":
return Type(kind: Nan)
elif name == "nil":
return Type(kind: Nil)
elif name == "inf":
return Type(kind: Inf)
elif name == "bool":
return Type(kind: Bool)
else:
return nil

View File

@ -16,7 +16,7 @@
import strformat
import strutils
import tables
import meta/token
import meta/ast
@ -28,9 +28,29 @@ export token, ast, errors
type
LoopContext = enum
LoopContext {.pure.} = enum
Loop, None
Precedence {.pure.} = enum
## Operator precedence
## clearly stolen from
## nim
Arrow = 0,
Assign,
Or,
And,
Compare,
Addition,
Multiplication,
Power,
None # Used for stuff that isn't an operator
OperatorTable = ref object
## A table for storing and
## handling the precedence
## of operators
tokens: seq[string]
precedence: TableRef[Precedence, seq[string]]
Parser* = ref object
## A recursive-descent top-down
## parser implementation
@ -63,27 +83,69 @@ type
currentFunction: Declaration
# Stores the current scope depth (0 = global, > 0 local)
scopeDepth: int
# We store user-defined operators for later use
operators: seq[string]
operators: OperatorTable
proc newParser*(): Parser =
proc newOperatorTable: OperatorTable =
## Initializes a new OperatorTable
## object
new(result)
result.tokens = @[]
result.precedence = newTable[Precedence, seq[string]]()
for prec in Precedence:
result.precedence[prec] = @[]
proc addOperator(self: OperatorTable, lexeme: string) =
## Adds an operator to the table. Its precedence
## is inferred from the operator's lexeme (the
## criteria are similar to Nim's)
if lexeme in self.tokens:
return # We've already added it!
var prec = Precedence.high()
if lexeme.len() >= 2 and lexeme[^3..^1] in ["->", "~>", "=>"]:
prec = Arrow
elif lexeme.endsWith("=") and lexeme[0] notin {'<', '>', '!', '?', '~', '='}:
prec = Assign
elif lexeme[0] in {'$', } or lexeme == "**":
prec = Power
elif lexeme[0] in {'*', '%', '/', '\\'}:
prec = Multiplication
elif lexeme[0] in {'+', '-', '|', '~'}:
prec = Addition
elif lexeme[0] in {'<', '>', '=', '!'}:
prec = Compare
elif lexeme == "and":
prec = Precedence.And
elif lexeme == "or":
prec = Precedence.Or
self.tokens.add(lexeme)
self.precedence[prec].add(lexeme)
proc getPrecedence(self: OperatorTable, lexeme: string): Precedence =
## Gets the precedence of a given operator
for (prec, operators) in self.precedence.pairs():
if lexeme in operators:
return prec
proc newParser*: Parser =
## Initializes a new Parser object
new(result)
result.current = 0
result.file = ""
result.tokens = @[]
result.currentFunction = nil
result.currentLoop = None
result.currentLoop = LoopContext.None
result.scopeDepth = 0
result.operators = newOperatorTable()
# Public getters for improved error formatting
proc getCurrent*(self: Parser): int {.inline.} = self.current
proc getCurrentToken*(self: Parser): Token =
if self.getCurrent() >= self.tokens.high() or self.getCurrent() - 1 < 0:
return self.tokens[^1]
else:
return self.tokens[self.current - 1]
proc getCurrentToken*(self: Parser): Token {.inline.} = (if self.getCurrent() >= self.tokens.high() or
self.getCurrent() - 1 < 0: self.tokens[^1] else: self.tokens[self.current - 1])
# Handy templates to make our life easier, thanks nim!
template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
@ -128,15 +190,26 @@ proc error(self: Parser, message: string) {.raises: [ParseError, ValueError].} =
raise newException(ParseError, errorMessage)
proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
# Why do we allow strings or enum members of TokenType? Well, it's simple:
# symbols like ":" and "=" are both valid operator names (therefore they are
# tokenized as symbols), but they are also used in a context where they are just
# separators (for example, the colon is used in type declarations). Since we can't
# tell at tokenization time which of the two contexts we're in, we just treat everything
# as a symbol and in the cases where we need a specific token we just match the string
# directly
proc check[T: TokenType or string](self: Parser, kind: T, distance: int = 0): bool =
## Checks if the given token at the given distance
## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to
## self.peek()
self.peek(distance).kind == kind
when T is TokenType:
self.peek(distance).kind == kind
else:
when T is string:
self.peek(distance).lexeme == kind
proc check(self: Parser, kind: openarray[TokenType]): bool =
proc check[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
## Calls self.check() in a loop with each entry of
## the given openarray of token kinds and returns
## at the first match. Note that this assumes
@ -148,17 +221,17 @@ proc check(self: Parser, kind: openarray[TokenType]): bool =
return false
proc match(self: Parser, kind: TokenType): bool =
proc match[T: TokenType or string](self: Parser, kind: T): bool =
## Behaves like self.check(), except that when a token
## matches it is also consumed
if self.check(kind,):
if self.check(kind):
discard self.step()
result = true
else:
result = false
proc match(self: Parser, kind: openarray[TokenType]): bool =
proc match[T: TokenType or string](self: Parser, kind: openarray[T]): bool =
## Calls self.match() in a loop with each entry of
## the given openarray of token kinds and returns
## at the first match. Note that this assumes
@ -170,7 +243,7 @@ proc match(self: Parser, kind: openarray[TokenType]): bool =
result = false
proc expect(self: Parser, kind: TokenType, message: string = "") =
proc expect[T: TokenType or string](self: Parser, kind: T, message: string = "") =
## Behaves like self.match(), except that
## when a token doesn't match, an error
## is raised. If no error message is
@ -182,16 +255,16 @@ proc expect(self: Parser, kind: TokenType, message: string = "") =
self.error(message)
proc expect(self: Parser, kinds: openarray[TokenType], message: string = "") =
proc expect[T: TokenType or string](self: Parser, kind: openarray[T], message: string = "") =
## Behaves like self.expect(), except that
## an error is raised only if none of the
## given token kinds matches
for kind in kinds:
for k in kind:
if self.match(kind):
return
if message.len() == 0:
self.error(&"""expecting any of the following tokens: {kinds.join(", ")}, but got {self.peek().kind} instead""")
# Forward declarations
proc expression(self: Parser): Expression
@ -200,6 +273,7 @@ proc statement(self: Parser): Statement
proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declaration
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration
# End of forward declarations
proc primary(self: Parser): Expression =
@ -316,168 +390,116 @@ proc call(self: Parser): Expression =
self.expect(Identifier, "expecting attribute name after '.'")
result = newGetItemExpr(result, newIdentExpr(self.peek(-1)), self.peek(-1))
elif self.match(LeftBracket):
# Slicing such as a[1:2]
# Slicing such as a[1:2], which is then
# translated to `[]`(a, 1, 2)
let tok = self.peek(-1)
var ends: seq[Expression] = @[]
while not self.check(RightBracket) and not self.done():
if self.check(Colon):
if self.check(":"):
ends.add(newNilExpr(Token(lexeme: "nil")))
discard self.step()
else:
ends.add(self.expression())
discard self.match(Colon)
discard self.match(":")
self.expect(RightBracket, "expecting ']'")
result = newSliceExpr(result, ends, tok)
else:
break
## Operator parsing handlers
proc unary(self: Parser): Expression =
## Parses unary expressions
if self.match([Minus, Tilde, LogicalNot, Plus]):
result = newUnaryExpr(self.peek(-1), self.unary())
if self.peek().lexeme in self.operators.tokens:
result = newUnaryExpr(self.step(), self.unary())
else:
result = self.call()
proc customUnaryOperator(self: Parser): Expression =
## Parses user-defined unary expressions
if self.peek().lexeme in self.operators:
discard self.step()
result = newUnaryExpr(self.peek(-1), self.customUnaryOperator())
else:
result = self.unary()
proc pow(self: Parser): Expression =
## Parses exponentiation expressions
result = self.customUnaryOperator()
proc parsePow(self: Parser): Expression =
result = self.unary()
var operator: Token
var right: Expression
while self.match(DoubleStar):
operator = self.peek(-1)
right = self.customUnaryOperator()
result = newBinaryExpr(result, operator, right)
proc mul(self: Parser): Expression =
## Parses multiplication and division expressions
result = self.pow()
var operator: Token
var right: Expression
while self.match([Slash, Percentage, FloorDiv, Star]):
operator = self.peek(-1)
right = self.pow()
result = newBinaryExpr(result, operator, right)
proc add(self: Parser): Expression =
## Parses addition and subtraction expressions
result = self.mul()
var operator: Token
var right: Expression
while self.match([Plus, Minus]):
operator = self.peek(-1)
right = self.mul()
result = newBinaryExpr(result, operator, right)
proc comparison(self: Parser): Expression =
## Parses other comparison expressions
## and some other operators
result = self.add()
var operator: Token
var right: Expression
while self.match([LessThan, GreaterThan, LessOrEqual, GreaterOrEqual, Is, Of, IsNot]):
operator = self.peek(-1)
right = self.add()
result = newBinaryExpr(result, operator, right)
proc equality(self: Parser): Expression =
## Parses equality expressions
result = self.comparison()
var operator: Token
var right: Expression
while self.match([DoubleEqual, NotEqual]):
operator = self.peek(-1)
right = self.comparison()
result = newBinaryExpr(result, operator, right)
proc logicalAnd(self: Parser): Expression =
## Parses logical and expressions
## (a and b)
result = self.equality()
var operator: Token
var right: Expression
while self.match(LogicalAnd):
operator = self.peek(-1)
right = self.equality()
result = newBinaryExpr(result, operator, right)
proc logicalOr(self: Parser): Expression =
## Parses logical or expressions
## (a or b)
result = self.logicalAnd()
var operator: Token
var right: Expression
while self.match(LogicalOr):
operator = self.peek(-1)
right = self.logicalAnd()
result = newBinaryExpr(result, operator, right)
proc bitwiseAnd(self: Parser): Expression =
## Parses a & b expressions
result = self.logicalOr()
var operator: Token
var right: Expression
while self.match(Pipe):
operator = self.peek(-1)
right = self.logicalOr()
result = newBinaryExpr(result, operator, right)
proc bitwiseOr(self: Parser): Expression =
## Parses a | b expressions
result = self.bitwiseAnd()
var operator: Token
var right: Expression
while self.match(Ampersand):
operator = self.peek(-1)
right = self.bitwiseAnd()
result = newBinaryExpr(result, operator, right)
proc customBinaryOperator(self: Parser): Expression =
## Parses user-defined binary operators
result = self.bitwiseOr()
var operator: Token
var right: Expression
while self.peek().lexeme in self.operators:
while self.operators.getPrecedence(self.peek().lexeme) == Power:
operator = self.step()
right = self.bitwiseOr()
right = self.unary()
result = newBinaryExpr(result, operator, right)
proc assignment(self: Parser): Expression =
## Parses assignment, the highest-level
## expression (including stuff like a.b = 1).
## Slice assignments are also parsed here
result = self.customBinaryOperator()
if self.match([Equal, InplaceAdd, InplaceSub, InplaceDiv, InplaceMod,
InplacePow, InplaceMul, InplaceXor, InplaceAnd, InplaceOr,
InplaceFloorDiv, InplaceRightShift, InplaceLeftShift]):
let tok = self.peek(-1)
proc parseMul(self: Parser): Expression =
result = self.parsePow()
var operator: Token
var right: Expression
while self.operators.getPrecedence(self.peek().lexeme) == Multiplication:
operator = self.step()
right = self.parsePow()
result = newBinaryExpr(result, operator, right)
proc parseAdd(self: Parser): Expression =
result = self.parseMul()
var operator: Token
var right: Expression
while self.operators.getPrecedence(self.peek().lexeme) == Addition:
operator = self.step()
right = self.parseMul()
result = newBinaryExpr(result, operator, right)
proc parseCmp(self: Parser): Expression =
result = self.parseAdd()
var operator: Token
var right: Expression
while self.operators.getPrecedence(self.peek().lexeme) == Compare:
operator = self.step()
right = self.parseAdd()
result = newBinaryExpr(result, operator, right)
proc parseAnd(self: Parser): Expression =
result = self.parseCmp()
var operator: Token
var right: Expression
while self.operators.getPrecedence(self.peek().lexeme) == Precedence.And:
operator = self.step()
right = self.parseCmp()
result = newBinaryExpr(result, operator, right)
proc parseOr(self: Parser): Expression =
result = self.parseAnd()
var operator: Token
var right: Expression
while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
operator = self.step()
right = self.parseAnd()
result = newBinaryExpr(result, operator, right)
proc parseAssign(self: Parser): Expression =
result = self.parseOr()
if self.operators.getPrecedence(self.peek().lexeme) == Assign:
let tok = self.step()
var value = self.expression()
if result.kind in {identExpr, sliceExpr}:
result = newAssignExpr(result, value, tok)
elif result.kind == getItemExpr:
result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
else:
self.error("invalid assignment target")
case result.kind:
of identExpr, sliceExpr:
result = newAssignExpr(result, value, tok)
of getItemExpr:
result = newSetItemExpr(GetItemExpr(result).obj, GetItemExpr(result).name, value, tok)
else:
self.error("invalid assignment target")
proc parseArrow(self: Parser): Expression =
result = self.parseAssign()
var operator: Token
var right: Expression
while self.operators.getPrecedence(self.peek().lexeme) == Precedence.Or:
operator = self.step()
right = self.parseAssign()
result = newBinaryExpr(result, operator, right)
## End of operator parsing handlers
proc assertStmt(self: Parser): Statement =
@ -602,7 +624,7 @@ proc forEachStmt(self: Parser): Statement =
self.expect(LeftParen, "expecting '(' after 'foreach'")
self.expect(Identifier)
var identifier = newIdentExpr(self.peek(-1))
self.expect(Colon)
self.expect(":")
var expression = self.expression()
self.expect(RightParen)
var body = self.statement()
@ -628,25 +650,16 @@ proc tryStmt(self: Parser): Statement =
## Parses try/except/else/finally blocks
let tok = self.peek(-1)
var body = self.statement()
var handlers: seq[tuple[body: Statement, exc: IdentExpr, name: IdentExpr]] = @[]
var handlers: seq[tuple[body: Statement, exc: IdentExpr]] = @[]
var finallyClause: Statement
var elseClause: Statement
var asName: IdentExpr
var excName: Expression
var handlerBody: Statement
while self.match(Except):
excName = self.expression()
if excName.kind == identExpr:
handlerBody = self.statement()
handlers.add((body: handlerBody, exc: IdentExpr(excName), name: asName))
asName = nil
elif excName.kind == binaryExpr and BinaryExpr(excName).operator.kind == As:
asName = IdentExpr(BinaryExpr(excName).b)
if BinaryExpr(excName).b.kind != identExpr:
self.error("expecting alias name after 'except ... as'")
elif BinaryExpr(excName).a.kind != identExpr:
self.error("expecting exception name")
excName = BinaryExpr(excName).a
handlers.add((body: handlerBody, exc: IdentExpr(excName)))
else:
excName = nil
if self.match(Else):
@ -760,16 +773,16 @@ proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declarat
var value: Expression
self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
var name = newIdentExpr(self.peek(-1))
let isPrivate = not self.match(Star)
let isPrivate = not self.match("*")
self.checkDecl(isPrivate)
var valueType: IdentExpr
if self.match(Colon):
if self.match(":"):
# We don't enforce it here because
# the compiler may be able to infer
# the type later!
self.expect(Identifier, "expecting type name after ':'")
valueType = newIdentExpr(self.peek(-1))
if self.match(Equal):
if self.match("="):
value = self.expression()
if isConst and not value.isConst():
self.error("constant initializer is not a constant")
@ -792,13 +805,16 @@ proc varDecl(self: Parser, isLet: bool = false, isConst: bool = false): Declarat
proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]],
parameter: var tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool],
defaults: var seq[Expression]) =
## Helper to parse declaration arguments and avoid code duplication
while not self.check(RightParen):
if arguments.len > 255:
self.error("cannot have more than 255 arguments in function declaration")
self.expect(Identifier, "expecting parameter name")
parameter.name = newIdentExpr(self.peek(-1))
if self.match(Colon):
if self.match(":"):
parameter.mutable = false
parameter.isPtr = false
parameter.isRef = false
if self.match(Var):
parameter.mutable = true
elif self.match(Ptr):
@ -816,7 +832,7 @@ proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr,
if parameter in arguments:
self.error("duplicate parameter name in function declaration")
arguments.add(parameter)
if self.match(Equal):
if self.match("="):
defaults.add(self.expression())
elif defaults.len() > 0:
self.error("positional argument cannot follow default argument in function declaration")
@ -829,7 +845,7 @@ proc parseDeclArguments(self: Parser, arguments: var seq[tuple[name: IdentExpr,
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration =
## Parses functions, coroutines, generators, anonymous functions and custom operators
## Parses functions, coroutines, generators, anonymous functions and operators
let tok = self.peek(-1)
var enclosingFunction = self.currentFunction
var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]] = @[]
@ -842,15 +858,15 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
# or an expression. Fortunately anonymous functions
# are nameless, so we can sort the ambiguity by checking
# if there's an identifier after the keyword
self.expect(Identifier, &"expecting function name after '{tok.lexeme}'")
self.checkDecl(not self.check(Star))
self.expect(Identifier, &"expecting identifier after '{tok.lexeme}'")
self.checkDecl(not self.check("*"))
self.currentFunction = newFunDecl(nil, arguments, defaults, newBlockStmt(@[], Token()),
isAsync=isAsync, isGenerator=isGenerator, isPrivate=true,
token=tok, pragmas=(@[]), returnType=nil)
FunDecl(self.currentFunction).name = newIdentExpr(self.peek(-1))
if self.match(Star):
if self.match("*"):
FunDecl(self.currentFunction).isPrivate = false
elif not isLambda and self.check([LeftBrace, Colon, LeftParen]):
elif not isLambda and (self.check([LeftBrace, LeftParen]) or self.check(":")):
# We do a bit of hacking to pretend we never
# wanted to parse this as a declaration in
# the first place and pass control over to
@ -867,7 +883,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
returnType=nil)
elif not isOperator:
self.error("funDecl: invalid state")
if self.match(Colon):
if self.match(":"):
# Function has explicit return type
if self.match([Function, Coroutine, Generator]):
# The function's return type is another
@ -883,7 +899,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]
if self.match(LeftParen):
self.parseDeclArguments(arguments, parameter, defaults)
if self.match(Colon):
if self.match(":"):
LambdaExpr(returnType).returnType = self.expression()
else:
returnType = self.expression()
@ -891,7 +907,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
self.expect(LeftParen)
var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]
self.parseDeclArguments(arguments, parameter, defaults)
if self.match(Colon):
if self.match(":"):
# Function's return type
if self.match([Function, Coroutine, Generator]):
var arguments: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]] = @[]
@ -902,7 +918,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
var parameter: tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]
if self.match(LeftParen):
self.parseDeclArguments(arguments, parameter, defaults)
if self.match(Colon):
if self.match(":"):
LambdaExpr(returnType).returnType = self.expression()
else:
returnType = self.expression()
@ -926,11 +942,9 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
result = self.currentFunction
if isOperator:
if arguments.len() == 0:
self.error("cannot declare argument-less operator")
elif arguments.len() > 2:
self.error("cannot declare operator with more than 2 arguments")
self.error("cannot declare operator without arguments")
elif FunDecl(result).returnType == nil:
self.error("operator cannot have void return type")
self.error("operators must have a return type")
for argument in arguments:
if argument.valueType == nil:
self.error(&"missing type declaration for '{argument.name.token.lexeme}' in function declaration")
@ -939,7 +953,7 @@ proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isL
proc expression(self: Parser): Expression =
## Parses expressions
result = self.assignment() # Highest-level expression
result = self.parseArrow() # Highest-level expression
proc expressionStatement(self: Parser): Statement =
@ -1025,14 +1039,10 @@ proc declaration(self: Parser): Declaration =
of Operator:
discard self.step()
result = self.funDecl(isOperator=true)
of Type, TokenType.Whitespace, TokenType.Tab:
discard self.step() # TODO
of Comment:
let tok = self.peek()
if tok.lexeme.startsWith("#pragma["):
discard # TODO: Pragmas
elif tok.lexeme.startsWith("##"):
discard # TODO: Docstrings
of Type, TokenType.Whitespace, TokenType.Tab, Comment:
# TODO: Comments, pragmas, docstrings
discard self.step() # TODO
return newNilExpr(Token(lexeme: "nil"))
else:
result = Declaration(self.statement())
@ -1042,22 +1052,22 @@ proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
self.tokens = tokens
self.file = file
self.current = 0
self.currentLoop = None
self.currentLoop = LoopContext.None
self.currentFunction = nil
self.scopeDepth = 0
self.operators = @[]
self.operators = newOperatorTable()
for i, token in self.tokens:
# We do a first pass over the tokens
# to find user-defined operators.
# Note that this relies on the lexer
# ending the input with an EOF token
# to find operators. Note that this
# relies on the lexer ending the input
# with an EOF token
if token.kind == Operator:
if i == self.tokens.high():
self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
self.operators.add(self.tokens[i + 1].lexeme)
self.operators.addOperator(self.tokens[i + 1].lexeme)
if i == self.tokens.high() and token.kind != EndOfFile:
# Since we're iterating this list anyway might as
# well perform some extra checks
self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)")
while not self.done():
result.add(self.declaration())
result.add(self.declaration())

View File

@ -2,8 +2,6 @@
import sequtils
import strformat
import strutils
import nimSHA2
import times
import jale/editor as ed
import jale/templates
import jale/plugin/defaults
@ -18,7 +16,6 @@ import frontend/parser as p
import frontend/compiler as c
import backend/vm as v
import util/serializer as s
import util/debugger
# Forward declarations
@ -28,10 +25,18 @@ proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = false
const debugParser = false
const debugCompiler = true
const debugCompiler = false
const debugSerializer = false
when debugSerializer:
import nimSHA2
import times
when debugCompiler:
import util/debugger
when isMainModule:
setControlCHook(proc () {.noconv.} = quit(0))
var
@ -61,7 +66,7 @@ when isMainModule:
if input.len() == 0:
continue
# Currently the parser doesn't handle these tokens well
tokens = filter(tokenizer.lex(input, "<stdin>"), proc (x: Token): bool = x.kind notin {TokenType.Whitespace, Tab})
tokens = filter(tokenizer.lex(input, "stdin"), proc (x: Token): bool = x.kind notin {TokenType.Whitespace, Tab})
if tokens.len() == 0:
continue
when debugLexer:
@ -72,23 +77,25 @@ when isMainModule:
break
echo "\t", token
echo ""
tree = parser.parse(tokens, "<stdin>")
tree = parser.parse(tokens, "stdin")
if tree.len() == 0:
continue
when debugParser:
echo "Parsing step:"
for node in tree:
echo "\t", node
echo ""
compiled = compiler.compile(tree, "<stdin>")
compiled = compiler.compile(tree, "stdin")
when debugCompiler:
echo "Compilation step:"
stdout.write("\t")
echo &"""Raw byte stream: [{compiled.code.join(", ")}]"""
echo "\nBytecode disassembler output below:\n"
disassembleChunk(compiled, "<stdin>")
disassembleChunk(compiled, "stdin")
echo ""
serializer.dumpToFile(compiled, input, "<stdin>", "stdin.pbc")
serializedRaw = serializer.dumpBytes(compiled, input, "<stdin>")
serializer.dumpToFile(compiled, input, "stdin", "stdin.pbc")
serializedRaw = serializer.dumpBytes(compiled, input, "stdin")
serialized = serializer.loadFile("stdin.pbc")
when debugSerializer:
echo "Serialization step: "
@ -116,29 +123,29 @@ when isMainModule:
vm.run(serialized.chunk)
except IOError:
break
# TODO: The code for error reporting completely
# breaks down with multiline input, fix it
# TODO: The code for error reporting completely
# breaks down with multiline input, fix it
except LexingError:
let lineNo = tokenizer.getLine()
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
echo getCurrentExceptionMsg()
echo &"Source line: {line}"
echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
# echo &"Source line: {line}"
# echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - relPos.start)
except ParseError:
let lineNo = parser.getCurrentToken().line
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
echo getCurrentExceptionMsg()
echo &"Source line: {line}"
echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len())
# echo &"Source line: {line}"
# echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - parser.getCurrentToken().lexeme.len())
except CompileError:
let lineNo = compiler.getCurrentNode().token.line
let relPos = tokenizer.getRelPos(lineNo)
let line = tokenizer.getSource().splitLines()[lineNo - 1].strip()
echo getCurrentExceptionMsg()
echo &"Source line: {line}"
echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len())
# echo &"Source line: {line}"
# echo " ".repeat(relPos.start + len("Source line: ")) & "^".repeat(relPos.stop - compiler.getCurrentNode().token.lexeme.len())
except SerializationError:
echo getCurrentExceptionMsg()
quit(0)
@ -151,10 +158,6 @@ proc fillSymbolTable(tokenizer: Lexer) =
## and keywords
# 1-byte symbols
tokenizer.symbols.addSymbol("+", Plus)
tokenizer.symbols.addSymbol("-", Minus)
tokenizer.symbols.addSymbol("*", Star)
tokenizer.symbols.addSymbol("/", Slash)
tokenizer.symbols.addSymbol("{", LeftBrace)
tokenizer.symbols.addSymbol("}", RightBrace)
tokenizer.symbols.addSymbol("(", LeftParen)
@ -163,45 +166,14 @@ proc fillSymbolTable(tokenizer: Lexer) =
tokenizer.symbols.addSymbol("]", RightBracket)
tokenizer.symbols.addSymbol(".", Dot)
tokenizer.symbols.addSymbol(",", Comma)
tokenizer.symbols.addSymbol(">", TokenType.GreaterThan)
tokenizer.symbols.addSymbol("<", TokenType.LessThan)
tokenizer.symbols.addSymbol(";", Semicolon)
tokenizer.symbols.addSymbol("=", Equal)
tokenizer.symbols.addSymbol("~", Tilde)
tokenizer.symbols.addSymbol("%", Percentage)
tokenizer.symbols.addSymbol(":", Colon)
tokenizer.symbols.addSymbol("&", Ampersand)
tokenizer.symbols.addSymbol("^", Caret)
tokenizer.symbols.addSymbol("|", Pipe)
# 2-byte symbols
tokenizer.symbols.addSymbol("+=", InplaceAdd)
tokenizer.symbols.addSymbol("-=", InplaceSub)
tokenizer.symbols.addSymbol(">=", TokenType.GreaterOrEqual)
tokenizer.symbols.addSymbol("<=", TokenType.LessOrEqual)
tokenizer.symbols.addSymbol("*=", InplaceMul)
tokenizer.symbols.addSymbol("/=", InplaceDiv)
tokenizer.symbols.addSymbol("&=", InplaceAnd)
tokenizer.symbols.addSymbol("!=", NotEqual)
tokenizer.symbols.addSymbol("|=", InplaceOr)
tokenizer.symbols.addSymbol("^=", InplaceXor)
tokenizer.symbols.addSymbol("%=", InplaceMod)
tokenizer.symbols.addSymbol("//", FloorDiv)
tokenizer.symbols.addSymbol("==", DoubleEqual)
tokenizer.symbols.addSymbol("**", DoubleStar)
tokenizer.symbols.addSymbol(">>", RightShift)
tokenizer.symbols.addSymbol("<<", LeftShift)
# 3-byte symbols
tokenizer.symbols.addSymbol("//=", InplaceFloorDiv)
tokenizer.symbols.addSymbol("**=", InplacePow)
tokenizer.symbols.addSymbol(">>=", InplaceRightShift)
tokenizer.symbols.addSymbol("<<=", InplaceLeftShift)
# Keywords
tokenizer.symbols.addKeyword("type", Type)
tokenizer.symbols.addKeyword("type", TokenType.Type)
tokenizer.symbols.addKeyword("enum", Enum)
tokenizer.symbols.addKeyword("case", Case)
tokenizer.symbols.addKeyword("operator", Operator)
tokenizer.symbols.addKeyword("generator", Generator)
tokenizer.symbols.addKeyword("function", Function)
tokenizer.symbols.addKeyword("function", TokenType.Function)
tokenizer.symbols.addKeyword("coroutine", Coroutine)
tokenizer.symbols.addKeyword("break", TokenType.Break)
tokenizer.symbols.addKeyword("continue", Continue)
@ -231,26 +203,12 @@ proc fillSymbolTable(tokenizer: Lexer) =
tokenizer.symbols.addKeyword("nan", NotANumber)
tokenizer.symbols.addKeyword("inf", Infinity)
tokenizer.symbols.addKeyword("nil", TokenType.Nil)
tokenizer.symbols.addKeyword("true", TokenType.True)
tokenizer.symbols.addKeyword("false", TokenType.False)
# These are technically operators, but since
# they fit neatly into the definition for an
# identifier/keyword we parse them as such
# and specialize them later
tokenizer.symbols.addKeyword("isnot", IsNot)
tokenizer.symbols.addKeyword("is", Is)
tokenizer.symbols.addKeyword("is", As)
tokenizer.symbols.addKeyword("of", Of)
tokenizer.symbols.addKeyword("and", TokenType.LogicalAnd)
tokenizer.symbols.addKeyword("or", TokenType.LogicalOr)
tokenizer.symbols.addKeyword("not", TokenType.LogicalNot)
tokenizer.symbols.addKeyword("true", True)
tokenizer.symbols.addKeyword("false", False)
tokenizer.symbols.addKeyword("ref", Ref)
tokenizer.symbols.addKeyword("ptr", Ptr)
# P.S.: There's no reason for the order of addition of
# symbols to be ascending in length (the symbol table uses
# a hashmap internally). You can add/remove symbols (and
# keywords for that matter) as you like!
for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]:
tokenizer.symbols.addSymbol(sym, Symbol)
proc getLineEditor: LineEditor =

View File

@ -17,6 +17,7 @@ import ../frontend/meta/bytecode
import ../frontend/meta/token
import ../config
import multibyte
import ../frontend/compiler
import strformat
import strutils
@ -203,7 +204,9 @@ proc readConstants(self: Serializer, stream: seq[byte]): int =
stream = stream[1..^1]
let size = self.bytesToInt([stream[0], stream[1], stream[2]])
stream = stream[3..^1]
discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..<size]))))
self.chunk.consts.add(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..<size]))))
# TODO
# discard self.chunk.addConstant(newIdentExpr(Token(lexeme: self.bytesToString(stream[0..<size]))))
stream = stream[size..^1]
inc(count, size + 4)
else: