Turned off optimizer (breaks compiler); further work on closures and scope resolution

This commit is contained in:
Mattia Giambirtone 2022-04-26 09:29:59 +02:00
parent d38ad5aab4
commit 4839c9a341
4 changed files with 99 additions and 90 deletions

View File

@ -44,6 +44,7 @@ type
depth: int
isPrivate: bool
isConst: bool
valueType: IdentExpr
Loop = object
## A "loop object" used
@ -124,7 +125,7 @@ proc identifier(self: Compiler, node: IdentExpr)
proc varDecl(self: Compiler, node: VarDecl)
## End of forward declarations
## Public getters for nicer error formatting
## Public getter for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >=
self.ast.len(): self.ast[^1] else: self.ast[self.current - 1])
@ -173,7 +174,7 @@ proc emitByte(self: Compiler, byt: OpCode|uint8) =
proc emitBytes(self: Compiler, byt1: OpCode|uint8, byt2: OpCode|uint8) =
## Emits multiple bytes instead of a single one, this is useful
## Emits multiple bytes instead of a single one. This is useful
## to emit operators along with their operands, or for
## instructions that are longer than one byte
self.emitByte(uint8 byt1)
@ -208,16 +209,6 @@ proc emitConstant(self: Compiler, obj: ASTNode) =
self.emitBytes(self.makeConstant(obj))
proc identifierConstant(self: Compiler, identifier: IdentExpr): array[3, uint8] =
## Emits an identifier name as a string in the current chunk's constant
## table. This is used to load globals declared as dynamic that cannot
## be resolved statically by the compiler
try:
result = self.makeConstant(identifier)
except CompileError:
self.error(getCurrentExceptionMsg())
proc emitJump(self: Compiler, opcode: OpCode): int =
## Emits a dummy jump offset to be patched later. Assumes
## the largest offset (emits 4 bytes, one for the given jump
@ -304,7 +295,7 @@ proc literal(self: Compiler, node: ASTNode) =
var x: int
var y = IntExpr(node)
try:
assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseInt(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(y)
@ -317,7 +308,7 @@ proc literal(self: Compiler, node: ASTNode) =
var x: int
var y = HexExpr(node)
try:
assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseHex(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
@ -327,7 +318,7 @@ proc literal(self: Compiler, node: ASTNode) =
var x: int
var y = BinExpr(node)
try:
assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseBin(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
@ -337,7 +328,7 @@ proc literal(self: Compiler, node: ASTNode) =
var x: int
var y = OctExpr(node)
try:
assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseOct(y.literal.lexeme, x)
except ValueError:
self.error("integer value out of range")
self.emitConstant(newIntExpr(Token(lexeme: $x, line: y.token.line,
@ -347,7 +338,7 @@ proc literal(self: Compiler, node: ASTNode) =
var x: float
var y = FloatExpr(node)
try:
assert parseFloat(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseFloat(y.literal.lexeme, x)
except ValueError:
self.error("floating point value out of range")
self.emitConstant(y)
@ -400,7 +391,7 @@ proc unary(self: Compiler, node: UnaryExpr) =
of Minus:
self.emitByte(UnaryNegate)
of Plus:
discard # Unary + does nothing, but we allow it for consistency
self.emitByte(UnaryPlus)
of TokenType.LogicalNot:
self.emitByte(OpCode.LogicalNot)
of Tilde:
@ -479,28 +470,28 @@ proc binary(self: Compiler, node: BinaryExpr) =
proc declareName(self: Compiler, node: ASTNode) =
proc declareName(self: Compiler, node: ASTNode, kind: IdentExpr) =
## Compiles all name declarations
case node.kind:
of NodeKind.varDecl:
var node = VarDecl(node)
# Statically resolved variable here. Creates a new Name entry
# so that self.identifier emits the proper stack offset
# Creates a new Name entry so that self.identifier emits the proper stack offset
if self.names.high() > 16777215:
# If someone ever hits this limit in real-world scenarios, I swear I'll
# slap myself 100 times with a sign saying "I'm dumb". Mark my words
self.error("cannot declare more than 16777215 static variables at a time")
self.error("cannot declare more than 16777215 variables at a time")
self.names.add(Name(depth: self.scopeDepth, name: IdentExpr(node.name),
isPrivate: node.isPrivate,
owner: "",
isConst: node.isConst))
isPrivate: node.isPrivate,
owner: self.currentModule,
isConst: node.isConst,
valueType: kind))
self.emitByte(StoreVar)
self.emitBytes(self.names.high().toTriple())
of funDecl:
var node = FunDecl(node)
# Declares the function's name in the
# current (outer) scope...
self.declareName(node.name)
self.declareName(node.name, IdentExpr(node.returnType))
# ... but its arguments in an inner one!
self.scopeDepth += 1
# (this ugly part is needed because
@ -508,8 +499,8 @@ proc declareName(self: Compiler, node: ASTNode) =
# and decrements the scope depth)
for argument in node.arguments:
if self.names.high() > 16777215:
self.error("cannot declare more than 16777215 static variables at a time")
self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument.name)))
self.error("cannot declare more than 16777215 variables at a time")
self.names.add(Name(depth: self.scopeDepth + 1, isPrivate: true, owner: self.currentModule, isConst: false, name: IdentExpr(argument.name), valueType: kind))
self.emitByte(LoadVar)
self.emitBytes(self.names.high().toTriple())
self.scopeDepth -= 1
@ -524,30 +515,37 @@ proc resolveStatic(self: Compiler, name: IdentExpr,
## first name object with the given name. Returns
## nil when the name can't be found. This function
## has no concept of scope depth, because getStaticIndex
## does that job
## does that job. Note that private names declared in
## other modules will not be resolved!
for obj in reversed(self.names):
if obj.name.token.lexeme == name.token.lexeme:
if obj.isPrivate and obj.owner != self.currentModule:
return nil
return obj
return nil
proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): int =
## Gets the predicted stack position of the given variable
## if it is static, returns -1 if it is to be bound dynamically
## or it does not exist at all and returns -2 if the variable
## is outside of the current local scope and is to be emitted as a closure.
proc getStaticIndex(self: Compiler, name: IdentExpr, depth: int = self.scopeDepth): tuple[closedOver: bool, pos: int] =
## Gets the predicted stack position of the given variable and
## returns a tuple (closedOver, pos) that tells the caller whether
## the variable is to be emitted as a closure as well as its predicted
## stack/closure array position. Returns (false, -1) if the variable's
## location can not be determined at compile time (this is an error!).
## Note that private names declared in other modules will not be resolved!
var i: int = self.names.high()
for variable in reversed(self.names):
if name.name.lexeme == variable.name.name.lexeme:
if variable.depth == depth:
return i
else:
# This tells self.identifier() that this is
# a closed-over variable
return -2
if variable.isPrivate and variable.owner != self.currentModule:
return (false, -1)
if variable.depth == depth or variable.depth == 0:
# variable.depth == 0 for globals!
return (false, i)
elif variable.depth > 0:
for j, closure in reversed(self.closedOver):
if closure.name.lexeme == name.name.lexeme:
return (true, j)
dec(i)
return -1
return (false, -1)
proc identifier(self: Compiler, node: IdentExpr) =
@ -561,16 +559,17 @@ proc identifier(self: Compiler, node: IdentExpr) =
# anyway?)
self.emitConstant(node)
else:
let index = self.getStaticIndex(node)
let t = self.getStaticIndex(node)
let index = t.pos
if index != -1:
if index >= 0:
if t.closedOver:
self.emitByte(LoadVar) # Static name resolution, loads value at index in the stack. Very fast. Much wow.
self.emitBytes(index.toTriple())
else:
if self.closedOver.len() == 0:
self.error("error: closure variable array is empty but LoadHeap would be emitted (this is an internal error and most likely a bug)")
if self.closedOver.len() >= 16777216:
self.error("too many consecutive closed-over variables (max is 16777215)")
self.error("too many consecutive closure-over variables (max is 16777216)")
self.emitByte(LoadHeap) # Heap-allocated closure variable. Stored in a separate "closure array" in the VM that does not have stack semantics
self.emitBytes(self.closedOver.high().toTriple())
else:
@ -587,7 +586,8 @@ proc assignment(self: Compiler, node: ASTNode) =
if r != nil and r.isConst:
self.error("cannot assign to constant")
self.expression(node.value)
let index = self.getStaticIndex(name)
let t = self.getStaticIndex(name)
let index = t.pos
case node.token.kind:
of InplaceAdd:
self.emitByte(BinaryAdd)
@ -623,7 +623,10 @@ proc assignment(self: Compiler, node: ASTNode) =
# but that requires variants for stack,
# heap, and closure variables and I cba
if index != -1:
self.emitByte(StoreVar)
if not t.closedOver:
self.emitByte(StoreVar)
else:
self.emitByte(StoreHeap)
self.emitBytes(index.toTriple())
else:
self.error(&"reference to undeclared name '{node.token.lexeme}'")
@ -746,11 +749,11 @@ proc inferValueType(self: Compiler, node: ASTNode): ASTNode =
# valueType here, we just need to return
# a non-nil value so we don't error out
return DictExpr(node).keyType
of intExpr:
var node = IntExpr(node)
of intExpr, floatExpr, binExpr, octExpr, hexExpr:
var node = LiteralExpr(node)
var size = node.token.lexeme.split("'")
if len(size) notin 1..2:
self.error("invalid state: inferValueType -> invalid size specifier for int")
self.error("invalid state: inferValueType -> invalid size specifier")
elif size.len() == 1:
return newIdentExpr(Token(lexeme: "int"))
elif size[1] in ["u64", "i64", "u32", "i32", "f64", "f32", "i32", "u32", "u8", "i8"]:
@ -777,20 +780,18 @@ proc inferExprType(self: Compiler, node: ASTNode): ASTNode =
case node.kind:
of identExpr:
var node = IdentExpr(node)
if self.getStaticIndex(IdentExpr(node)) == -1:
self.error(&"reference to undeclared name '{node.token.lexeme}'")
# TODO: Find type of identifier
var name = self.resolveStatic(node)
if name == nil:
return nil
return name.valueType
of unaryExpr:
return self.inferValueType(UnaryExpr(node).a)
of binaryExpr:
var node = BinaryExpr(node)
var a = self.inferValueType(node.a)
var b = self.inferValueType(node.b)
# This is obviously not correct, but
# this function is only useful as a
# first type checking step anyway
if a == nil:
return b
var a = self.inferExprType(node.a)
var b = self.inferExprType(node.b)
if a == nil or b == nil:
return nil
return a
of {intExpr, hexExpr, binExpr, octExpr,
strExpr, falseExpr, trueExpr, infExpr,
@ -845,8 +846,8 @@ proc expression(self: Compiler, node: ASTNode) =
# Binary expressions such as 2 ^ 5 and 0.66 * 3.14
self.binary(BinaryExpr(node))
of intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr, trueExpr,
infExpr, nanExpr, floatExpr, nilExpr,
tupleExpr, setExpr, listExpr, dictExpr:
infExpr, nanExpr, floatExpr, nilExpr, tupleExpr, setExpr, listExpr,
dictExpr:
# Since all of these AST nodes mostly share
# the same overall structure, and the kind
# discriminant is enough to tell one
@ -865,7 +866,7 @@ proc awaitStmt(self: Compiler, node: AwaitStmt) =
## same as an await expression followed by a semicolon.
## Await expressions are the only native construct to
## run coroutines from within an already asynchronous
## loop (which should be orchestrated by an event loop).
## context (which should be orchestrated by an event loop).
## They block in the caller until the callee returns
self.expression(node.expression)
self.emitByte(OpCode.Await)
@ -873,8 +874,8 @@ proc awaitStmt(self: Compiler, node: AwaitStmt) =
proc deferStmt(self: Compiler, node: DeferStmt) =
## Compiles defer statements. A defer statement
## is executed right before the function exits
## (either because of a return or an exception)
## is executed right before its containing function
## exits (either because of a return or an exception)
let current = self.chunk.code.len
self.expression(node.expression)
for i in countup(current, self.chunk.code.high()):
@ -885,6 +886,11 @@ proc deferStmt(self: Compiler, node: DeferStmt) =
proc returnStmt(self: Compiler, node: ReturnStmt) =
## Compiles return statements. An empty return
## implicitly returns nil
let returnType = self.inferExprType(node.value)
if returnType == nil:
self.error("expression has no type")
elif returnType.token.lexeme != self.currentFunction.returnType.token.lexeme:
self.error(&"expected value of type '{self.currentFunction.returnType.token.lexeme}', got '{returnType.token.lexeme}'")
self.expression(node.value)
self.emitByte(OpCode.Return)
@ -994,7 +1000,7 @@ proc varDecl(self: Compiler, node: VarDecl) =
if self.inferDeclType(node) == nil:
self.error(&"Cannot determine the type of '{node.name.token.lexeme}'")
self.expression(node.value)
self.declareName(node)
self.declareName(node, IdentExpr(node.valueType))
proc funDecl(self: Compiler, node: FunDecl) =
@ -1007,7 +1013,7 @@ proc funDecl(self: Compiler, node: FunDecl) =
# A function's code is just compiled linearly
# and then jumped over
let jmp = self.emitJump(JumpForwards)
self.declareName(node)
self.declareName(node, IdentExpr(node.returnType))
# Since the deferred array is a linear
# sequence of instructions and we want
@ -1065,7 +1071,7 @@ proc compile*(self: Compiler, ast: seq[ASTNode], file: string): Chunk =
self.names = @[]
self.scopeDepth = 0
self.currentFunction = nil
self.currentModule = "<main>"
self.currentModule = self.file
self.current = 0
while not self.done():
self.declaration(self.step())

View File

@ -63,10 +63,11 @@ type
LoadConstant = 0u8, # Pushes constant at position x in the constant table onto the stack
## Binary operators
UnaryNegate, # Pushes the result of -x onto the stack
UnaryPlus, # Pushes the result of +x onto the stack
BinaryAdd, # Pushes the result of a + b onto the stack
BinarySubtract, # Pushes the result of a - b onto the stack
BinaryDivide, # Pushes the result of a / b onto the stack (true division). The result is a float
BinaryFloorDiv, # Pushes the result of a // b onto the stack (integer division). The result is always an integer
BinaryDivide, # Pushes the result of a / b onto the stack (true division)
BinaryFloorDiv, # Pushes the result of a // b onto the stack (integer division)
BinaryMultiply, # Pushes the result of a * b onto the stack
BinaryPow, # Pushes the result of a ** b (a to the power of b) onto the stack
BinaryMod, # Pushes the result of a % b onto the stack (modulo division)
@ -90,9 +91,9 @@ type
GreaterOrEqual, # Pushes the result of a >= b onto the stack
LessOrEqual, # Pushes the result of a <= b onto the stack
## Logical operators
LogicalNot, # Pushes true if
LogicalAnd,
LogicalOr,
LogicalNot, # Pushes true onto the stack if x is falsey
LogicalAnd, # Pushes true onto the stack if a and b are truthy and false otherwise
LogicalOr, # Pushes true onto the stack if either a or b are truthy and false otherwise
## Constant opcodes (each of them pushes a singleton on the stack)
Nil,
True,

View File

@ -69,7 +69,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode =
var x: int
var y = IntExpr(node)
try:
assert parseInt(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseInt(y.literal.lexeme, x)
except ValueError:
self.newWarning(valueOverflow, node)
result = node
@ -77,7 +77,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode =
var x: int
var y = HexExpr(node)
try:
assert parseHex(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseHex(y.literal.lexeme, x)
except ValueError:
self.newWarning(valueOverflow, node)
return node
@ -86,7 +86,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode =
var x: int
var y = BinExpr(node)
try:
assert parseBin(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseBin(y.literal.lexeme, x)
except ValueError:
self.newWarning(valueOverflow, node)
return node
@ -95,7 +95,7 @@ proc optimizeConstant(self: Optimizer, node: ASTNode): ASTNode =
var x: int
var y = OctExpr(node)
try:
assert parseOct(y.literal.lexeme, x) == len(y.literal.lexeme)
discard parseOct(y.literal.lexeme, x)
except ValueError:
self.newWarning(valueOverflow, node)
return node
@ -122,7 +122,7 @@ proc optimizeUnary(self: Optimizer, node: UnaryExpr): ASTNode =
case a.kind:
of intExpr:
var x: int
assert parseInt(IntExpr(a).literal.lexeme, x) == len(IntExpr(a).literal.lexeme)
discard parseInt(IntExpr(a).literal.lexeme, x)
case node.operator.kind:
of Tilde:
x = not x
@ -169,8 +169,8 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode =
if a.kind == intExpr and b.kind == intExpr:
# Optimizes integer operations
var x, y, z: int
assert parseInt(IntExpr(a).literal.lexeme, x) == IntExpr(a).literal.lexeme.len()
assert parseInt(IntExpr(b).literal.lexeme, y) == IntExpr(b).literal.lexeme.len()
discard parseInt(IntExpr(a).literal.lexeme, x)
discard parseInt(IntExpr(b).literal.lexeme, y)
try:
case node.operator.kind:
of Plus:
@ -213,14 +213,14 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode =
var x, y, z: float
if a.kind == intExpr:
var temp: int
assert parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len()
discard parseInt(IntExpr(a).literal.lexeme, temp) == IntExpr(a).literal.lexeme.len()
x = float(temp)
self.newWarning(implicitConversion, a)
else:
discard parseFloat(FloatExpr(a).literal.lexeme, x)
if b.kind == intExpr:
var temp: int
assert parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len()
discard parseInt(IntExpr(b).literal.lexeme, temp) == IntExpr(b).literal.lexeme.len()
y = float(temp)
self.newWarning(implicitConversion, b)
else:
@ -258,7 +258,7 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode =
var a = StrExpr(a)
var b = IntExpr(b)
var bb: int
assert parseInt(b.literal.lexeme, bb) == b.literal.lexeme.len()
discard parseInt(b.literal.lexeme, bb)
case node.operator.kind:
of Star:
result = newStrExpr(Token(kind: String, lexeme: "'" & a.literal.lexeme[1..<(^1)].repeat(bb) & "'"))
@ -268,7 +268,7 @@ proc optimizeBinary(self: Optimizer, node: BinaryExpr): ASTNode =
var b = StrExpr(b)
var a = IntExpr(a)
var aa: int
assert parseInt(a.literal.lexeme, aa) == a.literal.lexeme.len()
discard parseInt(a.literal.lexeme, aa)
case node.operator.kind:
of Star:
result = newStrExpr(Token(kind: String, lexeme: "'" & b.literal.lexeme[1..<(^1)].repeat(aa) & "'"))

View File

@ -26,10 +26,10 @@ proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = true
const debugParser = true
const debugLexer = false
const debugParser = false
const debugCompiler = true
const debugOptimizer = true
const debugOptimizer = false
const debugSerializer = true
@ -45,7 +45,7 @@ when isMainModule:
serializedRaw: seq[byte]
tokenizer = newLexer()
parser = newParser()
optimizer = newOptimizer()
# optimizer = newOptimizer()
compiler = newCompiler()
serializer = newSerializer()
editor = getLineEditor()
@ -78,7 +78,9 @@ when isMainModule:
for node in tree:
echo "\t", node
echo ""
optimized = optimizer.optimize(tree)
# The optimizer needs work to function properly
# with the compiler
# optimized = optimizer.optimize(tree)
when debugOptimizer:
echo &"Optimization step (constant folding enabled: {optimizer.foldConstants}):"
for node in optimized.tree:
@ -92,7 +94,7 @@ when isMainModule:
else:
stdout.write("No warnings produced\n")
echo ""
compiled = compiler.compile(optimized.tree, "<stdin>")
compiled = compiler.compile(tree, "<stdin>")
when debugCompiler:
echo "Compilation step:"
stdout.write("\t")