Fix compilation issues, add initial parsing test suite and initial work on bytecode backend

This commit is contained in:
Mattia Giambirtone 2024-07-08 12:37:31 +02:00
parent 4fc078d4d8
commit c4d51f91d8
Signed by: nocturn9x
GPG Key ID: 37B83AB6C3BE6514
8 changed files with 270 additions and 54 deletions

2
.gitignore vendored
View File

@ -144,6 +144,8 @@ dmypy.json
cython_debug/
tests/test.pn
*.pbc
# Binary stuff
bin/

View File

@ -1,2 +1,2 @@
--hints:off --deepCopy:on --experimental:strictFuncs --exceptions:setjmp
--deepCopy:on --exceptions:setjmp --hints:off
path="src"

View File

@ -11,7 +11,7 @@ binDir = "bin"
# Dependencies
requires "nim >= 2.1.1"
requires "nim >= 2.0.4"
# requires "jale >= 0.1.1"
before build:

View File

@ -8,41 +8,51 @@ import std/strutils
import std/parseutils
import std/tables
import std/strformat
import std/algorithm
type
DeclWrapper = ref object of RootObj
## A wrapper around names that lets us
## carry bytecode-specific name information
## along with the (typed) name
decl: TypedDecl
stackPos: int # Absolute position in the call stack where this name will be at runtime
FunctionWrapper = ref object
## A wrapper around a typed function
## declaration. This is necessary to
## carry bytecode-specific information
## regarding this function along with
## the typed declaration itself
decl: TypedFunDecl
FunctionWrapper = ref object of DeclWrapper
# The location where the function's code
# begins and ends
location: tuple[start, stop: int]
# List of jumps to be patched within the
# function's code
retJumps: seq[int]
BytecodeGenerator* = ref object
## A bytecode generator
# Current scope depth
scopeDepth: int
# The piece of code we compile into
chunk: Chunk
# Contains all declarations we encountered
names: seq[DeclWrapper]
# The current size of the call
# stack (which is always known
# statically)
stackSize: int
# Stores the position of all jumps
jumps: seq[tuple[patched: bool, offset: int]]
# Metadata regarding function locations (used to construct
# the debugging fields in the resulting bytecode)
functions: seq[tuple[start, stop, pos: int, fn: Name]]
# Used for error reporting
currentFile: string
currentNode: TypedNode
# The typechecker used to validate the peon code we're generating
# bytecode for
typeChecker: TypeChecker
# The current function we're generating code for
currentFunction: FunctionWrapper
# Are we generating code for the main module?
isMainModule: bool
proc newBytecodeGenerator*: BytecodeGenerator =
@ -52,6 +62,7 @@ proc newBytecodeGenerator*: BytecodeGenerator =
proc generateExpression(self: BytecodeGenerator, expression: TypedExpr)
proc dispatch(self: BytecodeGenerator, typedNode: TypedNode)
proc error(self: BytecodeGenerator, msg: string, typedNode: TypedNode = nil) =
@ -207,11 +218,11 @@ proc patchJump(self: BytecodeGenerator, offset: int) =
self.jumps[offset].patched = true
proc handleBuiltinFunction(self: BytecodeGenerator, fn: FunctionWrapper, args: seq[TypedExpr], line: int) =
proc handleBuiltinFunction(self: BytecodeGenerator, fn: Name, args: seq[TypedExpr], line: int) =
## Emits instructions for builtin functions
## such as addition or subtraction
var builtinOp: string
for pragma in FunDecl(fn.decl.node).pragmas:
for pragma in FunDecl(fn.node).pragmas:
if pragma.name.token.lexeme == "magic":
builtinOp = pragma.args[0].token.lexeme
if builtinOp notin ["LogicalOr", "LogicalAnd"]:
@ -315,14 +326,11 @@ proc handleBuiltinFunction(self: BytecodeGenerator, fn: FunctionWrapper, args: s
self.emitByte(PrintInf, line)
of Function:
self.emitByte(LoadString, line)
var loc: string = fn.location.start.toHex()
while loc[0] == '0' and loc.len() > 1:
loc = loc[1..^1]
var str: string
if typ.isLambda:
str = &"anonymous function at 0x{loc}"
str = &"anonymous function at "
else:
str = &"function '{FunDecl(fn.decl.node).name.token.lexeme}' at 0x{loc}"
str = &"function '{FunDecl(fn.node).name.token.lexeme}'"
self.emitBytes(str.len().toTriple(), line)
self.emitBytes(self.chunk.writeConstant(str.toBytes()), line)
self.emitByte(PrintString, line)
@ -370,6 +378,61 @@ proc patchReturnAddress(self: BytecodeGenerator, pos: int) =
self.chunk.consts[pos + 7] = address[7]
proc beginScope(self: BytecodeGenerator) =
  ## Enters a new lexical scope by raising the
  ## generator's scope depth by one
  self.scopeDepth += 1
proc endScope(self: BytecodeGenerator) =
  ## Ends the current local scope.
  ##
  ## Lowers the scope depth by one, emits the instructions needed to
  ## pop the values of the closed scope off the call stack and forgets
  ## every name that was declared deeper than the new depth
  if self.scopeDepth < 0:
    self.error("cannot call endScope with depth < 0 (This is an internal error and most likely a bug)")
  dec(self.scopeDepth)
  # We keep track both of which names are going out of scope
  # and how many actually need to be popped off the call stack
  # at runtime (only names of kind Var are counted below)
  var names: seq[DeclWrapper] = @[]
  var popCount = 0
  for wrapper in self.names:
    # We only pop names in scopes deeper than ours
    if wrapper.decl.name.depth <= self.scopeDepth:
      continue
    if wrapper.decl.name.depth == 0 and not self.isMainModule:
      # Global names coming from other modules only go out of scope
      # when the global scope of the main module is closed (i.e. at
      # the end of the whole program)
      continue
    names.add(wrapper)
    # Names that will not exist at runtime are still removed from the
    # name list below, but there's no need to emit any instructions
    # to pop them
    if wrapper.decl.name.kind notin [NameKind.Var, ]:
      continue
    inc(popCount)
  dec(self.stackSize, popCount)
  if popCount > 1:
    # If we're popping more than one variable, we emit PopN
    # instructions until the pop count reaches zero: each one
    # carries the count as encoded by toDouble(), and we subtract
    # whatever that encoding was actually able to represent
    while popCount > 0:
      self.emitByte(PopN, self.currentNode.node.token.line)
      self.emitBytes(popCount.toDouble(), self.currentNode.node.token.line)
      popCount -= popCount.toDouble().fromDouble().int
  elif popCount == 1:
    # We only emit PopN if we're popping more than one value
    self.emitByte(PopC, self.currentNode.node.token.line)
  # Forget the names that went out of scope. The list is rebuilt
  # rather than edited in place: deleting entries while indexing the
  # same seq skips whichever element slides into the freed slot and
  # can read past the end once the last entry is removed
  var remaining: seq[DeclWrapper] = @[]
  for wrapper in self.names:
    if wrapper notin names:
      remaining.add(wrapper)
  self.names = remaining
proc generateLiteral(self: BytecodeGenerator, literal: TypedExpr) =
## Emits code for literals
let
@ -391,6 +454,38 @@ proc generateLiteral(self: BytecodeGenerator, literal: TypedExpr) =
self.error(&"unknown typed node of type {node.kind} at generateLiteral()")
proc generateCall(self: BytecodeGenerator, call: TypedCallExpr) =
  ## Small wrapper that abstracts emitting a call instruction
  ## for a given function.
  ##
  ## Calls to builtins are forwarded to handleBuiltinFunction and
  ## nothing else is emitted for them. For every other callee kind
  ## the implementation is still a draft: both the Var and Function
  ## branches currently report "not implemented"
  if call.callee.valueType.isBuiltin:
    self.handleBuiltinFunction(call.callee, call.args, call.node.token.line)
    return
  case call.callee.kind:
    of NameKind.Var:
      self.error("not implemented")
    of NameKind.Function:
      self.error("not implemented")
      # TODO
      # NOTE(review): presumably error() raises, which would make the
      # draft below (and the rest of this proc) unreachable for now.
      # The wrapper created here is empty, so its location is all zeroes
      var fn = FunctionWrapper()
      self.emitByte(LoadUInt64, call.node.token.line)
      self.emitBytes(self.chunk.writeConstant(fn.location.stop.toLong()), call.node.token.line)
    else:
      discard # Unreachable
  #if fn.valueType.forwarded:
  #    self.forwarded.add((fn, self.chunk.consts.high() - 7))
  # Loads a zero placeholder constant; its position is handed to
  # patchReturnAddress once the call instruction has been emitted
  self.emitByte(LoadUInt64, call.node.token.line)
  self.emitBytes(self.chunk.writeConstant(0.toLong()), call.node.token.line)
  # NOTE(review): assumes writeConstant appended exactly 8 bytes for
  # the placeholder above - confirm
  let pos = self.chunk.consts.len() - 8
  # Arguments are generated last-to-first
  for arg in reversed(call.args):
    self.generateExpression(arg)
  # Creates a new call frame and jumps
  # to the function's first instruction
  # in the code
  self.emitByte(Call, call.node.token.line)
  self.emitBytes(call.args.len().toTriple(), call.node.token.line)
  self.patchReturnAddress(pos)
proc generateUnary(self: BytecodeGenerator, expression: TypedExpr) =
## Emits code for unary expressions
discard # TODO
@ -398,7 +493,7 @@ proc generateUnary(self: BytecodeGenerator, expression: TypedExpr) =
proc generateBinary(self: BytecodeGenerator, expression: TypedExpr) =
## Emits code for binary expressions
discard # TODO
discard
proc generateExpression(self: BytecodeGenerator, expression: TypedExpr) =
@ -416,6 +511,64 @@ proc generateExpression(self: BytecodeGenerator, expression: TypedExpr) =
self.error(&"unknown typed node of type {node.kind} at generateExpression()")
proc generateTypeDecl(self: BytecodeGenerator, decl: TypedTypeDecl) =
  ## Handles a typed type declaration. No bytecode is
  ## emitted: the declaration is accepted and ignored
  # Currently there isn't much we do with type declarations: they
  # have no representation in the bytecode (might wanna add some
  # debugging fields though)
  discard
proc generateFunDecl(self: BytecodeGenerator, decl: TypedFunDecl) =
  ## Emits code for function declarations.
  ##
  ## The function's body is compiled in place, preceded by a forward
  ## jump that is patched to land right after the body once it has
  ## been emitted. The function's boundaries, parameter count and
  ## name are also recorded in the chunk's debugging fields. Builtin
  ## functions emit nothing here
  if decl.name.valueType.isBuiltin:
    # Builtins are handled at call time
    return
  let
    wrapper = FunctionWrapper(decl: decl)
    node = FunDecl(decl.node)
    # Whatever function we were compiling before this one: it is
    # restored at the end so nested declarations don't clobber it
    enclosing = self.currentFunction
  self.names.add(wrapper)
  # The body is compiled on behalf of this function, so anything
  # consulting currentFunction while dispatching the body must see
  # this wrapper rather than a stale (or nil) one
  self.currentFunction = wrapper
  let stackIdx = self.stackSize
  inc(self.stackSize)
  let jmp = self.emitJump(JumpForwards, node.token.line)
  wrapper.location.start = self.chunk.code.len()
  # We let our debugger know about this function's boundaries
  self.chunk.functions.add(self.chunk.code.len().toTriple())
  let idx = self.chunk.functions.len()
  self.chunk.functions.add(0.toTriple()) # Patched later
  self.chunk.functions.add(uint8(node.parameters.len()))
  if not node.name.isNil():
    var s = decl.name.ident.token.lexeme
    # The name is truncated *before* its length is written, so the
    # recorded length always matches the bytes that follow it and
    # never exceeds what fits in 16 bits
    if s.len() > uint16.high().int:
      s = s[0 ..< uint16.high().int]
    self.chunk.functions.add(s.len().toDouble())
    self.chunk.functions.add(s.toBytes())
  else:
    self.chunk.functions.add(0.toDouble())
  # NOTE(review): no endScope() call pairs with this beginScope()
  # anywhere in this proc - confirm whether that is intentional
  self.beginScope()
  for piece in decl.body.body:
    self.dispatch(piece)
  let typ = decl.name.valueType.returnType
  for jump in wrapper.retJumps:
    self.patchJump(jump)
  # Terminates the function's context
  wrapper.location.stop = self.chunk.code.len()
  let stop = wrapper.location.stop.toTriple()
  self.emitByte(OpCode.Return, node.token.line)
  # The byte after Return is 1 when the function has
  # a return type and 0 when it doesn't
  if not typ.isNil():
    self.emitByte(1, node.token.line)
  else:
    self.emitByte(0, node.token.line)
  # Fills in the end-of-function placeholder written above
  self.chunk.functions[idx] = stop[0]
  self.chunk.functions[idx + 1] = stop[1]
  self.chunk.functions[idx + 2] = stop[2]
  # Well, we've compiled everything: time to patch
  # the jump offset
  self.patchJump(jmp)
  self.stackSize = stackIdx
  self.currentFunction = enclosing
proc beginProgram(self: BytecodeGenerator): int =
## Emits boilerplate code to set up
## a peon program
@ -441,6 +594,24 @@ proc endProgram(self: BytecodeGenerator, pos: int) =
self.patchReturnAddress(pos)
proc dispatch(self: BytecodeGenerator, typedNode: TypedNode) =
  ## Generates code for a single typed node by forwarding it to the
  ## generator that matches its kind (expression statements, type
  ## declarations and function declarations). Any other kind is
  ## reported through error().
  ##
  ## While a declaration is being processed, the current file is
  ## switched to the name of the module the declaration belongs to;
  ## the previous value is put back before returning
  self.currentNode = typedNode
  let previousFile = self.currentFile
  if self.currentNode.node.isDecl():
    self.currentFile = TypedDecl(typedNode).name.module.ident.token.lexeme
  case typedNode.node.kind:
    of NodeKind.exprStmt:
      self.generateExpression(TypedExprStmt(typedNode).expression)
      # The value of an expression statement is unused: discard it
      self.emitByte(Pop, typedNode.node.token.line)
    of NodeKind.typeDecl:
      self.generateTypeDecl(TypedTypeDecl(typedNode))
    of NodeKind.funDecl:
      self.generateFunDecl(TypedFunDecl(typedNode))
    else:
      # The message names this proc, like the equivalent errors in
      # generateLiteral() and generateExpression() do
      self.error(&"unknown typed node of type {typedNode.node.kind} at dispatch()")
  self.currentFile = previousFile
proc generate*(self: BytecodeGenerator, compiled: seq[TypedNode], typeChecker: TypeChecker): Chunk =
## Turn the given compilation output
## into a bytecode chunk
@ -448,17 +619,8 @@ proc generate*(self: BytecodeGenerator, compiled: seq[TypedNode], typeChecker: T
self.typeChecker = typeChecker
let offset = self.beginProgram()
self.currentFile = typeChecker.getFile()
self.isMainModule = true
for typedNode in compiled:
self.currentNode = typedNode
let currentFile = self.currentFile
if self.currentNode.node.isDecl():
self.currentFile = TypedDecl(typedNode).name.module.ident.token.lexeme
case typedNode.node.kind:
of exprStmt:
self.generateExpression(TypedExprStmt(typedNode).expression)
self.emitByte(Pop, typedNode.node.token.line)
else:
self.error(&"unknown typed node of type {typedNode.node.kind} at generate()")
self.currentFile = currentFile
self.dispatch(typedNode)
self.endProgram(offset)
result = self.chunk

View File

@ -811,7 +811,6 @@ proc stringify*(self: TypeChecker, typ: TypedNode): string =
proc beginScope(self: TypeChecker) =
## Begins a new lexical scope
assert self.scopeDepth == self.names.high()
inc(self.scopeDepth)
self.names.add(newTable[string, seq[Name]]())
@ -822,7 +821,6 @@ proc endScope(self: TypeChecker) =
## one
discard self.names.pop()
dec(self.scopeDepth)
assert self.scopeDepth == self.names.high()
proc isTypevar(self: Type): bool =
@ -1342,14 +1340,9 @@ proc call(self: TypeChecker, node: CallExpr): TypedExpr =
of Structure:
# TODO
result = newTypedExpr(node, typ)
self.error("not implemented")
of Function:
var typedArgs: seq[tuple[name: string, kind: Type, default: TypedExpr]] = @[]
for arg in args:
if not arg.default.isNil():
typedArgs.add((arg.name, arg.kind, arg.default))
else:
typedArgs.add((arg.name, arg.kind, nil))
result = newTypedCallExpr(node, impl, typedArgs)
result = newTypedCallExpr(node, impl, argExpr)
else:
# TODO?
self.error("not implemented")

View File

@ -99,7 +99,7 @@ type
NameKind* {.pure.} = enum
## A name enumeration type
Default, Var, Module
Default, Var, Module, Function
Name* = ref object
## A generic name object
@ -169,7 +169,7 @@ type
TypedCallExpr* = ref object of TypedExpr
## A typed function call expression
callee*: Name
args*: TypeSignature
args*: seq[TypedExpr]
TypedDecl* = ref object of TypedNode
## A typed declaration node
@ -269,7 +269,7 @@ proc newTypedBinaryExpr*(node: UnaryExpr, kind: Type, a, b: TypedExpr): TypedBin
proc newTypedCallExpr*(node: CallExpr, callee: Name,
args: TypeSignature): TypedCallExpr =
args: seq[TypedExpr]): TypedCallExpr =
## Initializes a new typed function call expression
result = TypedCallExpr(node: node, callee: callee, args: args, kind: callee.valueType.returnType)

View File

@ -688,12 +688,12 @@ proc newTypeDecl*(name: IdentExpr, fields: TypeFields, isPrivate: bool, token: T
result.members = @[]
proc `$`*(self: Parameter): string
proc `$`*(self: TypeField): string
proc `$`*(self: TypeGeneric): string
func `$`*(self: Parameter): string
func `$`*(self: TypeField): string
func `$`*(self: TypeGeneric): string
proc `$`*(self: ASTNode): string =
func `$`*(self: ASTNode): string =
if self.isNil():
return "nil"
case self.kind:
@ -795,15 +795,15 @@ proc `$`*(self: ASTNode): string =
else:
discard
proc `$`*(self: Parameter): string = &"Parameter(name={self.ident}, type={self.valueType}, default={self.default})"
proc `$`*(self: TypeField): string = &"Field(name={self.ident}, type={self.valueType}, default={self.default}, private={self.isPrivate})"
proc `$`*(self: TypeGeneric): string = &"Parameter(name={self.ident}, constraint={self.constr})"
func `$`*(self: Parameter): string = &"Parameter(name={self.ident}, type={self.valueType}, default={self.default})"
func `$`*(self: TypeField): string = &"Field(name={self.ident}, type={self.valueType}, default={self.default}, private={self.isPrivate})"
func `$`*(self: TypeGeneric): string = &"Parameter(name={self.ident}, constraint={self.constr})"
proc `==`*(self, other: IdentExpr): bool {.inline.} = self.token == other.token
func `==`*(self, other: IdentExpr): bool {.inline.} = self.token == other.token
proc getRelativeBoundaries*(self: ASTNode): tuple[start, stop: int] =
func getRelativeBoundaries*(self: ASTNode): tuple[start, stop: int] =
## Recursively computes the position of a node relative
## to its containing line
case self.kind:
@ -841,7 +841,6 @@ proc getRelativeBoundaries*(self: ASTNode): tuple[start, stop: int] =
result = (getRelativeBoundaries(self.obj).start, getRelativeBoundaries(self.name).stop)
of NodeKind.pragmaExpr:
var self = Pragma(self)
let start = self.token.relPos.start
var stop = 0
if self.args.len() > 0:
stop = self.args[^1].token.relPos.stop + 1

60
tests/parse.nim Normal file
View File

@ -0,0 +1,60 @@
import util/testing
import util/fmterr
import frontend/parsing/parser
import std/strformat
when isMainModule:
  # Test driver for the parser: every case feeds a malformed source
  # string to the parser and expects a specific error message at a
  # specific line and position
  var suite = newTestSuite()
  suite.addTests(@[
    testParseFails("missingExprSemicolon", "1", "missing semicolon at end of expression", 1, (0, 0)),
    testParseFails("bareSemicolon", ";", "invalid syntax", 1, (0, 0)),
    testParseFails("missingClosingParen", "(1", "unterminated parenthesized expression", 1, (1, 1)),
    testParseFails("spuriousClosedParen", ")", "unmatched ')'", 1, (0, 0)),
    testParseFails("spuriousClosedBracket", "]", "unmatched ']'", 1, (0, 0)),
    testParseFails("spuriousClosedBrace", "}", "unmatched '}'", 1, (0, 0)),
    testParseFails("positionalArgFollowsKeywordArg", "f(a, b=c, d);", "positional argument cannot follow keyword argument in call", 1, (10, 10)),
    testParseFails("invalidAttributeGet", "test.1;", "expecting attribute name after '.'", 1, (5, 5)),
    testParseFails("invalidBracketTarget", "1[];", "expecting identifier before '['", 1, (2, 2)),
    testParseFails("invalidAssignTarget", "1=2;", "invalid assignment target", 1, (0, 0)),
    testParseFails("missingAssertSemicolon", "assert false", "missing semicolon after 'assert'", 1, (7, 11)),
    testParseFails("unterminatedBlock", "{1;", "expecting '}'", 1, (2, 2)),
    testParseFails("missingBlockName", "block {}", "expecting block name after 'block'", 1, (6, 6)),
    testParseFails("missingBlockBrace", "block foo }", "expecting '{' after block name", 1, (10, 10)),
    testParseFails("unterminatedNamedBlock", "block foo {1;", "expecting '}'", 1, (12, 12)),
  ])
  # The source of this case is assembled by hand: a call to f with
  # 256 arguments, i.e. one more than the error message allows
  var tooManyArguments = testParseFails("tooManyArguments", "f(", "cannot pass more than 255 arguments in call", 1, (767, 767))
  for argIndex in 0..<256:
    # Separator goes before every argument except the first
    if argIndex > 0:
      tooManyArguments.source &= ", "
    tooManyArguments.source &= "a"
  tooManyArguments.source &= ");"
  suite.addTest(tooManyArguments)
  echo "Running parser tests"
  suite.run(verbose=true)
  if not suite.successful():
    echo "ERR: Not all tests were successful, details below:\n"
    for current in suite.tests:
      # Only failed and crashed tests get a report
      if current.status notin [Failed, Crashed]:
        continue
      echo &" - {current.name} -> {current.status}"
      echo &" Details:"
      echo &" - Outcome -> {current.outcome}"
      echo &" - Expected state -> {current.expected} "
      echo &" - Expected outcome -> {current.getExpectedOutcome()}"
      if current.reason.len() > 0:
        echo &"\n The test failed for the following reason -> {current.reason}\n"
      else:
        echo "\n No further information is available about this failure"
      if not current.outcome.exc.isNil():
        echo &"\n Formatted error message follows\n"
        print(ParseError(current.outcome.exc))
        echo "\n Formatted error message ends here\n"
    quit(-1)
  echo "OK: All parser tests were successful"
  quit(0)