Initial tests for an import system

Mattia Giambirtone 2022-08-14 18:37:06 +02:00
parent f50dd66741
commit c85fff8f67
7 changed files with 126 additions and 84 deletions


@@ -16,6 +16,7 @@ import meta/ast
 import meta/errors
 import ../config
 import ../util/multibyte
+import ../util/symbols
 import lexer as l
 import parser as p
@@ -215,6 +216,9 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Compiler =
     result.compilerProcs["pure"] = handlePurePragma

+proc compileModule(self: Compiler, filename: string)

 ## Public getter for nicer error formatting
 proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >=
     self.ast.len(): self.ast[^1] else: self.ast[self.current - 1])
@@ -224,6 +228,7 @@ proc getModule*(self: Compiler): string {.inline.} = self.currentModule
 ## Utility functions

 proc peek(self: Compiler, distance: int = 0): ASTNode =
     ## Peeks at the AST node at the given distance.
     ## If the distance is out of bounds, the last
@@ -1506,13 +1511,12 @@ proc forEachStmt(self: Compiler, node: ForEachStmt) =
 proc importStmt(self: Compiler, node: ImportStmt) =
     ## Imports a module at compile time
-    if self.scopeDepth > 0:
-        self.error("import statements are only allowed at the top level")
-    var lexer = newLexer()
-    var parser = newParser()
-    var compiler = newCompiler()
-    # TODO: Find module
-    var result {.used.} = compiler.compile(parser.parse(lexer.lex("", node.moduleName.name.lexeme), node.moduleName.name.lexeme), node.moduleName.name.lexeme)
+    # TODO: This is obviously horrible. It's just a test
+    let filename = node.moduleName.token.lexeme & ".pn"
+    try:
+        self.compileModule(filename)
+    except IOError:
+        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")

 proc statement(self: Compiler, node: Statement) =
@@ -1791,6 +1795,31 @@ proc declaration(self: Compiler, node: Declaration) =
         self.statement(Statement(node))

+proc compileModule(self: Compiler, filename: string) =
+    ## Compiles an imported module into an existing chunk.
+    ## A temporary compiler object is initialized internally
+    ## and its state is copied from the self argument
+    var lexer = newLexer()
+    var parser = newParser()
+    var compiler = newCompiler()
+    lexer.fillSymbolTable()
+    compiler.chunk = self.chunk
+    compiler.ast = parser.parse(lexer.lex(readFile(filename), filename), filename)
+    compiler.file = filename
+    compiler.names = @[]
+    compiler.scopeDepth = 0
+    compiler.currentFunction = nil
+    compiler.currentModule = compiler.file.extractFilename()
+    compiler.current = 0
+    compiler.frames = @[0]
+    while not compiler.done():
+        compiler.declaration(Declaration(compiler.step()))
+    self.names &= compiler.names
+    self.closedOver &= compiler.closedOver
+    compiler.endScope()

 proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
     ## Compiles a sequence of AST nodes into a chunk
     ## object
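For orientation, this is roughly how the pieces above fit together end to end. A minimal driver sketch, not part of this commit: the module paths (frontend/lexer, frontend/parser) and the working directory are assumptions, while the call signatures are the ones visible in the diff.

# Sketch only: run from tests/import/ so that importStmt's "lib" & ".pn"
# concatenation resolves to a readable file (there is no module search path yet).
import frontend/lexer as l
import frontend/parser as p
import frontend/compiler as c
import util/symbols

var lexer = newLexer()
lexer.fillSymbolTable()
var parser = newParser()
var compiler = newCompiler()
let source = readFile("fib.pn")
# Compiling fib.pn hits its ImportStmt node, which calls compileModule("lib.pn");
# lib.pn's declarations are emitted into the same chunk and its names are
# merged back into the importing compiler's name list.
discard compiler.compile(parser.parse(lexer.lex(source, "fib.pn"), "fib.pn"), "fib.pn")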


@@ -22,6 +22,8 @@ import os
 import meta/token
 import meta/ast
 import meta/errors
+import lexer as l
+import ../util/symbols

 export token, ast, errors
@@ -662,8 +664,14 @@ proc forEachStmt(self: Parser): Statement =
     self.currentLoop = enclosingLoop

+proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration]
+proc findOperators(self: Parser, tokens: seq[Token])

 proc importStmt(self: Parser, fromStmt: bool = false): Statement =
     ## Parses import statements
+    if self.scopeDepth > 0:
+        self.error("import statements are only allowed at the top level")
     var tok: Token
     if fromStmt:
         tok = self.peek(-2)
@@ -672,9 +680,16 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
     # TODO: New AST node
     self.expect(Identifier, "expecting module name(s) after import statement")
     endOfLine("missing statement terminator after 'import'")
-    result = newImportStmt(newIdentExpr(self.peek(-1), self.scopeDepth), tok)
+    result = newImportStmt(newIdentExpr(self.peek(-2), self.scopeDepth), tok)
+    var filename = ImportStmt(result).moduleName.token.lexeme & ".pn"
+    var lexer = newLexer()
+    lexer.fillSymbolTable()
+    # TODO: This is obviously horrible. It's just a test
+    try:
+        self.findOperators(lexer.lex(readFile(filename), filename))
+    except IOError:
+        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")

 proc tryStmt(self: Parser): Statement =
     ## Parses try/except/else/finally blocks
@@ -1191,6 +1206,23 @@ proc declaration(self: Parser): Declaration =
         result = Declaration(self.statement())

+proc findOperators(self: Parser, tokens: seq[Token]) =
+    ## Finds operators in a token stream
+    for i, token in tokens:
+        # We do a first pass over the tokens
+        # to find operators. Note that this
+        # relies on the lexer ending the input
+        # with an EOF token
+        if token.kind == Operator:
+            if i == tokens.high():
+                self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
+            self.operators.addOperator(tokens[i + 1].lexeme)
+        if i == tokens.high() and token.kind != EndOfFile:
+            # Since we're iterating this list anyway might as
+            # well perform some extra checks
+            self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)

 proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] =
     ## Parses a sequence of tokens into a sequence of AST nodes
     self.tokens = tokens
@@ -1201,19 +1233,7 @@ proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] =
     self.scopeDepth = 0
     self.operators = newOperatorTable()
     self.tree = @[]
-    for i, token in self.tokens:
-        # We do a first pass over the tokens
-        # to find operators. Note that this
-        # relies on the lexer ending the input
-        # with an EOF token
-        if token.kind == Operator:
-            if i == self.tokens.high():
-                self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
-            self.operators.addOperator(self.tokens[i + 1].lexeme)
-        if i == self.tokens.high() and token.kind != EndOfFile:
-            # Since we're iterating this list anyway might as
-            # well perform some extra checks
-            self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
+    self.findOperators(tokens)
     while not self.done():
         self.tree.add(self.declaration())
         if self.tree[^1] == nil:
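For reference, the token pattern findOperators keys on: in peon source, the operator's name is the token immediately following the operator keyword, which is exactly what the tokens[i + 1].lexeme lookup registers. A declaration like the one below (taken from the lib.pn test added in this commit) is therefore enough for `<` to parse as an infix operator in any file that imports lib:

operator `<`*(a, b: int): bool {
    #pragma[magic: "LessThanInt64", pure]
}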


@@ -21,10 +21,10 @@ import frontend/compiler as c
 import backend/vm as v
 import util/serializer as s
 import util/debugger
+import util/symbols
 import config

 # Forward declarations
-proc fillSymbolTable(tokenizer: Lexer)
 proc getLineEditor: LineEditor

 # Handy dandy compile-time constants
@@ -368,66 +368,6 @@ when isMainModule:
         runFile(file, interactive, fromString)

-proc fillSymbolTable(tokenizer: Lexer) =
-    ## Initializes the Lexer's symbol
-    ## table with the builtin symbols
-    ## and keywords
-    # 1-byte symbols
-    tokenizer.symbols.addSymbol("{", LeftBrace)
-    tokenizer.symbols.addSymbol("}", RightBrace)
-    tokenizer.symbols.addSymbol("(", LeftParen)
-    tokenizer.symbols.addSymbol(")", RightParen)
-    tokenizer.symbols.addSymbol("[", LeftBracket)
-    tokenizer.symbols.addSymbol("]", RightBracket)
-    tokenizer.symbols.addSymbol(".", Dot)
-    tokenizer.symbols.addSymbol(",", Comma)
-    tokenizer.symbols.addSymbol(";", Semicolon)
-    # tokenizer.symbols.addSymbol("\n", Semicolon)  # TODO: Broken
-    # Keywords
-    tokenizer.symbols.addKeyword("type", TokenType.Type)
-    tokenizer.symbols.addKeyword("enum", Enum)
-    tokenizer.symbols.addKeyword("case", Case)
-    tokenizer.symbols.addKeyword("operator", Operator)
-    tokenizer.symbols.addKeyword("generator", Generator)
-    tokenizer.symbols.addKeyword("fn", TokenType.Function)
-    tokenizer.symbols.addKeyword("coroutine", Coroutine)
-    tokenizer.symbols.addKeyword("break", TokenType.Break)
-    tokenizer.symbols.addKeyword("continue", Continue)
-    tokenizer.symbols.addKeyword("while", While)
-    tokenizer.symbols.addKeyword("for", For)
-    tokenizer.symbols.addKeyword("foreach", Foreach)
-    tokenizer.symbols.addKeyword("if", If)
-    tokenizer.symbols.addKeyword("else", Else)
-    tokenizer.symbols.addKeyword("await", TokenType.Await)
-    tokenizer.symbols.addKeyword("defer", Defer)
-    tokenizer.symbols.addKeyword("try", Try)
-    tokenizer.symbols.addKeyword("except", Except)
-    tokenizer.symbols.addKeyword("finally", Finally)
-    tokenizer.symbols.addKeyword("raise", TokenType.Raise)
-    tokenizer.symbols.addKeyword("assert", TokenType.Assert)
-    tokenizer.symbols.addKeyword("const", Const)
-    tokenizer.symbols.addKeyword("let", Let)
-    tokenizer.symbols.addKeyword("var", TokenType.Var)
-    tokenizer.symbols.addKeyword("import", Import)
-    tokenizer.symbols.addKeyword("yield", TokenType.Yield)
-    tokenizer.symbols.addKeyword("return", TokenType.Return)
-    tokenizer.symbols.addKeyword("object", Object)
-    # These are more like expressions with a reserved
-    # name that produce a value of a builtin type,
-    # but we don't need to care about that until
-    # we're in the parsing/compilation steps so
-    # it's fine
-    tokenizer.symbols.addKeyword("nan", NotANumber)
-    tokenizer.symbols.addKeyword("inf", Infinity)
-    tokenizer.symbols.addKeyword("nil", TokenType.Nil)
-    tokenizer.symbols.addKeyword("true", True)
-    tokenizer.symbols.addKeyword("false", False)
-    tokenizer.symbols.addKeyword("ref", TokenType.Ref)
-    tokenizer.symbols.addKeyword("ptr", TokenType.Ptr)
-    for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]:
-        tokenizer.symbols.addSymbol(sym, Symbol)

 proc getLineEditor: LineEditor =
     result = newLineEditor()
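util/symbols itself is not shown in this view, but judging from the new import util/symbols lines and the lexer.fillSymbolTable() calls in the compiler and parser, it presumably now hosts the proc deleted above. A sketch of the assumed shape (file and import paths are guesses):

# util/symbols.nim -- assumed layout, not part of the visible diff
import ../frontend/lexer            # assumed location of Lexer and its symbol table
import ../frontend/meta/token       # assumed location of TokenType

proc fillSymbolTable*(tokenizer: Lexer) =
    ## Initializes the Lexer's symbol table with the builtin
    ## symbols and keywords (same body as the proc removed from main.nim)
    tokenizer.symbols.addSymbol("{", LeftBrace)
    tokenizer.symbols.addKeyword("operator", Operator)
    # ...and so on, exactly as in the deleted proc above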

tests/import/fib.pn (new file)

@@ -0,0 +1,14 @@
+import lib;
+
+fn fib(n: int): int {
+    if n < 2 {
+        return n;
+    }
+    return fib(n - 2) + fib(n - 1);
+}
+
+var x = clock();
+print(fib(25));
+print(clock() - x);

tests/import/lib.pn (new file)

@@ -0,0 +1,39 @@
+operator `<`*(a, b: int): bool {
+    #pragma[magic: "LessThanInt64", pure]
+}
+
+operator `-`*(a, b: float): float {
+    #pragma[magic: "SubFloat64", pure]
+}
+
+operator `-`*(a, b: int): int {
+    #pragma[magic: "SubInt64", pure]
+}
+
+operator `+`*(a, b: int): int {
+    #pragma[magic: "AddInt64", pure]
+}
+
+operator `/`*(a, b: int): int {
+    #pragma[magic: "DivInt64", pure]
+}
+
+fn clock*: float {
+    #pragma[magic: "SysClock64", pure]
+}
+
+fn print*(x: float) {
+    #pragma[magic: "GenericPrint"]
+}
+
+fn print*(x: int) {
+    #pragma[magic: "GenericPrint"]
+}
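A second, smaller importer along the same lines as fib.pn, exercising the `/` overload that fib.pn does not touch (hypothetical snippet, not part of the commit; it only uses the operators and builtins declared in lib.pn above):

import lib;

var start = clock();
print(10 / 3);
print(clock() - start);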