From c85fff8f67cace82fb6f575c8b109d4204a12233 Mon Sep 17 00:00:00 2001
From: Mattia Giambirtone
Date: Sun, 14 Aug 2022 18:37:06 +0200
Subject: [PATCH] Initial tests for an import system

---
 src/frontend/compiler.nim | 43 ++++++++++++++++++++++-----
 src/frontend/parser.nim   | 52 ++++++++++++++++++++++----------
 src/main.nim              | 62 +--------------------------------------
 tests/import/a.pn         |  0
 tests/import/b.pn         |  0
 tests/import/fib.pn       | 14 +++++++++
 tests/import/lib.pn       | 39 ++++++++++++++++++++++++
 7 files changed, 126 insertions(+), 84 deletions(-)
 delete mode 100644 tests/import/a.pn
 delete mode 100644 tests/import/b.pn
 create mode 100644 tests/import/fib.pn
 create mode 100644 tests/import/lib.pn

diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim
index 73fe96a..a8edbc4 100644
--- a/src/frontend/compiler.nim
+++ b/src/frontend/compiler.nim
@@ -16,6 +16,7 @@ import meta/ast
 import meta/errors
 import ../config
 import ../util/multibyte
+import ../util/symbols
 import lexer as l
 import parser as p
 
@@ -215,6 +216,9 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Com
     result.compilerProcs["pure"] = handlePurePragma
 
 
+proc compileModule(self: Compiler, filename: string)
+
+
 ## Public getter for nicer error formatting
 proc getCurrentNode*(self: Compiler): ASTNode = (if self.current >= self.ast.len(): self.ast[^1] else: self.ast[self.current - 1])
 
@@ -224,6 +228,7 @@ proc getModule*(self: Compiler): string {.inline.} = self.currentModule
 
 
 ## Utility functions
+
 proc peek(self: Compiler, distance: int = 0): ASTNode =
     ## Peeks at the AST node at the given distance.
     ## If the distance is out of bounds, the last
@@ -1506,13 +1511,12 @@ proc forEachStmt(self: Compiler, node: ForEachStmt) =
 
 proc importStmt(self: Compiler, node: ImportStmt) =
     ## Imports a module at compile time
-    if self.scopeDepth > 0:
-        self.error("import statements are only allowed at the top level")
-    var lexer = newLexer()
-    var parser = newParser()
-    var compiler = newCompiler()
-    # TODO: Find module
-    var result {.used.} = compiler.compile(parser.parse(lexer.lex("", node.moduleName.name.lexeme), node.moduleName.name.lexeme), node.moduleName.name.lexeme)
+    # TODO: This is obviously horrible. It's just a test
+    let filename = node.moduleName.token.lexeme & ".pn"
+    try:
+        self.compileModule(filename)
+    except IOError:
+        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")
 
 
 proc statement(self: Compiler, node: Statement) =
@@ -1791,6 +1795,31 @@ proc declaration(self: Compiler, node: Declaration) =
         self.statement(Statement(node))
 
 
+proc compileModule(self: Compiler, filename: string) =
+    ## Compiles an imported module into an existing chunk.
+    ## A temporary compiler object is initialized
+    ## internally, its state is copied from self,
+    ## and its top-level names are merged back in
+    var lexer = newLexer()
+    var parser = newParser()
+    var compiler = newCompiler()
+    lexer.fillSymbolTable()
+    compiler.chunk = self.chunk
+    compiler.ast = parser.parse(lexer.lex(readFile(filename), filename), filename)
+    compiler.file = filename
+    compiler.names = @[]
+    compiler.scopeDepth = 0
+    compiler.currentFunction = nil
+    compiler.currentModule = compiler.file.extractFilename()
+    compiler.current = 0
+    compiler.frames = @[0]
+    while not compiler.done():
+        compiler.declaration(Declaration(compiler.step()))
+    self.names &= compiler.names
+    self.closedOver &= compiler.closedOver
+    compiler.endScope()
+
+
 proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk =
     ## Compiles a sequence of AST nodes into a chunk
     ## object
diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim
index 8f0391b..a8b197d 100644
--- a/src/frontend/parser.nim
+++ b/src/frontend/parser.nim
@@ -22,6 +22,8 @@ import os
 import meta/token
 import meta/ast
 import meta/errors
+import lexer as l
+import ../util/symbols
 
 export token, ast, errors
 
@@ -662,8 +664,14 @@ proc forEachStmt(self: Parser): Statement =
     self.currentLoop = enclosingLoop
 
 
+proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration]
+proc findOperators(self: Parser, tokens: seq[Token])
+
+
 proc importStmt(self: Parser, fromStmt: bool = false): Statement =
     ## Parses import statements
+    if self.scopeDepth > 0:
+        self.error("import statements are only allowed at the top level")
     var tok: Token
     if fromStmt:
         tok = self.peek(-2)
@@ -672,9 +680,16 @@ proc importStmt(self: Parser, fromStmt: bool = false): Statement =
     # TODO: New AST node
     self.expect(Identifier, "expecting module name(s) after import statement")
     endOfLine("missing statement terminator after 'import'")
-    result = newImportStmt(newIdentExpr(self.peek(-1), self.scopeDepth), tok)
-
-
+    result = newImportStmt(newIdentExpr(self.peek(-2), self.scopeDepth), tok)
+    var filename = ImportStmt(result).moduleName.token.lexeme & ".pn"
+    var lexer = newLexer()
+    lexer.fillSymbolTable()
+    # TODO: This is obviously horrible. It's just a test
+    try:
+        self.findOperators(lexer.lex(readFile(filename), filename))
+    except IOError:
+        self.error(&"""could not import '{filename}': {getCurrentExceptionMsg()}""")
+
 
 proc tryStmt(self: Parser): Statement =
     ## Parses try/except/else/finally blocks
@@ -1191,6 +1206,23 @@ proc declaration(self: Parser): Declaration =
         result = Declaration(self.statement())
 
 
+proc findOperators(self: Parser, tokens: seq[Token]) =
+    ## Finds operators in a token stream
+    for i, token in tokens:
+        # We do a first pass over the tokens
+        # to find operators. Note that this
+        # relies on the lexer ending the input
+        # with an EOF token
+        if token.kind == Operator:
+            if i == tokens.high():
+                self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
+            self.operators.addOperator(tokens[i + 1].lexeme)
+        if i == tokens.high() and token.kind != EndOfFile:
+            # Since we're iterating this list anyway might as
+            # well perform some extra checks
+            self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
+
+
 proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] =
     ## Parses a sequence of tokens into a sequence of AST nodes
     self.tokens = tokens
@@ -1201,19 +1233,7 @@ proc parse*(self: Parser, tokens: seq[Token], file: string): seq[Declaration] =
     self.scopeDepth = 0
     self.operators = newOperatorTable()
     self.tree = @[]
-    for i, token in self.tokens:
-        # We do a first pass over the tokens
-        # to find operators. Note that this
-        # relies on the lexer ending the input
-        # with an EOF token
-        if token.kind == Operator:
-            if i == self.tokens.high():
-                self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
-            self.operators.addOperator(self.tokens[i + 1].lexeme)
-        if i == self.tokens.high() and token.kind != EndOfFile:
-            # Since we're iterating this list anyway might as
-            # well perform some extra checks
-            self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
+    self.findOperators(tokens)
     while not self.done():
         self.tree.add(self.declaration())
         if self.tree[^1] == nil:
diff --git a/src/main.nim b/src/main.nim
index e23a6a5..05617c6 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -21,10 +21,10 @@ import frontend/compiler as c
 import backend/vm as v
 import util/serializer as s
 import util/debugger
+import util/symbols
 import config
 
 # Forward declarations
-proc fillSymbolTable(tokenizer: Lexer)
 proc getLineEditor: LineEditor
 
 # Handy dandy compile-time constants
@@ -368,66 +368,6 @@ when isMainModule:
         runFile(file, interactive, fromString)
 
 
-proc fillSymbolTable(tokenizer: Lexer) =
-    ## Initializes the Lexer's symbol
-    ## table with the builtin symbols
-    ## and keywords
-
-    # 1-byte symbols
-    tokenizer.symbols.addSymbol("{", LeftBrace)
-    tokenizer.symbols.addSymbol("}", RightBrace)
-    tokenizer.symbols.addSymbol("(", LeftParen)
-    tokenizer.symbols.addSymbol(")", RightParen)
-    tokenizer.symbols.addSymbol("[", LeftBracket)
-    tokenizer.symbols.addSymbol("]", RightBracket)
-    tokenizer.symbols.addSymbol(".", Dot)
-    tokenizer.symbols.addSymbol(",", Comma)
-    tokenizer.symbols.addSymbol(";", Semicolon)
-    # tokenizer.symbols.addSymbol("\n", Semicolon) # TODO: Broken
-    # Keywords
-    tokenizer.symbols.addKeyword("type", TokenType.Type)
-    tokenizer.symbols.addKeyword("enum", Enum)
-    tokenizer.symbols.addKeyword("case", Case)
-    tokenizer.symbols.addKeyword("operator", Operator)
-    tokenizer.symbols.addKeyword("generator", Generator)
-    tokenizer.symbols.addKeyword("fn", TokenType.Function)
-    tokenizer.symbols.addKeyword("coroutine", Coroutine)
-    tokenizer.symbols.addKeyword("break", TokenType.Break)
-    tokenizer.symbols.addKeyword("continue", Continue)
-    tokenizer.symbols.addKeyword("while", While)
-    tokenizer.symbols.addKeyword("for", For)
-    tokenizer.symbols.addKeyword("foreach", Foreach)
-    tokenizer.symbols.addKeyword("if", If)
-    tokenizer.symbols.addKeyword("else", Else)
-    tokenizer.symbols.addKeyword("await", TokenType.Await)
tokenizer.symbols.addKeyword("defer", Defer) - tokenizer.symbols.addKeyword("try", Try) - tokenizer.symbols.addKeyword("except", Except) - tokenizer.symbols.addKeyword("finally", Finally) - tokenizer.symbols.addKeyword("raise", TokenType.Raise) - tokenizer.symbols.addKeyword("assert", TokenType.Assert) - tokenizer.symbols.addKeyword("const", Const) - tokenizer.symbols.addKeyword("let", Let) - tokenizer.symbols.addKeyword("var", TokenType.Var) - tokenizer.symbols.addKeyword("import", Import) - tokenizer.symbols.addKeyword("yield", TokenType.Yield) - tokenizer.symbols.addKeyword("return", TokenType.Return) - tokenizer.symbols.addKeyword("object", Object) - # These are more like expressions with a reserved - # name that produce a value of a builtin type, - # but we don't need to care about that until - # we're in the parsing/ compilation steps so - # it's fine - tokenizer.symbols.addKeyword("nan", NotANumber) - tokenizer.symbols.addKeyword("inf", Infinity) - tokenizer.symbols.addKeyword("nil", TokenType.Nil) - tokenizer.symbols.addKeyword("true", True) - tokenizer.symbols.addKeyword("false", False) - tokenizer.symbols.addKeyword("ref", TokenType.Ref) - tokenizer.symbols.addKeyword("ptr", TokenType.Ptr) - for sym in [">", "<", "=", "~", "/", "+", "-", "_", "*", "?", "@", ":"]: - tokenizer.symbols.addSymbol(sym, Symbol) - proc getLineEditor: LineEditor = result = newLineEditor() diff --git a/tests/import/a.pn b/tests/import/a.pn deleted file mode 100644 index e69de29..0000000 diff --git a/tests/import/b.pn b/tests/import/b.pn deleted file mode 100644 index e69de29..0000000 diff --git a/tests/import/fib.pn b/tests/import/fib.pn new file mode 100644 index 0000000..7e08723 --- /dev/null +++ b/tests/import/fib.pn @@ -0,0 +1,14 @@ +import lib; + + +fn fib(n: int): int { + if n < 2 { + return n; + } + return fib(n - 2) + fib(n - 1); +} + + +var x = clock(); +print(fib(25)); +print(clock() - x); diff --git a/tests/import/lib.pn b/tests/import/lib.pn new file mode 100644 index 0000000..b19abf8 --- /dev/null +++ b/tests/import/lib.pn @@ -0,0 +1,39 @@ +operator `<`*(a, b: int): bool { + #pragma[magic: "LessThanInt64", pure] +} + + +operator `-`*(a, b: float): float { + #pragma[magic: "SubFloat64", pure] +} + + +operator `-`*(a, b: int): int { + #pragma[magic: "SubInt64", pure] +} + + +operator `+`*(a, b: int): int { + #pragma[magic: "AddInt64", pure] +} + + +operator `/`*(a, b: int): int { + #pragma[magic: "DivInt64", pure] +} + + +fn clock*: float { + #pragma[magic: "SysClock64", pure] +} + + +fn print*(x: float) { + #pragma[magic: "GenericPrint"] +} + + +fn print*(x: int) { + #pragma[magic: "GenericPrint"] +} +