From dac0cca1bcf0bff330ed18559435092336b83f02 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Tue, 7 Jun 2022 11:23:08 +0200 Subject: [PATCH] Initial experimental support for parsing pragmas --- src/frontend/compiler.nim | 87 +++++++++++++++++------ src/frontend/lexer.nim | 13 ++-- src/frontend/meta/ast.nim | 17 +++-- src/frontend/meta/token.nim | 4 +- src/frontend/parser.nim | 133 +++++++++++++++++++++++++++++------- src/main.nim | 11 +-- 6 files changed, 205 insertions(+), 60 deletions(-) diff --git a/src/frontend/compiler.nim b/src/frontend/compiler.nim index b40b5f0..c71eff1 100644 --- a/src/frontend/compiler.nim +++ b/src/frontend/compiler.nim @@ -16,7 +16,8 @@ import meta/ast import meta/errors import ../config import ../util/multibyte - +import lexer +import parser import strformat import algorithm @@ -180,6 +181,7 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Com ## Forward declarations +proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk proc expression(self: Compiler, node: Expression) proc statement(self: Compiler, node: Statement) proc declaration(self: Compiler, node: Declaration) @@ -779,8 +781,12 @@ proc matchImpl(self: Compiler, name: string, kind: Type): Name = msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})" else: for i, arg in kind.args: - if not self.compareTypes(arg.kind, name.valueType.args[i].kind): + if name.valueType.args[i].kind.kind == Mutable and arg.kind.kind != Mutable: + msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'" + break + elif not self.compareTypes(arg.kind, name.valueType.args[i].kind): msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead" + break self.error(msg) elif impl.len() > 1: var msg = &"multiple matching implementations of '{name}' found:\n" @@ 
-869,7 +875,7 @@ proc binary(self: Compiler, node: BinaryExpr) = ]# -proc declareName(self: Compiler, node: Declaration) = +proc declareName(self: Compiler, node: Declaration, mutable: bool = false) = ## Statically declares a name into the current scope. ## "Declaring" a name only means updating our internal ## list of identifiers so that further calls to resolve() @@ -900,6 +906,8 @@ proc declareName(self: Compiler, node: Declaration) = isLet: node.isLet, isClosedOver: false, line: node.token.line)) + if mutable: + self.names[^1].valueType = Type(kind: Mutable, value: self.names[^1].valueType) # We emit a jump of 0 because this may become a # StoreHeap instruction. If they variable is # not closed over, we'll sadly be wasting a @@ -932,6 +940,10 @@ proc declareName(self: Compiler, node: Declaration) = isClosedOver: false, line: node.token.line)) let fn = self.names[^1] + if fn.valueType.returnType.isNil() and not node.returnType.isNil() and node.returnType.kind == identExpr: + for g in node.generics: + if g.name == IdentExpr(node.returnType): + fn.valueType.returnType = Type(kind: Generic) var name: Name for argument in node.arguments: if self.names.high() > 16777215: @@ -1185,12 +1197,14 @@ proc expression(self: Compiler, node: Expression) = of NodeKind.callExpr: self.callExpr(CallExpr(node)) # TODO of getItemExpr: + discard # TODO: Get rid of this + of pragmaExpr: discard # TODO # Note that for setItem and assign we don't convert # the node to its true type because that type information # would be lost in the call anyway. The differentiation # happens in self.assignment() - of setItemExpr, assignExpr: + of setItemExpr, assignExpr: # TODO: Get rid of this self.assignment(node) of identExpr: self.identifier(IdentExpr(node)) @@ -1260,26 +1274,29 @@ proc endFunctionBeforeReturn(self: Compiler) = proc returnStmt(self: Compiler, node: ReturnStmt) = ## Compiles return statements. 
An empty return ## implicitly returns nil - let returnType = self.inferType(node.value) - let typ = self.inferType(self.currentFunction) + let actual = self.inferType(node.value) + let expected = self.inferType(self.currentFunction) + var comp: Type = actual + if not expected.isNil() and not expected.returnType.isNil() and expected.returnType.kind in {Reference, Pointer, Mutable}: + comp = expected.returnType.value ## Having the return type - if returnType == nil and typ.returnType != nil: - if node.value != nil: + if actual.isNil() and not expected.returnType.isNil(): + if not node.value.isNil(): if node.value.kind == identExpr: self.error(&"reference to undeclared identifier '{node.value.token.lexeme}'") elif node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr: self.error(&"call to undeclared function '{CallExpr(node.value).callee.token.lexeme}'") - self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', but expression has no type") - elif typ.returnType == nil and returnType != nil: + self.error(&"expected return value of type '{self.typeToStr(expected.returnType)}', but expression has no type") + elif expected.returnType.isNil() and not actual.isNil(): self.error("non-empty return statement is not allowed in void functions") - elif not self.compareTypes(returnType, typ.returnType): - self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead") - if node.value != nil: + elif not self.compareTypes(actual, comp): + self.error(&"expected return value of type '{self.typeToStr(comp)}', got '{self.typeToStr(actual)}' instead") + if not node.value.isNil(): self.expression(node.value) self.emitByte(OpCode.SetResult) self.endFunctionBeforeReturn() self.emitByte(OpCode.Return) - if node.value != nil: + if not node.value.isNil(): self.emitByte(1) else: self.emitByte(0) @@ -1292,7 +1309,7 @@ proc yieldStmt(self: Compiler, node: YieldStmt) = proc raiseStmt(self: 
Compiler, node: RaiseStmt) = - ## Compiles yield statements + ## Compiles raise statements self.expression(node.exception) self.emitByte(OpCode.Raise) @@ -1336,6 +1353,20 @@ proc assertStmt(self: Compiler, node: AssertStmt) = self.emitByte(OpCode.Assert) +proc forEachStmt(self: Compiler, node: ForEachStmt) = + ## Compiles foreach loops + # TODO + + +proc importStmt(self: Compiler, node: ImportStmt) = + ## Imports a module at compile time + if self.scopeDepth > 0: + self.error("import statements are only allowed at the top level") + let compiler = newCompiler() + # TODO: Find module (an empty string is lexed for now and the compiled chunk is thrown away) + discard compiler.compile(newParser().parse(newLexer().lex("", node.moduleName.name.lexeme), node.moduleName.name.lexeme), node.moduleName.name.lexeme) + + proc statement(self: Compiler, node: Statement) = ## Compiles all statements case node.kind: @@ -1365,7 +1396,7 @@ proc statement(self: Compiler, node: Statement) = of NodeKind.returnStmt: self.returnStmt(ReturnStmt(node)) of NodeKind.importStmt: - discard + self.importStmt(ImportStmt(node)) of NodeKind.whileStmt, NodeKind.forStmt: ## Our parser already desugars for loops to ## while loops!
@@ -1376,7 +1407,7 @@ proc statement(self: Compiler, node: Statement) = self.patchBreaks() self.currentLoop = loop of NodeKind.forEachStmt: - discard + self.forEachStmt(ForEachStmt(node)) of NodeKind.blockStmt: self.blockStmt(BlockStmt(node)) of NodeKind.yieldStmt: @@ -1405,11 +1436,16 @@ proc varDecl(self: Compiler, node: VarDecl) = if expected != nil: self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'") self.expression(node.value) - self.declareName(node) + self.declareName(node, mutable=node.token.kind == Var) self.emitByte(StoreVar) self.emitBytes(self.names.high().toTriple()) +proc typeDecl(self: Compiler, node: TypeDecl) = + ## Compiles type declarations + + + proc funDecl(self: Compiler, node: FunDecl) = ## Compiles function declarations # A function's code is just compiled linearly @@ -1422,8 +1458,16 @@ proc funDecl(self: Compiler, node: FunDecl) = let jmp = self.emitJump(JumpForwards) for argument in node.arguments: self.emitByte(LoadArgument) - if node.returnType != nil and self.inferType(node.returnType) == nil: - self.error(&"cannot infer the type of '{node.returnType.token.lexeme}'") + if not node.returnType.isNil() and self.inferType(node.returnType).isNil(): + var isGeneric = false + if node.returnType.kind == identExpr: + let name = IdentExpr(node.returnType) + for g in node.generics: + if name == g.name: + isGeneric = true + break + if not isGeneric: + self.error(&"cannot infer the type of '{node.returnType.token.lexeme}'") # TODO: Forward declarations if node.body != nil: if BlockStmt(node.body).code.len() == 0: @@ -1512,7 +1556,6 @@ proc patchReturnAddress(self: Compiler, pos: int) = self.chunk.code[pos + 3] = address[3] - proc declaration(self: Compiler, node: Declaration) = ## Compiles all declarations case node.kind: @@ -1520,6 +1563,8 @@ proc declaration(self: Compiler, node: Declaration) = self.varDecl(VarDecl(node)) of NodeKind.funDecl: 
self.funDecl(FunDecl(node)) + of NodeKind.typeDecl: + self.typeDecl(TypeDecl(node)) else: self.statement(Statement(node)) diff --git a/src/frontend/lexer.nim b/src/frontend/lexer.nim index 04c2f80..7b5f71e 100644 --- a/src/frontend/lexer.nim +++ b/src/frontend/lexer.nim @@ -594,10 +594,13 @@ proc next(self: Lexer) = # Keywords and identifiers self.parseIdentifier() elif self.match("#"): - # Inline comments, pragmas, etc. - while not (self.check("\n") or self.done()): - discard self.step() - self.createToken(Comment) + if not self.match("pragma["): + # Inline comments + while not (self.check("\n") or self.done()): + discard self.step() + self.createToken(Comment) + else: + self.createToken(Pragma) else: # If none of the above conditions matched, there's a few # other options left: @@ -607,7 +610,7 @@ proc next(self: Lexer) = # We handle all of these cases here by trying to # match the longest sequence of characters possible # as either an operator or a statement/expression - # delimiter, erroring out if there's no match + # delimiter var n = self.symbols.getMaxSymbolSize() while n > 0: for symbol in self.symbols.getSymbols(n): diff --git a/src/frontend/meta/ast.nim b/src/frontend/meta/ast.nim index 72017ab..cb60df6 100644 --- a/src/frontend/meta/ast.nim +++ b/src/frontend/meta/ast.nim @@ -265,8 +265,9 @@ type TypeDecl* = ref object of Declaration name*: IdentExpr fields*: seq[tuple[name: IdentExpr, valueType: Expression, - mutable: bool, isRef: bool, isPtr: bool]] + mutable: bool, isRef: bool, isPtr: bool, isPrivate: bool]] defaults*: seq[Expression] + isRef*: bool Pragma* = ref object of Expression name*: IdentExpr @@ -307,6 +308,7 @@ proc newPragma*(name: IdentExpr, args: seq[LiteralExpr]): Pragma = result.kind = pragmaExpr result.args = args result.name = name + result.token = name.token proc newIntExpr*(literal: Token): IntExpr = @@ -596,9 +598,9 @@ proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueTyp result.generics = generics -proc 
newTypeDecl*(name: IdentExpr, fields: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]], +proc newTypeDecl*(name: IdentExpr, fields: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool, isPrivate: bool]], defaults: seq[Expression], isPrivate: bool, token: Token, pragmas: seq[Pragma], - generics: seq[tuple[name: IdentExpr, cond: Expression]]): TypeDecl = + generics: seq[tuple[name: IdentExpr, cond: Expression]], isRef: bool): TypeDecl = result = TypeDecl(kind: typeDecl) result.name = name result.fields = fields @@ -607,6 +609,7 @@ proc newTypeDecl*(name: IdentExpr, fields: seq[tuple[name: IdentExpr, valueType: result.token = token result.pragmas = pragmas result.generics = generics + result.isRef = isRef @@ -690,10 +693,13 @@ proc `$`*(self: ASTNode): string = result &= &"AwaitStmt({self.expression})" of varDecl: var self = VarDecl(self) - result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType})" + result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType}, pragmas={self.pragmas})" of funDecl: var self = FunDecl(self) result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate})""" + of typeDecl: + var self = TypeDecl(self) + result &= &"""TypeDecl(name={self.name}, fields={self.fields}, defaults={self.defaults}, private={self.isPrivate}, pragmas={self.pragmas}, generics={self.generics}, ref={self.isRef})""" of lambdaExpr: var self = LambdaExpr(self) result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync})""" 
@@ -715,6 +721,9 @@ proc `$`*(self: ASTNode): string = else: result &= ", elseClause=nil" result &= ")" + of pragmaExpr: + var self = Pragma(self) + result &= &"Pragma(name={self.name}, args={self.args})" else: discard diff --git a/src/frontend/meta/token.nim b/src/frontend/meta/token.nim index 7883f8e..e156851 100644 --- a/src/frontend/meta/token.nim +++ b/src/frontend/meta/token.nim @@ -39,7 +39,7 @@ type Raise, Assert, Await, Foreach, Yield, Defer, Try, Except, Finally, Type, Operator, Case, - Enum, From, Ptr, Ref + Enum, From, Ptr, Ref, Object # Literal types Integer, Float, String, Identifier, @@ -59,6 +59,7 @@ type NoMatch, # Used internally by the symbol table Comment, # Useful for documentation comments, pragmas, etc. Symbol, # A generic symbol + Pragma, # These are not used at the moment but may be # employed to enforce indentation or other neat # stuff I haven't thought about yet @@ -66,6 +67,7 @@ type Tab, + Token* = ref object ## A token object kind*: TokenType # Type of the token diff --git a/src/frontend/parser.nim b/src/frontend/parser.nim index a61a7e7..423821e 100644 --- a/src/frontend/parser.nim +++ b/src/frontend/parser.nim @@ -377,13 +377,12 @@ proc makeCall(self: Parser, callee: Expression): Expression = self.error("call can not have more than 255 arguments") break argument = self.expression() - if argument.kind == assignExpr: + if argument.kind == binaryExpr and BinaryExpr(argument).operator.lexeme == "=": # TODO: This will explode with slices! 
- if IdentExpr(AssignExpr(argument).name) in argNames: + if IdentExpr(BinaryExpr(argument).a) in argNames: self.error("duplicate keyword argument in call") - argNames.add(IdentExpr(AssignExpr(argument).name)) - arguments.keyword.add((name: IdentExpr(AssignExpr( - argument).name), value: AssignExpr(argument).value)) + argNames.add(IdentExpr(BinaryExpr(argument).a)) + arguments.keyword.add((name: IdentExpr(BinaryExpr(argument).a), value: BinaryExpr(argument).b)) elif arguments.keyword.len() == 0: arguments.positionals.add(argument) else: @@ -1077,27 +1076,110 @@ proc statement(self: Parser): Statement = result = self.expressionStatement() -proc parsePragma(self: Parser): Pragma = +proc parsePragma(self: Parser): tuple[global: bool, pragmas: seq[Pragma]] = ## Parses pragmas - if self.scopeDepth == 0: - ## Pragmas used at the - ## top level are either - ## used for compile-time - ## switches or for global variable - ## declarations - var decl: VarDecl - for node in self.tree: - if node.token.line == self.peek(-1).line and node.kind == varDecl: - decl = VarDecl(node) + result.global = true + var + decl: Declaration = nil + found = false + for node in self.tree: + if node.token.line == self.peek(-1).line and node.kind in {NodeKind.varDecl, typeDecl, funDecl, lambdaExpr}: + decl = node + found = true + break + if not found: + # Dummy declaration + result.global = false + decl = Declaration(pragmas: @[]) + var + name: IdentExpr + args: seq[LiteralExpr] + exp: Expression + while not self.match("]") and not self.done(): + args = @[] + self.expect(Identifier, "expecting pragma name") + name = newIdentExpr(self.peek(-1)) + if not self.match(":"): + if self.match("]"): + decl.pragmas.add(newPragma(name, @[])) break - else: - var decl = self.currentFunction - # TODO + elif self.match("("): + while not self.match(")") and not self.done(): + exp = self.primary() + if not exp.isLiteral(): + self.error("invalid syntax") + args.add(LiteralExpr(exp)) + if not self.match(","): + break
+ self.expect(")", "unterminated parenthesis in pragma arguments") + else: + exp = self.primary() + if not exp.isLiteral(): + self.error("invalid syntax") + args.add(LiteralExpr(exp)) + decl.pragmas.add(newPragma(name, args)) + # Store the pragma first, then skip the optional ',' separator, so no pragma is dropped + discard self.match(",") + result.pragmas = decl.pragmas proc typeDecl(self: Parser): TypeDecl = ## Parses type declarations - + let token = self.peek(-1) + self.expect(Identifier, "expecting type name after 'type'") + let name = newIdentExpr(self.peek(-1)) # read before '*' is consumed, while peek(-1) is still the identifier + let isPrivate = not self.match("*") + self.checkDecl(isPrivate) + var isRef = false + var fields: seq[tuple[name: IdentExpr, valueType: Expression, + mutable: bool, isRef: bool, isPtr: bool, isPrivate: bool]] = @[] + var defaults: seq[Expression] = @[] + var generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[] + var pragmas: seq[Pragma] = @[] + result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, isRef) + if self.match(LeftBracket): + self.parseGenerics(result) + self.expect("=", "expecting '=' after type name") + case self.step().kind: + of Ref: + # newTypeDecl() above already copied isRef, so the flag must be set on the node itself + result.isRef = true + self.expect(Object, "invalid syntax") + of Object: + discard + else: + self.error("invalid syntax") + self.expect(LeftBrace, "expecting '{' after type declaration") + var + argName: IdentExpr + argMutable: bool + argRef: bool + argPtr: bool + argPrivate: bool + argType: Expression + while not self.match(RightBrace) and not self.done(): + argRef = false + argPtr = false + argMutable = false + self.expect(Identifier, "expecting field name") + argName = newIdentExpr(self.peek(-1)) + argPrivate = not self.match("*") + self.expect(":", "expecting ':' after field name") + case self.step().kind: + of Ref: + argRef = true + of Ptr: + argPtr = true + of Var: + argMutable = true + else: + self.current -= 1 + argType = self.expression() + result.fields.add((argName, argType, argMutable, argRef, argPtr, argPrivate)) + if self.match("="): + result.defaults.add(self.expression())
+ self.expect(";", "expecting semicolon after field declaration") + proc declaration(self: Parser): Declaration = ## Parses declarations @@ -1118,10 +1200,13 @@ proc declaration(self: Parser): Declaration = of Operator: discard self.step() result = self.funDecl(isOperator=true) - of TokenType.Comment: - let tok = self.step() - if tok.lexeme.startsWith("#pragma["): - result = self.parsePragma() + of TokenType.Pragma: + discard self.step() + let temp = self.parsePragma() + if not temp.global: + for p in temp.pragmas: + self.tree.add(p) + result = nil of Type: discard self.step() result = self.typeDecl() diff --git a/src/main.nim b/src/main.nim index bdd3b58..fad3aad 100644 --- a/src/main.nim +++ b/src/main.nim @@ -28,11 +28,11 @@ proc fillSymbolTable(tokenizer: Lexer) proc getLineEditor: LineEditor # Handy dandy compile-time constants -const debugLexer = false -const debugParser = false -const debugCompiler = true -const debugSerializer = false -const debugRuntime = false +const debugLexer {.booldefine.} = false +const debugParser {.booldefine.} = false +const debugCompiler {.booldefine.} = false +const debugSerializer {.booldefine.} = false +const debugRuntime {.booldefine.} = false proc repl(vm: PeonVM = newPeonVM()) = @@ -402,6 +402,7 @@ proc fillSymbolTable(tokenizer: Lexer) = tokenizer.symbols.addKeyword("import", Import) tokenizer.symbols.addKeyword("yield", TokenType.Yield) tokenizer.symbols.addKeyword("return", TokenType.Return) + tokenizer.symbols.addKeyword("object", Object) # These are more like expressions with a reserved # name that produce a value of a builtin type, # but we don't need to care about that until