# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## An Abstract Syntax Tree (AST) structure for our recursive-descent
## top-down parser. For more info, check out docs/grammar.md

import std/strformat
import std/strutils

import token

export token


type
    NodeKind* = enum
        ## Enumeration of the AST
        ## node types, sorted by
        ## precedence

        # Declarations
        typeDecl = 0'u8,
        funDecl,
        varDecl,
        # Statements
        ifStmt,
        returnStmt,
        breakStmt,
        continueStmt,
        whileStmt,
        forEachStmt,
        blockStmt,
        raiseStmt,
        assertStmt,
        tryStmt,
        yieldStmt,
        awaitStmt,
        importStmt,
        exportStmt,
        deferStmt,
        # An expression followed by a semicolon
        exprStmt,
        # Expressions
        assignExpr,
        lambdaExpr,
        awaitExpr,
        yieldExpr,
        setItemExpr,  # Set expressions like a.b = "c"
        binaryExpr,
        unaryExpr,
        sliceExpr,
        callExpr,
        getItemExpr,  # Get expressions like a.b
        # Primary expressions
        groupingExpr, # Parenthesized expressions such as (true) and (3 + 4)
        trueExpr,
        falseExpr,
        strExpr,
        charExpr,
        intExpr,
        floatExpr,
        hexExpr,
        octExpr,
        binExpr,
        nilExpr,
        nanExpr,
        infExpr,
        identExpr,    # Identifier
        pragmaExpr,
        varExpr,
        refExpr,
        ptrExpr

    # Here I would've rather used object variants, and in fact that's what was in
    # place before, but not being able to re-declare a field of the same type in
    # another case branch is kind of a deal breaker long-term, so until that is
    # fixed (check out https://github.com/nim-lang/RFCs/issues/368 for more info)
    # I'll stick to using inheritance instead

    # Generic AST node types
    ASTNode* = ref object of RootObj
        ## An AST node
        kind*: NodeKind
        # Regardless of the type of node, we keep the token in the AST node for internal usage.
        # This is not shown when the node is printed, but makes it a heck of a lot easier to report
        # errors accurately even deep in the compilation pipeline
        token*: Token

    # This weird inheritance chain is needed for the parser to
    # work properly
    Declaration* = ref object of ASTNode
        ## A declaration
        isPrivate*: bool
        pragmas*: seq[Pragma]
        generics*: seq[tuple[name: IdentExpr, cond: Expression]]

    Statement* = ref object of Declaration
        ## A statement
    Expression* = ref object of Statement
        ## An expression

    LiteralExpr* = ref object of Expression
        # Using a string for literals makes it much easier to handle numeric types, as
        # there is no overflow nor underflow or float precision issues during parsing.
        # Numbers are just serialized as strings and then converted back to numbers
        # before being passed to the VM, which also keeps the door open in the future
        # to implementing bignum arithmetic that can take advantage of natively supported
        # machine types, meaning that if a numeric type fits into a 64 bit signed/unsigned
        # int then it is stored in such a type to save space, otherwise it is just converted
        # to a bigint. Bigfloats with arbitrary-precision arithmetic would also be nice,
        # although arguably less useful (and probably significantly slower than bigints)
        literal*: Token

    IntExpr* = ref object of LiteralExpr
    OctExpr* = ref object of LiteralExpr
    HexExpr* = ref object of LiteralExpr
    BinExpr* = ref object of LiteralExpr
    FloatExpr* = ref object of LiteralExpr
    StrExpr* = ref object of LiteralExpr
    CharExpr* = ref object of LiteralExpr

    TrueExpr* = ref object of LiteralExpr
    FalseExpr* = ref object of LiteralExpr
    NilExpr* = ref object of LiteralExpr
    NanExpr* = ref object of LiteralExpr
    InfExpr* = ref object of LiteralExpr

    IdentExpr* = ref object of Expression
        name*: Token
        depth*: int

    GroupingExpr* = ref object of Expression
        expression*: Expression

    GetItemExpr* = ref object of Expression
        obj*: Expression
        name*: IdentExpr

    SetItemExpr* = ref object of GetItemExpr
        # Since a setItem expression is just
        # a getItem one followed by an assignment,
        # inheriting it from getItem makes sense
        value*: Expression

    CallExpr* = ref object of Expression
        callee*: Expression # The object being called
        arguments*: tuple[positionals: seq[Expression], keyword: seq[tuple[
                name: IdentExpr, value: Expression]]]
        closeParen*: Token  # Needed for error reporting

    UnaryExpr* = ref object of Expression
        operator*: Token
        a*: Expression

    BinaryExpr* = ref object of UnaryExpr
        # Binary expressions can be seen here as unary
        # expressions with an extra operand so we just
        # inherit from that and add a second operand
        b*: Expression

    YieldExpr* = ref object of Expression
        expression*: Expression

    AwaitExpr* = ref object of Expression
        expression*: Expression

    LambdaExpr* = ref object of Expression
        body*: Statement
        arguments*: seq[tuple[name: IdentExpr, valueType: Expression]]
        defaults*: seq[Expression]
        isGenerator*: bool
        isAsync*: bool
        isPure*: bool
        returnType*: Expression
        hasExplicitReturn*: bool
        depth*: int

    SliceExpr* = ref object of Expression
        expression*: Expression
        ends*: seq[Expression]

    AssignExpr* = ref object of Expression
        name*: Expression
        value*: Expression

    ExprStmt* = ref object of Statement
        expression*: Expression

    ImportStmt* = ref object of Statement
        moduleName*: IdentExpr

    ExportStmt* = ref object of Statement
        name*: IdentExpr

    AssertStmt* = ref object of Statement
        expression*: Expression

    RaiseStmt* = ref object of Statement
        exception*: Expression

    BlockStmt* = ref object of Statement
        code*: seq[Declaration]

    ForStmt* = ref object of Statement
        discard # Unused

    ForEachStmt* = ref object of Statement
        identifier*: IdentExpr
        expression*: Expression
        body*: Statement

    DeferStmt* = ref object of Statement
        expression*: Expression

    TryStmt* = ref object of Statement
        body*: Statement
        handlers*: seq[tuple[body: Statement, exc: IdentExpr]]
        finallyClause*: Statement
        elseClause*: Statement

    WhileStmt* = ref object of Statement
        condition*: Expression
        body*: Statement

    AwaitStmt* = ref object of Statement
        expression*: Expression

    BreakStmt* = ref object of Statement

    ContinueStmt* = ref object of Statement

    ReturnStmt* = ref object of Statement
        value*: Expression

    IfStmt* = ref object of Statement
        condition*: Expression
        thenBranch*: Statement
        elseBranch*: Statement

    YieldStmt* = ref object of Statement
        expression*: Expression

    VarDecl* = ref object of Declaration
        name*: IdentExpr
        value*: Expression
        isConst*: bool
        isLet*: bool
        valueType*: Expression

    FunDecl* = ref object of Declaration
        name*: IdentExpr
        body*: Statement
        arguments*: seq[tuple[name: IdentExpr, valueType: Expression]]
        defaults*: seq[Expression]
        isAsync*: bool
        isGenerator*: bool
        isPure*: bool
        returnType*: Expression
        hasExplicitReturn*: bool
        depth*: int

    TypeDecl* = ref object of Declaration
        name*: IdentExpr
        fields*: seq[tuple[name: IdentExpr, valueType: Expression, isPrivate: bool]]
        defaults*: seq[Expression]
        isEnum*: bool
        isRef*: bool
        parent*: IdentExpr

    Pragma* = ref object of Expression
        name*: IdentExpr
        args*: seq[LiteralExpr]

    Var* = ref object of Expression
        value*: Expression

    Ref* = ref object of Expression
        value*: Expression

    Ptr* = ref object of Expression
        value*: Expression


proc isConst*(self: ASTNode): bool =
    ## Returns true if the given
    ## AST node represents a value
    ## of constant type. All integers,
    ## strings and singletons count as
    ## constants
    # NOTE(review): charExpr is not part of this set; left untouched
    # because callers may rely on it. Confirm whether that is intended
    self.kind in {intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr,
                  trueExpr, infExpr, nanExpr, floatExpr, nilExpr}


proc isLiteral*(self: ASTNode): bool {.inline.} =
    ## Returns if the AST node represents a literal
    # NOTE(review): same kind set as isConst (charExpr excluded)
    self.kind in {intExpr, hexExpr, binExpr, octExpr, strExpr, falseExpr,
                  trueExpr, infExpr, nanExpr, floatExpr, nilExpr}


## AST node constructors

proc newASTNode*(kind: NodeKind, token: Token): ASTNode =
    ## Initializes a new generic ASTNode object
    ASTNode(kind: kind, token: token)


proc newPragma*(name: IdentExpr, args: seq[LiteralExpr]): Pragma =
    ## Initializes a new pragma node. The node's
    ## token is taken from the pragma's name
    Pragma(kind: pragmaExpr, args: args, name: name, token: name.token)


proc newVarExpr*(expression: Expression, token: Token): Var =
    ## Initializes a new var expression wrapping
    ## the given expression
    Var(kind: varExpr, value: expression, token: token)


proc newRefExpr*(expression: Expression, token: Token): Ref =
    ## Initializes a new ref expression wrapping
    ## the given expression
    Ref(kind: refExpr, value: expression, token: token)


proc newPtrExpr*(expression: Expression, token: Token): Ptr =
    ## Initializes a new ptr expression wrapping
    ## the given expression
    Ptr(kind: ptrExpr, value: expression, token: token)


proc newIntExpr*(literal: Token): IntExpr =
    ## Initializes a new integer literal node. The
    ## literal doubles as the node's token
    IntExpr(kind: intExpr, literal: literal, token: literal)


proc newOctExpr*(literal: Token): OctExpr =
    ## Initializes a new octal literal node. The
    ## literal doubles as the node's token
    OctExpr(kind: octExpr, literal: literal, token: literal)


proc newHexExpr*(literal: Token): HexExpr =
    ## Initializes a new hexadecimal literal node.
    ## The literal doubles as the node's token
    HexExpr(kind: hexExpr, literal: literal, token: literal)


proc newBinExpr*(literal: Token): BinExpr =
    ## Initializes a new binary literal node. The
    ## literal doubles as the node's token
    BinExpr(kind: binExpr, literal: literal, token: literal)


proc newFloatExpr*(literal: Token): FloatExpr =
    ## Initializes a new float literal node. The
    ## literal doubles as the node's token
    FloatExpr(kind: floatExpr, literal: literal, token: literal)


proc newTrueExpr*(token: Token): LiteralExpr =
    ## Initializes a new literal node of kind trueExpr
    LiteralExpr(kind: trueExpr, token: token, literal: token)


proc newFalseExpr*(token: Token): LiteralExpr =
    ## Initializes a new literal node of kind falseExpr
    LiteralExpr(kind: falseExpr, token: token, literal: token)


proc newNaNExpr*(token: Token): LiteralExpr =
    ## Initializes a new literal node of kind nanExpr
    LiteralExpr(kind: nanExpr, token: token, literal: token)


proc newNilExpr*(token: Token): LiteralExpr =
    ## Initializes a new literal node of kind nilExpr
    LiteralExpr(kind: nilExpr, token: token, literal: token)


proc newInfExpr*(token: Token): LiteralExpr =
    ## Initializes a new literal node of kind infExpr
    LiteralExpr(kind: infExpr, token: token, literal: token)


proc newStrExpr*(literal: Token): StrExpr =
    ## Initializes a new string literal node. The
    ## literal doubles as the node's token
    StrExpr(kind: strExpr, literal: literal, token: literal)


proc newCharExpr*(literal: Token): CharExpr =
    ## Initializes a new character literal node.
    ## The literal doubles as the node's token
    CharExpr(kind: charExpr, literal: literal, token: literal)


proc newIdentExpr*(name: Token, depth: int = 0): IdentExpr =
    ## Initializes a new identifier node. The name
    ## doubles as the node's token
    IdentExpr(kind: identExpr, name: name, token: name, depth: depth)


proc newGroupingExpr*(expression: Expression, token: Token): GroupingExpr =
    ## Initializes a new grouping (parenthesized)
    ## expression node
    GroupingExpr(kind: groupingExpr, expression: expression, token: token)


proc newLambdaExpr*(arguments: seq[tuple[name: IdentExpr, valueType: Expression]],
                    defaults: seq[Expression], body: Statement,
                    isAsync, isGenerator: bool, token: Token, depth: int,
                    pragmas: seq[Pragma] = @[], returnType: Expression,
                    generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[],
                    freeVars: seq[IdentExpr] = @[]): LambdaExpr =
    ## Initializes a new lambda expression node. isPure
    ## always starts out as false. The freeVars parameter
    ## is accepted but not stored anywhere by this constructor
    LambdaExpr(kind: lambdaExpr,
               body: body,
               arguments: arguments,
               defaults: defaults,
               isGenerator: isGenerator,
               isAsync: isAsync,
               token: token,
               returnType: returnType,
               isPure: false,
               pragmas: pragmas,
               generics: generics,
               depth: depth)


proc newGetItemExpr*(obj: Expression, name: IdentExpr, token: Token): GetItemExpr =
    ## Initializes a new getItem expression node (like a.b)
    GetItemExpr(kind: getItemExpr, obj: obj, name: name, token: token)


proc newSetItemExpr*(obj: Expression, name: IdentExpr, value: Expression,
                     token: Token): SetItemExpr =
    ## Initializes a new setItem expression node (like a.b = "c")
    SetItemExpr(kind: setItemExpr, obj: obj, name: name, value: value, token: token)


proc newCallExpr*(callee: Expression,
                  arguments: tuple[positionals: seq[Expression],
                                   keyword: seq[tuple[name: IdentExpr, value: Expression]]],
                  token: Token): CallExpr =
    ## Initializes a new call expression node. The
    ## closeParen field is not set by this constructor
    CallExpr(kind: callExpr, callee: callee, arguments: arguments, token: token)


proc newSliceExpr*(expression: Expression, ends: seq[Expression],
                   token: Token): SliceExpr =
    ## Initializes a new slice expression node
    SliceExpr(kind: sliceExpr, expression: expression, ends: ends, token: token)


proc newUnaryExpr*(operator: Token, a: Expression): UnaryExpr =
    ## Initializes a new unary expression node. The
    ## operator doubles as the node's token
    UnaryExpr(kind: unaryExpr, operator: operator, a: a, token: operator)


proc newBinaryExpr*(a: Expression, operator: Token, b: Expression): BinaryExpr =
    ## Initializes a new binary expression node. The
    ## operator doubles as the node's token
    BinaryExpr(kind: binaryExpr, operator: operator, a: a, b: b, token: operator)


proc newYieldExpr*(expression: Expression, token: Token): YieldExpr =
    ## Initializes a new yield expression node
    YieldExpr(kind: yieldExpr, expression: expression, token: token)


proc newAssignExpr*(name: Expression, value: Expression, token: Token): AssignExpr =
    ## Initializes a new assignment expression node
    AssignExpr(kind: assignExpr, name: name, value: value, token: token)


proc newAwaitExpr*(expression: Expression, token: Token): AwaitExpr =
    ## Initializes a new await expression node
    AwaitExpr(kind: awaitExpr, expression: expression, token: token)


proc newExprStmt*(expression: Expression, token: Token): ExprStmt =
    ## Initializes a new expression statement node
    ExprStmt(kind: exprStmt, expression: expression, token: token)


proc newImportStmt*(moduleName: IdentExpr, token: Token): ImportStmt =
    ## Initializes a new import statement node
    ImportStmt(kind: importStmt, moduleName: moduleName, token: token)


proc newExportStmt*(name: IdentExpr, token: Token): ExportStmt =
    ## Initializes a new export statement node
    ExportStmt(kind: exportStmt, name: name, token: token)


proc newYieldStmt*(expression: Expression, token: Token): YieldStmt =
    ## Initializes a new yield statement node
    YieldStmt(kind: yieldStmt, expression: expression, token: token)


proc newAwaitStmt*(expression: Expression, token: Token): AwaitStmt =
    ## Initializes a new await statement node
    AwaitStmt(kind: awaitStmt, expression: expression, token: token)


proc newAssertStmt*(expression: Expression, token: Token): AssertStmt =
    ## Initializes a new assert statement node
    AssertStmt(kind: assertStmt, expression: expression, token: token)


proc newDeferStmt*(expression: Expression, token: Token): DeferStmt =
    ## Initializes a new defer statement node
    DeferStmt(kind: deferStmt, expression: expression, token: token)


proc newRaiseStmt*(exception: Expression, token: Token): RaiseStmt =
    ## Initializes a new raise statement node
    RaiseStmt(kind: raiseStmt, exception: exception, token: token)


proc newTryStmt*(body: Statement,
                 handlers: seq[tuple[body: Statement, exc: IdentExpr]],
                 finallyClause: Statement, elseClause: Statement,
                 token: Token): TryStmt =
    ## Initializes a new try statement node
    TryStmt(kind: tryStmt,
            body: body,
            handlers: handlers,
            finallyClause: finallyClause,
            elseClause: elseClause,
            token: token)


proc newBlockStmt*(code: seq[Declaration], token: Token): BlockStmt =
    ## Initializes a new block statement node
    BlockStmt(kind: blockStmt, code: code, token: token)


proc newWhileStmt*(condition: Expression, body: Statement, token: Token): WhileStmt =
    ## Initializes a new while statement node
    WhileStmt(kind: whileStmt, condition: condition, body: body, token: token)


proc newForEachStmt*(identifier: IdentExpr, expression: Expression,
                     body: Statement, token: Token): ForEachStmt =
    ## Initializes a new foreach statement node
    ForEachStmt(kind: forEachStmt,
                identifier: identifier,
                expression: expression,
                body: body,
                token: token)


proc newBreakStmt*(token: Token): BreakStmt =
    ## Initializes a new break statement node
    BreakStmt(kind: breakStmt, token: token)


proc newContinueStmt*(token: Token): ContinueStmt =
    ## Initializes a new continue statement node
    ContinueStmt(kind: continueStmt, token: token)


proc newReturnStmt*(value: Expression, token: Token): ReturnStmt =
    ## Initializes a new return statement node
    ReturnStmt(kind: returnStmt, value: value, token: token)


proc newIfStmt*(condition: Expression, thenBranch, elseBranch: Statement,
                token: Token): IfStmt =
    ## Initializes a new if statement node
    IfStmt(kind: ifStmt,
           condition: condition,
           thenBranch: thenBranch,
           elseBranch: elseBranch,
           token: token)


proc newVarDecl*(name: IdentExpr, value: Expression, isConst: bool = false,
                 isPrivate: bool = true, token: Token, isLet: bool = false,
                 valueType: Expression, pragmas: seq[Pragma]): VarDecl =
    ## Initializes a new variable declaration node
    VarDecl(kind: varDecl,
            name: name,
            value: value,
            isConst: isConst,
            isPrivate: isPrivate,
            token: token,
            isLet: isLet,
            valueType: valueType,
            pragmas: pragmas)


proc newFunDecl*(name: IdentExpr,
                 arguments: seq[tuple[name: IdentExpr, valueType: Expression]],
                 defaults: seq[Expression], body: Statement,
                 isAsync, isGenerator: bool, isPrivate: bool, token: Token,
                 depth: int, pragmas: seq[Pragma] = @[], returnType: Expression,
                 generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[],
                 freeVars: seq[IdentExpr] = @[]): FunDecl =
    ## Initializes a new function declaration node. isPure
    ## always starts out as false. The freeVars parameter
    ## is accepted but not stored anywhere by this constructor
    FunDecl(kind: funDecl,
            name: name,
            arguments: arguments,
            defaults: defaults,
            body: body,
            isAsync: isAsync,
            isGenerator: isGenerator,
            isPrivate: isPrivate,
            token: token,
            pragmas: pragmas,
            returnType: returnType,
            isPure: false,
            generics: generics,
            depth: depth)


proc newTypeDecl*(name: IdentExpr,
                  fields: seq[tuple[name: IdentExpr, valueType: Expression, isPrivate: bool]],
                  defaults: seq[Expression], isPrivate: bool, token: Token,
                  pragmas: seq[Pragma],
                  generics: seq[tuple[name: IdentExpr, cond: Expression]],
                  parent: IdentExpr, isEnum: bool, isRef: bool): TypeDecl =
    ## Initializes a new type declaration node
    TypeDecl(kind: typeDecl,
             name: name,
             fields: fields,
             defaults: defaults,
             isPrivate: isPrivate,
             token: token,
             pragmas: pragmas,
             generics: generics,
             parent: parent,
             isEnum: isEnum,
             isRef: isRef)


proc `$`*(self: ASTNode): string =
    ## Returns a human-readable representation of the
    ## given AST node (and, recursively, of its children).
    ## A nil node is rendered as "nil". The case statement
    ## below covers every NodeKind on purpose, so that adding
    ## a new kind without a representation is a compile error
    if self == nil:
        return "nil"
    case self.kind:
        of intExpr, floatExpr, hexExpr, binExpr, octExpr, strExpr, charExpr,
           trueExpr, falseExpr, nanExpr, nilExpr, infExpr:
            if self.kind in {trueExpr, falseExpr, nanExpr, nilExpr, infExpr}:
                # Strips the trailing "Expr" from the kind's name
                result &= &"Literal({($self.kind)[0..^5]})"
            elif self.kind == strExpr:
                # Drops the first and last character of the lexeme
                # before escaping the rest
                result &= &"Literal({LiteralExpr(self).literal.lexeme[1..^2].escape()})"
            else:
                result &= &"Literal({LiteralExpr(self).literal.lexeme})"
        of identExpr:
            result &= &"Identifier('{IdentExpr(self).name.lexeme}')"
        of groupingExpr:
            result &= &"Grouping({GroupingExpr(self).expression})"
        of getItemExpr:
            let self = GetItemExpr(self)
            result &= &"GetItem(obj={self.obj}, name={self.name})"
        of setItemExpr:
            let self = SetItemExpr(self)
            # The name used to be (wrongly) rendered using self.value
            result &= &"SetItem(obj={self.obj}, name={self.name}, value={self.value})"
        of callExpr:
            let self = CallExpr(self)
            result &= &"""Call({self.callee}, arguments=(positionals=[{self.arguments.positionals.join(", ")}], keyword=[{self.arguments.keyword.join(", ")}]))"""
        of unaryExpr:
            let self = UnaryExpr(self)
            result &= &"Unary(Operator('{self.operator.lexeme}'), {self.a})"
        of binaryExpr:
            let self = BinaryExpr(self)
            result &= &"Binary({self.a}, Operator('{self.operator.lexeme}'), {self.b})"
        of assignExpr:
            let self = AssignExpr(self)
            result &= &"Assign(name={self.name}, value={self.value})"
        of exprStmt:
            let self = ExprStmt(self)
            result &= &"ExpressionStatement({self.expression})"
        of breakStmt:
            result = "Break()"
        of continueStmt:
            result = "Continue()"
        of importStmt:
            let self = ImportStmt(self)
            result &= &"Import({self.moduleName})"
        of exportStmt:
            let self = ExportStmt(self)
            result &= &"Export({self.name})"
        of assertStmt:
            let self = AssertStmt(self)
            result &= &"Assert({self.expression})"
        of raiseStmt:
            let self = RaiseStmt(self)
            result &= &"Raise({self.exception})"
        of blockStmt:
            let self = BlockStmt(self)
            result &= &"""Block([{self.code.join(", ")}])"""
        of whileStmt:
            let self = WhileStmt(self)
            result &= &"While(condition={self.condition}, body={self.body})"
        of forEachStmt:
            let self = ForEachStmt(self)
            result &= &"ForEach(identifier={self.identifier}, expression={self.expression}, body={self.body})"
        of returnStmt:
            let self = ReturnStmt(self)
            result &= &"Return({self.value})"
        of yieldExpr:
            let self = YieldExpr(self)
            result &= &"Yield({self.expression})"
        of awaitExpr:
            let self = AwaitExpr(self)
            result &= &"Await({self.expression})"
        of ifStmt:
            let self = IfStmt(self)
            if self.elseBranch == nil:
                result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch=nil)"
            else:
                result &= &"If(condition={self.condition}, thenBranch={self.thenBranch}, elseBranch={self.elseBranch})"
        of yieldStmt:
            let self = YieldStmt(self)
            result &= &"YieldStmt({self.expression})"
        of awaitStmt:
            let self = AwaitStmt(self)
            result &= &"AwaitStmt({self.expression})"
        of varDecl:
            let self = VarDecl(self)
            result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType}, pragmas={self.pragmas})"
        of funDecl:
            let self = FunDecl(self)
            result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate}, pragmas={self.pragmas})"""
        of typeDecl:
            let self = TypeDecl(self)
            result &= &"""TypeDecl(name={self.name}, fields={self.fields}, defaults={self.defaults}, private={self.isPrivate}, pragmas={self.pragmas}, generics={self.generics}, parent={self.parent}, ref={self.isRef}, enum={self.isEnum})"""
        of lambdaExpr:
            let self = LambdaExpr(self)
            result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync}, pragmas={self.pragmas})"""
        of deferStmt:
            let self = DeferStmt(self)
            result &= &"Defer({self.expression})"
        of sliceExpr:
            let self = SliceExpr(self)
            result &= &"""Slice({self.expression}, ends=[{self.ends.join(", ")}])"""
        of tryStmt:
            let self = TryStmt(self)
            result &= &"TryStmt(body={self.body}, handlers={self.handlers}"
            if self.finallyClause != nil:
                result &= &", finallyClause={self.finallyClause}"
            else:
                result &= ", finallyClause=nil"
            if self.elseClause != nil:
                result &= &", elseClause={self.elseClause}"
            else:
                result &= ", elseClause=nil"
            result &= ")"
        of pragmaExpr:
            let self = Pragma(self)
            result &= &"Pragma(name={self.name}, args={self.args})"
        of varExpr:
            result &= &"Var({Var(self).value})"
        of refExpr:
            # Ref expressions used to be (wrongly) labelled as "Ptr"
            result &= &"Ref({Ref(self).value})"
        of ptrExpr:
            result &= &"Ptr({Ptr(self).value})"


proc `==`*(self, other: IdentExpr): bool {.inline.} =
    ## Compares two identifier nodes by comparing their
    ## tokens. Two nil identifiers are equal, while a nil
    ## identifier never equals a non-nil one
    # isNil is used instead of "== nil" because the latter
    # could resolve to this very operator and recurse
    if self.isNil() or other.isNil():
        result = self.isNil() and other.isNil()
    else:
        result = self.token == other.token


proc getRelativeBoundaries*(self: ASTNode): tuple[start, stop: int] =
    ## Gets the location of a node relative to its line.
    ## Node kinds that are not handled explicitly yield (0, 0)
    case self.kind:
        of exprStmt:
            result = getRelativeBoundaries(ExprStmt(self).expression)
        of unaryExpr:
            let self = UnaryExpr(self)
            result = (self.operator.relPos.start, getRelativeBoundaries(self.a).stop)
        of binaryExpr:
            let self = BinaryExpr(self)
            result = (getRelativeBoundaries(self.a).start, getRelativeBoundaries(self.b).stop)
        of intExpr, binExpr, hexExpr, octExpr, strExpr, charExpr, floatExpr:
            let self = LiteralExpr(self)
            result = self.literal.relPos
        of identExpr:
            let self = IdentExpr(self)
            result = self.token.relPos
        of assignExpr:
            let self = AssignExpr(self)
            result = (getRelativeBoundaries(self.name).start, getRelativeBoundaries(self.value).stop)
        of callExpr:
            let self = CallExpr(self)
            result = (getRelativeBoundaries(self.callee).start, self.closeParen.relPos.stop)
        of pragmaExpr:
            let self = Pragma(self)
            # The span ends one past the last argument, or one past
            # the pragma's own token when there are no arguments
            let lastToken = if self.args.len() > 0: self.args[^1].token else: self.token
            # NOTE(review): the 8 is presumably the width of the text
            # that opens a pragma in the source -- TODO confirm
            result = (self.token.relPos.start - 8, lastToken.relPos.stop + 1)
        else:
            result = (0, 0)