Initial experimental support for parsing pragmas

This commit is contained in:
Mattia Giambirtone 2022-06-07 11:23:08 +02:00
parent aed0f6e8f2
commit dac0cca1bc
6 changed files with 205 additions and 60 deletions

View File

@ -16,7 +16,8 @@ import meta/ast
import meta/errors
import ../config
import ../util/multibyte
import lexer
import parser
import strformat
import algorithm
@ -180,6 +181,7 @@ proc newCompiler*(enableOptimizations: bool = true, replMode: bool = false): Com
## Forward declarations
proc compile*(self: Compiler, ast: seq[Declaration], file: string): Chunk
proc expression(self: Compiler, node: Expression)
proc statement(self: Compiler, node: Statement)
proc declaration(self: Compiler, node: Declaration)
@ -779,8 +781,12 @@ proc matchImpl(self: Compiler, name: string, kind: Type): Name =
msg &= &", wrong number of arguments ({name.valueType.args.len()} expected, got {kind.args.len()})"
else:
for i, arg in kind.args:
if not self.compareTypes(arg.kind, name.valueType.args[i].kind):
if name.valueType.args[i].kind.kind == Mutable and arg.kind.kind != Mutable:
msg &= &", first mismatch at position {i + 1}: {name.valueType.args[i].name} is immutable, not 'var'"
break
elif not self.compareTypes(arg.kind, name.valueType.args[i].kind):
msg &= &", first mismatch at position {i + 1}: expected argument of type '{self.typeToStr(name.valueType.args[i].kind)}', got '{self.typeToStr(arg.kind)}' instead"
break
self.error(msg)
elif impl.len() > 1:
var msg = &"multiple matching implementations of '{name}' found:\n"
@ -869,7 +875,7 @@ proc binary(self: Compiler, node: BinaryExpr) =
]#
proc declareName(self: Compiler, node: Declaration) =
proc declareName(self: Compiler, node: Declaration, mutable: bool = false) =
## Statically declares a name into the current scope.
## "Declaring" a name only means updating our internal
## list of identifiers so that further calls to resolve()
@ -900,6 +906,8 @@ proc declareName(self: Compiler, node: Declaration) =
isLet: node.isLet,
isClosedOver: false,
line: node.token.line))
if mutable:
self.names[^1].valueType = Type(kind: Mutable, value: self.names[^1].valueType)
# We emit a jump of 0 because this may become a
# StoreHeap instruction. If the variable is
# not closed over, we'll sadly be wasting a
@ -932,6 +940,10 @@ proc declareName(self: Compiler, node: Declaration) =
isClosedOver: false,
line: node.token.line))
let fn = self.names[^1]
if fn.valueType.returnType.isNil() and not node.returnType.isNil() and node.returnType.kind == identExpr:
for g in node.generics:
if g.name == IdentExpr(node.returnType):
fn.valueType.returnType = Type(kind: Generic)
var name: Name
for argument in node.arguments:
if self.names.high() > 16777215:
@ -1185,12 +1197,14 @@ proc expression(self: Compiler, node: Expression) =
of NodeKind.callExpr:
self.callExpr(CallExpr(node)) # TODO
of getItemExpr:
discard # TODO: Get rid of this
of pragmaExpr:
discard # TODO
# Note that for setItem and assign we don't convert
# the node to its true type because that type information
# would be lost in the call anyway. The differentiation
# happens in self.assignment()
of setItemExpr, assignExpr:
of setItemExpr, assignExpr: # TODO: Get rid of this
self.assignment(node)
of identExpr:
self.identifier(IdentExpr(node))
@ -1260,26 +1274,29 @@ proc endFunctionBeforeReturn(self: Compiler) =
proc returnStmt(self: Compiler, node: ReturnStmt) =
## Compiles return statements. An empty return
## implicitly returns nil
let returnType = self.inferType(node.value)
let typ = self.inferType(self.currentFunction)
let actual = self.inferType(node.value)
let expected = self.inferType(self.currentFunction)
var comp: Type = actual
if not expected.isNil() and not expected.returnType.isNil() and expected.returnType.kind in {Reference, Pointer, Mutable}:
comp = expected.returnType.value
## Having the return type
if returnType == nil and typ.returnType != nil:
if node.value != nil:
if actual.isNil() and not expected.returnType.isNil():
if not node.value.isNil():
if node.value.kind == identExpr:
self.error(&"reference to undeclared identifier '{node.value.token.lexeme}'")
elif node.value.kind == callExpr and CallExpr(node.value).callee.kind == identExpr:
self.error(&"call to undeclared function '{CallExpr(node.value).callee.token.lexeme}'")
self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', but expression has no type")
elif typ.returnType == nil and returnType != nil:
self.error(&"expected return value of type '{self.typeToStr(expected.returnType)}', but expression has no type")
elif expected.returnType.isNil() and not actual.isNil():
self.error("non-empty return statement is not allowed in void functions")
elif not self.compareTypes(returnType, typ.returnType):
self.error(&"expected return value of type '{self.typeToStr(typ.returnType)}', got '{self.typeToStr(returnType)}' instead")
if node.value != nil:
elif not self.compareTypes(actual, comp):
self.error(&"expected return value of type '{self.typeToStr(comp)}', got '{self.typeToStr(actual)}' instead")
if not node.value.isNil():
self.expression(node.value)
self.emitByte(OpCode.SetResult)
self.endFunctionBeforeReturn()
self.emitByte(OpCode.Return)
if node.value != nil:
if not node.value.isNil():
self.emitByte(1)
else:
self.emitByte(0)
@ -1292,7 +1309,7 @@ proc yieldStmt(self: Compiler, node: YieldStmt) =
proc raiseStmt(self: Compiler, node: RaiseStmt) =
## Compiles yield statements
## Compiles raise statements
self.expression(node.exception)
self.emitByte(OpCode.Raise)
@ -1336,6 +1353,20 @@ proc assertStmt(self: Compiler, node: AssertStmt) =
self.emitByte(OpCode.Assert)
proc forEachStmt(self: Compiler, node: ForEachStmt) =
  ## Compiles foreach loops.
  ## Currently a placeholder: the node is accepted
  ## but no bytecode is emitted for it
  # TODO: implement foreach compilation
proc importStmt(self: Compiler, node: ImportStmt) =
  ## Imports a module at compile time. Import statements
  ## are only accepted at the top level (scope depth 0):
  ## an error is reported through self.error() otherwise.
  ## The module is compiled with a brand new compiler
  ## instance, named after the imported module
  if self.scopeDepth > 0:
    self.error("import statements are only allowed at the top level")
  let
    compiler = newCompiler()
    moduleName = node.moduleName.name.lexeme
  # TODO: Find module. Until module lookup exists, an empty
  # source string is lexed under the module's name and the
  # compiled chunk is explicitly thrown away (it used to be
  # bound to an unused local called 'result', which is the
  # name Nim reserves for a routine's implicit return value)
  discard compiler.compile(newParser().parse(newLexer().lex("", moduleName), moduleName), moduleName)
proc statement(self: Compiler, node: Statement) =
## Compiles all statements
case node.kind:
@ -1365,7 +1396,7 @@ proc statement(self: Compiler, node: Statement) =
of NodeKind.returnStmt:
self.returnStmt(ReturnStmt(node))
of NodeKind.importStmt:
discard
self.importStmt(ImportStmt(node))
of NodeKind.whileStmt, NodeKind.forStmt:
## Our parser already desugars for loops to
## while loops!
@ -1376,7 +1407,7 @@ proc statement(self: Compiler, node: Statement) =
self.patchBreaks()
self.currentLoop = loop
of NodeKind.forEachStmt:
discard
self.forEachStmt(ForEachStmt(node))
of NodeKind.blockStmt:
self.blockStmt(BlockStmt(node))
of NodeKind.yieldStmt:
@ -1405,11 +1436,16 @@ proc varDecl(self: Compiler, node: VarDecl) =
if expected != nil:
self.error(&"expected value of type '{self.typeToStr(expected)}', but '{node.name.token.lexeme}' is of type '{self.typeToStr(actual)}'")
self.expression(node.value)
self.declareName(node)
self.declareName(node, mutable=node.token.kind == Var)
self.emitByte(StoreVar)
self.emitBytes(self.names.high().toTriple())
proc typeDecl(self: Compiler, node: TypeDecl) =
  ## Compiles type declarations.
  ## Currently a placeholder: the node is accepted
  ## but nothing is declared or emitted for it
proc funDecl(self: Compiler, node: FunDecl) =
## Compiles function declarations
# A function's code is just compiled linearly
@ -1422,8 +1458,16 @@ proc funDecl(self: Compiler, node: FunDecl) =
let jmp = self.emitJump(JumpForwards)
for argument in node.arguments:
self.emitByte(LoadArgument)
if node.returnType != nil and self.inferType(node.returnType) == nil:
self.error(&"cannot infer the type of '{node.returnType.token.lexeme}'")
if not node.returnType.isNil() and self.inferType(node.returnType).isNil():
var isGeneric = false
if node.returnType.kind == identExpr:
let name = IdentExpr(node.returnType)
for g in node.generics:
if name == g.name:
isGeneric = true
break
if not isGeneric:
self.error(&"cannot infer the type of '{node.returnType.token.lexeme}'")
# TODO: Forward declarations
if node.body != nil:
if BlockStmt(node.body).code.len() == 0:
@ -1512,7 +1556,6 @@ proc patchReturnAddress(self: Compiler, pos: int) =
self.chunk.code[pos + 3] = address[3]
proc declaration(self: Compiler, node: Declaration) =
## Compiles all declarations
case node.kind:
@ -1520,6 +1563,8 @@ proc declaration(self: Compiler, node: Declaration) =
self.varDecl(VarDecl(node))
of NodeKind.funDecl:
self.funDecl(FunDecl(node))
of NodeKind.typeDecl:
self.typeDecl(TypeDecl(node))
else:
self.statement(Statement(node))

View File

@ -594,10 +594,13 @@ proc next(self: Lexer) =
# Keywords and identifiers
self.parseIdentifier()
elif self.match("#"):
# Inline comments, pragmas, etc.
while not (self.check("\n") or self.done()):
discard self.step()
self.createToken(Comment)
if not self.match("pragma["):
# Inline comments
while not (self.check("\n") or self.done()):
discard self.step()
self.createToken(Comment)
else:
self.createToken(Pragma)
else:
# If none of the above conditions matched, there's a few
# other options left:
@ -607,7 +610,7 @@ proc next(self: Lexer) =
# We handle all of these cases here by trying to
# match the longest sequence of characters possible
# as either an operator or a statement/expression
# delimiter, erroring out if there's no match
# delimiter
var n = self.symbols.getMaxSymbolSize()
while n > 0:
for symbol in self.symbols.getSymbols(n):

View File

@ -265,8 +265,9 @@ type
TypeDecl* = ref object of Declaration
name*: IdentExpr
fields*: seq[tuple[name: IdentExpr, valueType: Expression,
mutable: bool, isRef: bool, isPtr: bool]]
mutable: bool, isRef: bool, isPtr: bool, isPrivate: bool]]
defaults*: seq[Expression]
isRef*: bool
Pragma* = ref object of Expression
name*: IdentExpr
@ -307,6 +308,7 @@ proc newPragma*(name: IdentExpr, args: seq[LiteralExpr]): Pragma =
result.kind = pragmaExpr
result.args = args
result.name = name
result.token = name.token
proc newIntExpr*(literal: Token): IntExpr =
@ -596,9 +598,9 @@ proc newFunDecl*(name: IdentExpr, arguments: seq[tuple[name: IdentExpr, valueTyp
result.generics = generics
proc newTypeDecl*(name: IdentExpr, fields: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool]],
proc newTypeDecl*(name: IdentExpr, fields: seq[tuple[name: IdentExpr, valueType: Expression, mutable: bool, isRef: bool, isPtr: bool, isPrivate: bool]],
defaults: seq[Expression], isPrivate: bool, token: Token, pragmas: seq[Pragma],
generics: seq[tuple[name: IdentExpr, cond: Expression]]): TypeDecl =
generics: seq[tuple[name: IdentExpr, cond: Expression]], isRef: bool): TypeDecl =
result = TypeDecl(kind: typeDecl)
result.name = name
result.fields = fields
@ -607,6 +609,7 @@ proc newTypeDecl*(name: IdentExpr, fields: seq[tuple[name: IdentExpr, valueType:
result.token = token
result.pragmas = pragmas
result.generics = generics
result.isRef = isRef
@ -690,10 +693,13 @@ proc `$`*(self: ASTNode): string =
result &= &"AwaitStmt({self.expression})"
of varDecl:
var self = VarDecl(self)
result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType})"
result &= &"Var(name={self.name}, value={self.value}, const={self.isConst}, private={self.isPrivate}, type={self.valueType}, pragmas={self.pragmas})"
of funDecl:
var self = FunDecl(self)
result &= &"""FunDecl(name={self.name}, body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generics=[{self.generics.join(", ")}], async={self.isAsync}, generator={self.isGenerator}, private={self.isPrivate})"""
of typeDecl:
var self = TypeDecl(self)
result &= &"""TypeDecl(name={self.name}, fields={self.fields}, defaults={self.defaults}, private={self.isPrivate}, pragmas={self.pragmas}, generics={self.generics}, ref={self.isRef})"""
of lambdaExpr:
var self = LambdaExpr(self)
result &= &"""Lambda(body={self.body}, type={self.returnType}, arguments=[{self.arguments.join(", ")}], defaults=[{self.defaults.join(", ")}], generator={self.isGenerator}, async={self.isAsync})"""
@ -715,6 +721,9 @@ proc `$`*(self: ASTNode): string =
else:
result &= ", elseClause=nil"
result &= ")"
of pragmaExpr:
var self = Pragma(self)
result &= &"Pragma(name={self.name}, args={self.args})"
else:
discard

View File

@ -39,7 +39,7 @@ type
Raise, Assert, Await, Foreach,
Yield, Defer, Try, Except,
Finally, Type, Operator, Case,
Enum, From, Ptr, Ref
Enum, From, Ptr, Ref, Object
# Literal types
Integer, Float, String, Identifier,
@ -59,6 +59,7 @@ type
NoMatch, # Used internally by the symbol table
Comment, # Useful for documentation comments, pragmas, etc.
Symbol, # A generic symbol
Pragma,
# These are not used at the moment but may be
# employed to enforce indentation or other neat
# stuff I haven't thought about yet
@ -66,6 +67,7 @@ type
Tab,
Token* = ref object
## A token object
kind*: TokenType # Type of the token

View File

@ -377,13 +377,12 @@ proc makeCall(self: Parser, callee: Expression): Expression =
self.error("call can not have more than 255 arguments")
break
argument = self.expression()
if argument.kind == assignExpr:
if argument.kind == binaryExpr and BinaryExpr(argument).operator.lexeme == "=":
# TODO: This will explode with slices!
if IdentExpr(AssignExpr(argument).name) in argNames:
if IdentExpr(BinaryExpr(argument).a) in argNames:
self.error("duplicate keyword argument in call")
argNames.add(IdentExpr(AssignExpr(argument).name))
arguments.keyword.add((name: IdentExpr(AssignExpr(
argument).name), value: AssignExpr(argument).value))
argNames.add(IdentExpr(BinaryExpr(argument).a))
arguments.keyword.add((name: IdentExpr(BinaryExpr(argument).a), value: BinaryExpr(argument).b))
elif arguments.keyword.len() == 0:
arguments.positionals.add(argument)
else:
@ -1077,27 +1076,110 @@ proc statement(self: Parser): Statement =
result = self.expressionStatement()
proc parsePragma(self: Parser): Pragma =
proc parsePragma(self: Parser): tuple[global: bool, pragmas: seq[Pragma]] =
## Parses pragmas
if self.scopeDepth == 0:
## Pragmas used at the
## top level are either
## used for compile-time
## switches or for global variable
## declarations
var decl: VarDecl
for node in self.tree:
if node.token.line == self.peek(-1).line and node.kind == varDecl:
decl = VarDecl(node)
result.global = true
var
decl: Declaration = nil
found = false
for node in self.tree:
if node.token.line == self.peek(-1).line and node.kind in {NodeKind.varDecl, typeDecl, funDecl, lambdaExpr}:
decl = node
found = true
break
if not found:
# Dummy declaration
result.global = false
decl = Declaration(pragmas: @[])
var
name: IdentExpr
args: seq[LiteralExpr]
exp: Expression
while not self.match("]") and not self.done():
args = @[]
self.expect(Identifier, "expecting pragma name")
name = newIdentExpr(self.peek(-1))
if not self.match(":"):
if self.match("]"):
decl.pragmas.add(newPragma(name, @[]))
break
else:
var decl = self.currentFunction
# TODO
elif self.match("("):
while not self.match(")") and not self.done():
exp = self.primary()
if not exp.isLiteral():
self.error("invalid syntax")
args.add(LiteralExpr(exp))
if not self.match(","):
break
self.expect(")", "unterminated parenthesis in pragma arguments")
else:
exp = self.primary()
if not exp.isLiteral():
self.error("invalid syntax")
args.add(LiteralExpr(exp))
if self.match(","):
continue
decl.pragmas.add(newPragma(name, args))
result.pragmas = decl.pragmas
proc typeDecl(self: Parser): TypeDecl =
  ## Parses type declarations. The accepted shape is:
  ##
  ##   Name[*] ['[' generics] = [ref] object {
  ##       field[*]: [ref | ptr | var] typeExpr [= default];
  ##       ...
  ##   }
  ##
  ## The token consumed right before this procedure is
  ## called (presumably the 'type' keyword, TODO confirm
  ## against the caller) becomes the token of the node.
  ## A trailing '*' after the type's name or a field's
  ## name marks it as public
  let token = self.peek(-1)
  self.expect(Identifier, "expecting type name after 'type'")
  # The name must be captured *before* trying to match the
  # export marker: once '*' has been consumed, peek(-1) would
  # return the '*' token instead of the identifier
  let name = newIdentExpr(self.peek(-1))
  let isPrivate = not self.match("*")
  self.checkDecl(isPrivate)
  let
    fields: seq[tuple[name: IdentExpr, valueType: Expression,
                      mutable: bool, isRef: bool, isPtr: bool, isPrivate: bool]] = @[]
    defaults: seq[Expression] = @[]
    generics: seq[tuple[name: IdentExpr, cond: Expression]] = @[]
    pragmas: seq[Pragma] = @[]
  # The node is built up front because parseGenerics() works
  # on an already existing declaration. Everything parsed
  # from here on is therefore stored into result directly
  result = newTypeDecl(name, fields, defaults, isPrivate, token, pragmas, generics, false)
  if self.match(LeftBracket):
    self.parseGenerics(result)
  self.expect("=", "expecting '=' after type name")
  case self.step().kind:
    of Ref:
      # Set on the node itself: a local flag would be lost,
      # since the node has already been constructed above
      result.isRef = true
      self.expect(Object, "invalid syntax")
    of Object:
      discard
    else:
      self.error("invalid syntax")
  self.expect(LeftBrace, "expecting '{' after type declaration")
  while not self.match(RightBrace) and not self.done():
    self.expect(Identifier, "expecting field name")
    let fieldName = newIdentExpr(self.peek(-1))
    let fieldPrivate = not self.match("*")
    self.expect(":", "expecting ':' after field name")
    var fieldMutable, fieldRef, fieldPtr = false
    case self.step().kind:
      of Ref:
        fieldRef = true
      of Ptr:
        fieldPtr = true
      of Var:
        fieldMutable = true
      else:
        # Not a modifier: un-consume the token so that it is
        # parsed as part of the field's type expression
        self.current -= 1
    let fieldType = self.expression()
    result.fields.add((fieldName, fieldType, fieldMutable, fieldRef, fieldPtr, fieldPrivate))
    # NOTE(review): an entry is only added to defaults for the
    # fields that have one, so indices into defaults do not
    # line up with indices into fields. Confirm that this is
    # what consumers of the node expect
    if self.match("="):
      result.defaults.add(self.expression())
    self.expect(";", "expecting semicolon after field declaration")
proc declaration(self: Parser): Declaration =
## Parses declarations
@ -1118,10 +1200,13 @@ proc declaration(self: Parser): Declaration =
of Operator:
discard self.step()
result = self.funDecl(isOperator=true)
of TokenType.Comment:
let tok = self.step()
if tok.lexeme.startsWith("#pragma["):
result = self.parsePragma()
of TokenType.Pragma:
discard self.step()
let temp = self.parsePragma()
if not temp.global:
for p in temp.pragmas:
self.tree.add(p)
result = nil
of Type:
discard self.step()
result = self.typeDecl()

View File

@ -28,11 +28,11 @@ proc fillSymbolTable(tokenizer: Lexer)
proc getLineEditor: LineEditor
# Handy dandy compile-time constants
const debugLexer = false
const debugParser = false
const debugCompiler = true
const debugSerializer = false
const debugRuntime = false
const debugLexer {.booldefine.} = false
const debugParser {.booldefine.} = false
const debugCompiler {.booldefine.} = false
const debugSerializer {.booldefine.} = false
const debugRuntime {.booldefine.} = false
proc repl(vm: PeonVM = newPeonVM()) =
@ -402,6 +402,7 @@ proc fillSymbolTable(tokenizer: Lexer) =
tokenizer.symbols.addKeyword("import", Import)
tokenizer.symbols.addKeyword("yield", TokenType.Yield)
tokenizer.symbols.addKeyword("return", TokenType.Return)
tokenizer.symbols.addKeyword("object", Object)
# These are more like expressions with a reserved
# name that produce a value of a builtin type,
# but we don't need to care about that until