Further work on compiler modularization: converted expression handlers to methods

This commit is contained in:
Mattia Giambirtone 2022-12-15 13:22:34 +01:00
parent dc393bbb34
commit 76ee8c7454
2 changed files with 1132 additions and 1128 deletions

View File

@ -90,10 +90,65 @@ type
types*: seq[tuple[match: bool, kind: Type]]
else:
discard
type
NameKind* {.pure.} = enum
## Enumerates the kinds of entity a compile-time
## name can stand for
None, Module, Argument, Var, Function, CustomType, Enum
Name* = ref object of RootObj
## A compile-time name: an identifier together with
## the metadata the compiler records about it
# Kind of the identifier (NOT of the value!)
case kind*: NameKind
of NameKind.Module:
# Only module names carry a path
path*: string
else:
discard
# The name's identifier
ident*: IdentExpr
# Owner of the identifier (a module name)
owner*: Name
# File where the name is declared
file*: string
# Scope depth
depth*: int
# Is this name private?
isPrivate*: bool
# Is this a constant?
isConst*: bool
# Was this name declared with let?
# NOTE(review): presumably true means the value can
# NOT be reassigned -- confirm against the declaration code
isLet*: bool
# Is this name a generic type?
isGeneric*: bool
# The type of the value associated
# with this name
valueType*: Type
# The function that owns this name (may be nil!)
belongsTo*: Name
# Line of the file where this name is declared
line*: int
# Has this name been referenced at least once?
resolved*: bool
# The AST node associated with this name. This
# is needed because we compile function and type
# declarations only if, and when, they're actually
# used
node*: Declaration
# The set of names this name is exported to (only
# makes sense if isPrivate equals false)
exportedTo*: HashSet[Name]
# Does this name come from user code, or was it
# generated internally by the compiler?
# NOTE(review): presumably true means "from user code" -- confirm
isReal*: bool
# Is this name a builtin?
isBuiltin*: bool
WarningKind* {.pure.} = enum
## Enumerates the kinds of warning the compiler
## can emit
UnreachableCode, UnusedName, ShadowOuterScope,
MutateOuterScope
CompileMode* {.pure.} = enum
## Enumerates the available compilation modes
Debug, Release
@ -102,7 +157,7 @@ type
node*: ASTNode
function*: Declaration
Compiler* = ref object {.inheritable.}
Compiler* = ref object of RootObj
## A wrapper around the Peon compiler's state
# The output of our parser (AST)
@ -120,6 +175,8 @@ type
# swapped for a special instruction that prints
# the result of the expression once it is evaluated
replMode*: bool
# List of all compile-time names
names*: seq[Name]
# Stores line data for error reporting
lines*: seq[tuple[start, stop: int]]
# The source of the current module,
@ -136,4 +193,618 @@ type
# mismatches when we dispatch with match()
showMismatches*: bool
# Are we compiling in debug mode?
mode*: CompileMode
mode*: CompileMode
# The current function being compiled
currentFunction*: Name
# The current module being compiled
currentModule*: Name
# The module importing us, if any
parentModule*: Name
## Public getters for nicer error formatting
proc getCurrentNode*(self: Compiler): ASTNode =
    ## Returns the AST node that diagnostics should point at: the
    ## node right before the current position or, once the position
    ## has moved past the end of the tree, the last node of the tree.
    ## Returns nil if the AST is empty
    if self.ast.len() == 0:
        # There is nothing to point at: indexing an empty
        # sequence would raise an IndexDefect
        return nil
    if self.current >= self.ast.len():
        return self.ast[^1]
    # When nothing has been consumed yet (current == 0) there is
    # no previous node, so we clamp to the first one instead of
    # indexing at -1
    result = self.ast[max(self.current - 1, 0)]
proc getCurrentFunction*(self: Compiler): Declaration {.inline.} =
    ## Returns the declaration attached to the type of the function
    ## currently being compiled, or nil if no function is being
    ## compiled
    if self.currentFunction.isNil():
        return nil
    result = self.currentFunction.valueType.fun
## Some "forward declarations" here (they're actually stubs because nim forces forward declarations to be
## implemented in the same module). They are methods because we need to dispatch to their actual specific
## implementations inside each target module, so we need the runtime type of the compiler object to be
## taken into account
# Base versions of the expression handlers. Every one of them
# ignores its arguments and evaluates to nil; they are marked as
# base methods so that a more specific implementation can be
# dispatched on the runtime type of the compiler object, and as
# discardable so that callers may ignore the returned type
method expression*(self: Compiler, node: Expression, compile: bool = true): Type {.discardable, base.} = nil
method identifier*(self: Compiler, node: IdentExpr, name: Name = nil, compile: bool = true, strict: bool = true): Type {.discardable, base.} = nil
method call*(self: Compiler, node: CallExpr, compile: bool = true): Type {.discardable, base.} = nil
method getItemExpr*(self: Compiler, node: GetItemExpr, compile: bool = true, matching: Type = nil): Type {.discardable, base.} = nil
method unary*(self: Compiler, node: UnaryExpr, compile: bool = true): Type {.discardable, base.} = nil
method binary*(self: Compiler, node: BinaryExpr, compile: bool = true): Type {.discardable, base.} = nil
method lambdaExpr*(self: Compiler, node: LambdaExpr, compile: bool = true): Type {.discardable, base.} = nil
method literal*(self: Compiler, node: ASTNode, compile: bool = true): Type {.discardable, base.} = nil
# Forward declarations of methods whose bodies appear further
# down in this module: they are declared here so that the
# routines defined in between can call them
method infer*(self: Compiler, node: LiteralExpr): Type
method infer*(self: Compiler, node: Expression): Type
method inferOrError*(self: Compiler, node: Expression): Type
method findByName*(self: Compiler, name: string): seq[Name]
method findInModule*(self: Compiler, name: string, module: Name): seq[Name]
method findByType*(self: Compiler, name: string, kind: Type): seq[Name]
method compare*(self: Compiler, a, b: Type): bool
method match*(self: Compiler, name: string, kind: Type, node: ASTNode = nil, allowFwd: bool = true): Name
## End of forward declarations
## Utility functions
proc `$`*(self: Name): string =
    ## Stringifies a name by stringifying the
    ## object it refers to
    result = $(self[])


proc `$`(self: Type): string =
    ## Stringifies a type by stringifying the
    ## object it refers to
    result = $(self[])


proc hash(self: Name): Hash =
    ## Hashes a name using the lexeme of its
    ## identifier token
    result = hash(self.ident.token.lexeme)
proc peek*(self: Compiler, distance: int = 0): ASTNode =
    ## Peeks at the AST node at the given distance from the
    ## current position. If the resulting position is out of
    ## bounds, the last AST node in the tree is returned. A
    ## negative distance may be used to retrieve previously
    ## consumed AST nodes. Returns nil if the AST is empty
    if self.ast.len() == 0:
        # There is no "last node" to fall back to: indexing
        # an empty sequence would raise an IndexDefect
        return nil
    let index = self.current + distance
    if index < 0 or index > self.ast.high():
        return self.ast[^1]
    result = self.ast[index]
proc done*(self: Compiler): bool {.inline.} =
    ## Tells whether the current position has moved
    ## past the last node of the AST
    self.current >= self.ast.len()
proc error*(self: Compiler, message: string, node: ASTNode = nil) {.inline.} =
    ## Raises a CompileError carrying the given message. The error
    ## is attributed to the given node or, when that is nil, to the
    ## node returned by getCurrentNode()
    var culprit = node
    if culprit.isNil():
        culprit = self.getCurrentNode()
    raise CompileError(msg: message, node: culprit, line: culprit.token.line, file: culprit.file)
proc warning*(self: Compiler, kind: WarningKind, message: string, name: Name = nil, node: ASTNode = nil) =
    ## Prints a warning to standard error. Nothing is printed in
    ## REPL mode or when the given kind of warning is disabled.
    ## When a name is given, the location of the warning is taken
    ## from it; otherwise it is taken from the given node, falling
    ## back to the compiler's current node and function
    if self.replMode or kind in self.disabledWarnings:
        return
    var
        node: ASTNode = node
        fn: Declaration
        file = self.file
    if name.isNil():
        if node.isNil():
            node = self.getCurrentNode()
        fn = self.getCurrentFunction()
    else:
        # The name knows where it was declared: its own node,
        # function and owner take precedence over our arguments
        node = name.node
        if node.isNil():
            node = self.getCurrentNode()
        fn = (if name.belongsTo.isNil(): self.getCurrentFunction() else: name.belongsTo.node)
        file = name.owner.file
    let pos = node.getRelativeBoundaries()
    if file notin ["<string>", ""]:
        file = relativePath(file, getCurrentDir())
    stderr.styledWrite(fgYellow, styleBright, "Warning in ", fgRed, &"{file}:{node.token.line}:{pos.start}")
    if not fn.isNil() and fn.kind == funDecl:
        stderr.styledWrite(fgYellow, styleBright, " in function ", fgRed, FunDecl(fn).name.token.lexeme)
    stderr.styledWriteLine(styleBright, fgDefault, ": ", message)
    try:
        # Showing the offending source line is best-effort: the
        # warning may not come from the file we are looking at, or
        # the line metadata may be wrong, in which case we silently
        # skip this part instead of crashing
        let sourceLine = readFile(file).splitLines()[node.token.line - 1].strip(chars={'\n'})
        stderr.styledWrite(fgYellow, styleBright, "Source line: ", resetStyle, fgDefault, sourceLine[0..<pos.start])
        stderr.styledWrite(fgYellow, styleUnderscore, sourceLine[pos.start..pos.stop])
        stderr.styledWriteLine(fgDefault, sourceLine[pos.stop + 1..^1])
    except IOError, OSError, IndexDefect:
        discard
proc step*(self: Compiler): ASTNode {.inline.} =
    ## Returns the node at the current position and, unless
    ## the compiler is already done, moves on to the next one
    let consumed = self.peek()
    if not self.done():
        inc(self.current)
    return consumed
# Peon's type inference system is very flexible and can be
# reused across multiple compilation backends
proc resolve*(self: Compiler, name: string): Name =
    ## Walks the declared names from the most recent to the
    ## oldest and returns the first usable one whose identifier
    ## equals the given string, or nil when there is none. A
    ## name owned by the current module is also marked as
    ## resolved before being returned
    for candidate in reversed(self.names):
        if candidate.ident.token.lexeme != name:
            continue
        if candidate.owner.path == self.currentModule.path:
            # We own this name, so we can always use it
            candidate.resolved = true
            return candidate
        # The name lives in another module: we may only use it if
        # it is public there *and* it has been explicitly exported
        # to us. Public names that were not exported to us are
        # treated as private to avoid namespace pollution from
        # imports (i.e. if module A imports modules C and D and
        # module B imports module A, then B might not want to see
        # C's and D's names as they might clash with its own)
        if not candidate.isPrivate and self.currentModule in candidate.exportedTo:
            return candidate
proc resolve*(self: Compiler, name: IdentExpr): Name =
    ## Overload of resolve that looks up the lexeme
    ## of an identifier AST node
    result = self.resolve(name.token.lexeme)
proc resolveOrError*[T: IdentExpr | string](self: Compiler, name: T): Name =
    ## Same as self.resolve(), except that a failed lookup
    ## is reported as an error instead of returning nil
    result = self.resolve(name)
    if not result.isNil():
        return
    when T is IdentExpr:
        self.error(&"reference to undefined name '{name.token.lexeme}'", name)
    else:
        self.error(&"reference to undefined name '{name}'")
proc compareUnions*(self: Compiler, a, b: seq[tuple[match: bool, kind: Type]]): bool =
    ## Compares two type unions. Returns true if every member of
    ## the shorter union has a counterpart in the longer one, that
    ## is, a member with the same match flag whose type compares
    ## equal according to self.compare()
    let (short, long) = if b.len() > a.len(): (a, b) else: (b, a)
    for cond1 in short:
        var found = false
        for cond2 in long:
            if self.compare(cond1.kind, cond2.kind) and cond1.match == cond2.match:
                # One counterpart is enough: counting every matching
                # pair would let a member with several counterparts
                # make up for a member that has none
                found = true
                break
        if not found:
            return false
    return true
method compare*(self: Compiler, a, b: Type): bool =
    ## Compares two type objects for equality and returns
    ## true if they are compatible, false otherwise

    # Note: 'All' is a type internal to the peon
    # compiler that cannot be generated from user
    # code in any way. It's used mostly for matching
    # function return types (at least until we don't
    # have return type inference) and it matches any
    # type, including nil
    if a.isNil():
        return b.isNil() or b.kind == All
    if b.isNil():
        # Here a is known not to be nil
        return a.kind == All
    if a.kind == All or b.kind == All:
        return true
    if a.kind == b.kind:
        # Here we compare types with the same kind discriminant
        case a.kind:
            of Int8, UInt8, Int16, UInt16, Int32,
               UInt32, Int64, UInt64, Float32, Float64,
               Char, Byte, String, Nil, TypeKind.Nan, Bool, TypeKind.Inf, Any:
                return true
            of Union:
                return self.compareUnions(a.types, b.types)
            of Generic:
                return self.compareUnions(a.cond, b.cond)
            of Reference, Pointer:
                # Both types are of the same kind, so
                # we just need to compare what they
                # refer/point to
                return self.compare(a.value, b.value)
            of Function:
                # Functions are a bit trickier to compare
                if a.args.len() != b.args.len():
                    return false
                if a.isCoroutine != b.isCoroutine or a.isGenerator != b.isGenerator:
                    return false
                if not self.compare(b.returnType, a.returnType):
                    return false
                for (argA, argB) in zip(a.args, b.args):
                    # When a forward declaration is involved we are
                    # stricter and also require argument names to be
                    # equal. The only exception is when the other
                    # type is not a forward declaration and has an
                    # empty argument name: that means we crafted the
                    # type object manually, so there is no name to
                    # match against
                    if a.forwarded or b.forwarded:
                        let crafted = (not b.forwarded and argB.name == "") or
                                      (not a.forwarded and argA.name == "")
                        if not crafted and argA.name != argB.name:
                            return false
                    # Argument types are always compared: skipping
                    # the name check must not skip the type check
                    if not self.compare(argA.kind, argB.kind):
                        return false
                return true
            else:
                # TODO: Custom types, enums
                return false
    # From here on the two types have different kinds
    if a.kind == Union:
        for constraint in a.types:
            if self.compare(constraint.kind, b) and constraint.match:
                return true
        return false
    if b.kind == Union:
        for constraint in b.types:
            if self.compare(constraint.kind, a) and constraint.match:
                return true
        return false
    if a.kind == Generic:
        if a.asUnion:
            # Any positive constraint is enough
            for constraint in a.cond:
                if self.compare(constraint.kind, b) and constraint.match:
                    return true
            return false
        # All constraints must be positive and satisfied
        for constraint in a.cond:
            if not self.compare(constraint.kind, b) or not constraint.match:
                return false
        return true
    if b.kind == Generic:
        if b.asUnion:
            for constraint in b.cond:
                if self.compare(constraint.kind, a) and constraint.match:
                    return true
            return false
        for constraint in b.cond:
            if not self.compare(constraint.kind, a) or not constraint.match:
                return false
        return true
    # Here we already know that neither of these
    # types is nil, so 'any' matches unconditionally
    return a.kind == Any or b.kind == Any
proc toIntrinsic*(name: string): Type =
    ## Maps the name of an intrinsic type to a new type
    ## object of the corresponding kind. Returns nil if
    ## the name does not denote an intrinsic type
    case name:
        of "any":
            result = Type(kind: Any)
        of "auto":
            result = Type(kind: Auto)
        of "int", "int64", "i64":
            result = Type(kind: Int64)
        of "uint64", "u64", "uint":
            result = Type(kind: UInt64)
        of "int32", "i32":
            result = Type(kind: Int32)
        of "uint32", "u32":
            result = Type(kind: UInt32)
        of "int16", "i16", "short":
            result = Type(kind: Int16)
        of "uint16", "u16":
            result = Type(kind: UInt16)
        of "int8", "i8":
            result = Type(kind: Int8)
        of "uint8", "u8":
            result = Type(kind: UInt8)
        of "f64", "float", "float64":
            result = Type(kind: Float64)
        of "f32", "float32":
            result = Type(kind: Float32)
        of "byte", "b":
            result = Type(kind: Byte)
        of "char", "c":
            result = Type(kind: Char)
        of "nan":
            result = Type(kind: TypeKind.Nan)
        of "nil":
            result = Type(kind: Nil)
        of "inf":
            result = Type(kind: TypeKind.Inf)
        of "bool":
            result = Type(kind: Bool)
        of "typevar":
            result = Type(kind: Typevar)
        of "string":
            result = Type(kind: String)
        else:
            result = nil
method infer*(self: Compiler, node: LiteralExpr): Type =
    ## Computes the type of a literal expression. Numeric literals
    ## may carry a type specifier after a single quote: when it is
    ## missing, integers are Int64 and floats are Float64. Returns
    ## nil for a nil node
    if node.isNil():
        return nil
    case node.kind:
        of intExpr, binExpr, octExpr, hexExpr:
            let parts = node.token.lexeme.split("'")
            if parts.len() == 1:
                return Type(kind: Int64)
            let typ = parts[1].toIntrinsic()
            if self.compare(typ, nil):
                self.error(&"invalid type specifier '{parts[1]}' for int", node)
            else:
                return typ
        of floatExpr:
            let parts = node.token.lexeme.split("'")
            if parts.len() == 1:
                return Type(kind: Float64)
            let typ = parts[1].toIntrinsic()
            if typ.isNil():
                self.error(&"invalid type specifier '{parts[1]}' for float", node)
            else:
                return typ
        of trueExpr, falseExpr:
            return Type(kind: Bool)
        of strExpr:
            return Type(kind: String)
        else:
            discard  # Unreachable
method infer*(self: Compiler, node: Expression): Type =
    ## Computes the type of an expression by dispatching on the
    ## kind of the node, without compiling it. Returns nil for a
    ## nil node and for node kinds that are not handled yet
    if node.isNil():
        return nil
    case node.kind:
        of NodeKind.identExpr:
            return self.identifier(IdentExpr(node), compile=false, strict=false)
        of NodeKind.unaryExpr:
            return self.unary(UnaryExpr(node), compile=false)
        of NodeKind.binaryExpr:
            return self.binary(BinaryExpr(node), compile=false)
        of NodeKind.intExpr, NodeKind.hexExpr, NodeKind.binExpr, NodeKind.octExpr,
           NodeKind.strExpr, NodeKind.falseExpr, NodeKind.trueExpr, NodeKind.floatExpr:
            return self.infer(LiteralExpr(node))
        of NodeKind.callExpr:
            return self.call(CallExpr(node), compile=false)
        of NodeKind.refExpr:
            return Type(kind: Reference, value: self.infer(Ref(node).value))
        of NodeKind.ptrExpr:
            return Type(kind: Pointer, value: self.infer(Ptr(node).value))
        of NodeKind.groupingExpr:
            return self.infer(GroupingExpr(node).expression)
        of NodeKind.getItemExpr:
            return self.getItemExpr(GetItemExpr(node), compile=false)
        of NodeKind.lambdaExpr:
            return self.lambdaExpr(LambdaExpr(node), compile=false)
        else:
            discard  # TODO
method inferOrError*(self: Compiler, node: Expression): Type =
    ## Same as self.infer(), except that failing to infer
    ## a type (i.e. getting nil) is reported as an error
    let inferred = self.infer(node)
    if inferred.isNil():
        self.error("expression has no type", node)
    return inferred
method stringify*(self: Compiler, typ: Type): string =
    ## Returns the string representation of a type object.
    ## A nil type is rendered as "nil"; kinds that are not
    ## handled here are rendered as an empty string
    if typ.isNil():
        return "nil"
    case typ.kind:
        of Int8, UInt8, Int16, UInt16, Int32,
           UInt32, Int64, UInt64, Float32, Float64,
           Char, Byte, String, Nil, TypeKind.Nan, Bool,
           TypeKind.Inf, Auto:
            result &= ($typ.kind).toLowerAscii()
        of Pointer:
            result &= &"ptr {self.stringify(typ.value)}"
        of Reference:
            result &= &"ref {self.stringify(typ.value)}"
        of Function:
            result &= "fn ("
            for i, (argName, argType, argDefault) in typ.args:
                if i > 0:
                    result &= ", "
                result &= &"{argName}: {self.stringify(argType)}"
                if not argDefault.isNil():
                    result &= &" = {argDefault}"
            result &= ")"
            if not typ.returnType.isNil():
                result &= &": {self.stringify(typ.returnType)}"
            # A function type is not guaranteed to have a declaration
            # attached to it: without one there are no pragmas to show
            if not typ.fun.isNil() and typ.fun.pragmas.len() > 0:
                result &= " {"
                for i, pragma in typ.fun.pragmas:
                    if i > 0:
                        result &= ", "
                    result &= &"{pragma.name.token.lexeme}"
                    if pragma.args.len() > 0:
                        result &= ": "
                        for j, arg in pragma.args:
                            if j > 0:
                                result &= ", "
                            result &= arg.token.lexeme
                result &= "}"
        of Any:
            return "any"
        of Union:
            # Members are separated by a pipe and
            # negated ones are prefixed with a tilde
            for i, condition in typ.types:
                if i > 0:
                    result &= " | "
                if not condition.match:
                    result &= "~"
                result &= self.stringify(condition.kind)
        of Generic:
            # Same notation as unions
            for i, condition in typ.cond:
                if i > 0:
                    result &= " | "
                if not condition.match:
                    result &= "~"
                result &= self.stringify(condition.kind)
        else:
            discard
method findByName*(self: Compiler, name: string): seq[Name] =
    ## Collects, from the most recent to the oldest, all declared
    ## names with the given identifier that the current module can
    ## use: its own names, plus the public names of other modules
    ## that have been exported to it
    for candidate in reversed(self.names):
        if candidate.ident.token.lexeme != name:
            continue
        let foreign = candidate.owner.path != self.currentModule.path
        if foreign and (candidate.isPrivate or self.currentModule notin candidate.exportedTo):
            continue
        result.add(candidate)
method findInModule*(self: Compiler, name: string, module: Name): seq[Name] =
    ## Collects the public names owned by the given module. With
    ## an empty name, all of them are returned, whether or not they
    ## are exported to the current module; otherwise only those
    ## with the given identifier that are exported to the current
    ## module are returned
    if name == "":
        for obj in reversed(self.names):
            if obj.isPrivate or obj.owner != module:
                continue
            result.add(obj)
        return
    for obj in self.findInModule("", module):
        if obj.ident.token.lexeme != name or self.currentModule notin obj.exportedTo:
            continue
        result.add(obj)
method findByType*(self: Compiler, name: string, kind: Type): seq[Name] =
    ## Narrows the result of self.findByName() down to the
    ## names whose value type compares equal to the given type
    result = self.findByName(name).filterIt(self.compare(it.valueType, kind))
method findAtDepth*(self: Compiler, name: string, depth: int): seq[Name] {.used.} =
    ## Narrows the result of self.findByName() down to the
    ## names declared at the given scope depth
    result = self.findByName(name).filterIt(it.depth == depth)
proc check*(self: Compiler, term: Expression, kind: Type) {.inline.} =
    ## Checks the inferred type of term against a known type.
    ## Raises an error if the two do not compare equal, or if
    ## term is of type any while the expected type is not, and
    ## returns otherwise
    let inferred = self.inferOrError(term)
    if not self.compare(inferred, kind):
        self.error(&"expecting value of type {self.stringify(kind)}, got {self.stringify(inferred)}", term)
    elif inferred.kind == Any and kind.kind != Any:
        # Pass the term along so that the error points at the
        # offending expression rather than at whatever node the
        # compiler happens to be on
        self.error("any is not a valid type in this context", term)
proc isAny*(typ: Type): bool =
    ## Tells whether the given type is the any type, or a
    ## generic or union with a member for which this holds
    case typ.kind:
        of Any:
            result = true
        of Generic, Union:
            let members = if typ.kind == Generic: typ.cond else: typ.types
            for member in members:
                if member.kind.isAny():
                    return true
        else:
            discard
method match*(self: Compiler, name: string, kind: Type, node: ASTNode = nil, allowFwd: bool = true): Name =
    ## Tries to find a function implementation with the given name
    ## that is compatible with the given type and returns its name
    ## object. Raises an error if there is no match, if the match
    ## is ambiguous, if the match is a forward declaration and
    ## allowFwd is false, or if an argument of type any is passed
    ## where the implementation does not expect one. Errors are
    ## attributed to the given node
    var impl: seq[Name] = @[]
    for obj in self.findByName(name):
        if self.compare(kind, obj.valueType):
            impl.add(obj)
    if impl.len() == 0:
        let candidates = self.findByName(name)
        var msg = &"failed to find a suitable implementation for '{name}'"
        if candidates.len() > 0:
            msg &= &", found {len(candidates)} potential candidate"
            if candidates.len() > 1:
                msg &= "s"
            if self.showMismatches:
                msg &= ":"
                for candidate in candidates:
                    msg &= &"\n - in {relativePath(candidate.file, getCurrentDir())}:{candidate.ident.token.line}:{candidate.ident.token.relPos.start} -> {self.stringify(candidate.valueType)}"
                    if candidate.valueType.kind != Function:
                        msg &= ": not a callable"
                    elif kind.args.len() != candidate.valueType.args.len():
                        msg &= &": wrong number of arguments (expected {candidate.valueType.args.len()}, got {kind.args.len()})"
                    else:
                        for i, arg in kind.args:
                            if not self.compare(arg.kind, candidate.valueType.args[i].kind):
                                msg &= &": first mismatch at position {i + 1}: (expected {self.stringify(candidate.valueType.args[i].kind)}, got {self.stringify(arg.kind)})"
                                break
            else:
                msg &= " (compile with --showMismatches for more details)"
        else:
            msg = &"call to undefined function '{name}'"
        self.error(msg, node)
    elif impl.len() > 1:
        # Forward declarations and automatic functions give way to
        # the other candidates. If that leaves no candidate at all
        # we keep the original ones, so that the situation is
        # reported as an ambiguity instead of crashing on impl[0]
        let concrete = filterIt(impl, not it.valueType.forwarded and not it.valueType.isAuto)
        if concrete.len() > 0:
            impl = concrete
        if impl.len() > 1:
            # If it's *still* more than one match, then it's an error
            var msg = &"multiple matching implementations of '{name}' found"
            if self.showMismatches:
                msg &= ":"
                for fn in reversed(impl):
                    msg &= &"\n- in {relativePath(fn.file, getCurrentDir())}, line {fn.line} of type {self.stringify(fn.valueType)}"
            else:
                msg &= " (compile with --showMismatches for more details)"
            self.error(msg, node)
    if impl[0].valueType.forwarded and not allowFwd:
        self.error(&"expecting an implementation for function '{impl[0].ident.token.lexeme}' declared in module '{impl[0].owner.ident.token.lexeme}' at line {impl[0].ident.token.line} of type '{self.stringify(impl[0].valueType)}'", node)
    result = impl[0]
    for (a, b) in zip(result.valueType.args, kind.args):
        if not a.kind.isAny() and b.kind.isAny():
            self.error("any is not a valid type in this context", node)

File diff suppressed because it is too large Load Diff