Minor review and improvements

This commit is contained in:
Mattia Giambirtone 2023-11-08 18:19:20 +01:00
parent 13eea04e74
commit e11ada2fec
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
5 changed files with 111 additions and 101 deletions

View File

@ -1,2 +1,2 @@
--hints:off --deepCopy:on
--hints:off --deepCopy:on --experimental:strictFuncs
path="src"

View File

@ -46,24 +46,21 @@ const HelpMessage* = """The peon programming language, Copyright (C) 2023 Mattia
This program is free software, see the license distributed with this program or check
http://www.apache.org/licenses/LICENSE-2.0 for more info.
Basic Usage
-----------
$ peon [options] file.pn Run the given Peon source file
$ peon [options] file.pbc Run the given Peon bytecode file
Note: This is a development tool
Options
-------
-h, --help Show this help text and exit
-v, --version Print the current peon version and exit
-s, --string Execute the passed string as if it was a file
-w, --warnings Turn warnings on or off (default: on). Acceptable values are
yes/on and no/off
--noWarn Disable a specific warning (for example, --noWarn:UserWarning)
--showMismatches Show all mismatches when function dispatching fails (output is really verbose)
--debug-lexer Show the lexer's output
--debug-parser Show the parser's output
--debug-tc Show the typechecker's output
-h, --help Show this help text and exit
-v, --version Print the current peon version and exit
-s, --string Execute the passed string as if it was a file
-w, --warnings Turn warnings on or off (default: on). Acceptable values are
yes/on and no/off
--noWarn Disable a specific warning (for example, --noWarn:UserWarning)
--showMismatches Show all mismatches when function dispatching fails (output is really verbose)
--debugLexer Show the lexer's output
--debugParser Show the parser's output
--debugTypeChecker Show the typechecker's output
--listWarns Show a list of all warnings
"""

View File

@ -42,7 +42,7 @@ type
TypeChecker* = ref object
## The Peon type checker
current: int # The current node we're looking at
tree: seq[Declaration] # The AST for the current module
tree: ParseTree # The AST for the current module
scopeDepth*: int # The current scope depth (0 == global, > 0 == local)
# These objects are needed to parse other
# modules
@ -1736,7 +1736,7 @@ proc validate(self: TypeChecker, node: ASTNode): TypedNode =
self.error(&"failed to dispatch node of type {node.kind}", node)
proc validate*(self: TypeChecker, tree: seq[Declaration], file, source: string, showMismatches: bool = false,
proc validate*(self: TypeChecker, tree: ParseTree, file, source: string, showMismatches: bool = false,
disabledWarnings: seq[WarningKind] = @[]): seq[TypedNode] =
## Transforms a sequence of typeless AST nodes
## into a sequence of typed AST nodes

View File

@ -32,6 +32,9 @@ export token, ast, errors
type
# Just a convenient alias
ParseTree* = seq[ASTNode]
## A parse tree
Precedence {.pure.} = enum
## Operator precedence
## clearly stolen from
@ -88,7 +91,7 @@ type
# Operator table
operators: OperatorTable
# The AST we're producing
tree: seq[Declaration]
tree: seq[ASTNode]
# Stores line data
lines: seq[tuple[start, stop: int]]
# The source of the current module
@ -160,16 +163,9 @@ proc getPrecedence(self: OperatorTable, lexeme: string): Precedence =
proc newParser*: Parser =
## Initializes a new Parser object
new(result)
result.current = 0
result.file = ""
result.tokens = @[]
result.currentFunction = nil
result.loopDepth = 0
result.scopeDepth = 0
# Nim initializes all the other fields
# automatically
result.operators = newOperatorTable()
result.tree = @[]
result.source = ""
result.modules = newTable[string, bool]()
# Public getters for improved error formatting
@ -183,8 +179,20 @@ template endOfFile: Token = Token(kind: EndOfFile, lexeme: "", line: -1)
template endOfLine(msg: string, tok: Token = nil) = self.expect(Semicolon, msg, tok)
# Utility functions
proc peek(self: Parser, distance: int = 0): Token {.inline.} =
proc beginScope(self: Parser) {.inline.} =
## Begins a new lexical scope by incrementing the
## parser's scope depth counter
inc(self.scopeDepth)
proc endScope(self: Parser) {.inline.} =
## Ends the current lexical scope by decrementing the
## parser's scope depth counter
dec(self.scopeDepth)
func peek(self: Parser, distance: int = 0): Token {.inline.} =
## Peeks at the token at the given distance.
## If the distance is out of bounds, an EOF
## token is returned. A negative distance may
@ -196,7 +204,7 @@ proc peek(self: Parser, distance: int = 0): Token {.inline.} =
result = self.tokens[self.current + distance]
proc done(self: Parser): bool {.inline.} =
func done(self: Parser): bool {.inline.} =
## Returns true if we're at the
## end of the file. Note that the
## parser expects an explicit
@ -223,18 +231,20 @@ proc error(self: Parser, message: string, token: Token = nil) {.raises: [ParseEr
# Why do we allow strings or enum members of TokenType? Well, it's simple:
# symbols like ":" and "=" are both valid operator names (therefore they are
# symbols like ":" and "=" are both valid operators (and therefore they are
# tokenized as symbols), but they are also used in a context where they are just
# separators (for example, the colon is used in type declarations). Since we can't
# tell at tokenization time which of the two contexts we're in, we just treat everything
# as a symbol and in the cases where we need a specific token we just match the string
# directly
proc check[T: TokenType or string](self: Parser, kind: T, distance: int = 0): bool {.inline.} =
func check[T: TokenType or string](self: Parser, kind: T, distance: int = 0): bool {.inline.} =
## Checks if the given token at the given distance
## matches the expected kind and returns a boolean.
## The distance parameter is passed directly to
## self.peek()
when T is TokenType:
# Usually I'm not a fan of templates, but
# this is kind of nice
self.peek(distance).kind == kind
else:
when T is string:
@ -306,17 +316,18 @@ proc varDecl(self: Parser): Declaration
proc parseFunExpr(self: Parser): LambdaExpr
proc funDecl(self: Parser, isAsync: bool = false, isGenerator: bool = false, isLambda: bool = false, isOperator: bool = false): Declaration
proc declaration(self: Parser): Declaration
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration]
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[ASTNode]
proc findOperators(self: Parser, tokens: seq[Token])
proc parseOr(self: Parser): Expression
# End of forward declarations
# Top-down parsing handlers
proc primary(self: Parser): Expression =
## Parses primary expressions such
## as integer literals and keywords
## that map to builtin types (true,
## false, nil, etc.)
## Parses primary expressions. A primary
## expression produces a value of a built-in
## type (for example integer literals, lambdas,
## coroutines, etc.)
case self.peek().kind:
of True:
result = newTrueExpr(self.step())
@ -377,7 +388,8 @@ proc primary(self: Parser): Expression =
# We only allow expressions with precedence lower than assignment
# inside ref/ptr/lent/const expressions because this allows us to
# parse variable declarations such as var x: ref type = value; without
# having the code to parse just the type declaration also capture the assignment
# having the code to parse just the type declaration also capture the
# assignment
of TokenType.Ref:
let tok = self.step()
result = newRefExpr(self.parseOr(), tok)
@ -430,8 +442,7 @@ proc makeCall(self: Parser, callee: Expression): CallExpr =
proc parseGenericArgs(self: Parser): Expression =
## Parses expressions like someType[someGeneric]
## that are needed to instantiate generics
## Parses generic instantiation expressions
var item = newIdentExpr(self.peek(-2), self.scopeDepth)
var types: seq[Expression] = @[]
while not self.check(RightBracket) and not self.done():
@ -463,10 +474,10 @@ proc call(self: Parser): Expression =
## Operator parsing handlers
proc unary(self: Parser): Expression =
proc parseUnary(self: Parser): Expression =
## Parses unary expressions
if self.check([Identifier, Symbol]) and self.peek().lexeme in self.operators.tokens:
result = newUnaryExpr(self.step(), self.unary())
result = newUnaryExpr(self.step(), self.parseUnary())
result.file = self.file
else:
result = self.call()
@ -474,12 +485,12 @@ proc unary(self: Parser): Expression =
proc parsePow(self: Parser): Expression =
## Parses power expressions
result = self.unary()
result = self.parseUnary()
var operator: Token
var right: Expression
while self.check([Identifier, Symbol]) and self.operators.getPrecedence(self.peek().lexeme) == Power:
operator = self.step()
right = self.unary()
right = self.parseUnary()
result = newBinaryExpr(result, operator, right)
result.file = self.file
@ -587,8 +598,7 @@ proc parseArrow(self: Parser): Expression =
result.file = self.file
## End of operator parsing handlers
## Statement parsing handlers
proc assertStmt(self: Parser): Statement =
## Parses "assert" statements, which
@ -601,16 +611,6 @@ proc assertStmt(self: Parser): Statement =
result.file = self.file
proc beginScope(self: Parser) {.inline.} =
## Begins a new lexical scope
inc(self.scopeDepth)
proc endScope(self: Parser) {.inline.} =
## Ends a new lexical scope
dec(self.scopeDepth)
proc blockStmt(self: Parser): BlockStmt =
## Parses block statements. A block
## statement simply opens a new local
@ -1352,17 +1352,16 @@ proc findOperators(self: Parser, tokens: seq[Token]) =
# to find operators. Note that this
# relies on the lexer ending the input
# with an EOF token
if token.kind == Operator:
if i == tokens.high():
self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
self.operators.addOperator(tokens[i + 1].lexeme)
if i == tokens.high() and token.kind != EndOfFile:
# Since we're iterating this list anyway we might as
# well perform some extra checks
self.error("invalid state: found malformed tokenizer input while looking for operators (missing EOF)", token)
elif token.kind == Operator:
self.operators.addOperator(tokens[i + 1].lexeme)
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[Declaration] =
proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[start, stop: int]], source: string, persist: bool = false): seq[ASTNode] =
## Parses a sequence of tokens into a sequence of AST nodes
self.tokens = tokens
self.file = file
@ -1377,8 +1376,11 @@ proc parse*(self: Parser, tokens: seq[Token], file: string, lines: seq[tuple[sta
self.operators = newOperatorTable()
self.modules = newTable[string, bool]()
self.findOperators(tokens)
var node: ASTNode
while not self.done():
self.tree.add(self.declaration())
if self.tree[^1] == nil:
self.tree.delete(self.tree.high())
node = self.declaration()
if not node.isNil():
# This only happens because we haven't implemented
# all of our grammar yet. Will be removed soon
self.tree.add(node)
result = self.tree

View File

@ -58,11 +58,16 @@ proc `$`(self: TypedNode): string =
result = &"{self.node}: ? ({self.node.kind})"
proc test(warnings: seq[WarningKind] = @[], mismatches: bool = false) =
proc main(file: string, warnings: seq[WarningKind] = @[], showMismatches: bool = false) =
# TODO
discard
proc test(warnings: seq[WarningKind] = @[], showMismatches: bool = false) =
var
keep = true
tokens: seq[Token] = @[]
tree: seq[Declaration] = @[]
tokens: seq[Token]
tree: ParseTree
typeChecker = newTypeChecker()
lexer = newLexer()
parser = newParser()
@ -107,7 +112,7 @@ proc test(warnings: seq[WarningKind] = @[], mismatches: bool = false) =
if debugTypeChecker:
styledEcho fgCyan, "Typechecker output:"
for typedNode in typeChecker.validate(parser.parse(lexer.lex(input, "<stdin>"), lexer.getFile(), lexer.getLines(), lexer.getSource()),
lexer.getFile(), lexer.getSource(), showMismatches=mismatches, disabledWarnings=warnings):
lexer.getFile(), lexer.getSource(), showMismatches=showMismatches, disabledWarnings=warnings):
if debugTypeChecker:
styledEcho fgGreen, &"\t{typedNode.node} -> {typeChecker.stringify(typedNode)}\n"
echo ""
@ -135,7 +140,7 @@ proc test(warnings: seq[WarningKind] = @[], mismatches: bool = false) =
try:
source = stdin.readLine()
for typedNode in compiler.compile(parser.parse(lexer.lex(source, file), file, lexer.getLines(), lexer.getSource()), lexer.getFile(), lexer.getSource(),
showMismatches=true, disabledWarnings=warnings):
showMismatches=true, disabledWarnings=warnings):
echo &"{typedNode.node} -> {compiler.stringify(typedNode)}\n"
except IOError:
echo ""
@ -146,27 +151,26 @@ proc test(warnings: seq[WarningKind] = @[], mismatches: bool = false) =
print(exc)
except CompileError as exc:
print(exc)
]#
#[
when isMainModule:
setControlCHook(proc () {.noconv.} = echo ""; quit(0))
main()
]#
when isMainModule:
setControlCHook(proc () {.noconv.} = quit(0))
var optParser = initOptParser(commandLineParams())
var file: string = ""
var fromString: bool = false
var dump: bool = true
var warnings: seq[WarningKind] = @[]
var mismatches: bool = false
# var mode: CompileMode = CompileMode.Debug
var run: bool = true
# var backend: PeonBackend
var output: string = ""
var
optParser = initOptParser(commandLineParams())
file: string
fromString: bool
#dump: bool = true
warnings: seq[WarningKind] = @[]
showMismatches: bool
#mode: CompileMode = CompileMode.Debug
run: bool = true
#backend: PeonBackend
#output: string
for kind, key, value in optParser.getopt():
case kind:
of cmdArgument:
@ -192,8 +196,8 @@ when isMainModule:
of "string":
file = key
fromString = true
of "noDump":
dump = false
#[of "noDump":
dump = false]#
of "warnings":
if value.toLowerAscii() in ["yes", "on"]:
warnings = @[]
@ -204,41 +208,45 @@ when isMainModule:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, "invalid value for option 'warnings' (valid options are: yes, on, no, off)")
quit()
of "showMismatches":
mismatches = true
showMismatches = true
of "noWarn":
case value:
of "UserWarning":
warnings.add(UserWarning)
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, "invalid warning name for option 'noWarn'")
quit()
of "debug-tc":
quit()
of "listWarns":
echo "Currently supported warnings: "
for warning in WarningKind:
echo &" - {warning}"
quit(0)
of "debugTypeChecker":
debugTypeChecker = true
of "compile":
#[of "compile":
run = false
of "output":
output = value
#[
of "backend":
case value:
of "bytecode":
backend = PeonBackend.Bytecode
of "c":
backend = PeonBackend.NativeC
]#
of "debug-dump":
debugSerializer = true
of "debug-lexer":
]#
of "debugLexer":
debugLexer = true
of "debug-parser":
of "debugParser":
debugParser = true
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"unknown option '{key}'")
quit()
of cmdShortOption:
case key:
of "o":
output = value
#[of "o":
output = value]#
of "h":
echo HELP_MESSAGE
quit()
@ -248,8 +256,8 @@ when isMainModule:
of "s":
file = key
fromString = true
of "n":
dump = false
#[of "n":
dump = false]#
of "w":
if value.toLowerAscii() in ["yes", "on"]:
warnings = @[]
@ -259,8 +267,8 @@ when isMainModule:
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, "invalid value for option 'w' (valid options are: yes, on, no, off)")
quit()
of "c":
run = false
#[of "c":
run = false]#
else:
stderr.styledWriteLine(fgRed, styleBright, "Error: ", fgDefault, &"unknown option '{key}'")
quit()
@ -268,6 +276,9 @@ when isMainModule:
echo "usage: peon [options] [filename.pn]"
quit()
if file == "":
test(warnings, mismatches)
test(warnings, showMismatches)
else:
echo "Warning: not implemented yet!"
main(file, warnings, showMismatches)
#[else:
runFile(file, fromString, dump, breaks, warnings, mismatches, mode, run, backend, output)]#
runFile(file, fromString, dump, breaks, warnings, showMismatches, mode, run, backend, output)]#