445 lines
14 KiB
Python
445 lines
14 KiB
Python
|
from .meta.exceptions import ParseError
|
||
|
from .meta.tokentype import TokenType
|
||
|
from .meta.tokenobject import Token
|
||
|
from typing import List, Union
|
||
|
from .meta.expression import Variable, Assignment, Logical, Call, Binary, Unary, Literal, Grouping, Expression, Get, Set, This, Super
|
||
|
from .meta.statement import StatementExpr, Var, Del, Block, If, While, Break, Function, Return, Class
|
||
|
|
||
|
|
||
|
class Parser(object):
|
||
|
"""A simple recursive-descent top-down parser"""
|
||
|
|
||
|
def __init__(self, tokens: List[Token]):
|
||
|
"""Object constructor"""
|
||
|
|
||
|
self.tokens = tokens
|
||
|
self.current: int = 0
|
||
|
|
||
|
def check(self, token_type):
|
||
|
"""
|
||
|
Helper method for self.match
|
||
|
"""
|
||
|
|
||
|
if self.done():
|
||
|
return False
|
||
|
elif self.peek().kind == token_type:
|
||
|
return True
|
||
|
return False
|
||
|
|
||
|
def throw(self, token: Token, message: str) -> ParseError:
|
||
|
"""Returns ParseError with the given message"""
|
||
|
|
||
|
return ParseError(token, message)
|
||
|
|
||
|
def synchronize(self):
|
||
|
"""Synchronizes the parser's state to recover after
|
||
|
an error occurred while parsing"""
|
||
|
|
||
|
self.step()
|
||
|
while not self.done():
|
||
|
if self.previous().kind == TokenType.SEMICOLON:
|
||
|
break
|
||
|
else:
|
||
|
token_type = self.peek().kind
|
||
|
if token_type in (
|
||
|
TokenType.IF, TokenType.CLASS, TokenType.VAR, TokenType.FOR, TokenType.WHILE,
|
||
|
TokenType.RETURN, TokenType.FUN
|
||
|
):
|
||
|
return
|
||
|
self.step()
|
||
|
|
||
|
def peek(self):
|
||
|
"""
|
||
|
Returns a token without consuming it
|
||
|
"""
|
||
|
|
||
|
return self.tokens[self.current]
|
||
|
|
||
|
def previous(self):
|
||
|
"""
|
||
|
Returns the most recently consumed token
|
||
|
"""
|
||
|
|
||
|
return self.tokens[self.current - 1]
|
||
|
|
||
|
def done(self):
|
||
|
"""
|
||
|
Returns True if we reached EOF
|
||
|
"""
|
||
|
|
||
|
return self.peek().kind == TokenType.EOF
|
||
|
|
||
|
def match(self, *types: Union[TokenType, List[TokenType]]):
|
||
|
"""
|
||
|
Checks if the current token matches
|
||
|
any of the given token type(s)
|
||
|
"""
|
||
|
|
||
|
for token_type in types:
|
||
|
if self.check(token_type):
|
||
|
self.step()
|
||
|
return True
|
||
|
return False
|
||
|
|
||
|
def consume(self, token_type, message: str):
|
||
|
"""
|
||
|
Consumes a token, raises an error
|
||
|
with the given message if the current token
|
||
|
differs from the expected one
|
||
|
"""
|
||
|
|
||
|
if self.check(token_type):
|
||
|
return self.step()
|
||
|
raise self.throw(self.peek(), message)
|
||
|
|
||
|
def primary(self):
|
||
|
"""Parses unary expressions (literals)"""
|
||
|
|
||
|
if self.match(TokenType.FALSE):
|
||
|
return Literal(False)
|
||
|
elif self.match(TokenType.TRUE):
|
||
|
return Literal(True)
|
||
|
elif self.match(TokenType.NIL):
|
||
|
return Literal(None)
|
||
|
elif self.match(TokenType.NUM, TokenType.STR):
|
||
|
return Literal(self.previous().literal)
|
||
|
elif self.match(TokenType.LP):
|
||
|
expr: Expression = self.expression()
|
||
|
self.consume(TokenType.RP, "Unexpected error while parsing parenthesized expression")
|
||
|
return Grouping(expr)
|
||
|
elif self.match(TokenType.ID):
|
||
|
return Variable(self.previous())
|
||
|
elif self.match(TokenType.SUPER):
|
||
|
keyword = self.previous()
|
||
|
self.consume(TokenType.DOT, "Expecting '.' after 'super'")
|
||
|
method = self.consume(TokenType.ID, "Expecting superclass method name")
|
||
|
return Super(keyword, method)
|
||
|
elif self.match(TokenType.THIS):
|
||
|
return This(self.previous())
|
||
|
raise self.throw(self.peek(), "Invalid syntax")
|
||
|
|
||
|
def finish_call(self, callee):
|
||
|
"""Parses a function call"""
|
||
|
|
||
|
arguments = []
|
||
|
if not self.check(TokenType.RP):
|
||
|
while True:
|
||
|
if len(arguments) >= 255:
|
||
|
raise self.throw(self.peek(), "Cannot have more than 255 arguments")
|
||
|
arguments.append(self.expression())
|
||
|
if not self.match(TokenType.COMMA):
|
||
|
break
|
||
|
paren = self.consume(TokenType.RP, "Unexpected error while parsing call")
|
||
|
return Call(callee, paren, arguments)
|
||
|
|
||
|
def call(self):
|
||
|
"""Parses call expressions"""
|
||
|
|
||
|
expr = self.primary()
|
||
|
while True:
|
||
|
if self.match(TokenType.LP):
|
||
|
expr = self.finish_call(expr)
|
||
|
elif self.match(TokenType.DOT):
|
||
|
name = self.consume(TokenType.ID, "Expecting property after '.'")
|
||
|
expr = Get(expr, name)
|
||
|
else:
|
||
|
break
|
||
|
return expr
|
||
|
|
||
|
def unary(self):
|
||
|
"""Parses unary expressions"""
|
||
|
|
||
|
if self.match(TokenType.NEG, TokenType.MINUS):
|
||
|
operator: Token = self.previous()
|
||
|
right: Expression = self.unary()
|
||
|
return Unary(operator, right)
|
||
|
return self.call()
|
||
|
|
||
|
def pow(self):
|
||
|
"""Parses pow expressions"""
|
||
|
|
||
|
expr: Expression = self.unary()
|
||
|
while self.match(TokenType.POW):
|
||
|
operator: Token = self.previous()
|
||
|
right: Expression = self.unary()
|
||
|
expr = Binary(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def multiplication(self):
|
||
|
"""
|
||
|
Parses multiplications and divisions
|
||
|
"""
|
||
|
|
||
|
expr: Expression = self.pow()
|
||
|
while self.match(TokenType.STAR, TokenType.SLASH, TokenType.MOD):
|
||
|
operator: Token = self.previous()
|
||
|
right: Expression = self.pow()
|
||
|
expr = Binary(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def addition(self):
|
||
|
"""
|
||
|
Parses additions and subtractions
|
||
|
"""
|
||
|
|
||
|
expr: Expression = self.multiplication()
|
||
|
while self.match(TokenType.PLUS, TokenType.MINUS):
|
||
|
operator: Token = self.previous()
|
||
|
right: Expression = self.multiplication()
|
||
|
expr = Binary(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def comparison(self):
|
||
|
"""
|
||
|
Parses comparison expressions
|
||
|
"""
|
||
|
|
||
|
expr: Expression = self.addition()
|
||
|
while self.match(TokenType.GT, TokenType.GE, TokenType.LT, TokenType.LE, TokenType.NE):
|
||
|
operator: Token = self.previous()
|
||
|
right: Expression = self.addition()
|
||
|
expr = Binary(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def equality(self):
|
||
|
"""
|
||
|
Parses equality expressions
|
||
|
"""
|
||
|
|
||
|
expr: Expression = self.comparison()
|
||
|
while self.match(TokenType.NEG, TokenType.DEQ):
|
||
|
operator: Token = self.previous()
|
||
|
right: Expression = self.comparison()
|
||
|
expr = Binary(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def logical_and(self):
|
||
|
"""Parses a logical and expression"""
|
||
|
|
||
|
expr = self.equality()
|
||
|
while self.match(TokenType.AND):
|
||
|
operator = self.previous()
|
||
|
right = self.equality()
|
||
|
expr = Logical(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def logical_or(self):
|
||
|
"""Parses a logical or expression"""
|
||
|
|
||
|
expr = self.logical_and()
|
||
|
while self.match(TokenType.OR):
|
||
|
operator = self.previous()
|
||
|
right = self.logical_and()
|
||
|
expr = Logical(expr, operator, right)
|
||
|
return expr
|
||
|
|
||
|
def assignment(self):
|
||
|
"""
|
||
|
Parses an assignment expression
|
||
|
"""
|
||
|
|
||
|
expr = self.logical_or()
|
||
|
if self.match(TokenType.EQ):
|
||
|
eq = self.previous()
|
||
|
value = self.assignment()
|
||
|
if isinstance(expr, Variable):
|
||
|
name = expr.name
|
||
|
return Assignment(name, value)
|
||
|
elif isinstance(expr, Get):
|
||
|
return Set(expr.object, expr.name, value)
|
||
|
raise self.throw(eq, "Invalid syntax")
|
||
|
return expr
|
||
|
|
||
|
def expression(self):
|
||
|
"""
|
||
|
Parses an expression
|
||
|
"""
|
||
|
|
||
|
return self.assignment()
|
||
|
|
||
|
def step(self):
|
||
|
"""Steps 1 token forward"""
|
||
|
|
||
|
if not self.done():
|
||
|
self.current += 1
|
||
|
return self.previous()
|
||
|
|
||
|
def del_statement(self):
|
||
|
"""Returns a del AST node"""
|
||
|
|
||
|
value = self.expression()
|
||
|
self.consume(TokenType.SEMICOLON, "Missing semicolon after statement")
|
||
|
return Del(value)
|
||
|
|
||
|
def expression_statement(self):
|
||
|
"""Returns a StatemenrExpr AST node"""
|
||
|
|
||
|
value = self.expression()
|
||
|
self.consume(TokenType.SEMICOLON, "Missing semicolon after statement")
|
||
|
return StatementExpr(value)
|
||
|
|
||
|
def block(self):
|
||
|
"""Returns a new environment to enable block scoping"""
|
||
|
|
||
|
statements = []
|
||
|
while not self.check(TokenType.RB) and not self.done():
|
||
|
statements.append(self.declaration())
|
||
|
self.consume(TokenType.RB, "Unexpected end of block")
|
||
|
return statements
|
||
|
|
||
|
def if_statement(self):
|
||
|
"""Parses an IF statement"""
|
||
|
|
||
|
self.consume(TokenType.LP, "The if condition must be parenthesized")
|
||
|
cond = self.expression()
|
||
|
self.consume(TokenType.RP, "The if condition must be parenthesized")
|
||
|
then_branch = self.statement()
|
||
|
else_branch = None
|
||
|
if self.match(TokenType.ELSE):
|
||
|
else_branch = self.statement()
|
||
|
return If(cond, then_branch, else_branch)
|
||
|
|
||
|
def while_statement(self):
|
||
|
"""Parses a while statement"""
|
||
|
|
||
|
self.consume(TokenType.LP, "The while condition must be parenthesized")
|
||
|
cond = self.expression()
|
||
|
self.consume(TokenType.RP, "The while condition must be parenthesized")
|
||
|
body = self.statement()
|
||
|
return While(cond, body)
|
||
|
|
||
|
def for_statement(self):
|
||
|
"""Parses a for statement"""
|
||
|
|
||
|
self.consume(TokenType.LP, "The for condition must be parenthesized")
|
||
|
if self.match(TokenType.SEMICOLON):
|
||
|
init = None
|
||
|
elif self.match(TokenType.VAR):
|
||
|
init = self.var_declaration()
|
||
|
else:
|
||
|
init = self.expression_statement()
|
||
|
condition = None
|
||
|
if not self.check(TokenType.SEMICOLON):
|
||
|
condition = self.expression()
|
||
|
self.consume(TokenType.SEMICOLON, "Missing semicolon after loop condition")
|
||
|
incr = None
|
||
|
if not self.check(TokenType.RP):
|
||
|
incr = self.expression()
|
||
|
self.consume(TokenType.RP, "The for condition must be parenthesized")
|
||
|
body = self.statement()
|
||
|
if incr:
|
||
|
body = Block([body, StatementExpr(incr)])
|
||
|
if not condition:
|
||
|
condition = Literal(True)
|
||
|
body = While(condition, body)
|
||
|
if init:
|
||
|
body = Block([init, body])
|
||
|
return body
|
||
|
|
||
|
def break_statement(self):
|
||
|
"""Parses a break statement"""
|
||
|
|
||
|
if self.check(TokenType.SEMICOLON):
|
||
|
return self.step()
|
||
|
raise ParseError(self.peek(), "Invalid syntax")
|
||
|
|
||
|
def return_statement(self):
|
||
|
"""Parses a return statement"""
|
||
|
|
||
|
keyword = self.previous()
|
||
|
value = None
|
||
|
if not self.check(TokenType.SEMICOLON):
|
||
|
value = self.expression()
|
||
|
self.consume(TokenType.SEMICOLON, "Missing semicolon after statement")
|
||
|
return Return(keyword, value)
|
||
|
|
||
|
def statement(self):
|
||
|
"""Parses a statement"""
|
||
|
|
||
|
if self.match(TokenType.IF):
|
||
|
return self.if_statement()
|
||
|
elif self.match(TokenType.RETURN):
|
||
|
return self.return_statement()
|
||
|
elif self.match(TokenType.FOR):
|
||
|
return self.for_statement()
|
||
|
elif self.match(TokenType.WHILE):
|
||
|
return self.while_statement()
|
||
|
elif self.match(TokenType.BREAK):
|
||
|
return Break(self.break_statement())
|
||
|
elif self.match(TokenType.LB):
|
||
|
return Block(self.block())
|
||
|
elif self.match(TokenType.DEL):
|
||
|
return self.del_statement()
|
||
|
return self.expression_statement()
|
||
|
|
||
|
def var_declaration(self):
|
||
|
"""Parses a var declaration"""
|
||
|
|
||
|
name = self.consume(TokenType.ID, "Expecting a variable name")
|
||
|
init = None
|
||
|
if self.match(TokenType.EQ):
|
||
|
init = self.expression()
|
||
|
self.consume(TokenType.SEMICOLON, "Missing semicolon after declaration")
|
||
|
return Var(name, init)
|
||
|
|
||
|
def function(self, kind: str):
|
||
|
"""Parses a function declaration"""
|
||
|
|
||
|
name = self.consume(TokenType.ID, f"Expecting {kind} name")
|
||
|
self.consume(TokenType.LP, f"Expecting parenthesis after {kind} name")
|
||
|
parameters = []
|
||
|
if not self.check(TokenType.RP):
|
||
|
while True:
|
||
|
if len(parameters) >= 255:
|
||
|
raise self.throw(self.peek(), "Cannot have more than 255 arguments")
|
||
|
parameter = self.consume(TokenType.ID, "Expecting parameter name")
|
||
|
if parameter in parameters:
|
||
|
raise self.throw(self.peek(), "Multiple parameters with the same name in function declaration are not allowed")
|
||
|
parameters.append(parameter)
|
||
|
if not self.match(TokenType.COMMA):
|
||
|
break
|
||
|
self.consume(TokenType.RP, "Unexpected error while parsing function declaration")
|
||
|
self.consume(TokenType.LB, f"Expecting '{{' before {kind} body")
|
||
|
body = self.block()
|
||
|
return Function(name, parameters, body)
|
||
|
|
||
|
def class_declaration(self):
|
||
|
"""Parses a class declaration"""
|
||
|
|
||
|
name = self.consume(TokenType.ID, "Expecting class name")
|
||
|
superclass = None
|
||
|
if self.match(TokenType.LT):
|
||
|
self.consume(TokenType.ID, "Expecting superclass name")
|
||
|
superclass = Variable(self.previous())
|
||
|
self.consume(TokenType.LB, "Expecting '{' before class body")
|
||
|
methods = []
|
||
|
while not self.check(TokenType.RB) and not self.done():
|
||
|
methods.append(self.function("method"))
|
||
|
self.consume(TokenType.RB, "Expecting '}' after class body")
|
||
|
return Class(name, methods, superclass)
|
||
|
|
||
|
def declaration(self):
|
||
|
"""Parses a declaration"""
|
||
|
|
||
|
try:
|
||
|
if self.match(TokenType.CLASS):
|
||
|
return self.class_declaration()
|
||
|
elif self.match(TokenType.FUN):
|
||
|
return self.function("function")
|
||
|
elif self.match(TokenType.VAR):
|
||
|
return self.var_declaration()
|
||
|
return self.statement()
|
||
|
except ParseError:
|
||
|
self.synchronize()
|
||
|
raise
|
||
|
|
||
|
def parse(self):
|
||
|
"""
|
||
|
Starts to parse
|
||
|
"""
|
||
|
|
||
|
statements = []
|
||
|
while not self.done():
|
||
|
statements.append(self.declaration())
|
||
|
return statements
|