diff --git a/src/backend/lexer.nim b/src/backend/lexer.nim
index d2e2b84..a6bf3e9 100644
--- a/src/backend/lexer.nim
+++ b/src/backend/lexer.nim
@@ -15,6 +15,7 @@
 ## A simple and modular tokenizer implementation with arbitrary lookahead
 import strutils
+import parseutils
 import strformat
 import tables
 import meta/token
 
@@ -91,9 +92,8 @@ type
         line: int
         start: int
        current: int
-        errored*: bool
         file: string
-        errorMessage*: string
+    LexingError* = object of CatchableError
 
 
 proc initLexer*(self: Lexer = nil): Lexer =
@@ -107,9 +107,7 @@ proc initLexer*(self: Lexer = nil): Lexer =
     result.line = 1
     result.start = 0
     result.current = 0
-    result.errored = false
     result.file = ""
-    result.errorMessage = ""
 
 
 proc done(self: Lexer): bool =
@@ -149,10 +147,7 @@ proc error(self: Lexer, message: string) =
     ## for the lexer. The lex method will not
     ## continue tokenizing if it finds out
     ## an error occurred
-    if self.errored:
-        return
-    self.errored = true
-    self.errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}"
+    raise newException(LexingError, &"A fatal error occurred while parsing '{self.file}', line {self.line} at '{self.peek()}' -> {message}")
 
 
 proc check(self: Lexer, what: char, distance: int = 0): bool =
@@ -230,6 +225,75 @@ proc createToken(self: Lexer, tokenType: TokenType) =
     self.tokens.add(tok)
 
 
+proc parseEscape(self: Lexer) =
+    # Boring escape sequence parsing. For more info check out
+    # https://en.wikipedia.org/wiki/Escape_sequences_in_C.
+    # As of now, \u and \U are not supported, but they'll
+    # likely be soon. Another notable limitation is that
+    # \xhhh and \nnn are limited to the size of a char
+    # (i.e. uint8, or 256 values)
+    case self.peek():
+        of 'a':
+            self.source[self.current] = cast[char](0x07)
+        of 'b':
+            self.source[self.current] = cast[char](0x08)
+        of 'e':
+            self.source[self.current] = cast[char](0x1B)
+        of 'f':
+            self.source[self.current] = cast[char](0x0C)
+        of 'n':
+            when defined(windows):
+                # We natively convert LF to CRLF on Windows, and
+                # gotta thank Microsoft for the extra boilerplate!
+                self.source[self.current] = cast[char](0x0D)
+                if not self.done():
+                    self.source[self.current + 1] = cast[char](0x0A)
+            else:
+                # Every other platform is kind enough to use
+                # the agreed upon LF standard, but again thanks
+                # to microsoft we need to convert \r\n back to \n
+                # under actually sensible operating systems
+                if self.source[self.current - 1] == cast[char](0x0D):
+                    self.source = self.source[0..

     ## - r -> declares a raw string literal, where escape sequences
     ##   are not parsed and stay as-is
+    ## - f -> declares a format string, where variables may be
+    ##   interpolated using curly braces like f"Hello, {name}!".
+    ##   Braces may be escaped using a pair of them, so to represent
+    ##   a literal "{" in an f-string, one would use {{ instead
     ## Multi-line strings can be declared using matching triplets of
     ## either single or double quotes. They can span across multiple
     ## lines and escape sequences in them are not parsed, like in raw
     ## strings, so a multi-line string prefixed with the "r" modifier
     ## is redundant, although multi-line byte strings are supported
-    while not self.check(delimiter) and not self.done():
-        if self.check('\n') and mode == "multi":
-            self.line = self.line + 1
-        else:
-            self.error("unexpected EOL while parsing string literal")
-            return
+    while not self.check(delimiter):
+        if self.check('\n'):
+            if mode == "multi":
+                self.line = self.line + 1
+            else:
+                self.error("unexpected EOL while parsing string literal")
         if mode in ["raw", "multi"]:
             discard self.step()
-        elif self.check('\\'):
-            # Escape sequences.
-            # We currently support only the basic
-            # ones, so stuff line \nnn, \xhhh, \uhhhh and
-            # \Uhhhhhhhh are not supported. For more info
-            # check https://en.wikipedia.org/wiki/Escape_sequences_in_C
+        if self.check('\\'):
+            # This madness here serves to get rid of the slash, since \x is mapped
+            # to a one-byte sequence but is actually 2 bytes
+            self.source = self.source[0..

-    self.errorMessage = &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at {lexeme} -> {message}"
+    raise newException(ParseError, &"A fatal error occurred while parsing '{self.file}', line {self.peek().line} at {lexeme} -> {message}")
 
 
 proc check(self: Parser, kind: TokenType, distance: int = 0): bool =
 
@@ -188,6 +184,8 @@ proc primary(self: Parser): ASTNode =
             result = newASTNode(self.step(), NodeKind.octExpr)
         of TokenType.Binary:
             result = newASTNode(self.step(), NodeKind.binExpr)
+        of TokenType.String:
+            result = newASTNode(self.step(), NodeKind.strExpr)
         else:
             self.error("invalid syntax")
 
@@ -212,8 +210,6 @@ proc call(self: Parser): ASTNode =
     ## Parses call expressions and object
     ## accessing ("dot syntax")
     result = self.primary()
-    if result == nil:
-        return
     while true:
         if self.match(TokenType.LeftParen):
             result = self.make_call(result)
 
@@ -227,10 +223,7 @@ proc unary(self: Parser): ASTNode =
     ## Parses unary expressions
     if self.match([TokenType.Minus, TokenType.Tilde]):
-        result = self.unary()
-        if result == nil:
-            return
-        result = newASTNode(self.peek(-1), NodeKind.unaryExpr, @[result])
+        result = newASTNode(self.peek(-1), NodeKind.unaryExpr, @[self.unary()])
     else:
         result = self.call()
 
@@ -238,8 +231,6 @@ proc pow(self: Parser): ASTNode =
     ## Parses exponentiation expressions
     result = self.unary()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
     while self.match(TokenType.DoubleAsterisk):
 
@@ -251,11 +242,9 @@ proc mul(self: Parser): ASTNode =
     ## Parses multiplication and division expressions
     result = self.pow()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
-    while self.match([TokenType.Slash, TokenType.Percentage, TokenType.FloorDiv]):
+    while self.match([TokenType.Slash, TokenType.Percentage, TokenType.FloorDiv, TokenType.Asterisk]):
         operator = self.peek(-1)
         right = self.pow()
         result = newASTNode(operator, NodeKind.binaryExpr, @[result, right])
 
@@ -264,8 +253,6 @@ proc add(self: Parser): ASTNode =
     ## Parses addition and subtraction expressions
     result = self.mul()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
     while self.match([TokenType.Plus, TokenType.Minus]):
 
@@ -277,8 +264,6 @@ proc comparison(self: Parser): ASTNode =
     ## Parses comparison expressions
     result = self.add()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
     while self.match([TokenType.LessThan,
                       TokenType.GreaterThan, TokenType.LessOrEqual, TokenType.GreaterOrEqual]):
 
@@ -290,8 +275,6 @@ proc equality(self: Parser): ASTNode =
     ## Parses equality expressions
     result = self.comparison()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
     while self.match([TokenType.DoubleEqual, TokenType.NotEqual]):
 
@@ -303,8 +286,6 @@ proc logical_and(self: Parser): ASTNode =
     ## Parses logical AND expressions
     result = self.equality()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
     while self.match(TokenType.LogicalAnd):
 
@@ -316,8 +297,6 @@ proc logical_or(self: Parser): ASTNode =
     ## Parses logical OR expressions
     result = self.logical_and()
-    if result == nil:
-        return
     var operator: Token
     var right: ASTNode
     while self.match(TokenType.LogicalOr):
 
@@ -335,8 +314,6 @@ proc assignment(self: Parser): ASTNode =
     ## Parses assignment, the highest-level
     ## expression
     result = self.binary()
-    if result == nil:
-        return
     if self.match(TokenType.Equal):
         var tok = self.peek(-1)
         var value = self.assignment()
 
@@ -355,8 +332,6 @@ proc expressionStatement(self: Parser): ASTNode =
     ## Parses expression statements, which
     ## are expressions followed by a semicolon
     var expression = self.expression()
-    if expression == nil:
-        return
     endOfLIne("missing semicolon after expression")
     result = newAstNode(self.peek(-1), NodeKind.exprStmt, @[expression])
 
@@ -367,8 +342,6 @@ proc delStmt(self: Parser): ASTNode =
     ## Parses del statements, which unbind
     ## a name from its value in the current scope and
     ## calls its destructor
     var expression = self.expression()
-    if expression == nil:
-        return
     var temp = expression
     endOfLIne("missing semicolon after del statement")
     if expression.kind == NodeKind.groupingExpr:
 
@@ -433,13 +406,20 @@ proc returnStmt(self: Parser): ASTNode =
 
 
 proc importStmt(self: Parser): ASTNode =
     ## Parses import statements
-    var name = self.expression()
-    if name == nil:
-        return
-    if name.kind != NodeKind.identExpr:
+    result = self.expression()
+    if result.kind != NodeKind.identExpr:
         self.error("expecting module name after import statement")
     endOfLine("missing semicolon after import statement")
-    result = newASTNode(self.peek(-1), NodeKind.importStmt, @[name])
+    result = newASTNode(self.peek(-1), NodeKind.importStmt, @[result])
+
+
+proc whileStmt(self: Parser): ASTNode =
+    ## Parses a C-style while loop statement
+    discard self.expect(TokenType.LeftParen, "expecting '(' before loop condition")
+    var condition = self.expression()
+    discard self.expect(TokenType.RightParen, "unterminated loop condition")
+    var body = self.statement()
+    result = newASTNode(self.peek(-1), NodeKind.whileStmt, @[condition, body])
 
 
@@ -463,6 +443,9 @@ proc statement(self: Parser): ASTNode =
         of TokenType.Import:
             discard self.step()
             result = self.importStmt()
+        of TokenType.While:
+            discard self.step()
+            result = self.whileStmt()
         of TokenType.Async, TokenType.Await, TokenType.Dynamic, TokenType.Foreach:
             discard self.step()  # TODO: Reserved for future use
         of TokenType.LeftBrace:
             discard self.step()
 
@@ -485,6 +468,4 @@ proc parse*(self: Parser, tokens: seq[Token], file: string): seq[ASTNode] =
     self.file = file
     while not self.done():
         result.add(self.declaration())
-        if self.errored:
-            result = @[]
-            break
+
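
A note on the in-place escape handling added to the lexer: parseEscape decodes an escape by overwriting the selector character directly in self.source, and the "madness" comment in the string-parsing loop refers to first slicing the backslash out of the source, since an escape such as \x occupies two characters in the source but maps to a single byte. The snippet below is only a minimal, self-contained sketch of that slicing technique; dropBackslash and the sample values are illustrative and are not code from the patch.

# Sketch only: collapse a two-character escape by slicing out the backslash,
# then overwrite the selector character that slides into its place.
proc dropBackslash(source: var string, pos: int) =
    # `pos` is the index of the backslash; keep everything before it and
    # everything after it, shifting the escape selector down by one
    source = source[0..<pos] & source[pos + 1 .. ^1]

var s = r"a\nb"       # raw literal: the four characters a, \, n, b
dropBackslash(s, 1)   # s is now "anb" and s[1] is the selector 'n'
s[1] = char(0x0A)     # decode it in place, as parseEscape does for a newline
assert s == "a\nb"    # a, LF, b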
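
Since the errored and errorMessage fields are gone from both the lexer and the parser, callers now detect failures by catching LexingError and ParseError. The sketch below shows that calling pattern under explicit assumptions: initLexer, parse, LexingError and ParseError come from the patch, while initParser, the lex proc's signature, the module import paths and the sample program text (a C-style while loop with semicolon-terminated statements) are assumed for illustration only.

# Hypothetical driver, not part of the patch: module paths, initParser and
# the lex() signature are assumptions; only the names noted above are real.
import backend/lexer    # path assumed
import backend/parser   # path assumed

let program = """
x = 0;
while (x < 10) {
    x = x + 1;
}
"""

var lexer = initLexer()
var parser = initParser()                         # assumed constructor
try:
    let tokens = lexer.lex(program, "example")    # assumed signature
    let ast = parser.parse(tokens, "example")
    echo "parsed ", ast.len, " declaration(s)"
except LexingError, ParseError:
    # errors now propagate as exceptions instead of setting a flag and a
    # message on the object, so there is no errored/errorMessage to check
    echo getCurrentExceptionMsg()

Note that parse itself no longer needs the errored check in its main loop: a raised ParseError simply unwinds out of the while loop, so there is no partially filled result left to clear.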