diff --git a/README.md b/README.md new file mode 100644 index 0000000..ac083e6 --- /dev/null +++ b/README.md @@ -0,0 +1,136 @@ +# NimKalc - A math parsing library + +NimKalc is a simple implementation of a recursive-descent top-down parser that can evaluate +mathematical expressions. Notable mentions are support for common mathematical constants (pi, tau, Euler's number, etc.) +and scientific notation numbers (such as `2e5`). Functions (`sin`, `cos`, `tan`...) and equation-solving algorithms using Newton's method are planned but not available yet (see below) + + +## Current limitations +- No functions (coming soon) +- No equation-solving (coming soon) +- The parsing is a bit weird because `2 2` will parse the first 2 and just stop instead of erroring out (FIXME) + + +## How to use it + +NimKalc parses mathematical expressions following this process: +- Tokenize the input +- Generate an AST +- Visit the nodes + +Each of these steps can be run separately, but for convenience a wrapper +`eval` procedure has been defined which takes in a string and returns a +single AST node containing the result of the given expression. + +## Supported operators + +Beyond the classical 4 operators (`+`, `-`, `/` and `*`), NimKalc supports: +- `%` for modulo division +- `^` for exponentiation +- unary `-` for negation +- Arbitrarily nested parentheses (__not__ empty ones!) to enforce precedence + + +## Exceptions + +NimKalc defines 2 exceptions: +- `ParseError` is used when the expression is invalid +- `MathError` is used when there is an arithmetical error such as division by 0 or domain errors (e.g. `log(0)`) + +## Design + +NimKalc treats all numerical values as `float` to simplify the implementation of the underlying operators. To tell integers +from floating point numbers the `AstNode` object has a `kind` discriminant which will be equal to `NodeKind.Integer` for ints +and `NodeKind.Float` for decimals. 
It is advised that you take this into account when using the library + + +__Note__: The string representation of integer nodes won't show the decimal part for clarity + +## String representations + +All of NimKalc's objects implement the `$` operator and are therefore printable. Integer nodes will look like `Integer(x)`, while +floats are represented with `Float(x.x)`. Unary operators print as `Unary(operator, right)`, while binary operators print as `Binary(left, operator, right)`. +Parenthesized expressions print as `Grouping(expr)`, where `expr` is the expression enclosed in parentheses (as an AST node, obviously). +Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')` + + +## Example + +Here is an example of a REPL using all of NimKalc's functionality to evaluate expressions from stdin (can be found at `examples/repl.nim`) + +```nim +import nimkalc/objects/ast +import nimkalc/objects/token +import nimkalc/parsing/parser +import nimkalc/parsing/lexer +import nimkalc/objects/error + + +import strformat +import strutils + + +proc repl() = + ## A simple REPL to demonstrate NimKalc's functionality + var line: string + var result: AstNode + var tokens: seq[Token] + let lexerObj = initLexer() + let parserObj = initParser() + let visitor = initNodeVisitor() + echo "Welcome to the NimKalc REPL, type a math expression and press enter" + while true: + try: + stdout.write("=> ") + line = stdin.readLine() + echo &"Parsing and evaluation of {line} below:" + tokens = lexerObj.lex(line) + # No-one cares about the EOF token after all + echo &"Tokenization of {line}: {tokens[0..^2].join(\", \")}" + result = parserObj.parse(tokens) + echo &"AST for {line}: {result}" + result = visitor.eval(result) + case result.kind: + # The result is an AstNode object, specifically + # either a node of type NodeKind.Float or a NodeKind.Integer + of NodeKind.Float: + echo &"Value of {line}: {result.value}" + of NodeKind.Integer: + echo 
&"Value of {line}: {int(result.value)}" + else: + discard # Unreachable + except IOError: + echo "\nGoodbye." + break + except ParseError: + echo &"A parsing error occurred: {getCurrentExceptionMsg()}" + except MathError: + echo &"An arithmetic error occurred: {getCurrentExceptionMsg()}" + except OverflowDefect: + echo &"Value overflow/underflow detected: {getCurrentExceptionMsg()}" + + +when isMainModule: + repl() + +``` + +__Note__: If you don't need the intermediate representations shown here (tokens, AST) you can just `import nimkalc` and use +the `eval` procedure, which takes in a string and returns the evaluated result as a primary AST node like so: + +```nim +import nimkalc + +echo eval("2+2") # Prints Integer(4) + +``` + +## Installing + +You can clone this repository and then install the package via nimble: +- `git clone https://github.com/nocturn9x/nimkalc` +- `cd nimkalc` +- `nimble install` + + +__Note__: Nim 1.4.0 or higher is required to build NimKalc! Other versions are likely to work if they're not too old, but they have not been tested \ No newline at end of file diff --git a/examples/repl.nim b/examples/repl.nim new file mode 100644 index 0000000..495ad41 --- /dev/null +++ b/examples/repl.nim @@ -0,0 +1,69 @@ +# Copyright 2021 Mattia Giambirtone +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# A simple REPL example built on top of the NimKalc library
+
+import nimkalc/objects/ast
+import nimkalc/objects/token
+import nimkalc/parsing/parser
+import nimkalc/parsing/lexer
+import nimkalc/objects/error
+
+
+import strformat
+import strutils
+
+
+proc repl() =
+  ## A simple REPL to demonstrate NimKalc's functionality: each line read
+  ## from stdin is tokenized, parsed and evaluated, and every intermediate
+  ## representation is echoed. The loop ends when reading stdin fails
+  var line: string
+  var result: AstNode
+  var tokens: seq[Token]
+  let lexerObj = initLexer()
+  let parserObj = initParser()
+  let visitor = initNodeVisitor()
+  echo "Welcome to the NimKalc REPL, type a math expression and press enter"
+  while true:
+    try:
+      stdout.write("=> ")
+      line = stdin.readLine()
+      echo &"Parsing and evaluation of {line} below:"
+      tokens = lexerObj.lex(line)
+      # No-one cares about the EOF token after all
+      echo &"Tokenization of {line}: {tokens[0..^2].join(\", \")}"
+      result = parserObj.parse(tokens)
+      echo &"AST for {line}: {result}"
+      result = visitor.eval(result)
+      case result.kind:
+        # The result is an AstNode object, specifically
+        # either a node of type NodeKind.Float or a NodeKind.Integer
+        of NodeKind.Float:
+          echo &"Value of {line}: {result.value}"
+        of NodeKind.Integer:
+          echo &"Value of {line}: {int(result.value)}"
+        else:
+          discard  # Unreachable
+    except IOError:
+      # Reading from stdin failed (e.g. the stream was closed): leave the loop
+      echo "\nGoodbye."
+      break
+    except ParseError:
+      echo &"A parsing error occurred: {getCurrentExceptionMsg()}"
+    except MathError:
+      echo &"An arithmetic error occurred: {getCurrentExceptionMsg()}"
+    except OverflowDefect:
+      echo &"Value overflow/underflow detected: {getCurrentExceptionMsg()}"
+
+
+when isMainModule:
+  repl()
\ No newline at end of file
diff --git a/nimkalc.nimble b/nimkalc.nimble
new file mode 100644
index 0000000..b9f3922
--- /dev/null
+++ b/nimkalc.nimble
@@ -0,0 +1,11 @@
+# Package
+
+version = "0.1"
+author = "Mattia Giambirtone"
+description = "An advanced parsing library for mathematical expressions and equations"
+license = "Apache 2.0"
+srcDir = "src"
+
+# Deps (the sources use the *Defect exception names, available from Nim 1.4)
+
+requires "nim >= 1.4.0"
\ No newline at end of file
diff --git a/src/nimkalc.nim b/src/nimkalc.nim
new file mode 100644
index 0000000..73d2eec
--- /dev/null
+++ b/src/nimkalc.nim
@@ -0,0 +1,29 @@
+# Copyright 2021 Mattia Giambirtone
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+## Convenience entry point: evaluate a math expression given as a string
+
+import nimkalc/parsing/parser
+import nimkalc/objects/ast
+import nimkalc/parsing/lexer
+
+
+proc eval*(source: string): AstNode =
+  ## Tokenizes, parses and evaluates the expression in `source`,
+  ## returning the node produced by the node visitor
+  let tokenList = initLexer().lex(source)
+  let tree = initParser().parse(tokenList)
+  initNodeVisitor().eval(tree)
+
diff --git a/src/nimkalc/objects/ast.nim b/src/nimkalc/objects/ast.nim
new file mode 100644
index 0000000..4a1f2c3
--- /dev/null
+++ b/src/nimkalc/objects/ast.nim
@@ -0,0 +1,175 @@
+# Copyright 2021 Mattia Giambirtone
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An Abstract Syntax Tree and node visitor implementation
+import token
+import error
+
+import strformat
+import tables
+import math
+
+
+type
+  NodeKind* {.pure.} = enum
+    ## Discriminant selecting which fields an AstNode carries
+    Grouping, Unary, Binary, Integer,
+    Float
+  AstNode* = ref object
+    ## A node of the syntax tree; the available fields depend on `kind`
+    case kind*: NodeKind
+      of NodeKind.Grouping:
+        expr*: AstNode
+      of NodeKind.Unary:
+        unOp*: Token
+        operand*: AstNode
+      of NodeKind.Binary:
+        binOp*: Token
+        left*: AstNode
+        right*: AstNode
+      of NodeKind.Integer, NodeKind.Float:
+        # The kind makes us differentiate between
+        # floats and integers, but for our purposes
+        # using a double precision float for everything
+        # is just easier
+        value*: float64
+  NodeVisitor* = ref object
+    # A node visitor object
+
+
+proc initNodeVisitor*(): NodeVisitor =
+  ## Initializes a node visitor
+  new(result)
+
+
+proc `$`*(self: AstNode): string =
+  ## Stringifies an AST node. Integer nodes are printed without
+  ## the decimal part
+  case self.kind:
+    of NodeKind.Grouping:
+      result = &"Grouping({self.expr})"
+    of NodeKind.Unary:
+      result = &"Unary({$self.unOp.kind}, {$self.operand})"
+    of NodeKind.Binary:
+      result = &"Binary({$self.left}, {$self.binOp.kind}, {$self.right})"
+    of NodeKind.Integer:
+      result = &"Integer({$int(self.value)})"
+    of NodeKind.Float:
+      result = &"Float({$self.value})"
+
+
+# Forward declarations
+proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
+proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
+proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
+proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode
+
+
+proc accept(self: AstNode, visitor: NodeVisitor): AstNode =
+  ## Dispatches the node to the visitor procedure matching its kind
+  case self.kind:
+    of NodeKind.Integer, NodeKind.Float:
+      result = visitor.visit_literal(self)
+    of NodeKind.Binary:
+      result = visitor.visit_binary(self)
+    of NodeKind.Unary:
+      result = visitor.visit_unary(self)
+    of NodeKind.Grouping:
+      result = visitor.visit_grouping(self)
+
+
+proc eval*(self: NodeVisitor, node: AstNode): AstNode =
+  ## Evaluates an AST node
+  result = node.accept(self)
+
+
+proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits a literal AST node (such as integers)
+  result = node  # Not that we can do anything else after all, lol
+
+
+template handleBinary(left, right: AstNode, operator: untyped): AstNode =
+  ## Applies `operator` to the values of `left` and `right` and wraps the
+  ## result in an Integer node when it is a whole number that fits in an
+  ## int, in a Float node otherwise (this includes NaN and infinities)
+  let r = operator(left.value, right.value)
+  # Compare against trunc(r) and check the range explicitly instead of
+  # round-tripping through int(r): converting NaN, an infinity or a value
+  # outside the int range (e.g. 2 ^ 100) to int is not a valid conversion.
+  # float(high(int)) rounds up to 2^63 on 64-bit targets, hence the strict `<`
+  if r == trunc(r) and r >= float(low(int)) and r < float(high(int)):
+    # It's a whole number!
+    AstNode(kind: NodeKind.Integer, value: r)
+  else:
+    AstNode(kind: NodeKind.Float, value: r)
+
+
+
+template rightOpNonZero(node: AstNode, opType: string) =
+  ## Raises MathError if the value of the given literal node is zero.
+  ## `opType` names the operation for the error message (e.g. "division").
+  ## A CatchableError is raised if the node is not a literal
+  if node.value == 0.0:
+    case node.kind:
+      of NodeKind.Float:
+        raise newException(MathError, "float " & opType & " by 0")
+      of NodeKind.Integer:
+        raise newException(MathError, "integer " & opType & " by 0")
+      else:
+        raise newException(CatchableError, &"invalid node kind '{node.kind}' for rightOpNonZero")
+
+
+template ensureIntegers(left, right: AstNode) =
+  ## Ensures both operands are integers, raising MathError otherwise
+  if left.kind != NodeKind.Integer or right.kind != NodeKind.Integer:
+    raise newException(MathError, "an integer is required")
+
+
+proc visit_binary(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits a binary AST node and evaluates it. The right operand is
+  ## evaluated before the left one. Raises MathError on division or
+  ## modulo by zero and on modulo with non-integer operands
+  let right = self.eval(node.right)
+  let left = self.eval(node.left)
+  case node.binOp.kind:
+    of TokenType.Plus:
+      result = handleBinary(left, right, `+`)
+    of TokenType.Minus:
+      result = handleBinary(left, right, `-`)
+    of TokenType.Div:
+      rightOpNonZero(right, "division")
+      result = handleBinary(left, right, `/`)
+    of TokenType.Modulo:
+      # Modulo is a bit special since we must have integers
+      rightOpNonZero(right, "modulo")
+      ensureIntegers(left, right)
+      result = AstNode(kind: NodeKind.Integer, value: float(int(left.value) mod int(right.value)))
+    of TokenType.Exp:
+      result = handleBinary(left, right, pow)
+    of TokenType.Mul:
+      result = handleBinary(left, right, `*`)
+    else:
+      discard  # Unreachable
+
+
+proc visit_unary(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits unary expressions and evaluates them. The negated
+  ## result keeps the kind (Integer or Float) of its operand
+  let expr = self.eval(node.operand)
+  case node.unOp.kind:
+    of TokenType.Minus:
+      case expr.kind:
+        of NodeKind.Float:
+          result = AstNode(kind: NodeKind.Float, value: -expr.value)
+        of NodeKind.Integer:
+          result = AstNode(kind: NodeKind.Integer, value: -expr.value)
+        else:
+          discard  # Unreachable
+    else:
+      discard  # Unreachable
+
+
+proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits grouping (i.e. parenthesized) expressions. Parentheses
+  ## have no other meaning than to allow a lower-precedence expression
+  ## where a higher-precedence one is expected so that 2 * (3 + 1) is
+  ## different from 2 * 3 + 1
+  return self.eval(node.expr)
diff --git a/src/nimkalc/objects/error.nim b/src/nimkalc/objects/error.nim
new file mode 100644
index 0000000..f692926
--- /dev/null
+++ b/src/nimkalc/objects/error.nim
@@ -0,0 +1,20 @@
+# Copyright 2021 Mattia Giambirtone
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+
+type
+  ParseError* = object of CatchableError
+    ## Raised when an expression cannot be parsed
+  MathError* = object of ArithmeticDefect
diff --git a/src/nimkalc/objects/token.nim b/src/nimkalc/objects/token.nim
new file mode 100644
index 0000000..7d39e60
--- /dev/null
+++ b/src/nimkalc/objects/token.nim
@@ -0,0 +1,35 @@
+# Copyright 2021 Mattia Giambirtone
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Token kinds and the token object shared by the lexer and the parser
+import strformat
+
+type
+  TokenType* {.pure.} = enum
+    # Numeric literals
+    Int, Float,
+    # Operators and parentheses
+    Plus, Minus, Div, Exp, Modulo,
+    Mul, RightParen, LeftParen,
+    # End-of-input marker
+    Eof
+  Token* = object
+    # A token: its kind plus the text it was created from
+    lexeme*: string
+    kind*: TokenType
+
+
+proc `$`*(self: Token): string =
+  ## Renders the token as Token(kind, 'lexeme')
+  "Token(" & $self.kind & ", '" & self.lexeme & "')"
\ No newline at end of file
diff --git a/src/nimkalc/parsing/lexer.nim b/src/nimkalc/parsing/lexer.nim
new file mode 100644
index 0000000..1c53000
--- /dev/null
+++ b/src/nimkalc/parsing/lexer.nim
@@ -0,0 +1,145 @@
+# Copyright 2021 Mattia Giambirtone
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A simple lexer module
+
+import strutils
+import strformat
+import tables
+
+import ../objects/token
+import ../objects/error
+
+
+# Table of all tokens
+const tokens = to_table({
+  '(': TokenType.LeftParen, ')': TokenType.RightParen,
+  '-': TokenType.Minus, '+': TokenType.Plus,
+  '*': TokenType.Mul, '/': TokenType.Div,
+  '%': TokenType.Modulo, '^': TokenType.Exp})
+# All the identifiers and constants (such as PI)
+# Since they're constant we don't even need to bother adding another
+# AST node kind, we can just map the name to a float literal ;)
+const identifiers = to_table({
+  "pi": Token(kind: TokenType.Float, lexeme: "3.141592653589793"),
+  "e": Token(kind: TokenType.Float, lexeme: "2.718281828459045"),
+  "tau": Token(kind: TokenType.Float, lexeme: "6.283185307179586")
+})
+
+
+type
+  Lexer* = ref object
+    # A lexer object
+    source*: string
+    tokens*: seq[Token]
+    start*: int
+    current*: int
+
+
+func initLexer*(): Lexer =
+  ## Initializes the lexer in an empty state
+  result = Lexer(source: "", tokens: @[], start: 0, current: 0)
+
+
+func done(self: Lexer): bool =
+  ## Returns true if we reached EOF
+  result = self.current >= self.source.len
+
+
+proc step(self: Lexer): char =
+  ## Steps one character forward in the
+  ## source. A null terminator is returned
+  ## if the lexer is at EOF
+  if self.done():
+    return '\0'
+  self.current = self.current + 1
+  result = self.source[self.current - 1]
+
+
+proc peek(self: Lexer): char =
+  ## Returns the current character in the
+  ## source without consuming it.
+  ## A null terminator is returned
+  ## if the lexer is at EOF
+  if self.done():
+    result = '\0'
+  else:
+    result = self.source[self.current]
+
+
+func createToken(self: Lexer, tokenType: TokenType): Token =
+  ## Creates a token object for later use in the parser
+  result = Token(kind: tokenType,
+                 lexeme: self.source[self.start..= self.tokens.high()
+
+
+proc peek(self: Parser): Token =
+  ## Peeks into the tokens list or
+  ## returns an EOF token if we're at
+  ## the end of the input
+  if not self.done():
+    result = self.tokens[self.current]
+  else:
+    result = endOfFile
+
+
+proc step(self: Parser): Token =
+  ## Consumes a token from the input and
+  ## steps forward or returns an EOF token
+  ## if we're at the end of the input
+  if not self.done():
+    result = self.peek()
+    self.current += 1
+  else:
+    result = endOfFile
+
+
+proc previous(self: Parser): Token =
+  ## Returns the previously consumed
+  ## token
+  result = self.tokens[self.current - 1]
+
+
+proc check(self: Parser, kind: TokenType): bool =
+  ## Returns true if the current token matches
+  ## the given type
+  result = self.peek().kind == kind
+
+
+proc match(self: Parser, kind: TokenType): bool =
+  ## Checks if the current token matches the
+  ## given type and consumes it if it does, returns
+  ## false otherwise. True is returned if the
+  ## match is successful
+  if self.check(kind):
+    discard self.step()
+    result = true
+  else:
+    result = false
+
+
+proc match(self: Parser, kinds: varargs[TokenType]): bool =
+  ## Checks if the current token matches any of the
+  ## given type(s) and consumes it if it does, returns
+  ## false otherwise. True is returned at
+  ## the first successful match
+  for kind in kinds:
+    if self.match(kind):
+      return true
+  result = false
+
+
+proc error(self: Parser, message: string) =
+  ## Raises a parsing error with the given message
+  raise newException(ParseError, message)
+
+
+proc expect(self: Parser, kind: TokenType, message: string) =
+  ## Checks if the current token matches the given type
+  ## and consumes it if it does, raises an error
+  ## with the given message otherwise.
+  if not self.match(kind):
+    self.error(message)
+
+
+proc primary(self: Parser): AstNode =
+  ## Parses primary expressions (numeric literals and parenthesized
+  ## expressions). It inspects the token that was consumed last, so
+  ## the caller must have stepped onto it already
+  let value = self.previous()
+  case value.kind:
+    of TokenType.Int:
+      result = AstNode(kind: NodeKind.Integer, value: 0.0)
+      discard parseFloat(value.lexeme, result.value)
+    of TokenType.Float:
+      result = AstNode(kind: NodeKind.Float, value: 0.0)
+      discard parseFloat(value.lexeme, result.value)
+    of TokenType.LeftParen:
+      let expression = self.binary()
+      self.expect(TokenType.RightParen, "unexpected EOL")
+      result = AstNode(kind: NodeKind.Grouping, expr: expression)
+    else:
+      self.error(&"invalid token of kind '{value.kind}' in primary expression")
+
+
+proc unary(self: Parser): AstNode =
+  ## Parses unary expressions such as -1. Raises ParseError
+  ## if the input ends where an operand is expected
+  if self.done():
+    # step() does not advance at the end of the input, so primary() would
+    # look at the previously consumed token again: that reads tokens[-1]
+    # when nothing has been consumed yet and recurses without end when the
+    # previous token is a '('
+    self.error("unexpected end of input")
+  case self.step().kind:
+    of TokenType.Minus:
+      result = AstNode(kind: NodeKind.Unary, unOp: self.previous(), operand: self.unary())
+    else:
+      result = self.primary()
+
+
+proc pow(self: Parser): AstNode =
+  ## Parses exponentiation
+  # NOTE(review): the loop below makes `^` left-associative and, since its
+  # operands are parsed by unary(), -2 ^ 2 is read as (-2) ^ 2; confirm that
+  # this is the intended behavior
+  result = self.unary()
+  var operator: Token
+  while self.match(TokenType.Exp):
+    operator = self.previous()
+    result = AstNode(kind: NodeKind.Binary, left: result, right: self.unary(), binOp: operator)
+
+
+proc mul(self: Parser): AstNode =
+  ## Parses divisions (including modulo) and
+  ## multiplications
+  result = self.pow()
+  var operator: Token
+  while self.match(TokenType.Div, TokenType.Modulo, TokenType.Mul):
+    operator = self.previous()
+    result = AstNode(kind: NodeKind.Binary, left: result, right: self.pow(), binOp: operator)
+
+
+proc addition(self: Parser): AstNode =
+  ## Parses additions and subtractions
+  result = self.mul()
+  var operator: Token
+  while self.match(TokenType.Plus, TokenType.Minus):
+    operator = self.previous()
+    result = AstNode(kind: NodeKind.Binary, left: result, right: self.mul(), binOp: operator)
+
+
+proc binary(self: Parser): AstNode =
+  ## Parses binary expressions, the highest
+  ## level of expression
+  result = self.addition()
+
+
+
+proc parse*(self: Parser, tokens: seq[Token]): AstNode =
+  ## Parses a list of tokens into an AST tree. Raises ParseError if the
+  ## expression is invalid or if tokens are left over after it
+  self.tokens = tokens
+  # Start from the first token again: the same parser object may be
+  # used for several inputs, and the cursor is not reset anywhere else
+  self.current = 0
+  result = self.binary()
+  if not self.done():
+    # Something like "2 2": do not silently ignore the remaining tokens
+    self.error(&"unexpected token of kind '{self.peek().kind}' after the end of the expression")
+