From 219c5c9ac107f96cbb8996438c41d3140c3b47fa Mon Sep 17 00:00:00 2001 From: nocturn9x Date: Thu, 11 Mar 2021 20:02:51 +0100 Subject: [PATCH] Added a rough functions implementation --- README.md | 50 ++++++++---- src/nimkalc/objects/ast.nim | 136 +++++++++++++++++++++------------ src/nimkalc/objects/error.nim | 7 +- src/nimkalc/objects/token.nim | 4 +- src/nimkalc/parsing/lexer.nim | 22 +++++- src/nimkalc/parsing/parser.nim | 32 +++++++- 6 files changed, 177 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 2ca1aeb..0dd74d5 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,27 @@ # NimKalc - A math parsing library NimKalc is a simple implementation of a recursive-descent top-down parser that can evaluate -mathematical expressions. Notable mentions are support for common mathematical constants (pi, tau, euler's number, etc), -functions (`sin`, `cos`, `tan`...), equation-solving algos using newton's method and scientific notation numbers (such as `2e5`) +mathematical expressions. + +__Disclaimer__: This library is __in beta__ and is not fully tested yet. It will be soon, though + +Features: +- Support for mathematical constants (`pi`, `tau` and `e` right now) +- Supported functions: + - `sin` + - `cos` + - `tan` + - `sqrt` + - `root` (for generic roots, takes the base and the argument) + - `log` (logarithm in base `e`) + - `logN` (logarithm in a given base, second argument) +- Parentheses can be used to enforce different precedence levels +- Easy API for tokenization, parsing and evaluation of AST nodes ## Current limitations -- No functions (coming soon) - No equation-solving (coming soon) -- The parsing is a bit weird because `2 2` will parse the first 2 and just stop instead of erroring out (FIXME) +- The parsing is a bit weird because something like `2 2` will parse the first 2 and just stop instead of erroring out (FIXME) ## How to use it @@ -18,9 +31,8 @@ NimKalc parses mathematical expressions following this process: - Generate an AST - Visit the nodes -Each of these steps can be run separately, but for convenience a wrapper -`eval` procedure has been defined which takes in a string and returns a -single AST node containing the result of the given expression. +Each of these steps can be run separately, but for convenience a wrapper `eval` procedure has been defined which takes in a string +and returns a single AST node containing the result of the given expression. ## Supported operators @@ -28,30 +40,39 @@ Beyond the classical 4 operators (`+`, `-`, `/` and `*`), NimKalc supports: - `%` for modulo division - `^` for exponentiation - unary `-` for negation -- Arbitrarily nested parentheses (__not__ empty ones!) to enforce precedence - ## Exceptions -NimKalc defines 2 exceptions: -- `ParseError` is used when the expression is invalid +NimKalc defines various exceptions: +- `NimKalcException` is a generic superclass for all errors +- `ParseError` is used when the expression is syntactically invalid - `MathError` is used when there is an arithmetical error such as division by 0 or domain errors (e.g. `log(0)`) +- `EvaluationError` is used when the runtime evaluation of an expression fails (e.g. trying to call something that isn't a function) ## Design NimKalc treats all numerical values as `float` to simplify the implementation of the underlying operators. To tell integers from floating point numbers the `AstNode` object has a `kind` discriminant which will be equal to `NodeKind.Integer` for ints -and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library +and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library, since integers might +start losing precision when converted from their float counterpart due to the difference of the two types. Everything should +be fine as long as the value doesn't exceed 2 ^ 53, though __Note__: The string representation of integer nodes won't show the decimal part for clarity +Some other notable design choices (due to the underlying simplicity of the language we parse) are as follows: +- Identifiers are checked when tokenizing, since they're all constant +- Mathematical constants are immediately mapped to their real values when tokenizing with no intermediate steps or tokens +- Type errors (such as trying to call an integer) are detected statically at parse time + + ## String representations All of NimKalc's objects implement the `$` operator and are therefore printable. Integer nodes will look like `Integer(x)`, while floats are represented with `Float(x.x)`. Unary operators print as `Unary(operator, right)`, while binary operators print as `Binary(left, operator, right)`. Parenthesized expressions print as `Grouping(expr)`, where `expr` is the expression enclosed in parentheses (as an AST node, obviously). -Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')` +Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')`. Function calls print like `Call(name, args)` +where `name` is the function name and `args` is a `seq[AstNode]` representing the function's arguments ## Example @@ -115,14 +136,13 @@ when isMainModule: ``` -__Note__: If you don't need the intermediate representations shown here (tokens, AST) you can just `import nimkalc` and use +__Note__: If you don't need the intermediate representations shown here (tokens/AST) you can just `import nimkalc` and use the `eval` procedure, which takes in a string and returns the evaluated result as a primary AST node like so: ```nim import nimkalc echo eval("2+2") # Prints Integer(4) - ``` ## Installing diff --git a/src/nimkalc/objects/ast.nim b/src/nimkalc/objects/ast.nim index 4a1f2c3..e312026 100644 --- a/src/nimkalc/objects/ast.nim +++ b/src/nimkalc/objects/ast.nim @@ -19,13 +19,16 @@ import error import strformat import tables import math +import strutils type NodeKind* {.pure.} = enum + # An enum for all kinds of AST nodes Grouping, Unary, Binary, Integer, - Float + Float, Call, Ident AstNode* = ref object + # An AST node object case kind*: NodeKind of NodeKind.Grouping: expr*: AstNode @@ -42,6 +45,11 @@ type # using a double precision float for everything # is just easier value*: float64 + of NodeKind.Ident: + name*: string + of NodeKind.Call: + arguments*: seq[AstNode] + function*: AstNode NodeVisitor* = ref object # A node visitor object @@ -64,35 +72,10 @@ proc `$`*(self: AstNode): string = result = &"Integer({$int(self.value)})" of NodeKind.Float: result = &"Float({$self.value})" - - -# Forward declarations -proc visit_literal(self: NodeVisitor, node: AstNode): AstNode -proc visit_unary(self: NodeVisitor, node: AstNode): AstNode -proc visit_binary(self: NodeVisitor, node: AstNode): AstNode -proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode - - -proc accept(self: AstNode, visitor: NodeVisitor): AstNode = - case self.kind: - of NodeKind.Integer, NodeKind.Float: - result = visitor.visit_literal(self) - of NodeKind.Binary: - result = visitor.visit_binary(self) - of NodeKind.Unary: - result = visitor.visit_unary(self) - of NodeKind.Grouping: - result = visitor.visit_grouping(self) - - -proc eval*(self: NodeVisitor, node: AstNode): AstNode = - ## Evaluates an AST node - result = node.accept(self) - - -proc visit_literal(self: NodeVisitor, node: AstNode): AstNode = - ## Visits a literal AST node (such as integers) - result = node # Not that we can do anything else after all, lol + of NodeKind.Call: + result = &"Call({self.function.name}, {self.arguments})" + of NodeKind.Ident: + result = &"Identifier({self.name})" template handleBinary(left, right: AstNode, operator: untyped): AstNode = @@ -106,18 +89,14 @@ template handleBinary(left, right: AstNode, operator: untyped): AstNode = AstNode(kind: NodeKind.Float, value: r) - -template rightOpNonZero(node: AstNode, opType: string) = - ## Handy template to make sure that the given AST node matches - ## a condition from +template ensureNonZero(node: AstNode) = + ## Handy template to ensure that a given node's value is not 0 if node.value == 0.0: case node.kind: - of NodeKind.Float: - raise newException(MathError, "float " & opType & " by 0") - of NodeKind.Integer: - raise newException(MathError, "integer " & opType & " by 0") + of NodeKind.Float, NodeKind.Integer: + raise newException(MathError, &"{($node.kind).toLowerAscii()} can't be zero") else: - raise newException(CatchableError, &"invalid node kind '{node.kind}' for rightOpNonZero") + raise newException(CatchableError, &"invalid node kind '{node.kind}' for ensureNonZero") template ensureIntegers(left, right: AstNode) = @@ -126,6 +105,73 @@ template ensureIntegers(left, right: AstNode) = raise newException(MathError, "an integer is required") +# Forward declarations +proc visit_literal(self: NodeVisitor, node: AstNode): AstNode +proc visit_unary(self: NodeVisitor, node: AstNode): AstNode +proc visit_binary(self: NodeVisitor, node: AstNode): AstNode +proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode +proc visit_call(self: NodeVisitor, node: AstNode): AstNode + + +proc accept(self: AstNode, visitor: NodeVisitor): AstNode = + ## Implements the accept part of the visitor pattern + ## for our AST visitor + case self.kind: + of NodeKind.Integer, NodeKind.Float, NodeKind.Ident: + result = visitor.visit_literal(self) + of NodeKind.Binary: + result = visitor.visit_binary(self) + of NodeKind.Unary: + result = visitor.visit_unary(self) + of NodeKind.Grouping: + result = visitor.visit_grouping(self) + of NodeKind.Call: + result = visitor.visit_call(self) + + +proc eval*(self: NodeVisitor, node: AstNode): AstNode = + ## Evaluates an AST node + result = node.accept(self) + + +proc visit_literal(self: NodeVisitor, node: AstNode): AstNode = + ## Visits a literal AST node (such as integers) + result = node # Not that we can do anything else after all, lol + + +proc visit_call(self: NodeVisitor, node: AstNode): AstNode = + ## Visits function call expressions + var args: seq[AstNode] = @[] + for arg in node.arguments: + args.add(self.eval(arg)) + if node.function.name == "sin": + let r = sin(args[0].value) + if r is float: + result = AstNode(kind: NodeKind.Float, value: r) + else: + result = AstNode(kind: NodeKind.Integer, value: float(r)) + if node.function.name == "cos": + let r = cos(args[0].value) + if r is float: + result = AstNode(kind: NodeKind.Float, value: r) + else: + result = AstNode(kind: NodeKind.Integer, value: float(r)) + if node.function.name == "tan": + let r = tan(args[0].value) + if r is float: + result = AstNode(kind: NodeKind.Float, value: r) + else: + result = AstNode(kind: NodeKind.Integer, value: float(r)) + + +proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode = + ## Visits grouping (i.e. parenthesized) expressions. Parentheses + ## have no other meaning than to allow a lower-precedence expression + ## where a higher-precedence one is expected so that 2 * (3 + 1) is + ## different from 2 * 3 + 1 + return self.eval(node.expr) + + proc visit_binary(self: NodeVisitor, node: AstNode): AstNode = ## Visits a binary AST node and evaluates it let right = self.eval(node.right) @@ -136,11 +182,11 @@ proc visit_binary(self: NodeVisitor, node: AstNode): AstNode = of TokenType.Minus: result = handleBinary(left, right, `-`) of TokenType.Div: - rightOpNonZero(right, "division") + ensureNonZero(right) result = handleBinary(left, right, `/`) of TokenType.Modulo: # Modulo is a bit special since we must have integers - rightOpNonZero(right, "modulo") + ensureNonZero(right) ensureIntegers(left, right) result = AstNode(kind: NodeKind.Integer, value: float(int(left.value) mod int(right.value))) of TokenType.Exp: @@ -165,11 +211,3 @@ proc visit_unary(self: NodeVisitor, node: AstNode): AstNode = discard # Unreachable else: discard # Unreachable - - -proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode = - ## Visits grouping (i.e. parenthesized) expressions. Parentheses - ## have no other meaning than to allow a lower-precedence expression - ## where a higher-precedence one is expected so that 2 * (3 + 1) is - ## different from 2 * 3 + 1 - return self.eval(node.expr) diff --git a/src/nimkalc/objects/error.nim b/src/nimkalc/objects/error.nim index f692926..df19e81 100644 --- a/src/nimkalc/objects/error.nim +++ b/src/nimkalc/objects/error.nim @@ -15,6 +15,9 @@ type - ParseError* = object of CatchableError + NimKalcException* = object of CatchableError + ParseError* = object of NimKalcException ## A parsing exception - MathError* = object of ArithmeticDefect + MathError* = object of NimKalcException + ## An arithmetic error + EvaluationError* = object of NimKalcException diff --git a/src/nimkalc/objects/token.nim b/src/nimkalc/objects/token.nim index 7d39e60..ac52109 100644 --- a/src/nimkalc/objects/token.nim +++ b/src/nimkalc/objects/token.nim @@ -22,8 +22,10 @@ type # Operators Plus, Minus, Div, Exp, Modulo, Mul, RightParen, LeftParen, + # Identifiers + Ident, # Other - Eof + Eof, Comma Token* = object # A token object lexeme*: string diff --git a/src/nimkalc/parsing/lexer.nim b/src/nimkalc/parsing/lexer.nim index 1c53000..19b117e 100644 --- a/src/nimkalc/parsing/lexer.nim +++ b/src/nimkalc/parsing/lexer.nim @@ -27,15 +27,18 @@ const tokens = to_table({ '(': TokenType.LeftParen, ')': TokenType.RightParen, '-': TokenType.Minus, '+': TokenType.Plus, '*': TokenType.Mul, '/': TokenType.Div, - '%': TokenType.Modulo, '^': TokenType.Exp}) + '%': TokenType.Modulo, '^': TokenType.Exp, + ',': TokenType.Comma}) # All the identifiers and constants (such as PI) # Since they're constant we don't even need to bother adding another # AST node kind, we can just map the name to a float literal ;) -const identifiers = to_table({ +const constants = to_table({ "pi": Token(kind: TokenType.Float, lexeme: "3.141592653589793"), "e": Token(kind: TokenType.Float, lexeme: "2.718281828459045"), "tau": Token(kind: TokenType.Float, lexeme: "6.283185307179586") }) +# Since also math functions are hardcoded, we can use an array +const functions = ["sin", "cos", "tan"] type @@ -88,6 +91,8 @@ func createToken(self: Lexer, tokenType: TokenType): Token = proc parseNumber(self: Lexer) = ## Parses numeric literals var kind = TokenType.Int + var scientific: bool = false + var sign: bool = false while true: if self.peek().isDigit(): discard self.step() @@ -99,6 +104,11 @@ proc parseNumber(self: Lexer) = # Scientific notation kind = TokenType.Float discard self.step() + scientific = true + elif self.peek().toLowerAscii() in {'-', '+'} and scientific and not sign: + # So we can parse stuff like 2e-5 + sign = true + discard self.step() else: break self.tokens.add(self.createToken(kind)) @@ -111,8 +121,10 @@ proc parseIdentifier(self: Lexer) = while self.peek().isAlphaNumeric() or self.peek() in {'_', }: discard self.step() var text: string = self.source[self.start..