Added a rough functions implementation

2021-03-11 20:02:51 +01:00 · 2021-03-11 20:02:51 +01:00 · 219c5c9ac1
parent cd80d3babf
commit 219c5c9ac1
6 changed files with 177 additions and 74 deletions
--- a/README.md
+++ b/README.md
@ -1,14 +1,27 @@
 # NimKalc - A math parsing library

 NimKalc is a simple implementation of a recursive-descent top-down parser that can evaluate
-mathematical expressions. Notable mentions are support for common mathematical constants (pi, tau, euler's number, etc),
-functions (`sin`, `cos`, `tan`...), equation-solving algos using newton's method and scientific notation numbers (such as `2e5`)
+mathematical expressions.
+
+__Disclaimer__: This library is __in beta__ and is not fully tested yet, although it will be soon.
+
+Features:
+- Support for mathematical constants (`pi`, `tau` and `e` right now)
+- Supported functions:
+  - `sin`
+  - `cos`
+  - `tan`
+  - `sqrt`
+  - `root` (for generic roots, takes the base and the argument)
+  - `log` (logarithm in base `e`)
+  - `logN` (logarithm in a given base, second argument)
+- Parentheses can be used to enforce different precedence levels
+- Easy API for tokenization, parsing and evaluation of AST nodes


 ## Current limitations
- No functions (coming soon)
 - No equation-solving (coming soon)
- The parsing is a bit weird because `2 2` will parse the first 2 and just stop instead of erroring out (FIXME)
+- The parsing is a bit weird because something like `2 2` will parse the first 2 and just stop instead of erroring out (FIXME)


 ## How to use it
@ -18,9 +31,8 @@ NimKalc parses mathematical expressions following this process:
 - Generate an AST
 - Visit the nodes

-Each of these steps can be run separately, but for convenience a wrapper
-`eval` procedure has been defined which takes in a string and returns a
-single AST node containing the result of the given expression.
+Each of these steps can be run separately, but for convenience a wrapper `eval` procedure has been defined which takes in a string 
+and returns a single AST node containing the result of the given expression.

 ## Supported operators

@ -28,30 +40,39 @@ Beyond the classical 4 operators (`+`, `-`, `/` and `*`), NimKalc supports:
 - `%` for modulo division
 - `^` for exponentiation
 - unary `-` for negation
- Arbitrarily nested parentheses (__not__ empty ones!) to enforce precedence
-

 ## Exceptions

-NimKalc defines 2 exceptions:
- `ParseError` is used when the expression is invalid
+NimKalc defines various exceptions:
+- `NimKalcException` is a generic superclass for all errors
+- `ParseError` is used when the expression is syntactically invalid
 - `MathError` is used when there is an arithmetical error such as division by 0 or domain errors (e.g. `log(0)`)
+- `EvaluationError` is used when the runtime evaluation of an expression fails (e.g. trying to call something that isn't a function)

 ## Design

 NimKalc treats all numerical values as `float` to simplify the implementation of the underlying operators. To tell integers
 from floating point numbers the `AstNode` object has a `kind` discriminant which will be equal to `NodeKind.Integer` for ints
-and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library
+and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library: because integers are
+stored as 64-bit floats, they can only be represented exactly up to 2 ^ 53 in magnitude and start losing precision beyond
+that. Everything should be fine as long as the value doesn't exceed 2 ^ 53, though.


 __Note__: The string representation of integer nodes won't show the decimal part for clarity

+Some other notable design choices (due to the underlying simplicity of the language we parse) are as follows:
+- Identifiers are checked when tokenizing, since they're all constant
+- Mathematical constants are immediately mapped to their real values when tokenizing with no intermediate steps or tokens
+- Type errors (such as trying to call an integer) are detected statically at parse time
+
+
 ## String representations

 All of NimKalc's objects implement the `$` operator and are therefore printable. Integer nodes will look like `Integer(x)`, while
 floats are represented with `Float(x.x)`. Unary operators print as `Unary(operator, right)`, while binary operators print as `Binary(left, operator, right)`.
 Parenthesized expressions print as `Grouping(expr)`, where `expr` is the expression enclosed in parentheses (as an AST node, obviously).
-Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')`
+Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')`. Function calls print like `Call(name, args)`
+where `name` is the function name and `args` is a `seq[AstNode]` representing the function's arguments


 ## Example
@ -115,14 +136,13 @@ when isMainModule:

 ```

-__Note__: If you don't need the intermediate representations shown here (tokens, AST) you can just `import nimkalc` and use
+__Note__: If you don't need the intermediate representations shown here (tokens/AST) you can just `import nimkalc` and use
 the `eval` procedure, which takes in a string and returns the evaluated result as a primary AST node like so:

 ```nim
 import nimkalc

 echo eval("2+2")  # Prints Integer(4)
-
 ```

 ## Installing
--- a/src/nimkalc/objects/ast.nim
+++ b/src/nimkalc/objects/ast.nim
@ -19,13 +19,16 @@ import error
 import strformat
 import tables
 import math
+import strutils


 type
  NodeKind* {.pure.} = enum
+    # An enum for all kinds of AST nodes
    Grouping, Unary, Binary, Integer, 
-    Float
+    Float, Call, Ident
  AstNode* = ref object
+    # An AST node object
    case kind*: NodeKind
      of NodeKind.Grouping:
        expr*: AstNode
@ -42,6 +45,11 @@ type
        # using a double precision float for everything
        # is just easier
        value*: float64
+      of NodeKind.Ident:
+        name*: string
+      of NodeKind.Call:
+          arguments*: seq[AstNode]
+          function*: AstNode
  NodeVisitor* = ref object
    # A node visitor object

@ -64,35 +72,10 @@ proc `$`*(self: AstNode): string =
        result = &"Integer({$int(self.value)})"
      of NodeKind.Float:
        result = &"Float({$self.value})"
-
-
-# Forward declarations
-proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
-proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
-proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
-proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode
-
-
-proc accept(self: AstNode, visitor: NodeVisitor): AstNode = 
-  case self.kind:
-    of NodeKind.Integer, NodeKind.Float:
-      result =  visitor.visit_literal(self)
-    of NodeKind.Binary:
-      result = visitor.visit_binary(self)
-    of NodeKind.Unary:
-      result = visitor.visit_unary(self)
-    of NodeKind.Grouping:
-      result = visitor.visit_grouping(self)
-
-
-proc eval*(self: NodeVisitor, node: AstNode): AstNode = 
-  ## Evaluates an AST node
-  result = node.accept(self)
-
-
-proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
-  ## Visits a literal AST node (such as integers)
-  result = node   # Not that we can do anything else after all, lol
+      of NodeKind.Call:
+        result = &"Call({self.function.name}, {self.arguments})"
+      of NodeKind.Ident:
+        result = &"Identifier({self.name})"


 template handleBinary(left, right: AstNode, operator: untyped): AstNode = 
@ -106,18 +89,14 @@ template handleBinary(left, right: AstNode, operator: untyped): AstNode =
    AstNode(kind: NodeKind.Float, value: r)


-
-template rightOpNonZero(node: AstNode, opType: string) = 
-  ## Handy template to make sure that the given AST node matches
-  ## a condition from 
+template ensureNonZero(node: AstNode) = 
+  ## Handy template to ensure that a given node's value is not 0
  if node.value == 0.0:
      case node.kind:
-        of NodeKind.Float:
-            raise newException(MathError, "float " & opType & " by 0")
-        of NodeKind.Integer:
-            raise newException(MathError, "integer " & opType & " by 0")
+        of NodeKind.Float, NodeKind.Integer:
+            raise newException(MathError, &"{($node.kind).toLowerAscii()} can't be zero")
        else:
-          raise newException(CatchableError, &"invalid node kind '{node.kind}' for rightOpNonZero")
+          raise newException(CatchableError, &"invalid node kind '{node.kind}' for ensureNonZero")


 template ensureIntegers(left, right: AstNode) = 
@ -126,6 +105,73 @@ template ensureIntegers(left, right: AstNode) =
    raise newException(MathError, "an integer is required")


+# Forward declarations: accept() below dispatches to these visit_* procs,
+# whose bodies only appear further down in the file
+proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
+proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
+proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
+proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode
+proc visit_call(self: NodeVisitor, node: AstNode): AstNode
+
+
+proc accept(self: AstNode, visitor: NodeVisitor): AstNode =
+  ## The "accept" half of the visitor pattern: hands this node to
+  ## the visitor proc that matches its kind and returns whatever
+  ## that proc produces
+  result = case self.kind
+    of NodeKind.Call:
+      visitor.visit_call(self)
+    of NodeKind.Grouping:
+      visitor.visit_grouping(self)
+    of NodeKind.Unary:
+      visitor.visit_unary(self)
+    of NodeKind.Binary:
+      visitor.visit_binary(self)
+    of NodeKind.Integer, NodeKind.Float, NodeKind.Ident:
+      # Literals and identifiers share the same visitor
+      visitor.visit_literal(self)
+
+
+proc eval*(self: NodeVisitor, node: AstNode): AstNode =
+  ## Evaluates an AST node by letting it accept this visitor
+  node.accept(self)
+
+
+proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits a literal AST node (such as integers). A literal is
+  ## already fully evaluated, so the node is handed back untouched
+  node
+
+
+proc visit_call(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits function call expressions.
+  ##
+  ## Every argument is evaluated first, then the function named by
+  ## `node.function.name` (sin, cos or tan) is applied to the first
+  ## argument and the outcome is returned as a Float node.
+  ##
+  ## Raises EvaluationError if no argument was supplied, if the first
+  ## argument did not evaluate to a number, or if the function name is
+  ## not one of the supported ones
+  var args: seq[AstNode] = @[]
+  for arg in node.arguments:
+    args.add(self.eval(arg))
+  let name = node.function.name
+  if args.len == 0:
+    raise newException(EvaluationError, &"function '{name}' requires an argument")
+  # Only numeric nodes carry a `value` field: reading it from any other
+  # kind (e.g. a bare identifier, as in `sin(sin)`) would be a field defect
+  if args[0].kind notin {NodeKind.Integer, NodeKind.Float}:
+    raise newException(EvaluationError, &"invalid argument of kind '{args[0].kind}' for function '{name}'")
+  let x = args[0].value
+  # sin/cos/tan of a float64 always yield a float, so the result is always
+  # a Float node. The `is` operator is resolved at compile time, hence no
+  # Integer branch is needed here
+  case name:
+    of "sin":
+      result = AstNode(kind: NodeKind.Float, value: sin(x))
+    of "cos":
+      result = AstNode(kind: NodeKind.Float, value: cos(x))
+    of "tan":
+      result = AstNode(kind: NodeKind.Float, value: tan(x))
+    else:
+      # Fail loudly instead of silently returning a nil node
+      raise newException(EvaluationError, &"unknown function '{name}'")
+
+
+proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits grouping (i.e. parenthesized) expressions. The parentheses
+  ## only exist to let a lower-precedence expression appear where a
+  ## higher-precedence one is expected (so that 2 * (3 + 1) differs
+  ## from 2 * 3 + 1): evaluating the group is just evaluating the
+  ## expression it wraps
+  result = self.eval(node.expr)
+
+
 proc visit_binary(self: NodeVisitor, node: AstNode): AstNode = 
  ## Visits a binary AST node and evaluates it
  let right = self.eval(node.right)
@ -136,11 +182,11 @@ proc visit_binary(self: NodeVisitor, node: AstNode): AstNode =
    of TokenType.Minus:
      result = handleBinary(left, right, `-`)
    of TokenType.Div:
-      rightOpNonZero(right, "division")
+      ensureNonZero(right)
      result = handleBinary(left, right, `/`)
    of TokenType.Modulo:
      # Modulo is a bit special since we must have integers
-      rightOpNonZero(right, "modulo")
+      ensureNonZero(right)
      ensureIntegers(left, right)
      result = AstNode(kind: NodeKind.Integer, value: float(int(left.value) mod int(right.value)))
    of TokenType.Exp:
@ -165,11 +211,3 @@ proc visit_unary(self: NodeVisitor, node: AstNode): AstNode =
          discard  # Unreachable
    else:
      discard  # Unreachable
-
-
-proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode = 
-  ## Visits grouping (i.e. parenthesized) expressions. Parentheses
-  ## have no other meaning than to allow a lower-precedence expression
-  ## where a higher-precedence one is expected so that 2 * (3 + 1) is
-  ## different from 2 * 3 + 1
-  return self.eval(node.expr)
--- a/src/nimkalc/objects/error.nim
+++ b/src/nimkalc/objects/error.nim
@ -15,6 +15,9 @@


 type
-  ParseError* = object of CatchableError
+  NimKalcException* = object of CatchableError
+  ParseError* = object of NimKalcException
    ## A parsing exception
-  MathError* = object of ArithmeticDefect
+  MathError* = object of NimKalcException
+    ## An arithmetic error
+  EvaluationError* = object of NimKalcException
--- a/src/nimkalc/objects/token.nim
+++ b/src/nimkalc/objects/token.nim
@ -22,8 +22,10 @@ type
    # Operators
    Plus, Minus, Div, Exp, Modulo,
    Mul, RightParen, LeftParen,
+    # Identifiers
+    Ident,
    # Other
-    Eof
+    Eof, Comma
  Token* = object
    # A token object
    lexeme*: string
--- a/src/nimkalc/parsing/lexer.nim
+++ b/src/nimkalc/parsing/lexer.nim
@ -27,15 +27,18 @@ const tokens = to_table({
              '(': TokenType.LeftParen, ')': TokenType.RightParen,
              '-': TokenType.Minus, '+': TokenType.Plus,
              '*': TokenType.Mul, '/': TokenType.Div,
-              '%': TokenType.Modulo, '^': TokenType.Exp})
+              '%': TokenType.Modulo, '^': TokenType.Exp,
+              ',': TokenType.Comma})
 # All the identifiers and constants (such as PI)
 # Since they're constant we don't even need to bother adding another
 # AST node kind, we can just map the name to a float literal ;)
-const identifiers = to_table({
+const constants = to_table({
    "pi": Token(kind: TokenType.Float, lexeme: "3.141592653589793"),
    "e": Token(kind: TokenType.Float, lexeme: "2.718281828459045"),
    "tau": Token(kind: TokenType.Float, lexeme: "6.283185307179586")
 })
+# The math functions are hardcoded too, so a plain array of names is enough
+const functions = ["sin", "cos", "tan"]


 type
@ -88,6 +91,8 @@ func createToken(self: Lexer, tokenType: TokenType): Token =
 proc parseNumber(self: Lexer) =
    ## Parses numeric literals
    var kind = TokenType.Int
+    var scientific: bool = false
+    var sign: bool = false
    while true:
        if self.peek().isDigit():
            discard self.step()
@ -99,6 +104,11 @@ proc parseNumber(self: Lexer) =
            # Scientific notation
            kind = TokenType.Float
            discard self.step()
+            scientific = true
+        elif self.peek().toLowerAscii() in {'-', '+'} and scientific and not sign:
+            # So we can parse stuff like 2e-5
+            sign = true
+            discard self.step()
        else:
            break
    self.tokens.add(self.createToken(kind))
@ -111,8 +121,10 @@ proc parseIdentifier(self: Lexer) =
    while self.peek().isAlphaNumeric() or self.peek() in {'_', }:
        discard self.step()
    var text: string = self.source[self.start..<self.current]
-    if text.toLowerAscii() in identifiers:
-        self.tokens.add(identifiers[text])
+    if text.toLowerAscii() in constants:
+        self.tokens.add(constants[text])
+    elif text.toLowerAscii() in functions:
+        self.tokens.add(self.createToken(TokenType.Ident))
    else:
        raise newException(ParseError, &"Unknown identifier '{text}'")

@ -138,6 +150,8 @@ proc lex*(self: Lexer, source: string): seq[Token] =
    ## Lexes a source string, converting a stream
    ## of characters into a series of tokens
    self.source = source
+    self.tokens = @[]
+    self.current = 0
    while not self.done():
        self.start = self.current
        self.scanToken()
--- a/src/nimkalc/parsing/parser.nim
+++ b/src/nimkalc/parsing/parser.nim
@ -20,6 +20,7 @@ import ../objects/error

 import parseutils
 import strformat
+import tables


 {.experimental: "implicitDeref".}
@ -31,6 +32,9 @@ type
    current: int


+# Number of arguments each supported function expects, keyed by name
+const arities = to_table({"sin": 1, "cos": 1, "tan": 1})
+
+
 proc initParser*(): Parser = 
  new(result)
  result.current = 0
@ -134,17 +138,39 @@ proc primary(self: Parser): AstNode =
      let expression = self.binary()
      self.expect(TokenType.RightParen, "unexpected EOL")
      result = AstNode(kind: NodeKind.Grouping, expr: expression)
+    of TokenType.Ident:
+      result = AstNode(kind: NodeKind.Ident, name: value.lexeme)
    else:
      self.error(&"invalid token of kind '{value.kind}' in primary expression")


+proc call(self: Parser): AstNode =
+  ## Parses function calls such as sin(2).
+  ##
+  ## A primary expression is parsed first: if it is followed by a left
+  ## parenthesis, a comma-separated argument list is parsed and a Call
+  ## node is returned, otherwise the primary expression is returned as is.
+  ##
+  ## Errors are reported through self.error when the callee is not an
+  ## identifier, when its name has no entry in the arities table or when
+  ## the number of arguments differs from the expected one
+  let expression = self.primary()
+  if self.match(TokenType.LeftParen):
+    if expression.kind != NodeKind.Ident:
+      self.error(&"object of type '{expression.kind}' is not callable")
+    # The lexer matches function names case-insensitively, so a name such
+    # as `SIN` may presumably reach this point with its original spelling
+    # (TODO confirm): report it as a parse error instead of letting the
+    # table lookup below raise a bare KeyError
+    if expression.name notin arities:
+      self.error(&"Unknown function '{expression.name}'")
+    var arguments: seq[AstNode] = @[]
+    if not self.check(TokenType.RightParen):
+      arguments.add(self.binary())
+      while self.match(TokenType.Comma):
+        arguments.add(self.binary())
+    result = AstNode(kind: NodeKind.Call, arguments: arguments, function: expression)
+    let expected = arities[expression.name]
+    if len(arguments) != expected:
+      self.error(&"Wrong number of arguments supplied to function '{expression.name}': expected {expected}, got {len(arguments)}")
+    self.expect(TokenType.RightParen, "unclosed function call")
+  else:
+    result = expression
+
+
+
 proc unary(self: Parser): AstNode = 
  ## Parses unary expressions such as -1
  case self.step().kind:
    of TokenType.Minus:
      result = AstNode(kind: NodeKind.Unary, unOp: self.previous(), operand: self.unary())
    else:
-      result = self.primary()
+      result = self.call()
  

 proc pow(self: Parser): AstNode = 
@ -181,10 +207,10 @@ proc binary(self: Parser): AstNode =
  result = self.addition()


-
 proc parse*(self: Parser, tokens: seq[Token]): AstNode = 
   ## Parses a list of tokens into an AST tree
   self.tokens = tokens
+  # Start from the first token on every call (presumably so that the
+  # same Parser object can be reused for several inputs)
+  self.current = 0
   result = self.binary()
-  
+