From 219c5c9ac107f96cbb8996438c41d3140c3b47fa Mon Sep 17 00:00:00 2001
From: nocturn9x <hackhab@gmail.com>
Date: Thu, 11 Mar 2021 20:02:51 +0100
Subject: [PATCH] Added a rough functions implementation

---
 README.md                      |  50 ++++++++----
 src/nimkalc/objects/ast.nim    | 136 +++++++++++++++++++++------------
 src/nimkalc/objects/error.nim  |   7 +-
 src/nimkalc/objects/token.nim  |   4 +-
 src/nimkalc/parsing/lexer.nim  |  22 +++++-
 src/nimkalc/parsing/parser.nim |  32 +++++++-
 6 files changed, 177 insertions(+), 74 deletions(-)

diff --git a/README.md b/README.md
index 2ca1aeb..0dd74d5 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,27 @@
 # NimKalc - A math parsing library
 
 NimKalc is a simple implementation of a recursive-descent top-down parser that can evaluate
-mathematical expressions. Notable mentions are support for common mathematical constants (pi, tau, euler's number, etc),
-functions (`sin`, `cos`, `tan`...), equation-solving algos using newton's method and scientific notation numbers (such as `2e5`)
+mathematical expressions.
+
+__Disclaimer__: This library is __in beta__ and is not fully tested yet, though it will be soon.
+
+Features:
+- Support for mathematical constants (`pi`, `tau` and `e` right now)
+- Supported functions:
+  - `sin`
+  - `cos`
+  - `tan`
+  - `sqrt`
+  - `root` (for generic roots, takes the base and the argument)
+  - `log` (logarithm in base `e`)
+  - `logN` (logarithm in a given base, second argument)
+- Parentheses can be used to enforce different precedence levels
+- Easy API for tokenization, parsing and evaluation of AST nodes
 
 
 ## Current limitations
-- No functions (coming soon)
 - No equation-solving (coming soon)
-- The parsing is a bit weird because `2 2` will parse the first 2 and just stop instead of erroring out (FIXME)
+- The parsing is a bit weird because something like `2 2` will parse the first 2 and just stop instead of erroring out (FIXME)
 
 
 ## How to use it
@@ -18,9 +31,8 @@ NimKalc parses mathematical expressions following this process:
 - Generate an AST
 - Visit the nodes
 
-Each of these steps can be run separately, but for convenience a wrapper
-`eval` procedure has been defined which takes in a string and returns a
-single AST node containing the result of the given expression.
+Each of these steps can be run separately, but for convenience a wrapper `eval` procedure has been defined which takes in a string 
+and returns a single AST node containing the result of the given expression.
 
 ## Supported operators
 
@@ -28,30 +40,39 @@ Beyond the classical 4 operators (`+`, `-`, `/` and `*`), NimKalc supports:
 - `%` for modulo division
 - `^` for exponentiation
 - unary `-` for negation
-- Arbitrarily nested parentheses (__not__ empty ones!) to enforce precedence
-
 
 ## Exceptions
 
-NimKalc defines 2 exceptions:
-- `ParseError` is used when the expression is invalid
+NimKalc defines various exceptions:
+- `NimKalcException` is a generic superclass for all errors
+- `ParseError` is used when the expression is syntactically invalid
 - `MathError` is used when there is an arithmetical error such as division by 0 or domain errors (e.g. `log(0)`)
+- `EvaluationError` is used when the runtime evaluation of an expression fails (e.g. trying to call something that isn't a function)
 
 ## Design
 
 NimKalc treats all numerical values as `float` to simplify the implementation of the underlying operators. To tell integers
 from floating point numbers the `AstNode` object has a `kind` discriminant which will be equal to `NodeKind.Integer` for ints
-and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library
+and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library, since integers
+are stored as double-precision floats and can no longer be represented exactly once their magnitude exceeds 2 ^ 53.
+Everything should be fine as long as values stay within that range.
 
 
 __Note__: The string representation of integer nodes won't show the decimal part for clarity
 
+Some other notable design choices (due to the underlying simplicity of the language we parse) are as follows:
+- Identifiers are checked when tokenizing, since they're all constant
+- Mathematical constants are immediately mapped to their real values when tokenizing with no intermediate steps or tokens
+- Type errors (such as trying to call an integer) are detected statically at parse time
+
+
 ## String representations
 
 All of NimKalc's objects implement the `$` operator and are therefore printable. Integer nodes will look like `Integer(x)`, while
 floats are represented with `Float(x.x)`. Unary operators print as `Unary(operator, right)`, while binary operators print as `Binary(left, operator, right)`.
 Parenthesized expressions print as `Grouping(expr)`, where `expr` is the expression enclosed in parentheses (as an AST node, obviously).
-Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')`
+Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')`. Function calls print as `Call(name, args)`,
+where `name` is the function name and `args` is a `seq[AstNode]` representing the function's arguments.
 
 
 ## Example
@@ -115,14 +136,13 @@ when isMainModule:
 
 ```
 
-__Note__: If you don't need the intermediate representations shown here (tokens, AST) you can just `import nimkalc` and use
+__Note__: If you don't need the intermediate representations shown here (tokens/AST) you can just `import nimkalc` and use
 the `eval` procedure, which takes in a string and returns the evaluated result as a primary AST node like so:
 
 ```nim
 import nimkalc
 
 echo eval("2+2")  # Prints Integer(4)
-
 ```
 
 ## Installing
diff --git a/src/nimkalc/objects/ast.nim b/src/nimkalc/objects/ast.nim
index 4a1f2c3..e312026 100644
--- a/src/nimkalc/objects/ast.nim
+++ b/src/nimkalc/objects/ast.nim
@@ -19,13 +19,16 @@ import error
 import strformat
 import tables
 import math
+import strutils
 
 
 type
   NodeKind* {.pure.} = enum
+    # An enum for all kinds of AST nodes
     Grouping, Unary, Binary, Integer, 
-    Float
+    Float, Call, Ident
   AstNode* = ref object
+    # An AST node object
     case kind*: NodeKind
       of NodeKind.Grouping:
         expr*: AstNode
@@ -42,6 +45,11 @@ type
         # using a double precision float for everything
         # is just easier
         value*: float64
+      of NodeKind.Ident:
+        name*: string
+      of NodeKind.Call:
+          arguments*: seq[AstNode]
+          function*: AstNode
   NodeVisitor* = ref object
     # A node visitor object
 
@@ -64,35 +72,10 @@ proc `$`*(self: AstNode): string =
         result = &"Integer({$int(self.value)})"
       of NodeKind.Float:
         result = &"Float({$self.value})"
-
-
-# Forward declarations
-proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
-proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
-proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
-proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode
-
-
-proc accept(self: AstNode, visitor: NodeVisitor): AstNode = 
-  case self.kind:
-    of NodeKind.Integer, NodeKind.Float:
-      result =  visitor.visit_literal(self)
-    of NodeKind.Binary:
-      result = visitor.visit_binary(self)
-    of NodeKind.Unary:
-      result = visitor.visit_unary(self)
-    of NodeKind.Grouping:
-      result = visitor.visit_grouping(self)
-
-
-proc eval*(self: NodeVisitor, node: AstNode): AstNode = 
-  ## Evaluates an AST node
-  result = node.accept(self)
-
-
-proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
-  ## Visits a literal AST node (such as integers)
-  result = node   # Not that we can do anything else after all, lol
+      of NodeKind.Call:
+        result = &"Call({self.function.name}, {self.arguments})"
+      of NodeKind.Ident:
+        result = &"Identifier({self.name})"
 
 
 template handleBinary(left, right: AstNode, operator: untyped): AstNode = 
@@ -106,18 +89,14 @@ template handleBinary(left, right: AstNode, operator: untyped): AstNode =
     AstNode(kind: NodeKind.Float, value: r)
 
 
-
-template rightOpNonZero(node: AstNode, opType: string) = 
-  ## Handy template to make sure that the given AST node matches
-  ## a condition from 
+template ensureNonZero(node: AstNode) = 
+  ## Handy template to ensure that a given node's value is not 0
   if node.value == 0.0:
       case node.kind:
-        of NodeKind.Float:
-            raise newException(MathError, "float " & opType & " by 0")
-        of NodeKind.Integer:
-            raise newException(MathError, "integer " & opType & " by 0")
+        of NodeKind.Float, NodeKind.Integer:
+            raise newException(MathError, &"{($node.kind).toLowerAscii()} can't be zero")
         else:
-          raise newException(CatchableError, &"invalid node kind '{node.kind}' for rightOpNonZero")
+          raise newException(CatchableError, &"invalid node kind '{node.kind}' for ensureNonZero")
 
 
 template ensureIntegers(left, right: AstNode) = 
@@ -126,6 +105,73 @@ template ensureIntegers(left, right: AstNode) =
     raise newException(MathError, "an integer is required")
 
 
+# Forward declarations
+proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
+proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
+proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
+proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode
+proc visit_call(self: NodeVisitor, node: AstNode): AstNode
+
+
+proc accept(self: AstNode, visitor: NodeVisitor): AstNode = 
+  ## Accept half of the visitor pattern: dispatches this node to the
+  ## visitor's visit_* procedure matching its kind and returns its result
+  case self.kind:
+    of NodeKind.Integer, NodeKind.Float, NodeKind.Ident:
+      result =  visitor.visit_literal(self)
+    of NodeKind.Binary:
+      result = visitor.visit_binary(self)
+    of NodeKind.Unary:
+      result = visitor.visit_unary(self)
+    of NodeKind.Grouping:
+      result = visitor.visit_grouping(self)
+    of NodeKind.Call:
+      result = visitor.visit_call(self)
+
+
+proc eval*(self: NodeVisitor, node: AstNode): AstNode = 
+  ## Evaluates an AST node
+  result = node.accept(self)
+
+
+proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
+  ## Visits a literal AST node (integers, floats and identifiers, see accept)
+  result = node   # Nothing to evaluate here: the node is returned unchanged
+
+
+proc visit_call(self: NodeVisitor, node: AstNode): AstNode = 
+  ## Visits function call expressions: evaluates every argument, then
+  ## applies the builtin named by the callee to the first one. The
+  ## result is always a Float node, as sin/cos/tan return floats.
+  ## Raises EvaluationError if the function is unknown or if the call
+  ## has no arguments
+  let name = node.function.name
+  var args: seq[AstNode] = @[]
+  for arg in node.arguments:
+    args.add(self.eval(arg))
+  if args.len == 0:
+    # Guards the args[0] access below against an IndexDefect
+    raise newException(EvaluationError, &"function '{name}' expects an argument")
+  let x = args[0].value
+  var r: float
+  # The branches are mutually exclusive, so dispatch with a single case
+  case name:
+    of "sin": r = sin(x)
+    of "cos": r = cos(x)
+    of "tan": r = tan(x)
+    else:
+      raise newException(EvaluationError, &"unknown function '{name}'")
+  result = AstNode(kind: NodeKind.Float, value: r)
+
+
+proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode = 
+  ## Visits grouping (i.e. parenthesized) expressions. Parentheses
+  ## have no other meaning than to allow a lower-precedence expression
+  ## where a higher-precedence one is expected so that 2 * (3 + 1) is
+  ## different from 2 * 3 + 1
+  return self.eval(node.expr)
+
+
 proc visit_binary(self: NodeVisitor, node: AstNode): AstNode = 
   ## Visits a binary AST node and evaluates it
   let right = self.eval(node.right)
@@ -136,11 +182,11 @@ proc visit_binary(self: NodeVisitor, node: AstNode): AstNode =
     of TokenType.Minus:
       result = handleBinary(left, right, `-`)
     of TokenType.Div:
-      rightOpNonZero(right, "division")
+      ensureNonZero(right)
       result = handleBinary(left, right, `/`)
     of TokenType.Modulo:
       # Modulo is a bit special since we must have integers
-      rightOpNonZero(right, "modulo")
+      ensureNonZero(right)
       ensureIntegers(left, right)
       result = AstNode(kind: NodeKind.Integer, value: float(int(left.value) mod int(right.value)))
     of TokenType.Exp:
@@ -165,11 +211,3 @@ proc visit_unary(self: NodeVisitor, node: AstNode): AstNode =
           discard  # Unreachable
     else:
       discard  # Unreachable
-
-
-proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode = 
-  ## Visits grouping (i.e. parenthesized) expressions. Parentheses
-  ## have no other meaning than to allow a lower-precedence expression
-  ## where a higher-precedence one is expected so that 2 * (3 + 1) is
-  ## different from 2 * 3 + 1
-  return self.eval(node.expr)
diff --git a/src/nimkalc/objects/error.nim b/src/nimkalc/objects/error.nim
index f692926..df19e81 100644
--- a/src/nimkalc/objects/error.nim
+++ b/src/nimkalc/objects/error.nim
@@ -15,6 +15,9 @@
 
 
 type
-  ParseError* = object of CatchableError
+  NimKalcException* = object of CatchableError
+  ParseError* = object of NimKalcException
     ## A parsing exception
-  MathError* = object of ArithmeticDefect
+  MathError* = object of NimKalcException
+    ## An arithmetic error
+  EvaluationError* = object of NimKalcException
diff --git a/src/nimkalc/objects/token.nim b/src/nimkalc/objects/token.nim
index 7d39e60..ac52109 100644
--- a/src/nimkalc/objects/token.nim
+++ b/src/nimkalc/objects/token.nim
@@ -22,8 +22,10 @@ type
     # Operators
     Plus, Minus, Div, Exp, Modulo,
     Mul, RightParen, LeftParen,
+    # Identifiers
+    Ident,
     # Other
-    Eof
+    Eof, Comma
   Token* = object
     # A token object
     lexeme*: string
diff --git a/src/nimkalc/parsing/lexer.nim b/src/nimkalc/parsing/lexer.nim
index 1c53000..19b117e 100644
--- a/src/nimkalc/parsing/lexer.nim
+++ b/src/nimkalc/parsing/lexer.nim
@@ -27,15 +27,18 @@ const tokens = to_table({
               '(': TokenType.LeftParen, ')': TokenType.RightParen,
               '-': TokenType.Minus, '+': TokenType.Plus,
               '*': TokenType.Mul, '/': TokenType.Div,
-              '%': TokenType.Modulo, '^': TokenType.Exp})
+              '%': TokenType.Modulo, '^': TokenType.Exp,
+              ',': TokenType.Comma})
 # All the identifiers and constants (such as PI)
 # Since they're constant we don't even need to bother adding another
 # AST node kind, we can just map the name to a float literal ;)
-const identifiers = to_table({
+const constants = to_table({
     "pi": Token(kind: TokenType.Float, lexeme: "3.141592653589793"),
     "e": Token(kind: TokenType.Float, lexeme: "2.718281828459045"),
     "tau": Token(kind: TokenType.Float, lexeme: "6.283185307179586")
 })
+# Math functions are hardcoded too, so a plain array of their names is enough
+const functions = ["sin", "cos", "tan"]
 
 
 type
@@ -88,6 +91,8 @@ func createToken(self: Lexer, tokenType: TokenType): Token =
 proc parseNumber(self: Lexer) =
     ## Parses numeric literals
     var kind = TokenType.Int
+    var scientific: bool = false
+    var sign: bool = false
     while true:
         if self.peek().isDigit():
             discard self.step()
@@ -99,6 +104,11 @@ proc parseNumber(self: Lexer) =
             # Scientific notation
             kind = TokenType.Float
             discard self.step()
+            scientific = true
+        elif self.peek() in {'-', '+'} and scientific and not sign and self.source[self.current - 1] in {'e', 'E'}:
+            # Exponent sign (2e-5): accepted only right after the 'e' (assumes current indexes the next unread char) so 2e5-3 stays a subtraction
+            sign = true
+            discard self.step()
         else:
             break
     self.tokens.add(self.createToken(kind))
@@ -111,8 +121,10 @@ proc parseIdentifier(self: Lexer) =
     while self.peek().isAlphaNumeric() or self.peek() in {'_', }:
         discard self.step()
     var text: string = self.source[self.start..<self.current]
-    if text.toLowerAscii() in identifiers:
-        self.tokens.add(identifiers[text])
+    if text.toLowerAscii() in constants:
+        self.tokens.add(constants[text.toLowerAscii()])
+    elif text.toLowerAscii() in functions:
+        self.tokens.add(self.createToken(TokenType.Ident))
     else:
         raise newException(ParseError, &"Unknown identifier '{text}'")
 
@@ -138,6 +150,8 @@ proc lex*(self: Lexer, source: string): seq[Token] =
     ## Lexes a source string, converting a stream
     ## of characters into a series of tokens
     self.source = source
+    self.tokens = @[]
+    self.current = 0
     while not self.done():
         self.start = self.current
         self.scanToken()
diff --git a/src/nimkalc/parsing/parser.nim b/src/nimkalc/parsing/parser.nim
index f9c6f18..04f8779 100644
--- a/src/nimkalc/parsing/parser.nim
+++ b/src/nimkalc/parsing/parser.nim
@@ -20,6 +20,7 @@ import ../objects/error
 
 import parseutils
 import strformat
+import tables
 
 
 {.experimental: "implicitDeref".}
@@ -31,6 +32,9 @@ type
     current: int
 
 
+const arities = to_table({"sin": 1, "cos": 1, "tan": 1})
+
+
 proc initParser*(): Parser = 
   new(result)
   result.current = 0
@@ -134,17 +138,39 @@ proc primary(self: Parser): AstNode =
       let expression = self.binary()
       self.expect(TokenType.RightParen, "unexpected EOL")
       result = AstNode(kind: NodeKind.Grouping, expr: expression)
+    of TokenType.Ident:
+      result = AstNode(kind: NodeKind.Ident, name: value.lexeme)
     else:
       self.error(&"invalid token of kind '{value.kind}' in primary expression")
 
 
+proc call(self: Parser): AstNode = 
+  ## Parses function calls such as sin(2); a primary expression not followed by '(' is returned unchanged
+  var expression = self.primary()  # The callee, or a plain primary expression
+  if self.match(TokenType.LeftParen):
+    if expression.kind != NodeKind.Ident:  # Only identifiers can be called
+      self.error(&"object of type '{expression.kind}' is not callable")
+    var arguments: seq[AstNode] = @[]
+    if not self.check(TokenType.RightParen):  # Non-empty argument list
+      arguments.add(self.binary())
+      while self.match(TokenType.Comma):  # Each comma introduces one more argument
+        arguments.add(self.binary())
+    result = AstNode(kind: NodeKind.Call, arguments: arguments, function: expression)
+    if len(arguments) != arities[expression.name]:  # Argument count is checked against the arities table
+      self.error(&"Wrong number of arguments supplied to function '{expression.name}': expected {arities[expression.name]}, got {len(arguments)}")
+    self.expect(TokenType.RightParen, "unclosed function call")
+  else:
+    result = expression
+
+
+
 proc unary(self: Parser): AstNode = 
   ## Parses unary expressions such as -1
   case self.step().kind:
     of TokenType.Minus:
       result = AstNode(kind: NodeKind.Unary, unOp: self.previous(), operand: self.unary())
     else:
-      result = self.primary()
+      result = self.call()
   
 
 proc pow(self: Parser): AstNode = 
@@ -181,10 +207,10 @@ proc binary(self: Parser): AstNode =
   result = self.addition()
 
 
-
 proc parse*(self: Parser, tokens: seq[Token]): AstNode = 
   ## Parses a list of tokens into an AST tree
   self.tokens = tokens
+  self.current = 0
   result = self.binary()
-  
+