Version 0.1

This commit is contained in:
nocturn9x 2021-03-11 11:12:49 +01:00
parent 03d43aa829
commit f0394545eb
9 changed files with 810 additions and 0 deletions

136
README.md Normal file
View File

@ -0,0 +1,136 @@
# NimKalc - A math parsing library
NimKalc is a simple implementation of a recursive-descent top-down parser that can evaluate
mathematical expressions. Notable features are support for common mathematical constants (pi, tau, Euler's number, etc) and
scientific notation numbers (such as `2e5`). Functions (`sin`, `cos`, `tan`...) and equation solving using Newton's method are planned (see the limitations below)
## Current limitations
- No functions (coming soon)
- No equation-solving (coming soon)
- The parsing is a bit weird because `2 2` will parse the first 2 and just stop instead of erroring out (FIXME)
## How to use it
NimKalc parses mathematical expressions following this process:
- Tokenize the input
- Generate an AST
- Visit the nodes
Each of these steps can be run separately, but for convenience a wrapper
`eval` procedure has been defined which takes in a string and returns a
single AST node containing the result of the given expression.
## Supported operators
Beyond the classical 4 operators (`+`, `-`, `/` and `*`), NimKalc supports:
- `%` for modulo division
- `^` for exponentiation
- unary `-` for negation
- Arbitrarily nested parentheses (__not__ empty ones!) to enforce precedence
## Exceptions
NimKalc defines 2 exceptions:
- `ParseError` is used when the expression is invalid
- `MathError` is used when there is an arithmetical error such as division by 0 or domain errors (e.g. `log(0)`)
## Design
NimKalc treats all numerical values as `float` to simplify the implementation of the underlying operators. To tell integers
from floating point numbers the `AstNode` object has a `kind` discriminant which will be equal to `NodeKind.Integer` for ints
and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library.
__Note__: The string representation of integer nodes won't show the decimal part for clarity
## String representations
All of NimKalc's objects implement the `$` operator and are therefore printable. Integer nodes will look like `Integer(x)`, while
floats are represented with `Float(x.x)`. Unary operators print as `Unary(operator, right)`, while binary operators print as `Binary(left, operator, right)`.
Parenthesized expressions print as `Grouping(expr)`, where `expr` is the expression enclosed in parentheses (as an AST node, obviously).
Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Integer, '2')`
## Example
Here is an example of a REPL using all of NimKalc's functionality to evaluate expressions from stdin (can be found at `examples/repl.nim`)
```nim
import nimkalc/objects/ast
import nimkalc/objects/token
import nimkalc/parsing/parser
import nimkalc/parsing/lexer
import nimkalc/objects/error
import strformat
import strutils
proc repl() =
## A simple REPL to demonstrate NimKalc's functionality
var line: string
var result: AstNode
var tokens: seq[Token]
let lexerObj = initLexer()
let parserObj = initParser()
let visitor = initNodeVisitor()
echo "Welcome to the NimKalc REPL, type a math expression and press enter"
while true:
try:
stdout.write("=> ")
line = stdin.readLine()
echo &"Parsing and evaluation of {line} below:"
tokens = lexerObj.lex(line)
# No-one cares about the EOF token after all
echo &"Tokenization of {line}: {tokens[0..^2].join(\", \")}"
result = parserObj.parse(tokens)
echo &"AST for {line}: {result}"
result = visitor.eval(result)
case result.kind:
# The result is an AstNode object, specifically
# either a node of type NodeKind.Float or a NodeKind.Integer
of NodeKind.Float:
echo &"Value of {line}: {result.value}"
of NodeKind.Integer:
echo &"Value of {line}: {int(result.value)}"
else:
discard # Unreachable
except IOError:
echo "\nGoodbye."
break
except ParseError:
echo &"A parsing error occurred: {getCurrentExceptionMsg()}"
except MathError:
echo &"An arithmetic error occurred: {getCurrentExceptionMsg()}"
except OverflowDefect:
echo &"Value overflow/underflow detected: {getCurrentExceptionMsg()}"
when isMainModule:
repl()
```
__Note__: If you don't need the intermediate representations shown here (tokens, AST) you can just `import nimkalc` and use
the `eval` procedure, which takes in a string and returns the evaluated result as a primary AST node like so:
```nim
import nimkalc
echo eval("2+2") # Prints Integer(4)
```
## Installing
You can clone this repository and then install the package via nimble:
- `git clone https://github.com/nocturn9x/nimkalc`
- `cd nimkalc`
- `nimble install`
__Note__: Nim 1.2.0 or higher is required to build NimKalc! Other versions are likely to work if they're not too old, but they have not been tested

69
examples/repl.nim Normal file
View File

@ -0,0 +1,69 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A simple library to parse and evaluate mathematical expressions
import nimkalc/objects/ast
import nimkalc/objects/token
import nimkalc/parsing/parser
import nimkalc/parsing/lexer
import nimkalc/objects/error
import strformat
import strutils
proc repl() =
  ## Interactive read-eval-print loop that walks every input line
  ## through each stage of NimKalc: tokenization, parsing and
  ## evaluation, printing the intermediate results along the way.
  ## The loop ends when reading from stdin raises an IOError
  let
    lexerObj = initLexer()
    parserObj = initParser()
    visitor = initNodeVisitor()
  echo "Welcome to the NimKalc REPL, type a math expression and press enter"
  while true:
    try:
      stdout.write("=> ")
      let line = stdin.readLine()
      echo &"Parsing and evaluation of {line} below:"
      let tokens = lexerObj.lex(line)
      # The last token (EOF) is sliced off before printing
      echo &"Tokenization of {line}: {tokens[0..^2].join(\", \")}"
      var result: AstNode = parserObj.parse(tokens)
      echo &"AST for {line}: {result}"
      result = visitor.eval(result)
      # Float results are printed as-is, Integer results are
      # converted to int first; any other node kind prints nothing
      if result.kind == NodeKind.Float:
        echo &"Value of {line}: {result.value}"
      elif result.kind == NodeKind.Integer:
        echo &"Value of {line}: {int(result.value)}"
    except IOError:
      echo "\nGoodbye."
      break
    except ParseError:
      echo &"A parsing error occurred: {getCurrentExceptionMsg()}"
    except MathError:
      echo &"An arithmetic error occurred: {getCurrentExceptionMsg()}"
    except OverflowDefect:
      echo &"Value overflow/underflow detected: {getCurrentExceptionMsg()}"


when isMainModule:
  repl()

11
nimkalc.nimble Normal file
View File

@ -0,0 +1,11 @@
# Package metadata
version = "0.1"
author = "Mattia Giambirtone"
description = "An advanced parsing library for mathematical expressions and equations"
license = "Apache 2.0"
# Directory that holds the library's sources
srcDir = "src"
# Dependencies
requires "nim >= 1.2.0"

29
src/nimkalc.nim Normal file
View File

@ -0,0 +1,29 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
import nimkalc/parsing/parser
import nimkalc/objects/ast
import nimkalc/parsing/lexer
proc eval*(source: string): AstNode =
  ## Convenience wrapper that runs the whole pipeline on an expression
  ## given as a string: the source is lexed, the tokens are parsed and
  ## the resulting tree is handed to a fresh node visitor. The node
  ## produced by the visitor is returned
  let tokens = initLexer().lex(source)
  let tree = initParser().parse(tokens)
  result = initNodeVisitor().eval(tree)

175
src/nimkalc/objects/ast.nim Normal file
View File

@ -0,0 +1,175 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# An Abstract Syntax Tree and node visitor implementation
import token
import error
import strformat
import tables
import math
type
  NodeKind* {.pure.} = enum
    ## Discriminant telling the AST node variants apart
    Grouping, Unary, Binary, Integer,
    Float
  AstNode* = ref object
    ## A node of the abstract syntax tree; the fields that are
    ## available depend on the node's kind
    case kind*: NodeKind
      of NodeKind.Grouping:
        # The expression enclosed in the group
        expr*: AstNode
      of NodeKind.Unary:
        # Operator token and the single operand it applies to
        unOp*: Token
        operand*: AstNode
      of NodeKind.Binary:
        # Operator token and its two operands
        binOp*: Token
        left*: AstNode
        right*: AstNode
      of NodeKind.Integer, NodeKind.Float:
        # The kind makes us differentiate between
        # floats and integers, but for our purposes
        # using a double precision float for everything
        # is just easier
        value*: float64
  NodeVisitor* = ref object
    # A node visitor object (it has no fields)
proc initNodeVisitor*(): NodeVisitor =
  ## Creates a fresh node visitor object
  result = NodeVisitor()
proc `$`*(self: AstNode): string =
  ## Returns the printable form of an AST node. Child nodes are
  ## stringified recursively and operators are shown by token kind
  result =
    case self.kind
    of NodeKind.Integer:
      # Converted to int so that no decimal part is printed
      &"Integer({$int(self.value)})"
    of NodeKind.Float:
      &"Float({$self.value})"
    of NodeKind.Unary:
      &"Unary({$self.unOp.kind}, {$self.operand})"
    of NodeKind.Binary:
      &"Binary({$self.left}, {$self.binOp.kind}, {$self.right})"
    of NodeKind.Grouping:
      &"Grouping({self.expr})"
# Forward declarations: accept (below) calls these visitor procedures
# before their bodies appear further down in the module
proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode
proc accept(self: AstNode, visitor: NodeVisitor): AstNode =
  ## Dispatches the node to the visitor procedure that matches
  ## its kind and returns whatever that procedure produces
  result =
    case self.kind
    of NodeKind.Grouping:
      visitor.visit_grouping(self)
    of NodeKind.Unary:
      visitor.visit_unary(self)
    of NodeKind.Binary:
      visitor.visit_binary(self)
    of NodeKind.Integer, NodeKind.Float:
      visitor.visit_literal(self)
proc eval*(self: NodeVisitor, node: AstNode): AstNode =
  ## Evaluates an AST node by letting it accept this visitor
  return accept(node, self)
proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
  ## Visits a literal AST node (integer or float). Literals are
  ## already fully evaluated, so the node is handed back unchanged
  return node
template handleBinary(left, right: AstNode, operator: untyped): AstNode =
  ## Handy template that avoids us the hassle of copy-pasting
  ## the same checks over and over again in the visitor.
  ##
  ## Applies operator to the values of the left and right literal
  ## nodes and wraps the outcome in a new literal node: an Integer
  ## node if the outcome is a whole number that fits in an int, a
  ## Float node otherwise
  let r = operator(left.value, right.value)
  # The range is checked before looking at the fractional part so that
  # huge values, infinities and NaN (every comparison with NaN is false)
  # are never converted to int: converting an out-of-range float to an
  # integer is not a well-defined operation
  if r >= float(low(int)) and r < float(high(int)) and trunc(r) == r:
    # It's a whole number!
    AstNode(kind: NodeKind.Integer, value: r)
  else:
    AstNode(kind: NodeKind.Float, value: r)
template rightOpNonZero(node: AstNode, opType: string) =
  ## Handy template that raises when the value of the given literal
  ## node is zero. opType names the operation and ends up in the
  ## error message. A MathError is raised for float and integer
  ## nodes, a CatchableError for any other node kind
  if node.value == 0.0:
    if node.kind == NodeKind.Float:
      raise newException(MathError, "float " & opType & " by 0")
    elif node.kind == NodeKind.Integer:
      raise newException(MathError, "integer " & opType & " by 0")
    else:
      raise newException(CatchableError, &"invalid node kind '{node.kind}' for rightOpNonZero")
template ensureIntegers(left, right: AstNode) =
  ## Raises a MathError unless both operands are Integer nodes
  if not (left.kind == NodeKind.Integer and right.kind == NodeKind.Integer):
    raise newException(MathError, "an integer is required")
proc visit_binary(self: NodeVisitor, node: AstNode): AstNode =
  ## Evaluates a binary AST node: both operands are evaluated
  ## (the right one first) and the node's operator is then
  ## applied to the resulting literals
  let rhs = self.eval(node.right)
  let lhs = self.eval(node.left)
  case node.binOp.kind:
    of TokenType.Mul:
      result = handleBinary(lhs, rhs, `*`)
    of TokenType.Exp:
      result = handleBinary(lhs, rhs, pow)
    of TokenType.Modulo:
      # Modulo only works on integers, so on top of the zero
      # check both operands must be Integer nodes
      rightOpNonZero(rhs, "modulo")
      ensureIntegers(lhs, rhs)
      let remainder = int(lhs.value) mod int(rhs.value)
      result = AstNode(kind: NodeKind.Integer, value: float(remainder))
    of TokenType.Div:
      rightOpNonZero(rhs, "division")
      result = handleBinary(lhs, rhs, `/`)
    of TokenType.Minus:
      result = handleBinary(lhs, rhs, `-`)
    of TokenType.Plus:
      result = handleBinary(lhs, rhs, `+`)
    else:
      # Any other token kind leaves the result unset
      discard
proc visit_unary(self: NodeVisitor, node: AstNode): AstNode =
  ## Evaluates a unary AST node. The operand is evaluated first;
  ## for the minus operator a new literal of the same kind holding
  ## the negated value is returned. Anything else leaves the
  ## result unset
  let operand = self.eval(node.operand)
  if node.unOp.kind == TokenType.Minus:
    if operand.kind == NodeKind.Float:
      result = AstNode(kind: NodeKind.Float, value: -operand.value)
    elif operand.kind == NodeKind.Integer:
      result = AstNode(kind: NodeKind.Integer, value: -operand.value)
proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode =
  ## Evaluates a grouping (i.e. parenthesized) expression, which
  ## simply means evaluating the expression it encloses. Parentheses
  ## only exist so that a lower-precedence expression can be used
  ## where a higher-precedence one is expected: 2 * (3 + 1) is
  ## not the same as 2 * 3 + 1
  result = self.eval(node.expr)

View File

@ -0,0 +1,20 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
type
  ParseError* = object of CatchableError
    ## A parsing exception
  MathError* = object of ArithmeticDefect
    ## An arithmetic exception.
    ## NOTE(review): this derives from ArithmeticDefect, so it is a
    ## defect rather than a CatchableError

View File

@ -0,0 +1,35 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A parsing Token
import strformat
type
  TokenType* {.pure.} = enum
    ## All the kinds of token the lexer can produce
    # Data types
    Int, Float,
    # Operators
    Plus, Minus, Div, Exp, Modulo,
    Mul, RightParen, LeftParen,
    # Other
    Eof
  Token* = object
    # A token object: the piece of source text it
    # was created from (lexeme) and its kind
    lexeme*: string
    kind*: TokenType
proc `$`*(self: Token): string =
  ## Returns the printable form of a token, made of its
  ## kind followed by its lexeme in single quotes
  result = "Token(" & $self.kind & ", '" & self.lexeme & "')"

View File

@ -0,0 +1,145 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A simple lexer module
import strutils
import strformat
import tables
import ../objects/token
import ../objects/error
# Table of all single-character tokens, mapping
# each character to its token type
const tokens = to_table({
  '(': TokenType.LeftParen, ')': TokenType.RightParen,
  '-': TokenType.Minus, '+': TokenType.Plus,
  '*': TokenType.Mul, '/': TokenType.Div,
  '%': TokenType.Modulo, '^': TokenType.Exp})
# All the identifiers and constants (such as PI)
# Since they're constant we don't even need to bother adding another
# AST node kind, we can just map the name to a float literal ;)
# Note that the keys are all lowercase
const identifiers = to_table({
  "pi": Token(kind: TokenType.Float, lexeme: "3.141592653589793"),
  "e": Token(kind: TokenType.Float, lexeme: "2.718281828459045"),
  "tau": Token(kind: TokenType.Float, lexeme: "6.283185307179586")
})
type
  Lexer* = ref object
    # A lexer object
    source*: string     # The text being tokenized
    tokens*: seq[Token] # The tokens produced so far
    start*: int         # Index in source where the current token begins
    current*: int       # Index in source of the next character to read
func initLexer*(): Lexer =
  ## Creates a lexer with no source, no tokens
  ## and both of its positions set to zero
  new(result)
  result.source = ""
  result.tokens = @[]
  result.start = 0
  result.current = 0
func done(self: Lexer): bool =
  ## Tells whether the whole source has been consumed (EOF)
  return self.source.len <= self.current
proc step(self: Lexer): char =
  ## Consumes the current character of the source
  ## and returns it. At EOF nothing is consumed and
  ## a null terminator is returned instead
  if self.done():
    result = '\0'
  else:
    result = self.source[self.current]
    inc(self.current)
proc peek(self: Lexer): char =
  ## Looks at the current character of the
  ## source without consuming it. At EOF a
  ## null terminator is returned instead
  if not self.done():
    return self.source[self.current]
  result = '\0'
func createToken(self: Lexer, tokenType: TokenType): Token =
  ## Builds a token of the given type whose lexeme is the
  ## slice of the source between start (included) and
  ## current (excluded)
  let lexeme = self.source[self.start..<self.current]
  result = Token(kind: tokenType, lexeme: lexeme)
proc parseNumber(self: Lexer) =
  ## Parses numeric literals and appends the matching token to the
  ## token list. The first digit has already been consumed by the
  ## caller. A literal is made of digits, optionally followed by a
  ## single fractional part (a dot and digits) and by a single
  ## exponent ('e' or 'E', an optional sign and at least one digit,
  ## as in 2e5 or 2e-5). A fractional part or an exponent turns the
  ## token into a Float, otherwise it is an Int.
  ##
  ## Raises a ParseError if the exponent has no digits
  var kind = TokenType.Int
  # Rest of the integer part
  while self.peek().isDigit():
    discard self.step()
  if self.peek() == '.':
    # The dot for floats: only one is accepted, a second
    # one is left in the source for the caller to reject
    kind = TokenType.Float
    discard self.step()
    while self.peek().isDigit():
      discard self.step()
  if self.peek().toLowerAscii() == 'e':
    # Scientific notation
    kind = TokenType.Float
    discard self.step()
    # The sign belongs to the exponent: without this, 2e-5 would
    # be split into the tokens '2e', '-' and '5'
    if self.peek() in {'+', '-'}:
      discard self.step()
    if not self.peek().isDigit():
      let lexeme = self.source[self.start..<self.current]
      raise newException(ParseError, &"Malformed exponent in numeric literal '{lexeme}'")
    while self.peek().isDigit():
      discard self.step()
  self.tokens.add(self.createToken(kind))
proc parseIdentifier(self: Lexer) =
  ## Parses identifiers and appends the token they stand for to the
  ## token list. Identifiers are made of ASCII letters, digits and
  ## underscores and are looked up case-insensitively in the table
  ## of known identifiers. Note that multi-character tokens such as
  ## UTF runes are not supported.
  ##
  ## Raises a ParseError if the identifier is unknown
  while self.peek().isAlphaNumeric() or self.peek() == '_':
    discard self.step()
  let text = self.source[self.start..<self.current]
  # The table's keys are lowercase, so the same lowercased key must
  # be used for both the membership test and the lookup: looking up
  # the original text would raise a KeyError for input such as "PI"
  let key = text.toLowerAscii()
  if key in identifiers:
    self.tokens.add(identifiers[key])
  else:
    raise newException(ParseError, &"Unknown identifier '{text}'")
proc scanToken(self: Lexer) =
  ## Scans a single token starting at the current position.
  ## Called over and over until the source reaches EOF.
  ## Raises a ParseError for characters that cannot start
  ## any token
  let single = self.step()
  if single in {' ', '\t', '\r'}:
    # Spaces, tabs and carriage returns produce no token
    discard
  elif single.isDigit():
    self.parseNumber()
  elif single in tokens:
    let kind = tokens[single]
    self.tokens.add(self.createToken(kind))
  elif single == '_' or single.isAlphanumeric():
    self.parseIdentifier()
  else:
    raise newException(ParseError, &"Unexpected token '{single}'")
proc lex*(self: Lexer, source: string): seq[Token] =
  ## Lexes a source string, converting a stream
  ## of characters into a series of tokens. The
  ## returned sequence always ends with an EOF token.
  ##
  ## The same lexer can be used for any number of sources:
  ## its state is reset at the beginning of every call
  self.source = source
  # Start from a clean state: otherwise a second call would begin at
  # the position where the previous one stopped and would append to
  # the previous tokens (also after a call that raised halfway through)
  self.tokens = @[]
  self.start = 0
  self.current = 0
  while not self.done():
    self.start = self.current
    self.scanToken()
  self.tokens.add(Token(kind: TokenType.Eof, lexeme: ""))
  result = self.tokens

View File

@ -0,0 +1,190 @@
# Copyright 2021 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A recursive-descent top-down parser for mathematical expressions
import ../objects/token
import ../objects/ast
import ../objects/error
import parseutils
import strformat
{.experimental: "implicitDeref".}
type
  Parser* = ref object
    ## A parser object
    tokens: seq[Token] # The tokens being parsed
    current: int       # Index in tokens of the next token to consume
proc initParser*(): Parser =
  ## Creates a parser with an empty token
  ## list and its position set to zero
  result = Parser(tokens: @[], current: 0)
# Forward declaration: primary calls binary to parse
# parenthesized expressions before binary's body appears
proc binary(self: Parser): AstNode
template endOfFile: Token =
  ## Utility template that expands to a new
  ## EOF token with an empty lexeme
  Token(kind: TokenType.Eof, lexeme: "")
func done(self: Parser): bool =
  ## Tells whether the position has reached (or
  ## passed) the last index of the token list
  return self.tokens.high() <= self.current
proc peek(self: Parser): Token =
  ## Returns the current token without consuming
  ## it, or an EOF token if we're at the end of
  ## the input
  if self.done():
    return endOfFile
  result = self.tokens[self.current]
proc step(self: Parser): Token =
  ## Consumes the current token and returns it.
  ## At the end of the input nothing is consumed
  ## and an EOF token is returned instead
  if self.done():
    return endOfFile
  result = self.peek()
  inc(self.current)
proc previous(self: Parser): Token =
  ## Returns the previously consumed token, or an EOF
  ## token if there is no such token
  # Nothing has been consumed yet when current is 0 (for example with
  # an empty expression, where step does not advance): indexing with
  # current - 1 would then be out of bounds, so an EOF token is
  # returned and callers can report a regular parsing error
  if self.current > 0 and self.current <= self.tokens.len:
    result = self.tokens[self.current - 1]
  else:
    result = endOfFile
proc check(self: Parser, kind: TokenType): bool =
  ## Tells whether the current token is of the
  ## given type, without consuming it
  let upcoming = self.peek()
  result = upcoming.kind == kind
proc match(self: Parser, kind: TokenType): bool =
  ## Consumes the current token if it is of the
  ## given type. Returns true when the token
  ## matched (and was consumed), false otherwise
  result = self.check(kind)
  if result:
    discard self.step()
proc match(self: Parser, kinds: varargs[TokenType]): bool =
  ## Consumes the current token if it is of any of
  ## the given types, which are tried in order.
  ## Returns true at the first successful match,
  ## false if none of the types matched
  result = false
  for candidate in kinds:
    if self.match(candidate):
      result = true
      break
proc error(self: Parser, message: string) =
  ## Raises a ParseError carrying the given message
  let exc = newException(ParseError, message)
  raise exc
proc expect(self: Parser, kind: TokenType, message: string) =
  ## Consumes the current token if it is of the given
  ## type, otherwise raises a parsing error with the
  ## given message
  if self.match(kind):
    return
  self.error(message)
proc primary(self: Parser): AstNode =
  ## Parses primary expressions, i.e. numeric literals and
  ## parenthesized expressions. The token to parse is the one
  ## that was consumed last.
  ##
  ## Raises a ParseError if a literal's lexeme is not a valid
  ## number, if a parenthesized expression is not closed or if
  ## the token can not start a primary expression
  let value = self.previous()
  case value.kind:
    of TokenType.Int, TokenType.Float:
      var number = 0.0
      # parseFloat returns how many characters it processed (0 on
      # failure): unless that is the whole lexeme the literal is
      # malformed (e.g. '1.2.3' or '2e') and must not be silently
      # turned into a partial value or into 0
      if parseFloat(value.lexeme, number) != value.lexeme.len:
        self.error(&"invalid numeric literal '{value.lexeme}'")
      if value.kind == TokenType.Int:
        result = AstNode(kind: NodeKind.Integer, value: number)
      else:
        result = AstNode(kind: NodeKind.Float, value: number)
    of TokenType.LeftParen:
      let expression = self.binary()
      self.expect(TokenType.RightParen, "unexpected EOL")
      result = AstNode(kind: NodeKind.Grouping, expr: expression)
    else:
      self.error(&"invalid token of kind '{value.kind}' in primary expression")
proc unary(self: Parser): AstNode =
  ## Parses unary expressions such as -1. A token is
  ## always consumed: if it is a minus sign a unary node
  ## wrapping another unary expression is built, otherwise
  ## the token is handed over to primary
  if self.step().kind == TokenType.Minus:
    let operator = self.previous()
    let operand = self.unary()
    result = AstNode(kind: NodeKind.Unary, unOp: operator, operand: operand)
  else:
    result = self.primary()
proc pow(self: Parser): AstNode =
  ## Parses exponentiation. Both operands are unary expressions
  ## and every new operator takes the tree built so far as its
  ## left operand.
  ## NOTE(review): this makes ^ left-associative and makes unary
  ## minus bind tighter than ^ -- confirm that this is intended
  result = self.unary()
  while self.match(TokenType.Exp):
    let operator = self.previous()
    let exponent = self.unary()
    result = AstNode(kind: NodeKind.Binary, left: result, right: exponent, binOp: operator)
proc mul(self: Parser): AstNode =
  ## Parses multiplications and divisions (modulo included).
  ## Operands are exponentiation expressions and every new
  ## operator takes the tree built so far as its left operand
  result = self.pow()
  while self.match(TokenType.Div, TokenType.Modulo, TokenType.Mul):
    let operator = self.previous()
    let factor = self.pow()
    result = AstNode(kind: NodeKind.Binary, left: result, right: factor, binOp: operator)
proc addition(self: Parser): AstNode =
  ## Parses additions and subtractions. Operands are
  ## multiplication-level expressions and every new operator
  ## takes the tree built so far as its left operand
  result = self.mul()
  while self.match(TokenType.Plus, TokenType.Minus):
    let operator = self.previous()
    let term = self.mul()
    result = AstNode(kind: NodeKind.Binary, left: result, right: term, binOp: operator)
proc binary(self: Parser): AstNode =
  ## Parses binary expressions, the highest level of
  ## expression, by delegating to addition
  return self.addition()
proc parse*(self: Parser, tokens: seq[Token]): AstNode =
  ## Parses a list of tokens into an AST tree. The same parser
  ## can be used for any number of token lists.
  ##
  ## Raises a ParseError if the tokens do not form a valid
  ## expression or if tokens are left over once a complete
  ## expression has been parsed (e.g. the second 2 in `2 2`)
  self.tokens = tokens
  # Start from the first token: without this a reused parser
  # would begin at the position where the previous call stopped
  self.current = 0
  result = self.binary()
  # A valid expression consumes everything up to the EOF token
  if not self.done():
    let leftover = self.peek()
    self.error(&"unexpected token '{leftover.lexeme}' after the end of the expression")