mirror of https://github.com/nocturn9x/nimkalc.git
Version 0.1
This commit is contained in:
parent
03d43aa829
commit
f0394545eb
|
@ -0,0 +1,136 @@
|
|||
# NimKalc - A math parsing library
|
||||
|
||||
NimKalc is a simple implementation of a recursive-descent top-down parser that can evaluate
|
||||
mathematical expressions. Notable mentions are support for common mathematical constants (pi, tau, euler's number, etc),
|
||||
scientific notation numbers (such as `2e5`); support for functions (`sin`, `cos`, `tan`...) and equation-solving algorithms using Newton's method is planned (see the current limitations below)
|
||||
|
||||
|
||||
## Current limitations
|
||||
- No functions (coming soon)
|
||||
- No equation-solving (coming soon)
|
||||
- The parsing is a bit weird because `2 2` will parse the first 2 and just stop instead of erroring out (FIXME)
|
||||
|
||||
|
||||
## How to use it
|
||||
|
||||
NimKalc parses mathematical expressions following this process:
|
||||
- Tokenize the input
|
||||
- Generate an AST
|
||||
- Visit the nodes
|
||||
|
||||
Each of these steps can be run separately, but for convenience a wrapper
|
||||
`eval` procedure has been defined which takes in a string and returns a
|
||||
single AST node containing the result of the given expression.
|
||||
|
||||
## Supported operators
|
||||
|
||||
Beyond the classical 4 operators (`+`, `-`, `/` and `*`), NimKalc supports:
|
||||
- `%` for modulo division
|
||||
- `^` for exponentiation
|
||||
- unary `-` for negation
|
||||
- Arbitrarily nested parentheses (__not__ empty ones!) to enforce precedence
|
||||
|
||||
|
||||
## Exceptions
|
||||
|
||||
NimKalc defines 2 exceptions:
|
||||
- `ParseError` is used when the expression is invalid
|
||||
- `MathError` is used when there is an arithmetical error such as division by 0 or domain errors (e.g. `log(0)`)
|
||||
|
||||
## Design
|
||||
|
||||
NimKalc treats all numerical values as `float` to simplify the implementation of the underlying operators. To tell integers
|
||||
from floating point numbers the `AstNode` object has a `kind` discriminant which will be equal to `NodeKind.Integer` for ints
|
||||
and `NodeKind.Float` for decimals. It is advised that you take this into account when using the library
|
||||
|
||||
|
||||
__Note__: The string representation of integer nodes won't show the decimal part for clarity
|
||||
|
||||
## String representations
|
||||
|
||||
All of NimKalc's objects implement the `$` operator and are therefore printable. Integer nodes will look like `Integer(x)`, while
|
||||
floats are represented with `Float(x.x)`. Unary operators print as `Unary(operator, right)`, while binary operators print as `Binary(left, operator, right)`.
|
||||
Parenthesized expressions print as `Grouping(expr)`, where `expr` is the expression enclosed in parentheses (as an AST node, obviously).
|
||||
Token objects will print as `Token(kind, lexeme)`: an example for the number 2 would be `Token(Int, '2')`
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
Here is an example of a REPL using all of NimKalc's functionality to evaluate expressions from stdin (can be found at `examples/repl.nim`)
|
||||
|
||||
```nim
|
||||
import nimkalc/objects/ast
|
||||
import nimkalc/objects/token
|
||||
import nimkalc/parsing/parser
|
||||
import nimkalc/parsing/lexer
|
||||
import nimkalc/objects/error
|
||||
|
||||
|
||||
import strformat
|
||||
import strutils
|
||||
|
||||
|
||||
proc repl() =
|
||||
## A simple REPL to demonstrate NimKalc's functionality
|
||||
var line: string
|
||||
var result: AstNode
|
||||
var tokens: seq[Token]
|
||||
let lexerObj = initLexer()
|
||||
let parserObj = initParser()
|
||||
let visitor = initNodeVisitor()
|
||||
echo "Welcome to the NimKalc REPL, type a math expression and press enter"
|
||||
while true:
|
||||
try:
|
||||
stdout.write("=> ")
|
||||
line = stdin.readLine()
|
||||
echo &"Parsing and evaluation of {line} below:"
|
||||
tokens = lexerObj.lex(line)
|
||||
# No-one cares about the EOF token after all
|
||||
echo &"Tokenization of {line}: {tokens[0..^2].join(\", \")}"
|
||||
result = parserObj.parse(tokens)
|
||||
echo &"AST for {line}: {result}"
|
||||
result = visitor.eval(result)
|
||||
case result.kind:
|
||||
# The result is an AstNode object, specifically
|
||||
# either a node of type NodeKind.Float or a NodeKind.Integer
|
||||
of NodeKind.Float:
|
||||
echo &"Value of {line}: {result.value}"
|
||||
of NodeKind.Integer:
|
||||
echo &"Value of {line}: {int(result.value)}"
|
||||
else:
|
||||
discard # Unreachable
|
||||
except IOError:
|
||||
echo "\nGoodbye."
|
||||
break
|
||||
except ParseError:
|
||||
echo &"A parsing error occurred: {getCurrentExceptionMsg()}"
|
||||
except MathError:
|
||||
echo &"An arithmetic error occurred: {getCurrentExceptionMsg()}"
|
||||
except OverflowDefect:
|
||||
echo &"Value overflow/underflow detected: {getCurrentExceptionMsg()}"
|
||||
|
||||
|
||||
when isMainModule:
|
||||
repl()
|
||||
|
||||
```
|
||||
|
||||
__Note__: If you don't need the intermediate representations shown here (tokens, AST) you can just `import nimkalc` and use
|
||||
the `eval` procedure, which takes in a string and returns the evaluated result as a primary AST node like so:
|
||||
|
||||
```nim
|
||||
import nimkalc
|
||||
|
||||
echo eval("2+2") # Prints Integer(4)
|
||||
|
||||
```
|
||||
|
||||
## Installing
|
||||
|
||||
You can clone this repository and then install the package via nimble:
|
||||
- `git clone https://github.com/nocturn9x/nimkalc`
|
||||
- `cd nimkalc`
|
||||
- `nimble install`
|
||||
|
||||
|
||||
__Note__: Nim 1.2.0 or higher is required to build NimKalc! Other versions are likely to work if they're not too old, but they have not been tested
|
|
@ -0,0 +1,69 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# A simple library to parse and evaluate mathematical expressions
|
||||
|
||||
import nimkalc/objects/ast
|
||||
import nimkalc/objects/token
|
||||
import nimkalc/parsing/parser
|
||||
import nimkalc/parsing/lexer
|
||||
import nimkalc/objects/error
|
||||
|
||||
|
||||
import strformat
|
||||
import strutils
|
||||
|
||||
|
||||
proc repl() =
  ## A simple REPL demonstrating NimKalc's full pipeline:
  ## lexing, parsing and AST evaluation, with per-stage
  ## output and error reporting. Loops until EOF on stdin.
  var line: string
  var result: AstNode
  var tokens: seq[Token]
  let lexerObj = initLexer()
  let parserObj = initParser()
  let visitor = initNodeVisitor()
  echo "Welcome to the NimKalc REPL, type a math expression and press enter"
  while true:
    try:
      stdout.write("=> ")
      line = stdin.readLine()
      echo &"Parsing and evaluation of {line} below:"
      tokens = lexerObj.lex(line)
      # No-one cares about the EOF token after all
      echo &"Tokenization of {line}: {tokens[0..^2].join(\", \")}"
      result = parserObj.parse(tokens)
      echo &"AST for {line}: {result}"
      result = visitor.eval(result)
      case result.kind:
        # The result is an AstNode object, specifically
        # either a node of type NodeKind.Float or a NodeKind.Integer
        of NodeKind.Float:
          echo &"Value of {line}: {result.value}"
        of NodeKind.Integer:
          echo &"Value of {line}: {int(result.value)}"
        else:
          discard  # Unreachable: eval always yields a literal node
    except IOError:
      # EOF (e.g. Ctrl+D) ends the session gracefully
      echo "\nGoodbye."
      break
    except ParseError:
      echo &"A parsing error occurred: {getCurrentExceptionMsg()}"
    except MathError:
      echo &"An arithmetic error occurred: {getCurrentExceptionMsg()}"
    except OverflowDefect:
      echo &"Value overflow/underflow detected: {getCurrentExceptionMsg()}"


when isMainModule:
  repl()
|
|
@ -0,0 +1,11 @@
|
|||
# Package

version       = "0.1"
author        = "Mattia Giambirtone"
description   = "An advanced parsing library for mathematical expressions and equations"
license       = "Apache 2.0"
srcDir        = "src"

# Dependencies

requires "nim >= 1.2.0"
|
|
@ -0,0 +1,29 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
## NimKalc's public API: evaluate a mathematical expression
## given as a string with a single `eval` call
|
||||
|
||||
import nimkalc/parsing/parser
|
||||
import nimkalc/objects/ast
|
||||
import nimkalc/parsing/lexer
|
||||
|
||||
|
||||
proc eval*(source: string): AstNode =
  ## Evaluates a mathematical expression from a string
  ## and returns a leaf node representing the result.
  ##
  ## Raises ParseError on invalid input and MathError on
  ## arithmetic errors such as division by zero
  let l = initLexer()
  let p = initParser()
  let v = initNodeVisitor()
  result = v.eval(p.parse(l.lex(source)))
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# An Abstract Syntax Tree and node visitor implementation
|
||||
import token
|
||||
import error
|
||||
|
||||
import strformat
|
||||
import tables
|
||||
import math
|
||||
|
||||
|
||||
type
  NodeKind* {.pure.} = enum
    ## The kinds of AST node NimKalc produces
    Grouping, Unary, Binary, Integer,
    Float
  AstNode* = ref object
    ## A node in the abstract syntax tree
    case kind*: NodeKind
      of NodeKind.Grouping:
        expr*: AstNode        # The parenthesized sub-expression
      of NodeKind.Unary:
        unOp*: Token          # The unary operator
        operand*: AstNode
      of NodeKind.Binary:
        binOp*: Token         # The binary operator
        left*: AstNode
        right*: AstNode
      of NodeKind.Integer, NodeKind.Float:
        # The kind makes us differentiate between
        # floats and integers, but for our purposes
        # using a double precision float for everything
        # is just easier
        value*: float64
  NodeVisitor* = ref object
    # A node visitor object; it carries no state and only
    # groups the visiting procedures below
|
||||
|
||||
|
||||
proc initNodeVisitor*(): NodeVisitor =
  ## Initializes a new, stateless node visitor
  new(result)


proc `$`*(self: AstNode): string =
  ## Stringifies an AST node, e.g. Binary(Integer(2), Plus, Integer(2))
  case self.kind:
    of NodeKind.Grouping:
      result = &"Grouping({self.expr})"
    of NodeKind.Unary:
      result = &"Unary({$self.unOp.kind}, {$self.operand})"
    of NodeKind.Binary:
      result = &"Binary({$self.left}, {$self.binOp.kind}, {$self.right})"
    of NodeKind.Integer:
      # Integer nodes hide their (always zero) decimal part
      result = &"Integer({$int(self.value)})"
    of NodeKind.Float:
      result = &"Float({$self.value})"
|
||||
|
||||
|
||||
# Forward declarations
proc visit_literal(self: NodeVisitor, node: AstNode): AstNode
proc visit_unary(self: NodeVisitor, node: AstNode): AstNode
proc visit_binary(self: NodeVisitor, node: AstNode): AstNode
proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode


proc accept(self: AstNode, visitor: NodeVisitor): AstNode =
  ## Dispatches a node to the appropriate specialized
  ## visiting procedure based on its kind
  case self.kind:
    of NodeKind.Integer, NodeKind.Float:
      result = visitor.visit_literal(self)
    of NodeKind.Binary:
      result = visitor.visit_binary(self)
    of NodeKind.Unary:
      result = visitor.visit_unary(self)
    of NodeKind.Grouping:
      result = visitor.visit_grouping(self)


proc eval*(self: NodeVisitor, node: AstNode): AstNode =
  ## Evaluates an AST node, returning a literal
  ## (Integer or Float) node holding the result
  result = node.accept(self)
|
||||
|
||||
|
||||
proc visit_literal(self: NodeVisitor, node: AstNode): AstNode =
  ## Visits a literal AST node (such as integers):
  ## literals evaluate to themselves
  result = node  # Not that we can do anything else after all, lol


template handleBinary(left, right: AstNode, operator: untyped): AstNode =
  ## Handy template that avoids us the hassle of copy-pasting
  ## the same checks over and over again in the visitor:
  ## applies the given operator to both operands' values and
  ## wraps the result in a node of the appropriate kind
  let r = operator(left.value, right.value)
  if float(int(r)) == r:
    # It's a whole number!
    AstNode(kind: NodeKind.Integer, value: r)
  else:
    AstNode(kind: NodeKind.Float, value: r)
|
||||
|
||||
|
||||
|
||||
template rightOpNonZero(node: AstNode, opType: string) =
  ## Handy template that raises a MathError when the given
  ## operand is zero, guarding division and modulo operations
  if node.value == 0.0:
    case node.kind:
      of NodeKind.Float:
        raise newException(MathError, "float " & opType & " by 0")
      of NodeKind.Integer:
        raise newException(MathError, "integer " & opType & " by 0")
      else:
        raise newException(CatchableError, &"invalid node kind '{node.kind}' for rightOpNonZero")


template ensureIntegers(left, right: AstNode) =
  ## Ensures both operands are integers,
  ## raising a MathError otherwise
  if left.kind != NodeKind.Integer or right.kind != NodeKind.Integer:
    raise newException(MathError, "an integer is required")
|
||||
|
||||
|
||||
proc visit_binary(self: NodeVisitor, node: AstNode): AstNode =
  ## Visits a binary AST node and evaluates it,
  ## recursively evaluating both operands first
  let right = self.eval(node.right)
  let left = self.eval(node.left)
  case node.binOp.kind:
    of TokenType.Plus:
      result = handleBinary(left, right, `+`)
    of TokenType.Minus:
      result = handleBinary(left, right, `-`)
    of TokenType.Div:
      # Dividing by zero is an error
      rightOpNonZero(right, "division")
      result = handleBinary(left, right, `/`)
    of TokenType.Modulo:
      # Modulo is a bit special since we must have integers
      rightOpNonZero(right, "modulo")
      ensureIntegers(left, right)
      result = AstNode(kind: NodeKind.Integer, value: float(int(left.value) mod int(right.value)))
    of TokenType.Exp:
      result = handleBinary(left, right, pow)
    of TokenType.Mul:
      result = handleBinary(left, right, `*`)
    else:
      discard  # Unreachable: the parser only builds binary nodes for the operators above
|
||||
|
||||
|
||||
proc visit_unary(self: NodeVisitor, node: AstNode): AstNode =
  ## Visits unary expressions and evaluates them
  let expr = self.eval(node.operand)
  case node.unOp.kind:
    of TokenType.Minus:
      case expr.kind:
        of NodeKind.Float:
          result = AstNode(kind: NodeKind.Float, value: -expr.value)
        of NodeKind.Integer:
          result = AstNode(kind: NodeKind.Integer, value: -expr.value)
        else:
          discard  # Unreachable: eval only yields literal nodes
    else:
      discard  # Unreachable: the parser only builds unary nodes for minus


proc visit_grouping(self: NodeVisitor, node: AstNode): AstNode =
  ## Visits grouping (i.e. parenthesized) expressions. Parentheses
  ## have no other meaning than to allow a lower-precedence expression
  ## where a higher-precedence one is expected so that 2 * (3 + 1) is
  ## different from 2 * 3 + 1
  return self.eval(node.expr)
|
|
@ -0,0 +1,20 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
|
||||
type
  ParseError* = object of CatchableError
    ## Raised when an expression is syntactically invalid
  MathError* = object of CatchableError
    ## Raised upon arithmetic errors such as division by zero.
    ## Derives from CatchableError rather than ArithmeticDefect:
    ## these are recoverable user-input errors, not programming
    ## defects, and callers are expected to catch them
|
|
@ -0,0 +1,35 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# A parsing Token
|
||||
import strformat
|
||||
|
||||
type
  TokenType* {.pure.} = enum
    ## All the token kinds known to the lexer
    # Data types
    Int, Float,
    # Operators
    Plus, Minus, Div, Exp, Modulo,
    Mul, RightParen, LeftParen,
    # Other
    Eof
  Token* = object
    ## A token object, pairing a kind with the
    ## slice of source text it was built from
    lexeme*: string
    kind*: TokenType


proc `$`*(self: Token): string =
  ## Returns a string representation of self,
  ## e.g. Token(Int, '2')
  result = &"Token({self.kind}, '{self.lexeme}')"
|
|
@ -0,0 +1,145 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# A simple lexer module
|
||||
|
||||
import strutils
|
||||
import strformat
|
||||
import tables
|
||||
|
||||
import ../objects/token
|
||||
import ../objects/error
|
||||
|
||||
|
||||
# Table of all single-character tokens
const tokens = to_table({
  '(': TokenType.LeftParen, ')': TokenType.RightParen,
  '-': TokenType.Minus, '+': TokenType.Plus,
  '*': TokenType.Mul, '/': TokenType.Div,
  '%': TokenType.Modulo, '^': TokenType.Exp})

# All the identifiers and constants (such as PI)
# Since they're constant we don't even need to bother adding another
# AST node kind, we can just map the name to a float literal ;)
const identifiers = to_table({
  "pi": Token(kind: TokenType.Float, lexeme: "3.141592653589793"),
  "e": Token(kind: TokenType.Float, lexeme: "2.718281828459045"),
  "tau": Token(kind: TokenType.Float, lexeme: "6.283185307179586")
})
|
||||
|
||||
|
||||
type
  Lexer* = ref object
    ## A lexer object
    source*: string      # The source string being tokenized
    tokens*: seq[Token]  # The tokens produced so far
    start*: int          # Index where the current lexeme starts
    current*: int        # Index of the next character to consume


func initLexer*(): Lexer =
  ## Initializes the lexer in an empty state
  result = Lexer(source: "", tokens: @[], start: 0, current: 0)


func done(self: Lexer): bool =
  ## Returns true if we reached EOF
  result = self.current >= self.source.len
|
||||
|
||||
|
||||
proc step(self: Lexer): char =
  ## Steps one character forward in the
  ## source. A null terminator is returned
  ## if the lexer is at EOF
  if self.done():
    return '\0'
  self.current = self.current + 1
  result = self.source[self.current - 1]


proc peek(self: Lexer): char =
  ## Returns the current character in the
  ## source without consuming it.
  ## A null terminator is returned
  ## if the lexer is at EOF
  if self.done():
    result = '\0'
  else:
    result = self.source[self.current]


func createToken(self: Lexer, tokenType: TokenType): Token =
  ## Creates a token of the given type whose lexeme is
  ## the [start, current) slice of the source string
  result = Token(kind: tokenType,
                 lexeme: self.source[self.start..<self.current],
                 )
|
||||
|
||||
|
||||
proc parseNumber(self: Lexer) =
  ## Parses numeric literals: integers, floats and
  ## scientific notation (e.g. 2e5, 2.5e-3)
  var kind = TokenType.Int
  while true:
    if self.peek().isDigit():
      discard self.step()
    elif self.peek() == '.':
      # The dot for floats
      kind = TokenType.Float
      discard self.step()
    elif self.peek().toLowerAscii() == 'e':
      # Scientific notation
      kind = TokenType.Float
      discard self.step()
      # The exponent may carry an explicit sign (e.g. 2e-5);
      # without consuming it here, '2e-5' would mis-lex as
      # the three tokens '2e', '-' and '5'
      if self.peek() in {'+', '-'}:
        discard self.step()
    else:
      break
  self.tokens.add(self.createToken(kind))
|
||||
|
||||
|
||||
proc parseIdentifier(self: Lexer) =
  ## Parses identifiers (the known constants). Note that
  ## multi-character tokens such as UTF runes are not supported.
  ## Raises ParseError on unknown identifiers
  while self.peek().isAlphaNumeric() or self.peek() in {'_', }:
    discard self.step()
  var text: string = self.source[self.start..<self.current]
  # The lookup is case-insensitive, so the same (lowercased) key
  # must be used for both the membership test and the indexing:
  # the original indexed with the raw text, raising KeyError for
  # uppercase input such as 'PI'
  let key = text.toLowerAscii()
  if key in identifiers:
    self.tokens.add(identifiers[key])
  else:
    raise newException(ParseError, &"Unknown identifier '{text}'")
|
||||
|
||||
|
||||
proc scanToken(self: Lexer) =
  ## Scans a single token. This method is
  ## called iteratively until the source
  ## string reaches EOF.
  ## Raises ParseError on unexpected characters
  let single = self.step()
  if single in [' ', '\t', '\r']:
    # We skip whitespaces, tabs and other stuff
    return
  elif single.isDigit():
    self.parseNumber()
  elif single in tokens:
    self.tokens.add(self.createToken(tokens[single]))
  elif single.isAlphanumeric() or single == '_':
    self.parseIdentifier()
  else:
    raise newException(ParseError, &"Unexpected token '{single}'")
|
||||
|
||||
|
||||
proc lex*(self: Lexer, source: string): seq[Token] =
  ## Lexes a source string, converting a stream
  ## of characters into a series of tokens.
  ## The internal state is reset first, so the same
  ## Lexer object can safely be reused across calls
  self.source = source
  # Without this reset, reusing the lexer (as the REPL does)
  # would keep the old tokens and the old scan position,
  # producing stale or empty output on the second call
  self.tokens = @[]
  self.start = 0
  self.current = 0
  while not self.done():
    self.start = self.current
    self.scanToken()
  self.tokens.add(Token(kind: TokenType.Eof, lexeme: ""))
  result = self.tokens
|
|
@ -0,0 +1,190 @@
|
|||
# Copyright 2021 Mattia Giambirtone
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# A recursive-descent top-down parser for mathematical expressions
|
||||
|
||||
import ../objects/token
|
||||
import ../objects/ast
|
||||
import ../objects/error
|
||||
|
||||
import parseutils
|
||||
import strformat
|
||||
|
||||
|
||||
{.experimental: "implicitDeref".}
|
||||
|
||||
|
||||
type
  Parser* = ref object
    ## A recursive-descent top-down parser
    tokens: seq[Token]  # The token stream being parsed
    current: int        # Index of the current token


proc initParser*(): Parser =
  ## Initializes an empty parser
  new(result)
  result.current = 0
  result.tokens = @[]
|
||||
|
||||
|
||||
# Forward declaration: binary is the grammar's entry point and is
# needed by primary() to parse parenthesized sub-expressions
proc binary(self: Parser): AstNode


template endOfFile: Token =
  ## Creates an EOF token -- utility template
  Token(lexeme: "", kind: TokenType.Eof)
|
||||
|
||||
|
||||
func done(self: Parser): bool =
  ## Returns true once only the trailing EOF token
  ## (or nothing at all) is left to consume; hence
  ## the comparison against high() rather than len()
  result = self.current >= self.tokens.high()


proc peek(self: Parser): Token =
  ## Peeks into the tokens list or
  ## returns an EOF token if we're at
  ## the end of the input
  if not self.done():
    result = self.tokens[self.current]
  else:
    result = endOfFile


proc step(self: Parser): Token =
  ## Consumes a token from the input and
  ## steps forward or returns an EOF token
  ## if we're at the end of the input
  if not self.done():
    result = self.peek()
    self.current += 1
  else:
    result = endOfFile


proc previous(self: Parser): Token =
  ## Returns the previously consumed
  ## token
  result = self.tokens[self.current - 1]
|
||||
|
||||
|
||||
proc check(self: Parser, kind: TokenType): bool =
  ## Returns true if the current token matches
  ## the given type (without consuming it)
  result = self.peek().kind == kind


proc match(self: Parser, kind: TokenType): bool =
  ## Checks if the current token matches the
  ## given type and consumes it if it does, returns
  ## false otherwise. True is returned if the
  ## match is successful
  if self.check(kind):
    discard self.step()
    result = true
  else:
    result = false


proc match(self: Parser, kinds: varargs[TokenType]): bool =
  ## Checks if the current token matches any of the
  ## given type(s) and consumes it if it does, returns
  ## false otherwise. True is returned at
  ## the first successful match
  for kind in kinds:
    if self.match(kind):
      return true
  result = false
|
||||
|
||||
|
||||
proc error(self: Parser, message: string) =
  ## Raises a ParseError with the given message
  raise newException(ParseError, message)


proc expect(self: Parser, kind: TokenType, message: string) =
  ## Checks if the current token matches the given type
  ## and consumes it if it does, raises a ParseError
  ## with the given message otherwise
  if not self.match(kind):
    self.error(message)
|
||||
|
||||
|
||||
proc primary(self: Parser): AstNode =
  ## Parses primary expressions: numeric literals
  ## and parenthesized groupings
  let value = self.previous()
  case value.kind:
    of TokenType.Int:
      result = AstNode(kind: NodeKind.Integer, value: 0.0)
      discard parseFloat(value.lexeme, result.value)
    of TokenType.Float:
      result = AstNode(kind: NodeKind.Float, value: 0.0)
      discard parseFloat(value.lexeme, result.value)
    of TokenType.LeftParen:
      # Recurse into the full grammar, then require the closing paren
      let expression = self.binary()
      self.expect(TokenType.RightParen, "unexpected EOL")
      result = AstNode(kind: NodeKind.Grouping, expr: expression)
    else:
      self.error(&"invalid token of kind '{value.kind}' in primary expression")
|
||||
|
||||
|
||||
proc unary(self: Parser): AstNode =
  ## Parses unary expressions such as -1
  case self.step().kind:
    of TokenType.Minus:
      result = AstNode(kind: NodeKind.Unary, unOp: self.previous(), operand: self.unary())
    else:
      # Not a unary operator: fall through to a primary expression
      result = self.primary()
|
||||
|
||||
|
||||
proc pow(self: Parser): AstNode =
  ## Parses exponentiation expressions
  result = self.unary()
  var operator: Token
  while self.match(TokenType.Exp):
    operator = self.previous()
    result = AstNode(kind: NodeKind.Binary, left: result, right: self.unary(), binOp: operator)


proc mul(self: Parser): AstNode =
  ## Parses divisions (including modulo) and
  ## multiplications
  result = self.pow()
  var operator: Token
  while self.match(TokenType.Div, TokenType.Modulo, TokenType.Mul):
    operator = self.previous()
    result = AstNode(kind: NodeKind.Binary, left: result, right: self.pow(), binOp: operator)


proc addition(self: Parser): AstNode =
  ## Parses additions and subtractions
  result = self.mul()
  var operator: Token
  while self.match(TokenType.Plus, TokenType.Minus):
    operator = self.previous()
    result = AstNode(kind: NodeKind.Binary, left: result, right: self.mul(), binOp: operator)


proc binary(self: Parser): AstNode =
  ## Parses binary expressions, the highest
  ## level of expression
  result = self.addition()
|
||||
|
||||
|
||||
|
||||
proc parse*(self: Parser, tokens: seq[Token]): AstNode =
  ## Parses a list of tokens into an AST tree.
  ## The parser's cursor is reset first, so the same
  ## Parser object can safely be reused across calls
  self.tokens = tokens
  # Without this reset, reusing the parser (as the REPL does)
  # would resume from the index left over by the previous parse
  self.current = 0
  result = self.binary()
|
||||
|
||||
|
Loading…
Reference in New Issue