Compare commits

...

2 Commits

3 changed files with 69 additions and 45 deletions

View File

@@ -1,46 +1,11 @@
 import nn/network
 import nn/util/matrix
-import std/math
-
-# Mean squared error
-proc mse(a, b: Matrix[float]): float =
-    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float
-
-# Derivative of MSE
-func dxMSE*(x, y: Matrix[float]): Matrix[float] = 2.0 * (x - y)
-
-# A bunch of vectorized activation functions
-func sigmoid*(input: Matrix[float]): Matrix[float] =
-    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
-
-func sigmoidDerivative*(input: Matrix[float]): Matrix[float] = sigmoid(input) * (1.0 - sigmoid(input))
-
-func softmax*(input: Matrix[float]): Matrix[float] =
-    var input = input - input.max()
-    result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
-
-func softmaxDerivative*(input: Matrix[float]): Matrix[float] = zeros[float](input.shape)
-
-func step*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: x), axis = -1)
-
-func silu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
-
-func relu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
-
-func htan*(input: Matrix[float]): Matrix[float] =
-    let f = proc (x: float): float =
-        let temp = exp(2 * x)
-        result = (temp - 1) / (temp + 1)
-    input.apply(f, axis = -1)
-
-var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, newActivation(sigmoid, sigmoidDerivative)),
-                             newDenseLayer(3, 2, newActivation(sigmoid, sigmoidDerivative)),
-                             newDenseLayer(2, 3, newActivation(softmax, softmaxDerivative))],
-                           lossFunc=newLoss(mse, dxMSE), learnRate=0.05, momentum=0.55,
+var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, Sigmoid),
+                             newDenseLayer(3, 2, Sigmoid),
+                             newDenseLayer(2, 3, Softmax)],
+                           lossFunc=MSE, learnRate=0.05, momentum=0.55,
                            weightRange=(start: -1.0, stop: 1.0), biasRange=(start: -10.0, stop: 10.0))
 echo mlp.feedforward(newMatrix[float](@[1.0, 2.0]))
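Since the last layer is newDenseLayer(2, 3, Softmax), feedforward maps the 2-element input through the 2-3-2-3 stack to a 3-element output that softmax normalizes to sum to 1. A quick sanity check in the same vein (a sketch only; exact values depend on the random weight and bias initialization):

echo mlp.feedforward(newMatrix[float](@[1.0, 2.0])).sum()  # ~1.0, since softmax normalizes the output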

View File

@@ -17,6 +17,7 @@ import util/matrix
 import std/strformat
 import std/random
 import std/math
+randomize()
@@ -142,4 +143,50 @@ proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
 proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
     ## Performs a single backpropagation step and updates the
     ## gradients for our weights and biases, layer by layer
+
+## Utility functions
+
+# Mean squared error
+proc mse(a, b: Matrix[float]): float =
+    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float
+
+# Derivative of MSE
+func dxMSE(x, y: Matrix[float]): Matrix[float] = 2.0 * (x - y)
+
+# A bunch of vectorized activation functions
+func sigmoid(input: Matrix[float]): Matrix[float] =
+    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
+
+func sigmoidDerivative(input: Matrix[float]): Matrix[float] = sigmoid(input) * (1.0 - sigmoid(input))
+
+func softmax(input: Matrix[float]): Matrix[float] =
+    # Shift by the max for numerical stability: softmax(x) = softmax(x - max(x))
+    var input = input - input.max()
+    result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
+
+func softmaxDerivative(input: Matrix[float]): Matrix[float] =
+    # Jacobian of the softmax: diag(s) - s.dot(s^T) for a column vector s
+    var input = input.reshape(input.shape.cols, 1)
+    result = input.diagflat() - input.dot(input.transpose())
+
+# Binary step: 0 for x < 0, 1 otherwise
+func step(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: 1.0), axis = -1)
+
+# silu(x) = x * sigmoid(x)
+func silu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = x / (1 + exp(-x)), axis = -1)
+
+func relu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
+
+func htan(input: Matrix[float]): Matrix[float] {.used.} =
+    let f = proc (x: float): float =
+        let temp = exp(2 * x)
+        result = (temp - 1) / (temp + 1)
+    input.apply(f, axis = -1)
+
+{.push.}
+{.hints: off.} # So nim doesn't complain about the naming
+var Sigmoid* = newActivation(sigmoid, sigmoidDerivative)
+var Softmax* = newActivation(softmax, softmaxDerivative)
+var MSE* = newLoss(mse, dxMSE)
+{.pop.}
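The exported Sigmoid, Softmax and MSE objects pair each forward function with its derivative through newActivation and newLoss. Assuming that same newActivation signature, a further activation object could be wired up the same way; a minimal sketch only, where htanDerivative is hypothetical and not part of this diff:

func htanDerivative(input: Matrix[float]): Matrix[float] =
    # tanh'(x) = 1 - tanh(x)^2, reusing the vectorized htan above
    let t = htan(input)
    result = 1.0 - t * t

var Tanh* = newActivation(htan, htanDerivative)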

View File

@@ -662,8 +662,8 @@ proc `==`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[bool] =
 proc diag*[T](a: Matrix[T], k: int = 0): Matrix[T] =
     ## Returns the kth diagonal of
     ## the given matrix if a is 2-D
+    ## or a 2-D matrix with a on its
+    ## kth diagonal if it is 1-D
     if a.shape.rows > 0:
@@ -686,6 +686,12 @@ proc diag*[T](a: Matrix[T], k: int = 0): Matrix[T] =
         inc(current.col)

+proc diagflat*[T](a: Matrix[T], k: int = 0): Matrix[T] =
+    ## Create a 2-D array with the flattened
+    ## input as a diagonal
+    result = a.flatten().diag(k)

 proc fliplr*[T](self: Matrix[T]): Matrix[T] =
     ## Flips each row in the matrix left
     ## to right. A copy is returned
@@ -937,7 +943,9 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
 proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
-    ## Behaves like numpy.where()
+    ## Return elements chosen from x or y depending on cond
+    ## Where cond is true, take elements from x, otherwise
+    ## take elements from y
     when not defined(release):
         if not (x.shape == y.shape and y.shape == cond.shape):
             raise newException(ValueError, &"all inputs must be of equal shape for where()")
@@ -960,7 +968,9 @@ proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
 proc where*[T](cond: Matrix[bool], x: Matrix[T], y: T): Matrix[T] =
-    ## Behaves like numpy.where, but with a constant
+    ## Behaves like where but with a constant instead of
+    ## an array. When cond is true, take elements from x,
+    ## otherwise take y
     when not defined(release):
         if not (x.shape == cond.shape):
             raise newException(ValueError, &"all inputs must be of equal shape for where()")
@@ -1072,4 +1082,6 @@ when isMainModule:
     doAssert all(j.diag(1) == newMatrix[int](@[2, 4]))
     doAssert all(j.diag(2) == newMatrix[int](@[1]))
     var o = newMatrix[int](@[1, 2, 3])
     doAssert all(o.diag() == newMatrix[int](@[@[1, 0, 0], @[0, 2, 0], @[0, 0, 3]]))
+    var n = newMatrix[int](@[@[1, 2], @[3, 4]])
+    doAssert all(n.diagflat() == newMatrix[int](@[@[1, 0, 0, 0], @[0, 2, 0, 0], @[0, 0, 3, 0], @[0, 0, 0, 4]]))
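The two where() overloads documented above can be exercised in the same doAssert style as the module's self-tests; a sketch, assuming a newMatrix[bool] constructor matching the typed constructors used above:

var cond = newMatrix[bool](@[true, false, true])
var a = newMatrix[int](@[1, 2, 3])
var b = newMatrix[int](@[10, 20, 30])
doAssert all(where(cond, a, b) == newMatrix[int](@[1, 20, 3]))
doAssert all(where(cond, a, 0) == newMatrix[int](@[1, 0, 3]))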