From 924eb8cf6da0aabcb9299955d22bfddf1db1a0f0 Mon Sep 17 00:00:00 2001
From: Mattia Giambirtone
Date: Tue, 28 Mar 2023 10:28:05 +0200
Subject: [PATCH] Fixed argmax

---
 src/main.nim        | 18 +++++++++---------
 src/nn/network.nim  | 33 ++++++++++++++++++---------------
 src/util/matrix.nim | 23 +++++++++++------------
 3 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/src/main.nim b/src/main.nim
index 3868677..059ec95 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -85,8 +85,8 @@ proc main =
                       "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
                       "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
                       "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 10
-    const batch = 200
+    const epochs = 100
+    const batch = 400
     const inputSize = 512
     let encoder = newLabelEncoder()
     let cleaner = newTextPreprocessor(stopwords=newMatrix(stopwords), toLower=true,
@@ -97,11 +97,11 @@ proc main =
                                         newDenseLayer(8, 16),
                                         newDenseLayer(16, 2)],
                               lossFunc=MSE,
-                              activationFunc=Softmax,
-                              learnRate=5,
-                              momentum=0.3,
-                              weightRange=(-10.0, 10.0),
-                              biasRange=(-10.0, 10.0))
+                              activationFunc=SiLU,
+                              learnRate=0.001,
+                              momentum=0.0,
+                              weightRange=(-1.0, 1.0),
+                              biasRange=(-1.0, 1.0))
     echo "ProjectSydney v0.2b - Accuracy test"
     echo "Loading dataset and testset"
     let loadTime = cpuTime()
@@ -159,9 +159,9 @@ proc main =
     echo ""
     echo &"Training neural network for {epochs} epochs with batch size of {batch}"
     let trainTime = cpuTime()
-    classifier.train(epochs, batch, trainData, #[testData]#)
+    classifier.train(epochs, batch, trainData, testData)
     echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
-    echo "\nTest parameters"
+    echo "Test parameters"
     echo &"\t- Test size: {len(data.testset)}"
     let testTime = cpuTime()
     var pred: seq[int] = @[]
diff --git a/src/nn/network.nim b/src/nn/network.nim
index a5fa0c6..3464e95 100644
--- a/src/nn/network.nim
+++ b/src/nn/network.nim
@@ -106,6 +106,9 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Act
     ## Weights and biases are initialized with
     ## random values in the chosen range using
     ## nim's default PRNG
+    when not defined(release):
+        if momentum > 1.0:
+            raise newException(ValueError, "momentum should not be greater than one")
     new(result)
     result.layers = topology
     for layer in result.layers:
@@ -139,7 +142,8 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
     ## (activated) output from the last layer is returned
     result = x
     for layer in self.layers:
-        result = self.activation.function(layer.feed(result))
+        result = layer.feed(result)
+        result = self.activation.function(result)
 
 
 proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
@@ -170,7 +174,7 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
         deltaW: seq[Matrix[float]] = @[]
         deltaB: seq[Matrix[float]] = @[]
         # Activations of each layer
-        activations: seq[Matrix[float]] = @[]
+        activations: seq[Matrix[float]] = @[x]
         # Unactivated outputs of each layer
         unactivated: seq[Matrix[float]] = @[]
 
@@ -191,13 +195,13 @@
     # have to do fancy calculus stuff to figure out the derivative)
     var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.activation.derivative(unactivated[^1])
     deltaB[^1].replace(diff)
-    deltaW[^1].replace(activations[^2].transpose())
+    deltaW[^1].replace(diff.dot(activations[^2].transpose()))
     # Backwards pass (actually the backwards pass began two lines earlier, we're just feeding
     # the correction back through the rest of the network now)
-    for l in 1..
diff --git a/src/util/matrix.nim b/src/util/matrix.nim
--- a/src/util/matrix.nim
+++ b/src/util/matrix.nim
@@ ... @@
-            m = self[row, col]
-    if self.shape.rows == 0:
-        while col < self.shape.cols:
-            if self[0, col] > m:
-                m = self[0, col]
-            inc(col)
-    return self.getIndex(row, col)
+    r = 0
+    c = 0
+    for i, row in self:
+        for j, e in row:
+            if e == m:
+                continue
+            elif e > m:
+                m = e
+                r = i
+                c = j
+    return self.getIndex(r, c)
 
 
 proc contains*[T](self: Matrix[T], e: T): bool =
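
Note on the matrix.nim hunk: the old argmax kept updating the running maximum m but never recorded where it was found, so getIndex() was called with whatever row/col the scan loops happened to stop at. Below is a standalone sketch of the corrected scan; it uses a plain seq[seq[float]] and a hypothetical argmaxSketch helper rather than the project's Matrix[T] type, so the names are illustrative only.

# Sketch of the fixed argmax logic: track the best value *and* its
# position, instead of only updating the running maximum.
proc argmaxSketch(m: seq[seq[float]]): tuple[row, col: int] =
  var best = m[0][0]
  var r, c = 0
  for i, row in m:
    for j, e in row:
      if e > best:
        best = e
        r = i
        c = j
  result = (row: r, col: c)

when isMainModule:
  # The maximum (0.9) sits at row 1, col 0; the pre-fix loop structure
  # could instead report the position where the scan terminated.
  echo argmaxSketch(@[@[0.1, 0.3], @[0.9, 0.2]])  # -> (row: 1, col: 0)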
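Note on the backprop hunk in network.nim: the weight gradient of the output layer is the outer product of the error term with the previous layer's activations (delta * a^T), which is what diff.dot(activations[^2].transpose()) now computes; the old line stored the transposed activations alone and dropped the error term entirely. A minimal seq-based sketch of that outer product, with a hypothetical outer helper standing in for Matrix dot on column vectors:

# delta is the error column vector (one entry per output neuron) and a
# the previous layer's activations (one per input); their outer product
# is the matrix holding one gradient entry per weight.
proc outer(delta, a: seq[float]): seq[seq[float]] =
  for d in delta:
    var row: seq[float] = @[]
    for x in a:
      row.add(d * x)
    result.add(row)

when isMainModule:
  # 2 output neurons and 3 inputs give a 2x3 weight gradient
  echo outer(@[0.5, -1.0], @[1.0, 2.0, 3.0])
  # -> @[@[0.5, 1.0, 1.5], @[-1.0, -2.0, -3.0]]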