Fixed argmax
This commit is contained in:
parent 844883ced7
commit 924eb8cf6d
src/main.nim | 18 +++++++++---------
@@ -85,8 +85,8 @@ proc main =
         "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
         "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
         "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 10
-    const batch = 200
+    const epochs = 100
+    const batch = 400
     const inputSize = 512
     let encoder = newLabelEncoder()
     let cleaner = newTextPreprocessor(stopwords=newMatrix(stopwords), toLower=true,
@@ -97,11 +97,11 @@ proc main =
                                        newDenseLayer(8, 16),
                                        newDenseLayer(16, 2)],
                                       lossFunc=MSE,
-                                      activationFunc=Softmax,
-                                      learnRate=5,
-                                      momentum=0.3,
-                                      weightRange=(-10.0, 10.0),
-                                      biasRange=(-10.0, 10.0))
+                                      activationFunc=SiLU,
+                                      learnRate=0.001,
+                                      momentum=0.0,
+                                      weightRange=(-1.0, 1.0),
+                                      biasRange=(-1.0, 1.0))
     echo "ProjectSydney v0.2b - Accuracy test"
     echo "Loading dataset and testset"
     let loadTime = cpuTime()
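Note: SiLU (the sigmoid-weighted linear unit) is defined as silu(x) = x * sigmoid(x), which, unlike Softmax, is applied element-wise and so makes sense as a per-layer activation. A minimal sketch of the textbook definition, independent of how this library spells it:

    import std/math

    # silu(x) = x * sigmoid(x) = x / (1 + e^(-x))
    func silu(x: float): float =
        x / (1.0 + exp(-x))

    echo silu(1.0)   # ~0.731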
@@ -159,9 +159,9 @@ proc main =
     echo ""
     echo &"Training neural network for {epochs} epochs with batch size of {batch}"
     let trainTime = cpuTime()
-    classifier.train(epochs, batch, trainData, #[testData]#)
+    classifier.train(epochs, batch, trainData, testData)
     echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
-    echo "\nTest parameters"
+    echo "Test parameters"
     echo &"\t- Test size: {len(data.testset)}"
     let testTime = cpuTime()
     var pred: seq[int] = @[]
@@ -106,6 +106,9 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Act
     ## Weights and biases are initialized with
     ## random values in the chosen range using
     ## nim's default PRNG
+    when not defined(release):
+        if momentum > 1.0:
+            raise newException(ValueError, "momentum should not be greater than one")
     new(result)
     result.layers = topology
     for layer in result.layers:
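Note: the new debug-only check rejects momentum above 1.0 because momentum scales the previous update; a factor greater than one would make the accumulated step grow instead of decay. A scalar sketch of the classical momentum formulation, with an illustrative velocity variable rather than this library's internals:

    # One momentum step on a single weight with a toy loss of w^2.
    # velocity, learnRate and momentum values here are illustrative.
    var
        weight = 0.5
        velocity = 0.0
    let
        learnRate = 0.001
        momentum = 0.3
    let gradW = 2.0 * weight   # d/dw of w^2
    velocity = momentum * velocity - learnRate * gradW
    weight += velocity
    echo weight                # slightly below 0.5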
@@ -139,7 +142,8 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
     ## (activated) output from the last layer is returned
     result = x
     for layer in self.layers:
-        result = self.activation.function(layer.feed(result))
+        result = layer.feed(result)
+        result = self.activation.function(result)


 proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
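Note: the split makes the per-layer recurrence explicit: layer.feed computes the affine output z = W·a + b, and the activation then gives a = σ(z). A self-contained toy version of the same loop on scalars (sigmoid and the (w, b) pairs are stand-ins, not this library's Layer type):

    import std/math

    func sigmoid(x: float): float = 1.0 / (1.0 + exp(-x))

    # Two scalar "layers": feed is w * x + b, then activate.
    let layers = @[(w: 0.8, b: 0.1), (w: -1.2, b: 0.4)]
    var output = 0.5   # the input x
    for layer in layers:
        output = layer.w * output + layer.b   # layer.feed equivalent
        output = sigmoid(output)              # activation per layer
    echo output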
@@ -170,7 +174,7 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
         deltaW: seq[Matrix[float]] = @[]
         deltaB: seq[Matrix[float]] = @[]
         # Activations of each layer
-        activations: seq[Matrix[float]] = @[]
+        activations: seq[Matrix[float]] = @[x]
         # Unactivated outputs of each layer
         unactivated: seq[Matrix[float]] = @[]

@@ -191,13 +195,13 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
     # have to do fancy calculus stuff to figure out the derivative)
     var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.activation.derivative(unactivated[^1])
     deltaB[^1].replace(diff)
-    deltaW[^1].replace(activations[^2].transpose())
+    deltaW[^1].replace(diff.dot(activations[^2].transpose()))
     # Backwards pass (actually the backwards pass began two lines earlier, we're just feeding
     # the correction back through the rest of the network now)
-    for l in 1..<self.layers.high():
-        diff = self.layers[^l].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
+    for l in 2..self.layers.high():
+        diff = self.layers[^(l + 1)].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
         deltaB[^l].replace(diff)
-        deltaW[^l].replace(diff.dot(activations[^l].transpose()))
+        deltaW[^l].replace(diff.dot(activations[^(l - 1)].transpose()))
     return (deltaW, deltaB)


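Note: the "fancy calculus stuff" is the standard backpropagation chain rule, and it is why the deltaW fix multiplies by diff. With C the loss, σ the activation, z^l and a^l the unactivated and activated outputs of layer l, and L the last layer:

    \delta^L = C'(a^L, y) \odot \sigma'(z^L)
    \partial C / \partial b^L = \delta^L
    \partial C / \partial W^L = \delta^L \, (a^{L-1})^\top
    \delta^l = (W^{l+1})^\top \, \delta^{l+1} \odot \sigma'(z^l)

The old line stored only (a^{L-1})^\top in deltaW, dropping the \delta^L factor that the loss and activation derivatives contribute.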
@@ -205,21 +209,20 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
     ## Performs a single mini-batch step in stochastic gradient
     ## descent and updates the network's weights and biases
     ## accordingly
-    var gradient: tuple[weights, biases: seq[Matrix[float]]]
-    # New weights and biases
     var
+        # New weights and biases
         weights: seq[Matrix[float]] = @[]
         biases: seq[Matrix[float]] = @[]
+        gradient: tuple[weights, biases: seq[Matrix[float]]]
     for layer in self.layers:
         weights.add(zeros[float](layer.weights.shape))
         biases.add(zeros[float](layer.biases.shape))
     for dataPoint in data:
         gradient = self.backprop(dataPoint.x, dataPoint.y)
-        for i, (currentBiases, newBiases) in zip(biases, gradient.biases):
-            biases[i] = currentBiases + newBiases
-        for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
-            weights[i] = currentWeights + newWeights
+        for i, newBiases in gradient.biases:
+            biases[i].replace(newBiases)
+        for i, newWeights in gradient.weights:
+            weights[i].replace(newWeights)

     # We use hyperparameters such as the learn rate and momentum
     # to further control how fast (or slowly) the network converges
     # onto a local minimum of the gradient of our loss function. To
@@ -246,9 +249,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
     # overfitting by not letting the network train over the same data over and over
     # again)
     for (layer, newBiases) in zip(self.layers, biases):
-        layer.biases = (layer.biases - nudge) * newBiases
+        layer.biases = layer.biases - (newBiases * nudge)
     for (layer, newWeights) in zip(self.layers, weights):
-        layer.weights = (layer.weights - nudge) * newWeights
+        layer.weights = layer.weights - (newWeights * nudge)


 proc eval(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]): float =
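Note: the last two fixes restore the plain gradient-descent update, parameter = parameter - gradient * step, where the old code subtracted the step from the parameter and then multiplied by the gradient. A scalar sketch (nudge is computed earlier in miniBatch from the hyperparameters; a small constant stands in for it here):

    var weight = 0.5
    let
        nudge = 0.001          # stand-in for the real step size
        gradW = 2.0 * weight   # toy accumulated gradient (loss = w^2)
    weight = weight - (gradW * nudge)   # the fixed update rule
    echo weight                         # 0.499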
@@ -1045,18 +1045,17 @@ proc argmax*[T](self: Matrix[T]): int =
     ## into the matrix
     var m: T = self[0, 0]
     var
-        row = 0
-        col = 0
-    while row < self.shape.rows:
-        while col < self.shape.cols:
-            if self[row, col] > m:
-                m = self[row, col]
-    if self.shape.rows == 0:
-        while col < self.shape.cols:
-            if self[0, col] > m:
-                m = self[0, col]
-            inc(col)
-    return self.getIndex(row, col)
+        r = 0
+        c = 0
+    for i, row in self:
+        for j, e in row:
+            if e == m:
+                continue
+            elif e > m:
+                m = e
+                r = i
+                c = j
+    return self.getIndex(r, c)


 proc contains*[T](self: Matrix[T], e: T): bool =
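Note: the rewritten argmax tracks the row and column of the running maximum and only converts to a flat index at the end; the old version returned getIndex(row, col) from loop counters that no longer pointed at the maximum. Assuming getIndex maps (row, col) to a row-major flat index (row * cols + col), a standalone version of the same scan over nested seqs:

    # Same scan as the fixed argmax, on plain nested seqs.
    let data = @[@[3.0, 1.0, 2.0],
                 @[0.0, 4.0, 9.0]]
    var
        m = data[0][0]
        r = 0
        c = 0
    for i, row in data:
        for j, e in row:
            if e > m:
                m = e
                r = i
                c = j
    echo r * data[0].len + c   # prints 5: the 9.0 at row 1, col 2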