From 924eb8cf6da0aabcb9299955d22bfddf1db1a0f0 Mon Sep 17 00:00:00 2001
From: Mattia Giambirtone
Date: Tue, 28 Mar 2023 10:28:05 +0200
Subject: [PATCH] Fixed argmax

---
 src/main.nim        | 18 +++++++++---------
 src/nn/network.nim  | 33 ++++++++++++++++++---------------
 src/util/matrix.nim | 23 +++++++++++------------
 3 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/src/main.nim b/src/main.nim
index 3868677..059ec95 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -85,8 +85,8 @@ proc main =
                       "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
                       "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
                       "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 10
-    const batch = 200
+    const epochs = 100
+    const batch = 400
     const inputSize = 512
     let encoder = newLabelEncoder()
     let cleaner = newTextPreprocessor(stopwords=newMatrix(stopwords), toLower=true,
@@ -97,11 +97,11 @@ proc main =
                                         newDenseLayer(8, 16),
                                         newDenseLayer(16, 2)],
                               lossFunc=MSE,
-                              activationFunc=Softmax,
-                              learnRate=5,
-                              momentum=0.3,
-                              weightRange=(-10.0, 10.0),
-                              biasRange=(-10.0, 10.0))
+                              activationFunc=SiLU,
+                              learnRate=0.001,
+                              momentum=0.0,
+                              weightRange=(-1.0, 1.0),
+                              biasRange=(-1.0, 1.0))
     echo "ProjectSydney v0.2b - Accuracy test"
     echo "Loading dataset and testset"
     let loadTime = cpuTime()
@@ -159,9 +159,9 @@ proc main =
     echo ""
     echo &"Training neural network for {epochs} epochs with batch size of {batch}"
     let trainTime = cpuTime()
-    classifier.train(epochs, batch, trainData, #[testData]#)
+    classifier.train(epochs, batch, trainData, testData)
     echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
-    echo "\nTest parameters"
+    echo "Test parameters"
     echo &"\t- Test size: {len(data.testset)}"
     let testTime = cpuTime()
     var pred: seq[int] = @[]
diff --git a/src/nn/network.nim b/src/nn/network.nim
index a5fa0c6..3464e95 100644
--- a/src/nn/network.nim
+++ b/src/nn/network.nim
@@ -106,6 +106,9 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Act
     ## Weights and biases are initialized with
     ## random values in the chosen range using
     ## nim's default PRNG
+    when not defined(release):
+        if momentum > 1.0:
+            raise newException(ValueError, "momentum should not be greater than one")
     new(result)
     result.layers = topology
     for layer in result.layers:
@@ -139,7 +142,8 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
     ## (activated) output from the last layer is returned
     result = x
     for layer in self.layers:
-        result = self.activation.function(layer.feed(result))
+        result = layer.feed(result)
+        result = self.activation.function(result)
 
 
 proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
@@ -170,7 +174,7 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
         deltaW: seq[Matrix[float]] = @[]
         deltaB: seq[Matrix[float]] = @[]
         # Activations of each layer
-        activations: seq[Matrix[float]] = @[]
+        activations: seq[Matrix[float]] = @[x]
         # Unactivated outputs of each layer
         unactivated: seq[Matrix[float]] = @[]
 
@@ -191,13 +195,13 @@
     # have to do fancy calculus stuff to figure out the derivative)
     var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.activation.derivative(unactivated[^1])
     deltaB[^1].replace(diff)
-    deltaW[^1].replace(activations[^2].transpose())
+    deltaW[^1].replace(diff.dot(activations[^2].transpose()))
     # Backwards pass (actually the backwards pass began two lines earlier, we're just feeding
     # the correction back through the rest of the network now)
-    for l in 1..
diff --git a/src/util/matrix.nim b/src/util/matrix.nim
--- a/src/util/matrix.nim
+++ b/src/util/matrix.nim
@@ ... @@
-            m = self[row, col]
-    if self.shape.rows == 0:
-        while col < self.shape.cols:
-            if self[0, col] > m:
-                m = self[0, col]
-            inc(col)
-    return self.getIndex(row, col)
+    r = 0
+    c = 0
+    for i, row in self:
+        for j, e in row:
+            if e == m:
+                continue
+            elif e > m:
+                m = e
+                r = i
+                c = j
+    return self.getIndex(r, c)
 
 
 proc contains*[T](self: Matrix[T], e: T): bool =
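
Note on the matrix.nim hunk: the old argmax kept updating the running maximum m but never recorded where it was found, so getIndex() was called with whatever row/col the scan loops happened to stop at. Below is a standalone sketch of the corrected scan; it uses a plain seq[seq[float]] and a hypothetical argmaxSketch helper rather than the project's Matrix[T] type, so the names are illustrative only.

# Sketch of the fixed argmax logic: track the best value *and* its
# position, instead of only updating the running maximum.
proc argmaxSketch(m: seq[seq[float]]): tuple[row, col: int] =
  var best = m[0][0]
  var r, c = 0
  for i, row in m:
    for j, e in row:
      if e > best:
        best = e
        r = i
        c = j
  result = (row: r, col: c)

when isMainModule:
  # The maximum (0.9) sits at row 1, col 0; the pre-fix loop structure
  # could instead report the position where the scan terminated.
  echo argmaxSketch(@[@[0.1, 0.3], @[0.9, 0.2]])  # -> (row: 1, col: 0)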
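Note on the backprop hunk in network.nim: the weight gradient of the output layer is the outer product of the error term with the previous layer's activations (delta * a^T), which is what diff.dot(activations[^2].transpose()) now computes; the old line stored the transposed activations alone and dropped the error term entirely. A minimal seq-based sketch of that outer product, with a hypothetical outer helper standing in for Matrix dot on column vectors:

# delta is the error column vector (one entry per output neuron) and a
# the previous layer's activations (one per input); their outer product
# is the matrix holding one gradient entry per weight.
proc outer(delta, a: seq[float]): seq[seq[float]] =
  for d in delta:
    var row: seq[float] = @[]
    for x in a:
      row.add(d * x)
    result.add(row)

when isMainModule:
  # 2 output neurons and 3 inputs give a 2x3 weight gradient
  echo outer(@[0.5, -1.0], @[1.0, 2.0, 3.0])
  # -> @[@[0.5, 1.0, 1.5], @[-1.0, -2.0, -3.0]]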