Implemented momentum

Mattia Giambirtone 2023-03-21 19:10:30 +01:00
parent 9baede9b54
commit e3265fac68
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
1 changed file with 20 additions and 2 deletions


@@ -215,6 +215,24 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
    biases[i] = currentBiases + newBiases
  for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
    weights[i] = currentWeights + newWeights
  # We use hyperparameters such as the learn rate and momentum
  # to further control how fast (or slowly) the network converges
  # onto a local minimum of our loss function. The learn rate is
  # divided by the batch size because the gradient accumulated above
  # is a sum over every sample in the batch: scaling the step by
  # 1 / batchSize turns that sum into an average, so the size of a
  # single update doesn't grow with the batch and stays comparable
  # to training on the full dataset at once (see the sketch at the
  # end for a worked example)
  var nudge = self.learnRate / data.len().float
  if self.momentum > 0:
    # This is a stopgap rather than real momentum: it only scales
    # the step size by a factor of 1 / momentum, whereas classical
    # momentum keeps a running velocity across updates (see the
    # sketch right after this hunk)
    nudge *= (1 / self.momentum)
  # The backpropagation algorithm gives us the gradient of our cost
  # function, which points in the direction of steepest ascent; since
  # we want to minimize the cost, we subtract it from the current
  # weights and biases
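For reference, here is a minimal, self-contained sketch of how classical momentum is typically implemented (the `Sgd` type, its fields and the `step` proc are illustrative names, not part of this repo): a velocity vector accumulates an exponentially decaying sum of past gradients and is added to the parameters, instead of rescaling the learn rate as the code above does.

```nim
type Sgd = object
  learnRate: float
  momentum: float       # typically in [0, 1), e.g. 0.9
  velocity: seq[float]  # persists across mini-batches

proc step(self: var Sgd, params: var seq[float], grad: seq[float]) =
  # One SGD-with-momentum update:
  #   v <- momentum * v - learnRate * g
  #   w <- w + v
  if self.velocity.len == 0:
    self.velocity = newSeq[float](params.len)
  for i in 0 ..< params.len:
    self.velocity[i] = self.momentum * self.velocity[i] - self.learnRate * grad[i]
    params[i] += self.velocity[i]

when isMainModule:
  var opt = Sgd(learnRate: 0.1, momentum: 0.9)
  var w = @[1.0, -2.0]
  opt.step(w, @[0.5, -0.5])  # first update moves w by -/+ 0.05
  opt.step(w, @[0.5, -0.5])  # same gradient, but the built-up velocity makes this step larger
  echo w                     # roughly @[0.855, -1.855]
```

Because the velocity persists across calls, repeated gradients in the same direction accelerate the descent, while oscillating gradients partially cancel out; a plain 1 / momentum rescale of the step reproduces neither effect.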
@@ -224,9 +242,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
  # overfitting by not letting the network train over the same data over and over
  # again)
  for (layer, newBiases) in zip(self.layers, biases):
    layer.biases = layer.biases - (self.learnRate / data.len().float) * newBiases
    layer.biases = layer.biases - nudge * newBiases
  for (layer, newWeights) in zip(self.layers, weights):
    layer.weights = layer.weights - (self.learnRate / data.len().float) * newWeights
    layer.weights = layer.weights - nudge * newWeights
proc train*(self: NeuralNetwork, epochs: int, batchSize: int, data: var seq[tuple[x, y: Matrix[float]]]) =
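Finally, a tiny standalone sketch (the numbers are made up) of the learn-rate normalization discussed above: since the accumulated gradient is a sum over the batch, dividing the learn rate by the batch size is exactly the same as stepping along the average per-sample gradient, which keeps the step size independent of how large the batch is.

```nim
let
  learnRate = 3.0
  batchSize = 4
  summedGrad = 8.0  # pretend sum of per-sample gradients over the batch
  # dividing the learn rate by the batch size...
  stepA = learnRate / batchSize.float * summedGrad
  # ...is the same as averaging the summed gradient first
  stepB = learnRate * (summedGrad / batchSize.float)
assert stepA == stepB
echo stepA  # 6.0 either way
```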