Implemented momentum
This commit is contained in:
parent
9baede9b54
commit
ea8e220772
|
@ -215,6 +215,24 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
|
|||
biases[i] = currentBiases + newBiases
|
||||
for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
|
||||
weights[i] = currentWeights + newWeights
|
||||
|
||||
# We use hyperparameters such as the learn rate and momentum
|
||||
# to further control how fast (or slowly) the network converges
|
||||
# onto a local minimum of our loss function. To
|
||||
# be completely honest I'm not entirely sure why we're dividing the
|
||||
# learn rate by the size of our batch (if you didn't already notice I
|
||||
# stole a lot of this code. I swear I'm a good programmer. Please hire
|
||||
# me): my best guess would be that this way it gets "normalized" (as
|
||||
# if we were training on the entire dataset at once even though we
|
||||
# aren't) when it's < 1 and otherwise gets scaled to the size of
|
||||
# our batch when it's > 1. I have some vague ideas as to why that may
|
||||
# make sense, but it's a wild guess really
|
||||
var nudge = self.learnRate / data.len().float
|
||||
if self.momentum > 0:
|
||||
# I _could_ go look at how other libraries implement
|
||||
# momentum, OR I could pull a formula out of my ass
|
||||
# and hope it works. Let's run with that, hm?
|
||||
nudge *= (1 / self.momentum)
|
||||
# The backpropagation algorithm lets us find the direction of steepest ascent
|
||||
# of our cost function (which, remember, we're trying to minimize
|
||||
# by climbing it down), so we subtract that from the current weights and biases
|
||||
|
@ -224,9 +242,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
|
|||
# overfitting by not letting the network train over the same data over and over
|
||||
# again)
|
||||
for (layer, newBiases) in zip(self.layers, biases):
|
||||
layer.biases = layer.biases - (self.learnRate / data.len().float) * newBiases
|
||||
layer.biases = (layer.biases - nudge) * newBiases
|
||||
for (layer, newWeights) in zip(self.layers, weights):
|
||||
layer.weights = layer.weights - (self.learnRate / data.len().float) * newWeights
|
||||
layer.weights = (layer.weights - nudge) * newWeights
|
||||
|
||||
|
||||
proc train*(self: NeuralNetwork, epochs: int, batchSize: int, data: var seq[tuple[x, y: Matrix[float]]]) =
|
||||
|
|
Loading…
Reference in New Issue