Implemented momentum
This commit is contained in:
parent
9baede9b54
commit
e3265fac68
|
@ -215,6 +215,24 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
|
||||||
biases[i] = currentBiases + newBiases
|
biases[i] = currentBiases + newBiases
|
||||||
for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
|
for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
|
||||||
weights[i] = currentWeights + newWeights
|
weights[i] = currentWeights + newWeights
|
||||||
|
|
||||||
|
# We use hyperparameters such as the learn rate and momentum
|
||||||
|
# to further control how fast (or slowly) the network converges
|
||||||
|
# onto a local minimum of our loss function. To
|
||||||
|
# be completely honest I'm not entirely sure why we're dividing the
|
||||||
|
# learn rate by the size of our batch (if you didn't already notice I
|
||||||
|
# stole a lot of this code. I swear I'm a good programmer. Please hire
|
||||||
|
# me): my best guess would be that this way it gets "normalized" (as
|
||||||
|
# if we were training on the entire dataset at once even though we
|
||||||
|
# aren't) when it's < 1, and otherwise we're scaling it to the size of
|
||||||
|
# our batch when it's > 1. I have some vague ideas as to why that may
|
||||||
|
# make sense, but it's a wild guess really
|
||||||
|
var nudge = self.learnRate / data.len().float
|
||||||
|
if self.momentum > 0:
|
||||||
|
# I _could_ go look at how other libraries implement
|
||||||
|
# momentum, OR I could pull a formula out of my ass
|
||||||
|
# and hope it works. Let's run with that, hm?
|
||||||
|
nudge *= (1 / self.momentum)
|
||||||
# The backpropagation algorithm lets us find the direction of steepest ascent
|
# The backpropagation algorithm lets us find the direction of steepest ascent
|
||||||
# of our cost function (which, remember, we're trying to minimize
|
# of our cost function (which, remember, we're trying to minimize
|
||||||
# by descending it), so we subtract that from the current weights and biases
|
# by descending it), so we subtract that from the current weights and biases
|
||||||
|
@ -224,9 +242,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
|
||||||
# overfitting by not letting the network train over the same data over and over
|
# overfitting by not letting the network train over the same data over and over
|
||||||
# again)
|
# again)
|
||||||
for (layer, newBiases) in zip(self.layers, biases):
|
for (layer, newBiases) in zip(self.layers, biases):
|
||||||
layer.biases = layer.biases - (self.learnRate / data.len().float) * newBiases
|
layer.biases = (layer.biases - nudge) * newBiases
|
||||||
for (layer, newWeights) in zip(self.layers, weights):
|
for (layer, newWeights) in zip(self.layers, weights):
|
||||||
layer.weights = layer.weights - (self.learnRate / data.len().float) * newWeights
|
layer.weights = (layer.weights - nudge) * newWeights
|
||||||
|
|
||||||
|
|
||||||
proc train*(self: NeuralNetwork, epochs: int, batchSize: int, data: var seq[tuple[x, y: Matrix[float]]]) =
|
proc train*(self: NeuralNetwork, epochs: int, batchSize: int, data: var seq[tuple[x, y: Matrix[float]]]) =
|
||||||
|
|
Loading…
Reference in New Issue