Initial ground work for neural network complete

This commit is contained in:
Mattia Giambirtone 2023-03-21 16:44:08 +01:00
parent 9506c554ec
commit a97cec41a6
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
3 changed files with 134 additions and 19 deletions

View File

@@ -2,9 +2,9 @@ import nn/network
import nn/util/matrix
var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, Sigmoid),
newDenseLayer(3, 2, Sigmoid),
newDenseLayer(2, 3, Softmax)],
var mlp = newNeuralNetwork(@[newDenseLayer(784, 10, Sigmoid),
newDenseLayer(10, 16, Sigmoid),
newDenseLayer(16, 10, Softmax)],
lossFunc=MSE, learnRate=0.05, momentum=0.55,
weightRange=(start: -1.0, stop: 1.0), biasRange=(start: -1.0, stop: 1.0))
echo mlp.feedforward(newMatrix[float](@[1.0, 2.0]))
weightRange=(start: -1.0, stop: 1.0),
biasRange=(start: -1.0, stop: 1.0))
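# Not part of this commit, just a usage sketch for the new topology above:
# with a 784-input first layer, feedforward now expects a 1x784 matrix, so a
# dummy input can be built with newSeqWith from std/sequtils (the 0.5 fill
# value is arbitrary).
import std/sequtils
let sample = newMatrix[float](newSeqWith(784, 0.5))
echo mlp.feedforward(sample)  # prints the final 10-element softmax output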

View File

@@ -18,6 +18,7 @@ import util/matrix
import std/strformat
import std/random
import std/math
import std/sequtils
randomize()
@@ -128,22 +129,94 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, m
result.momentum = momentum
proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
## Feeds the given input through the network and
## returns the output as a single-row matrix
when not defined(release):
if data.shape.rows > 1:
raise newException(ValueError, "input data must be one-dimensional")
if data.shape.cols != self.layers[0].inputSize:
raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
result = data
for layer in self.layers:
result = layer.activation.function(layer.weights.dot(result) + layer.biases)
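# i.e. with a_0 = data and z_l = W_l.dot(a_(l-1)) + b_l, each iteration
# computes a_l = activation_l(z_l); the final a_L is the network's output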
proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases: seq[Matrix[float]]] =
## Performs a single backpropagation step and returns the
## gradient of the cost function for the weights and biases
## of the network according to the given training sample
var
# The deltas for the weights and biases of
# each layer in the network
deltaW: seq[Matrix[float]] = @[]
deltaB: seq[Matrix[float]] = @[]
# Activations of each layer
activation = x
activations: seq[Matrix[float]] = @[x]
# Unactivated outputs of each layer
unactivated: seq[Matrix[float]] = @[]
# Forward pass through the network
for layer in self.layers:
deltaW.add(zeros[float](layer.weights.shape))
deltaB.add(zeros[float](layer.biases.shape))
unactivated.add(layer.weights.dot(activation) + layer.biases)
activation = layer.activation.function(unactivated[^1])
activations.add(activation)
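# At this point unactivated[i] holds z = W*a + b for layer i + 1 and
# activations holds [x, a_1, ..., a_L]; both are reused in the
# backwards pass below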
# Backwards pass
# The negative gradient of each layer for this sample: this is a
# partial derivative, so the multiplication here is just an
# application of the chain rule!
var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.layers[^1].activation.derivative(unactivated[^1])
deltaB[^1].replace(diff)
deltaW[^1].replace(diff.dot(activations[^2].transpose()))
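# For reference, these three lines are the usual output-layer rule: with C
# the loss and sigma_L the activation of the last layer,
#   delta_L  = dC/da_L (element-wise *) sigma_L'(z_L)
#   grad b_L = delta_L
#   grad W_L = delta_L.dot(a_(L-1) transposed)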
for l in 2..self.layers.len():
# The ^ makes our indices start from the back instead of
# from the front, so we're really iterating over our layers
# backwards! Note that the weights we multiply by belong to the
# layer that comes *after* the current one
diff = self.layers[^(l - 1)].weights.transpose.dot(diff) * self.layers[^l].activation.derivative(unactivated[^l])
deltaB[^l].replace(diff)
deltaW[^l].replace(diff.dot(activations[^(l + 1)].transpose()))
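# The recurrence applied above, walking backwards through the layers, is
#   delta_l  = (W_(l+1) transposed).dot(delta_(l+1)) (element-wise *) sigma_l'(z_l)
#   grad b_l = delta_l
#   grad W_l = delta_l.dot(a_(l-1) transposed)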
return (deltaW, deltaB)
proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
## Performs a single backpropagation step and updates the
## gradients for our weights and biases, layer by layer
proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
## Performs a single mini-batch step in stochastic gradient
## descent and updates the network's weights and biases
## accordingly
var gradient: tuple[weights, biases: seq[Matrix[float]]]
# New weights and biases
var
weights: seq[Matrix[float]] = @[]
biases: seq[Matrix[float]] = @[]
for layer in self.layers:
weights.add(zeros[float](layer.weights.shape))
biases.add(zeros[float](layer.biases.shape))
for dataPoint in data:
gradient = self.backprop(dataPoint.x, dataPoint.y)
for i, (currentBiases, newBiases) in zip(biases, gradient.biases):
biases[i] = currentBiases + newBiases
for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
weights[i] = currentWeights + newWeights
# The backpropagation algorithm gives us the gradient of steepest ascent
# of our cost function, so we subtract it from the current weights and biases
# to descend it as fast as possible. It's not actually *the* fastest descent
# (true gradient descent would average over all training samples), but it's a
# pretty good approximation nonetheless: it converges quickly, and the noise it
# introduces even helps prevent overfitting, because the network never trains
# on the exact same data over and over again
for (layer, newBiases) in zip(self.layers, biases):
layer.biases = layer.biases - (self.learnRate / data.len().float) * newBiases
for (layer, newWeights) in zip(self.layers, weights):
layer.weights = layer.weights - (self.learnRate / data.len().float) * newWeights
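# Put differently, for a mini-batch of m samples the update applied above is
#   b_l <- b_l - (learnRate / m) * (sum of the per-sample bias gradients)
#   W_l <- W_l - (learnRate / m) * (sum of the per-sample weight gradients)
# i.e. plain averaged stochastic gradient descent; the momentum field set in
# newNeuralNetwork is not used in this update (at least not in this hunk)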
proc train*(self: NeuralNetwork, epochs: int, batchSize: int, data: var seq[tuple[x, y: Matrix[float]]]) =
## Train the network on the given data for the specified
## number of epochs using the given batch size by applying
## stochastic gradient descent
var batches: seq[seq[tuple[x, y: Matrix[float]]]]
for epoch in 0..<epochs:
# We shuffle the data so that each epoch splits the samples
# into different mini-batches. This will hopefully help
# the network generalize its training onto unseen data
shuffle(data)
batches = @[]
var i = 0
while i < data.len():
batches.add(@[])
for j in 0..<min(batchSize, data.len() - i):
batches[^1].add(data[i + j])
i += batchSize
for batch in batches:
self.miniBatch(batch)
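# Not part of this commit: a sketch of how train() might be driven, using the
# mlp network from main.nim and dummy data (64 fake samples with 1x784 inputs
# and all-zero 1x10 targets to match the topology; all the numbers are arbitrary).
import std/sequtils
var trainingData: seq[tuple[x, y: Matrix[float]]] = @[]
for _ in 0 ..< 64:
    let x = newMatrix[float](newSeqWith(784, 0.5))
    let y = newMatrix[float](newSeqWith(10, 0.0))
    trainingData.add((x: x, y: y))
mlp.train(epochs = 1, batchSize = 32, data = trainingData)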
## Utility functions

View File

@@ -425,7 +425,6 @@ proc copy*[T](self: MatrixView[T]): Matrix[T] =
for e in self:
result.data[].add(e)
result.shape = self.shape
result.m = self.m
proc dup*[T](self: MatrixView[T]): MatrixView[T] =
@@ -462,6 +461,20 @@ proc `/`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(divide, b, axis= -1
proc `/`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(divide, a, axis= -1)
proc `+`*[T](a: MatrixView[T], b: T): Matrix[T] = a.copy().apply(add, b, axis= -1)
proc `+`*[T](a: T, b: MatrixView[T]): Matrix[T] = b.copy().apply(add, a, axis= -1)
proc `-`*[T](a: MatrixView[T], b: T): Matrix[T] = a.copy().apply(sub, b, axis= -1)
proc `-`*[T](a: T, b: MatrixView[T]): Matrix[T] = b.copy().apply(sub, a, axis= -1)
proc `-`*[T](a: MatrixView[T]): Matrix[T] = a.copy().apply(neg, a, axis= -1)
proc `*`*[T](a: MatrixView[T], b: T): Matrix[T] = a.copy().apply(mul, b, axis = -1)
proc `*`*[T](a: T, b: MatrixView[T]): Matrix[T] = b.copy().apply(mul, a, axis= -1)
proc `/`*[T](a: MatrixView[T], b: T): Matrix[T] = a.copy().apply(divide, b, axis= -1)
proc `/`*[T](a: T, b: MatrixView[T]): Matrix[T] = b.copy().apply(divide, a, axis= -1)
# matrix/matrix operations. They produce a new matrix with the
# result of the operation
@@ -942,6 +955,23 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
return self * other
proc dot*[T](self: MatrixView[T], other: Matrix[T]): Matrix[T] =
## Computes the dot product of the two
## input matrices
when not defined(release):
if self.shape.cols != other.shape.cols:
raise newException(ValueError, &"incompatible argument shapes for dot product")
result = zeros[T]((0, self.shape.rows))
for i in 0..<result.shape.cols:
result[0, i] = (other[0] * self[i]).sum()
proc dot*[T](self: Matrix[T], other: MatrixView[T]): Matrix[T] {.inline.} = result = other.dot(self)
proc dot*[T](self, other: MatrixView[T]): T = (self * other).sum()
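# Worked example for the last overload: for two row views [1, 2, 3] and
# [4, 5, 6] it returns 1*4 + 2*5 + 3*6 = 32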
proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
## Return elements chosen from x or y depending on cond
## Where cond is true, take elements from x, otherwise
@@ -1046,6 +1076,18 @@ proc count*[T](self: Matrix[T], e: T): int =
inc(result)
proc replace*[T](self: Matrix[T], other: Matrix[T], copy: bool = false) =
## Replaces the data in self with the data from
## other. The underlying buffer is shared with other
## unless copy is true, in which case only the element
## data is copied over
if copy:
self.data[] = other.data[]
else:
self.data = other.data
self.order = other.order
self.shape = other.shape
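# A small sketch of the difference (a and b being any two matrices of the
# same shape):
#   a.replace(b)              # a now shares b's underlying data buffer
#   a.replace(b, copy = true) # only the elements are copied, a keeps its own buffer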
when isMainModule:
import math