NNExperiments/src/nn/network.nim

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import util/matrix
import std/strformat
import std/random


randomize()


type
    NeuralNetwork* = ref object
        ## A generic feed-forward
        ## neural network
        layers*: seq[Layer]
        loss: Loss   # The cost function along with its derivative
        # The network's learn rate determines
        # the size of the step taken along the
        # gradient at each iteration of gradient
        # descent
        learnRate*: float
        # The momentum serves to speed up convergence
        # when performing SGD: a fraction of the previous
        # update is carried over into the next one, so steps
        # taken along a consistent gradient direction build up
        # speed (a conventional formulation is sketched right
        # after this type section)
        momentum*: float
    Loss* = ref object
        ## A loss function and its derivative
        function: proc (a, b: Matrix[float]): float
        derivative: proc (x, y: float): float {.noSideEffect.}
    Activation* = ref object
        ## An activation function and its derivative
        function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
        derivative: proc (x, y: float): float {.noSideEffect.}
    Layer* = ref object
        ## A generic neural network
        ## layer
        inputSize*: int     # The number of inputs we process
        outputSize*: int    # The number of outputs we produce
        weights*: Matrix[float]   # The weights for each connection (2D)
        biases*: Matrix[float]    # The biases for each neuron (1D)
        gradients: tuple[weights, biases: Matrix[float]]   # Gradient coefficients for the weights and biases
        activation: Activation    # The layer's activation function
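
# A hedged sketch of how learnRate and momentum are conventionally combined when
# nudging a parameter during SGD (the actual update code is not shown in this
# excerpt, and the names below are purely illustrative, not part of this module):
#
#   update   = momentum * previousUpdate - learnRate * gradient
#   weights += update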


proc `$`*(self: Layer): string =
    ## Returns a string representation
    ## of the layer
    result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})"


proc `$`*(self: NeuralNetwork): string =
    ## Returns a string representation
    ## of the network
    result = &"NeuralNetwork(learnRate={self.learnRate}, layers={self.layers})"


proc newLoss*(function: proc (a, b: Matrix[float]): float, derivative: proc (x, y: float): float {.noSideEffect.}): Loss =
    ## Creates a new Loss object
    new(result)
    result.function = function
    result.derivative = derivative


proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}, derivative: proc (x, y: float): float {.noSideEffect.}): Activation =
    ## Creates a new Activation object
    new(result)
    result.function = function
    result.derivative = derivative
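
# Hedged usage sketch: assuming procs named `sigmoid`, `sigmoidDerivative`, `mse`
# and `mseDerivative` with the signatures expected above exist elsewhere (they are
# NOT defined in this file), the wrapper objects would be built like so:
#
#   let sigmoidActivation = newActivation(sigmoid, sigmoidDerivative)
#   let mseLoss = newLoss(mse, mseDerivative)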


proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
    ## Creates a new dense layer with inputSize inputs,
    ## outputSize outputs and the chosen activation
    ## function
    new(result)
    result.inputSize = inputSize
    result.outputSize = outputSize
    result.activation = activationFunc


proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
    ## Initializes a new neural network with
    ## the given topology and hyperparameters.
    ## Weights and biases are initialized with
    ## random values in the chosen range
    new(result)
    result.layers = topology
    for layer in result.layers:
        var biases = newSeqOfCap[float](layer.outputSize)
        var biasGradients = newSeqOfCap[float](layer.outputSize)
        for _ in 0..<layer.outputSize:
            biases.add(rand(biasRange.start..biasRange.stop))
            biasGradients.add(0.0)
        var weights = newSeqOfCap[float](layer.inputSize * layer.outputSize)
        var weightGradients = newSeqOfCap[float](layer.inputSize * layer.outputSize)
        for _ in 0..<layer.outputSize:
            for _ in 0..<layer.inputSize:
                weights.add(rand(weightRange.start..weightRange.stop))
                weightGradients.add(0.0)
        layer.biases = newMatrix[float](biases)
        # Why swap outputSize and inputSize in the matrix shape? The reason is simple: this
        # spares us from having to transpose the weights later when we perform the dot product
        # (I get that it's a constant time operation, but if we can avoid it altogether, that's
        # even better!). For example, a layer with 3 inputs and 2 outputs stores its weights
        # as a (2, 3) matrix
        layer.weights = newMatrixFromSeq[float](weights, (layer.outputSize, layer.inputSize))
        layer.gradients = (weights: newMatrixFromSeq[float](weightGradients, (layer.outputSize, layer.inputSize)),
                           biases: newMatrix[float](biasGradients))
    result.loss = lossFunc
    result.learnRate = learnRate
    result.momentum = momentum
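
# Hedged usage sketch: assembling a small network from the constructors above.
# `sigmoidActivation` and `mseLoss` are the hypothetical objects from the earlier
# sketch; the layer sizes and hyperparameter values are arbitrary examples:
#
#   var net = newNeuralNetwork(@[newDenseLayer(784, 16, sigmoidActivation),
#                                newDenseLayer(16, 10, sigmoidActivation)],
#                              lossFunc=mseLoss, learnRate=0.02, momentum=0.3,
#                              weightRange=(start: -1.0, stop: 1.0),
#                              biasRange=(start: -1.0, stop: 1.0))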


proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
    ## Feeds the given input through the network and returns
    ## a 1D array with the output
    when not defined(release):
        if data.shape.rows > 1:
            raise newException(ValueError, "input data must be one-dimensional")
        if data.shape.cols != self.layers[0].inputSize:
            raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
    result = data
    for layer in self.layers:
        result = layer.activation.function(layer.weights.dot(result) + layer.biases)
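
# Hedged usage sketch: running a single sample through the hypothetical `net`
# from the sketch above. The input must be a 1-row matrix whose column count
# matches the first layer's inputSize (newMatrix appears to build such a matrix
# from a seq, as it does for the biases above); `sample` stands in for a
# seq[float] of the right length and is not defined here:
#
#   let prediction = net.feedforward(newMatrix[float](sample))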


proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
    ## Performs a single backpropagation step and updates the
    ## gradients for our weights and biases, layer by layer