# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import util/matrix
import std/strformat
import std/random
import std/math

randomize()

type
  NeuralNetwork* = ref object
    ## A generic feed-forward
    ## neural network
    layers*: seq[Layer]
    loss: Loss  # The cost function along with its derivative
    # The network's learn rate determines
    # the size of the step that is taken
    # at each iteration of gradient descent
    learnRate*: float
    # The momentum serves to speed up convergence
    # when performing SGD: a fraction of the previous
    # update is carried over into the current one, so
    # steps that consistently point in the same direction
    # build up speed (see the update-rule sketch after
    # this type section)
    momentum*: float
  Loss* = ref object
    ## A loss function and its derivative
    function: proc (a, b: Matrix[float]): float
    derivative: proc (x, y: Matrix[float]): Matrix[float] {.noSideEffect.}
  Activation* = ref object
    ## An activation function and its derivative
    function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
    derivative: proc (x: Matrix[float]): Matrix[float] {.noSideEffect.}
  Layer* = ref object
    ## A generic neural network
    ## layer
    inputSize*: int   # The number of inputs we process
    outputSize*: int  # The number of outputs we produce
    weights*: Matrix[float]  # The weights for each connection (2D)
    biases*: Matrix[float]   # The biases for each neuron (1D)
    gradients: tuple[weights, biases: Matrix[float]]  # Gradient coefficients for weights and biases
    activation: Activation  # The layer's activation function
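
# A sketch of how these hyperparameters are typically combined (assumed
# semantics; the update step itself is not implemented in this module yet):
# given a gradient g, a velocity v (initialized to zero) and a parameter
# matrix w, one momentum-SGD step computes
#
#   v = momentum * v - learnRate * g
#   w = w + v
#
# so learnRate scales each step while momentum carries part of the
# previous step into the next one.
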
proc `$`*(self: Layer): string =
  ## Returns a string representation
  ## of the layer
  result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})"


proc `$`*(self: NeuralNetwork): string =
  ## Returns a string representation
  ## of the network
  result = &"NeuralNetwork(learnRate={self.learnRate}, layers={self.layers})"

proc newLoss*(function: proc (a, b: Matrix[float]): float, derivative: proc (x, y: Matrix[float]): Matrix[float] {.noSideEffect.}): Loss =
  ## Creates a new Loss object
  new(result)
  result.function = function
  result.derivative = derivative

proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}, derivative: proc (x: Matrix[float]): Matrix[float] {.noSideEffect.}): Activation =
  ## Creates a new Activation object
  new(result)
  result.function = function
  result.derivative = derivative

proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
  ## Creates a new dense layer with inputSize inputs,
  ## outputSize outgoing connections and the chosen
  ## activation function
  new(result)
  result.inputSize = inputSize
  result.outputSize = outputSize
  result.activation = activationFunc

proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
  ## Initializes a new neural network with
  ## the given topology and hyperparameters.
  ## Weights and biases are initialized with
  ## random values in the chosen ranges
  new(result)
  result.layers = topology
  for layer in result.layers:
    var biases = newSeqOfCap[float](layer.outputSize)
    var biasGradients = newSeqOfCap[float](layer.outputSize)
    for _ in 0..<layer.outputSize:
      biases.add(rand(biasRange.start..biasRange.stop))
      biasGradients.add(0.0)
    var weights = newSeqOfCap[float](layer.inputSize * layer.outputSize)
    var weightGradients = newSeqOfCap[float](layer.inputSize * layer.outputSize)
    for _ in 0..<layer.outputSize:
      for _ in 0..<layer.inputSize:
        weights.add(rand(weightRange.start..weightRange.stop))
        weightGradients.add(0.0)
    layer.biases = newMatrix[float](biases)
    # Why swap outputSize and inputSize in the matrix shape? The reason is simple: this
    # spares us from having to transpose the weights later when we perform the dot product
    # in feedforward (e.g. a layer with 3 inputs and 2 outputs stores a (2, 3) matrix).
    # Transposition is a constant-time operation, but if we can avoid it altogether,
    # that's even better!
    layer.weights = newMatrixFromSeq[float](weights, (layer.outputSize, layer.inputSize))
    # The gradients mirror the shapes of the parameters they refer to:
    # weights are 2D, biases are 1D
    layer.gradients = (weights: newMatrixFromSeq[float](weightGradients, (layer.outputSize, layer.inputSize)),
                       biases: newMatrix[float](biasGradients))
  result.loss = lossFunc
  result.learnRate = learnRate
  result.momentum = momentum


proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
  ## Feeds the given input through the network and returns
  ## a 1D array with the output
  when not defined(release):
    if data.shape.rows > 1:
      raise newException(ValueError, "input data must be one-dimensional")
    if data.shape.cols != self.layers[0].inputSize:
      raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
  result = data
  for layer in self.layers:
    result = layer.activation.function(layer.weights.dot(result) + layer.biases)
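# For example, with topology @[newDenseLayer(2, 3, Sigmoid), newDenseLayer(3, 1, Sigmoid)]
# (using names defined at the bottom of this module), a (1, 2) input becomes a
# (1, 3) activation after the first layer and the final (1, 1) output after the
# second.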


proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
  ## Performs a single backpropagation step and updates the
  ## gradients for our weights and biases, layer by layer


## Utility functions

# Mean squared error
proc mse(a, b: Matrix[float]): float =
  result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float

# Derivative of MSE
func dxMSE(x, y: Matrix[float]): Matrix[float] = 2.0 * (x - y)
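# In formula form, for n-element predictions a and targets b:
#   mse(a, b) = sum((b_i - a_i)^2) / n
# and the gradient with respect to a prediction x given a target y is
#   dxMSE(x, y) = 2 * (x - y)
# (the 1/n factor is not carried into the derivative here, which
# effectively just rescales the learning rate)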
# A bunch of vectorized activation functions
func sigmoid(input: Matrix[float]): Matrix[float] =
  result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)

func sigmoidDerivative(input: Matrix[float]): Matrix[float] = sigmoid(input) * (1.0 - sigmoid(input))

func softmax(input: Matrix[float]): Matrix[float] =
  # Shifting the input by its maximum keeps exp() from overflowing
  # without changing the result
  var input = input - input.max()
  result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()

func softmaxDerivative(input: Matrix[float]): Matrix[float] =
  var input = input.reshape(input.shape.cols, 1)
  result = input.diagflat() - input.dot(input.transpose())
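# The matrix built above is the softmax Jacobian J = diag(s) - s * s^T, i.e.
# J[i][j] = s[i] * (delta(i, j) - s[j]). Note that, as written, the argument
# is expected to already be the softmax output s rather than the raw logits.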
# TODO: Add derivatives for this stuff
# Heaviside step function (0 below zero, 1 everywhere else)
func step(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: 1.0), axis = -1)
# SiLU: x * sigmoid(x)
func silu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = x / (1 + exp(-x)), axis = -1)
func relu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
# Hyperbolic tangent, computed as (e^(2x) - 1) / (e^(2x) + 1)
func htan(input: Matrix[float]): Matrix[float] {.used.} =
  let f = proc (x: float): float =
    let temp = exp(2 * x)
    result = (temp - 1) / (temp + 1)
  input.apply(f, axis = -1)
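# For when the TODO above is tackled, the standard derivatives are
# (textbook results, not yet wired into newActivation):
#   step'(x) = 0 for every x != 0
#   relu'(x) = 1 if x > 0 else 0
#   silu'(x) = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
#   htan'(x) = 1 - tanh(x)^2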
{.push.}
{.hints: off.} # So nim doesn't complain about the naming
var Sigmoid* = newActivation(sigmoid, sigmoidDerivative)
var Softmax* = newActivation(softmax, softmaxDerivative)
var MSE* = newLoss(mse, dxMSE)
{.pop.}
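

# A minimal usage sketch (relies only on what this module defines; the
# topology and input values are arbitrary example data):
when isMainModule:
  let net = newNeuralNetwork(@[newDenseLayer(2, 3, Sigmoid),
                               newDenseLayer(3, 1, Sigmoid)],
                             lossFunc = MSE, learnRate = 0.02, momentum = 0.9,
                             weightRange = (start: -1.0, stop: 1.0),
                             biasRange = (start: -1.0, stop: 1.0))
  echo net  # NeuralNetwork(learnRate=0.02, layers=...)
  # Feed a 1D input of size 2 through the network: the result
  # is the network's (1, 1) output matrix
  echo net.feedforward(newMatrix[float](@[0.5, 1.0]))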