# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import util/matrix
import std/strformat
import std/random
import std/math

randomize()

type
  NeuralNetwork* = ref object
    ## A generic feed-forward
    ## neural network
    layers*: seq[Layer]
    loss: Loss  # The cost function along with its derivative
    # The network's learn rate determines
    # the size of the step that is taken
    # at each iteration of gradient descent
    learnRate*: float
    # The momentum serves to speed up convergence
    # when performing SGD: a fraction of the previous
    # update is carried over into the current one, so
    # steps that consistently point in the same direction
    # build up speed (see the update-rule sketch after
    # this type section)
    momentum*: float
  Loss* = ref object
    ## A loss function and its derivative
    function: proc (a, b: Matrix[float]): float
    derivative: proc (x, y: Matrix[float]): Matrix[float] {.noSideEffect.}
  Activation* = ref object
    ## An activation function and its derivative
    function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
    derivative: proc (x: Matrix[float]): Matrix[float] {.noSideEffect.}
  Layer* = ref object
    ## A generic neural network
    ## layer
    inputSize*: int   # The number of inputs we process
    outputSize*: int  # The number of outputs we produce
    weights*: Matrix[float]  # The weights for each connection (2D)
    biases*: Matrix[float]   # The biases for each neuron (1D)
    gradients: tuple[weights, biases: Matrix[float]]  # Gradient coefficients for weights and biases
    activation: Activation  # The layer's activation function
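
# A sketch of how these hyperparameters are typically combined (assumed
# semantics; the update step itself is not implemented in this module yet):
# given a gradient g, a velocity v (initialized to zero) and a parameter
# matrix w, one momentum-SGD step computes
#
#   v = momentum * v - learnRate * g
#   w = w + v
#
# so learnRate scales each step while momentum carries part of the
# previous step into the next one.
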
proc `$`*(self: Layer): string =
  ## Returns a string representation
  ## of the layer
  result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})"


proc `$`*(self: NeuralNetwork): string =
  ## Returns a string representation
  ## of the network
  result = &"NeuralNetwork(learnRate={self.learnRate}, layers={self.layers})"

proc newLoss*(function: proc (a, b: Matrix[float]): float, derivative: proc (x, y: Matrix[float]): Matrix[float] {.noSideEffect.}): Loss =
  ## Creates a new Loss object
  new(result)
  result.function = function
  result.derivative = derivative

proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}, derivative: proc (x: Matrix[float]): Matrix[float] {.noSideEffect.}): Activation =
  ## Creates a new Activation object
  new(result)
  result.function = function
  result.derivative = derivative

proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
  ## Creates a new dense layer with inputSize inputs,
  ## outputSize outgoing connections and the chosen
  ## activation function
  new(result)
  result.inputSize = inputSize
  result.outputSize = outputSize
  result.activation = activationFunc

proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
  ## Initializes a new neural network with
  ## the given topology and hyperparameters.
  ## Weights and biases are initialized with
  ## random values in the chosen ranges
  new(result)
  result.layers = topology
  for layer in result.layers:
    var biases = newSeqOfCap[float](layer.outputSize)
    var biasGradients = newSeqOfCap[float](layer.outputSize)
    for _ in 0..<layer.outputSize:
      biases.add(rand(biasRange.start..biasRange.stop))
      biasGradients.add(0.0)
    var weights = newSeqOfCap[float](layer.inputSize * layer.outputSize)
    var weightGradients = newSeqOfCap[float](layer.inputSize * layer.outputSize)
    for _ in 0..<layer.outputSize:
      for _ in 0..<layer.inputSize:
        weights.add(rand(weightRange.start..weightRange.stop))
        weightGradients.add(0.0)
    layer.biases = newMatrix[float](biases)
    # Why swap outputSize and inputSize in the matrix shape? The reason is simple: this
    # spares us from having to transpose the weights later when we perform the dot product
    # in feedforward (e.g. a layer with 3 inputs and 2 outputs stores a (2, 3) matrix).
    # Transposition is a constant-time operation, but if we can avoid it altogether,
    # that's even better!
    layer.weights = newMatrixFromSeq[float](weights, (layer.outputSize, layer.inputSize))
    # The gradients mirror the shapes of the parameters they refer to:
    # weights are 2D, biases are 1D
    layer.gradients = (weights: newMatrixFromSeq[float](weightGradients, (layer.outputSize, layer.inputSize)),
                       biases: newMatrix[float](biasGradients))
  result.loss = lossFunc
  result.learnRate = learnRate
  result.momentum = momentum


proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
  ## Feeds the given input through the network and returns
  ## a 1D array with the output
  when not defined(release):
    if data.shape.rows > 1:
      raise newException(ValueError, "input data must be one-dimensional")
    if data.shape.cols != self.layers[0].inputSize:
      raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
  result = data
  for layer in self.layers:
    result = layer.activation.function(layer.weights.dot(result) + layer.biases)
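# For example, with topology @[newDenseLayer(2, 3, Sigmoid), newDenseLayer(3, 1, Sigmoid)]
# (using names defined at the bottom of this module), a (1, 2) input becomes a
# (1, 3) activation after the first layer and the final (1, 1) output after the
# second.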


proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
  ## Performs a single backpropagation step and updates the
  ## gradients for our weights and biases, layer by layer


## Utility functions

# Mean squared error
proc mse(a, b: Matrix[float]): float =
  result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float

# Derivative of MSE
func dxMSE(x, y: Matrix[float]): Matrix[float] = 2.0 * (x - y)
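# In formula form, for n-element predictions a and targets b:
#   mse(a, b) = sum((b_i - a_i)^2) / n
# and the gradient with respect to a prediction x given a target y is
#   dxMSE(x, y) = 2 * (x - y)
# (the 1/n factor is not carried into the derivative here, which
# effectively just rescales the learning rate)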
# A bunch of vectorized activation functions
func sigmoid(input: Matrix[float]): Matrix[float] =
  result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)

func sigmoidDerivative(input: Matrix[float]): Matrix[float] = sigmoid(input) * (1.0 - sigmoid(input))

func softmax(input: Matrix[float]): Matrix[float] =
  # Shifting the input by its maximum keeps exp() from overflowing
  # without changing the result
  var input = input - input.max()
  result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()

func softmaxDerivative(input: Matrix[float]): Matrix[float] =
  var input = input.reshape(input.shape.cols, 1)
  result = input.diagflat() - input.dot(input.transpose())
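# The matrix built above is the softmax Jacobian J = diag(s) - s * s^T, i.e.
# J[i][j] = s[i] * (delta(i, j) - s[j]). Note that, as written, the argument
# is expected to already be the softmax output s rather than the raw logits.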
# TODO: Add derivatives for this stuff
# Heaviside step function (0 below zero, 1 everywhere else)
func step(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: 1.0), axis = -1)
# SiLU: x * sigmoid(x)
func silu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = x / (1 + exp(-x)), axis = -1)
func relu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
# Hyperbolic tangent, computed as (e^(2x) - 1) / (e^(2x) + 1)
func htan(input: Matrix[float]): Matrix[float] {.used.} =
  let f = proc (x: float): float =
    let temp = exp(2 * x)
    result = (temp - 1) / (temp + 1)
  input.apply(f, axis = -1)
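# For when the TODO above is tackled, the standard derivatives are
# (textbook results, not yet wired into newActivation):
#   step'(x) = 0 for every x != 0
#   relu'(x) = 1 if x > 0 else 0
#   silu'(x) = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
#   htan'(x) = 1 - tanh(x)^2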
{.push.}
{.hints: off.} # So nim doesn't complain about the naming
var Sigmoid* = newActivation(sigmoid, sigmoidDerivative)
var Softmax* = newActivation(softmax, softmaxDerivative)
var MSE* = newLoss(mse, dxMSE)
{.pop.}
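

# A minimal usage sketch (relies only on what this module defines; the
# topology and input values are arbitrary example data):
when isMainModule:
  let net = newNeuralNetwork(@[newDenseLayer(2, 3, Sigmoid),
                               newDenseLayer(3, 1, Sigmoid)],
                             lossFunc = MSE, learnRate = 0.02, momentum = 0.9,
                             weightRange = (start: -1.0, stop: 1.0),
                             biasRange = (start: -1.0, stop: 1.0))
  echo net  # NeuralNetwork(learnRate=0.02, layers=...)
  # Feed a 1D input of size 2 through the network: the result
  # is the network's (1, 1) output matrix
  echo net.feedforward(newMatrix[float](@[0.5, 1.0]))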