Compare commits

...

8 Commits

4 changed files with 1324 additions and 0 deletions

41
src/main.nim Normal file

@@ -0,0 +1,41 @@
import nn/network
import nn/util/matrix

import std/math


# Mean squared error
proc mse(a, b: Matrix[float]): float =
    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float

# Derivative of MSE
func dxMSE*(x, y: float): float = 2 * (x - y)
# Placeholder derivative: it always returns zero, so layers using it
# will not learn until a real derivative is plugged in
func dx*(x, y: float): float = 0.0

# A bunch of vectorized activation functions
func sigmoid*(input: Matrix[float]): Matrix[float] =
    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)

func softmax*(input: Matrix[float]): Matrix[float] =
    # Subtracting the maximum first keeps exp() from overflowing
    var input = input - input.max()
    result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()

# Heaviside step function
func step*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: 1.0), axis = -1)

# SiLU is x * sigmoid(x)
func silu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = x / (1 + exp(-x)), axis = -1)

func relu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)

func htan*(input: Matrix[float]): Matrix[float] =
    let f = proc (x: float): float =
        let temp = exp(2 * x)
        result = (temp - 1) / (temp + 1)
    result = input.apply(f, axis = -1)


var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, newActivation(sigmoid, dx)),
                             newDenseLayer(3, 2, newActivation(sigmoid, dx)),
                             newDenseLayer(2, 3, newActivation(softmax, dx))],
                           lossFunc=newLoss(mse, dxMSE),
                           learnRate=0.05, weightRange=(start: -1.0, stop: 1.0),
                           biasRange=(start: -10.0, stop: 10.0), momentum=0.55)
echo mlp.feedforward(newMatrix[float](@[1.0, 2.0]))
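
An aside on the helpers above (this note and the sketch are not part of the diff): the scalar forms of mse, sigmoid, step and silu are easy to sanity-check with nothing but std/math, independently of the Matrix type. A minimal sketch:

import std/math

# Scalar counterparts of the vectorized helpers above (illustrative only)
proc scalarMse(a, b: seq[float]): float =
    for i in 0..<len(a):
        result += pow(b[i] - a[i], 2)
    result /= len(a).float

proc scalarSigmoid(x: float): float = 1 / (1 + exp(-x))
proc scalarStep(x: float): float = (if x < 0.0: 0.0 else: 1.0)  # Heaviside step
proc scalarSilu(x: float): float = x * scalarSigmoid(x)         # x * sigmoid(x)

when isMainModule:
    echo scalarMse(@[1.0, 2.0], @[1.5, 1.0])  # (0.25 + 1.0) / 2 = 0.625
    echo scalarSigmoid(0.0)                   # 0.5
    echo scalarStep(-2.0)                     # 0.0
    echo scalarSilu(1.0)                      # ~0.7311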

145
src/nn/network.nim Normal file

@@ -0,0 +1,145 @@
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import util/matrix

import std/strformat
import std/random


randomize()


type
    NeuralNetwork* = ref object
        ## A generic feed-forward
        ## neural network
        layers*: seq[Layer]
        loss: Loss  # The cost function along with its derivative
        # The network's learn rate determines
        # the amount of progress that is made
        # at each step when performing gradient
        # descent
        learnRate*: float
        # The momentum serves to speed up convergence
        # time when performing SGD: the higher the output
        # of the derivative of the cost function, the more
        # we nudge our inputs for our next epoch
        momentum*: float

    Loss* = ref object
        ## A loss function and its derivative
        function: proc (a, b: Matrix[float]): float
        derivative: proc (x, y: float): float {.noSideEffect.}

    Activation* = ref object
        ## An activation function
        function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
        derivative: proc (x, y: float): float {.noSideEffect.}

    Layer* = ref object
        ## A generic neural network
        ## layer
        inputSize*: int   # The number of inputs we process
        outputSize*: int  # The number of outputs we produce
        weights*: Matrix[float]  # The weights for each connection (2D)
        biases*: Matrix[float]   # The biases for each neuron (1D)
        gradients: tuple[weights, biases: Matrix[float]]  # Gradient coefficients for weights and biases
        activation: Activation  # The layer's activation function
proc `$`*(self: Layer): string =
    ## Returns a string representation
    ## of the layer
    result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})"


proc `$`*(self: NeuralNetwork): string =
    ## Returns a string representation
    ## of the network
    result = &"NeuralNetwork(learnRate={self.learnRate}, layers={self.layers})"


proc newLoss*(function: proc (a, b: Matrix[float]): float, derivative: proc (x, y: float): float {.noSideEffect.}): Loss =
    ## Creates a new Loss object
    new(result)
    result.function = function
    result.derivative = derivative


proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}, derivative: proc (x, y: float): float {.noSideEffect.}): Activation =
    ## Creates a new Activation object
    new(result)
    result.function = function
    result.derivative = derivative
proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
    ## Creates a new dense layer with inputSize input
    ## parameters, outputSize outgoing outputs and
    ## the chosen activation function
    new(result)
    result.inputSize = inputSize
    result.outputSize = outputSize
    result.activation = activationFunc


proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
    ## Initializes a new neural network with
    ## the given topology and hyperparameters.
    ## Weights and biases are initialized with
    ## random values in the chosen ranges
    new(result)
    result.layers = topology
    for layer in result.layers:
        var biases = newSeqOfCap[float](layer.outputSize)
        var biasGradients = newSeqOfCap[float](layer.outputSize)
        for _ in 0..<layer.outputSize:
            biases.add(rand(biasRange.start..biasRange.stop))
            biasGradients.add(0.0)
        var weights = newSeqOfCap[float](layer.inputSize * layer.outputSize)
        var weightGradients = newSeqOfCap[float](layer.inputSize * layer.outputSize)
        for _ in 0..<layer.outputSize:
            for _ in 0..<layer.inputSize:
                weights.add(rand(weightRange.start..weightRange.stop))
                weightGradients.add(0.0)
        layer.biases = newMatrix[float](biases)
        # Why swap outputSize and inputSize in the matrix shape? The reason is simple: this
        # spares us from having to transpose it later when we perform the dot product (I get
        # that it's a constant time operation, but if we can avoid it altogether, that's even
        # better!)
        layer.weights = newMatrixFromSeq[float](weights, (layer.outputSize, layer.inputSize))
        # The weight gradients share the weights' 2D shape, while the bias
        # gradients are a 1D vector just like the biases themselves
        layer.gradients = (weights: newMatrixFromSeq[float](weightGradients, (layer.outputSize, layer.inputSize)),
                           biases: newMatrix[float](biasGradients))
    result.loss = lossFunc
    result.learnRate = learnRate
    result.momentum = momentum
proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
    ## Feeds the given input through the network and returns
    ## a 1D matrix with the output
    when not defined(release):
        if data.shape.rows > 1:
            raise newException(ValueError, "input data must be one-dimensional")
        if data.shape.cols != self.layers[0].inputSize:
            raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
    result = data
    for layer in self.layers:
        result = layer.activation.function(layer.weights.dot(result) + layer.biases)


proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
    ## Performs a single backpropagation step and updates the
    ## gradients for our weights and biases, layer by layer
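
backprop has no body yet in this diff, but the learnRate and momentum comments in the type section describe the intended update rule. For reference, here is a minimal scalar sketch of the classic SGD-with-momentum step; this is the textbook formulation, not necessarily what a later commit implements:

proc sgdMomentumStep(param, velocity, gradient, learnRate, momentum: float): tuple[param, velocity: float] =
    # The velocity carries over a fraction (momentum) of the previous step,
    # so consistent gradients build up speed while oscillating ones cancel out
    let newVelocity = momentum * velocity - learnRate * gradient
    result = (param: param + newVelocity, velocity: newVelocity)

when isMainModule:
    var (w, v) = (0.5, 0.0)
    for _ in 0..<3:
        # Pretend the loss derivative w.r.t. w is a constant 0.2 (toy example)
        (w, v) = sgdMomentumStep(w, v, 0.2, learnRate = 0.05, momentum = 0.55)
    echo w  # Each step moves against the gradient a little faster than the last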

1062
src/nn/util/matrix.nim Normal file

File diff suppressed because it is too large


@@ -0,0 +1,76 @@
# Copyright 2022 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Various data preprocessing tools
import matrix

import std/strformat
import std/sets


type
    LabelEncoder* = ref object
        ## An encoder to reversibly assign a numerical value in the
        ## range from 0 to n_labels - 1 to the labels
        ## of some categorical data
        isFit: bool
        labels: Matrix[string]


proc newLabelEncoder*: LabelEncoder =
    ## Initializes a new LabelEncoder object
    new(result)


proc toOrderedSet[T](m: Matrix[T]): OrderedSet[T] =
    ## Collects the unique elements of a matrix into
    ## a set, preserving the order of first appearance
    result = initOrderedSet[T]()
    for row in m:
        for element in row:
            result.incl(element)


proc fit*(self: LabelEncoder, labels: Matrix[string]) =
    ## Fits the encoder to the given labels
    var lbl: seq[string] = @[]
    for label in toOrderedSet(labels):
        lbl.add(label)
    self.labels = newMatrix(lbl)
    self.isFit = true


proc transform*(self: LabelEncoder, labels: Matrix[string]): Matrix[int] =
    ## Transforms a vector of labels into a vector of encoded
    ## integers. Duplicate labels are assigned the same integer
    assert self.isFit, "The estimator must be fit!"
    var res: seq[int] = @[]
    for row in labels:
        for label in row:
            if label notin self.labels:
                raise newException(ValueError, &"Unknown label '{label}'")
            res.add(self.labels.raw[].find(label))
    result = newMatrix(res)


proc reverseTransform*(self: LabelEncoder, labels: Matrix[int]): Matrix[string] =
    ## Reverses the transformation of the encoded integer
    ## labels back to their original string form
    assert self.isFit, "The estimator must be fit!"
    var res: seq[string] = @[]
    for row in labels:
        for label in row:
            if label notin 0..<self.labels.len():
                raise newException(ValueError, &"Unknown encoded label '{label}'")
            res.add(self.labels[0, label])
    result = newMatrix(res)
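
To close, a short usage sketch for the encoder above (not part of the diff; it assumes the code sits in the same module, since this file's name is cut off in the compare view, and uses the same newMatrix constructor seen throughout):

when isMainModule:
    let encoder = newLabelEncoder()
    let labels = newMatrix(@["cat", "dog", "cat", "bird"])
    encoder.fit(labels)                      # Unique labels in order of appearance: cat, dog, bird
    let encoded = encoder.transform(labels)  # 0, 1, 0, 2
    echo encoded
    echo encoder.reverseTransform(encoded)   # Round-trips back to the original labels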