Compare commits

8 Commits

a5b8b230db...e91869e5ab

| Author | SHA1 | Date |
| --- | --- | --- |
| Mattia Giambirtone | e91869e5ab | |
| Mattia Giambirtone | 93e71ff336 | |
| Mattia Giambirtone | fbed641470 | |
| Mattia Giambirtone | 6698148d2c | |
| Mattia Giambirtone | 3baacadb1c | |
| Mattia Giambirtone | 1f875e6f2b | |
| Mattia Giambirtone | d6e5e148aa | |
| Mattia Giambirtone | dc44b65e94 | |
@@ -0,0 +1,41 @@

```nim
import nn/network
import nn/util/matrix

import std/math


# Mean squared error
proc mse(a, b: Matrix[float]): float =
    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float

# Derivative of MSE
func dxMSE*(x, y: float): float = 2 * (x - y)

# Dummy derivative (always zero)
func dx*(x, y: float): float = 0.0


# A bunch of vectorized activation functions

func sigmoid*(input: Matrix[float]): Matrix[float] =
    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)

func softmax*(input: Matrix[float]): Matrix[float] =
    # Shift by the max for numerical stability, then normalize
    let exps = (input - input.max()).apply(math.exp, axis = -1)
    result = exps / exps.sum()

# Heaviside step function
func step*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: 1.0), axis = -1)

# SiLU: x * sigmoid(x)
func silu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = x / (1 + exp(-x)), axis = -1)

func relu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)

# Hyperbolic tangent
func htan*(input: Matrix[float]): Matrix[float] =
    let f = proc (x: float): float =
        let temp = exp(2 * x)
        result = (temp - 1) / (temp + 1)
    input.apply(f, axis = -1)


var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, newActivation(sigmoid, dx)),
                             newDenseLayer(3, 2, newActivation(sigmoid, dx)),
                             newDenseLayer(2, 3, newActivation(softmax, dx))],
                           lossFunc=newLoss(mse, dxMSE),
                           learnRate=0.05, weightRange=(start: -1.0, stop: 1.0),
                           biasRange=(start: -10.0, stop: 10.0), momentum=0.55)
echo mlp.feedforward(newMatrix[float](@[1.0, 2.0]))
```
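Since the script pairs `mse` with `dxMSE` as the network's loss, a quick finite-difference check makes the relationship concrete. This is a minimal, self-contained sketch using only `std/math`; the `squaredError` helper is illustrative and not part of the diff:

```nim
import std/math

# Squared error for a single prediction x against target y,
# matching the elementwise term inside mse above
func squaredError(x, y: float): float = pow(x - y, 2)

# The analytical derivative used by the network
func dxMSE(x, y: float): float = 2 * (x - y)

when isMainModule:
    let (x, y) = (0.7, 0.2)
    let h = 1e-6
    # Central finite difference of squaredError with respect to x
    let numeric = (squaredError(x + h, y) - squaredError(x - h, y)) / (2 * h)
    assert abs(numeric - dxMSE(x, y)) < 1e-6
    echo "analytic: ", dxMSE(x, y), ", numeric: ", numeric
```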
@@ -0,0 +1,145 @@

```nim
# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import util/matrix

import std/strformat
import std/random


randomize()


type
    NeuralNetwork* = ref object
        ## A generic feed-forward
        ## neural network
        layers*: seq[Layer]
        loss: Loss  # The cost function along with its derivative
        # The network's learn rate determines
        # the amount of progress that is made
        # at each step when performing gradient
        # descent
        learnRate*: float
        # The momentum serves to speed up convergence
        # when performing SGD: the higher the output
        # of the derivative of the cost function, the more
        # we nudge our inputs for our next epoch
        momentum*: float

    Loss* = ref object
        ## A loss function and its derivative
        function: proc (a, b: Matrix[float]): float
        derivative: proc (x, y: float): float {.noSideEffect.}

    Activation* = ref object
        ## An activation function
        function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
        derivative: proc (x, y: float): float {.noSideEffect.}

    Layer* = ref object
        ## A generic neural network
        ## layer
        inputSize*: int    # The number of inputs we process
        outputSize*: int   # The number of outputs we produce
        weights*: Matrix[float]   # The weights for each connection (2D)
        biases*: Matrix[float]    # The biases for each neuron (1D)
        gradients: tuple[weights, biases: Matrix[float]]  # Gradient coefficients for weights and biases
        activation: Activation    # The layer's activation function


proc `$`*(self: Layer): string =
    ## Returns a string representation
    ## of the layer
    result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})"


proc `$`*(self: NeuralNetwork): string =
    ## Returns a string representation
    ## of the network
    result = &"NeuralNetwork(learnRate={self.learnRate}, layers={self.layers})"


proc newLoss*(function: proc (a, b: Matrix[float]): float, derivative: proc (x, y: float): float {.noSideEffect.}): Loss =
    ## Creates a new Loss object
    new(result)
    result.function = function
    result.derivative = derivative


proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}, derivative: proc (x, y: float): float {.noSideEffect.}): Activation =
    ## Creates a new Activation object
    new(result)
    result.function = function
    result.derivative = derivative


proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
    ## Creates a new dense layer with inputSize incoming
    ## inputs and outputSize outgoing outputs, using
    ## the chosen activation function
    new(result)
    result.inputSize = inputSize
    result.outputSize = outputSize
    result.activation = activationFunc


proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
    ## Initializes a new neural network with
    ## the given topology and hyperparameters.
    ## Weights and biases are initialized with
    ## random values in the chosen range
    new(result)
    result.layers = topology
    for layer in result.layers:
        var biases = newSeqOfCap[float](layer.outputSize)
        var biasGradients = newSeqOfCap[float](layer.outputSize)
        for _ in 0..<layer.outputSize:
            biases.add(rand(biasRange.start..biasRange.stop))
            biasGradients.add(0.0)
        var weights = newSeqOfCap[float](layer.inputSize * layer.outputSize)
        var weightGradients = newSeqOfCap[float](layer.inputSize * layer.outputSize)
        for _ in 0..<layer.outputSize:
            for _ in 0..<layer.inputSize:
                weights.add(rand(weightRange.start..weightRange.stop))
                weightGradients.add(0.0)
        layer.biases = newMatrix[float](biases)
        # Why swap outputSize and inputSize in the matrix shape? The reason is simple: this
        # spares us from having to transpose it later when we perform the dot product (I get
        # that it's a constant time operation, but if we can avoid it altogether, that's even
        # better!)
        layer.weights = newMatrixFromSeq[float](weights, (layer.outputSize, layer.inputSize))
        layer.gradients = (weights: newMatrixFromSeq[float](weightGradients, (layer.outputSize, layer.inputSize)),
                           biases: newMatrix[float](biasGradients))
    result.loss = lossFunc
    result.learnRate = learnRate
    result.momentum = momentum


proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
    ## Feeds the given input through the network and returns
    ## a 1D array with the output
    when not defined(release):
        if data.shape.rows > 1:
            raise newException(ValueError, "input data must be one-dimensional")
        if data.shape.cols != self.layers[0].inputSize:
            raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
    result = data
    for layer in self.layers:
        result = layer.activation.function(layer.weights.dot(result) + layer.biases)


proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
    ## Performs a single backpropagation step and updates the
    ## gradients for our weights and biases, layer by layer
```
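The (outputSize, inputSize) weight layout chosen in `newNeuralNetwork` is what lets `feedforward` compute `layer.weights.dot(result)` without a transpose: each weight row lines up with one output neuron. A rough sketch of the same computation on plain seqs (the `denseForward` helper is hypothetical, not the Matrix API from this repository):

```nim
# Sketch: a dense layer's forward pass with the weight matrix stored
# row-per-output-neuron, i.e. shape (outputSize, inputSize). Because
# each row already lines up with one output neuron, we can take a
# plain row-by-input dot product with no transpose.
func denseForward(weights: seq[seq[float]], biases, input: seq[float]): seq[float] =
    result = newSeq[float](weights.len)
    for i, row in weights:      # one row per output neuron
        var acc = biases[i]
        for j, w in row:        # one column per input
            acc += w * input[j]
        result[i] = acc

when isMainModule:
    # 2 inputs -> 3 outputs: weights have shape (3, 2)
    let w = @[@[0.1, 0.2], @[0.3, 0.4], @[0.5, 0.6]]
    let b = @[0.0, 0.0, 0.0]
    echo denseForward(w, b, @[1.0, 2.0])  # @[0.5, 1.1, 1.7]
```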
File diff suppressed because it is too large
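The network type above stores a `momentum` hyperparameter next to `learnRate`, and its comment describes nudging the next step by past gradients. The update rule itself isn't visible in this compare view, so the following is only a sketch of a classic momentum-SGD step, not the repository's actual `backprop` code:

```nim
# Sketch of a momentum update for a single parameter: velocity
# accumulates an exponentially decaying sum of past gradients, and
# momentum closer to 1 gives past steps more weight.
proc momentumStep(param, velocity: var float, gradient, learnRate, momentum: float) =
    velocity = momentum * velocity - learnRate * gradient
    param += velocity

when isMainModule:
    var w = 1.0
    var v = 0.0
    # Pretend dLoss/dw = 2 * w (i.e. loss = w^2) for a few steps,
    # using the learnRate and momentum from the example script
    for _ in 0..<5:
        momentumStep(w, v, 2 * w, 0.05, 0.55)
    echo w  # w shrinks toward the minimum at 0
```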
@@ -0,0 +1,76 @@

```nim
# Copyright 2022 Mattia Giambirtone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Various data preprocessing tools

import matrix

import std/strformat
import std/sets


type
    LabelEncoder* = ref object
        ## An encoder to assign a numerical value in the
        ## range from 0 to n_labels - 1 to the labels
        ## of some categorical data, reversibly
        isFit: bool
        labels: Matrix[string]


proc newLabelEncoder*: LabelEncoder =
    ## Initializes a new LabelEncoder object
    new(result)


proc toOrderedSet[T](m: Matrix[T]): OrderedSet[T] =
    ## Collects the unique elements of the matrix
    ## into a set, preserving first-seen order
    result = initOrderedSet[T]()
    for row in m:
        for element in row:
            result.incl(element)


proc fit*(self: LabelEncoder, labels: Matrix[string]) =
    ## Fits the encoder to the given labels
    var lbl: seq[string] = @[]
    for label in toOrderedSet(labels):
        lbl.add(label)
    self.labels = newMatrix(lbl)
    self.isFit = true


proc transform*(self: LabelEncoder, labels: Matrix[string]): Matrix[int] =
    ## Transforms a vector of labels into a vector of encoded
    ## integers. Duplicate labels are assigned the same integer
    assert self.isFit, "The estimator must be fit!"
    var res: seq[int] = @[]
    for row in labels:
        for label in row:
            if label notin self.labels:
                raise newException(ValueError, &"Unknown label '{label}'")
            res.add(self.labels.raw[].find(label))
    result = newMatrix(res)


proc reverseTransform*(self: LabelEncoder, labels: Matrix[int]): Matrix[string] =
    ## Reverses the transformation of the integer labels back to strings
    assert self.isFit, "The estimator must be fit!"
    var res: seq[string] = @[]
    for row in labels:
        for label in row:
            if label notin 0..<self.labels.len():
                raise newException(ValueError, &"Unknown encoded label '{label}'")
            res.add(self.labels[0, label])
    result = newMatrix(res)
```
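A usage sketch for the encoder, assuming `newMatrix` accepts a plain seq as in the first file's script, and that the module names below (`matrix`, `preprocessing`) match files this compare view doesn't name:

```nim
import matrix, preprocessing  # assumed module names, not shown in the diff

when isMainModule:
    let enc = newLabelEncoder()
    let labels = newMatrix(@["cat", "dog", "cat", "bird"])
    enc.fit(labels)
    # Labels are numbered in first-seen order, so this should
    # print the encodings for cat=0, dog=1, bird=2
    let encoded = enc.transform(labels)
    echo encoded
    echo enc.reverseTransform(encoded)  # back to the original strings
```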