Compare commits
2 Commits
d83205e09d...ac0cdfdc92

| Author | SHA1 | Date |
| --- | --- | --- |
| Mattia Giambirtone | ac0cdfdc92 | |
| Mattia Giambirtone | 01525da889 | |
src/main.nim (43 changed lines)
@@ -1,46 +1,11 @@
 import nn/network
 import nn/util/matrix
 
-import std/math
-
-# Mean squared error
-proc mse(a, b: Matrix[float]): float =
-    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float
-
-# Derivative of MSE
-func dxMSE*(x, y: Matrix[float]): Matrix[float] = 2.0 * (x - y)
-
-# A bunch of vectorized activation functions
-func sigmoid*(input: Matrix[float]): Matrix[float] =
-    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
-
-func sigmoidDerivative*(input: Matrix[float]): Matrix[float] = sigmoid(input) * (1.0 - sigmoid(input))
-
-func softmax*(input: Matrix[float]): Matrix[float] =
-    var input = input - input.max()
-    result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
-
-func softmaxDerivative*(input: Matrix[float]): Matrix[float] = zeros[float](input.shape)
-
-func step*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: x), axis = -1)
-func silu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
-func relu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
-
-func htan*(input: Matrix[float]): Matrix[float] =
-    let f = proc (x: float): float =
-        let temp = exp(2 * x)
-        result = (temp - 1) / (temp + 1)
-    input.apply(f, axis = -1)
-
-var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, newActivation(sigmoid, sigmoidDerivative)),
-                             newDenseLayer(3, 2, newActivation(sigmoid, sigmoidDerivative)),
-                             newDenseLayer(2, 3, newActivation(softmax, softmaxDerivative))],
-                           lossFunc=newLoss(mse, dxMSE), learnRate=0.05, momentum=0.55,
+var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, Sigmoid),
+                             newDenseLayer(3, 2, Sigmoid),
+                             newDenseLayer(2, 3, Softmax)],
+                           lossFunc=MSE, learnRate=0.05, momentum=0.55,
                            weightRange=(start: -1.0, stop: 1.0), biasRange=(start: -10.0, stop: 10.0))
 echo mlp.feedforward(newMatrix[float](@[1.0, 2.0]))
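For reference, the helpers deleted here (and re-added inside the library further down) implement the textbook mean squared error. With a as the prediction, b as the target, and n = len(a), mse and dxMSE correspond to:

$$
\mathrm{MSE}(a, b) = \frac{1}{n}\sum_{i=1}^{n} (b_i - a_i)^2,
\qquad
\mathrm{dxMSE}(x, y) = 2\,(x - y)
$$

Note that dxMSE omits the 1/n factor of the exact gradient; that constant scale is effectively folded into learnRate.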
@@ -17,6 +17,7 @@ import util/matrix
 
 import std/strformat
 import std/random
+import std/math
 
 
 randomize()
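randomize() seeds Nim's default RNG, which the network presumably uses to draw its initial parameters from the weightRange and biasRange tuples passed in main.nim. A minimal sketch of that idea (hypothetical code; the actual initialization routine is not part of this compare):

```nim
import std/random

randomize()

# Hypothetical: draw a single weight uniformly from a (start, stop) tuple
# shaped like the weightRange/biasRange arguments in main.nim
let weightRange = (start: -1.0, stop: 1.0)
let w = rand(weightRange.start..weightRange.stop)
echo w
```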
@@ -142,4 +143,50 @@ proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
 
 
+proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
+    ## Performs a single backpropagation step and updates the
+    ## gradients for our weights and biases, layer by layer
+
+
+## Utility functions
+
+# Mean squared error
+proc mse(a, b: Matrix[float]): float =
+    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float
+
+# Derivative of MSE
+func dxMSE(x, y: Matrix[float]): Matrix[float] = 2.0 * (x - y)
+
+# A bunch of vectorized activation functions
+func sigmoid(input: Matrix[float]): Matrix[float] =
+    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
+
+func sigmoidDerivative(input: Matrix[float]): Matrix[float] = sigmoid(input) * (1.0 - sigmoid(input))
+
+func softmax(input: Matrix[float]): Matrix[float] =
+    var input = input - input.max()
+    result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
+
+func softmaxDerivative(input: Matrix[float]): Matrix[float] =
+    var input = input.reshape(input.shape.cols, 1)
+    result = input.diagflat() - input.dot(input.transpose())
+
+func step(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: x), axis = -1)
+func silu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis = -1)
+func relu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
+
+func htan(input: Matrix[float]): Matrix[float] {.used.} =
+    let f = proc (x: float): float =
+        let temp = exp(2 * x)
+        result = (temp - 1) / (temp + 1)
+    input.apply(f, axis = -1)
+
+{.push.}
+{.hints: off.} # So nim doesn't complain about the naming
+var Sigmoid* = newActivation(sigmoid, sigmoidDerivative)
+var Softmax* = newActivation(softmax, softmaxDerivative)
+var MSE* = newLoss(mse, dxMSE)
+{.pop.}
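The rewritten softmaxDerivative replaces the zeros[float] placeholder from the old main.nim with the full softmax Jacobian. Assuming input already holds the softmax output s reshaped to a column vector, the diagflat/outer-product expression computes:

$$
J_{ij} = \frac{\partial s_i}{\partial z_j} = s_i\,(\delta_{ij} - s_j),
\qquad
J = \operatorname{diag}(s) - s\,s^{\top}
$$

Likewise, sigmoidDerivative relies on the identity σ'(z) = σ(z)(1 − σ(z)).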
@@ -662,8 +662,8 @@ proc `==`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[bool] =
 
 
 proc diag*[T](a: Matrix[T], k: int = 0): Matrix[T] =
-    ## Returns the kth diagonal of
-    ## the given matrix if a is 2-D
+    ## Returns the kth diagonal of
+    ## the given matrix if a is 2-D,
+    ## or a 2-D matrix with a on its
+    ## kth diagonal if it is 1-D
     if a.shape.rows > 0:
@@ -686,6 +686,12 @@ proc diag*[T](a: Matrix[T], k: int = 0): Matrix[T] =
             inc(current.col)
 
 
+proc diagflat*[T](a: Matrix[T], k: int = 0): Matrix[T] =
+    ## Create a 2-D array with the flattened
+    ## input as a diagonal
+    result = a.flatten().diag(k)
+
+
 proc fliplr*[T](self: Matrix[T]): Matrix[T] =
     ## Flips each row in the matrix left
     ## to right. A copy is returned
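diagflat composes the two primitives above: the input is flattened first, then handed to diag, so a 2x2 input yields a 4x4 result. A minimal usage sketch, mirroring the tests added at the bottom of this compare:

```nim
import nn/util/matrix  # module path as used by src/main.nim above

# 1-D input: diag() puts the vector on the main diagonal of a square matrix
var o = newMatrix[int](@[1, 2, 3])
doAssert all(o.diag() == newMatrix[int](@[@[1, 0, 0], @[0, 2, 0], @[0, 0, 3]]))

# 2-D input: diagflat() first flattens to @[1, 2, 3, 4]
var n = newMatrix[int](@[@[1, 2], @[3, 4]])
doAssert all(n.diagflat() == newMatrix[int](@[@[1, 0, 0, 0], @[0, 2, 0, 0], @[0, 0, 3, 0], @[0, 0, 0, 4]]))
```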
@@ -937,7 +943,9 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
 
 
 proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
-    ## Behaves like numpy.where()
-    ## Return elements chosen from x or y depending on cond
+    ## Behaves like numpy.where():
+    ## where cond is true, take elements from x, otherwise
+    ## take elements from y
     when not defined(release):
         if not (x.shape == y.shape and y.shape == cond.shape):
             raise newException(ValueError, &"all inputs must be of equal shape for where()")
@@ -960,7 +968,9 @@ proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
 
 
 proc where*[T](cond: Matrix[bool], x: Matrix[T], y: T): Matrix[T] =
-    ## Behaves like numpy.where, but with a constant
+    ## Behaves like where() but with a constant instead of
+    ## an array: when cond is true, take elements from x,
+    ## otherwise take y
     when not defined(release):
         if not (x.shape == cond.shape):
             raise newException(ValueError, &"all inputs must be of equal shape for where()")
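A short sketch of the two where() overloads (hypothetical values; it assumes the Matrix == Matrix comparison returns a Matrix[bool], the same overload the tests below rely on):

```nim
import nn/util/matrix  # module path as used by src/main.nim above

let x = newMatrix[int](@[1, 2, 3])
let y = newMatrix[int](@[9, 2, 7])

# Matrix/matrix overload: where x == y take from x, otherwise from y
doAssert all(where(x == y, x, y) == newMatrix[int](@[9, 2, 7]))

# Matrix/constant overload: where x == y take from x, otherwise the constant 0
doAssert all(where(x == y, x, 0) == newMatrix[int](@[0, 2, 0]))
```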
@@ -1072,4 +1082,6 @@ when isMainModule:
     doAssert all(j.diag(1) == newMatrix[int](@[2, 4]))
     doAssert all(j.diag(2) == newMatrix[int](@[1]))
     var o = newMatrix[int](@[1, 2, 3])
     doAssert all(o.diag() == newMatrix[int](@[@[1, 0, 0], @[0, 2, 0], @[0, 0, 3]]))
+    var n = newMatrix[int](@[@[1, 2], @[3, 4]])
+    doAssert all(n.diagflat() == newMatrix[int](@[@[1, 0, 0, 0], @[0, 2, 0, 0], @[0, 0, 3, 0], @[0, 0, 0, 4]]))
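These doAsserts sit in the matrix module's `when isMainModule:` block, so they only run when the module is compiled as a program in its own right, e.g. `nim c -r src/nn/util/matrix.nim` (path assumed from the imports above); a normal library build skips them.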