From e91869e5ab9c2e27472fc5952d01c0cdf9b8ea99 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Mon, 20 Mar 2023 10:01:06 +0100 Subject: [PATCH] Major updates to library architecture and additions to matrix library --- src/main.nim | 146 ++++++--------------------- src/nn/network.nim | 121 ++++++++++++----------- src/nn/util/matrix.nim | 219 ++++++++++++++++++++++++++++++++--------- src/nn/util/tris.nim | 83 ---------------- 4 files changed, 267 insertions(+), 302 deletions(-) delete mode 100644 src/nn/util/tris.nim diff --git a/src/main.nim b/src/main.nim index 0fe412f..3333cb5 100644 --- a/src/main.nim +++ b/src/main.nim @@ -1,123 +1,41 @@ import nn/network import nn/util/matrix -import nn/util/tris - -import std/tables import std/math -import std/random -import std/algorithm -import std/strformat -## A bunch of activation functions -func step*(input: float): float = (if input < 0.0: 0.0 else: 1.0) -func sigmoid*(input: float): float = 1 / (1 + exp(-input)) -func silu*(input: float): float = 1 / (1 + exp(-input)) -func relu*(input: float): float = max(0.0, input) -func htan*(input: float): float = - let temp = exp(2 * input) - result = (temp - 1) / (temp + 1) +# Mean squared error +proc mse(a, b: Matrix[float]): float = + result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float + +# Derivative of MSE +func dxMSE*(x, y: float): float = 2 * (x - y) + +func dx*(x, y: float): float = 0.0 + +# A bunch of vectorized activation functions +func sigmoid*(input: Matrix[float]): Matrix[float] = + result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)) , axis = -1) + +func softmax*(input: Matrix[float]): Matrix[float] = + var input = input - input.max() + result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum() + +func step*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: x), axis = -1) +func silu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis= -1) +func relu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1) + +func htan*(input: Matrix[float]): Matrix[float] = + let f = proc (x: float): float = + let temp = exp(2 * x) + result = (temp - 1) / (temp + 1) + input.apply(f, axis = -1) -func ind2sub(n: int, shape: tuple[rows, cols: int]): tuple[row, col: int] = - ## Converts an absolute index into an x, y pair - return (n div shape.rows, n mod shape.cols) - - -proc loss(params: TableRef[string, float]): float = - ## Our loss function for tris - if params.hasKey("sameMove"): - result = 24 - params["moves"] - else: - result = params["moves"] - if int(params["result"]) == GameStatus.Draw.int: - result += 6 - elif int(params["result"]) == GameStatus.Lose.int: - result += 12 - echo result - - -proc compareNetworks(a, b: NeuralNetwork): int = - if a.params.len() == 0: - return -1 - elif b.params.len() == 0: - return 1 - return cmp(loss(a.params), loss(b.params)) - - -proc crossover(a, b: NeuralNetwork): NeuralNetwork = - result = deepCopy(a) - var i = 0 - while i < a.layers.len(): - # We inherit 50% of our weights and biases from our first - # parent and the other 50% from the other parent - result.layers[i].weights = where(rand[float](a.layers[i].weights.shape) >= 0.5, a.layers[i].weights, b.layers[i].weights) - result.layers[i].biases = where(rand[float](a.layers[i].biases.shape) >= 0.5, a.layers[i].biases, b.layers[i].biases) - # Now we sprinkle some mutations into the inherited weights - # and biases, just to spice things up. If learnRate = 0.02, - # then 2% of our weights and biases will randomly change - result.layers[i].weights = where(rand[float](result.layers[i].weights.shape) < a.learnRate, rand[float](result.layers[i].weights.shape), - result.layers[i].weights) - result.layers[i].biases = where(rand[float](result.layers[i].biases.shape) < a.learnRate, rand[float](result.layers[i].biases.shape), - result.layers[i].biases) - inc(i) - result.learnRate = a.learnRate - - -## Our training program -const Population = 100 -const Iterations = 300 -const Epochs = 10 -const Take = 15 - - -var networks: seq[NeuralNetwork] = @[] -var best: seq[NeuralNetwork] = @[] -for _ in 0.. 1: raise newException(ValueError, "input data must be one-dimensional") @@ -136,5 +137,9 @@ proc compute*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] = raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)") result = data for layer in self.layers: - result = (layer.weights.dot(result).sum() + layer.biases).apply(self.activation.function, axis= -1) + result = layer.activation.function(layer.weights.dot(result) + layer.biases) + +proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} = + ## Performs a single backpropagation step and updates the + ## gradients for our weights and biases, layer by layer \ No newline at end of file diff --git a/src/nn/util/matrix.nim b/src/nn/util/matrix.nim index 91101f6..a55d304 100644 --- a/src/nn/util/matrix.nim +++ b/src/nn/util/matrix.nim @@ -1,4 +1,4 @@ -# Copyright 2022 Mattia Giambirtone & All Contributors +# Copyright 2023 Mattia Giambirtone & All Contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,7 +33,13 @@ type row: int # The row in the matrix to which we point to -proc getSize(shape: tuple[rows, cols: int]): int = +# Simple one-line helpers +func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len() +func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols +func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data + + +proc getSize*(shape: tuple[rows, cols: int]): int = ## Helper to get the size required for the ## underlying data array for a matrix of the ## given shape @@ -84,6 +90,16 @@ proc newMatrix*[T](data: seq[seq[T]], order: MatrixOrder = RowMajor): Matrix[T] idx = col +proc newMatrixFromSeq*[T](data: seq[T], shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] = + ## Creates a new matrix of the given shape from a flat + ## sequence + new(result) + new(result.data) + result.data[] = data + result.shape = shape + result.order = order + + proc zeros*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] = ## Creates a new matrix of the given shape ## filled with zeros @@ -115,6 +131,7 @@ proc ones*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = R for _ in 0.. 0 and b.shape.rows > 0 and a.shape != b.shape: + raise newException(ValueError, &"incompatible argument shapes for addition") + elif (a.shape.rows == 0 or b.shape.rows == 0) and a.shape.cols != b.shape.cols: + raise newException(ValueError, &"incompatible argument shapes for addition") + if a.shape.rows == 0 and b.shape.rows == 0: + return a[0] + b[0] + new(result) + new(result.data) + result.data[] = newSeqOfCap[T](result.shape.getSize()) + result.shape = a.shape + result.order = RowMajor + if result.shape.rows > 1: + for row in 0..= a.shape.cols: + return newMatrix[T](@[]) + var current = offset.ind2sub(a.shape) var res = newSeqOfCap[T](a.shape.getSize()) - case diagonal: - of 0: - for i in 0..`*[T](a, b: Matrix[T]): Matrix[bool] = when not defined(release): if a.shape != b.shape: @@ -710,14 +784,22 @@ proc any*(a: Matrix[bool]): bool = return false +proc index*[T](self: Matrix[T], x: T): tuple[row, col: int] = + ## Returns the location of the given + ## item in the matrix. A tuple of (-1, -1) + ## is returned if the item is not found + for i, row in self: + for j, e in row: + if e == x: + return (i, j) + return (-1, -1) + + # Specular definitions of commutative operators proc `<`*[T](a, b: Matrix[T]): Matrix[bool] = b > a -proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] = not a == b proc `*`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[T] = b * a proc `==`*[T](a: T, b: Matrix[T]): Matrix[bool] = b == a proc `==`*[T](a: MatrixView[T], b: Matrix[T]): Matrix[bool] = b == a -proc `!=`*[T](a: Matrix[T], b: T): Matrix[bool] = not a == b -proc `!=`*[T](a: T, b: Matrix[T]): Matrix[bool] = not b == a proc toRowMajor*[T](self: Matrix[T], copy: bool = true): Matrix[T] = @@ -762,15 +844,17 @@ proc toColumnMajor*[T](self: Matrix[T], copy: bool = true): Matrix[T] = # Matrices and matrix views are iterable! iterator items*[T](self: Matrix[T]): MatrixView[T] = - for row in 0.. 0: + for row in 0.. 0: + for column in 0.. 0: return $(self[0]) result &= "[" for i, row in self: @@ -864,8 +948,32 @@ proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] = col = 0 +proc where*[T](cond: Matrix[bool], x: Matrix[T], y: T): Matrix[T] = + ## Behaves like numpy.where, but with a constant + when not defined(release): + if not (x.shape == cond.shape): + raise newException(ValueError, &"all inputs must be of equal shape for where()") + result = x.copy() + var + row = 0 + col = 0 + if cond.shape.rows == 0: + while col < cond.shape.cols: + if not cond[0, col]: + result[0, col] = y + inc(col) + while row < cond.shape.rows: + while col < cond.shape.cols: + if not cond[row, col]: + result[row, col] = y + inc(col) + inc(row) + col = 0 + + # Just a helper to avoid mistakes and so that x.where(x > 10, y) works as expected -proc where*[T](self: Matrix[T], cond: Matrix[bool], other: Matrix[T]): Matrix[T] = cond.where(self, other) +proc where*[T](self: Matrix[T], cond: Matrix[bool], other: Matrix[T]): Matrix[T] {.inline.} = cond.where(self, other) +proc where*[T](self: Matrix[T], cond: Matrix[bool], other: T): Matrix[T] {.inline.} = cond.where(self, other) proc max*[T](self: Matrix[T]): T = @@ -899,13 +1007,22 @@ proc argmax*[T](self: Matrix[T]): int = proc contains*[T](self: Matrix[T], e: T): bool = - ## Returns wherher the matrix contains + ## Returns whether the matrix contains ## the element e for row in self: for element in row: if element == e: return true return false + + +proc count*[T](self: Matrix[T], e: T): int = + ## Returns the number of occurrences + ## of e in self + for row in self: + for k in row: + if k == e: + inc(result) when isMainModule: @@ -932,6 +1049,14 @@ when isMainModule: assert (x < 5).where(x, x * 10).sum() == 360, "where mismatch" assert all((x < 5).where(x, x * 10) == x.where(x < 5, x * 10)), "where mismatch" assert x.max() == 9, "max mismatch" - assert x.argmax() == 9, "argmax mismatch" - discard newMatrix[int](@[12, 23]).dot(newMatrix[int](@[@[11, 22], @[33, 44]])) - discard newMatrix[int](@[@[1, 2, 3], @[2, 3, 4]]).dot(newMatrix[int](@[1, 2, 3])) \ No newline at end of file + assert x.argmax() == 10, "argmax mismatch" + assert all(newMatrix[int](@[12, 23]).dot(newMatrix[int](@[@[11, 22], @[33, 44]])) == newMatrix[int](@[891, 1276])) + assert all(newMatrix[int](@[@[1, 2, 3], @[2, 3, 4]]).dot(newMatrix[int](@[1, 2, 3])) == newMatrix[int](@[14, 20])) + assert all(m.diag() == newMatrix[int](@[1, 5])) + assert all(m.diag(1) == newMatrix[int](@[2, 6])) + assert all(m.diag(2) == newMatrix[int](@[3])) + assert m.diag(3).len() == 0 + var j = m.fliplr() + assert all(j.diag() == newMatrix[int](@[3, 5])) + assert all(j.diag(1) == newMatrix[int](@[2, 4])) + assert all(j.diag(2) == newMatrix[int](@[1])) diff --git a/src/nn/util/tris.nim b/src/nn/util/tris.nim deleted file mode 100644 index 7ae1baa..0000000 --- a/src/nn/util/tris.nim +++ /dev/null @@ -1,83 +0,0 @@ -import matrix - - -type - TileKind* = enum - ## A tile enumeration kind - Empty = 0, - Self, - Enemy - GameStatus* = enum - ## A game status enumeration - Playing, - Win, - Lose, - Draw - TrisGame* = ref object - map*: Matrix[int] - moves*: int - - -proc newTrisGame*: TrisGame = - ## Creates a new TrisGame object - new(result) - result.map = zeros[int]((3, 3)) - result.moves = 0 - - -proc get*(self: TrisGame): GameStatus = - ## Returns the game status - # Checks for rows - for _, row in self.map: - if all(row == newMatrix[int](@[1, 1, 1])): - return Win - elif all(row == newMatrix[int](@[2, 2, 2])): - return Lose - # Checks for columns - for _, col in self.map.transpose: - if all(col == newMatrix[int](@[1, 1, 1])): - return Win - elif all(col == newMatrix[int](@[2, 2, 2])): - return Lose - # Checks for diagonals - for i in 0..<2: - if all(self.map.diag(i) == newMatrix[int](@[1, 1, 1])): - return Win - elif all(self.map.diag(i) == newMatrix[int](@[2, 2, 2])): - return Lose - # No check was successful and there's no empty slots: draw! - if not any(self.map == 0): - return Draw - # There are empty slots and no one won yet, we're still in game! - return Playing - - -proc `$`*(self: TrisGame): string = - ## Stringifies self - return $self.map - - -proc place*(self: TrisGame, tile: TileKind, x, y: int) = - ## Places a tile onto the playing board - if TileKind(self.map[x, y]) == Empty: - self.map[x, y] = int(tile) - if tile == Self: - inc(self.moves) - - -when isMainModule: - var game = newTrisGame() - game.place(Enemy, 0, 0) - game.place(Enemy, 0, 1) - assert game.get() == Playing - game.place(Enemy, 0, 2) - assert game.get() == Lose - game.place(Self, 0, 2) - assert game.get() == Playing - game.place(Enemy, 1, 1) - game.place(Enemy, 2, 2) - assert game.get() == Lose - game.place(Self, 2, 2) - assert game.get() == Playing - game.place(Self, 1, 2) - assert game.get() == Win