From e91869e5ab9c2e27472fc5952d01c0cdf9b8ea99 Mon Sep 17 00:00:00 2001
From: Mattia Giambirtone <nocturn9x@nocturn9x.space>
Date: Mon, 20 Mar 2023 10:01:06 +0100
Subject: [PATCH] Major updates to library architecture and additions to matrix
 library

---
 src/main.nim           | 146 ++++++---------------------
 src/nn/network.nim     | 121 ++++++++++++-----------
 src/nn/util/matrix.nim | 219 ++++++++++++++++++++++++++++++++---------
 src/nn/util/tris.nim   |  83 ----------------
 4 files changed, 267 insertions(+), 302 deletions(-)
 delete mode 100644 src/nn/util/tris.nim

diff --git a/src/main.nim b/src/main.nim
index 0fe412f..3333cb5 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -1,123 +1,41 @@
 import nn/network
 import nn/util/matrix
-import nn/util/tris
 
-
-import std/tables
 import std/math
-import std/random
-import std/algorithm
-import std/strformat
 
 
-## A bunch of activation functions
 
-func step*(input: float): float = (if input < 0.0: 0.0 else: 1.0)
-func sigmoid*(input: float): float = 1 / (1 + exp(-input))
-func silu*(input: float): float = 1 / (1 + exp(-input))
-func relu*(input: float): float = max(0.0, input)
-func htan*(input: float): float = 
-    let temp = exp(2 * input)
-    result = (temp - 1) / (temp + 1)
+# Mean squared error: squares every element of (b - a), sums the squares and divides by len(a)
+proc mse(a, b: Matrix[float]): float = 
+    result = (b - a).apply(proc (x: float): float = pow(x, 2), axis = -1).sum() / len(a).float
+
+# Derivative of MSE
+func dxMSE*(x, y: float): float = 2 * (x - y)
+
+func dx*(x, y: float): float = 0.0
+
+# A bunch of vectorized activation functions
+func sigmoid*(input: Matrix[float]): Matrix[float] = 
+    result = input.apply(proc (x: float): float = 1 / (1 + exp(-x)) , axis = -1)
+
+func softmax*(input: Matrix[float]): Matrix[float] =
+    let exps = (input - input.max()).apply(math.exp, axis = -1)  # shift by the max so exp() cannot overflow; computed once
+    result = exps / exps.sum()  # normalise so that the outputs add up to 1
+
+func step*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: 1.0), axis = -1)  # Heaviside step: 0 for negative inputs, 1 otherwise
+func silu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = x / (1 + exp(-x)), axis= -1)  # SiLU (swish): x * sigmoid(x)
+func relu*(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
+
+func htan*(input: Matrix[float]): Matrix[float] =
+    ## Element-wise hyperbolic tangent. math.tanh is used instead of
+    ## (e^2x - 1) / (e^2x + 1) because exp(2 * x) overflows to inf for
+    ## large inputs, which turns the quotient into inf / inf (NaN)
+    input.apply(math.tanh, axis = -1)
 
 
-func ind2sub(n: int, shape: tuple[rows, cols: int]): tuple[row, col: int] =
-    ## Converts an absolute index into an x, y pair
-    return (n div shape.rows, n mod shape.cols)    
-
-
-proc loss(params: TableRef[string, float]): float =
-    ## Our loss function for tris
-    if params.hasKey("sameMove"):
-        result = 24 - params["moves"]
-    else:
-        result = params["moves"]
-        if int(params["result"]) == GameStatus.Draw.int:
-            result += 6
-        elif int(params["result"]) == GameStatus.Lose.int:
-            result += 12
-    echo result
-
-
-proc compareNetworks(a, b: NeuralNetwork): int =
-    if a.params.len() == 0:
-        return -1
-    elif b.params.len() == 0:
-        return 1
-    return cmp(loss(a.params), loss(b.params))
-
-
-proc crossover(a, b: NeuralNetwork): NeuralNetwork =
-    result = deepCopy(a)
-    var i = 0
-    while i < a.layers.len():
-        # We inherit 50% of our weights and biases from our first 
-        # parent and the other 50% from the other parent
-        result.layers[i].weights = where(rand[float](a.layers[i].weights.shape) >= 0.5, a.layers[i].weights, b.layers[i].weights)
-        result.layers[i].biases = where(rand[float](a.layers[i].biases.shape) >= 0.5, a.layers[i].biases, b.layers[i].biases)
-        # Now we sprinkle some mutations into the inherited weights
-        # and biases, just to spice things up. If learnRate = 0.02,
-        # then 2% of our weights and biases will randomly change
-        result.layers[i].weights = where(rand[float](result.layers[i].weights.shape) < a.learnRate, rand[float](result.layers[i].weights.shape), 
-                                         result.layers[i].weights)
-        result.layers[i].biases = where(rand[float](result.layers[i].biases.shape) < a.learnRate, rand[float](result.layers[i].biases.shape), 
-                                         result.layers[i].biases)                           
-        inc(i)
-    result.learnRate = a.learnRate
-
-
-## Our training program
-const Population = 100
-const Iterations = 300
-const Epochs = 10
-const Take = 15
-
-
-var networks: seq[NeuralNetwork] = @[]
-var best: seq[NeuralNetwork] = @[]
-for _ in 0..<Population:
-    networks.add(newNeuralNetwork(@[9, 16, 12, 9], activationFunc=newActivation(sigmoid, func (x, y: float): float = 0.0), 
-                          lossFunc=newLoss(loss, func (x, y: float): float = 0.0), weightRange=(-1.0, +1.0), biasRange=(-0.5, 0.5), 
-                          learnRate=0.02))
-
-var gameOne: TrisGame
-var gameTwo: TrisGame
-var one: NeuralNetwork
-var two: NeuralNetwork
-var pos: tuple[row, col: int]
-
-for epoch in 0..<Epochs:
-    for iteration in 0..<Iterations:
-        gameOne = newTrisGame()
-        gameTwo = newTrisGame()
-        one = sample(networks)
-        two = sample(networks)
-        while one == two:
-            two = sample(networks)
-        while gameOne.get() == Playing:
-            pos = ind2sub(one.compute(gameOne.map.flatten().asType(float)).argmax() - 1, gameOne.map.shape)
-            gameOne.place(TileKind.Self, pos.row, pos.col)
-            gameTwo.place(TileKind.Enemy, pos.row, pos.col)
-            pos = ind2sub(two.compute(gameTwo.map.flatten().asType(float)).argmax() - 1, gameTwo.map.shape)
-            if TileKind(gameOne.map[pos.row, pos.col]) != Empty:
-                # We consider this a loss
-                one.params["result"] = float(Lose)
-                two.params["result"] = float(Lose)
-                one.params["sameMove"] = 1.0
-                two.params["sameMove"] = 1.0
-                break
-            gameTwo.place(TileKind.Self, pos.row, pos.col)
-            gameOne.place(TileKind.Enemy, pos.row, pos.col)
-        if not one.params.hasKey("sameMove"):
-            one.params["result"] = gameOne.get().float()
-            two.params["result"] = gameTwo.get().float()
-        one.params["moves"] = gameOne.moves.float()
-        two.params["moves"] = gameTwo.moves.float()
-    networks.sort(cmp=compareNetworks)
-    best = networks[0..<Take]
-    while networks.len() < Population:
-        one = sample(best)
-        two = sample(best)
-        while one == two:
-            two = sample(best)
-        networks.add(one.crossover(two))
\ No newline at end of file
+var mlp = newNeuralNetwork(@[newDenseLayer(2, 3, newActivation(sigmoid, dx)), newDenseLayer(3, 2, newActivation(sigmoid, dx)),
+                             newDenseLayer(2, 3, newActivation(softmax, dx))],
+                           lossFunc=newLoss(mse, dxMSE),
+                           learnRate=0.05, weightRange=(start: -1.0, stop: 1.0), biasRange=(start: -10.0, stop: 10.0),
+                           momentum=0.55)
+echo mlp.feedforward(newMatrix[float](@[1.0, 2.0]))
diff --git a/src/nn/network.nim b/src/nn/network.nim
index c859314..5623838 100644
--- a/src/nn/network.nim
+++ b/src/nn/network.nim
@@ -16,7 +16,6 @@ import util/matrix
 
 
 import std/strformat
-import std/tables
 import std/random
 
 
@@ -28,35 +27,35 @@ type
         ## A generic feed-forward 
         ## neural network
         layers*: seq[Layer]
-        activation: Activation                                 # The activation function along with its derivative
-        loss: Loss                                             # The cost function along with its derivative
-        # This parameter has a different meaning depending on
-        # whether we're learning using backpropagation with gradient 
-        # descent (in which case it is the amount by which we increase 
-        # our input for the next epoch) or using a genetic approach 
-        # (where it will be the rate of mutation for each layer)
+        loss: Loss                            # The cost function along with its derivative
+        # The network's learn rate determines
+        # the amount of progress that is made
+        # at each step when performing gradient
+        # descent
         learnRate*: float
-        # Extra parameters
-        params*: TableRef[string, float]
-    # Note: The derivatives of the loss and activation
-    # function are only meaningful when performing gradient
-    # descent!
+        # The momentum serves to speed up convergence
+        # time when performing SGD: the higher the output
+        # of the derivative of the cost function, the more
+        # we nudge our inputs for our next epoch
+        momentum*: float
     Loss* = ref object
-        ## A loss function
-        function: proc (params: TableRef[string, float]): float
+        ## A loss function and its derivative
+        function: proc (a, b: Matrix[float]): float
         derivative: proc (x, y: float): float {.noSideEffect.}
     Activation* = ref object
         ## An activation function
-        function: proc (input: float): float {.noSideEffect.}
+        function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
         derivative: proc (x, y: float): float {.noSideEffect.}
     Layer* = ref object
         ## A generic neural network
         ## layer
-        inputSize*: int                                     # The number of inputs we process
-        outputSize*: int                                    # The number of outputs we produce
+        inputSize*: int                                      # The number of inputs we process
+        outputSize*: int                                     # The number of outputs we produce
         weights*: Matrix[float]                              # The weights for each connection (2D)
         biases*: Matrix[float]                               # The biases for each neuron (1D)
-        gradients: tuple[weights, biases: Matrix[float]]    # Gradient coefficients for weights and biases, if using gradient descent
+        gradients: tuple[weights, biases: Matrix[float]]     # Gradient coefficients for weights and biases
+        activation: Activation                               # The layer's activation function
+
 
 
 proc `$`*(self: Layer): string =
@@ -71,64 +70,66 @@ proc `$`*(self: NeuralNetwork): string =
     result = &"NeuralNetwork(learnRate={self.learnRate}, layers={self.layers})"
 
 
-proc newLoss*(function: proc (params: TableRef[string, float]): float, derivative: proc (x, y: float): float {.noSideEffect.}): Loss =
+proc newLoss*(function: proc (a, b: Matrix[float]): float, derivative: proc (x, y: float): float {.noSideEffect.}): Loss =
     ## Creates a new Loss object
     new(result)
     result.function = function
     result.derivative = derivative
 
 
-proc newActivation*(function: proc (input: float): float {.noSideEffect.}, derivative: proc (x, y: float): float {.noSideEffect.}): Activation =
+proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}, derivative: proc (x, y: float): float {.noSideEffect.}): Activation =
     ## Creates a new Activation object
     new(result)
     result.function = function
     result.derivative = derivative
 
 
-proc newLayer*(inputSize: int, outputSize: int, weightRange, biasRange: tuple[start, stop: float]): Layer =
-    ## Creates a new layer with inputSize input 
-    ## parameters and outputSize outgoing outputs.
-    ## Weights are initialized with random values
-    ## in the chosen range
+proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
+    ## Creates a new dense layer with inputSize input 
+    ## parameters and outputSize outgoing outputs and
+    ## using the chosen activation function.
     new(result)
     result.inputSize = inputSize
     result.outputSize = outputSize
-    var biases = newSeqOfCap[float](outputSize)
-    var biasGradients = newSeqOfCap[float](outputSize)
-    for _ in 0..<outputSize:
-        biases.add(rand(biasRange.start..biasRange.stop))
-        biasGradients.add(0.0)
-    var weights = newSeqOfCap[seq[float]](inputSize * outputSize)
-    var weightGradients = newSeqOfCap[seq[float]](inputSize * outputSize)
-    for _ in 0..<outputSize:
-        weights.add(@[])
-        weightGradients.add(@[])
-        for _ in 0..<inputSize:
-            weights[^1].add(rand(weightRange.start..weightRange.stop))
-            weightGradients[^1].add(0)
-    result.biases = newMatrix[float](biases)
-    result.weights = newMatrix[float](weights)
-    result.gradients = (weights: newMatrix[float](weightGradients), biases: newMatrix[float](biasGradients))
-
-    
-
-proc newNeuralNetwork*(layers: seq[int], activationFunc: Activation, lossFunc: Loss,
-                       learnRate: float, weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
-    ## Initializes a new neural network
-    ## with the given layer layout
-    new(result)
-    result.layers = newSeqOfCap[Layer](len(layers))
-    for i in 0..<layers.high():
-        result.layers.add(newLayer(layers[i], layers[i + 1], weightRange, biasRange))
     result.activation = activationFunc
+
+
+proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
+                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
+    ## Initializes a new neural network with
+    ## the given topology and hyperparameters.
+    ## Weights and biases are initialized with
+    ## random values in the chosen range
+    new(result)
+    result.layers = topology
+    for layer in result.layers:
+        var biases = newSeqOfCap[float](layer.outputSize)
+        var biasGradients = newSeqOfCap[float](layer.outputSize)
+        for _ in 0..<layer.outputSize:
+            biases.add(rand(biasRange.start..biasRange.stop))
+            biasGradients.add(0.0)
+        var weights = newSeqOfCap[float](layer.inputSize * layer.outputSize)
+        var weightGradients = newSeqOfCap[float](layer.inputSize * layer.outputSize)
+        for _ in 0..<layer.outputSize:
+            for _ in 0..<layer.inputSize:
+                weights.add(rand(weightRange.start..weightRange.stop))
+                weightGradients.add(0.0)
+        layer.biases = newMatrix[float](biases)
+        # Why swap outputSize and inputSize in the matrix shape? The reason is simple: this
+        # spares us from having to transpose it later when we perform the dot product (I get
+        # that it's a constant time operation, but if we can avoid it altogether, that's even
+        # better!). Each gradient matrix mirrors the shape of the parameter it belongs to
+        layer.weights = newMatrixFromSeq[float](weights, (layer.outputSize, layer.inputSize))
+        layer.gradients = (weights: newMatrixFromSeq[float](weightGradients, (layer.outputSize, layer.inputSize)),
+                           biases: newMatrix[float](biasGradients))
     result.loss = lossFunc
     result.learnRate = learnRate
-    result.params = newTable[string, float]()
+    result.momentum = momentum
 
 
-proc compute*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
-    ## Performs a computation and returns a 1D array
-    ## with the output
+proc feedforward*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
+    ## Feeds the given input through the network and returns
+    ## a 1D array with the output
     when not defined(release):
         if data.shape.rows > 1:
             raise newException(ValueError, "input data must be one-dimensional")
@@ -136,5 +137,9 @@ proc compute*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
             raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
     result = data
     for layer in self.layers:
-        result = (layer.weights.dot(result).sum() + layer.biases).apply(self.activation.function, axis= -1)
+        result = layer.activation.function(layer.weights.dot(result) + layer.biases)
 
+
+proc backprop(self: NeuralNetwork, x, y: Matrix[float]) {.used.} =
+    ## Performs a single backpropagation step and updates the
+    ## gradients for our weights and biases, layer by layer
\ No newline at end of file
diff --git a/src/nn/util/matrix.nim b/src/nn/util/matrix.nim
index 91101f6..a55d304 100644
--- a/src/nn/util/matrix.nim
+++ b/src/nn/util/matrix.nim
@@ -1,4 +1,4 @@
-# Copyright 2022 Mattia Giambirtone & All Contributors
+# Copyright 2023 Mattia Giambirtone & All Contributors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,7 +33,13 @@ type
         row: int        # The row in the matrix to which we point to
 
 
-proc getSize(shape: tuple[rows, cols: int]): int =
+# Simple one-line helpers
+func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len()
+func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols
+func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data
+
+
+proc getSize*(shape: tuple[rows, cols: int]): int =
     ## Helper to get the size required for the
     ## underlying data array for a matrix of the
     ## given shape
@@ -84,6 +90,16 @@ proc newMatrix*[T](data: seq[seq[T]], order: MatrixOrder = RowMajor): Matrix[T]
                 idx = col
 
 
+proc newMatrixFromSeq*[T](data: seq[T], shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
+    ## Creates a new matrix of the given shape from a flat
+    ## sequence
+    new(result)
+    new(result.data)
+    result.data[] = data
+    result.shape = shape
+    result.order = order
+
+
 proc zeros*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
     ## Creates a new matrix of the given shape
     ## filled with zeros
@@ -115,6 +131,7 @@ proc ones*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = R
         for _ in 0..<size:
             result.data[].add(1.0)
 
+
 proc rand*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
     ## Creates a new matrix of the given shape
     ## filled with random values between 0 and
@@ -142,13 +159,7 @@ proc asType*[T](self: Matrix[T], kind: typedesc): Matrix[kind] =
     result.order = self.order
 
 
-# Simple one-line helpers and forward declarations
-func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len()
-func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols
-func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data
-
-
-func getIndex[T](self: Matrix[T], row, col: int): int =
+func getIndex*[T](self: Matrix[T], row, col: int): int =
     ## Converts an (x, y) coordinate pair into a single 
     ## integer index into our array, taking the internal 
     ## array order into account
@@ -158,6 +169,13 @@ func getIndex[T](self: Matrix[T], row, col: int): int =
         result = col * self.shape.rows + row
 
 
+func ind2sub*(n: int, shape: tuple[rows, cols: int]): tuple[row, col: int] =
+    ## Converts the flat index n into a (row, col) pair for the given shape; when shape.rows is 0 the result is (0, n)
+    if shape.rows == 0:
+        return (0, n)
+    return (n div shape.cols, n mod shape.cols)
+
+
 proc `[]`*[T](self: Matrix[T], row, col: int): T =
     ## Gets the element the given row and
     ## column into the matrix
@@ -172,8 +190,8 @@ proc `[]`*[T](self: Matrix[T], row: int): MatrixView[T] =
     ## Gets a single row in the matrix. No data copies
     ## occur and a view into the original matrix is
     ## returned
-    var idx = self.getIndex(row, 0)
     when not defined(release):
+        var idx = self.getIndex(row, 0)
         if idx notin 0..<self.data[].len():
             raise newException(IndexDefect, &"row {row} is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
     new(result)
@@ -182,7 +200,7 @@ proc `[]`*[T](self: Matrix[T], row: int): MatrixView[T] =
 
 
 proc `[]`*[T](self: MatrixView[T], col: int): T =
-    ## Gets the element the given row into
+    ## Gets the element at the given column into
     ## the matrix view
     var idx = self.m.getIndex(self.row, col)
     when not defined(release):
@@ -202,7 +220,7 @@ proc `[]=`*[T](self: Matrix[T], row, col: int, val: T) =
 
 
 proc `[]=`*[T](self: MatrixView[T], col: int, val: T) =
-    ## Sets the element at the given row 
+    ## Sets the element at the given column
     ## into the matrix view to the value 
     ## val
     var idx = self.m.getIndex(0, col)
@@ -211,7 +229,6 @@ proc `[]=`*[T](self: MatrixView[T], col: int, val: T) =
             raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
     self.m.data[idx] = val
 
-    
 
 # Shape management
 proc reshape*[T](self: Matrix[T], shape: tuple[rows, cols: int]): Matrix[T] =
@@ -486,6 +503,44 @@ proc `+`*[T](a, b: Matrix[T]): Matrix[T] =
         result = a[0] + b[0]
 
 
+proc `-`*[T](a, b: MatrixView[T]): Matrix[T] =
+    ## Performs the element-wise difference
+    ## of the given matrix views and returns
+    ## a new vector with the result
+    when not defined(release):
+        if a.shape.cols != b.shape.cols:  # Basically if their length is different
+            raise newException(ValueError, &"incompatible argument shapes for subtraction")
+    new(result)
+    new(result.data)
+    result.shape = a.shape
+    result.order = RowMajor
+    result.data[] = newSeqOfCap[T](result.shape.getSize())
+    for i in 0..<a.shape.cols:
+        result.data[].add(a[i] - b[i])
+
+
+proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
+    ## Element-wise difference of two matrices (or vectors) of the same shape
+    when not defined(release):
+        if a.shape.rows > 0 and b.shape.rows > 0 and a.shape != b.shape:
+            raise newException(ValueError, &"incompatible argument shapes for subtraction")
+        elif (a.shape.rows == 0 or b.shape.rows == 0) and a.shape.cols != b.shape.cols:
+            raise newException(ValueError, &"incompatible argument shapes for subtraction")
+    if a.shape.rows <= 1:
+        # Vectors and single-row matrices: subtract their only row via the view operator
+        return a[0] - b[0]
+    new(result)
+    new(result.data)
+    result.shape = a.shape
+    result.order = RowMajor
+    # The buffer is sized only once the shape is known, otherwise the capacity would be 0
+    result.data[] = newSeqOfCap[T](result.shape.getSize())
+    for row in 0..<result.shape.rows:
+        for m in a[row] - b[row]:
+            for element in m:
+                result.data[].add(element)
+
+
 proc `*`*[T](a, b: MatrixView[T]): Matrix[T] =
     ## Performs the vector product of the 
     ## given matrix views and returns a new
@@ -606,29 +661,32 @@ proc `==`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[bool] =
     return a[0] == b
 
 
-proc diag*[T](a: Matrix[T], diagonal: int): Matrix[T] =
-    ## Returns the chosen diagonal of the given
-    ## matrix as a linear array. Diagonal 0 means left, 
-    ## 1 means right
-    when not defined(release):
-        if a.shape.rows != a.shape.cols:
-            raise newException(ValueError, "only square matrices have diagonals")
+proc diag*[T](a: Matrix[T], offset: int = 0): Matrix[T] =
+    ## Returns the diagonal of the given
+    ## matrix starting at the given offset
+    if offset >= a.shape.cols:
+        return newMatrix[T](@[])
+    var current = offset.ind2sub(a.shape)
     var res = newSeqOfCap[T](a.shape.getSize())
-    case diagonal:
-        of 0:
-            for i in 0..<a.shape.rows:
-                res.add(a[i, i])
-        of 1:
-            for i in 0..<a.shape.rows:
-                res.add(a[i, a.shape.rows - i])
-        else:
-            when not defined(release):
-                raise newException(ValueError, &"invalid diagonal {diagonal} for matrix")
-            else:
-                discard
+    while current.row < a.shape.rows and current.col < a.shape.cols:
+        res.add(a.data[a.getIndex(current.row, current.col)])
+        inc(current.row)
+        inc(current.col)
     result = newMatrix(res)
 
 
+proc fliplr*[T](self: Matrix[T]): Matrix[T] =
+    ## Flips each row in the matrix left
+    ## to right. A copy is returned
+    new(result)
+    result.shape = self.shape
+    result.order = RowMajor  # elements are appended row after row below, whatever self.order is
+    new(result.data)
+    result.data[] = newSeqOfCap[T](self.shape.getSize())
+    for row in self:
+        for i in countdown(row.len() - 1, 0, 1):
+            result.data[].add(row[i])
+
 
 proc `==`*[T](a, b: Matrix[T]): Matrix[bool] =
     when not defined(release):
@@ -646,6 +704,22 @@ proc `==`*[T](a, b: Matrix[T]): Matrix[bool] =
             result.data[].add(a[r, c] == b[r, c])
 
 
+proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] =
+    ## Element-wise inequality of two equally shaped matrices or vectors
+    when not defined(release):
+        if a.shape != b.shape:
+            raise newException(ValueError, "can't compare matrices of different shapes")
+    new(result)
+    new(result.data)
+    result.shape = a.shape
+    result.order = RowMajor
+    result.data[] = newSeqOfCap[bool](result.shape.getSize())
+    # A vector reports zero rows but still holds one row of data
+    for r in 0..<max(a.shape.rows, 1):
+        for c in 0..<a.shape.cols:
+            result.data[].add(a[r, c] != b[r, c])
+
+
 proc `>`*[T](a, b: Matrix[T]): Matrix[bool] =
     when not defined(release):
         if a.shape != b.shape:
@@ -710,14 +784,22 @@ proc any*(a: Matrix[bool]): bool =
     return false
 
 
+proc index*[T](self: Matrix[T], x: T): tuple[row, col: int] =
+    ## Returns the location of the given
+    ## item in the matrix. A tuple of (-1, -1)
+    ## is returned if the item is not found
+    for i, row in self:
+        for j, e in row:
+            if e == x:
+                return (i, j)
+    return (-1, -1)
+
+
 # Specular definitions of commutative operators
 proc `<`*[T](a, b: Matrix[T]): Matrix[bool] = b > a
-proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] = not a == b
 proc `*`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[T] = b * a
 proc `==`*[T](a: T, b: Matrix[T]): Matrix[bool] = b == a
 proc `==`*[T](a: MatrixView[T], b: Matrix[T]): Matrix[bool] = b == a
-proc `!=`*[T](a: Matrix[T], b: T): Matrix[bool] = not a == b
-proc `!=`*[T](a: T, b: Matrix[T]): Matrix[bool] = not b == a
 
 
 proc toRowMajor*[T](self: Matrix[T], copy: bool = true): Matrix[T] =
@@ -762,15 +844,17 @@ proc toColumnMajor*[T](self: Matrix[T], copy: bool = true): Matrix[T] =
 # Matrices and matrix views are iterable!
 
 iterator items*[T](self: Matrix[T]): MatrixView[T] =
-    for row in 0..<self.shape.rows:
-        yield self[row]
-    if self.shape.rows == 0:
-        yield self[0]
+    if self.len() > 0:
+        for row in 0..<self.shape.rows:
+            yield self[row]
+        if self.shape.rows == 0:
+            yield self[0]
 
 
 iterator items*[T](self: MatrixView[T]): T =
-    for column in 0..<self.shape.cols:
-        yield self[column]
+    if self.len() > 0:
+        for column in 0..<self.shape.cols:
+            yield self[column]
 
 
 iterator pairs*[T](self: Matrix[T]): tuple[i: int, val: MatrixView[T]] =
@@ -799,7 +883,7 @@ proc `$`*[T](self: MatrixView[T]): string =
 
 proc `$`*[T](self: Matrix[T]): string =
     ## Stringifies the matrix
-    if self.shape.rows == 0:
+    if self.shape.rows == 0 and self.len() > 0:
         return $(self[0])
     result &= "["
     for i, row in self:
@@ -864,8 +948,32 @@ proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
         col = 0
 
 
+proc where*[T](cond: Matrix[bool], x: Matrix[T], y: T): Matrix[T] =
+    ## Behaves like numpy.where, but with a constant
+    when not defined(release):
+        if not (x.shape == cond.shape):
+            raise newException(ValueError, &"all inputs must be of equal shape for where()")
+    result = x.copy()
+    var 
+        row = 0
+        col = 0
+    if cond.shape.rows == 0:
+        while col < cond.shape.cols:
+            if not cond[0, col]:
+                result[0, col] = y
+            inc(col)
+    while row < cond.shape.rows:
+        while col < cond.shape.cols:
+            if not cond[row, col]:
+                result[row, col] = y
+            inc(col)
+        inc(row)
+        col = 0
+
+
 # Just a helper to avoid mistakes and so that x.where(x > 10, y) works as expected
-proc where*[T](self: Matrix[T], cond: Matrix[bool], other: Matrix[T]): Matrix[T] = cond.where(self, other)
+proc where*[T](self: Matrix[T], cond: Matrix[bool], other: Matrix[T]): Matrix[T] {.inline.} = cond.where(self, other)
+proc where*[T](self: Matrix[T], cond: Matrix[bool], other: T): Matrix[T] {.inline.} = cond.where(self, other)
 
 
 proc max*[T](self: Matrix[T]): T =
@@ -899,13 +1007,22 @@ proc argmax*[T](self: Matrix[T]): int =
 
 
 proc contains*[T](self: Matrix[T], e: T): bool =
-    ## Returns wherher the matrix contains
+    ## Returns whether the matrix contains
     ## the element e
     for row in self:
         for element in row:
             if element == e:
                 return true
     return false
+    
+
+proc count*[T](self: Matrix[T], e: T): int =
+    ## Returns the number of occurrences 
+    ## of e in self
+    for row in self:
+        for k in row:
+            if k == e:
+                inc(result) 
 
 
 when isMainModule:
@@ -932,6 +1049,14 @@ when isMainModule:
     assert (x < 5).where(x, x * 10).sum() == 360, "where mismatch"
     assert all((x < 5).where(x, x * 10) == x.where(x < 5, x * 10)), "where mismatch"
     assert x.max() == 9, "max mismatch"
-    assert x.argmax() == 9, "argmax mismatch"
-    discard newMatrix[int](@[12, 23]).dot(newMatrix[int](@[@[11, 22], @[33, 44]]))
-    discard newMatrix[int](@[@[1, 2, 3], @[2, 3, 4]]).dot(newMatrix[int](@[1, 2, 3]))
\ No newline at end of file
+    assert x.argmax() == 10, "argmax mismatch"
+    assert all(newMatrix[int](@[12, 23]).dot(newMatrix[int](@[@[11, 22], @[33, 44]])) == newMatrix[int](@[891, 1276]))
+    assert all(newMatrix[int](@[@[1, 2, 3], @[2, 3, 4]]).dot(newMatrix[int](@[1, 2, 3])) == newMatrix[int](@[14, 20]))
+    assert all(m.diag() == newMatrix[int](@[1, 5]))
+    assert all(m.diag(1) == newMatrix[int](@[2, 6]))
+    assert all(m.diag(2) == newMatrix[int](@[3]))
+    assert m.diag(3).len() == 0
+    var j = m.fliplr()
+    assert all(j.diag() == newMatrix[int](@[3, 5]))
+    assert all(j.diag(1) == newMatrix[int](@[2, 4]))
+    assert all(j.diag(2) == newMatrix[int](@[1]))
diff --git a/src/nn/util/tris.nim b/src/nn/util/tris.nim
deleted file mode 100644
index 7ae1baa..0000000
--- a/src/nn/util/tris.nim
+++ /dev/null
@@ -1,83 +0,0 @@
-import matrix
-
-
-type 
-    TileKind* = enum
-        ## A tile enumeration kind
-        Empty = 0,
-        Self,
-        Enemy
-    GameStatus* = enum
-        ## A game status enumeration
-        Playing,
-        Win,
-        Lose,
-        Draw
-    TrisGame* = ref object
-        map*: Matrix[int]
-        moves*: int
-
-
-proc newTrisGame*: TrisGame =
-    ## Creates a new TrisGame object
-    new(result)
-    result.map = zeros[int]((3, 3))
-    result.moves = 0
-
-
-proc get*(self: TrisGame): GameStatus =
-    ## Returns the game status
-    # Checks for rows
-    for _, row in self.map:
-        if all(row == newMatrix[int](@[1, 1, 1])):
-            return Win
-        elif all(row == newMatrix[int](@[2, 2, 2])):
-            return Lose
-    # Checks for columns
-    for _, col in self.map.transpose:
-        if all(col == newMatrix[int](@[1, 1, 1])):
-            return Win
-        elif all(col == newMatrix[int](@[2, 2, 2])):
-            return Lose
-    # Checks for diagonals
-    for i in 0..<2:
-        if all(self.map.diag(i) == newMatrix[int](@[1, 1, 1])):
-            return Win
-        elif all(self.map.diag(i) == newMatrix[int](@[2, 2, 2])):
-            return Lose
-    # No check was successful and there's no empty slots: draw!
-    if not any(self.map == 0):
-        return Draw
-    # There are empty slots and no one won yet, we're still in game!
-    return Playing
-
-
-proc `$`*(self: TrisGame): string =
-    ## Stringifies self
-    return $self.map
-
-
-proc place*(self: TrisGame, tile: TileKind, x, y: int) =
-    ## Places a tile onto the playing board
-    if TileKind(self.map[x, y]) == Empty:
-        self.map[x, y] = int(tile)
-    if tile == Self:
-        inc(self.moves)
-
-
-when isMainModule:
-    var game = newTrisGame()
-    game.place(Enemy, 0, 0)
-    game.place(Enemy, 0, 1)
-    assert game.get() == Playing
-    game.place(Enemy, 0, 2)
-    assert game.get() == Lose
-    game.place(Self, 0, 2)
-    assert game.get() == Playing
-    game.place(Enemy, 1, 1)
-    game.place(Enemy, 2, 2)
-    assert game.get() == Lose
-    game.place(Self, 2, 2)
-    assert game.get() == Playing
-    game.place(Self, 1, 2)
-    assert game.get() == Win