From d6e5e148aab8257e24f63dc7ca06c51528fa54fe Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Thu, 22 Dec 2022 14:55:26 +0100 Subject: [PATCH] Many fixes to matrix library and minor changes --- src/main.nim | 2 +- src/nn/layer.nim | 13 +- src/nn/util/activations.nim | 6 +- src/nn/util/matrix.nim | 520 +++++++++++++++++++++--------------- 4 files changed, 309 insertions(+), 232 deletions(-) diff --git a/src/main.nim b/src/main.nim index fb15852..b1061e4 100644 --- a/src/main.nim +++ b/src/main.nim @@ -3,7 +3,7 @@ import nn/util/activations import nn/util/losses -var net = newNeuralNetwork(@[2, 3, 2], activationFunc=newActivation(sigmoid, proc (x, y: float): float = 0.0), +var net = newNeuralNetwork(@[2, 3, 2], activationFunc=newActivation(sigmoid, func (x, y: float): float = 0.0), lossFunc=newLoss(mse, mse), weightRange=(-1.0, +1.0), learnRate=0.05) var prediction = net.predict(newMatrix[float](@[2.7, 3.0])) echo prediction diff --git a/src/nn/layer.nim b/src/nn/layer.nim index fa3244d..dd9d30a 100644 --- a/src/nn/layer.nim +++ b/src/nn/layer.nim @@ -74,13 +74,10 @@ proc compute*(self: Layer, data: Matrix[float]): Matrix[float] = ## Computes the output of a given layer with ## the given input data and returns it as a ## one-dimensional array - var sequence = newSeqOfCap[float](self.outputSize) - for i, weights in self.weights: - # This looks fancy, but it's just abstracting some of the - # complexity away to the matrix library and is equivalent - # to the nested for-loop approach (although more idiomatic - # and probably faster) - sequence.add(self.activation.function((weights * data).sum() + self.biases[0, i])) - result = newMatrix[float](sequence) + result = ((self.weights * data).sum() + self.biases).apply(self.activation.function, axis= -1) + +proc cost*(self: Layer, x: Matrix[float], Y: Matrix[float]): float = + ## Returns the total cost of this layer + diff --git a/src/nn/util/activations.nim b/src/nn/util/activations.nim index 902d871..85b25aa 100644 --- a/src/nn/util/activations.nim +++ b/src/nn/util/activations.nim @@ -24,11 +24,11 @@ func htan*(input: float): float = type Activation* = ref object - function*: proc (input: float): float - derivative*: proc (x, y: float): float + function*: proc (input: float): float {.noSideEffect.} + derivative*: proc (x, y: float): float {.noSideEffect.} -proc newActivation*(function: proc (input: float): float, derivative: proc (x, y: float): float): Activation = +proc newActivation*(function: proc (input: float): float {.noSideEffect.}, derivative: proc (x, y: float): float {.noSideEffect.}): Activation = ## Creates a new activation object new(result) result.function = function diff --git a/src/nn/util/matrix.nim b/src/nn/util/matrix.nim index 31e39f7..eb05b8c 100644 --- a/src/nn/util/matrix.nim +++ b/src/nn/util/matrix.nim @@ -14,7 +14,6 @@ from std/strformat import `&` from std/sequtils import zip -from std/strutils import join type @@ -29,10 +28,6 @@ type ## A zero-copy view into a matrix m: Matrix[T] # The matrix that owns the row we point to row: int # The row in the matrix to which we point to - # Even though a MatrixView has no - # rows, we keep the same field for - # consistency with the Matrix type - shape*: tuple[rows, cols: int] proc getSize(shape: tuple[rows, cols: int]): int = @@ -44,6 +39,10 @@ proc getSize(shape: tuple[rows, cols: int]): int = return shape.cols * shape.rows +proc shape*[T](self: MatrixView[T]): tuple[rows, cols: int] = + return (0, self.m.shape.cols) + + proc newMatrix*[T](data: seq[T]): Matrix[T] = ## Initializes a new matrix from a given ## 1D sequence @@ -94,8 +93,6 @@ proc zeros*[T](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Ma func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len() func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data -proc dup*[T](self: Matrix[T]): Matrix[T] -proc copy*[T](self: Matrix[T]): Matrix[T] func getIndex[T](self: Matrix[T], row, col: int): int = @@ -113,7 +110,7 @@ proc `[]`*[T](self: Matrix[T], row, col: int): T {.raises: [IndexDefect, ValueEr ## column into the matrix var idx = self.getIndex(row, col) when not defined(release): - if idx notin 0.. 1: - result &= "[" - for row in 0.. 1: - result &= "]" - - -proc `$`*[T](self: MatrixView[T]): string = - ## Stringifies the matrix view - result = "[" - var i = 0 - while i < self.shape.cols: - result &= $self[i] - if i < self.shape.cols - 1: - result &= ", " - inc(i) - result &= "]" # Shape management @@ -208,8 +167,7 @@ proc reshape*[T](self: Matrix[T], shape: tuple[rows, cols: int]): Matrix[T] {.ra if shape.getSize() != self.data[].len(): raise newException(ValueError, &"shape ({shape.rows}, {shape.cols}) is invalid for matrix of length {self.len()}") result = self.dup() - if shape.rows > 1: - self.shape = shape + result.shape = shape proc reshape*[T](self: Matrix[T], rows, cols: int): Matrix[T] {.raises: [ValueError].} = @@ -220,11 +178,8 @@ proc reshape*[T](self: Matrix[T], rows, cols: int): Matrix[T] {.raises: [ValueEr proc transpose*[T](self: Matrix[T]): Matrix[T] = ## Transposes rows and columns in the given ## matrix. No data copies occur - result = self.dup() - result.data = self.data - discard result.reshape(self.shape.cols, self.shape.rows) - if result.shape.rows > 0: - result.order = if result.order == RowMajor: ColumnMajor else: RowMajor + result = self.reshape(self.shape.cols, self.shape.rows) + result.order = if result.order == RowMajor: ColumnMajor else: RowMajor proc flatten*[T](self: Matrix[T]): Matrix[T] = @@ -236,7 +191,7 @@ proc flatten*[T](self: Matrix[T]): Matrix[T] = # Helpers for fast applying of operations along an axis -proc apply*[T](self: Matrix[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): Matrix[T] = +proc apply*[T](self: Matrix[T], op: proc (a, b: T): T {.noSideEffect.}, b: T, copy: bool = false, axis: int): Matrix[T] = ## Applies a binary operator to every ## element in the given axis of the ## given matrix (0 = rows, 1 = columns, @@ -247,22 +202,23 @@ proc apply*[T](self: Matrix[T], op: proc (a, b: T): T, b: T, copy: bool = false, result = self.copy() case axis: of 0: - # Stores the indeces of the values - # we'll delete after we're done. This - # is because applying along - var indeces: seq[int] = @[] + for r in 0.. added: + result.data[].delete(0) + result.shape.rows = 0 + result.shape.cols = added + result.order = RowMajor + + proc sum*[T](self: MatrixView[T]): T = ## Returns the sum of all elements ## in the matrix view @@ -350,40 +323,6 @@ proc sum*[T](self: MatrixView[T]): T = inc(i) -proc sub*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] = - var self = self - if copy: - self = self.copy() - result = self - var indeces: seq[int] = @[] - case axis: - of 1: - for r in 0.. 0 and b.shape.rows > 0 and a.shape != b.shape: + raise newException(ValueError, &"incompatible argument shapes for addition") + elif (a.shape.rows == 0 or b.shape.rows == 0) and a.shape.cols != b.shape.cols: + raise newException(ValueError, &"incompatible argument shapes for addition") + if a.shape.rows == 0 and b.shape.rows == 0: + return a[0] + b[0] + new(result) + new(result.data) + result.data[] = newSeqOfCap[T](result.shape.getSize()) + result.shape = a.shape + result.order = RowMajor + if result.shape.rows > 1: + for row in 0.. 0 and b.shape.rows > 0 and a.shape.cols != b.shape.rows: + raise newException(ValueError, &"incompatible argument shapes for multiplication") + elif (a.shape.rows == 0 or b.shape.rows == 0) and a.shape.cols != b.shape.cols: + raise newException(ValueError, &"incompatible argument shapes for multiplication") + new(result) + new(result.data) + result.shape = (a.shape.rows, b.shape.cols) + result.order = RowMajor + result.data[] = newSeqOfCap[T](result.shape.getSize()) + if result.shape.rows > 1: + if a.shape.rows == b.shape.rows: + for row in 0..`*[T](a: Matrix[T], b: T): Matrix[bool] = new(result) new(result.data) - result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols) - for i in 0.. b) -proc `*`*[T](a, b: Matrix[T]): Matrix[T] {.raises: [ValueError].} = - when not defined(release): - if a.shape.cols != b.shape.rows: - raise newException(ValueError, &"incompatible argument shapes for multiplication") - if a.order != b.order: - raise newException(ValueError, "can't multiply matrices with different ordering") +proc `<=`*[T](a: Matrix[T], b: T): Matrix[bool] = new(result) new(result.data) - result.shape = (a.shape.rows, b.shape.cols) - result.order = RowMajor - result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols) - for (e, k) in zip(a.data[], b.data[]): - result.data[].add(e * k) + result.shape = a.shape + result.data[] = newSeqOfCap[bool](result.shape.getSize()) + for e in a.data[]: + result.data[].add(e <= b) + + +proc `>=`*[T](a: Matrix[T], b: T): Matrix[bool] = + new(result) + new(result.data) + result.shape = a.shape + result.data[] = newSeqOfCap[bool](result.shape.getSize()) + for e in a.data[]: + result.data[].add(e >= b) -# Comparison operators. They produce a new matrix with boolean values proc `==`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = when not defined(release): @@ -518,7 +529,7 @@ proc `==`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = new(result.data) result.shape = a.shape result.order = RowMajor - result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + result.data[] = newSeqOfCap[bool](result.shape.getSize()) for r in 0..`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = new(result.data) result.shape = a.shape result.order = RowMajor - result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + result.data[] = newSeqOfCap[bool](result.shape.getSize()) for r in 0.. b[r, c]) @@ -546,7 +557,7 @@ proc `>=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = new(result.data) result.shape = a.shape result.order = RowMajor - result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + result.data[] = newSeqOfCap[bool](result.shape.getSize()) for r in 0..= b[r, c]) @@ -560,7 +571,7 @@ proc `<=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = new(result.data) result.shape = a.shape result.order = RowMajor - result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + result.data[] = newSeqOfCap[bool](result.shape.getSize()) for r in 0.. orig.high(): inc(col) idx = col @@ -605,6 +616,7 @@ proc toRowMajor*[T](self: Matrix[T]): Matrix[T] = proc toColumnMajor*[T](self: Matrix[T]): Matrix[T] = ## Converts a row-major matrix to a ## column-major one + new(result) if self.order == ColumnMajor: return self.order = ColumnMajor @@ -612,9 +624,9 @@ proc toColumnMajor*[T](self: Matrix[T]): Matrix[T] = self.data[] = @[] var idx = 0 var col = 0 - while col < result.shape.cols: - result.data[].add(orig[idx]) - idx += result.shape.cols + while col < self.shape.cols: + self.data[].add(orig[idx]) + idx += self.shape.cols if idx > orig.high(): inc(col) idx = col @@ -626,6 +638,8 @@ proc toColumnMajor*[T](self: Matrix[T]): Matrix[T] = iterator items*[T](self: Matrix[T]): MatrixView[T] = for row in 0..