From dc44b65e940cfb7da8d9301f2cf82204deeaa004 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Tue, 20 Dec 2022 12:08:24 +0100 Subject: [PATCH] Added initial work on multilayer perceptron --- src/main.nim | 9 + src/nn/layer.nim | 86 +++++ src/nn/network.nim | 59 ++++ src/nn/util/activations.nim | 35 ++ src/nn/util/losses.nim | 29 ++ src/nn/util/matrix.nim | 664 ++++++++++++++++++++++++++++++++++++ 6 files changed, 882 insertions(+) create mode 100644 src/main.nim create mode 100644 src/nn/layer.nim create mode 100644 src/nn/network.nim create mode 100644 src/nn/util/activations.nim create mode 100644 src/nn/util/losses.nim create mode 100644 src/nn/util/matrix.nim diff --git a/src/main.nim b/src/main.nim new file mode 100644 index 0000000..fb15852 --- /dev/null +++ b/src/main.nim @@ -0,0 +1,9 @@ +import nn/network +import nn/util/activations +import nn/util/losses + + +var net = newNeuralNetwork(@[2, 3, 2], activationFunc=newActivation(sigmoid, proc (x, y: float): float = 0.0), + lossFunc=newLoss(mse, mse), weightRange=(-1.0, +1.0), learnRate=0.05) +var prediction = net.predict(newMatrix[float](@[2.7, 3.0])) +echo prediction diff --git a/src/nn/layer.nim b/src/nn/layer.nim new file mode 100644 index 0000000..fa3244d --- /dev/null +++ b/src/nn/layer.nim @@ -0,0 +1,86 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import util/matrix +import util/losses +import util/activations + +import std/strformat +import std/random + + +randomize() + +type + Layer* = ref object + ## A generic neural network + ## layer + inputSize*: int # The number of inputs we process + outputSize*: int # The number of outputs we produce + weights*: Matrix[float] # The weights for each connection (2D) + biases*: Matrix[float] # The biases for each neuron (1D) + activation: Activation # The activation function along with its derivative + loss: Loss # The cost function used in training + gradients: tuple[weights, biases: Matrix[float]] # Gradient coefficients for weights and biases + learnRate: float # The speed at which we perform gradient descent + + +proc `$`*(self: Layer): string = + ## Returns a string representation + ## of the layer + result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})" + + +proc newLayer*(inputSize: int, outputSize: int, activation: Activation, loss: Loss, learnRate: float, weightRange: tuple[start, stop: float]): Layer = + ## Creates a new layer with inputSize input + ## parameters and outputSize outgoing outputs. + ## Weights are initialized with random values + ## in the chosen range + new(result) + result.inputSize = inputSize + result.outputSize = outputSize + var biases = newSeqOfCap[float](outputSize) + var biasGradients = newSeqOfCap[float](outputSize) + for _ in 0.. 1: + raise newException(ValueError, "input data must be one-dimensional") + if data.shape.cols != self.layers[0].inputSize: + raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)") + result = data + for layer in self.layers: + result = layer.compute(result) + + +proc classify*(self: NeuralNetwork, data: Matrix[float]): int = + ## Performs a prediction and returns the label + ## with the highest likelyhood + result = maxIndex(self.predict(data).raw[]) diff --git a/src/nn/util/activations.nim b/src/nn/util/activations.nim new file mode 100644 index 0000000..902d871 --- /dev/null +++ b/src/nn/util/activations.nim @@ -0,0 +1,35 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import std/math + + +func step*(input: float): float = (if input < 0.0: 0.0 else: 1.0) +func sigmoid*(input: float): float = 1 / (1 + exp(-input)) +func silu*(input: float): float = 1 / (1 + exp(-input)) +func relu*(input: float): float = max(0.0, input) +func htan*(input: float): float = + let temp = exp(2 * input) + result = (temp - 1) / (temp + 1) + + +type Activation* = ref object + function*: proc (input: float): float + derivative*: proc (x, y: float): float + + +proc newActivation*(function: proc (input: float): float, derivative: proc (x, y: float): float): Activation = + ## Creates a new activation object + new(result) + result.function = function + result.derivative = derivative \ No newline at end of file diff --git a/src/nn/util/losses.nim b/src/nn/util/losses.nim new file mode 100644 index 0000000..5244e08 --- /dev/null +++ b/src/nn/util/losses.nim @@ -0,0 +1,29 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import std/math + +# Mean squared error +func mse*(x, y: float): float = pow(x - y, 2) + + +type Loss* = ref object + function*: proc (x, y: float): float + derivative*: proc (x, y: float): float + + +proc newLoss*(function: proc (x, y: float): float, derivative: proc (x, y: float): float): Loss = + ## Creates a new activation object + new(result) + result.function = function + result.derivative = derivative \ No newline at end of file diff --git a/src/nn/util/matrix.nim b/src/nn/util/matrix.nim new file mode 100644 index 0000000..31e39f7 --- /dev/null +++ b/src/nn/util/matrix.nim @@ -0,0 +1,664 @@ +# Copyright 2022 Mattia Giambirtone & All Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from std/strformat import `&` +from std/sequtils import zip +from std/strutils import join + + +type + MatrixOrder* = enum + RowMajor, ColumnMajor + Matrix*[T] = ref object + ## A matrix object + data: ref seq[T] # Nim seqs are value types, so this is needed to avoid copies on assignment + shape*: tuple[rows, cols: int] + order*: MatrixOrder + MatrixView*[T] = ref object + ## A zero-copy view into a matrix + m: Matrix[T] # The matrix that owns the row we point to + row: int # The row in the matrix to which we point to + # Even though a MatrixView has no + # rows, we keep the same field for + # consistency with the Matrix type + shape*: tuple[rows, cols: int] + + +proc getSize(shape: tuple[rows, cols: int]): int = + ## Helper to get the size required for the + ## underlying data array for a matrix of the + ## given shape + if shape.cols == 0: + return shape.rows + return shape.cols * shape.rows + + +proc newMatrix*[T](data: seq[T]): Matrix[T] = + ## Initializes a new matrix from a given + ## 1D sequence + new(result) + new(result.data) + result.data[] = data + result.shape = (rows: 0, cols: len(data)) + result.order = RowMajor + + +proc newMatrix*[T](data: seq[seq[T]], order: MatrixOrder = RowMajor): Matrix[T] {.raises: [ValueError].} = + ## Initializes a new matrix from a given + ## 2D sequence + new(result) + new(result.data) + var temp: seq[T] = @[] + result.shape = (rows: len(data), cols: len(data[0])) + result.data[] = newSeqOfCap[T](result.shape.getSize()) + result.order = order + for sub in data: + if len(sub) != result.shape.cols: + raise newException(ValueError, "invalid shape of input data (mismatching column)") + for j in sub: + temp.add(j) + if order == RowMajor: + for j in temp: + result.data[].add(j) + else: + var idx = 0 + var col = 0 + while col < result.shape.cols: + result.data[].add(temp[idx]) + idx += result.shape.cols + if idx > temp.high(): + inc(col) + idx = col + + +proc zeros*[T](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] = + ## Creates a new matrix of the given shape + ## filled with zeros + new(result) + new(result.data) + result.data[] = @[] + + +# Simple one-line helpers and forward declarations +func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len() +func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols +func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data +proc dup*[T](self: Matrix[T]): Matrix[T] +proc copy*[T](self: Matrix[T]): Matrix[T] + + +func getIndex[T](self: Matrix[T], row, col: int): int = + ## Converts an (x, y) coordinate pair into a single + ## integer index into our array, taking the internal + ## array order into account + if self.order == RowMajor: + result = row * self.shape.cols + col + else: + result = col * self.shape.rows + row + + +proc `[]`*[T](self: Matrix[T], row, col: int): T {.raises: [IndexDefect, ValueError].} = + ## Gets the element the given row and + ## column into the matrix + var idx = self.getIndex(row, col) + when not defined(release): + if idx notin 0.. 1: + result &= "[" + for row in 0.. 1: + result &= "]" + + +proc `$`*[T](self: MatrixView[T]): string = + ## Stringifies the matrix view + result = "[" + var i = 0 + while i < self.shape.cols: + result &= $self[i] + if i < self.shape.cols - 1: + result &= ", " + inc(i) + result &= "]" + + +# Shape management +proc reshape*[T](self: Matrix[T], shape: tuple[rows, cols: int]): Matrix[T] {.raises: [ValueError].} = + ## Reshapes the given matrix. No data copies occur + when not defined(release): + if shape.getSize() != self.data[].len(): + raise newException(ValueError, &"shape ({shape.rows}, {shape.cols}) is invalid for matrix of length {self.len()}") + result = self.dup() + if shape.rows > 1: + self.shape = shape + + +proc reshape*[T](self: Matrix[T], rows, cols: int): Matrix[T] {.raises: [ValueError].} = + ## Reshapes the given matrix. No data copies occur + result = self.reshape((rows, cols)) + + +proc transpose*[T](self: Matrix[T]): Matrix[T] = + ## Transposes rows and columns in the given + ## matrix. No data copies occur + result = self.dup() + result.data = self.data + discard result.reshape(self.shape.cols, self.shape.rows) + if result.shape.rows > 0: + result.order = if result.order == RowMajor: ColumnMajor else: RowMajor + + +proc flatten*[T](self: Matrix[T]): Matrix[T] = + ## Flattens the matrix into a vector. No + ## data copies occur + result = self.dup() + result.data = self.data + result = result.reshape(0, len(self)) + + +# Helpers for fast applying of operations along an axis +proc apply*[T](self: Matrix[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): Matrix[T] = + ## Applies a binary operator to every + ## element in the given axis of the + ## given matrix (0 = rows, 1 = columns, + ## -1 = both). No copies occur unless + ## copy equals true + result = self + if copy: + result = self.copy() + case axis: + of 0: + # Stores the indeces of the values + # we'll delete after we're done. This + # is because applying along + var indeces: seq[int] = @[] + of 1: + discard + of -1: + for i, row in self: + for j, item in row: + self[i, j] = op(j, b) + else: + raise newException(ValueError, &"axis {axis} is invalid for matrix") + + +proc apply*[T](self: Matrix[T], op: proc (a: T): T, copy: bool = false, axis: int): Matrix[T] = + ## Applies a unary operator to every + ## element in the given axis of the + ## given matrix (0 = rows, 1 = columns, + ## -1 = both). No copies occur unless + ## copy equals true + result = self + if copy: + result = self.copy() + for i, row in self: + for j, item in row: + self[i, j] = op(j) + + +proc apply*[T](self: MatrixView[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): MatrixView[T] = + ## Applies a binary operator to every + ## element in the given axis of the + ## given matrix view (0 = rows, 1 = columns, + ## -1 = both). No copies occur unless + ## copy equals true + result = self + if copy: + result = self.copy() + for i, j in self: + self[i] = op(j, b) + + +proc apply*[T](self: MatrixView[T], op: proc (a: T): T, copy: bool = false, axis: int): MatrixView[T] = + ## Applies a unary operator to every + ## element in the given axis of the + ## given matrix view (0 = rows, 1 = columns, + ## -1 = both). No copies occur unless + ## copy equals true + result = self + if copy: + result = self.copy() + for i, j in self: + self[i] = op(j) + + +# Operations along an axis +proc sum*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] = + ## Performs the sum of all the elements + ## on a given axis in-place (unless copy + ## equals true). The output matrix is + ## returned + var self = self + if copy: + self = self.copy() + result = self + var indeces: seq[int] = @[] + case axis: + of 1: + for r in 0..`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = + when not defined(release): + if a.shape != b.shape: + raise newException(ValueError, "can't compare matrices of different shapes") + new(result) + new(result.data) + result.shape = a.shape + result.order = RowMajor + result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + for r in 0.. b[r, c]) + + +proc `>=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = + when not defined(release): + if a.shape != b.shape: + raise newException(ValueError, "can't compare matrices of different shapes") + new(result) + new(result.data) + result.shape = a.shape + result.order = RowMajor + result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + for r in 0..= b[r, c]) + + +proc `<=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = + when not defined(release): + if a.shape != b.shape: + raise newException(ValueError, "can't compare matrices of different shapes") + new(result) + new(result.data) + result.shape = a.shape + result.order = RowMajor + result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols) + for r in 0.. a +proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = not a == b +proc `*`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[T] {.raises: [ValueError].} = b * a +proc `==`*[T](a: T, b: Matrix[T]): Matrix[bool] = b == a +proc `!=`*[T](a: Matrix[T], b: T): Matrix[bool] = not a == b +proc `!=`*[T](a: T, b: Matrix[T]): Matrix[bool] = not b == a + + +proc toRowMajor*[T](self: Matrix[T]): Matrix[T] = + ## Converts a column-major matrix to a + ## row-major one + if self.order == RowMajor: + return + self.order = RowMajor + let orig = self.data[] + self.data[] = @[] + var idx = 0 + var col = 0 + while col < result.shape.cols: + result.data[].add(orig[idx]) + idx += result.shape.cols + if idx > orig.high(): + inc(col) + idx = col + result = self + + +proc toColumnMajor*[T](self: Matrix[T]): Matrix[T] = + ## Converts a row-major matrix to a + ## column-major one + if self.order == ColumnMajor: + return + self.order = ColumnMajor + let orig = self.data[] + self.data[] = @[] + var idx = 0 + var col = 0 + while col < result.shape.cols: + result.data[].add(orig[idx]) + idx += result.shape.cols + if idx > orig.high(): + inc(col) + idx = col + result = self + + +# Matrices and matrix views are iterable! + +iterator items*[T](self: Matrix[T]): MatrixView[T] = + for row in 0..