Added initial work on multilayer perceptron
This commit is contained in:
parent
6303b3ba94
commit
dc44b65e94
|
@ -0,0 +1,9 @@
|
|||
import nn/network
|
||||
import nn/util/activations
|
||||
import nn/util/losses
|
||||
|
||||
|
||||
var net = newNeuralNetwork(@[2, 3, 2], activationFunc=newActivation(sigmoid, proc (x, y: float): float = 0.0),
|
||||
lossFunc=newLoss(mse, mse), weightRange=(-1.0, +1.0), learnRate=0.05)
|
||||
var prediction = net.predict(newMatrix[float](@[2.7, 3.0]))
|
||||
echo prediction
|
|
@ -0,0 +1,86 @@
|
|||
# Copyright 2022 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import util/matrix
|
||||
import util/losses
|
||||
import util/activations
|
||||
|
||||
import std/strformat
|
||||
import std/random
|
||||
|
||||
|
||||
randomize()
|
||||
|
||||
type
|
||||
Layer* = ref object
|
||||
## A generic neural network
|
||||
## layer
|
||||
inputSize*: int # The number of inputs we process
|
||||
outputSize*: int # The number of outputs we produce
|
||||
weights*: Matrix[float] # The weights for each connection (2D)
|
||||
biases*: Matrix[float] # The biases for each neuron (1D)
|
||||
activation: Activation # The activation function along with its derivative
|
||||
loss: Loss # The cost function used in training
|
||||
gradients: tuple[weights, biases: Matrix[float]] # Gradient coefficients for weights and biases
|
||||
learnRate: float # The speed at which we perform gradient descent
|
||||
|
||||
|
||||
proc `$`*(self: Layer): string =
|
||||
## Returns a string representation
|
||||
## of the layer
|
||||
result = &"Layer(inputs={self.inputSize}, outputs={self.outputSize})"
|
||||
|
||||
|
||||
proc newLayer*(inputSize: int, outputSize: int, activation: Activation, loss: Loss, learnRate: float, weightRange: tuple[start, stop: float]): Layer =
|
||||
## Creates a new layer with inputSize input
|
||||
## parameters and outputSize outgoing outputs.
|
||||
## Weights are initialized with random values
|
||||
## in the chosen range
|
||||
new(result)
|
||||
result.inputSize = inputSize
|
||||
result.outputSize = outputSize
|
||||
var biases = newSeqOfCap[float](outputSize)
|
||||
var biasGradients = newSeqOfCap[float](outputSize)
|
||||
for _ in 0..<outputSize:
|
||||
biases.add(0.0)
|
||||
biasGradients.add(0.0)
|
||||
var weights = newSeqOfCap[seq[float]](inputSize * outputSize)
|
||||
var weightGradients = newSeqOfCap[seq[float]](inputSize * outputSize)
|
||||
for _ in 0..<outputSize:
|
||||
weights.add(@[])
|
||||
weightGradients.add(@[])
|
||||
for _ in 0..<inputSize:
|
||||
weights[^1].add(rand(weightRange.start..weightRange.stop))
|
||||
weightGradients[^1].add(0)
|
||||
result.biases = newMatrix[float](biases)
|
||||
result.weights = newMatrix[float](weights)
|
||||
result.activation = activation
|
||||
result.loss = loss
|
||||
result.gradients = (weights: newMatrix[float](weightGradients), biases: newMatrix[float](biasGradients))
|
||||
result.learnRate = learnRate
|
||||
|
||||
|
||||
proc compute*(self: Layer, data: Matrix[float]): Matrix[float] =
|
||||
## Computes the output of a given layer with
|
||||
## the given input data and returns it as a
|
||||
## one-dimensional array
|
||||
var sequence = newSeqOfCap[float](self.outputSize)
|
||||
for i, weights in self.weights:
|
||||
# This looks fancy, but it's just abstracting some of the
|
||||
# complexity away to the matrix library and is equivalent
|
||||
# to the nested for-loop approach (although more idiomatic
|
||||
# and probably faster)
|
||||
sequence.add(self.activation.function((weights * data).sum() + self.biases[0, i]))
|
||||
result = newMatrix[float](sequence)
|
||||
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
# Copyright 2022 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import layer
|
||||
import util/matrix
|
||||
import util/activations
|
||||
import util/losses
|
||||
|
||||
export layer, matrix, losses, activations
|
||||
|
||||
|
||||
import std/sequtils
|
||||
import std/strformat
|
||||
|
||||
|
||||
type
|
||||
NeuralNetwork* = ref object
|
||||
## A generic neural network
|
||||
layers*: seq[Layer]
|
||||
|
||||
|
||||
proc newNeuralNetwork*(layers: seq[int], activationFunc: Activation, lossFunc: Loss, learnRate: float,
|
||||
weightRange: tuple[start, stop: float]): NeuralNetwork =
|
||||
## Initializes a new neural network
|
||||
## with the given layer layout
|
||||
new(result)
|
||||
result.layers = newSeqOfCap[Layer](len(layers))
|
||||
for i in 0..<layers.high():
|
||||
result.layers.add(newLayer(layers[i], layers[i + 1], activationFunc, lossFunc, learnRate, weightRange))
|
||||
|
||||
|
||||
proc predict*(self: NeuralNetwork, data: Matrix[float]): Matrix[float] =
|
||||
## Performs a prediction and returns a 1D array
|
||||
## with the output
|
||||
when not defined(release):
|
||||
if data.shape.rows > 1:
|
||||
raise newException(ValueError, "input data must be one-dimensional")
|
||||
if data.shape.cols != self.layers[0].inputSize:
|
||||
raise newException(ValueError, &"input is of the wrong shape (expecting (1, {self.layers[0].inputSize}), got ({data.shape.rows}, {data.shape.cols}) instead)")
|
||||
result = data
|
||||
for layer in self.layers:
|
||||
result = layer.compute(result)
|
||||
|
||||
|
||||
proc classify*(self: NeuralNetwork, data: Matrix[float]): int =
|
||||
## Performs a prediction and returns the label
|
||||
## with the highest likelyhood
|
||||
result = maxIndex(self.predict(data).raw[])
|
|
@ -0,0 +1,35 @@
|
|||
# Copyright 2022 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import std/math
|
||||
|
||||
|
||||
func step*(input: float): float = (if input < 0.0: 0.0 else: 1.0)
|
||||
func sigmoid*(input: float): float = 1 / (1 + exp(-input))
|
||||
func silu*(input: float): float = 1 / (1 + exp(-input))
|
||||
func relu*(input: float): float = max(0.0, input)
|
||||
func htan*(input: float): float =
|
||||
let temp = exp(2 * input)
|
||||
result = (temp - 1) / (temp + 1)
|
||||
|
||||
|
||||
type Activation* = ref object
|
||||
function*: proc (input: float): float
|
||||
derivative*: proc (x, y: float): float
|
||||
|
||||
|
||||
proc newActivation*(function: proc (input: float): float, derivative: proc (x, y: float): float): Activation =
|
||||
## Creates a new activation object
|
||||
new(result)
|
||||
result.function = function
|
||||
result.derivative = derivative
|
|
@ -0,0 +1,29 @@
|
|||
# Copyright 2022 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import std/math
|
||||
|
||||
# Mean squared error
|
||||
func mse*(x, y: float): float = pow(x - y, 2)
|
||||
|
||||
|
||||
type Loss* = ref object
|
||||
function*: proc (x, y: float): float
|
||||
derivative*: proc (x, y: float): float
|
||||
|
||||
|
||||
proc newLoss*(function: proc (x, y: float): float, derivative: proc (x, y: float): float): Loss =
|
||||
## Creates a new activation object
|
||||
new(result)
|
||||
result.function = function
|
||||
result.derivative = derivative
|
|
@ -0,0 +1,664 @@
|
|||
# Copyright 2022 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from std/strformat import `&`
|
||||
from std/sequtils import zip
|
||||
from std/strutils import join
|
||||
|
||||
|
||||
type
|
||||
MatrixOrder* = enum
|
||||
RowMajor, ColumnMajor
|
||||
Matrix*[T] = ref object
|
||||
## A matrix object
|
||||
data: ref seq[T] # Nim seqs are value types, so this is needed to avoid copies on assignment
|
||||
shape*: tuple[rows, cols: int]
|
||||
order*: MatrixOrder
|
||||
MatrixView*[T] = ref object
|
||||
## A zero-copy view into a matrix
|
||||
m: Matrix[T] # The matrix that owns the row we point to
|
||||
row: int # The row in the matrix to which we point to
|
||||
# Even though a MatrixView has no
|
||||
# rows, we keep the same field for
|
||||
# consistency with the Matrix type
|
||||
shape*: tuple[rows, cols: int]
|
||||
|
||||
|
||||
proc getSize(shape: tuple[rows, cols: int]): int =
|
||||
## Helper to get the size required for the
|
||||
## underlying data array for a matrix of the
|
||||
## given shape
|
||||
if shape.cols == 0:
|
||||
return shape.rows
|
||||
return shape.cols * shape.rows
|
||||
|
||||
|
||||
proc newMatrix*[T](data: seq[T]): Matrix[T] =
|
||||
## Initializes a new matrix from a given
|
||||
## 1D sequence
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.data[] = data
|
||||
result.shape = (rows: 0, cols: len(data))
|
||||
result.order = RowMajor
|
||||
|
||||
|
||||
proc newMatrix*[T](data: seq[seq[T]], order: MatrixOrder = RowMajor): Matrix[T] {.raises: [ValueError].} =
|
||||
## Initializes a new matrix from a given
|
||||
## 2D sequence
|
||||
new(result)
|
||||
new(result.data)
|
||||
var temp: seq[T] = @[]
|
||||
result.shape = (rows: len(data), cols: len(data[0]))
|
||||
result.data[] = newSeqOfCap[T](result.shape.getSize())
|
||||
result.order = order
|
||||
for sub in data:
|
||||
if len(sub) != result.shape.cols:
|
||||
raise newException(ValueError, "invalid shape of input data (mismatching column)")
|
||||
for j in sub:
|
||||
temp.add(j)
|
||||
if order == RowMajor:
|
||||
for j in temp:
|
||||
result.data[].add(j)
|
||||
else:
|
||||
var idx = 0
|
||||
var col = 0
|
||||
while col < result.shape.cols:
|
||||
result.data[].add(temp[idx])
|
||||
idx += result.shape.cols
|
||||
if idx > temp.high():
|
||||
inc(col)
|
||||
idx = col
|
||||
|
||||
|
||||
proc zeros*[T](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
|
||||
## Creates a new matrix of the given shape
|
||||
## filled with zeros
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.data[] = @[]
|
||||
|
||||
|
||||
# Simple one-line helpers and forward declarations
|
||||
func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len()
|
||||
func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols
|
||||
func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data
|
||||
proc dup*[T](self: Matrix[T]): Matrix[T]
|
||||
proc copy*[T](self: Matrix[T]): Matrix[T]
|
||||
|
||||
|
||||
func getIndex[T](self: Matrix[T], row, col: int): int =
|
||||
## Converts an (x, y) coordinate pair into a single
|
||||
## integer index into our array, taking the internal
|
||||
## array order into account
|
||||
if self.order == RowMajor:
|
||||
result = row * self.shape.cols + col
|
||||
else:
|
||||
result = col * self.shape.rows + row
|
||||
|
||||
|
||||
proc `[]`*[T](self: Matrix[T], row, col: int): T {.raises: [IndexDefect, ValueError].} =
|
||||
## Gets the element the given row and
|
||||
## column into the matrix
|
||||
var idx = self.getIndex(row, col)
|
||||
when not defined(release):
|
||||
if idx notin 0..<self.data[].len() + 1:
|
||||
raise newException(IndexDefect, &"index ({row}, {col}) is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
|
||||
return self.data[idx]
|
||||
|
||||
|
||||
proc `[]`*[T](self: Matrix[T], row: int): MatrixView[T] {.raises: [IndexDefect, ValueError].} =
|
||||
## Gets a single row in the matrix. No data copies
|
||||
## occur and a view into the original matrix is
|
||||
## returned
|
||||
var idx = self.getIndex(row, 0)
|
||||
when not defined(release):
|
||||
if idx notin 0..<self.data[].len() + 1:
|
||||
raise newException(IndexDefect, &"row {row} is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
|
||||
new(result)
|
||||
result.m = self
|
||||
result.row = row
|
||||
result.shape = (0, self.shape.cols)
|
||||
|
||||
|
||||
proc `[]`*[T](self: MatrixView[T], col: int): T {.raises: [IndexDefect, ValueError].} =
|
||||
## Gets the element the given row into
|
||||
## the matrix view
|
||||
var idx = self.m.getIndex(self.row, col)
|
||||
when not defined(release):
|
||||
if idx notin 0..<self.m.data[].len() + 1:
|
||||
raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
|
||||
result = self.m.data[idx]
|
||||
|
||||
|
||||
proc `[]=`*[T](self: Matrix[T], row, col: int, val: T) {.raises: [IndexDefect, ValueError].} =
|
||||
## Sets the element at the given row and
|
||||
## column into the matrix to value val
|
||||
var idx = self.getIndex(row, col)
|
||||
when not defined(release):
|
||||
if idx notin 0..<self.data[].len() + 1:
|
||||
raise newException(IndexDefect, &"index ({row}, {col}) is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
|
||||
self.data[idx] = val
|
||||
|
||||
|
||||
proc `[]=`*[T](self: MatrixView[T], col: int, val: T) {.raises: [IndexDefect, ValueError].} =
|
||||
## Sets the element at the given row
|
||||
## into the matrix view to the value
|
||||
## val
|
||||
var idx = self.m.getIndex(0, col)
|
||||
when not defined(release):
|
||||
if idx notin 0..<self.m.data[].len() + 1:
|
||||
raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
|
||||
self.m.data[idx] = val
|
||||
|
||||
|
||||
proc `$`*[T](self: Matrix[T]): string =
|
||||
## Stringifies the matrix
|
||||
var col: int
|
||||
if self.shape.cols == 0:
|
||||
col = self.len()
|
||||
else:
|
||||
col = self.shape.cols
|
||||
if self.shape.rows == 0:
|
||||
return &"""[{self.data[].join(", ")}]"""
|
||||
if self.shape.rows > 1:
|
||||
result &= "["
|
||||
for row in 0..<self.shape.rows:
|
||||
result &= "["
|
||||
for column in 0..<col:
|
||||
result &= $self[row, column]
|
||||
if column < col - 1:
|
||||
result &= ", "
|
||||
if row < self.shape.rows - 1:
|
||||
result &= "], \n"
|
||||
result &= " "
|
||||
else:
|
||||
result &= "]"
|
||||
if self.shape.rows > 1:
|
||||
result &= "]"
|
||||
|
||||
|
||||
proc `$`*[T](self: MatrixView[T]): string =
|
||||
## Stringifies the matrix view
|
||||
result = "["
|
||||
var i = 0
|
||||
while i < self.shape.cols:
|
||||
result &= $self[i]
|
||||
if i < self.shape.cols - 1:
|
||||
result &= ", "
|
||||
inc(i)
|
||||
result &= "]"
|
||||
|
||||
|
||||
# Shape management
|
||||
proc reshape*[T](self: Matrix[T], shape: tuple[rows, cols: int]): Matrix[T] {.raises: [ValueError].} =
|
||||
## Reshapes the given matrix. No data copies occur
|
||||
when not defined(release):
|
||||
if shape.getSize() != self.data[].len():
|
||||
raise newException(ValueError, &"shape ({shape.rows}, {shape.cols}) is invalid for matrix of length {self.len()}")
|
||||
result = self.dup()
|
||||
if shape.rows > 1:
|
||||
self.shape = shape
|
||||
|
||||
|
||||
proc reshape*[T](self: Matrix[T], rows, cols: int): Matrix[T] {.raises: [ValueError].} =
|
||||
## Reshapes the given matrix. No data copies occur
|
||||
result = self.reshape((rows, cols))
|
||||
|
||||
|
||||
proc transpose*[T](self: Matrix[T]): Matrix[T] =
|
||||
## Transposes rows and columns in the given
|
||||
## matrix. No data copies occur
|
||||
result = self.dup()
|
||||
result.data = self.data
|
||||
discard result.reshape(self.shape.cols, self.shape.rows)
|
||||
if result.shape.rows > 0:
|
||||
result.order = if result.order == RowMajor: ColumnMajor else: RowMajor
|
||||
|
||||
|
||||
proc flatten*[T](self: Matrix[T]): Matrix[T] =
|
||||
## Flattens the matrix into a vector. No
|
||||
## data copies occur
|
||||
result = self.dup()
|
||||
result.data = self.data
|
||||
result = result.reshape(0, len(self))
|
||||
|
||||
|
||||
# Helpers for fast applying of operations along an axis
|
||||
proc apply*[T](self: Matrix[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): Matrix[T] =
|
||||
## Applies a binary operator to every
|
||||
## element in the given axis of the
|
||||
## given matrix (0 = rows, 1 = columns,
|
||||
## -1 = both). No copies occur unless
|
||||
## copy equals true
|
||||
result = self
|
||||
if copy:
|
||||
result = self.copy()
|
||||
case axis:
|
||||
of 0:
|
||||
# Stores the indeces of the values
|
||||
# we'll delete after we're done. This
|
||||
# is because applying along
|
||||
var indeces: seq[int] = @[]
|
||||
of 1:
|
||||
discard
|
||||
of -1:
|
||||
for i, row in self:
|
||||
for j, item in row:
|
||||
self[i, j] = op(j, b)
|
||||
else:
|
||||
raise newException(ValueError, &"axis {axis} is invalid for matrix")
|
||||
|
||||
|
||||
proc apply*[T](self: Matrix[T], op: proc (a: T): T, copy: bool = false, axis: int): Matrix[T] =
|
||||
## Applies a unary operator to every
|
||||
## element in the given axis of the
|
||||
## given matrix (0 = rows, 1 = columns,
|
||||
## -1 = both). No copies occur unless
|
||||
## copy equals true
|
||||
result = self
|
||||
if copy:
|
||||
result = self.copy()
|
||||
for i, row in self:
|
||||
for j, item in row:
|
||||
self[i, j] = op(j)
|
||||
|
||||
|
||||
proc apply*[T](self: MatrixView[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): MatrixView[T] =
|
||||
## Applies a binary operator to every
|
||||
## element in the given axis of the
|
||||
## given matrix view (0 = rows, 1 = columns,
|
||||
## -1 = both). No copies occur unless
|
||||
## copy equals true
|
||||
result = self
|
||||
if copy:
|
||||
result = self.copy()
|
||||
for i, j in self:
|
||||
self[i] = op(j, b)
|
||||
|
||||
|
||||
proc apply*[T](self: MatrixView[T], op: proc (a: T): T, copy: bool = false, axis: int): MatrixView[T] =
|
||||
## Applies a unary operator to every
|
||||
## element in the given axis of the
|
||||
## given matrix view (0 = rows, 1 = columns,
|
||||
## -1 = both). No copies occur unless
|
||||
## copy equals true
|
||||
result = self
|
||||
if copy:
|
||||
result = self.copy()
|
||||
for i, j in self:
|
||||
self[i] = op(j)
|
||||
|
||||
|
||||
# Operations along an axis
|
||||
proc sum*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] =
|
||||
## Performs the sum of all the elements
|
||||
## on a given axis in-place (unless copy
|
||||
## equals true). The output matrix is
|
||||
## returned
|
||||
var self = self
|
||||
if copy:
|
||||
self = self.copy()
|
||||
result = self
|
||||
var indeces: seq[int] = @[]
|
||||
case axis:
|
||||
of 1:
|
||||
for r in 0..<self.shape.rows:
|
||||
for c in 1..<self.shape.cols:
|
||||
self[r, 0] = self[r, 0] + self[r, c]
|
||||
indeces.add(self.getIndex(r, c))
|
||||
of 0:
|
||||
for r in 0..<self.shape.rows - 1:
|
||||
for c in 0..self.shape.cols - 1:
|
||||
self[r, c] = self[r, c] + self[r + 1, c]
|
||||
indeces.add(self.getIndex(r + 1, c))
|
||||
else:
|
||||
when not defined(release):
|
||||
raise newException(ValueError, &"axis {axis} is invalid for matrix")
|
||||
else:
|
||||
discard
|
||||
self.shape.cols = self.len()
|
||||
self.shape.rows = 0
|
||||
for i, index in indeces:
|
||||
self.data[].delete(index - i)
|
||||
|
||||
|
||||
proc sum*[T](self: Matrix[T]): T =
|
||||
## Returns the sum of all elements
|
||||
## in the matrix
|
||||
for e in self.data[]:
|
||||
result += e
|
||||
|
||||
|
||||
proc sum*[T](self: MatrixView[T]): T =
|
||||
## Returns the sum of all elements
|
||||
## in the matrix view
|
||||
var i = 0
|
||||
while i < self.shape.cols:
|
||||
result += self[i]
|
||||
inc(i)
|
||||
|
||||
|
||||
proc sub*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] =
|
||||
var self = self
|
||||
if copy:
|
||||
self = self.copy()
|
||||
result = self
|
||||
var indeces: seq[int] = @[]
|
||||
case axis:
|
||||
of 1:
|
||||
for r in 0..<self.shape.rows:
|
||||
for c in 1..<self.shape.cols:
|
||||
self[r, 0] = self[r, 0] - self[r, c]
|
||||
indeces.add(self.getIndex(r, c))
|
||||
of 0:
|
||||
for r in 0..<self.shape.rows - 1:
|
||||
for c in 0..self.shape.cols - 1:
|
||||
self[r, c] = self[r, c] - self[r + 1, c]
|
||||
indeces.add(self.getIndex(r + 1, c))
|
||||
else:
|
||||
when not defined(release):
|
||||
raise newException(ValueError, &"axis {axis} is invalid for matrix")
|
||||
else:
|
||||
discard
|
||||
self.shape.cols = 0
|
||||
self.shape.rows = 1
|
||||
self.order = RowMajor
|
||||
for i, index in indeces:
|
||||
self.data[].delete(index - i)
|
||||
|
||||
|
||||
proc sub*[T](self: Matrix[T]): T =
|
||||
for e in self.data[]:
|
||||
result -= e
|
||||
|
||||
|
||||
proc copy*[T](self: Matrix[T]): Matrix[T] =
|
||||
## Creates a new copy of the given matrix
|
||||
## (copies the underlying data!)
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.data[] = self.data[]
|
||||
result.shape = self.shape
|
||||
result.order = self.order
|
||||
|
||||
|
||||
proc dup*[T](self: Matrix[T]): Matrix[T] =
|
||||
## Creates a new shallow copy of the given
|
||||
## matrix, without copying the underlying
|
||||
## data
|
||||
new(result)
|
||||
result.data = self.data
|
||||
result.shape = self.shape
|
||||
result.order = self.order
|
||||
|
||||
|
||||
proc copy*[T](self: MatrixView[T]): Matrix[T] =
|
||||
## Creates a new copy of the given matrix
|
||||
## view. Only the data of the chosen row is
|
||||
## copied
|
||||
new(result)
|
||||
new(result.data)
|
||||
for e in self:
|
||||
result.data[].add(e)
|
||||
result.shape = self.shape
|
||||
result.m = self.m
|
||||
|
||||
|
||||
proc dup*[T](self: MatrixView[T]): MatrixView[T] =
|
||||
## Creates a new shallow copy of the given
|
||||
## matrix view, without copying the underlying
|
||||
## data
|
||||
new(result)
|
||||
result.m = self.m
|
||||
result.shape = self.shape
|
||||
|
||||
# matrix/scalar operations
|
||||
|
||||
# Wrappers because builtins are not
|
||||
# procvars
|
||||
proc add*[T](a, b: T): T = a + b
|
||||
proc sub*[T](a, b: T): T = a - b
|
||||
proc mul*[T](a, b: T): T = a * b
|
||||
proc divide*[T](a, b: T): T = a / b
|
||||
proc neg*[T](a: T): T = -a
|
||||
|
||||
# Warning: These *all* perform copies of the underlying matrix!
|
||||
template `+`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(add, b)
|
||||
template `+`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(add, a)
|
||||
|
||||
template `-`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(sub, b)
|
||||
template `-`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(sub, a)
|
||||
template `-`*[T](a: Matrix[T]): Matrix[T] = a.copy().apply(neg, a)
|
||||
|
||||
template `*`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(mul, b)
|
||||
template `*`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(mul, a)
|
||||
|
||||
template `/`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(divide, b)
|
||||
template `/`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(divide, a)
|
||||
|
||||
|
||||
# matrix/scalar comparisons
|
||||
proc `==`*[T](a: Matrix[T], b: T): Matrix[bool] =
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.shape = a.shape
|
||||
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
|
||||
for e in a.data[]:
|
||||
result.data[].add(e == b)
|
||||
|
||||
|
||||
# matrix/matrix operations. They produce a new matrix with the
|
||||
# result of the operation
|
||||
|
||||
proc `+`*[T](a, b: Matrix[T]): Matrix[T] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
if a.shape != b.shape:
|
||||
raise newException(ValueError, "can't add matrices of different shapes")
|
||||
if a.order != b.order:
|
||||
raise newException(ValueError, "can't add matrices with different ordering")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols)
|
||||
result.shape = a.shape
|
||||
result.order = RowMajor
|
||||
for (e, k) in zip(a.data[], b.data[]):
|
||||
result.data[].add(e + k)
|
||||
|
||||
|
||||
proc `*`*[T](a: MatrixView[T], b: Matrix[T]): Matrix[T] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
echo a
|
||||
echo b
|
||||
if a.shape.cols != b.shape.rows:
|
||||
raise newException(ValueError, &"incompatible argument shapes for multiplication")
|
||||
if a.m.order != b.order:
|
||||
raise newException(ValueError, "can't multiply matrices with different ordering")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols)
|
||||
for i in 0..<a.shape.cols:
|
||||
result.data[].add(a[i] * b[0, i])
|
||||
result.shape = a.shape
|
||||
result.order = RowMajor
|
||||
|
||||
|
||||
proc `*`*[T](a, b: Matrix[T]): Matrix[T] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
if a.shape.cols != b.shape.rows:
|
||||
raise newException(ValueError, &"incompatible argument shapes for multiplication")
|
||||
if a.order != b.order:
|
||||
raise newException(ValueError, "can't multiply matrices with different ordering")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.shape = (a.shape.rows, b.shape.cols)
|
||||
result.order = RowMajor
|
||||
result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols)
|
||||
for (e, k) in zip(a.data[], b.data[]):
|
||||
result.data[].add(e * k)
|
||||
|
||||
# Comparison operators. They produce a new matrix with boolean values
|
||||
|
||||
proc `==`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
if a.shape != b.shape:
|
||||
raise newException(ValueError, "can't compare matrices of different shapes")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.shape = a.shape
|
||||
result.order = RowMajor
|
||||
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
|
||||
for r in 0..<a.shape.rows:
|
||||
for c in 0..<a.shape.cols:
|
||||
result.data[].add(a[r, c] == b[r, c])
|
||||
|
||||
|
||||
proc `>`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
if a.shape != b.shape:
|
||||
raise newException(ValueError, "can't compare matrices of different shapes")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.shape = a.shape
|
||||
result.order = RowMajor
|
||||
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
|
||||
for r in 0..<a.shape.rows:
|
||||
for c in 0..<a.shape.cols:
|
||||
result.data[].add(a[r, c] > b[r, c])
|
||||
|
||||
|
||||
proc `>=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
if a.shape != b.shape:
|
||||
raise newException(ValueError, "can't compare matrices of different shapes")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.shape = a.shape
|
||||
result.order = RowMajor
|
||||
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
|
||||
for r in 0..<a.shape.rows:
|
||||
for c in 0..<a.shape.cols:
|
||||
result.data[].add(a[r, c] >= b[r, c])
|
||||
|
||||
|
||||
proc `<=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
|
||||
when not defined(release):
|
||||
if a.shape != b.shape:
|
||||
raise newException(ValueError, "can't compare matrices of different shapes")
|
||||
new(result)
|
||||
new(result.data)
|
||||
result.shape = a.shape
|
||||
result.order = RowMajor
|
||||
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
|
||||
for r in 0..<a.shape.rows:
|
||||
for c in 0..<a.shape.cols:
|
||||
result.data[].add(a[r, c] <= b[r, c])
|
||||
|
||||
|
||||
proc all*(a: Matrix[bool]): bool =
|
||||
# Helper for boolean comparisons
|
||||
for e in a.data[]:
|
||||
if not e:
|
||||
return false
|
||||
return true
|
||||
|
||||
|
||||
# Specular definitions of commutative operators
|
||||
proc `<`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = b > a
|
||||
proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = not a == b
|
||||
proc `*`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[T] {.raises: [ValueError].} = b * a
|
||||
proc `==`*[T](a: T, b: Matrix[T]): Matrix[bool] = b == a
|
||||
proc `!=`*[T](a: Matrix[T], b: T): Matrix[bool] = not a == b
|
||||
proc `!=`*[T](a: T, b: Matrix[T]): Matrix[bool] = not b == a
|
||||
|
||||
|
||||
proc toRowMajor*[T](self: Matrix[T]): Matrix[T] =
|
||||
## Converts a column-major matrix to a
|
||||
## row-major one
|
||||
if self.order == RowMajor:
|
||||
return
|
||||
self.order = RowMajor
|
||||
let orig = self.data[]
|
||||
self.data[] = @[]
|
||||
var idx = 0
|
||||
var col = 0
|
||||
while col < result.shape.cols:
|
||||
result.data[].add(orig[idx])
|
||||
idx += result.shape.cols
|
||||
if idx > orig.high():
|
||||
inc(col)
|
||||
idx = col
|
||||
result = self
|
||||
|
||||
|
||||
proc toColumnMajor*[T](self: Matrix[T]): Matrix[T] =
|
||||
## Converts a row-major matrix to a
|
||||
## column-major one
|
||||
if self.order == ColumnMajor:
|
||||
return
|
||||
self.order = ColumnMajor
|
||||
let orig = self.data[]
|
||||
self.data[] = @[]
|
||||
var idx = 0
|
||||
var col = 0
|
||||
while col < result.shape.cols:
|
||||
result.data[].add(orig[idx])
|
||||
idx += result.shape.cols
|
||||
if idx > orig.high():
|
||||
inc(col)
|
||||
idx = col
|
||||
result = self
|
||||
|
||||
|
||||
# Matrices and matrix views are iterable!
|
||||
|
||||
iterator items*[T](self: Matrix[T]): MatrixView[T] =
|
||||
for row in 0..<self.shape.rows:
|
||||
yield self[row]
|
||||
|
||||
|
||||
iterator items*[T](self: MatrixView[T]): T =
|
||||
for column in 0..<self.shape.cols:
|
||||
yield self[column]
|
||||
|
||||
|
||||
iterator pairs*[T](self: Matrix[T]): tuple[i: int, val: MatrixView[T]] =
|
||||
var i = 0
|
||||
for row in self:
|
||||
yield (i, row)
|
||||
|
||||
|
||||
iterator pairs*[T](self: MatrixView[T]): tuple[i: int, val: T] =
|
||||
var i = 0
|
||||
for col in self:
|
||||
yield (i, col)
|
||||
|
||||
|
||||
proc dot*[T](self, other: Matrix[T]): Matrix[T] =
|
||||
## Computes the dot product of the two
|
||||
## input matrices
|
||||
|
||||
|
||||
when isMainModule:
|
||||
# var m = newMatrix[int](@[@[1, 2, 3], @[4, 5, 6]])
|
||||
# var k = m.transpose()
|
||||
# assert all(m.transpose() == k), "transpose mismatch"
|
||||
# assert k.sum() == m.sum(), "element sum mismatch"
|
||||
# assert k.sub() == m.sub(), "element sub mismatch"
|
||||
# assert all(k.sum(axis=1) == m.sum(axis=0)), "sum over axis mismatch"
|
||||
# assert all(k.sum(axis=0) == m.sum(axis=1)), "sum over axis mismatch"
|
||||
# assert all(k.sub(axis=1) == m.sub(axis=0)), "sub over axis mismatch"
|
||||
# assert all(k.sub(axis=0) == m.sub(axis=1)), "sub over axis mismatch"
|
||||
var y = newMatrix[int](@[1, 2, 3, 4])
|
||||
echo y.sum(axis=0)
|
Loading…
Reference in New Issue