NNExperiments/src/nn/util/matrix.nim

664 lines
22 KiB
Nim
Raw Normal View History

# Copyright 2022 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from std/strformat import `&`
from std/sequtils import zip
from std/strutils import join
type
MatrixOrder* = enum
RowMajor, ColumnMajor
Matrix*[T] = ref object
## A matrix object
data: ref seq[T] # Nim seqs are value types, so this is needed to avoid copies on assignment
shape*: tuple[rows, cols: int]
order*: MatrixOrder
MatrixView*[T] = ref object
## A zero-copy view into a matrix
m: Matrix[T] # The matrix that owns the row we point to
row: int # The row in the matrix to which we point to
# Even though a MatrixView has no
# rows, we keep the same field for
# consistency with the Matrix type
shape*: tuple[rows, cols: int]
proc getSize(shape: tuple[rows, cols: int]): int =
## Helper to get the size required for the
## underlying data array for a matrix of the
## given shape
if shape.cols == 0:
return shape.rows
return shape.cols * shape.rows
proc newMatrix*[T](data: seq[T]): Matrix[T] =
## Initializes a new matrix from a given
## 1D sequence
new(result)
new(result.data)
result.data[] = data
result.shape = (rows: 0, cols: len(data))
result.order = RowMajor
proc newMatrix*[T](data: seq[seq[T]], order: MatrixOrder = RowMajor): Matrix[T] {.raises: [ValueError].} =
## Initializes a new matrix from a given
## 2D sequence
new(result)
new(result.data)
var temp: seq[T] = @[]
result.shape = (rows: len(data), cols: len(data[0]))
result.data[] = newSeqOfCap[T](result.shape.getSize())
result.order = order
for sub in data:
if len(sub) != result.shape.cols:
raise newException(ValueError, "invalid shape of input data (mismatching column)")
for j in sub:
temp.add(j)
if order == RowMajor:
for j in temp:
result.data[].add(j)
else:
var idx = 0
var col = 0
while col < result.shape.cols:
result.data[].add(temp[idx])
idx += result.shape.cols
if idx > temp.high():
inc(col)
idx = col
proc zeros*[T](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
## Creates a new matrix of the given shape
## filled with zeros
new(result)
new(result.data)
result.data[] = @[]
# Simple one-line helpers and forward declarations
func len*[T](self: Matrix[T]): int {.inline.} = self.data[].len()
func len*[T](self: MatrixView[T]): int {.inline.} = self.shape.cols
func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} = self.data
proc dup*[T](self: Matrix[T]): Matrix[T]
proc copy*[T](self: Matrix[T]): Matrix[T]
func getIndex[T](self: Matrix[T], row, col: int): int =
## Converts an (x, y) coordinate pair into a single
## integer index into our array, taking the internal
## array order into account
if self.order == RowMajor:
result = row * self.shape.cols + col
else:
result = col * self.shape.rows + row
proc `[]`*[T](self: Matrix[T], row, col: int): T {.raises: [IndexDefect, ValueError].} =
## Gets the element the given row and
## column into the matrix
var idx = self.getIndex(row, col)
when not defined(release):
if idx notin 0..<self.data[].len() + 1:
raise newException(IndexDefect, &"index ({row}, {col}) is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
return self.data[idx]
proc `[]`*[T](self: Matrix[T], row: int): MatrixView[T] {.raises: [IndexDefect, ValueError].} =
## Gets a single row in the matrix. No data copies
## occur and a view into the original matrix is
## returned
var idx = self.getIndex(row, 0)
when not defined(release):
if idx notin 0..<self.data[].len() + 1:
raise newException(IndexDefect, &"row {row} is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
new(result)
result.m = self
result.row = row
result.shape = (0, self.shape.cols)
proc `[]`*[T](self: MatrixView[T], col: int): T {.raises: [IndexDefect, ValueError].} =
## Gets the element the given row into
## the matrix view
var idx = self.m.getIndex(self.row, col)
when not defined(release):
if idx notin 0..<self.m.data[].len() + 1:
raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
result = self.m.data[idx]
proc `[]=`*[T](self: Matrix[T], row, col: int, val: T) {.raises: [IndexDefect, ValueError].} =
## Sets the element at the given row and
## column into the matrix to value val
var idx = self.getIndex(row, col)
when not defined(release):
if idx notin 0..<self.data[].len() + 1:
raise newException(IndexDefect, &"index ({row}, {col}) is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
self.data[idx] = val
proc `[]=`*[T](self: MatrixView[T], col: int, val: T) {.raises: [IndexDefect, ValueError].} =
## Sets the element at the given row
## into the matrix view to the value
## val
var idx = self.m.getIndex(0, col)
when not defined(release):
if idx notin 0..<self.m.data[].len() + 1:
raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
self.m.data[idx] = val
proc `$`*[T](self: Matrix[T]): string =
## Stringifies the matrix
var col: int
if self.shape.cols == 0:
col = self.len()
else:
col = self.shape.cols
if self.shape.rows == 0:
return &"""[{self.data[].join(", ")}]"""
if self.shape.rows > 1:
result &= "["
for row in 0..<self.shape.rows:
result &= "["
for column in 0..<col:
result &= $self[row, column]
if column < col - 1:
result &= ", "
if row < self.shape.rows - 1:
result &= "], \n"
result &= " "
else:
result &= "]"
if self.shape.rows > 1:
result &= "]"
proc `$`*[T](self: MatrixView[T]): string =
## Stringifies the matrix view
result = "["
var i = 0
while i < self.shape.cols:
result &= $self[i]
if i < self.shape.cols - 1:
result &= ", "
inc(i)
result &= "]"
# Shape management
proc reshape*[T](self: Matrix[T], shape: tuple[rows, cols: int]): Matrix[T] {.raises: [ValueError].} =
## Reshapes the given matrix. No data copies occur
when not defined(release):
if shape.getSize() != self.data[].len():
raise newException(ValueError, &"shape ({shape.rows}, {shape.cols}) is invalid for matrix of length {self.len()}")
result = self.dup()
if shape.rows > 1:
self.shape = shape
proc reshape*[T](self: Matrix[T], rows, cols: int): Matrix[T] {.raises: [ValueError].} =
## Reshapes the given matrix. No data copies occur
result = self.reshape((rows, cols))
proc transpose*[T](self: Matrix[T]): Matrix[T] =
## Transposes rows and columns in the given
## matrix. No data copies occur
result = self.dup()
result.data = self.data
discard result.reshape(self.shape.cols, self.shape.rows)
if result.shape.rows > 0:
result.order = if result.order == RowMajor: ColumnMajor else: RowMajor
proc flatten*[T](self: Matrix[T]): Matrix[T] =
## Flattens the matrix into a vector. No
## data copies occur
result = self.dup()
result.data = self.data
result = result.reshape(0, len(self))
# Helpers for fast applying of operations along an axis
proc apply*[T](self: Matrix[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): Matrix[T] =
## Applies a binary operator to every
## element in the given axis of the
## given matrix (0 = rows, 1 = columns,
## -1 = both). No copies occur unless
## copy equals true
result = self
if copy:
result = self.copy()
case axis:
of 0:
# Stores the indeces of the values
# we'll delete after we're done. This
# is because applying along
var indeces: seq[int] = @[]
of 1:
discard
of -1:
for i, row in self:
for j, item in row:
self[i, j] = op(j, b)
else:
raise newException(ValueError, &"axis {axis} is invalid for matrix")
proc apply*[T](self: Matrix[T], op: proc (a: T): T, copy: bool = false, axis: int): Matrix[T] =
## Applies a unary operator to every
## element in the given axis of the
## given matrix (0 = rows, 1 = columns,
## -1 = both). No copies occur unless
## copy equals true
result = self
if copy:
result = self.copy()
for i, row in self:
for j, item in row:
self[i, j] = op(j)
proc apply*[T](self: MatrixView[T], op: proc (a, b: T): T, b: T, copy: bool = false, axis: int): MatrixView[T] =
## Applies a binary operator to every
## element in the given axis of the
## given matrix view (0 = rows, 1 = columns,
## -1 = both). No copies occur unless
## copy equals true
result = self
if copy:
result = self.copy()
for i, j in self:
self[i] = op(j, b)
proc apply*[T](self: MatrixView[T], op: proc (a: T): T, copy: bool = false, axis: int): MatrixView[T] =
## Applies a unary operator to every
## element in the given axis of the
## given matrix view (0 = rows, 1 = columns,
## -1 = both). No copies occur unless
## copy equals true
result = self
if copy:
result = self.copy()
for i, j in self:
self[i] = op(j)
# Operations along an axis
proc sum*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] =
## Performs the sum of all the elements
## on a given axis in-place (unless copy
## equals true). The output matrix is
## returned
var self = self
if copy:
self = self.copy()
result = self
var indeces: seq[int] = @[]
case axis:
of 1:
for r in 0..<self.shape.rows:
for c in 1..<self.shape.cols:
self[r, 0] = self[r, 0] + self[r, c]
indeces.add(self.getIndex(r, c))
of 0:
for r in 0..<self.shape.rows - 1:
for c in 0..self.shape.cols - 1:
self[r, c] = self[r, c] + self[r + 1, c]
indeces.add(self.getIndex(r + 1, c))
else:
when not defined(release):
raise newException(ValueError, &"axis {axis} is invalid for matrix")
else:
discard
self.shape.cols = self.len()
self.shape.rows = 0
for i, index in indeces:
self.data[].delete(index - i)
proc sum*[T](self: Matrix[T]): T =
## Returns the sum of all elements
## in the matrix
for e in self.data[]:
result += e
proc sum*[T](self: MatrixView[T]): T =
## Returns the sum of all elements
## in the matrix view
var i = 0
while i < self.shape.cols:
result += self[i]
inc(i)
proc sub*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] =
var self = self
if copy:
self = self.copy()
result = self
var indeces: seq[int] = @[]
case axis:
of 1:
for r in 0..<self.shape.rows:
for c in 1..<self.shape.cols:
self[r, 0] = self[r, 0] - self[r, c]
indeces.add(self.getIndex(r, c))
of 0:
for r in 0..<self.shape.rows - 1:
for c in 0..self.shape.cols - 1:
self[r, c] = self[r, c] - self[r + 1, c]
indeces.add(self.getIndex(r + 1, c))
else:
when not defined(release):
raise newException(ValueError, &"axis {axis} is invalid for matrix")
else:
discard
self.shape.cols = 0
self.shape.rows = 1
self.order = RowMajor
for i, index in indeces:
self.data[].delete(index - i)
proc sub*[T](self: Matrix[T]): T =
for e in self.data[]:
result -= e
proc copy*[T](self: Matrix[T]): Matrix[T] =
## Creates a new copy of the given matrix
## (copies the underlying data!)
new(result)
new(result.data)
result.data[] = self.data[]
result.shape = self.shape
result.order = self.order
proc dup*[T](self: Matrix[T]): Matrix[T] =
## Creates a new shallow copy of the given
## matrix, without copying the underlying
## data
new(result)
result.data = self.data
result.shape = self.shape
result.order = self.order
proc copy*[T](self: MatrixView[T]): Matrix[T] =
## Creates a new copy of the given matrix
## view. Only the data of the chosen row is
## copied
new(result)
new(result.data)
for e in self:
result.data[].add(e)
result.shape = self.shape
result.m = self.m
proc dup*[T](self: MatrixView[T]): MatrixView[T] =
## Creates a new shallow copy of the given
## matrix view, without copying the underlying
## data
new(result)
result.m = self.m
result.shape = self.shape
# matrix/scalar operations
# Wrappers because builtins are not
# procvars
proc add*[T](a, b: T): T = a + b
proc sub*[T](a, b: T): T = a - b
proc mul*[T](a, b: T): T = a * b
proc divide*[T](a, b: T): T = a / b
proc neg*[T](a: T): T = -a
# Warning: These *all* perform copies of the underlying matrix!
template `+`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(add, b)
template `+`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(add, a)
template `-`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(sub, b)
template `-`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(sub, a)
template `-`*[T](a: Matrix[T]): Matrix[T] = a.copy().apply(neg, a)
template `*`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(mul, b)
template `*`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(mul, a)
template `/`*[T](a: Matrix[T], b: T): Matrix[T] = a.copy().apply(divide, b)
template `/`*[T](a: T, b: Matrix[T]): Matrix[T] = b.copy().apply(divide, a)
# matrix/scalar comparisons
proc `==`*[T](a: Matrix[T], b: T): Matrix[bool] =
new(result)
new(result.data)
result.shape = a.shape
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
for e in a.data[]:
result.data[].add(e == b)
# matrix/matrix operations. They produce a new matrix with the
# result of the operation
proc `+`*[T](a, b: Matrix[T]): Matrix[T] {.raises: [ValueError].} =
when not defined(release):
if a.shape != b.shape:
raise newException(ValueError, "can't add matrices of different shapes")
if a.order != b.order:
raise newException(ValueError, "can't add matrices with different ordering")
new(result)
new(result.data)
result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols)
result.shape = a.shape
result.order = RowMajor
for (e, k) in zip(a.data[], b.data[]):
result.data[].add(e + k)
proc `*`*[T](a: MatrixView[T], b: Matrix[T]): Matrix[T] {.raises: [ValueError].} =
when not defined(release):
echo a
echo b
if a.shape.cols != b.shape.rows:
raise newException(ValueError, &"incompatible argument shapes for multiplication")
if a.m.order != b.order:
raise newException(ValueError, "can't multiply matrices with different ordering")
new(result)
new(result.data)
result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols)
for i in 0..<a.shape.cols:
result.data[].add(a[i] * b[0, i])
result.shape = a.shape
result.order = RowMajor
proc `*`*[T](a, b: Matrix[T]): Matrix[T] {.raises: [ValueError].} =
when not defined(release):
if a.shape.cols != b.shape.rows:
raise newException(ValueError, &"incompatible argument shapes for multiplication")
if a.order != b.order:
raise newException(ValueError, "can't multiply matrices with different ordering")
new(result)
new(result.data)
result.shape = (a.shape.rows, b.shape.cols)
result.order = RowMajor
result.data[] = newSeqOfCap[T](result.shape.rows * result.shape.cols)
for (e, k) in zip(a.data[], b.data[]):
result.data[].add(e * k)
# Comparison operators. They produce a new matrix with boolean values
proc `==`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
when not defined(release):
if a.shape != b.shape:
raise newException(ValueError, "can't compare matrices of different shapes")
new(result)
new(result.data)
result.shape = a.shape
result.order = RowMajor
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
for r in 0..<a.shape.rows:
for c in 0..<a.shape.cols:
result.data[].add(a[r, c] == b[r, c])
proc `>`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
when not defined(release):
if a.shape != b.shape:
raise newException(ValueError, "can't compare matrices of different shapes")
new(result)
new(result.data)
result.shape = a.shape
result.order = RowMajor
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
for r in 0..<a.shape.rows:
for c in 0..<a.shape.cols:
result.data[].add(a[r, c] > b[r, c])
proc `>=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
when not defined(release):
if a.shape != b.shape:
raise newException(ValueError, "can't compare matrices of different shapes")
new(result)
new(result.data)
result.shape = a.shape
result.order = RowMajor
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
for r in 0..<a.shape.rows:
for c in 0..<a.shape.cols:
result.data[].add(a[r, c] >= b[r, c])
proc `<=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} =
when not defined(release):
if a.shape != b.shape:
raise newException(ValueError, "can't compare matrices of different shapes")
new(result)
new(result.data)
result.shape = a.shape
result.order = RowMajor
result.data[] = newSeqOfCap[bool](result.shape.rows * result.shape.cols)
for r in 0..<a.shape.rows:
for c in 0..<a.shape.cols:
result.data[].add(a[r, c] <= b[r, c])
proc all*(a: Matrix[bool]): bool =
# Helper for boolean comparisons
for e in a.data[]:
if not e:
return false
return true
# Specular definitions of commutative operators
proc `<`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = b > a
proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] {.raises: [ValueError].} = not a == b
proc `*`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[T] {.raises: [ValueError].} = b * a
proc `==`*[T](a: T, b: Matrix[T]): Matrix[bool] = b == a
proc `!=`*[T](a: Matrix[T], b: T): Matrix[bool] = not a == b
proc `!=`*[T](a: T, b: Matrix[T]): Matrix[bool] = not b == a
proc toRowMajor*[T](self: Matrix[T]): Matrix[T] =
## Converts a column-major matrix to a
## row-major one
if self.order == RowMajor:
return
self.order = RowMajor
let orig = self.data[]
self.data[] = @[]
var idx = 0
var col = 0
while col < result.shape.cols:
result.data[].add(orig[idx])
idx += result.shape.cols
if idx > orig.high():
inc(col)
idx = col
result = self
proc toColumnMajor*[T](self: Matrix[T]): Matrix[T] =
## Converts a row-major matrix to a
## column-major one
if self.order == ColumnMajor:
return
self.order = ColumnMajor
let orig = self.data[]
self.data[] = @[]
var idx = 0
var col = 0
while col < result.shape.cols:
result.data[].add(orig[idx])
idx += result.shape.cols
if idx > orig.high():
inc(col)
idx = col
result = self
# Matrices and matrix views are iterable!
iterator items*[T](self: Matrix[T]): MatrixView[T] =
for row in 0..<self.shape.rows:
yield self[row]
iterator items*[T](self: MatrixView[T]): T =
for column in 0..<self.shape.cols:
yield self[column]
iterator pairs*[T](self: Matrix[T]): tuple[i: int, val: MatrixView[T]] =
var i = 0
for row in self:
yield (i, row)
iterator pairs*[T](self: MatrixView[T]): tuple[i: int, val: T] =
var i = 0
for col in self:
yield (i, col)
proc dot*[T](self, other: Matrix[T]): Matrix[T] =
## Computes the dot product of the two
## input matrices
when isMainModule:
# var m = newMatrix[int](@[@[1, 2, 3], @[4, 5, 6]])
# var k = m.transpose()
# assert all(m.transpose() == k), "transpose mismatch"
# assert k.sum() == m.sum(), "element sum mismatch"
# assert k.sub() == m.sub(), "element sub mismatch"
# assert all(k.sum(axis=1) == m.sum(axis=0)), "sum over axis mismatch"
# assert all(k.sum(axis=0) == m.sum(axis=1)), "sum over axis mismatch"
# assert all(k.sub(axis=1) == m.sub(axis=0)), "sub over axis mismatch"
# assert all(k.sub(axis=0) == m.sub(axis=1)), "sub over axis mismatch"
var y = newMatrix[int](@[1, 2, 3, 4])
echo y.sum(axis=0)