# Copyright 2023 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from std/strformat import `&`
import std/random


# Seed the default random number generator of std/random once, when
# this module is loaded, so that rand() below does not repeat the same
# sequence on every run
randomize()


type
    # Layout of the flat buffer that backs a matrix (see getIndex)
    MatrixOrder* = enum
        RowMajor, ColumnMajor
    Matrix*[T] = ref object
        ## A matrix object. A shape whose rows field is 0 denotes a
        ## 1-D vector holding cols elements
        data: ref seq[T]     # Nim seqs are value types, so this is needed to avoid copies on assignment
        shape*: tuple[rows, cols: int]
        order*: MatrixOrder
    MatrixView*[T] = ref object
        ## A zero-copy view into a single row of a matrix
        m: Matrix[T]    # The matrix that owns the row we point to
        row: int        # The row in the matrix that this view points to


# Simple one-line helpers
func len*[T](self: Matrix[T]): int {.inline.} =
    ## Total number of elements stored in the matrix
    result = len(self.data[])


func len*[T](self: MatrixView[T]): int {.inline.} =
    ## Number of columns spanned by the view
    result = self.shape.cols


func raw*[T](self: Matrix[T]): ref seq[T] {.inline.} =
    ## Hands out the reference to the flat data buffer (no copy)
    result = self.data


proc getSize*(shape: tuple[rows, cols: int]): int =
    ## Number of elements the flat data buffer must hold
    ## for a matrix of the given shape. A shape with zero
    ## rows is a vector, so its size is its column count
    result = if shape.rows != 0: shape.rows * shape.cols else: shape.cols


proc shape*[T](self: MatrixView[T]): tuple[rows, cols: int] =
    ## Shape of the view: always zero rows and as many columns
    ## as the matrix the view points into
    result = (rows: 0, cols: self.m.shape.cols)


proc newMatrix*[T](data: seq[T]): Matrix[T] =
    ## Builds a row-major vector (shape (0, len(data)))
    ## holding a copy of the given 1D sequence
    var storage: ref seq[T]
    new(storage)
    storage[] = data
    new(result)
    result.data = storage
    result.order = RowMajor
    result.shape = (rows: 0, cols: data.len())


proc newMatrix*[T](data: seq[seq[T]], order: MatrixOrder = RowMajor): Matrix[T] =
    ## Initializes a new matrix from a given
    ## 2D sequence, storing the elements in the
    ## requested order. An empty input yields an
    ## empty matrix of shape (0, 0). Raises ValueError
    ## if the rows do not all have the same length
    new(result)
    new(result.data)
    result.order = order
    if data.len() == 0:
        # Nothing to inspect: data[0] does not exist
        result.shape = (rows: 0, cols: 0)
        result.data[] = @[]
        return
    let rows = data.len()
    let cols = data[0].len()
    for sub in data:
        if sub.len() != cols:
            raise newException(ValueError, "invalid shape of input data (mismatching column)")
    result.shape = (rows: rows, cols: cols)
    result.data[] = newSeqOfCap[T](rows * cols)
    if order == RowMajor:
        for sub in data:
            for element in sub:
                result.data[].add(element)
    else:
        # Column-major: walk the input one column at a time
        for col in 0..<cols:
            for row in 0..<rows:
                result.data[].add(data[row][col])


proc newMatrixFromSeq*[T](data: seq[T], shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
    ## Wraps a copy of a flat sequence into a matrix with
    ## the given shape and order. The sequence is taken as
    ## already laid out in that order; its length is not
    ## checked against the shape
    var storage: ref seq[T]
    new(storage)
    storage[] = data
    new(result)
    result.order = order
    result.shape = shape
    result.data = storage


proc zeros*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
    ## Creates a new matrix of the given shape
    ## filled with zeros, stored in the requested
    ## order
    new(result)
    new(result.data)
    let size = shape.getSize()
    # newSeq zero-initializes its elements
    result.data[] = newSeq[T](if size > 0: size else: 0)
    result.shape = shape
    # Vectors (rows == 0) stay row-major: the column-major index
    # formula multiplies by the row count, which is 0 for them
    result.order = if shape.rows == 0: RowMajor else: order


proc ones*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
    ## Creates a new matrix of the given shape
    ## filled with ones, stored in the requested
    ## order
    new(result)
    new(result.data)
    result.data[] = @[]
    result.shape = shape
    # Vectors (rows == 0) stay row-major: the column-major index
    # formula multiplies by the row count, which is 0 for them
    result.order = if shape.rows == 0: RowMajor else: order
    for _ in 0..<shape.getSize():
        result.data[].add(T(1))


proc rand*[T: int | float](shape: tuple[rows, cols: int], order: MatrixOrder = RowMajor): Matrix[T] =
    ## Creates a new matrix of the given shape
    ## filled with random values between 0 and
    ## 1 (for integers: either 0 or 1), tagged
    ## with the requested order
    new(result)
    new(result.data)
    result.data[] = @[]
    result.shape = shape
    # Vectors (rows == 0) stay row-major: the column-major index
    # formula multiplies by the row count, which is 0 for them
    result.order = if shape.rows == 0: RowMajor else: order
    for _ in 0..<shape.getSize():
        when T is int:
            result.data[].add(rand(0..1))
        else:
            result.data[].add(rand(0.0..1.0))


proc asType*[T](self: Matrix[T], kind: typedesc): Matrix[kind] =
    ## Returns a new matrix with the same shape and order
    ## whose elements are those of self converted to kind
    ## (same as np.array.astype(...))
    new(result)
    result.shape = self.shape
    result.order = self.order
    new(result.data)
    for i in 0..self.data[].high:
        result.data[].add(kind(self.data[][i]))


func getIndex*[T](self: Matrix[T], row, col: int): int =
    ## Converts a (row, col) coordinate pair into a single
    ## integer index into our array, taking the internal
    ## array order into account. No bounds checking is
    ## performed here
    if self.order == RowMajor or self.shape.rows == 0:
        # Vectors (rows == 0) always use this formula: the column-major
        # one multiplies by the row count and would map every column
        # of a vector to the same slot
        result = row * self.shape.cols + col
    else:
        result = col * self.shape.rows + row


func ind2sub*(n: int, shape: tuple[rows, cols: int]): tuple[row, col: int] =
    ## Splits an absolute index into a (row, col) pair for the
    ## given shape. For vectors (zero rows) the index is the column
    if shape.rows != 0:
        result = (row: n div shape.cols, col: n mod shape.cols)
    else:
        result = (row: 0, col: n)


proc `[]`*[T](self: Matrix[T], row, col: int): T =
    ## Gets the element at the given row and
    ## column of the matrix. Outside of release
    ## builds an IndexDefect is raised when either
    ## coordinate falls outside the matrix shape
    let idx = self.getIndex(row, col)
    when not defined(release):
        # A vector (rows == 0) has exactly one addressable row
        let rowLimit = if self.shape.rows == 0: 1 else: self.shape.rows
        # Checking the flat index alone is not enough: an out of range
        # column would silently land on a cell of another row
        if row notin 0..<rowLimit or col notin 0..<self.shape.cols or idx notin 0..<self.data[].len():
            raise newException(IndexDefect, &"index ({row}, {col}) is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
    return self.data[idx]


proc `[]`*[T](self: Matrix[T], row: int): MatrixView[T] =
    ## Gets a single row in the matrix. No data copies
    ## occur and a view into the original matrix is
    ## returned. Outside of release builds an IndexDefect
    ## is raised if the row does not exist
    when not defined(release):
        # A vector (rows == 0) has exactly one addressable row
        let rowLimit = if self.shape.rows == 0: 1 else: self.shape.rows
        # The flat index alone does not catch every bad row of a
        # column-major matrix, so the shape is checked as well
        if row notin 0..<rowLimit or self.getIndex(row, 0) notin 0..<self.data[].len():
            raise newException(IndexDefect, &"row {row} is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
    new(result)
    result.m = self
    result.row = row


proc `[]`*[T](self: MatrixView[T], col: int): T =
    ## Gets the element at the given column of
    ## the matrix view. Outside of release builds
    ## an IndexDefect is raised if the column is
    ## outside the view
    let idx = self.m.getIndex(self.row, col)
    when not defined(release):
        # The column is checked too: the flat index of an out of
        # range column may still be a valid cell of another row
        if col notin 0..<self.shape.cols or idx notin 0..<self.m.data[].len():
            raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
    result = self.m.data[idx]


proc `[]=`*[T](self: Matrix[T], row, col: int, val: T) =
    ## Sets the element at the given row and
    ## column of the matrix to value val. Outside
    ## of release builds an IndexDefect is raised
    ## when either coordinate falls outside the
    ## matrix shape
    let idx = self.getIndex(row, col)
    when not defined(release):
        # A vector (rows == 0) has exactly one addressable row
        let rowLimit = if self.shape.rows == 0: 1 else: self.shape.rows
        # Checking the flat index alone is not enough: an out of range
        # column would silently overwrite a cell of another row
        if row notin 0..<rowLimit or col notin 0..<self.shape.cols or idx notin 0..<self.data[].len():
            raise newException(IndexDefect, &"index ({row}, {col}) is out of range for matrix of shape ({self.shape.rows}, {self.shape.cols})")
    self.data[idx] = val


proc `[]=`*[T](self: MatrixView[T], col: int, val: T) =
    ## Sets the element at the given column
    ## of the matrix view to the value val.
    ## The write goes to the row of the owning
    ## matrix that the view points to
    let idx = self.m.getIndex(self.row, col)
    when not defined(release):
        if col notin 0..<self.shape.cols or idx notin 0..<self.m.data[].len():
            raise newException(IndexDefect, &"column {col} is out of range for view of shape ({self.shape.rows}, {self.shape.cols})")
    self.m.data[idx] = val


# Shape management
proc reshape*[T](self: Matrix[T], shape: tuple[rows, cols: int]): Matrix[T] =
    ## Returns a new matrix object with the requested shape that
    ## shares the data buffer of self (no data copies occur).
    ## Outside of release builds a ValueError is raised when the
    ## shape does not match the number of stored elements
    when not defined(release):
        let wanted = shape.getSize()
        if wanted != self.data[].len():
            raise newException(ValueError, &"shape ({shape.rows}, {shape.cols}) is invalid for matrix of length {self.len()}")
    # Shallow duplicate: same buffer, same order, new shape
    new(result)
    result.data = self.data
    result.order = self.order
    result.shape = shape


proc reshape*[T](self: Matrix[T], rows, cols: int): Matrix[T] =
    ## Convenience overload of reshape taking the two
    ## dimensions separately. No data copies occur
    let target: tuple[rows, cols: int] = (rows: rows, cols: cols)
    result = self.reshape(target)


proc transpose*[T](self: Matrix[T]): Matrix[T] =
    ## Swaps rows and columns of the given matrix by
    ## exchanging the two dimensions and toggling the
    ## storage order; the data buffer is shared, not
    ## copied. Vectors are returned unchanged
    if self.shape.rows != 0:
        let swapped: tuple[rows, cols: int] = (rows: self.shape.cols, cols: self.shape.rows)
        result = self.reshape(swapped)
        case result.order
        of RowMajor:
            result.order = ColumnMajor
        of ColumnMajor:
            result.order = RowMajor
    else:
        result = self


proc flatten*[T](self: Matrix[T]): Matrix[T] =
    ## Copies the elements of the matrix, row by row,
    ## into a new row-major vector
    new(result)
    new(result.data)
    result.order = RowMajor
    result.shape = (rows: 0, cols: self.len())
    for view in self:
        for value in view:
            result.data[].add(value)


# Helpers for fast applying of operations along an axis
proc apply*[T](self: Matrix[T], op: proc (a, b: T): T {.noSideEffect.}, b: T, copy: bool = false, axis: int): Matrix[T] =
    ## Applies a binary operator to every
    ## element of the given matrix, passing
    ## b as the second operand. Accepted axes
    ## are 0, 1 and -1: the operator works on
    ## one element at a time, so every valid
    ## axis visits each element exactly once.
    ## Any other axis raises ValueError. No
    ## copies occur unless copy equals true
    if axis notin [-1, 0, 1]:
        raise newException(ValueError, &"axis {axis} is invalid for matrix")
    result = if copy: self.copy() else: self
    for i, row in result:
        for j, item in row:
            result[i, j] = op(item, b)


proc apply*[T](self: Matrix[T], op: proc (a: T): T {.noSideEffect.}, copy: bool = false, axis: int): Matrix[T] =
    ## Applies a unary operator to every
    ## element of the given matrix. Accepted
    ## axes are 0, 1 and -1: the operator works
    ## on one element at a time, so every valid
    ## axis visits each element exactly once.
    ## Any other axis raises ValueError. No
    ## copies occur unless copy equals true
    if axis notin [-1, 0, 1]:
        raise newException(ValueError, &"axis {axis} is invalid for matrix")
    result = if copy: self.copy() else: self
    for i, row in result:
        for j, item in row:
            result[i, j] = op(item)


proc apply*[T](self: MatrixView[T], op: proc (a, b: T): T {.noSideEffect.}, b: T, copy: bool = false): MatrixView[T] =
    ## Applies a binary operator to every
    ## element in the matrix view, passing b
    ## as the second operand. No copies occur
    ## unless copy equals true, in which case
    ## the returned view points into a fresh
    ## vector and self is left untouched
    if copy:
        # copy() on a view yields a vector: wrap its only row in a view
        new(result)
        result.m = self.copy()
        result.row = 0
    else:
        result = self
    for i, j in self:
        # Write through the owning matrix so that the view's row is honoured
        result.m[result.row, i] = op(j, b)


proc apply*[T](self: MatrixView[T], op: proc (a: T): T {.noSideEffect.}, copy: bool = false): MatrixView[T] =
    ## Applies a unary operator to every
    ## element in the matrix view. No copies
    ## occur unless copy equals true, in which
    ## case the returned view points into a
    ## fresh vector and self is left untouched
    if copy:
        # copy() on a view yields a vector: wrap its only row in a view
        new(result)
        result.m = self.copy()
        result.row = 0
    else:
        result = self
    for i, j in self:
        # Write through the owning matrix so that the view's row is honoured
        result.m[result.row, i] = op(j)


proc sum*[T](self: Matrix[T]): T =
    ## Adds up every element stored in the
    ## matrix, starting from the default
    ## value of T
    for i in 0..self.data[].high:
        result += self.data[][i]


# Operations along an axis
proc sum*[T](self: Matrix[T], axis: int, copy: bool = true): Matrix[T] =
    ## Sums the elements of the matrix along the
    ## given axis and returns the sums as a row-major
    ## vector: axis 1 yields one sum per row, axis 0
    ## one sum per column. For a vector, axis 0 yields
    ## a single element holding the total. The operation
    ## is performed in-place (self is overwritten and
    ## returned) unless copy equals true. Raises
    ## ValueError for any other axis, before anything
    ## is modified
    if axis notin [0, 1]:
        raise newException(ValueError, &"axis {axis} is invalid for matrix")
    when not defined(release):
        if axis == 1 and self.shape.rows == 0:
            raise newException(ValueError, &"axis {axis} is invalid for matrix of dimension 1")
    result = if copy: self.copy() else: self
    # The sums are collected on the side and swapped in at the end, so
    # the source elements stay readable while we iterate over them
    var sums: seq[T] = @[]
    if axis == 1 or result.shape.rows == 0:
        # Iterating a vector yields its single row
        for row in result:
            sums.add(row.sum())
    else:
        for col in 0..<result.shape.cols:
            var value: T
            for row in 0..<result.shape.rows:
                value += result[row, col]
            sums.add(value)
    result.data[] = sums
    result.shape = (rows: 0, cols: sums.len())
    result.order = RowMajor


proc sum*[T](self: MatrixView[T]): T =
    ## Adds up every element of the matrix
    ## view, starting from the default value
    ## of T
    for column in 0..<self.shape.cols:
        result += self[column]


proc copy*[T](self: Matrix[T]): Matrix[T] =
    ## Returns an independent matrix with the same
    ## shape and order whose data buffer is a copy
    ## of the one owned by self
    var storage: ref seq[T]
    new(storage)
    storage[] = self.data[]
    new(result)
    result.order = self.order
    result.shape = self.shape
    result.data = storage


proc dup*[T](self: Matrix[T]): Matrix[T] =
    ## Returns a new matrix object with the same shape
    ## and order that shares (does not copy) the data
    ## buffer of self
    new(result)
    result.order = self.order
    result.shape = self.shape
    result.data = self.data


proc copy*[T](self: MatrixView[T]): Matrix[T] =
    ## Copies the elements visible through the view
    ## into a new vector. Only the data of the row
    ## the view points to is copied
    new(result)
    result.shape = self.shape
    new(result.data)
    for value in self:
        result.data[].add(value)


proc dup*[T](self: MatrixView[T]): MatrixView[T] =
    ## Creates a new shallow copy of the given
    ## matrix view, without copying the underlying
    ## data
    new(result)
    # A view stores only its owner and row; its shape is derived from
    # the owner, so there is nothing else to assign
    result.m = self.m
    result.row = self.row

# matrix/scalar operations

# Wrappers because builtins are not
# procvars
func add*[T](a, b: T): T =
    ## Named wrapper around the + operator
    a + b


func sub*[T](a, b: T): T =
    ## Named wrapper around the binary - operator
    a - b


func mul*[T](a, b: T): T =
    ## Named wrapper around the * operator
    a * b


func divide*[T](a, b: T): T =
    ## Named wrapper around division. Integer types use
    ## truncating division (div), because / on integers
    ## produces a float that cannot be returned as T
    when T is SomeInteger:
        a div b
    else:
        a / b


func neg*[T](a: T): T =
    ## Named wrapper around the unary - operator
    -a

# Warning: These *all* perform copies of the underlying matrix!
proc `+`*[T](a: Matrix[T], b: T): Matrix[T] =
    ## Adds the scalar b to every element of a copy of a
    a.copy().apply(add, b, axis = -1)


proc `+`*[T](a: T, b: Matrix[T]): Matrix[T] =
    ## Adds the scalar a to every element of a copy of b
    b.copy().apply(add, a, axis = -1)


proc `-`*[T](a: Matrix[T], b: T): Matrix[T] =
    ## Subtracts the scalar b from every element of a copy of a
    a.copy().apply(sub, b, axis = -1)


proc `-`*[T](a: T, b: Matrix[T]): Matrix[T] =
    ## Subtracts every element of b from the scalar a and
    ## returns the results as a new matrix
    # apply() passes the element first, so the operands are flipped here
    func scalarMinus(element, scalar: T): T = scalar - element
    result = b.copy().apply(scalarMinus, a, axis = -1)


proc `-`*[T](a: Matrix[T]): Matrix[T] =
    ## Negates every element of a copy of a
    a.copy().apply(neg, axis = -1)


proc `*`*[T](a: Matrix[T], b: T): Matrix[T] =
    ## Multiplies every element of a copy of a by the scalar b
    a.copy().apply(mul, b, axis = -1)


proc `*`*[T](a: T, b: Matrix[T]): Matrix[T] =
    ## Multiplies every element of a copy of b by the scalar a
    b.copy().apply(mul, a, axis = -1)


proc `/`*[T](a: Matrix[T], b: T): Matrix[T] =
    ## Divides every element of a copy of a by the scalar b
    a.copy().apply(divide, b, axis = -1)


proc `/`*[T](a: T, b: Matrix[T]): Matrix[T] =
    ## Divides the scalar a by every element of b and
    ## returns the results as a new matrix
    # apply() passes the element first, so the operands are flipped here
    func scalarOver(element, scalar: T): T = divide(scalar, element)
    result = b.copy().apply(scalarOver, a, axis = -1)


proc `+`*[T](a: MatrixView[T], b: T): Matrix[T] =
    ## Adds the scalar b to every element of a copy of the view
    a.copy().apply(add, b, axis = -1)


proc `+`*[T](a: T, b: MatrixView[T]): Matrix[T] =
    ## Adds the scalar a to every element of a copy of the view
    b.copy().apply(add, a, axis = -1)


proc `-`*[T](a: MatrixView[T], b: T): Matrix[T] =
    ## Subtracts the scalar b from every element of a copy of the view
    a.copy().apply(sub, b, axis = -1)


proc `-`*[T](a: T, b: MatrixView[T]): Matrix[T] =
    ## Subtracts every element of the view from the scalar a
    ## and returns the results as a new vector
    # apply() passes the element first, so the operands are flipped here
    func scalarMinus(element, scalar: T): T = scalar - element
    result = b.copy().apply(scalarMinus, a, axis = -1)


proc `-`*[T](a: MatrixView[T]): Matrix[T] =
    ## Negates every element of a copy of the view
    a.copy().apply(neg, axis = -1)


proc `*`*[T](a: MatrixView[T], b: T): Matrix[T] =
    ## Multiplies every element of a copy of the view by the scalar b
    a.copy().apply(mul, b, axis = -1)


proc `*`*[T](a: T, b: MatrixView[T]): Matrix[T] =
    ## Multiplies every element of a copy of the view by the scalar a
    b.copy().apply(mul, a, axis = -1)


proc `/`*[T](a: MatrixView[T], b: T): Matrix[T] =
    ## Divides every element of a copy of the view by the scalar b
    a.copy().apply(divide, b, axis = -1)


proc `/`*[T](a: T, b: MatrixView[T]): Matrix[T] =
    ## Divides the scalar a by every element of the view and
    ## returns the results as a new vector
    # apply() passes the element first, so the operands are flipped here
    func scalarOver(element, scalar: T): T = divide(scalar, element)
    result = b.copy().apply(scalarOver, a, axis = -1)


# matrix/matrix operations. They produce a new matrix with the
# result of the operation

proc `+`*[T](a, b: MatrixView[T]): Matrix[T] =
    ## Element-wise sum of two matrix views, returned
    ## as a new row-major vector. Outside of release
    ## builds a ValueError is raised if the views have
    ## different lengths
    let width = a.shape.cols
    when not defined(release):
        if width != b.shape.cols:  # The two views must be equally long
            raise newException(ValueError, &"incompatible argument shapes for addition")
    new(result)
    result.order = RowMajor
    result.shape = a.shape
    new(result.data)
    result.data[] = newSeqOfCap[T](result.shape.getSize())
    for column in 0..<width:
        result.data[].add(a[column] + b[column])


proc `+`*[T](a, b: Matrix[T]): Matrix[T] =
    ## Element-wise addition of the two inputs. Two
    ## vectors are summed directly; a vector paired with
    ## a matrix is added to each row of that matrix.
    ## Outside of release builds a ValueError is raised
    ## for shapes that cannot be combined
    let
        aIsVector = a.shape.rows == 0
        bIsVector = b.shape.rows == 0
    when not defined(release):
        if a.shape.rows > 0 and b.shape.rows > 0 and a.shape != b.shape:
            raise newException(ValueError, &"incompatible argument shapes for addition")
        elif (aIsVector or bIsVector) and a.shape.cols != b.shape.cols:
            raise newException(ValueError, &"incompatible argument shapes for addition")
    if aIsVector and bIsVector:
        return a[0] + b[0]
    if aIsVector:
        # Add the vector a to every row of b
        result = zeros[T](b.shape)
        for i, row in b:
            let summed = (row + a[0])[0]
            for j, e in summed:
                result[i, j] = e
    elif bIsVector:
        # Add the vector b to every row of a
        result = zeros[T](a.shape)
        for i, row in a:
            let summed = (row + b[0])[0]
            for j, e in summed:
                result[i, j] = e
    else:
        # Same shape: add matching rows
        result = zeros[T](a.shape)
        for i, row in a:
            let summed = (b[i] + row)[0]
            for j, e in summed:
                result[i, j] = e


proc `-`*[T](a, b: MatrixView[T]): Matrix[T] =
    ## Performs the vector difference of the
    ## given matrix views and returns a new
    ## vector with the result. Outside of
    ## release builds a ValueError is raised
    ## if the views have different lengths
    when not defined(release):
        if a.shape.cols != b.shape.cols:  # Basically if their length is different
            raise newException(ValueError, &"incompatible argument shapes for subtraction")
    new(result)
    new(result.data)
    result.shape = a.shape
    result.order = RowMajor
    result.data[] = newSeqOfCap[T](result.shape.getSize())
    for i in 0..<a.shape.cols:
        result.data[].add(a[i] - b[i])


proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
    ## Element-wise subtraction of the two inputs
    ## (a - b), returned as a new row-major matrix.
    ## A vector paired with a matrix is combined with
    ## each row of that matrix. Outside of release
    ## builds a ValueError is raised for shapes that
    ## cannot be combined
    let
        aIsVector = a.shape.rows == 0
        bIsVector = b.shape.rows == 0
    when not defined(release):
        if a.shape.rows > 0 and b.shape.rows > 0 and a.shape != b.shape:
            raise newException(ValueError, &"incompatible argument shapes for subtraction")
        elif (aIsVector or bIsVector) and a.shape.cols != b.shape.cols:
            raise newException(ValueError, &"incompatible argument shapes for subtraction")
    if aIsVector and bIsVector:
        return a[0] - b[0]
    new(result)
    new(result.data)
    # The result takes the shape of whichever operand is a full matrix
    result.shape = if aIsVector: b.shape else: a.shape
    result.order = RowMajor
    result.data[] = newSeqOfCap[T](result.shape.getSize())
    for i in 0..<result.shape.rows:
        for j in 0..<result.shape.cols:
            # A vector operand contributes its only row to every output row
            let lhs = if aIsVector: a[0, j] else: a[i, j]
            let rhs = if bIsVector: b[0, j] else: b[i, j]
            result.data[].add(lhs - rhs)


proc `*`*[T](a, b: MatrixView[T]): Matrix[T] =
    ## Element-wise product of two matrix views,
    ## returned as a new row-major vector. Outside of
    ## release builds a ValueError is raised if the
    ## views have different lengths
    let width = a.shape.cols
    when not defined(release):
        if width != b.shape.cols:  # The two views must be equally long
            raise newException(ValueError, &"incompatible argument shapes for multiplication")
    new(result)
    result.order = RowMajor
    result.shape = a.shape
    new(result.data)
    result.data[] = newSeqOfCap[T](result.shape.getSize())
    for column in 0..<width:
        result.data[].add(a[column] * b[column])


proc `*`*[T](a, b: Matrix[T]): Matrix[T] =
    ## Element-wise multiplication of the two inputs.
    ## Two vectors are multiplied directly; a vector
    ## paired with a matrix is multiplied with each row
    ## of that matrix. Outside of release builds a
    ## ValueError is raised for shapes that cannot be
    ## combined
    let
        aIsVector = a.shape.rows == 0
        bIsVector = b.shape.rows == 0
    when not defined(release):
        if a.shape.rows > 0 and b.shape.rows > 0 and a.shape != b.shape:
            raise newException(ValueError, &"incompatible argument shapes for multiplication")
        elif (aIsVector or bIsVector) and a.shape.cols != b.shape.cols:
            raise newException(ValueError, &"incompatible argument shapes for multiplication")
    if aIsVector and bIsVector:
        return a[0] * b[0]
    if aIsVector:
        # Multiply every row of b by the vector a
        result = zeros[T](b.shape)
        for i, row in b:
            let product = (row * a[0])[0]
            for j, e in product:
                result[i, j] = e
    elif bIsVector:
        # Multiply every row of a by the vector b
        result = zeros[T](a.shape)
        for i, row in a:
            let product = (row * b[0])[0]
            for j, e in product:
                result[i, j] = e
    else:
        # Same shape: multiply matching rows
        result = zeros[T](a.shape)
        for i, row in a:
            let product = (b[i] * row)[0]
            for j, e in product:
                result[i, j] = e


# Comparison operators. They produce a new matrix of the same
# shape as the input(s) and containing boolean values (the result of
# the comparison element-wise). Useful for use in where()

# matrix/scalar comparisons
proc `==`*[T](a: Matrix[T], b: T): Matrix[bool] =
    ## Compares every element of a with the scalar b
    ## and returns a boolean matrix with the same shape
    ## and storage order as a
    new(result)
    new(result.data)
    result.shape = a.shape
    # The flags are produced in a's storage order, so the result has to
    # carry that order for (row, col) lookups to line up
    result.order = a.order
    result.data[] = newSeqOfCap[bool](a.data[].len())
    for e in a.data[]:
        result.data[].add(e == b)


proc `<`*[T](a: Matrix[T], b: T): Matrix[bool] =
    ## Tests every element of a for being smaller than
    ## the scalar b and returns a boolean matrix with
    ## the same shape and storage order as a
    new(result)
    new(result.data)
    result.shape = a.shape
    # The flags are produced in a's storage order, so the result has to
    # carry that order for (row, col) lookups to line up
    result.order = a.order
    result.data[] = newSeqOfCap[bool](a.data[].len())
    for e in a.data[]:
        result.data[].add(e < b)


proc `>`*[T](a: Matrix[T], b: T): Matrix[bool] =
    ## Tests every element of a for being greater than
    ## the scalar b and returns a boolean matrix with
    ## the same shape and storage order as a
    new(result)
    new(result.data)
    result.shape = a.shape
    # The flags are produced in a's storage order, so the result has to
    # carry that order for (row, col) lookups to line up
    result.order = a.order
    result.data[] = newSeqOfCap[bool](a.data[].len())
    for e in a.data[]:
        result.data[].add(e > b)


proc `<=`*[T](a: Matrix[T], b: T): Matrix[bool] =
    ## Tests every element of a for being smaller than
    ## or equal to the scalar b and returns a boolean
    ## matrix with the same shape and storage order as a
    new(result)
    new(result.data)
    result.shape = a.shape
    # The flags are produced in a's storage order, so the result has to
    # carry that order for (row, col) lookups to line up
    result.order = a.order
    result.data[] = newSeqOfCap[bool](a.data[].len())
    for e in a.data[]:
        result.data[].add(e <= b)


proc `>=`*[T](a: Matrix[T], b: T): Matrix[bool] =
    ## Tests every element of a for being greater than
    ## or equal to the scalar b and returns a boolean
    ## matrix with the same shape and storage order as a
    new(result)
    new(result.data)
    result.shape = a.shape
    # The flags are produced in a's storage order, so the result has to
    # carry that order for (row, col) lookups to line up
    result.order = a.order
    result.data[] = newSeqOfCap[bool](a.data[].len())
    for e in a.data[]:
        result.data[].add(e >= b)


proc `==`*[T](a: MatrixView[T], b: MatrixView[T]): Matrix[bool] =
    ## Element-wise equality of two matrix views, returned
    ## as a new row-major boolean vector. Outside of release
    ## builds a ValueError is raised if the views have
    ## different lengths
    when not defined(release):
        if a.len() != b.len():
            raise newException(ValueError, "invalid shapes for comparison")
    new(result)
    result.order = RowMajor
    result.shape = a.shape
    new(result.data)
    result.data[] = newSeqOfCap[bool](result.shape.getSize())
    for column in 0..<result.shape.cols:
        result.data[].add(a[column] == b[column])


proc `==`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[bool] =
    ## Element-wise equality between a vector and a matrix
    ## view. Outside of release builds a ValueError is raised
    ## if a has one or more rows or a different length than
    ## the view
    when not defined(release):
        let isVector = not (a.shape.rows > 0)
        if not isVector or a.shape.cols != b.len():
            raise newException(ValueError, "invalid shapes for comparison")
    result = a[0] == b


proc diag*[T](a: Matrix[T], k: int = 0): Matrix[T] =
    ## Returns the kth diagonal of
    ## the given matrix if a is 2-D
    ## or a 2-D matrix with a on its
    ## kth diagonal if it is 1-D. A
    ## positive k selects a diagonal
    ## above the main one, a negative
    ## k one below it. A diagonal that
    ## lies outside a 2-D input yields
    ## an empty vector
    # First cell of the kth diagonal: k columns to the right of the
    # origin when k >= 0, -k rows below it otherwise
    let start = if k >= 0: (row: 0, col: k) else: (row: -k, col: 0)
    if a.shape.rows > 0:
        var res = newSeq[T]()
        var current = start
        while current.row < a.shape.rows and current.col < a.shape.cols:
            res.add(a.data[a.getIndex(current.row, current.col)])
            inc(current.row)
            inc(current.col)
        result = newMatrix[T](res)
    else:
        # The square output must fit the vector plus the diagonal offset
        let size = len(a) + abs(k)
        result = zeros[T]((size, size))
        var current = start
        for i in 0..<len(a):
            result[current.row, current.col] = a[0, i]
            inc(current.row)
            inc(current.col)


proc diagflat*[T](a: Matrix[T], k: int = 0): Matrix[T] =
    ## Flattens the input and hands the resulting
    ## vector to diag, which places it on the kth
    ## diagonal of a 2-D matrix
    let flat = a.flatten()
    result = flat.diag(k)


proc fliplr*[T](self: Matrix[T]): Matrix[T] =
    ## Flips each row in the matrix left
    ## to right. A row-major copy is returned
    new(result)
    result.shape = self.shape
    # The elements are emitted one row after the other, so the copy is
    # row-major no matter how self is stored
    result.order = RowMajor
    new(result.data)
    result.data[] = newSeqOfCap[T](self.len())
    for row in self:
        for i in countdown(row.len() - 1, 0):
            result.data[].add(row[i])


proc `==`*[T](a, b: Matrix[T]): Matrix[bool] =
    ## Element-wise equality of two matrices of the same
    ## shape, returned as a row-major boolean matrix.
    ## Outside of release builds a ValueError is raised
    ## if the shapes differ
    when not defined(release):
        if a.shape != b.shape:
            raise newException(ValueError, "can't compare matrices of different shapes")
    if a.shape.rows == 0:
        # Vectors: compare their single rows through views
        result = a[0] == b[0]
    else:
        new(result)
        new(result.data)
        result.order = RowMajor
        result.shape = a.shape
        result.data[] = newSeqOfCap[bool](result.shape.getSize())
        for row in 0..<a.shape.rows:
            for col in 0..<a.shape.cols:
                result.data[].add(a[row, col] == b[row, col])


proc `!=`*[T](a, b: Matrix[T]): Matrix[bool] =
    ## Element-wise inequality of two matrices of the
    ## same shape, returned as a row-major boolean matrix.
    ## Outside of release builds a ValueError is raised
    ## if the shapes differ
    when not defined(release):
        if a.shape != b.shape:
            raise newException(ValueError, "can't compare matrices of different shapes")
    new(result)
    new(result.data)
    result.shape = a.shape
    result.order = RowMajor
    result.data[] = newSeqOfCap[bool](result.shape.getSize())
    # A vector (rows == 0) is walked as a single row
    let rows = if a.shape.rows == 0: 1 else: a.shape.rows
    for r in 0..<rows:
        for c in 0..<a.shape.cols:
            result.data[].add(a[r, c] != b[r, c])


proc `>`*[T](a, b: Matrix[T]): Matrix[bool] =
    ## Element-wise a > b for two matrices of the same
    ## shape, returned as a row-major boolean matrix.
    ## Outside of release builds a ValueError is raised
    ## if the shapes differ
    when not defined(release):
        if a.shape != b.shape:
            raise newException(ValueError, "can't compare matrices of different shapes")
    new(result)
    new(result.data)
    result.shape = a.shape
    result.order = RowMajor
    result.data[] = newSeqOfCap[bool](result.shape.getSize())
    # A vector (rows == 0) is walked as a single row, element by element,
    # so no comparison operator on views is required
    let rows = if a.shape.rows == 0: 1 else: a.shape.rows
    for r in 0..<rows:
        for c in 0..<a.shape.cols:
            result.data[].add(a[r, c] > b[r, c])


proc `>=`*[T](a, b: Matrix[T]): Matrix[bool] =
    ## Element-wise a >= b for two matrices of the same
    ## shape, returned as a row-major boolean matrix.
    ## Outside of release builds a ValueError is raised
    ## if the shapes differ
    when not defined(release):
        if a.shape != b.shape:
            raise newException(ValueError, "can't compare matrices of different shapes")
    new(result)
    new(result.data)
    result.shape = a.shape
    result.order = RowMajor
    result.data[] = newSeqOfCap[bool](result.shape.getSize())
    # A vector (rows == 0) is walked as a single row, element by element,
    # so no comparison operator on views is required
    let rows = if a.shape.rows == 0: 1 else: a.shape.rows
    for r in 0..<rows:
        for c in 0..<a.shape.cols:
            result.data[].add(a[r, c] >= b[r, c])


proc `<=`*[T](a, b: Matrix[T]): Matrix[bool] =
    ## Element-wise a <= b for two matrices of the same
    ## shape, returned as a row-major boolean matrix.
    ## Outside of release builds a ValueError is raised
    ## if the shapes differ
    when not defined(release):
        if a.shape != b.shape:
            raise newException(ValueError, "can't compare matrices of different shapes")
    new(result)
    new(result.data)
    result.shape = a.shape
    result.order = RowMajor
    result.data[] = newSeqOfCap[bool](result.shape.getSize())
    # A vector (rows == 0) is walked as a single row, element by element,
    # so no comparison operator on views is required
    let rows = if a.shape.rows == 0: 1 else: a.shape.rows
    for r in 0..<rows:
        for c in 0..<a.shape.cols:
            result.data[].add(a[r, c] <= b[r, c])


proc all*(a: Matrix[bool]): bool =
    ## True when no stored element is false (hence
    ## also true for an empty matrix)
    result = true
    for flag in a.data[]:
        if not flag:
            return false


proc any*(a: Matrix[bool]): bool =
    ## True when at least one stored element is true
    ## (hence false for an empty matrix)
    result = false
    for flag in a.data[]:
        if flag:
            return true


proc index*[T](self: Matrix[T], x: T): tuple[row, col: int] =
    ## Scans the matrix row by row and returns the
    ## (row, col) position of the first element equal
    ## to x, or (-1, -1) when there is none
    result = (row: -1, col: -1)
    for r, view in self:
        for c, value in view:
            if value == x:
                return (row: r, col: c)


# Specular definitions of commutative operators
proc `<`*[T](a, b: Matrix[T]): Matrix[bool] =
    ## a < b, evaluated as b > a
    result = b > a


proc `*`*[T](a: Matrix[T], b: MatrixView[T]): Matrix[T] =
    ## Forwards to the product with the operands swapped
    ## NOTE(review): this needs a `*` overload taking
    ## (MatrixView, Matrix); confirm that one exists
    result = b * a


proc `==`*[T](a: T, b: Matrix[T]): Matrix[bool] =
    ## scalar == matrix, evaluated as matrix == scalar
    result = b == a


proc `==`*[T](a: MatrixView[T], b: Matrix[T]): Matrix[bool] =
    ## view == matrix, evaluated as matrix == view
    result = b == a


proc toRowMajor*[T](self: Matrix[T], copy: bool = true): Matrix[T] =
    ## Converts a column-major matrix to a
    ## row-major one. Returns a copy unless
    ## copy equals false, in which case self
    ## is converted in place and returned. A
    ## matrix that is already row-major is
    ## returned as is (or copied, if copy is
    ## true)
    result = if copy: self.copy() else: self
    if self.order == RowMajor:
        return
    # Read every element through self while it is still tagged as
    # column-major, and only then swap the reordered buffer in
    var ordered = newSeqOfCap[T](self.len())
    for row in self:
        for element in row:
            ordered.add(element)
    result.data[] = ordered
    result.order = RowMajor


proc toColumnMajor*[T](self: Matrix[T], copy: bool = true): Matrix[T] =
    ## Converts a row-major matrix to a
    ## column-major one. Returns a copy unless
    ## copy equals false, in which case self
    ## is converted in place and returned. A
    ## matrix that is already column-major is
    ## returned as is (or copied, if copy is
    ## true). Vectors are left row-major: both
    ## layouts store a vector identically and
    ## the column-major index formula relies on
    ## a non-zero row count
    result = if copy: self.copy() else: self
    if self.order == ColumnMajor or self.shape.rows == 0:
        return
    # Read every element through self while it is still tagged as
    # row-major, and only then swap the reordered buffer in
    var ordered = newSeqOfCap[T](self.len())
    for col in 0..<self.shape.cols:
        for row in 0..<self.shape.rows:
            ordered.add(self[row, col])
    result.data[] = ordered
    result.order = ColumnMajor


# Matrices and matrix views are iterable!

iterator items*[T](self: Matrix[T]): MatrixView[T] =
    ## Yields a view for each row of the matrix. A
    ## non-empty vector yields a single view over its
    ## elements; an empty matrix yields nothing
    if self.len() != 0:
        if self.shape.rows == 0:
            yield self[0]
        else:
            for r in 0..<self.shape.rows:
                yield self[r]


iterator items*[T](self: MatrixView[T]): T =
    ## Yields the elements of the view from the first
    ## column to the last
    let width = self.shape.cols
    if self.len() > 0:
        var column = 0
        while column < width:
            yield self[column]
            inc(column)


iterator pairs*[T](self: Matrix[T]): tuple[i: int, val: MatrixView[T]] =
    ## Yields (position, view) for every row produced
    ## by items, counting from zero
    var position = 0
    for view in self:
        yield (i: position, val: view)
        position += 1


iterator pairs*[T](self: MatrixView[T]): tuple[i: int, val: T] =
    ## Yields (position, element) for every element
    ## of the view, counting from zero
    var position = 0
    for value in self:
        yield (i: position, val: value)
        position += 1


proc `$`*[T](self: MatrixView[T]): string =
    ## Renders the view as a bracketed list of its
    ## elements separated by ", "
    let last = self.shape.cols - 1
    result.add("[")
    for position, value in self:
        result.add($value)
        if position < last:
            result.add(", ")
    result.add("]")


proc `$`*[T](self: Matrix[T]): string =
    ## Renders the matrix as nested bracketed lists,
    ## one row per line. A non-empty vector is rendered
    ## like a view
    if self.shape.rows == 0 and self.len() > 0:
        return $(self[0])
    let
        lastRow = self.shape.rows - 1
        lastCol = self.shape.cols - 1
    result.add("[")
    for r, view in self:
        result.add("[")
        for c, value in view:
            result.add($value)
            if c < lastCol:
                result.add(", ")
        if r < lastRow:
            # Close the row, break the line and indent the next one
            result.add("], \n")
            result.add(" ")
        else:
            result.add("]")
    result.add("]")


proc dot*[T](self, other: Matrix[T]): Matrix[T] =
    ## Computes the dot product of the two
    ## input matrices. Inputs with more than
    ## one row are treated as matrices, anything
    ## else as a vector. Outside of release
    ## builds a ValueError is raised for
    ## incompatible shapes
    if self.shape.rows > 1 and other.shape.rows > 1:
        when not defined(release):
            # Each row of self is paired with a column of other, so the
            # row length of self must equal the column length of other
            if self.shape.cols != other.shape.rows:
                raise newException(ValueError, &"incompatible argument shapes for dot product")
        result = zeros[T]((self.shape.rows, other.shape.cols))
        # The rows of the transpose are the columns of other
        let columns = other.transpose()
        for i in 0..<result.shape.rows:
            for j in 0..<result.shape.cols:
                result[i, j] = (self[i] * columns[j]).sum()
    elif self.shape.rows > 1:
        when not defined(release):
            if self.shape.cols != other.shape.cols:
                raise newException(ValueError, &"incompatible argument shapes for dot product")
        result = zeros[T]((0, self.shape.rows))
        for i in 0..<result.shape.cols:
            result[0, i] = (self[i] * other[0]).sum()
    elif other.shape.rows > 1:
        return other.transpose().dot(self)
    else:
        # NOTE(review): for two vectors this returns the element-wise
        # product, not its sum; confirm that this is intended
        return self * other


proc dot*[T](self: MatrixView[T], other: Matrix[T]): Matrix[T] =
    ## Computes the dot product between a matrix
    ## view and a matrix: every row of other is
    ## multiplied element-wise with the view and
    ## summed, giving a vector with one element
    ## per row of other (a single element if other
    ## is a vector). Outside of release builds a
    ## ValueError is raised if the view and the rows
    ## of other have different lengths
    when not defined(release):
        if self.shape.cols != other.shape.cols:
            raise newException(ValueError, &"incompatible argument shapes for dot product")
    if other.len() == 0:
        return zeros[T]((0, 0))
    # A vector (rows == 0) counts as a single row
    let count = if other.shape.rows == 0: 1 else: other.shape.rows
    result = zeros[T]((0, count))
    for i in 0..<count:
        result[0, i] = (other[i] * self).sum()


proc dot*[T](self: Matrix[T], other: MatrixView[T]): Matrix[T] {.inline.} =
    ## Dot product between a matrix and a view; forwards
    ## to the (view, matrix) overload
    other.dot(self)


proc dot*[T](self, other: MatrixView[T]): T =
    ## Dot product of two views: the sum of their
    ## element-wise product
    let products = self * other
    result = products.sum()

    
proc where*[T](cond: Matrix[bool], x, y: Matrix[T]): Matrix[T] =
    ## Builds a copy of x in which every position where
    ## cond is false is replaced by the element of y at
    ## the same position. Outside of release builds a
    ## ValueError is raised unless all three inputs have
    ## the same shape
    when not defined(release):
        if not (x.shape == y.shape and y.shape == cond.shape):
            raise newException(ValueError, &"all inputs must be of equal shape for where()")
    result = x.copy()
    if cond.shape.rows == 0:
        # Vectors only have row 0
        for col in 0..<cond.shape.cols:
            if not cond[0, col]:
                result[0, col] = y[0, col]
    for row in 0..<cond.shape.rows:
        for col in 0..<cond.shape.cols:
            if not cond[row, col]:
                result[row, col] = y[row, col]


proc where*[T](cond: Matrix[bool], x: Matrix[T], y: T): Matrix[T] =
    ## Same as where() on two matrices, except that
    ## the fallback is a single constant. The result
    ## starts out as a copy of x and every position
    ## at which cond is false is overwritten with y.
    ##
    ## Unless `release` is defined, a ValueError is
    ## raised if cond and x do not share one shape
    when not defined(release):
        if x.shape != cond.shape:
            raise newException(ValueError, &"all inputs must be of equal shape for where()")
    result = x.copy()
    if cond.shape.rows == 0:
        # Zero rows reported: only row 0 is walked
        for c in 0..<cond.shape.cols:
            if not cond[0, c]:
                result[0, c] = y
    else:
        for r in 0..<cond.shape.rows:
            for c in 0..<cond.shape.cols:
                if not cond[r, c]:
                    result[r, c] = y


# Just a helper to avoid mistakes and so that x.where(x > 10, y) works as expected
proc where*[T](self: Matrix[T], cond: Matrix[bool], other: Matrix[T]): Matrix[T] {.inline.} =
    ## Method-style spelling of where(): forwards to
    ## cond.where(self, other), so elements come from
    ## self where cond is true and from other elsewhere
    result = cond.where(self, other)
proc where*[T](self: Matrix[T], cond: Matrix[bool], other: T): Matrix[T] {.inline.} =
    ## Method-style spelling of where() with a constant
    ## fallback: forwards to cond.where(self, other), so
    ## elements come from self where cond is true and are
    ## replaced by other elsewhere
    result = cond.where(self, other)


proc max*[T](self: Matrix[T]): T =
    ## Returns the largest element
    ## found in the matrix. The search
    ## is seeded with the element at
    ## [0, 0], which therefore has to exist
    result = self[0, 0]
    for line in self:
        for candidate in line:
            if result < candidate:
                result = candidate


proc argmax*[T](self: Matrix[T]): int =
    ## Returns the index of the largest element
    ## in the matrix, as computed by getIndex for
    ## the row and column where that element sits.
    ## When the maximum occurs more than once, the
    ## first occurrence in row-by-row visiting order
    ## wins. The search is seeded with the element at
    ## [0, 0], which therefore has to exist
    # NOTE(review): the previous implementation never advanced its 2D loop
    # counters and, for 1D input, returned getIndex(0, cols) (one past the
    # end) instead of the position of the maximum. The `x.argmax() == 10`
    # self-test at the bottom of this file encodes that old value and should
    # instead expect the index of the last element
    var
        best: T = self[0, 0]
        bestRow = 0
        bestCol = 0
    # A matrix built from a 1D sequence reports zero rows, but its
    # elements are still addressed through row 0
    let rowCount = if self.shape.rows == 0: 1 else: self.shape.rows
    for row in 0..<rowCount:
        for col in 0..<self.shape.cols:
            if self[row, col] > best:
                best = self[row, col]
                bestRow = row
                bestCol = col
    return self.getIndex(bestRow, bestCol)


proc contains*[T](self: Matrix[T], e: T): bool =
    ## Tells whether at least one element
    ## of the matrix compares equal to e.
    ## The scan stops at the first match
    result = false
    for line in self:
        for item in line:
            if item == e:
                return true
    

proc count*[T](self: Matrix[T], e: T): int =
    ## Counts how many elements of
    ## self compare equal to e
    result = 0
    for line in self:
        for item in line:
            # ord() maps a match to 1 and a mismatch to 0
            result += ord(item == e)


proc replace*[T](self: Matrix[T], other: Matrix[T], copy: bool = false) =
    ## Makes self take on the contents, shape and
    ## order of other. By default self simply starts
    ## referring to the very same underlying storage
    ## as other; when copy is true the elements of
    ## other are instead copied into the storage that
    ## self already refers to
    if not copy:
        # Share the reference: no element is duplicated
        self.data = other.data
    else:
        # Assigning through the dereference copies the sequence
        self.data[] = other.data[]
    self.shape = other.shape
    self.order = other.order


when isMainModule:
    import math

    proc pow(a, b: int): int =
        return a ^ b

    var m = newMatrix[int](@[@[1, 2, 3], @[4, 5, 6]])
    var k = m.transpose()
    doAssert k[2, 1] == m[1, 2], "transpose mismatch"
    doAssert all(m.transpose() == k), "transpose mismatch"
    doAssert k.sum() == m.sum(), "element sum mismatch"
    doAssert all(k.sum(axis=1) == m.sum(axis=0)), "sum over axis mismatch"
    doAssert all(k.sum(axis=0) == m.sum(axis=1)), "sum over axis mismatch" 
    var y = newMatrix[int](@[1, 2, 3, 4])
    doAssert y.sum() == 10, "element sum mismatch"
    doAssert (y + y).sum() == 20, "matrix sum mismatch"
    doAssert all(m + m == m * 2), "m + m != m * 2"
    var z = newMatrix[int](@[1, 2, 3])
    doAssert (m * z).sum() == 46, "matrix multiplication mismatch"
    doAssert all(z * z == z.apply(pow, 2, axis = -1, copy=true)), "matrix multiplication mismatch"
    var x = newMatrix[int](@[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    doAssert (x < 5).where(x, x * 10).sum() == 360, "where mismatch"
    doAssert all((x < 5).where(x, x * 10) == x.where(x < 5, x * 10)), "where mismatch"
    doAssert x.max() == 9, "max mismatch"
    doAssert x.argmax() == 10, "argmax mismatch"
    doAssert all(newMatrix[int](@[12, 23]).dot(newMatrix[int](@[@[11, 22], @[33, 44]])) == newMatrix[int](@[891, 1276])), "dot product mismatch"
    doAssert all(newMatrix[int](@[@[1, 2, 3], @[2, 3, 4]]).dot(newMatrix[int](@[1, 2, 3])) == newMatrix[int](@[14, 20])), "dot product mismatch"
    doAssert all(m.diag() == newMatrix[int](@[1, 5])), "diagonal mismatch"
    doAssert all(m.diag(1) == newMatrix[int](@[2, 6])), "diagonal mismatch"
    doAssert all(m.diag(2) == newMatrix[int](@[3])), "diagonal mismatch"
    doAssert m.diag(3).len() == 0, "diagonal mismatch"
    var j = m.fliplr()
    doAssert all(j.diag() == newMatrix[int](@[3, 5])), "diagonal mismatch"
    doAssert all(j.diag(1) == newMatrix[int](@[2, 4])), "diagonal mismatch"
    doAssert all(j.diag(2) == newMatrix[int](@[1])), "diagonal mismatch"
    doAssert j.diag(3).len() == 0, "diagonal mismatch"
    var o = newMatrix[int](@[1, 2, 3])
    doAssert all(o.diag() == newMatrix[int](@[@[1, 0, 0], @[0, 2, 0], @[0, 0, 3]])), "diagonal mismatch"
    var n = newMatrix[int](@[@[1, 2], @[3, 4]])
    doAssert all(n.diagflat() == newMatrix[int](@[@[1, 0, 0, 0], @[0, 2, 0, 0], @[0, 0, 3, 0], @[0, 0, 0, 4]])), "diagflat mismatch"
    doAssert (newMatrix[int](@[1, 2, 3]) + newMatrix[int](@[@[1, 2], @[3, 4], @[5, 6]]).transpose()).sum() == 33, "matrix sum mismatch"
    doAssert (newMatrix[int](@[@[1, 2], @[3, 4], @[5, 6]]).transpose() + newMatrix[int](@[1, 2, 3])).sum() == 33, "matrix sum mismatch"