diff --git a/README.md b/README.md
index 5f6b56d..cde6227 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,11 @@
 # NNExperiments
+AI stuff.
+
+## TODOs
+
+- Regularization (L1/L2)
+- Implement momentum
+- Optimize matrix multiplication
+- ???
+- Profit
 
diff --git a/src/main.nim b/src/main.nim
index 01a5d39..195d535 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -2,9 +2,11 @@
 import nn/network
 import nn/util/matrix
 
-var mlp = newNeuralNetwork(@[newDenseLayer(784, 10, Sigmoid),
-                             newDenseLayer(10, 16, Sigmoid),
-                             newDenseLayer(16, 10, Softmax)],
-                           lossFunc=MSE, learnRate=0.05, momentum=0.55,
+var mlp = newNeuralNetwork(@[newDenseLayer(784, 10),
+                             newDenseLayer(10, 16),
+                             newDenseLayer(16, 10)],
+                           lossFunc=MSE, activationFunc=Softmax,
+                           learnRate=0.05, momentum=0.55,
                            weightRange=(start: -1.0, stop: 1.0),
                            biasRange=(start: -1.0, stop: 1.0))
+
diff --git a/src/nn/network.nim b/src/nn/network.nim
index 1d38655..423db38 100644
--- a/src/nn/network.nim
+++ b/src/nn/network.nim
@@ -29,7 +29,8 @@ type
         ## A generic feed-forward
         ## neural network
         layers*: seq[Layer]
-        loss: Loss  # The cost function along with its derivative
+        loss: Loss  # The network's cost function
+        activation: Activation  # The network's activation function
         # The network's learn rate determines
         # the amount of progress that is made
         # at each step when performing gradient
@@ -41,23 +42,25 @@ type
         # we nudge our inputs for our next epoch
         momentum*: float
     Loss* = ref object
-        ## A loss function and its derivative
+        ## A vectorized loss function and its derivative
         function: proc (a, b: Matrix[float]): float
         derivative: proc (x, y: Matrix[float]): Matrix[float] {.noSideEffect.}
     Activation* = ref object
-        ## An activation function
+        ## A vectorized activation function and its
+        ## derivative
         function: proc (input: Matrix[float]): Matrix[float] {.noSideEffect.}
         derivative: proc (x: Matrix[float]): Matrix[float] {.noSideEffect.}
+    LayerKind* = enum
+        ## A layer enumeration
+        Dense, Dropout, Sparse
     Layer* = ref object
         ## A generic neural network
         ## layer
+        kind*: LayerKind  # TODO (add dropout and sparse layer!)
         inputSize*: int  # The number of inputs we process
         outputSize*: int  # The number of outputs we produce
         weights*: Matrix[float]  # The weights for each connection (2D)
         biases*: Matrix[float]  # The biases for each neuron (1D)
-        gradients: tuple[weights, biases: Matrix[float]]  # Gradient coefficients for weights and biases
-        activation: Activation  # The layer's activation function
-
 
 
 proc `$`*(self: Layer): string =
@@ -86,82 +89,109 @@ proc newActivation*(function: proc (input: Matrix[float]): Matrix[float] {.noSid
     result.derivative = derivative
 
 
-proc newDenseLayer*(inputSize: int, outputSize: int, activationFunc: Activation): Layer =
+proc newDenseLayer*(inputSize: int, outputSize: int): Layer =
     ## Creates a new dense layer with inputSize input
-    ## parameters and outputSize outgoing outputs and
-    ## using the chosen activation function.
+    ## parameters and outputSize outgoing outputs.
     new(result)
     result.inputSize = inputSize
     result.outputSize = outputSize
-    result.activation = activationFunc
+    result.kind = Dense
 
 
-proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, learnRate: float, momentum: float,
-                       weightRange, biasRange: tuple[start, stop: float]): NeuralNetwork =
+proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Activation,
+                       learnRate: float, momentum: float, weightRange,
+                       biasRange: tuple[start, stop: float]): NeuralNetwork =
     ## Initializes a new neural network with
     ## the given topology and hyperparameters.
     ## Weights and biases are initialized with
-    ## random values in the chosen range
+    ## random values in the chosen range using
+    ## nim's default PRNG
     new(result)
     result.layers = topology
     for layer in result.layers:
         var biases = newSeqOfCap[float](layer.outputSize)
-        var biasGradients = newSeqOfCap[float](layer.outputSize)
         for _ in 0.. 0.0, 0.0)
 # TODO: Add derivatives for this stuff
 func step(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = (if x < 0.0: 0.0 else: x), axis = -1)
 func silu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = 1 / (1 + exp(-x)), axis= -1)
-func relu(input: Matrix[float]): Matrix[float] {.used.} = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
+
 
 func htan(input: Matrix[float]): Matrix[float] {.used.} =
     let f = proc (x: float): float =
@@ -260,6 +299,7 @@ func htan(input: Matrix[float]): Matrix[float] {.used.} =
 {.hints: off.}  # So nim doesn't complain about the naming
 var Sigmoid* = newActivation(sigmoid, sigmoidDerivative)
 var Softmax* = newActivation(softmax, softmaxDerivative)
+var ReLU* = newActivation(relu, dxRelu)
 var MSE* = newLoss(mse, dxMSE)
 {.pop.}
 
diff --git a/src/nn/util/matrix.nim b/src/nn/util/matrix.nim
index 708895c..4e58d7f 100644
--- a/src/nn/util/matrix.nim
+++ b/src/nn/util/matrix.nim
@@ -1079,7 +1079,7 @@ proc count*[T](self: Matrix[T], e: T): int =
 proc replace*[T](self: Matrix[T], other: Matrix[T], copy: bool = false) =
     ## Replaces the data in self with the data from
     ## other (a copy is not performed unless copy equals
-    ## true). A reference to the object is returned
+    ## true)
     if copy:
         self.data[] = other.data[]
     else:
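
Note on the new ReLU export: the patch wires up var ReLU* = newActivation(relu, dxRelu), but the bodies of relu and dxRelu do not appear in full in the hunks above. Purely as an illustrative sketch (not the repository's actual implementation), an element-wise pair with the shape newActivation expects could be built on the same Matrix apply helper used by the removed relu line:

    import nn/util/matrix

    # Sketch: element-wise ReLU, max(0, x) on every entry of the matrix
    func relu(input: Matrix[float]): Matrix[float] =
        input.apply(proc (x: float): float = max(0.0, x), axis = -1)

    # Sketch: assumed derivative, 1.0 where x > 0 and 0.0 elsewhere (not taken from the patch)
    func dxRelu(input: Matrix[float]): Matrix[float] =
        input.apply(proc (x: float): float = (if x > 0.0: 1.0 else: 0.0), axis = -1)

Both procs map a Matrix[float] to a Matrix[float] and are side-effect free, which is what the function and derivative fields of the updated Activation object require.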