Fixed softmax derivative and dot product

2023-03-22 16:35:56 +01:00 · 2023-03-22 16:35:56 +01:00 · 844883ced7
parent 338732e9f9
commit 844883ced7
3 changed files with 48 additions and 35 deletions
--- a/src/main.nim
+++ b/src/main.nim
@ -33,22 +33,23 @@ proc loadData: tuple[corpus, results, testset, testResults: Matrix[string]] =
              testResults: newMatrix(testResults))
-proc testMetrics(predictions, y: Matrix[float]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
+proc testMetrics(predictions, y: Matrix[int]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
    # Computes the test metrics given the predictions and the 
    # expected results
    var confusion = @[0, 0, 0, 0] # TP, TN, FP, FN
    var success = 0
    var i = 0
    while i < y.len():
        inc(i)
        var predicted = predictions[0][i]
        var expected = y[0][i]
        if predicted == expected:
            success += 1
-            if predicted == 1.0:
+            if predicted == 1:
                confusion[0] += 1
            else:
                confusion[1] += 1
-        elif predicted == 1.0 and expected == 0.0:
+        elif predicted == 1 and expected == 0:
            confusion[2] += 1
        else:
            confusion[3] += 1
@ -84,7 +85,7 @@ proc main =
 "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn", 
 "needn't", "shan", "shan't", "shouldn",  "shouldn't", "wasn", "wasn't", 
 "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 100
+    const epochs = 10
    const batch = 200
    const inputSize = 512
    let encoder = newLabelEncoder()
@ -96,13 +97,13 @@ proc main =
                                        newDenseLayer(8, 16),
                                        newDenseLayer(16, 2)],
                                        lossFunc=MSE,
-                                        activationFunc=ReLU,
+                                        activationFunc=Softmax,
                                        learnRate=5,
                                        momentum=0.3,
                                        weightRange=(-10.0, 10.0),
                                        biasRange=(-10.0, 10.0))
    echo "ProjectSydney v0.2b - Accuracy test"
-    echo "\nLoading dataset and testset"
+    echo "Loading dataset and testset"
    let loadTime = cpuTime()
    let data = loadData()
    echo &"Data loaded in {cpuTime() - loadTime:.2f} seconds"
@ -110,15 +111,16 @@ proc main =
    let vectorTime = cpuTime()
    let xTrain = vectorizer.fitTransform(data.corpus, data.corpus)
    let yTrain = encoder.fitTransform(data.results, data.results)[0]
-    echo &"Vectorized in {cpuTime() - vectorTime:.2f} seconds"
+    echo &"Data vectorized in {cpuTime() - vectorTime:.2f} seconds"
-    echo &"Feature count: {len(vectorizer.getFeatureNames())}"
+    echo &"\t- Feature count: {len(vectorizer.getFeatureNames())}"
-    echo &"Vocabulary size: {len(vectorizer.getVocabulary())}"
+    echo &"\t- Vocabulary size: {len(vectorizer.getVocabulary())}"
-    echo &"Corpus size: {len(data.corpus)}"
+    echo &"\t- Corpus size: {len(data.corpus)}"
    let yTest = encoder.transform(data.testResults)[0]
    let xTest = vectorizer.transform(data.testset)
    var tempData: seq[float] = newSeqOfCap[float](inputSize)
    var trainData: seq[tuple[x, y: Matrix[float]]] = @[]
    var testData: seq[tuple[x, y: Matrix[float]]] = @[]
    var testTruth: seq[int] = @[]
    # Pad the data to fit into the network
    for i, row in xTrain:
        for e in row:
@ -140,14 +142,16 @@ proc main =
        while tempData.len() < inputSize:
            tempData.add(0.0)
        if yTest[i] == 1:
            testTruth.add(1)
            testData.add((newMatrix[float](tempData), newMatrix[float](@[1.0, 0.0])))
        else:
            testTruth.add(0)
            testData.add((newMatrix[float](tempData), newMatrix[float](@[0.0, 1.0])))
        tempData.setLen(0)
    echo "Classifier parameters"
-    echo &"\tLearn rate: {classifier.learnRate}"
+    echo &"\t- Learn rate: {classifier.learnRate}"
-    echo &"\tMomentum: {classifier.momentum}"
+    echo &"\t- Momentum: {classifier.momentum}"
-    stdout.write("\tNetwork layout: ")
+    stdout.write("\t- Network layout: ")
    for i, layer in classifier.layers:
        stdout.write(&"{layer.inputSize}x{layer.outputSize}")
        if i < classifier.layers.high():
@ -155,24 +159,27 @@ proc main =
    echo ""
    echo &"Training neural network for {epochs} epochs with batch size of {batch}"
    let trainTime = cpuTime()
-    classifier.train(epochs, batch, trainData, testData)
+    classifier.train(epochs, batch, trainData, #[testData]#)
-    echo &"Training completed in {cpuTime() - trainTime:.2f} seconds"
+    echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
-    #[echo "\nTest parameters"
+    echo "\nTest parameters"
-    echo &"\tTest size: {len(data.testset)}"
+    echo &"\t- Test size: {len(data.testset)}"
    let testTime = cpuTime()
-    let predictions = classifier.fastFeedForward(xTest)
+    var pred: seq[int] = @[]
-    let metrics = testMetrics(predictions, y_test)
+    for sample in xTest:
-    echo &"\nTest completed in {cpuTime() - testTime:.2f} seconds, metrics below"
+        pred.add(classifier.predict(sample.copy()))
-    echo &"\tAccuracy: {metrics.accuracy * 100:.2f}%"
+    let predictions = newMatrix[int](pred)
-    echo &"\tRecall: {metrics.recall:.2f}"
+    let metrics = testMetrics(predictions, newMatrix[int](testTruth))
-    echo &"\tPrecision: {metrics.precision:.2f}"
+    echo &"Test completed in {cpuTime() - testTime:.2f} seconds, metrics below"
-    echo &"\tF1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
+    echo &"\t- Accuracy: {metrics.accuracy * 100:.2f}%"
    echo &"\t- Recall: {metrics.recall:.2f}"
    echo &"\t- Precision: {metrics.precision:.2f}"
    echo &"\t- F1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
    echo "\tConfusion matrix"
-    echo &"\t\tTrue positives: {metrics.tP}"
+    echo &"\t\t- True positives: {metrics.tP}"
-    echo &"\t\tTrue negatives: {metrics.tN}"
+    echo &"\t\t- True negatives: {metrics.tN}"
-    echo &"\t\tFalse negatives: {metrics.fN}"
+    echo &"\t\t- False negatives: {metrics.fN}"
-    echo &"\t\tFalse positives: {metrics.fP}"
+    echo &"\t\t- False positives: {metrics.fP}"
-    ]#
+
 when isMainModule:
--- a/src/nn/network.nim
+++ b/src/nn/network.nim
@ -134,7 +134,7 @@ proc feed(self: Layer, x: Matrix[float]): Matrix[float] =
    result = self.weights.dot(x) + self.biases
-proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.used.} =
+proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
    ## Feeds the given input through the network. The
    ## (activated) output from the last layer is returned
    result = x
@ -142,6 +142,11 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.use
        result = self.activation.function(layer.feed(result))
 proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
    ## Feeds x through the network with fastFeedForward and
    ## returns the argmax of the resulting output
    let output = fastFeedForward(self, x)
    return output.argmax()
 proc feedForward(self: NeuralNetwork, x: Matrix[float]): seq[Matrix[float]] =
    ## Feeds the given input through the network.
    ## All unactivated outputs from each layer are
@ -306,11 +311,12 @@ proc softmax(input: Matrix[float]): Matrix[float] =
    var input = input - input.max()
    result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
 proc softmaxDerivative(input: Matrix[float]): Matrix[float] =
    ## Derivative counterpart of softmax. The input is reshaped
    ## into a single column and diagflat(input) minus the product
    ## of input with its own transpose is computed; the updated
    ## line additionally flattens that result before returning it.
    ## NOTE(review): this looks like the usual softmax Jacobian
    ## formula, which assumes input already holds softmax outputs
    ## rather than raw layer outputs - confirm against the caller
    # I stole this too, by the way
    # Reshape to (cols, 1) so that the dot product with the
    # transpose below is a column-by-row product
    var input = input.reshape(input.shape.cols, 1)
    # I _love_ stealing functions from numpy!
-    result = input.diagflat() - input.dot(input.transpose())
+    result = (input.diagflat() - input.dot(input.transpose())).flatten( )
 proc relu(input: Matrix[float]): Matrix[float] =
    ## Rectified linear unit: runs max(0.0, x) over the input
    ## through apply with axis = -1 (presumably element-wise,
    ## confirm against Matrix.apply)
    proc rectify(value: float): float = max(0.0, value)
    result = input.apply(rectify, axis = -1)
 proc dxRelu(input: Matrix[float]): Matrix[float] =
    ## Derivative counterpart of relu: hands `where` the comparison
    ## input > 0.0, a matrix of ones shaped like the input and the
    ## literal 0 (presumably a numpy-style selection yielding 1 where
    ## the input is positive and 0 elsewhere - confirm against where)
    let positive = input > 0.0
    let unit = ones[float](input.shape)
    result = where(positive, unit, 0)
--- a/src/util/matrix.nim
+++ b/src/util/matrix.nim
@ -552,7 +552,7 @@ proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
    result.data[] = newSeqOfCap[T](result.shape.getSize())
    result.shape = a.shape
    result.order = RowMajor
-    if result.shape.rows > 1:
+    if result.shape.rows >= 1:
        for row in 0..<result.shape.rows:
            for m in a[row] - b[row]:
                for element in m:
@ -935,7 +935,7 @@ proc `$`*[T](self: Matrix[T]): string =
 proc dot*[T](self, other: Matrix[T]): Matrix[T] =
    ## Computes the dot product of the two
    ## input matrices
-    if self.shape.rows > 1 and other.shape.rows > 1:
+    if self.shape.rows >= 1 and other.shape.rows >= 1:
        when not defined(release):
            if self.shape.rows != other.shape.cols:
                raise newException(ValueError, &"incompatible argument shapes for dot product")
@ -944,14 +944,14 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
        for i in 0..<result.shape.rows:
            for j in 0..<result.shape.cols:
                result[i, j] = (self[i] * other[j]).sum()
-    elif self.shape.rows > 1:
+    elif self.shape.rows >= 1:
        when not defined(release):
            if self.shape.cols != other.shape.cols:
                raise newException(ValueError, &"incompatible argument shapes for dot product")
        result = zeros[T]((0, self.shape.rows))
        for i in 0..<result.shape.cols:
            result[0, i] = (self[i] * other[0]).sum()
-    elif other.shape.rows > 1:
+    elif other.shape.rows >= 1:
        return other.transpose().dot(self)
    else:
        return self * other