diff --git a/src/main.nim b/src/main.nim
index f87ac2d..3868677 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -33,22 +33,23 @@ proc loadData: tuple[corpus, results, testset, testResults: Matrix[string]] =
                 testResults: newMatrix(testResults))


-proc testMetrics(predictions, y: Matrix[float]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
+proc testMetrics(predictions, y: Matrix[int]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
     # Computes the test metrics given the predictions and the
     # expected results
     var confusion = @[0, 0, 0, 0]  # TP, TN, FP, FN
     var success = 0
     var i = 0
     while i < y.len():
         var predicted = predictions[0][i]
         var expected = y[0][i]
         if predicted == expected:
             success += 1
-            if predicted == 1.0:
+            if predicted == 1:
                 confusion[0] += 1
             else:
                 confusion[1] += 1
-        elif predicted == 1.0 and expected == 0.0:
+        elif predicted == 1 and expected == 0:
             confusion[2] += 1
         else:
             confusion[3] += 1
+        inc(i)  # advance at the end of the body so index 0 is not skipped and i stays in range
@@ -84,7 +85,7 @@ proc main =
                        "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
                        "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't", "weren",
                        "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 100
+    const epochs = 10
     const batch = 200
     const inputSize = 512
     let encoder = newLabelEncoder()
@@ -96,13 +97,13 @@ proc main =
                                              newDenseLayer(8, 16),
                                              newDenseLayer(16, 2)],
                                      lossFunc=MSE,
-                                     activationFunc=ReLU,
+                                     activationFunc=Softmax,
                                      learnRate=5,
                                      momentum=0.3,
                                      weightRange=(-10.0, 10.0),
                                      biasRange=(-10.0, 10.0))
     echo "ProjectSydney v0.2b - Accuracy test"
-    echo "\nLoading dataset and testset"
+    echo "Loading dataset and testset"
     let loadTime = cpuTime()
     let data = loadData()
     echo &"Data loaded in {cpuTime() - loadTime:.2f} seconds"
@@ -110,15 +111,16 @@ proc main =
     let vectorTime = cpuTime()
     let xTrain = vectorizer.fitTransform(data.corpus, data.corpus)
     let yTrain = encoder.fitTransform(data.results, data.results)[0]
-    echo &"Vectorized in {cpuTime() - vectorTime:.2f} seconds"
-    echo &"Feature count: {len(vectorizer.getFeatureNames())}"
-    echo &"Vocabulary size: {len(vectorizer.getVocabulary())}"
-    echo &"Corpus size: {len(data.corpus)}"
+    echo &"Data vectorized in {cpuTime() - vectorTime:.2f} seconds"
+    echo &"\t- Feature count: {len(vectorizer.getFeatureNames())}"
+    echo &"\t- Vocabulary size: {len(vectorizer.getVocabulary())}"
+    echo &"\t- Corpus size: {len(data.corpus)}"
     let yTest = encoder.transform(data.testResults)[0]
     let xTest = vectorizer.transform(data.testset)
     var tempData: seq[float] = newSeqOfCap[float](inputSize)
     var trainData: seq[tuple[x, y: Matrix[float]]] = @[]
     var testData: seq[tuple[x, y: Matrix[float]]] = @[]
+    var testTruth: seq[int] = @[]
     # Pad the data to fit into the network
     for i, row in xTrain:
         for e in row:
@@ -140,14 +142,16 @@ proc main =
         while tempData.len() < inputSize:
             tempData.add(0.0)
         if yTest[i] == 1:
+            testTruth.add(1)
             testData.add((newMatrix[float](tempData), newMatrix[float](@[1.0, 0.0])))
         else:
+            testTruth.add(0)
             testData.add((newMatrix[float](tempData), newMatrix[float](@[0.0, 1.0])))
         tempData.setLen(0)
     echo "Classifier parameters"
-    echo &"\tLearn rate: {classifier.learnRate}"
-    echo &"\tMomentum: {classifier.momentum}"
-    stdout.write("\tNetwork layout: ")
+    echo &"\t- Learn rate: {classifier.learnRate}"
+    echo &"\t- Momentum: {classifier.momentum}"
+    stdout.write("\t- Network layout: ")
     for i, layer in classifier.layers:
         stdout.write(&"{layer.inputSize}x{layer.outputSize}")
         if i < classifier.layers.high():
@@ -155,24 +159,27 @@ proc main =
     echo ""
     echo &"Training neural network for {epochs} epochs with batch size of {batch}"
&"Training neural network for {epochs} epochs with batch size of {batch}" let trainTime = cpuTime() - classifier.train(epochs, batch, trainData, testData) - echo &"Training completed in {cpuTime() - trainTime:.2f} seconds" - #[echo "\nTest parameters" - echo &"\tTest size: {len(data.testset)}" + classifier.train(epochs, batch, trainData, #[testData]#) + echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test" + echo "\nTest parameters" + echo &"\t- Test size: {len(data.testset)}" let testTime = cpuTime() - let predictions = classifier.fastFeedForward(xTest) - let metrics = testMetrics(predictions, y_test) - echo &"\nTest completed in {cpuTime() - testTime:.2f} seconds, metrics below" - echo &"\tAccuracy: {metrics.accuracy * 100:.2f}%" - echo &"\tRecall: {metrics.recall:.2f}" - echo &"\tPrecision: {metrics.precision:.2f}" - echo &"\tF1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}" + var pred: seq[int] = @[] + for sample in xTest: + pred.add(classifier.predict(sample.copy())) + let predictions = newMatrix[int](pred) + let metrics = testMetrics(predictions, newMatrix[int](testTruth)) + echo &"Test completed in {cpuTime() - testTime:.2f} seconds, metrics below" + echo &"\t- Accuracy: {metrics.accuracy * 100:.2f}%" + echo &"\t- Recall: {metrics.recall:.2f}" + echo &"\t- Precision: {metrics.precision:.2f}" + echo &"\t- F1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}" echo "\tConfusion matrix" - echo &"\t\tTrue positives: {metrics.tP}" - echo &"\t\tTrue negatives: {metrics.tN}" - echo &"\t\tFalse negatives: {metrics.fN}" - echo &"\t\tFalse positives: {metrics.fP}" - ]# + echo &"\t\t- True positives: {metrics.tP}" + echo &"\t\t- True negatives: {metrics.tN}" + echo &"\t\t- False negatives: {metrics.fN}" + echo &"\t\t- False positives: {metrics.fP}" + when isMainModule: diff --git a/src/nn/network.nim b/src/nn/network.nim index 344af5a..a5fa0c6 100644 --- a/src/nn/network.nim +++ b/src/nn/network.nim @@ -134,7 +134,7 @@ proc feed(self: Layer, x: Matrix[float]): Matrix[float] = result = self.weights.dot(x) + self.biases -proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.used.} = +proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] = ## Feeds the given input through the network. The ## (activated) output from the last layer is returned result = x @@ -142,6 +142,11 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.use result = self.activation.function(layer.feed(result)) +proc predict*(self: NeuralNetwork, x: Matrix[float]): int = + ## Performs a prediction on x + result = self.fastFeedForward(x).argmax() + + proc feedForward(self: NeuralNetwork, x: Matrix[float]): seq[Matrix[float]] = ## Feeds the given input through the network. ## All unactivated outputs from each layer are @@ -306,11 +311,12 @@ proc softmax(input: Matrix[float]): Matrix[float] = var input = input - input.max() result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum() + proc softmaxDerivative(input: Matrix[float]): Matrix[float] = # I stole this too, by the way var input = input.reshape(input.shape.cols, 1) # I _love_ stealing functions from numpy! 
-    result = input.diagflat() - input.dot(input.transpose())
+    result = (input.diagflat() - input.dot(input.transpose())).flatten()

 proc relu(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
 proc dxRelu(input: Matrix[float]): Matrix[float] = where(input > 0.0, ones[float](input.shape), 0)
diff --git a/src/util/matrix.nim b/src/util/matrix.nim
index 92bc306..ff03c0b 100644
--- a/src/util/matrix.nim
+++ b/src/util/matrix.nim
@@ -552,7 +552,7 @@ proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
         result.data[] = newSeqOfCap[T](result.shape.getSize())
         result.shape = a.shape
         result.order = RowMajor
-        if result.shape.rows > 1:
+        if result.shape.rows >= 1:
            for row in 0..<result.shape.rows:
                for col in 0..<result.shape.cols:
                    result.data[].add(a[row, col] - b[row, col])
@@ ... @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
-    if self.shape.rows > 1 and other.shape.rows > 1:
+    if self.shape.rows >= 1 and other.shape.rows >= 1:
         when not defined(release):
             if self.shape.rows != other.shape.cols:
                 raise newException(ValueError, &"incompatible argument shapes for dot product")
@@ -944,14 +944,14 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
         for i in 0..<self.shape.rows:
-    elif self.shape.rows > 1:
+    elif self.shape.rows >= 1:
         when not defined(release):
             if self.shape.cols != other.shape.cols:
                 raise newException(ValueError, &"incompatible argument shapes for dot product")
         result = zeros[T]((0, self.shape.rows))
         for i in 0..<self.shape.rows:
-    elif other.shape.rows > 1:
+    elif other.shape.rows >= 1:
         return other.transpose().dot(self)
     else:
         return self * other
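
Note on the new prediction path: predict() returns the argmax index of the output layer, while the
test loop in main.nim stores testTruth.add(1) for samples whose one-hot target is @[1.0, 0.0]
(hot at index 0). For testMetrics to count hits correctly, the truth labels and the argmax indices
must follow the same convention, so it is worth double-checking that the training targets map the
positive class to output index 1, or adjusting testTruth (or the one-hot vectors) accordingly.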
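
The F1-score expression in main.nim, pow((pow(precision, -1) + pow(recall, -1)) / 2, -1), is the
harmonic mean of precision and recall written with reciprocals. A minimal equivalent sketch in Nim
(f1Score is a hypothetical helper, not part of this patch):

    proc f1Score(precision, recall: float): float =
        ## Harmonic mean of precision and recall:
        ## F1 = 2 * precision * recall / (precision + recall)
        if precision + recall == 0.0:
            return 0.0  # avoid division by zero when both metrics are zero
        result = 2 * precision * recall / (precision + recall)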
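
The softmaxDerivative change builds the full softmax Jacobian, diagflat(s) - s.dot(s^T), and then
flattens it into a single row. When only an elementwise derivative is needed, a common alternative
is to keep just the Jacobian's diagonal, s_i * (1 - s_i). A sketch of that variant, assuming only
the elementwise Matrix `-` and `*` operators that appear elsewhere in this diff (hypothetical
helper, not part of this patch):

    proc softmaxDerivativeDiagonal(s: Matrix[float]): Matrix[float] =
        ## Diagonal of the softmax Jacobian: ds_i/dx_i = s_i * (1 - s_i),
        ## computed elementwise as s - s * s
        result = s - s * s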