Fixed softmax derivative and dot product

This commit is contained in:
Mattia Giambirtone 2023-03-22 16:35:56 +01:00
parent 338732e9f9
commit 844883ced7
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
3 changed files with 48 additions and 35 deletions
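An editorial note on the first fix (not part of the original commit message): for a softmax output s = softmax(x), the derivative being implemented is the Jacobian

$$\frac{\partial s_i}{\partial x_j} = s_i\,(\delta_{ij} - s_j), \qquad J = \operatorname{diag}(s) - s\,s^{\top}$$

which is exactly the diagflat-minus-outer-product expression the corrected softmaxDerivative below computes.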

View File

@@ -33,22 +33,23 @@ proc loadData: tuple[corpus, results, testset, testResults: Matrix[string]] =
testResults: newMatrix(testResults))
-proc testMetrics(predictions, y: Matrix[float]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
+proc testMetrics(predictions, y: Matrix[int]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
# Computes the test metrics given the predictions and the
# expected results
var confusion = @[0, 0, 0, 0] # TP, TN, FP, FN
var success = 0
for i in 0..<y.len():
var predicted = predictions[0][i]
var expected = y[0][i]
if predicted == expected:
success += 1
-if predicted == 1.0:
+if predicted == 1:
confusion[0] += 1
else:
confusion[1] += 1
-elif predicted == 1.0 and expected == 0.0:
+elif predicted == 1 and expected == 0:
confusion[2] += 1
else:
confusion[3] += 1
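As a side note (not from the commit): the four counts above feed the standard formulas accuracy = (TP+TN)/total, precision = TP/(TP+FP), recall = TP/(TP+FN). A minimal self-contained Nim sketch with plain ints, independent of the project's Matrix type (metricsFromCounts is a hypothetical name):

# Hypothetical helper mirroring what testMetrics derives from TP/TN/FP/FN
proc metricsFromCounts(tP, tN, fP, fN: int): tuple[accuracy, precision, recall, f1: float] =
    let total = float(tP + tN + fP + fN)
    result.accuracy = float(tP + tN) / total
    result.precision = float(tP) / float(tP + fP)
    result.recall = float(tP) / float(tP + fN)
    # F1 is the harmonic mean of precision and recall
    result.f1 = 2 * result.precision * result.recall / (result.precision + result.recall)

when isMainModule:
    echo metricsFromCounts(40, 45, 5, 10)  # made-up counts, for illustration only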
@@ -84,7 +85,7 @@ proc main =
"isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
"needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
"weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-const epochs = 100
+const epochs = 10
const batch = 200
const inputSize = 512
let encoder = newLabelEncoder()
@@ -96,13 +97,13 @@ proc main =
newDenseLayer(8, 16),
newDenseLayer(16, 2)],
lossFunc=MSE,
-activationFunc=ReLU,
+activationFunc=Softmax,
learnRate=5,
momentum=0.3,
weightRange=(-10.0, 10.0),
biasRange=(-10.0, 10.0))
echo "ProjectSydney v0.2b - Accuracy test"
echo "\nLoading dataset and testset"
echo "Loading dataset and testset"
let loadTime = cpuTime()
let data = loadData()
echo &"Data loaded in {cpuTime() - loadTime:.2f} seconds"
@@ -110,15 +111,16 @@ proc main =
let vectorTime = cpuTime()
let xTrain = vectorizer.fitTransform(data.corpus, data.corpus)
let yTrain = encoder.fitTransform(data.results, data.results)[0]
echo &"Vectorized in {cpuTime() - vectorTime:.2f} seconds"
echo &"Feature count: {len(vectorizer.getFeatureNames())}"
echo &"Vocabulary size: {len(vectorizer.getVocabulary())}"
echo &"Corpus size: {len(data.corpus)}"
echo &"Data vectorized in {cpuTime() - vectorTime:.2f} seconds"
echo &"\t- Feature count: {len(vectorizer.getFeatureNames())}"
echo &"\t- Vocabulary size: {len(vectorizer.getVocabulary())}"
echo &"\t- Corpus size: {len(data.corpus)}"
let yTest = encoder.transform(data.testResults)[0]
let xTest = vectorizer.transform(data.testset)
var tempData: seq[float] = newSeqOfCap[float](inputSize)
var trainData: seq[tuple[x, y: Matrix[float]]] = @[]
var testData: seq[tuple[x, y: Matrix[float]]] = @[]
+var testTruth: seq[int] = @[]
# Pad the data to fit into the network
for i, row in xTrain:
for e in row:
@@ -140,14 +142,16 @@ proc main =
while tempData.len() < inputSize:
tempData.add(0.0)
if yTest[i] == 1:
+testTruth.add(1)
testData.add((newMatrix[float](tempData), newMatrix[float](@[1.0, 0.0])))
else:
+testTruth.add(0)
testData.add((newMatrix[float](tempData), newMatrix[float](@[0.0, 1.0])))
tempData.setLen(0)
echo "Classifier parameters"
echo &"\tLearn rate: {classifier.learnRate}"
echo &"\tMomentum: {classifier.momentum}"
stdout.write("\tNetwork layout: ")
echo &"\t- Learn rate: {classifier.learnRate}"
echo &"\t- Momentum: {classifier.momentum}"
stdout.write("\t- Network layout: ")
for i, layer in classifier.layers:
stdout.write(&"{layer.inputSize}x{layer.outputSize}")
if i < classifier.layers.high():
@@ -155,24 +159,27 @@ proc main =
echo ""
echo &"Training neural network for {epochs} epochs with batch size of {batch}"
let trainTime = cpuTime()
-classifier.train(epochs, batch, trainData, testData)
-echo &"Training completed in {cpuTime() - trainTime:.2f} seconds"
-#[echo "\nTest parameters"
-echo &"\tTest size: {len(data.testset)}"
+classifier.train(epochs, batch, trainData, #[testData]#)
+echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
+echo "\nTest parameters"
+echo &"\t- Test size: {len(data.testset)}"
let testTime = cpuTime()
-let predictions = classifier.fastFeedForward(xTest)
-let metrics = testMetrics(predictions, y_test)
-echo &"\nTest completed in {cpuTime() - testTime:.2f} seconds, metrics below"
-echo &"\tAccuracy: {metrics.accuracy * 100:.2f}%"
-echo &"\tRecall: {metrics.recall:.2f}"
-echo &"\tPrecision: {metrics.precision:.2f}"
-echo &"\tF1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
+var pred: seq[int] = @[]
+for sample in xTest:
+    pred.add(classifier.predict(sample.copy()))
+let predictions = newMatrix[int](pred)
+let metrics = testMetrics(predictions, newMatrix[int](testTruth))
+echo &"Test completed in {cpuTime() - testTime:.2f} seconds, metrics below"
+echo &"\t- Accuracy: {metrics.accuracy * 100:.2f}%"
+echo &"\t- Recall: {metrics.recall:.2f}"
+echo &"\t- Precision: {metrics.precision:.2f}"
+echo &"\t- F1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
echo "\tConfusion matrix"
echo &"\t\tTrue positives: {metrics.tP}"
echo &"\t\tTrue negatives: {metrics.tN}"
echo &"\t\tFalse negatives: {metrics.fN}"
echo &"\t\tFalse positives: {metrics.fP}"
]#
echo &"\t\t- True positives: {metrics.tP}"
echo &"\t\t- True negatives: {metrics.tN}"
echo &"\t\t- False negatives: {metrics.fN}"
echo &"\t\t- False positives: {metrics.fP}"
when isMainModule:
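One convention worth spelling out (editorial note): in the padding loop above, label 1 maps to the one-hot target @[1.0, 0.0] and label 0 to @[0.0, 1.0], so the positive class lives at index 0 of the output vector; since predict returns a raw argmax, the decode step needs to honor that same mapping. A tiny self-contained Nim sketch of the convention (oneHot and labelOf are hypothetical helpers, not the project's API):

# class 1 -> @[1.0, 0.0], class 0 -> @[0.0, 1.0], as in the loop above
proc oneHot(label: int): seq[float] =
    if label == 1: @[1.0, 0.0]
    else: @[0.0, 1.0]

proc labelOf(output: seq[float]): int =
    # index 0 holds the positive class, so argmax index 0 decodes to label 1
    if output[0] >= output[1]: 1 else: 0

when isMainModule:
    assert labelOf(oneHot(1)) == 1
    assert labelOf(oneHot(0)) == 0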

View File

@@ -134,7 +134,7 @@ proc feed(self: Layer, x: Matrix[float]): Matrix[float] =
result = self.weights.dot(x) + self.biases
-proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.used.} =
+proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
## Feeds the given input through the network. The
## (activated) output from the last layer is returned
result = x
@@ -142,6 +142,11 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.use
result = self.activation.function(layer.feed(result))
+proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
+    ## Performs a prediction on x
+    result = self.fastFeedForward(x).argmax()
proc feedForward(self: NeuralNetwork, x: Matrix[float]): seq[Matrix[float]] =
## Feeds the given input through the network.
## All unactivated outputs from each layer are
@@ -306,11 +311,12 @@ proc softmax(input: Matrix[float]): Matrix[float] =
var input = input - input.max()
result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
proc softmaxDerivative(input: Matrix[float]): Matrix[float] =
-# I stole this too, by the way
-result = input.diagflat() - input.dot(input.transpose())
+var input = input.reshape(input.shape.cols, 1)
+# I _love_ stealing functions from numpy!
+result = (input.diagflat() - input.dot(input.transpose())).flatten()
proc relu(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
proc dxRelu(input: Matrix[float]): Matrix[float] = where(input > 0.0, ones[float](input.shape), 0)
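To make the corrected math concrete (a sketch under the assumption of plain seqs, using nothing from the project's Matrix type): softmax with the usual subtract-the-max trick for numerical stability, and its Jacobian diag(s) - s*s^T, which is what the diagflat/dot/transpose chain above expresses.

import std/math, std/sequtils

# Numerically stable softmax: subtracting max(x) leaves the result
# unchanged but keeps exp() from overflowing (same trick as above)
proc softmax(x: seq[float]): seq[float] =
    let m = max(x)
    let exps = x.mapIt(exp(it - m))
    let total = exps.sum()
    exps.mapIt(it / total)

# Jacobian of softmax: J[i][j] = s[i] * (delta(i, j) - s[j]),
# i.e. diag(s) - s s^T, matching diagflat(input) - input.dot(input.transpose())
proc softmaxJacobian(s: seq[float]): seq[seq[float]] =
    result = newSeqWith(s.len, newSeq[float](s.len))
    for i in 0 ..< s.len:
        for j in 0 ..< s.len:
            result[i][j] = s[i] * ((if i == j: 1.0 else: 0.0) - s[j])

when isMainModule:
    echo softmaxJacobian(softmax(@[1.0, 2.0, 3.0]))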

View File

@@ -552,7 +552,7 @@ proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
result.data[] = newSeqOfCap[T](result.shape.getSize())
result.shape = a.shape
result.order = RowMajor
-if result.shape.rows > 1:
+if result.shape.rows >= 1:
for row in 0..<result.shape.rows:
for m in a[row] - b[row]:
for element in m:
@@ -935,7 +935,7 @@ proc `$`*[T](self: Matrix[T]): string =
proc dot*[T](self, other: Matrix[T]): Matrix[T] =
## Computes the dot product of the two
## input matrices
-if self.shape.rows > 1 and other.shape.rows > 1:
+if self.shape.rows >= 1 and other.shape.rows >= 1:
when not defined(release):
if self.shape.rows != other.shape.cols:
raise newException(ValueError, &"incompatible argument shapes for dot product")
@@ -944,14 +944,14 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
for i in 0..<result.shape.rows:
for j in 0..<result.shape.cols:
result[i, j] = (self[i] * other[j]).sum()
-elif self.shape.rows > 1:
+elif self.shape.rows >= 1:
when not defined(release):
if self.shape.cols != other.shape.cols:
raise newException(ValueError, &"incompatible argument shapes for dot product")
result = zeros[T]((0, self.shape.rows))
for i in 0..<result.shape.cols:
result[0, i] = (self[i] * other[0]).sum()
-elif other.shape.rows > 1:
+elif other.shape.rows >= 1:
return other.transpose().dot(self)
else:
return self * other
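A closing editorial note on the matrix fixes: with the old strict comparison (rows > 1), a single-row matrix fell through every branch of dot and landed in the final elementwise self * other case, and the subtraction operator similarly excluded single-row matrices from its row loop; >= 1 routes 1xN inputs through the proper product. A minimal illustration of the intended behavior with plain seqs (dotProduct is a hypothetical helper, not the library's API):

# A 1xN row times an Nx1 column is a sum of products,
# not an elementwise product
proc dotProduct(a, b: seq[float]): float =
    assert a.len == b.len, "incompatible argument shapes for dot product"
    for i in 0 ..< a.len:
        result += a[i] * b[i]

when isMainModule:
    echo dotProduct(@[1.0, 2.0, 3.0], @[4.0, 5.0, 6.0])  # 32.0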