Fixed softmax derivative and dot product
This commit is contained in:
parent
338732e9f9
commit
844883ced7
65
src/main.nim
65
src/main.nim
|
@ -33,22 +33,23 @@ proc loadData: tuple[corpus, results, testset, testResults: Matrix[string]] =
|
||||||
testResults: newMatrix(testResults))
|
testResults: newMatrix(testResults))
|
||||||
|
|
||||||
|
|
||||||
proc testMetrics(predictions, y: Matrix[float]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
|
proc testMetrics(predictions, y: Matrix[int]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
|
||||||
# Computes the test metrics given the predictions and the
|
# Computes the test metrics given the predictions and the
|
||||||
# expected results
|
# expected results
|
||||||
var confusion = @[0, 0, 0, 0] # TP, TN, FP, FN
|
var confusion = @[0, 0, 0, 0] # TP, TN, FP, FN
|
||||||
var success = 0
|
var success = 0
|
||||||
var i = 0
|
var i = 0
|
||||||
while i < y.len():
|
while i < y.len():
|
||||||
|
inc(i)
|
||||||
var predicted = predictions[0][i]
|
var predicted = predictions[0][i]
|
||||||
var expected = y[0][i]
|
var expected = y[0][i]
|
||||||
if predicted == expected:
|
if predicted == expected:
|
||||||
success += 1
|
success += 1
|
||||||
if predicted == 1.0:
|
if predicted == 1:
|
||||||
confusion[0] += 1
|
confusion[0] += 1
|
||||||
else:
|
else:
|
||||||
confusion[1] += 1
|
confusion[1] += 1
|
||||||
elif predicted == 1.0 and expected == 0.0:
|
elif predicted == 1 and expected == 0:
|
||||||
confusion[2] += 1
|
confusion[2] += 1
|
||||||
else:
|
else:
|
||||||
confusion[3] += 1
|
confusion[3] += 1
|
||||||
|
@ -84,7 +85,7 @@ proc main =
|
||||||
"isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
|
"isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
|
||||||
"needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
|
"needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
|
||||||
"weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
|
"weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
|
||||||
const epochs = 100
|
const epochs = 10
|
||||||
const batch = 200
|
const batch = 200
|
||||||
const inputSize = 512
|
const inputSize = 512
|
||||||
let encoder = newLabelEncoder()
|
let encoder = newLabelEncoder()
|
||||||
|
@ -96,13 +97,13 @@ proc main =
|
||||||
newDenseLayer(8, 16),
|
newDenseLayer(8, 16),
|
||||||
newDenseLayer(16, 2)],
|
newDenseLayer(16, 2)],
|
||||||
lossFunc=MSE,
|
lossFunc=MSE,
|
||||||
activationFunc=ReLU,
|
activationFunc=Softmax,
|
||||||
learnRate=5,
|
learnRate=5,
|
||||||
momentum=0.3,
|
momentum=0.3,
|
||||||
weightRange=(-10.0, 10.0),
|
weightRange=(-10.0, 10.0),
|
||||||
biasRange=(-10.0, 10.0))
|
biasRange=(-10.0, 10.0))
|
||||||
echo "ProjectSydney v0.2b - Accuracy test"
|
echo "ProjectSydney v0.2b - Accuracy test"
|
||||||
echo "\nLoading dataset and testset"
|
echo "Loading dataset and testset"
|
||||||
let loadTime = cpuTime()
|
let loadTime = cpuTime()
|
||||||
let data = loadData()
|
let data = loadData()
|
||||||
echo &"Data loaded in {cpuTime() - loadTime:.2f} seconds"
|
echo &"Data loaded in {cpuTime() - loadTime:.2f} seconds"
|
||||||
|
@ -110,15 +111,16 @@ proc main =
|
||||||
let vectorTime = cpuTime()
|
let vectorTime = cpuTime()
|
||||||
let xTrain = vectorizer.fitTransform(data.corpus, data.corpus)
|
let xTrain = vectorizer.fitTransform(data.corpus, data.corpus)
|
||||||
let yTrain = encoder.fitTransform(data.results, data.results)[0]
|
let yTrain = encoder.fitTransform(data.results, data.results)[0]
|
||||||
echo &"Vectorized in {cpuTime() - vectorTime:.2f} seconds"
|
echo &"Data vectorized in {cpuTime() - vectorTime:.2f} seconds"
|
||||||
echo &"Feature count: {len(vectorizer.getFeatureNames())}"
|
echo &"\t- Feature count: {len(vectorizer.getFeatureNames())}"
|
||||||
echo &"Vocabulary size: {len(vectorizer.getVocabulary())}"
|
echo &"\t- Vocabulary size: {len(vectorizer.getVocabulary())}"
|
||||||
echo &"Corpus size: {len(data.corpus)}"
|
echo &"\t- Corpus size: {len(data.corpus)}"
|
||||||
let yTest = encoder.transform(data.testResults)[0]
|
let yTest = encoder.transform(data.testResults)[0]
|
||||||
let xTest = vectorizer.transform(data.testset)
|
let xTest = vectorizer.transform(data.testset)
|
||||||
var tempData: seq[float] = newSeqOfCap[float](inputSize)
|
var tempData: seq[float] = newSeqOfCap[float](inputSize)
|
||||||
var trainData: seq[tuple[x, y: Matrix[float]]] = @[]
|
var trainData: seq[tuple[x, y: Matrix[float]]] = @[]
|
||||||
var testData: seq[tuple[x, y: Matrix[float]]] = @[]
|
var testData: seq[tuple[x, y: Matrix[float]]] = @[]
|
||||||
|
var testTruth: seq[int] = @[]
|
||||||
# Pad the data to fit into the network
|
# Pad the data to fit into the network
|
||||||
for i, row in xTrain:
|
for i, row in xTrain:
|
||||||
for e in row:
|
for e in row:
|
||||||
|
@ -140,14 +142,16 @@ proc main =
|
||||||
while tempData.len() < inputSize:
|
while tempData.len() < inputSize:
|
||||||
tempData.add(0.0)
|
tempData.add(0.0)
|
||||||
if yTest[i] == 1:
|
if yTest[i] == 1:
|
||||||
|
testTruth.add(1)
|
||||||
testData.add((newMatrix[float](tempData), newMatrix[float](@[1.0, 0.0])))
|
testData.add((newMatrix[float](tempData), newMatrix[float](@[1.0, 0.0])))
|
||||||
else:
|
else:
|
||||||
|
testTruth.add(0)
|
||||||
testData.add((newMatrix[float](tempData), newMatrix[float](@[0.0, 1.0])))
|
testData.add((newMatrix[float](tempData), newMatrix[float](@[0.0, 1.0])))
|
||||||
tempData.setLen(0)
|
tempData.setLen(0)
|
||||||
echo "Classifier parameters"
|
echo "Classifier parameters"
|
||||||
echo &"\tLearn rate: {classifier.learnRate}"
|
echo &"\t- Learn rate: {classifier.learnRate}"
|
||||||
echo &"\tMomentum: {classifier.momentum}"
|
echo &"\t- Momentum: {classifier.momentum}"
|
||||||
stdout.write("\tNetwork layout: ")
|
stdout.write("\t- Network layout: ")
|
||||||
for i, layer in classifier.layers:
|
for i, layer in classifier.layers:
|
||||||
stdout.write(&"{layer.inputSize}x{layer.outputSize}")
|
stdout.write(&"{layer.inputSize}x{layer.outputSize}")
|
||||||
if i < classifier.layers.high():
|
if i < classifier.layers.high():
|
||||||
|
@ -155,24 +159,27 @@ proc main =
|
||||||
echo ""
|
echo ""
|
||||||
echo &"Training neural network for {epochs} epochs with batch size of {batch}"
|
echo &"Training neural network for {epochs} epochs with batch size of {batch}"
|
||||||
let trainTime = cpuTime()
|
let trainTime = cpuTime()
|
||||||
classifier.train(epochs, batch, trainData, testData)
|
classifier.train(epochs, batch, trainData, #[testData]#)
|
||||||
echo &"Training completed in {cpuTime() - trainTime:.2f} seconds"
|
echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
|
||||||
#[echo "\nTest parameters"
|
echo "\nTest parameters"
|
||||||
echo &"\tTest size: {len(data.testset)}"
|
echo &"\t- Test size: {len(data.testset)}"
|
||||||
let testTime = cpuTime()
|
let testTime = cpuTime()
|
||||||
let predictions = classifier.fastFeedForward(xTest)
|
var pred: seq[int] = @[]
|
||||||
let metrics = testMetrics(predictions, y_test)
|
for sample in xTest:
|
||||||
echo &"\nTest completed in {cpuTime() - testTime:.2f} seconds, metrics below"
|
pred.add(classifier.predict(sample.copy()))
|
||||||
echo &"\tAccuracy: {metrics.accuracy * 100:.2f}%"
|
let predictions = newMatrix[int](pred)
|
||||||
echo &"\tRecall: {metrics.recall:.2f}"
|
let metrics = testMetrics(predictions, newMatrix[int](testTruth))
|
||||||
echo &"\tPrecision: {metrics.precision:.2f}"
|
echo &"Test completed in {cpuTime() - testTime:.2f} seconds, metrics below"
|
||||||
echo &"\tF1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
|
echo &"\t- Accuracy: {metrics.accuracy * 100:.2f}%"
|
||||||
|
echo &"\t- Recall: {metrics.recall:.2f}"
|
||||||
|
echo &"\t- Precision: {metrics.precision:.2f}"
|
||||||
|
echo &"\t- F1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
|
||||||
echo "\tConfusion matrix"
|
echo "\tConfusion matrix"
|
||||||
echo &"\t\tTrue positives: {metrics.tP}"
|
echo &"\t\t- True positives: {metrics.tP}"
|
||||||
echo &"\t\tTrue negatives: {metrics.tN}"
|
echo &"\t\t- True negatives: {metrics.tN}"
|
||||||
echo &"\t\tFalse negatives: {metrics.fN}"
|
echo &"\t\t- False negatives: {metrics.fN}"
|
||||||
echo &"\t\tFalse positives: {metrics.fP}"
|
echo &"\t\t- False positives: {metrics.fP}"
|
||||||
]#
|
|
||||||
|
|
||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
|
|
|
@ -134,7 +134,7 @@ proc feed(self: Layer, x: Matrix[float]): Matrix[float] =
|
||||||
result = self.weights.dot(x) + self.biases
|
result = self.weights.dot(x) + self.biases
|
||||||
|
|
||||||
|
|
||||||
proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.used.} =
|
proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
|
||||||
## Feeds the given input through the network. The
|
## Feeds the given input through the network. The
|
||||||
## (activated) output from the last layer is returned
|
## (activated) output from the last layer is returned
|
||||||
result = x
|
result = x
|
||||||
|
@ -142,6 +142,11 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.use
|
||||||
result = self.activation.function(layer.feed(result))
|
result = self.activation.function(layer.feed(result))
|
||||||
|
|
||||||
|
|
||||||
|
proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
|
||||||
|
## Performs a prediction on x
|
||||||
|
result = self.fastFeedForward(x).argmax()
|
||||||
|
|
||||||
|
|
||||||
proc feedForward(self: NeuralNetwork, x: Matrix[float]): seq[Matrix[float]] =
|
proc feedForward(self: NeuralNetwork, x: Matrix[float]): seq[Matrix[float]] =
|
||||||
## Feeds the given input through the network.
|
## Feeds the given input through the network.
|
||||||
## All unactivated outputs from each layer are
|
## All unactivated outputs from each layer are
|
||||||
|
@ -306,11 +311,12 @@ proc softmax(input: Matrix[float]): Matrix[float] =
|
||||||
var input = input - input.max()
|
var input = input - input.max()
|
||||||
result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
|
result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
|
||||||
|
|
||||||
|
|
||||||
proc softmaxDerivative(input: Matrix[float]): Matrix[float] =
|
proc softmaxDerivative(input: Matrix[float]): Matrix[float] =
|
||||||
# I stole this too, by the way
|
# I stole this too, by the way
|
||||||
var input = input.reshape(input.shape.cols, 1)
|
var input = input.reshape(input.shape.cols, 1)
|
||||||
# I _love_ stealing functions from numpy!
|
# I _love_ stealing functions from numpy!
|
||||||
result = input.diagflat() - input.dot(input.transpose())
|
result = (input.diagflat() - input.dot(input.transpose())).flatten( )
|
||||||
|
|
||||||
proc relu(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
|
proc relu(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
|
||||||
proc dxRelu(input: Matrix[float]): Matrix[float] = where(input > 0.0, ones[float](input.shape), 0)
|
proc dxRelu(input: Matrix[float]): Matrix[float] = where(input > 0.0, ones[float](input.shape), 0)
|
||||||
|
|
|
@ -552,7 +552,7 @@ proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
|
||||||
result.data[] = newSeqOfCap[T](result.shape.getSize())
|
result.data[] = newSeqOfCap[T](result.shape.getSize())
|
||||||
result.shape = a.shape
|
result.shape = a.shape
|
||||||
result.order = RowMajor
|
result.order = RowMajor
|
||||||
if result.shape.rows > 1:
|
if result.shape.rows >= 1:
|
||||||
for row in 0..<result.shape.rows:
|
for row in 0..<result.shape.rows:
|
||||||
for m in a[row] - b[row]:
|
for m in a[row] - b[row]:
|
||||||
for element in m:
|
for element in m:
|
||||||
|
@ -935,7 +935,7 @@ proc `$`*[T](self: Matrix[T]): string =
|
||||||
proc dot*[T](self, other: Matrix[T]): Matrix[T] =
|
proc dot*[T](self, other: Matrix[T]): Matrix[T] =
|
||||||
## Computes the dot product of the two
|
## Computes the dot product of the two
|
||||||
## input matrices
|
## input matrices
|
||||||
if self.shape.rows > 1 and other.shape.rows > 1:
|
if self.shape.rows >= 1 and other.shape.rows >= 1:
|
||||||
when not defined(release):
|
when not defined(release):
|
||||||
if self.shape.rows != other.shape.cols:
|
if self.shape.rows != other.shape.cols:
|
||||||
raise newException(ValueError, &"incompatible argument shapes for dot product")
|
raise newException(ValueError, &"incompatible argument shapes for dot product")
|
||||||
|
@ -944,14 +944,14 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
|
||||||
for i in 0..<result.shape.rows:
|
for i in 0..<result.shape.rows:
|
||||||
for j in 0..<result.shape.cols:
|
for j in 0..<result.shape.cols:
|
||||||
result[i, j] = (self[i] * other[j]).sum()
|
result[i, j] = (self[i] * other[j]).sum()
|
||||||
elif self.shape.rows > 1:
|
elif self.shape.rows >= 1:
|
||||||
when not defined(release):
|
when not defined(release):
|
||||||
if self.shape.cols != other.shape.cols:
|
if self.shape.cols != other.shape.cols:
|
||||||
raise newException(ValueError, &"incompatible argument shapes for dot product")
|
raise newException(ValueError, &"incompatible argument shapes for dot product")
|
||||||
result = zeros[T]((0, self.shape.rows))
|
result = zeros[T]((0, self.shape.rows))
|
||||||
for i in 0..<result.shape.cols:
|
for i in 0..<result.shape.cols:
|
||||||
result[0, i] = (self[i] * other[0]).sum()
|
result[0, i] = (self[i] * other[0]).sum()
|
||||||
elif other.shape.rows > 1:
|
elif other.shape.rows >= 1:
|
||||||
return other.transpose().dot(self)
|
return other.transpose().dot(self)
|
||||||
else:
|
else:
|
||||||
return self * other
|
return self * other
|
||||||
|
|
Loading…
Reference in New Issue