Fixed softmax derivative and dot product

parent 338732e9f9
commit 844883ced7

src/main.nim  +65 −65
@@ -33,22 +33,23 @@ proc loadData: tuple[corpus, results, testset, testResults: Matrix[string]] =
                 testResults: newMatrix(testResults))
 
 
-proc testMetrics(predictions, y: Matrix[float]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
+proc testMetrics(predictions, y: Matrix[int]): tuple[accuracy, precision, recall: float, tP, tN, fP, fN: int] =
     # Computes the test metrics given the predictions and the
     # expected results
     var confusion = @[0, 0, 0, 0]   # TP, TN, FP, FN
     var success = 0
     var i = 0
     while i < y.len():
         inc(i)
         var predicted = predictions[0][i]
         var expected = y[0][i]
         if predicted == expected:
             success += 1
-            if predicted == 1.0:
+            if predicted == 1:
                 confusion[0] += 1
             else:
                 confusion[1] += 1
-        elif predicted == 1.0 and expected == 0.0:
+        elif predicted == 1 and expected == 0:
             confusion[2] += 1
         else:
             confusion[3] += 1
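For reference, a minimal sketch of how the four confusion counts above map to the metrics this proc returns; plain Nim with illustrative names, not the project's API (division-by-zero guards omitted):

```nim
proc metrics(tP, tN, fP, fN: int): tuple[accuracy, precision, recall: float] =
  let total = float(tP + tN + fP + fN)
  result.accuracy = float(tP + tN) / total        # share of all correct calls
  result.precision = float(tP) / float(tP + fP)   # flagged positives that were real
  result.recall = float(tP) / float(tP + fN)      # real positives that were caught

when isMainModule:
  echo metrics(tP = 40, tN = 45, fP = 5, fN = 10)
  # (accuracy: 0.85, precision: 0.888..., recall: 0.8)
```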
@@ -84,7 +85,7 @@ proc main =
                        "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
                        "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
                        "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 100
+    const epochs = 10
     const batch = 200
     const inputSize = 512
     let encoder = newLabelEncoder()
@@ -96,13 +97,13 @@ proc main =
                                    newDenseLayer(8, 16),
                                    newDenseLayer(16, 2)],
                                   lossFunc=MSE,
-                                  activationFunc=ReLU,
+                                  activationFunc=Softmax,
                                   learnRate=5,
                                   momentum=0.3,
                                   weightRange=(-10.0, 10.0),
                                   biasRange=(-10.0, 10.0))
     echo "ProjectSydney v0.2b - Accuracy test"
-    echo "\nLoading dataset and testset"
+    echo "Loading dataset and testset"
     let loadTime = cpuTime()
     let data = loadData()
     echo &"Data loaded in {cpuTime() - loadTime:.2f} seconds"
@@ -110,15 +111,16 @@ proc main =
     let vectorTime = cpuTime()
     let xTrain = vectorizer.fitTransform(data.corpus, data.corpus)
     let yTrain = encoder.fitTransform(data.results, data.results)[0]
-    echo &"Vectorized in {cpuTime() - vectorTime:.2f} seconds"
-    echo &"Feature count: {len(vectorizer.getFeatureNames())}"
-    echo &"Vocabulary size: {len(vectorizer.getVocabulary())}"
-    echo &"Corpus size: {len(data.corpus)}"
+    echo &"Data vectorized in {cpuTime() - vectorTime:.2f} seconds"
+    echo &"\t- Feature count: {len(vectorizer.getFeatureNames())}"
+    echo &"\t- Vocabulary size: {len(vectorizer.getVocabulary())}"
+    echo &"\t- Corpus size: {len(data.corpus)}"
     let yTest = encoder.transform(data.testResults)[0]
     let xTest = vectorizer.transform(data.testset)
     var tempData: seq[float] = newSeqOfCap[float](inputSize)
     var trainData: seq[tuple[x, y: Matrix[float]]] = @[]
     var testData: seq[tuple[x, y: Matrix[float]]] = @[]
+    var testTruth: seq[int] = @[]
     # Pad the data to fit into the network
     for i, row in xTrain:
         for e in row:
@@ -140,14 +142,16 @@ proc main =
         while tempData.len() < inputSize:
             tempData.add(0.0)
         if yTest[i] == 1:
+            testTruth.add(1)
             testData.add((newMatrix[float](tempData), newMatrix[float](@[1.0, 0.0])))
         else:
+            testTruth.add(0)
             testData.add((newMatrix[float](tempData), newMatrix[float](@[0.0, 1.0])))
         tempData.setLen(0)
     echo "Classifier parameters"
-    echo &"\tLearn rate: {classifier.learnRate}"
-    echo &"\tMomentum: {classifier.momentum}"
-    stdout.write("\tNetwork layout: ")
+    echo &"\t- Learn rate: {classifier.learnRate}"
+    echo &"\t- Momentum: {classifier.momentum}"
+    stdout.write("\t- Network layout: ")
     for i, layer in classifier.layers:
         stdout.write(&"{layer.inputSize}x{layer.outputSize}")
         if i < classifier.layers.high():
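Each test sample is zero-padded to the fixed `inputSize` and paired with a two-element one-hot target (`@[1.0, 0.0]` for the positive class, `@[0.0, 1.0]` for the negative one), matching the two output neurons. A minimal sketch of those two steps on plain seqs, with illustrative names:

```nim
proc padTo(row: seq[float], size: int): seq[float] =
  result = row                 # value semantics: this copies the seq
  while result.len < size:
    result.add(0.0)            # zero-pad up to the network's input width

proc oneHot(label: int): seq[float] =
  # matches the targets built in the hunk above
  if label == 1: @[1.0, 0.0] else: @[0.0, 1.0]

when isMainModule:
  echo padTo(@[0.5, 0.25], 4)  # @[0.5, 0.25, 0.0, 0.0]
  echo oneHot(1)               # @[1.0, 0.0]
```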
@@ -155,24 +159,27 @@ proc main =
     echo ""
     echo &"Training neural network for {epochs} epochs with batch size of {batch}"
     let trainTime = cpuTime()
-    classifier.train(epochs, batch, trainData, testData)
-    echo &"Training completed in {cpuTime() - trainTime:.2f} seconds"
-    #[echo "\nTest parameters"
-    echo &"\tTest size: {len(data.testset)}"
+    classifier.train(epochs, batch, trainData, #[testData]#)
+    echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
+    echo "\nTest parameters"
+    echo &"\t- Test size: {len(data.testset)}"
     let testTime = cpuTime()
-    let predictions = classifier.fastFeedForward(xTest)
-    let metrics = testMetrics(predictions, y_test)
-    echo &"\nTest completed in {cpuTime() - testTime:.2f} seconds, metrics below"
-    echo &"\tAccuracy: {metrics.accuracy * 100:.2f}%"
-    echo &"\tRecall: {metrics.recall:.2f}"
-    echo &"\tPrecision: {metrics.precision:.2f}"
-    echo &"\tF1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
+    var pred: seq[int] = @[]
+    for sample in xTest:
+        pred.add(classifier.predict(sample.copy()))
+    let predictions = newMatrix[int](pred)
+    let metrics = testMetrics(predictions, newMatrix[int](testTruth))
+    echo &"Test completed in {cpuTime() - testTime:.2f} seconds, metrics below"
+    echo &"\t- Accuracy: {metrics.accuracy * 100:.2f}%"
+    echo &"\t- Recall: {metrics.recall:.2f}"
+    echo &"\t- Precision: {metrics.precision:.2f}"
+    echo &"\t- F1-score: {pow((pow(metrics.precision, -1) + pow(metrics.recall, -1)) / 2, -1):.2f}"
     echo "\tConfusion matrix"
-    echo &"\t\tTrue positives: {metrics.tP}"
-    echo &"\t\tTrue negatives: {metrics.tN}"
-    echo &"\t\tFalse negatives: {metrics.fN}"
-    echo &"\t\tFalse positives: {metrics.fP}"
-    ]#
+    echo &"\t\t- True positives: {metrics.tP}"
+    echo &"\t\t- True negatives: {metrics.tN}"
+    echo &"\t\t- False negatives: {metrics.fN}"
+    echo &"\t\t- False positives: {metrics.fP}"
 
 
 when isMainModule:
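The F1 line above is the harmonic mean of precision and recall; spelled out, the `pow((pow(P, -1) + pow(R, -1)) / 2, -1)` expression is:

```latex
F_1 = \left(\frac{P^{-1} + R^{-1}}{2}\right)^{-1} = \frac{2PR}{P + R}
```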
@@ -134,7 +134,7 @@ proc feed(self: Layer, x: Matrix[float]): Matrix[float] =
     result = self.weights.dot(x) + self.biases
 
 
-proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.used.} =
+proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
     ## Feeds the given input through the network. The
     ## (activated) output from the last layer is returned
     result = x
@@ -142,6 +142,11 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] {.use
         result = self.activation.function(layer.feed(result))
 
 
+proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
+    ## Performs a prediction on x
+    result = self.fastFeedForward(x).argmax()
+
+
 proc feedForward(self: NeuralNetwork, x: Matrix[float]): seq[Matrix[float]] =
     ## Feeds the given input through the network.
     ## All unactivated outputs from each layer are
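The new `predict` is just a feed-forward pass followed by argmax: the predicted class is the index of the largest activated output. A stand-alone sketch of the argmax step on a plain seq (illustrative, not the library's `Matrix.argmax`):

```nim
proc argmax(xs: seq[float]): int =
  # `result` starts at 0; keep the index of the largest element
  for i in 1 ..< xs.len:
    if xs[i] > xs[result]:
      result = i

when isMainModule:
  echo argmax(@[0.1, 0.7, 0.2])  # 1 -> the second output neuron wins
```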
@@ -306,11 +311,12 @@ proc softmax(input: Matrix[float]): Matrix[float] =
     var input = input - input.max()
     result = input.apply(math.exp, axis = -1) / input.apply(math.exp, axis = -1).sum()
 
 
 proc softmaxDerivative(input: Matrix[float]): Matrix[float] =
-    # I stole this too, by the way
     var input = input.reshape(input.shape.cols, 1)
-    result = input.diagflat() - input.dot(input.transpose())
+    # I _love_ stealing functions from numpy!
+    result = (input.diagflat() - input.dot(input.transpose())).flatten()
 
 
 proc relu(input: Matrix[float]): Matrix[float] = input.apply(proc (x: float): float = max(0.0, x), axis = -1)
 proc dxRelu(input: Matrix[float]): Matrix[float] = where(input > 0.0, ones[float](input.shape), 0)
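This is the headline fix: for s = softmax(x), the Jacobian is diag(s) − s·sᵀ, i.e. ∂s_i/∂x_j = s_i(δ_ij − s_j), which is exactly what `diagflat` minus the outer product computes; the added `flatten()` then reshapes the square result, presumably to the layout the backpropagation code expects. A from-scratch sketch on plain seqs (assumed names, not the project's Matrix API):

```nim
import std/math

proc softmax(x: seq[float]): seq[float] =
  # subtract the max for numerical stability, as the diff above does
  var m = x[0]
  for v in x:
    m = max(m, v)
  var total = 0.0
  for v in x:
    result.add(exp(v - m))
    total += exp(v - m)
  for i in 0 ..< result.len:
    result[i] /= total

proc softmaxJacobian(s: seq[float]): seq[seq[float]] =
  # diagflat(s) - s.dot(s.transpose()), element-wise:
  # J[i][j] = s[i] * (delta(i, j) - s[j])
  for i in 0 ..< s.len:
    var row = newSeq[float](s.len)
    for j in 0 ..< s.len:
      row[j] = s[i] * ((if i == j: 1.0 else: 0.0) - s[j])
    result.add(row)

when isMainModule:
  let s = softmax(@[1.0, 2.0, 3.0])
  echo softmaxJacobian(s)  # each row sums to ~0, a quick sanity check
```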
@@ -552,7 +552,7 @@ proc `-`*[T](a, b: Matrix[T]): Matrix[T] =
     result.data[] = newSeqOfCap[T](result.shape.getSize())
     result.shape = a.shape
     result.order = RowMajor
-    if result.shape.rows > 1:
+    if result.shape.rows >= 1:
         for row in 0..<result.shape.rows:
             for m in a[row] - b[row]:
                 for element in m:
@@ -935,7 +935,7 @@ proc `$`*[T](self: Matrix[T]): string =
 proc dot*[T](self, other: Matrix[T]): Matrix[T] =
     ## Computes the dot product of the two
     ## input matrices
-    if self.shape.rows > 1 and other.shape.rows > 1:
+    if self.shape.rows >= 1 and other.shape.rows >= 1:
         when not defined(release):
             if self.shape.rows != other.shape.cols:
                 raise newException(ValueError, &"incompatible argument shapes for dot product")
@@ -944,14 +944,14 @@ proc dot*[T](self, other: Matrix[T]): Matrix[T] =
         for i in 0..<result.shape.rows:
             for j in 0..<result.shape.cols:
                 result[i, j] = (self[i] * other[j]).sum()
-    elif self.shape.rows > 1:
+    elif self.shape.rows >= 1:
         when not defined(release):
             if self.shape.cols != other.shape.cols:
                 raise newException(ValueError, &"incompatible argument shapes for dot product")
         result = zeros[T]((0, self.shape.rows))
         for i in 0..<result.shape.cols:
             result[0, i] = (self[i] * other[0]).sum()
-    elif other.shape.rows > 1:
+    elif other.shape.rows >= 1:
         return other.transpose().dot(self)
     else:
         return self * other
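In this Matrix library a flat vector appears to be marked by `rows == 0` (note the `zeros[T]((0, self.shape.rows))` above), so the old `rows > 1` guards misrouted genuine 1×N matrices into the vector and element-wise branches; `rows >= 1` sends them down the matrix paths. For comparison, a textbook row-by-column product on seq-of-seq matrices (illustrative, not the library's `dot`):

```nim
proc dot(a, b: seq[seq[float]]): seq[seq[float]] =
  # (n x m) * (m x p): contract each row of `a` with each column of `b`
  let (n, m, p) = (a.len, b.len, b[0].len)
  for i in 0 ..< n:
    var row = newSeq[float](p)
    for j in 0 ..< p:
      for k in 0 ..< m:
        row[j] += a[i][k] * b[k][j]
    result.add(row)

when isMainModule:
  # a 1 x 2 row vector times a 2 x 2 matrix: exactly the shape the old
  # `rows > 1` guard mishandled and the `rows >= 1` fix now routes correctly
  echo dot(@[@[1.0, 2.0]], @[@[3.0, 4.0], @[5.0, 6.0]])  # @[@[13.0, 16.0]]
```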