Fixed argmax

This commit is contained in:
Mattia Giambirtone 2023-03-28 10:28:05 +02:00
parent 844883ced7
commit 924eb8cf6d
Signed by: nocturn9x
GPG Key ID: 8270F9F467971E59
3 changed files with 38 additions and 36 deletions

View File

@ -85,8 +85,8 @@ proc main =
"isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
"needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
"weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
const epochs = 10
const batch = 200
const epochs = 100
const batch = 400
const inputSize = 512
let encoder = newLabelEncoder()
let cleaner = newTextPreprocessor(stopwords=newMatrix(stopwords), toLower=true,
@ -97,11 +97,11 @@ proc main =
newDenseLayer(8, 16),
newDenseLayer(16, 2)],
lossFunc=MSE,
activationFunc=Softmax,
learnRate=5,
momentum=0.3,
weightRange=(-10.0, 10.0),
biasRange=(-10.0, 10.0))
activationFunc=SiLU,
learnRate=0.001,
momentum=0.0,
weightRange=(-1.0, 1.0),
biasRange=(-1.0, 1.0))
echo "ProjectSydney v0.2b - Accuracy test"
echo "Loading dataset and testset"
let loadTime = cpuTime()
@ -159,9 +159,9 @@ proc main =
echo ""
echo &"Training neural network for {epochs} epochs with batch size of {batch}"
let trainTime = cpuTime()
classifier.train(epochs, batch, trainData, #[testData]#)
classifier.train(epochs, batch, trainData, testData)
echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
echo "\nTest parameters"
echo "Test parameters"
echo &"\t- Test size: {len(data.testset)}"
let testTime = cpuTime()
var pred: seq[int] = @[]

View File

@ -106,6 +106,9 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Act
## Weights and biases are initialized with
## random values in the chosen range using
## Nim's default PRNG
when not defined(release):
if momentum > 1.0:
raise newException(ValueError, "momentum should not be greater than one")
new(result)
result.layers = topology
for layer in result.layers:
@ -139,7 +142,8 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
## (activated) output from the last layer is returned
result = x
for layer in self.layers:
result = self.activation.function(layer.feed(result))
result = layer.feed(result)
result = self.activation.function(result)
proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
@ -170,7 +174,7 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
deltaW: seq[Matrix[float]] = @[]
deltaB: seq[Matrix[float]] = @[]
# Activations of each layer
activations: seq[Matrix[float]] = @[]
activations: seq[Matrix[float]] = @[x]
# Unactivated outputs of each layer
unactivated: seq[Matrix[float]] = @[]
@ -191,13 +195,13 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
# have to do fancy calculus stuff to figure out the derivative)
var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.activation.derivative(unactivated[^1])
deltaB[^1].replace(diff)
deltaW[^1].replace(activations[^2].transpose())
deltaW[^1].replace(diff.dot(activations[^2].transpose()))
# Backwards pass (strictly speaking, it already began above when `diff` was first computed;
# here we just feed the correction back through the rest of the network)
for l in 1..<self.layers.high():
diff = self.layers[^l].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
for l in 2..self.layers.high():
diff = self.layers[^(l + 1)].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
deltaB[^l].replace(diff)
deltaW[^l].replace(diff.dot(activations[^l].transpose()))
deltaW[^l].replace(diff.dot(activations[^(l - 1)].transpose()))
return (deltaW, deltaB)
@ -205,21 +209,20 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
## Performs a single mini-batch step in stochastic gradient
## descent and updates the network's weights and biases
## accordingly
var gradient: tuple[weights, biases: seq[Matrix[float]]]
# New weights and biases
var
# New weights and biases
weights: seq[Matrix[float]] = @[]
biases: seq[Matrix[float]] = @[]
gradient: tuple[weights, biases: seq[Matrix[float]]]
for layer in self.layers:
weights.add(zeros[float](layer.weights.shape))
biases.add(zeros[float](layer.biases.shape))
for dataPoint in data:
gradient = self.backprop(dataPoint.x, dataPoint.y)
for i, (currentBiases, newBiases) in zip(biases, gradient.biases):
biases[i] = currentBiases + newBiases
for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
weights[i] = currentWeights + newWeights
for i, newBiases in gradient.biases:
biases[i].replace(newBiases)
for i, newWeights in gradient.weights:
weights[i].replace(newWeights)
# We use hyperparameters such as the learn rate and momentum
# to further control how fast (or slowly) the network converges
# onto a local minimum of the gradient of our loss function. To
@ -246,9 +249,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
# overfitting by not letting the network train over the same data over and over
# again)
for (layer, newBiases) in zip(self.layers, biases):
layer.biases = (layer.biases - nudge) * newBiases
layer.biases = layer.biases - (newBiases * nudge)
for (layer, newWeights) in zip(self.layers, weights):
layer.weights = (layer.weights - nudge) * newWeights
layer.weights = layer.weights - (newWeights * nudge)
proc eval(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]): float =

View File

@ -1045,18 +1045,17 @@ proc argmax*[T](self: Matrix[T]): int =
## into the matrix
var m: T = self[0, 0]
var
row = 0
col = 0
while row < self.shape.rows:
while col < self.shape.cols:
if self[row, col] > m:
m = self[row, col]
if self.shape.rows == 0:
while col < self.shape.cols:
if self[0, col] > m:
m = self[0, col]
inc(col)
return self.getIndex(row, col)
r = 0
c = 0
for i, row in self:
for j, e in row:
if e == m:
continue
elif e > m:
m = e
r = i
c = j
return self.getIndex(r, c)
proc contains*[T](self: Matrix[T], e: T): bool =