Fixed argmax
This commit is contained in:
parent 844883ced7
commit 924eb8cf6d
src/main.nim | 18 +++++++++---------
@@ -85,8 +85,8 @@ proc main =
         "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
         "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
         "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
-    const epochs = 10
-    const batch = 200
+    const epochs = 100
+    const batch = 400
     const inputSize = 512
     let encoder = newLabelEncoder()
     let cleaner = newTextPreprocessor(stopwords=newMatrix(stopwords), toLower=true,
@@ -97,11 +97,11 @@ proc main =
                                        newDenseLayer(8, 16),
                                        newDenseLayer(16, 2)],
                                       lossFunc=MSE,
-                                      activationFunc=Softmax,
-                                      learnRate=5,
-                                      momentum=0.3,
-                                      weightRange=(-10.0, 10.0),
-                                      biasRange=(-10.0, 10.0))
+                                      activationFunc=SiLU,
+                                      learnRate=0.001,
+                                      momentum=0.0,
+                                      weightRange=(-1.0, 1.0),
+                                      biasRange=(-1.0, 1.0))
     echo "ProjectSydney v0.2b - Accuracy test"
     echo "Loading dataset and testset"
     let loadTime = cpuTime()
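Note: SiLU (the sigmoid-weighted linear unit) is defined as silu(x) = x * sigmoid(x), which, unlike Softmax, is applied element-wise and so makes sense as a per-layer activation. A minimal sketch of the textbook definition, independent of how this library spells it:

    import std/math

    # silu(x) = x * sigmoid(x) = x / (1 + e^(-x))
    func silu(x: float): float =
        x / (1.0 + exp(-x))

    echo silu(1.0)   # ~0.731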
@@ -159,9 +159,9 @@ proc main =
     echo ""
     echo &"Training neural network for {epochs} epochs with batch size of {batch}"
     let trainTime = cpuTime()
-    classifier.train(epochs, batch, trainData, #[testData]#)
+    classifier.train(epochs, batch, trainData, testData)
     echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
-    echo "\nTest parameters"
+    echo "Test parameters"
     echo &"\t- Test size: {len(data.testset)}"
     let testTime = cpuTime()
     var pred: seq[int] = @[]
@@ -106,6 +106,9 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Act
     ## Weights and biases are initialized with
     ## random values in the chosen range using
     ## nim's default PRNG
+    when not defined(release):
+        if momentum > 1.0:
+            raise newException(ValueError, "momentum should not be greater than one")
     new(result)
     result.layers = topology
     for layer in result.layers:
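Note: the new debug-only check rejects momentum above 1.0 because momentum scales the previous update; a factor greater than one would make the accumulated step grow instead of decay. A scalar sketch of the classical momentum formulation, with an illustrative velocity variable rather than this library's internals:

    # One momentum step on a single weight with a toy loss of w^2.
    # velocity, learnRate and momentum values here are illustrative.
    var
        weight = 0.5
        velocity = 0.0
    let
        learnRate = 0.001
        momentum = 0.3
    let gradW = 2.0 * weight   # d/dw of w^2
    velocity = momentum * velocity - learnRate * gradW
    weight += velocity
    echo weight                # slightly below 0.5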
@@ -139,7 +142,8 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
     ## (activated) output from the last layer is returned
     result = x
     for layer in self.layers:
-        result = self.activation.function(layer.feed(result))
+        result = layer.feed(result)
+        result = self.activation.function(result)


 proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
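Note: the split makes the per-layer recurrence explicit: layer.feed computes the affine output z = W·a + b, and the activation then gives a = σ(z). A self-contained toy version of the same loop on scalars (sigmoid and the (w, b) pairs are stand-ins, not this library's Layer type):

    import std/math

    func sigmoid(x: float): float = 1.0 / (1.0 + exp(-x))

    # Two scalar "layers": feed is w * x + b, then activate.
    let layers = @[(w: 0.8, b: 0.1), (w: -1.2, b: 0.4)]
    var output = 0.5   # the input x
    for layer in layers:
        output = layer.w * output + layer.b   # layer.feed equivalent
        output = sigmoid(output)              # activation per layer
    echo output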
@@ -170,7 +174,7 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
         deltaW: seq[Matrix[float]] = @[]
         deltaB: seq[Matrix[float]] = @[]
         # Activations of each layer
-        activations: seq[Matrix[float]] = @[]
+        activations: seq[Matrix[float]] = @[x]
         # Unactivated outputs of each layer
         unactivated: seq[Matrix[float]] = @[]

@@ -191,13 +195,13 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
     # have to do fancy calculus stuff to figure out the derivative)
     var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.activation.derivative(unactivated[^1])
     deltaB[^1].replace(diff)
-    deltaW[^1].replace(activations[^2].transpose())
+    deltaW[^1].replace(diff.dot(activations[^2].transpose()))
     # Backwards pass (actually the backwards pass began two lines earlier, we're just feeding
     # the correction back through the rest of the network now)
-    for l in 1..<self.layers.high():
-        diff = self.layers[^l].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
+    for l in 2..self.layers.high():
+        diff = self.layers[^(l + 1)].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
         deltaB[^l].replace(diff)
-        deltaW[^l].replace(diff.dot(activations[^l].transpose()))
+        deltaW[^l].replace(diff.dot(activations[^(l - 1)].transpose()))
     return (deltaW, deltaB)


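Note: the "fancy calculus stuff" is the standard backpropagation chain rule, and it is why the deltaW fix multiplies by diff. With C the loss, σ the activation, z^l and a^l the unactivated and activated outputs of layer l, and L the last layer:

    \delta^L = C'(a^L, y) \odot \sigma'(z^L)
    \partial C / \partial b^L = \delta^L
    \partial C / \partial W^L = \delta^L \, (a^{L-1})^\top
    \delta^l = (W^{l+1})^\top \, \delta^{l+1} \odot \sigma'(z^l)

The old line stored only (a^{L-1})^\top in deltaW, dropping the \delta^L factor that the loss and activation derivatives contribute.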
@@ -205,21 +209,20 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
     ## Performs a single mini-batch step in stochastic gradient
     ## descent and updates the network's weights and biases
     ## accordingly
-    var gradient: tuple[weights, biases: seq[Matrix[float]]]
-    # New weights and biases
     var
+        # New weights and biases
         weights: seq[Matrix[float]] = @[]
         biases: seq[Matrix[float]] = @[]
+        gradient: tuple[weights, biases: seq[Matrix[float]]]
     for layer in self.layers:
         weights.add(zeros[float](layer.weights.shape))
         biases.add(zeros[float](layer.biases.shape))
     for dataPoint in data:
         gradient = self.backprop(dataPoint.x, dataPoint.y)
-        for i, (currentBiases, newBiases) in zip(biases, gradient.biases):
-            biases[i] = currentBiases + newBiases
-        for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
-            weights[i] = currentWeights + newWeights
+        for i, newBiases in gradient.biases:
+            biases[i].replace(newBiases)
+        for i, newWeights in gradient.weights:
+            weights[i].replace(newWeights)

     # We use hyperparameters such as the learn rate and momentum
     # to further control how fast (or slowly) the network converges
     # onto a local minimum of the gradient of our loss function. To
@@ -246,9 +249,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
     # overfitting by not letting the network train over the same data over and over
     # again)
     for (layer, newBiases) in zip(self.layers, biases):
-        layer.biases = (layer.biases - nudge) * newBiases
+        layer.biases = layer.biases - (newBiases * nudge)
     for (layer, newWeights) in zip(self.layers, weights):
-        layer.weights = (layer.weights - nudge) * newWeights
+        layer.weights = layer.weights - (newWeights * nudge)


 proc eval(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]): float =
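Note: the last two fixes restore the plain gradient-descent update, parameter = parameter - gradient * step, where the old code subtracted the step from the parameter and then multiplied by the gradient. A scalar sketch (nudge is computed earlier in miniBatch from the hyperparameters; a small constant stands in for it here):

    var weight = 0.5
    let
        nudge = 0.001          # stand-in for the real step size
        gradW = 2.0 * weight   # toy accumulated gradient (loss = w^2)
    weight = weight - (gradW * nudge)   # the fixed update rule
    echo weight                         # 0.499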
@@ -1045,18 +1045,17 @@ proc argmax*[T](self: Matrix[T]): int =
     ## into the matrix
     var m: T = self[0, 0]
     var
-        row = 0
-        col = 0
-    while row < self.shape.rows:
-        while col < self.shape.cols:
-            if self[row, col] > m:
-                m = self[row, col]
-    if self.shape.rows == 0:
-        while col < self.shape.cols:
-            if self[0, col] > m:
-                m = self[0, col]
-            inc(col)
-    return self.getIndex(row, col)
+        r = 0
+        c = 0
+    for i, row in self:
+        for j, e in row:
+            if e == m:
+                continue
+            elif e > m:
+                m = e
+                r = i
+                c = j
+    return self.getIndex(r, c)


 proc contains*[T](self: Matrix[T], e: T): bool =
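Note: the rewritten argmax tracks the row and column of the running maximum and only converts to a flat index at the end; the old version returned getIndex(row, col) from loop counters that no longer pointed at the maximum. Assuming getIndex maps (row, col) to a row-major flat index (row * cols + col), a standalone version of the same scan over nested seqs:

    # Same scan as the fixed argmax, on plain nested seqs.
    let data = @[@[3.0, 1.0, 2.0],
                 @[0.0, 4.0, 9.0]]
    var
        m = data[0][0]
        r = 0
        c = 0
    for i, row in data:
        for j, e in row:
            if e > m:
                m = e
                r = i
                c = j
    echo r * data[0].len + c   # prints 5: the 9.0 at row 1, col 2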