Fixed argmax
This commit is contained in:
parent
844883ced7
commit
924eb8cf6d
18
src/main.nim
18
src/main.nim
|
@ -85,8 +85,8 @@ proc main =
|
|||
"isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn",
|
||||
"needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't",
|
||||
"weren", "weren't", "won", "won't", "wouldn", "wouldn't"]
|
||||
const epochs = 10
|
||||
const batch = 200
|
||||
const epochs = 100
|
||||
const batch = 400
|
||||
const inputSize = 512
|
||||
let encoder = newLabelEncoder()
|
||||
let cleaner = newTextPreprocessor(stopwords=newMatrix(stopwords), toLower=true,
|
||||
|
@ -97,11 +97,11 @@ proc main =
|
|||
newDenseLayer(8, 16),
|
||||
newDenseLayer(16, 2)],
|
||||
lossFunc=MSE,
|
||||
activationFunc=Softmax,
|
||||
learnRate=5,
|
||||
momentum=0.3,
|
||||
weightRange=(-10.0, 10.0),
|
||||
biasRange=(-10.0, 10.0))
|
||||
activationFunc=SiLU,
|
||||
learnRate=0.001,
|
||||
momentum=0.0,
|
||||
weightRange=(-1.0, 1.0),
|
||||
biasRange=(-1.0, 1.0))
|
||||
echo "ProjectSydney v0.2b - Accuracy test"
|
||||
echo "Loading dataset and testset"
|
||||
let loadTime = cpuTime()
|
||||
|
@ -159,9 +159,9 @@ proc main =
|
|||
echo ""
|
||||
echo &"Training neural network for {epochs} epochs with batch size of {batch}"
|
||||
let trainTime = cpuTime()
|
||||
classifier.train(epochs, batch, trainData, #[testData]#)
|
||||
classifier.train(epochs, batch, trainData, testData)
|
||||
echo &"Training completed in {cpuTime() - trainTime:.2f} seconds, running test"
|
||||
echo "\nTest parameters"
|
||||
echo "Test parameters"
|
||||
echo &"\t- Test size: {len(data.testset)}"
|
||||
let testTime = cpuTime()
|
||||
var pred: seq[int] = @[]
|
||||
|
|
|
@ -106,6 +106,9 @@ proc newNeuralNetwork*(topology: seq[Layer], lossFunc: Loss, activationFunc: Act
|
|||
## Weights and biases are initialized with
|
||||
## random values in the chosen range using
|
||||
## nim's default PRNG
|
||||
when not defined(release):
|
||||
if momentum > 1.0:
|
||||
raise newException(ValueError, "momentum should not be greater than one")
|
||||
new(result)
|
||||
result.layers = topology
|
||||
for layer in result.layers:
|
||||
|
@ -139,7 +142,8 @@ proc fastFeedForward(self: NeuralNetwork, x: Matrix[float]): Matrix[float] =
|
|||
## (activated) output from the last layer is returned
|
||||
result = x
|
||||
for layer in self.layers:
|
||||
result = self.activation.function(layer.feed(result))
|
||||
result = layer.feed(result)
|
||||
result = self.activation.function(result)
|
||||
|
||||
|
||||
proc predict*(self: NeuralNetwork, x: Matrix[float]): int =
|
||||
|
@ -170,7 +174,7 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
|
|||
deltaW: seq[Matrix[float]] = @[]
|
||||
deltaB: seq[Matrix[float]] = @[]
|
||||
# Activations of each layer
|
||||
activations: seq[Matrix[float]] = @[]
|
||||
activations: seq[Matrix[float]] = @[x]
|
||||
# Unactivated outputs of each layer
|
||||
unactivated: seq[Matrix[float]] = @[]
|
||||
|
||||
|
@ -191,13 +195,13 @@ proc backprop(self: NeuralNetwork, x, y: Matrix[float]): tuple[weights, biases:
|
|||
# have to do fancy calculus stuff to figure out the derivative)
|
||||
var diff: Matrix[float] = self.loss.derivative(activations[^1], y) * self.activation.derivative(unactivated[^1])
|
||||
deltaB[^1].replace(diff)
|
||||
deltaW[^1].replace(activations[^2].transpose())
|
||||
deltaW[^1].replace(diff.dot(activations[^2].transpose()))
|
||||
# Backwards pass (actually the backwards pass began two lines earlier, we're just feeding
|
||||
# the correction back through the rest of the network now)
|
||||
for l in 1..<self.layers.high():
|
||||
diff = self.layers[^l].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
|
||||
for l in 2..self.layers.high():
|
||||
diff = self.layers[^(l + 1)].weights.transpose().dot(diff) * self.activation.derivative(unactivated[^l])
|
||||
deltaB[^l].replace(diff)
|
||||
deltaW[^l].replace(diff.dot(activations[^l].transpose()))
|
||||
deltaW[^l].replace(diff.dot(activations[^(l - 1)].transpose()))
|
||||
return (deltaW, deltaB)
|
||||
|
||||
|
||||
|
@ -205,21 +209,20 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
|
|||
## Performs a single mini-batch step in stochastic gradient
|
||||
## descent and updates the network's weights and biases
|
||||
## accordingly
|
||||
var gradient: tuple[weights, biases: seq[Matrix[float]]]
|
||||
# New weights and biases
|
||||
var
|
||||
# New weights and biases
|
||||
weights: seq[Matrix[float]] = @[]
|
||||
biases: seq[Matrix[float]] = @[]
|
||||
gradient: tuple[weights, biases: seq[Matrix[float]]]
|
||||
for layer in self.layers:
|
||||
weights.add(zeros[float](layer.weights.shape))
|
||||
biases.add(zeros[float](layer.biases.shape))
|
||||
for dataPoint in data:
|
||||
gradient = self.backprop(dataPoint.x, dataPoint.y)
|
||||
for i, (currentBiases, newBiases) in zip(biases, gradient.biases):
|
||||
biases[i] = currentBiases + newBiases
|
||||
for i, (currentWeights, newWeights) in zip(weights, gradient.weights):
|
||||
weights[i] = currentWeights + newWeights
|
||||
|
||||
for i, newBiases in gradient.biases:
|
||||
biases[i].replace(newBiases)
|
||||
for i, newWeights in gradient.weights:
|
||||
weights[i].replace(newWeights)
|
||||
# We use hyperparameters such as the learn rate and momentum
|
||||
# to further control how fast (or slowly) the network converges
|
||||
# onto a local minimum of the gradient of our loss function. To
|
||||
|
@ -246,9 +249,9 @@ proc miniBatch(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]) =
|
|||
# overfitting by not letting the network train over the same data over and over
|
||||
# again)
|
||||
for (layer, newBiases) in zip(self.layers, biases):
|
||||
layer.biases = (layer.biases - nudge) * newBiases
|
||||
layer.biases = layer.biases - (newBiases * nudge)
|
||||
for (layer, newWeights) in zip(self.layers, weights):
|
||||
layer.weights = (layer.weights - nudge) * newWeights
|
||||
layer.weights = layer.weights - (newWeights * nudge)
|
||||
|
||||
|
||||
proc eval(self: NeuralNetwork, data: seq[tuple[x, y: Matrix[float]]]): float =
|
||||
|
|
|
@ -1045,18 +1045,17 @@ proc argmax*[T](self: Matrix[T]): int =
|
|||
## into the matrix
|
||||
var m: T = self[0, 0]
|
||||
var
|
||||
row = 0
|
||||
col = 0
|
||||
while row < self.shape.rows:
|
||||
while col < self.shape.cols:
|
||||
if self[row, col] > m:
|
||||
m = self[row, col]
|
||||
if self.shape.rows == 0:
|
||||
while col < self.shape.cols:
|
||||
if self[0, col] > m:
|
||||
m = self[0, col]
|
||||
inc(col)
|
||||
return self.getIndex(row, col)
|
||||
r = 0
|
||||
c = 0
|
||||
for i, row in self:
|
||||
for j, e in row:
|
||||
if e == m:
|
||||
continue
|
||||
elif e > m:
|
||||
m = e
|
||||
r = i
|
||||
c = j
|
||||
return self.getIndex(r, c)
|
||||
|
||||
|
||||
proc contains*[T](self: Matrix[T], e: T): bool =
|
||||
|
|
Loading…
Reference in New Issue