Working learner, dataloader, and optimizer

This commit is contained in:
Quinniboi10
2025-10-22 17:13:30 -05:00
committed by Quinn
parent c691296a7b
commit 31f9c7b265
19 changed files with 8944 additions and 16 deletions

7988
external/stb_image.h vendored Normal file

File diff suppressed because it is too large Load Diff

22
src/Ember.cpp Normal file
View File

@@ -0,0 +1,22 @@
#include "learner.h"
int main() {
Ember::Network net(
Ember::layers::Input(28 * 28),
Ember::layers::Linear(28 * 28, 64),
Ember::activations::ReLU(),
Ember::layers::Linear(64, 10),
Ember::activations::Softmax()
);
Ember::dataloaders::ImageDataLoader dataloader("../datasets/MNIST/", 128, 0.9, 4, 28, 28);
Ember::optimizers::SGD optimizer(net, 0.9);
Ember::Learner learner(net, dataloader, optimizer, Ember::loss::MeanSquaredError());
net.setMode(Ember::NetworkMode::TRAIN);
std::cout << net << std::endl;
learner.learn(0.05, 2);
}

View File

@@ -5,12 +5,59 @@ namespace Ember {
// Rectified linear unit: clamps negative inputs to zero.
float ReLU(const float value) {
	return std::max(value, 0.0f);
}
namespace derivatives {
	// Derivative of ReLU: 1 for positive inputs, 0 otherwise.
	float ReLU(const float value) {
		if (value > 0)
			return 1;
		return 0;
	}
}
}
namespace activations {
// Applies ReLU element-wise to the previous layer's outputs.
// NOTE(review): assumes `values` was already sized to previous.size by layer
// construction (Softmax::forward resizes explicitly; this does not) — confirm.
void ReLU::forward(const Layer& previous) {
	for (usize prev = 0; prev < previous.size; prev++)
		values[prev] = internal::activations::ReLU(previous.values[prev]);
}
// Chain rule through ReLU: the gradient passes where the input was positive
// and is zeroed elsewhere.
Tensor<1> ReLU::backward(const Layer& previous, const Tensor<1>& gradOutput) const {
	const usize count = gradOutput.size();
	Tensor<1> gradInput(count);
	for (usize i = 0; i < count; i++)
		gradInput[i] = gradOutput[i] * internal::activations::derivatives::ReLU(previous.values[i]);
	return gradInput;
}
void Softmax::forward(const Layer& previous) {
values.resize(previous.size);
float maxIn = previous.values[0];
for (usize i = 1; i < previous.size; i++)
maxIn = std::max(maxIn, previous.values[i]);
float sum = 0.0f;
for (usize i = 0; i < previous.size; i++) {
values[i] = std::exp(previous.values[i] - maxIn);
sum += values[i];
}
if (sum == 0.0f)
for (auto& v : values) v = 1.0f / previous.size;
else
for (auto& v : values) v /= sum;
}
// Jacobian-vector product of softmax: grad_i = y_i * (g_i - <y, g>),
// where y are the cached softmax outputs (`values`) and g is gradOutput.
Tensor<1> Softmax::backward(const Layer& previous, const Tensor<1>& gradOutput) const {
	const usize count = gradOutput.size();
	Tensor<1> gradInput(count);
	// Weighted sum <y, g> shared by every component of the Jacobian product.
	float weightedSum = 0.0f;
	for (usize i = 0; i < count; ++i)
		weightedSum += values[i] * gradOutput[i];
	for (usize i = 0; i < count; ++i)
		gradInput[i] = values[i] * (gradOutput[i] - weightedSum);
	return gradInput;
}
}
}

View File

@@ -11,9 +11,21 @@ namespace Ember {
// Element-wise ReLU activation layer (no trainable parameters).
struct ReLU : internal::ActivationLayer {
void forward(const Layer& previous) override;
Tensor<1> backward(const Layer& previous, const Tensor<1>& gradOutput) const override;
// Human-readable summary used when the network is streamed to an ostream
std::string str() const override {
return fmt::format("ReLU - applied to {} features", size);
}
};
// Softmax activation layer: normalizes outputs into a probability distribution.
struct Softmax : internal::ActivationLayer {
void forward(const Layer& previous) override;
Tensor<1> backward(const Layer& previous, const Tensor<1>& gradOutput) const override;
// Human-readable summary used when the network is streamed to an ostream
std::string str() const override {
return fmt::format("Softmax - applied to {} features", size);
}
};
}
}

137
src/dataloader.cpp Normal file
View File

@@ -0,0 +1,137 @@
#include "dataloader.h"
#include "../external/fmt/format.h"
#define STB_IMAGE_IMPLEMENTATION
#include "../external/stb_image.h"
#include <filesystem>
// Loads an image as a flat row-major greyscale float vector in [0, 1].
// w/h of 0 (or matching the source) keep the native resolution; otherwise the
// image is resampled to w x h with nearest-neighbor.
// Throws std::runtime_error when the file cannot be decoded.
std::vector<float> loadGreyscaleImage(const std::string& path, const Ember::usize w, const Ember::usize h) {
	int width, height, channels;
	unsigned char* data = stbi_load(path.data(), &width, &height, &channels, 1);
	if (!data)
		throw std::runtime_error("Failed to load image: " + path);
	const Ember::usize sourceW = static_cast<Ember::usize>(width);
	const Ember::usize sourceH = static_cast<Ember::usize>(height);
	const bool keepNative = (w == sourceW || w == 0) && (h == sourceH || h == 0);
	// Size the buffer for the OUTPUT dimensions: sizing it for the source while
	// writing w*h resized pixels overflowed the buffer whenever w*h exceeded
	// the native pixel count (upscaling).
	std::vector<float> vec(keepNative ? sourceW * sourceH : w * h);
	if (keepNative) {
		for (Ember::usize i = 0; i < sourceW * sourceH; i++)
			vec[i] = data[i] / 255.0f;
	}
	else {
		// Simple nearest-neighbor resize
		for (Ember::usize y = 0; y < h; ++y) {
			for (Ember::usize x = 0; x < w; ++x) {
				const Ember::usize sourceX = x * sourceW / w;
				const Ember::usize sourceY = y * sourceH / h;
				vec[y * w + x] = data[sourceY * sourceW + sourceX] / 255.0f;
			}
		}
	}
	stbi_image_free(data);
	return vec;
}
namespace Ember::dataloaders {
// Scans dataDir for class subdirectories ("types") and counts the image files
// in each; width/height of 0 keep images at their native resolution.
ImageDataLoader::ImageDataLoader(const std::string& dataDir, const u64 batchSize, const float trainSplit, const u64 threads, const usize width, const usize height)
: DataLoader(batchSize, trainSplit, threads) {
this->width = width;
this->height = height;
fmt::println("Attempting to open data dir '{}'", dataDir);
if (!std::filesystem::exists(dataDir) || !std::filesystem::is_directory(dataDir))
exitWithMsg("Data directory does not exist or is not a directory: " + dataDir, 1);
this->dataDir = dataDir;
// Every immediate subdirectory is treated as one class.
for (const auto &entry: std::filesystem::directory_iterator(this->dataDir)) {
if (entry.is_directory())
types.push_back(entry.path().string());
}
fmt::println("Found {} types", types.size());
samplesPerType.resize(types.size());
numSamples = 0;
// Count regular files per class; these counts drive the train/test split.
for (usize typeIdx = 0; typeIdx < types.size(); typeIdx++) {
for (const auto &entry: std::filesystem::directory_iterator(types[typeIdx])) {
if (entry.is_regular_file()) {
numSamples++;
samplesPerType[typeIdx]++;
}
}
}
// NOTE(review): trainSplit == 1.0 makes the printed ratio divide by zero (inf).
fmt::println("Using train to test ratio of {:.2f} with approximately {:.0f} train samples and {:.0f} test samples", trainSplit / (1 - trainSplit), numSamples * trainSplit, numSamples * (1 - trainSplit));
}
void ImageDataLoader::loadBatch(const usize batchIdx) {
data[batchIdx].clear();
data[batchIdx].reserve(batchSize);
if (types.empty())
exitWithMsg(fmt::format("No types found in '{}'", dataDir), 1);
std::mutex dataMut;
#pragma omp parallel for num_threads(threads)
for (usize i = 0; i < batchSize; i++) {
// Randomly pick a type
std::uniform_int_distribution<usize> typeDist(0, types.size() - 1);
const usize typeIdx = typeDist(rng);
const std::string& typeDir = types[typeIdx];
// Gather image files in that directory
std::vector<std::filesystem::path> imgs;
for (const auto& entry : std::filesystem::directory_iterator(typeDir)) {
if (entry.is_regular_file())
imgs.push_back(entry.path());
}
if (imgs.empty())
exitWithMsg(fmt::format("No images found in '{}'", typeDir), 1);
// Randomly pick an image
std::uniform_int_distribution<usize> imgDist(0, imgs.size() * trainSplit - 1);
const usize imgIdx = imgDist(rng);
std::vector<float> input = loadGreyscaleImage(imgs[imgIdx].string(), width, height);
std::vector<float> target(types.size(), 0);
target[typeIdx] = 1;
dataMut.lock();
data[batchIdx].emplace_back(input, target);
dataMut.unlock();
}
}
// Fills the CURRENT buffer with the held-out test samples: for each class the
// first ~trainSplit fraction of files is skipped (those belong to training)
// and the remainder is loaded.
// NOTE(review): directory_iterator order is unspecified, so which files land
// in train vs test depends on traversal order — confirm this is acceptable.
void ImageDataLoader::loadTestSet() {
data[currBatch].clear();
if (types.empty())
exitWithMsg(fmt::format("No types found in '{}'", dataDir), 1);
for (usize typeIdx = 0; typeIdx < types.size(); typeIdx++) {
u64 currIdx = 0;
for (const auto& entry : std::filesystem::directory_iterator(types[typeIdx])) {
if (entry.is_regular_file()) {
// Skip the training portion of this class.
// NOTE(review): the `- 1` shifts the boundary by one sample relative to
// loadBatch's train range — confirm the intended split point.
if (currIdx < samplesPerType[typeIdx] * trainSplit - 1) {
currIdx++;
continue;
}
std::vector<float> input = loadGreyscaleImage(entry.path().string(), width, height);
// One-hot target (value-initialized to zeros).
std::vector<float> target(types.size());
target[typeIdx] = 1;
data[currBatch].emplace_back(input, target);
}
}
}
}

96
src/dataloader.h Normal file
View File

@@ -0,0 +1,96 @@
#pragma once
#include "types.h"
#include <vector>
#include <future>
#include <random>
#include <array>
namespace Ember {
namespace internal {
// A single training/test sample: flattened input features and target vector.
struct DataPoint {
std::vector<float> input; // Flattened input features (e.g. pixel intensities)
std::vector<float> target; // Expected network output (e.g. one-hot class)
DataPoint(const std::vector<float>& input, const std::vector<float>& target) : input(input), target(target) {}
};
// Abstract double-buffered batch loader: one buffer is consumed while the
// next batch is (optionally) loaded asynchronously into the other.
struct DataLoader {
u64 threads; // Worker threads for loading (0 = load lazily on waitForBatch)
u64 batchSize; // Samples per training batch
float trainSplit; // Fraction of the dataset used for training (rest is test)
u64 numSamples; // Total samples in the dataset (set by subclasses)
usize currBatch; // Index (0/1) of the buffer currently being consumed
std::future<void> dataFuture; // Pending async load, if any
std::array<std::vector<DataPoint>, 2> data; // The two batch buffers
DataLoader(const u64 batchSize, const float trainSplit, const u64 threads) {
this->threads = threads;
this->batchSize = batchSize;
this->trainSplit = trainSplit;
this->numSamples = 0;
this->currBatch = 0;
data[0].reserve(batchSize);
data[1].reserve(batchSize);
}
// Loads batch into other buffer
virtual void loadBatch(const usize batchIdx) = 0;
// Replaces the current buffer's contents with the held-out test samples
virtual void loadTestSet() = 0;
// True while the current buffer still has unread samples
bool hasNext() const {
return data[currBatch].size() > 0;
}
// Pops and returns the next sample from the back of the current buffer
DataPoint next() {
assert(hasNext());
const DataPoint dataPoint = data[currBatch].back();
data[currBatch].pop_back();
return dataPoint;
}
// Attempts to load data asynchronously if threads > 0
// (deferred launch otherwise: the load runs inside waitForBatch)
void asyncPreloadBatch() {
dataFuture = std::async(threads > 0 ? std::launch::async : std::launch::deferred, [this]() { loadBatch(currBatch ^ 1); });
}
// Blocks until the pending (or deferred) load has completed
void waitForBatch() {
if (dataFuture.valid())
dataFuture.get();
}
// Read-only view of the buffer currently being consumed
const std::vector<DataPoint>& batchData() const {
return data[currBatch];
}
// Makes the freshly loaded buffer the current one
void swapBuffers() {
currBatch ^= 1;
}
virtual ~DataLoader() = default;
};
}
namespace dataloaders {
// Loads greyscale images from a directory tree in which each subdirectory is
// one class ("type"); targets are one-hot over the discovered classes.
struct ImageDataLoader : internal::DataLoader {
std::string dataDir; // Root dataset directory
std::vector<std::string> types; // One subdirectory path per class
std::vector<u64> samplesPerType; // File count per class
std::mt19937 rng{ std::random_device{}() }; // Sampling RNG, seeded once
usize width; // Target image width (0 = keep native)
usize height; // Target image height (0 = keep native)
ImageDataLoader(const std::string& dataDir, const u64 batchSize, const float trainSplit, const u64 threads = 0, const usize width = 0, const usize height = 0);
void loadBatch(const usize batchIdx) override;
void loadTestSet() override;
};
}
}

View File

@@ -2,6 +2,10 @@
#include "tensor.h"
#include <utility>
#include <string>
#include <thread>
namespace Ember {
namespace internal {
struct Layer {
@@ -21,8 +25,6 @@ namespace Ember {
}
virtual void forward(const Layer& previous) = 0;
virtual Tensor<1>& getOutputs() { return values; };
virtual const Tensor<1>& getOutputs() const { return values; };
virtual std::string str() const = 0;
virtual ~Layer() = default;
@@ -32,15 +34,29 @@ namespace Ember {
Tensor<1> weights; // Indexed [previous][current], flattened to prev * size + curr
Tensor<1> biases;
usize threadCount;
ComputeLayer() = delete;
// Allocates the weight matrix (previousSize x size, flattened) and biases,
// and picks a default thread count for the forward pass.
ComputeLayer(const usize previousSize, const usize size) : Layer(size) {
	threadCount = std::max<usize>(1, std::thread::hardware_concurrency());
	// Cap at size / 2 so tiny layers don't spawn useless threads, but clamp
	// back to at least 1 (size < 2 would otherwise yield zero threads).
	threadCount = std::max<usize>(1, std::min<usize>(threadCount, size / 2));
	this->weights.resize(previousSize * size);
	this->biases.resize(size);
}
// Clamps the requested thread count to [1, size / 2]: no more threads than
// useful for this layer's width, and never zero.
void setThreadCount(const usize threadCount) {
	this->threadCount = std::max<usize>(1, std::min<usize>(threadCount, size / 2));
}
virtual std::tuple<Tensor<1>, Tensor<1>, Tensor<1>> backward(const Layer& previous, const Tensor<1>& gradOutput) const = 0;
};
struct ActivationLayer : Layer {};
struct ActivationLayer : Layer {
virtual Tensor<1> backward(const Layer& previous, const Tensor<1>& gradOutput) const = 0;
};
}
namespace layers {
@@ -64,15 +80,55 @@ namespace Ember {
const usize inputSize = previous.size;
const usize outputSize = size;
// Move biases into the target vector
values = biases;
std::vector<std::thread> threads;
// This instruction tells the compiler to run across all threads
#pragma omp parallel for schedule(auto)
for (usize prev = 0; prev < inputSize; prev++) {
for (usize curr = 0; curr < outputSize; curr++)
values[curr] += previous.getOutputs()[prev] * weights[prev * size + curr];
threadCount = 1;
const auto worker = [&](const usize threadId) {
// Divide the range across threads
const usize start = (outputSize * threadId) / threadCount;
const usize end = std::min((outputSize * (threadId + 1)) / threadCount, outputSize);
for (usize curr = start; curr < end; curr++) {
float sum = biases[curr];
for (usize prev = 0; prev < inputSize; prev++)
sum += previous.values[prev] * weights[prev * size + curr];
values[curr] = sum;
}
};
// Launch worker threads
for (usize t = 1; t < threadCount; t++)
threads.emplace_back(worker, t);
// Run thread 0 on the main thread
worker(0);
// Join all threads
for (std::thread& t : threads)
if (t.joinable())
t.join();
}
// Backprop through the fully-connected layer.
// Returns { gradient w.r.t. inputs, gradient w.r.t. weights, gradient w.r.t. biases }.
std::tuple<Tensor<1>, Tensor<1>, Tensor<1>> backward(const Layer& previous, const Tensor<1>& gradOutput) const override {
const usize inputSize = previous.size;
const usize outputSize = size;
Tensor<1> gradInput(inputSize, 0.0f);
Tensor<1> weightGrad(weights.size(), 0.0f);
Tensor<1> biasGrad(size, 0.0f);
// Compute gradients
for (usize curr = 0; curr < outputSize; curr++) {
// Bias enters additively, so its gradient is the output gradient itself.
biasGrad[curr] = gradOutput[curr];
for (usize prev = 0; prev < inputSize; prev++) {
// Weights are stored flattened as [previous][current].
const usize wIndex = prev * outputSize + curr;
gradInput[prev] += weights[wIndex] * gradOutput[curr];
weightGrad[wIndex] += previous.values[prev] * gradOutput[curr];
}
}
return { gradInput, weightGrad, biasGrad };
}
std::string str() const override {

169
src/learner.cpp Normal file
View File

@@ -0,0 +1,169 @@
#include "learner.h"
#include "progbar.h"
#include <algorithm>
namespace Ember {
// Backpropagates the loss gradient through the network for the most recent
// forward pass. gradients[idx] holds the weight/bias gradients of layer idx;
// activation layers leave their slot default-constructed (empty).
// NOTE(review): learner.h claims the result is ordered last-to-first, but
// entries are indexed by layer position — confirm and fix the header comment.
std::vector<internal::Gradient> Learner::backward(const std::vector<float> &target) const {
std::vector<internal::Gradient> gradients(net.layers.size());
// Seed with dL/d(output) from the loss function.
Tensor<1> error = lossFunc->backward(net.output(), target);
// Walk layers from last to first; layer 0 is the input layer.
for (usize idx = net.layers.size() - 1; idx > 0; idx--) {
auto* layer = net.layers[idx].get();
if (const auto* actLayer = dynamic_cast<internal::ActivationLayer*>(layer)) {
// Activations have no parameters; just transform the error signal.
error = actLayer->backward(*net.layers[idx - 1], error);
}
else if (const auto* compLayer = dynamic_cast<internal::ComputeLayer*>(layer)) {
auto [gradInput, weightGrad, biasGrad] = compLayer->backward(*net.layers[idx - 1], error);
gradients[idx] = internal::Gradient(weightGrad, biasGrad);
error = gradInput;
}
}
return gradients;
}
// Averages the per-batch gradient accumulators over the batch and adds them
// into the optimizer's gradient buffers (consumed later by optimizer.step()).
void Learner::applyGradients(const usize batchSize, const std::vector<Tensor<1>>& weightGradAccum, const std::vector<Tensor<1>>& biasGradAccum) {
	const float batchScalar = 1.0f / batchSize;
	// Apply gradients to weights and biases; only compute layers own parameters.
	for (usize l = net.layers.size() - 1; l > 0; l--) {
		// `const auto*`, not `const auto&`: the dynamic_cast yields a pointer,
		// and the previous reference-to-temporary-pointer read as a reference test.
		if (const auto* currLayer = dynamic_cast<internal::ComputeLayer*>(net.layers[l].get())) {
			assert(optimizer.weightGradients[l].size() == currLayer->weights.size());
			assert(optimizer.biasGradients[l].size() == currLayer->biases.size());
			for (usize i = 0; i < optimizer.weightGradients[l].size(); i++)
				optimizer.weightGradients[l][i] += weightGradAccum[l][i] * batchScalar;
			for (usize i = 0; i < currLayer->size; i++)
				optimizer.biasGradients[l][i] += biasGradAccum[l][i] * batchScalar;
		}
	}
}
// Trains the network for `epochs` epochs of mini-batch gradient descent with
// learning rate `lr`, printing a live progress table to the terminal.
// NOTE(review): `threads` is resolved below but never used afterwards —
// presumably intended for the dataloader or network; confirm.
void Learner::learn(const float lr, const usize epochs, usize threads) {
if (threads == 0)
threads = std::thread::hardware_concurrency();
if (threads == 0) {
std::cerr << "Failed to detect number of threads. Defaulting to 1" << std::endl;
threads = 1;
}
const u64 batchSize = dataLoader.batchSize;
// Integer division: a trailing partial batch is dropped.
const u64 batchesPerEpoch = dataLoader.numSamples / batchSize;
fmt::println("Training for {} batches with {} batches per epoch", batchesPerEpoch * epochs, batchesPerEpoch);
std::cout << "Epoch Train loss Test loss Test accuracy\n\n" << std::endl;
// Returns { test loss, test accuracy }
const auto getTestLossAcc = [&]() {
float loss = 0;
usize numCorrect = 0;
// loadTestSet overwrites the CURRENT buffer with the held-out samples.
dataLoader.loadTestSet();
const usize testSize = dataLoader.batchData().size();
while (dataLoader.hasNext()) {
internal::DataPoint data = dataLoader.next();
net.forward(data.input);
loss += lossFunc->forward(net.layers.back()->values, data.target);
// Accuracy = argmax(prediction) matching argmax(target).
usize guess = 0;
usize goal = 0;
for (usize i = 0; i < data.target.size(); i++) {
if (net.layers.back()->values[i] > net.layers.back()->values[guess])
guess = i;
if (data.target[i] > data.target[goal])
goal = i;
}
numCorrect += (guess == goal);
}
// Guard against an empty test set to avoid division by zero.
return std::pair<float, float>{ loss / (testSize ? testSize : 1), numCorrect / static_cast<float>(testSize ? testSize : 1) };
};
// Initialize accumulators (one gradient buffer per compute layer)
std::vector<Tensor<1>> weightGradAccum(net.layers.size());
std::vector<Tensor<1>> biasGradAccum(net.layers.size());
for (usize i = 1; i < net.layers.size(); i++) {
if (const auto* compLayer = dynamic_cast<internal::ComputeLayer*>(net.layers[i].get())) {
weightGradAccum[i].resize(compLayer->weights.size());
biasGradAccum[i].resize(compLayer->biases.size());
}
}
// Preload first batch
dataLoader.asyncPreloadBatch();
// Main loop
for (usize epoch = 0; epoch < epochs; epoch++) {
double trainLoss = 0;
ProgressBar progressBar{};
for (u64 batchIdx = 0; batchIdx < batchesPerEpoch; batchIdx++) {
// Reset accumulators per mini-batch
for (auto& t : weightGradAccum)
t.fill(0);
for (auto& t : biasGradAccum)
t.fill(0);
// Make the preloaded batch current.
dataLoader.waitForBatch();
dataLoader.swapBuffers();
// Instantly start loading next batch
dataLoader.asyncPreloadBatch();
for (u64 sample = 0; sample < batchSize; sample++) {
const internal::DataPoint& data = dataLoader.next();
net.forward(data.input);
// Accumulate training loss
trainLoss += lossFunc->forward(net.layers.back()->values, data.target);
const auto gradients = backward(data.target);
// Accumulate gradients
for (usize l = 1; l < net.layers.size(); l++) {
const auto& prevLayer = net.layers[l - 1];
if (const auto* compLayer = dynamic_cast<internal::ComputeLayer*>(net.layers[l].get())) {
for (usize i = 0; i < compLayer->size; i++) {
for (usize j = 0; j < prevLayer->size; j++) {
// Weights are flattened [previous][current].
const usize idx = j * compLayer->size + i;
assert(l < weightGradAccum.size());
assert(idx < weightGradAccum[l].size());
assert(idx < gradients[l].weightGrad.size());
weightGradAccum[l][idx] += gradients[l].weightGrad[idx];
}
assert(l < biasGradAccum.size());
assert(i < biasGradAccum[l].size());
assert(i < gradients[l].biasGrad.size());
biasGradAccum[l][i] += gradients[l].biasGrad[i];
}
}
}
}
// Average this batch into the optimizer, clip, and take one step.
applyGradients(batchSize, weightGradAccum, biasGradAccum);
optimizer.clipGrad(1);
optimizer.step(lr);
optimizer.zeroGrad();
// Redraw the status line + progress bar in place.
internal::cursor::up();
internal::cursor::up();
internal::cursor::begin();
// NOTE(review): batchIdx is 0 on the first print, so this shows inf/nan
// for the first batch of each epoch — cosmetic, but confirm.
fmt::println("{:>5L}{:>14.5f}{:>13}{:>17}", epoch, trainLoss / batchIdx / batchSize, "Pending", "Pending");
std::cout << progressBar.report(batchIdx, batchesPerEpoch, 63) << " " << std::endl;
}
const auto [testLoss, testAccuracy] = getTestLossAcc();
internal::cursor::up();
internal::cursor::clear();
internal::cursor::up();
fmt::println("{:>5L}{:>14.5f}{:>13.5f}{:>17.2f}%\n\n", epoch, trainLoss / batchesPerEpoch / batchSize, testLoss, testAccuracy * 100);
}
}
}

41
src/learner.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include "activation.h"
#include "dataloader.h"
#include "optimizer.h"
#include "loss.h"
namespace Ember {
namespace internal {
// Per-layer gradient bundle produced by Learner::backward.
struct Gradient {
Tensor<1> weightGrad; // dL/dW, flattened like the layer's weight matrix
Tensor<1> biasGrad; // dL/db
Gradient() = default;
Gradient(const Tensor<1>& weightGrad, const Tensor<1>& biasGrad) : weightGrad(weightGrad), biasGrad(biasGrad) {}
};
}
// Orchestrates training: forward passes via `net`, batches via `dataLoader`,
// parameter updates via `optimizer`, and error signals via `lossFunc`.
struct Learner {
	Network& net;
	internal::DataLoader& dataLoader;
	internal::Optimizer& optimizer;
	std::unique_ptr<internal::LossFunction> lossFunc; // Owned copy of the loss function
	// Takes the loss function by forwarding reference: the previous
	// `const LossFunction&&` bound only const rvalues and always copied, while
	// this also accepts lvalues and moves from temporaries.
	template<typename LossFunction>
	Learner(Network& net, internal::DataLoader& dataLoader, internal::Optimizer& optimizer, LossFunction&& lossFunc) : net(net), dataLoader(dataLoader), optimizer(optimizer) {
		// static_cast<LossFunction&&> is std::forward without needing <utility>.
		this->lossFunc = std::make_unique<std::decay_t<LossFunction>>(static_cast<LossFunction&&>(lossFunc));
	}
	// Returns a vector of gradients
	// RETURNS VALUES ORDERED FROM LAST TO FIRST LAYER
	std::vector<internal::Gradient> backward(const std::vector<float>& target) const;
	// Apply a gradient to the optimizer
	void applyGradients(const usize batchSize, const std::vector<Tensor<1>>& weightGradAccum, const std::vector<Tensor<1>>& biasGradAccum);
	// Main trainer functionality is through this function
	// Trains a neural network
	void learn(const float lr, const usize epochs, usize threads = 0);
};
}

23
src/loss.cpp Normal file
View File

@@ -0,0 +1,23 @@
#include "loss.h"
namespace Ember::loss {
// Mean squared error: mean over elements of (output_i - target_i)^2.
float MeanSquaredError::forward(const Tensor<1>& output, const std::vector<float>& target) {
	assert(output.size() == target.size());
	float loss = 0;
	for (usize i = 0; i < output.size(); i++) {
		// diff * diff instead of std::pow(x, 2): pow is a general transcendental
		// and far slower for a plain square.
		const float diff = output[i] - target[i];
		loss += diff * diff;
	}
	// Guard the empty case so we return 0 rather than 0/0 (NaN).
	return output.size() ? loss / output.size() : 0.0f;
}
// Gradient of MSE w.r.t. each output element: 2 * (output_i - target_i) / N.
Tensor<1> MeanSquaredError::backward(const Tensor<1>& output, const std::vector<float>& target) {
	const usize n = output.size();
	Tensor<1> gradient(n);
	const float scalar = 2.0f / n;
	for (usize i = 0; i < n; i++)
		gradient[i] = scalar * (output[i] - target[i]);
	return gradient;
}
}

22
src/loss.h Normal file
View File

@@ -0,0 +1,22 @@
#pragma once
#include <vector>
#include "layer.h"
namespace Ember {
namespace internal {
// Interface every loss function implements.
struct LossFunction {
// Scalar loss for one sample
virtual float forward(const Tensor<1>& output, const std::vector<float>& target) = 0;
// Gradient of the loss w.r.t. each output element
virtual Tensor<1> backward(const Tensor<1>& output, const std::vector<float>& target) = 0;
virtual ~LossFunction() = default;
};
}
namespace loss {
// Mean squared error: mean over elements of (output - target)^2.
struct MeanSquaredError : internal::LossFunction {
float forward(const Tensor<1>& output, const std::vector<float> &target) override;
Tensor<1> backward(const Tensor<1>& output, const std::vector<float> &target) override;
};
}
}

View File

@@ -1,15 +1,21 @@
#include "network.h"
namespace Ember {
// Switches per-layer threading by mode: EVAL parallelizes each forward pass
// across all hardware threads, TRAIN keeps layers single-threaded.
// NOTE(review): hardware_concurrency() may return 0 — confirm setThreadCount
// clamps to at least one thread.
void Network::setMode(const NetworkMode mode) {
for (usize i = 1; i < layers.size(); i++)
if (auto* layer = dynamic_cast<internal::ComputeLayer*>(layers[i].get()); layer != nullptr)
layer->setThreadCount(mode == NetworkMode::EVAL ? std::thread::hardware_concurrency() : 1);
}
// Runs a forward pass: layer 0 holds the raw input, every later layer
// consumes its predecessor's values.
void Network::forward(const Tensor<1>& input) {
	layers[0]->values = input;
	for (usize i = 1; i < layers.size(); i++)
		layers[i]->forward(*layers[i - 1]);
}
// Returns the final layer's values from the most recent forward pass.
const Tensor<1>& Network::output() const {
	return layers.back()->values;
}
std::ostream& operator<<(std::ostream& os, const Network& net) {

View File

@@ -6,6 +6,11 @@
#include <memory>
namespace Ember {
// Threading policy selector (see Network::setMode): EVAL parallelizes each
// forward pass across hardware threads, TRAIN runs layers single-threaded.
enum class NetworkMode {
EVAL,
TRAIN
};
struct Network {
std::vector<std::unique_ptr<internal::Layer>> layers;
@@ -53,6 +58,8 @@ namespace Ember {
_init(true, std::forward<Args>(args)...);
}
void setMode(const NetworkMode mode);
void forward(const Tensor<1>& input);
const Tensor<1>& output() const;

111
src/optimizer.cpp Normal file
View File

@@ -0,0 +1,111 @@
#include "optimizer.h"
namespace Ember {
namespace internal {
// Allocates one gradient buffer per layer, sized to that layer's parameters;
// non-compute layers (input, activations) keep empty buffers.
Optimizer::Optimizer(Network& net) : net(net) {
	const usize layerCount = net.layers.size();
	weightGradients.resize(layerCount);
	biasGradients.resize(layerCount);
	for (usize idx = 1; idx < layerCount; idx++) {
		auto* computeLayer = dynamic_cast<ComputeLayer*>(net.layers[idx].get());
		if (computeLayer != nullptr) {
			weightGradients[idx].resize(computeLayer->weights.size());
			biasGradients[idx].resize(computeLayer->biases.size());
		}
	}
}
void Optimizer::zeroGrad() {
for (Tensor<1>& grad : weightGradients)
grad.fill(0);
for (Tensor<1>& grad : biasGradients)
grad.fill(0);
}
// Scales all gradients (weights and biases, across all layers) so their
// global L2 norm does not exceed maxNorm.
void Optimizer::clipGrad(const float maxNorm) {
	// Accumulate the squared norm in double to limit rounding error.
	double totalNormSq = 0.0;
	// Weights gradients
	for (const auto& layerGradients : weightGradients)
		for (const float wg : layerGradients)
			totalNormSq += static_cast<double>(wg) * wg;
	// Bias gradients
	for (const auto& layerGradients : biasGradients)
		for (const float bg : layerGradients)
			totalNormSq += static_cast<double>(bg) * bg;
	// Keep the norm in double (previously narrowed to float implicitly).
	const double totalNorm = std::sqrt(totalNormSq);
	// Scale all gradients if needed
	if (totalNorm > maxNorm && totalNorm > 0.0) {
		const float scale = static_cast<float>(maxNorm / totalNorm);
		// Weights gradients
		for (auto& layerGradients : weightGradients)
			for (float& wg : layerGradients)
				wg *= scale;
		// Bias gradients
		for (auto& layerGradients : biasGradients)
			for (float& bg : layerGradients)
				bg *= scale;
	}
}
}
namespace optimizers {
// Sizes the momentum (velocity) buffers to mirror each compute layer's
// parameters; non-compute layers keep empty buffers.
SGD::SGD(Network& net, const float momentum) : Optimizer(net), momentum(momentum) {
weightVelocities.resize(net.layers.size());
biasVelocities.resize(net.layers.size());
for (usize i = 1; i < net.layers.size(); i++) {
const std::unique_ptr<internal::Layer>& l = net.layers[i];
const auto* layer = dynamic_cast<internal::ComputeLayer*>(l.get());
if (!layer)
continue;
weightVelocities[i].resize(layer->weights.size());
biasVelocities[i].resize(layer->biases.size());
}
}
// Copy constructor: duplicates gradient buffers and momentum state.
// The underlying Network reference is shared, not copied.
SGD::SGD(const SGD& other)
: Optimizer(other),
weightVelocities(other.weightVelocities),
biasVelocities(other.biasVelocities),
momentum(other.momentum) {}
// Momentum SGD update per parameter: v = momentum * v - lr * g; param += v.
void SGD::step(const float lr) {
for (usize lIdx = 1; lIdx < net.layers.size(); lIdx++) {
std::unique_ptr<internal::Layer>& l = net.layers[lIdx];
auto* layer = dynamic_cast<internal::ComputeLayer*>(l.get());
if (!layer)
// Input/activation layers have no parameters to update.
continue;
assert(weightVelocities[lIdx].size() == layer->weights.size());
assert(biasVelocities[lIdx].size() == layer->biases.size());
assert(weightGradients[lIdx].size() == layer->weights.size());
assert(biasGradients[lIdx].size() == layer->biases.size());
// Update weights with momentum
for (usize i = 0; i < layer->weights.size(); i++) {
weightVelocities[lIdx][i] = momentum * weightVelocities[lIdx][i] - lr * weightGradients[lIdx][i];
layer->weights[i] += weightVelocities[lIdx][i];
}
// Update biases with momentum
for (usize i = 0; i < layer->biases.size(); i++) {
biasVelocities[lIdx][i] = momentum * biasVelocities[lIdx][i] - lr * biasGradients[lIdx][i];
layer->biases[i] += biasVelocities[lIdx][i];
}
}
}
// Polymorphic copy for owners that need their own optimizer instance.
std::unique_ptr<internal::Optimizer> SGD::clone() const {
return std::make_unique<SGD>(*this);
}
}
}

46
src/optimizer.h Normal file
View File

@@ -0,0 +1,46 @@
#pragma once
#include <vector>
#include "network.h"
namespace Ember {
namespace internal {
// Base optimizer: owns per-layer gradient buffers that the Learner fills and
// step() consumes to update the network's parameters.
struct Optimizer {
Network& net; // The network whose parameters are updated
std::vector<Tensor<1>> weightGradients; // Per-layer dL/dW, indexed like net.layers
std::vector<Tensor<1>> biasGradients; // Per-layer dL/db
explicit Optimizer(Network& net);
Optimizer(const Optimizer& other) : net(other.net), weightGradients(other.weightGradients), biasGradients(other.biasGradients) {}
// Resets all gradient buffers to zero
void zeroGrad();
// Scales all gradients so their global L2 norm is at most maxNorm
void clipGrad(const float maxNorm);
// Applies the accumulated gradients to the network's parameters
virtual void step(float lr) = 0;
virtual std::unique_ptr<Optimizer> clone() const = 0;
virtual ~Optimizer() = default;
};
}
namespace optimizers {
// Stochastic gradient descent with classical momentum.
struct SGD : internal::Optimizer {
std::vector<Tensor<1>> weightVelocities; // Momentum state per layer (weights)
std::vector<Tensor<1>> biasVelocities; // Momentum state per layer (biases)
float momentum; // Velocity decay factor
SGD(Network& net, const float momentum = 0.9f);
SGD(const SGD& other);
void step(const float lr) override;
std::unique_ptr<Optimizer> clone() const override;
};
}
}

39
src/progbar.h Normal file
View File

@@ -0,0 +1,39 @@
#pragma once
#include "stopwatch.h"
#include "../external/fmt/format.h"
#include <sstream>
namespace Ember {
struct ProgressBar {
Stopwatch<std::chrono::milliseconds> start;
ProgressBar() {
start.start();
}
std::string report(const u64 progress, const u64 total, const u64 barWidth) {
std::ostringstream out;
out << fmt::format("{:>4.0f}% ", static_cast<float>(progress * 100) / total);
const u64 pos = barWidth * progress / total;
out << "\u2595";
for (u64 i = 0; i < barWidth - 1; ++i) {
if (i < pos) out << "\u2588";
else out << " ";
}
out << "\u258F";
const u64 elapsed = std::max<u64>(start.elapsed(), 1);
const u64 msRemaining = (total - progress) * elapsed / std::max<u64>(progress, 1);
out << fmt::format(" {}/{} at {:.2f} per sec with {} remaining", progress, total, static_cast<float>(progress) / elapsed * 1000, formatTime(msRemaining));
return out.str();
}
};
}

64
src/stopwatch.h Normal file
View File

@@ -0,0 +1,64 @@
#pragma once
#include <chrono>
#include "types.h"
namespace Ember {
// Formats a millisecond duration as "Xh Ym Zs"; durations under one second
// are rendered as "Nms". Leading zero units are omitted.
inline std::string formatTime(const u64 timeInMS) {
	const long long totalSeconds = timeInMS / 1000;
	const long long hours = totalSeconds / 3600;
	const long long minutes = (totalSeconds % 3600) / 60;
	const long long seconds = totalSeconds % 60;
	std::string out;
	if (hours > 0)
		out += std::to_string(hours) + "h ";
	if (hours > 0 || minutes > 0)
		out += std::to_string(minutes) + "m ";
	if (hours > 0 || minutes > 0 || seconds > 0)
		out += std::to_string(seconds) + "s";
	// Sub-second durations fall through with an empty string.
	if (out.empty())
		return std::to_string(timeInMS) + "ms";
	return out;
}
// Pausable stopwatch. elapsed() reports wall time in units of Precision
// (e.g. std::chrono::milliseconds), excluding time spent paused.
template<typename Precision>
class Stopwatch {
std::chrono::high_resolution_clock::time_point startTime; // When timing began
std::chrono::high_resolution_clock::time_point pauseTime; // When the current pause began
bool paused; // Whether the watch is currently paused
u64 pausedTime; // Total completed paused duration, in Precision units
public:
Stopwatch() { start(); }
// (Re)starts timing from now and clears any paused state
void start() {
startTime = std::chrono::high_resolution_clock::now();
pausedTime = 0;
paused = false;
}
void reset() { start(); }
// Elapsed time since start(), excluding paused time (including an
// in-progress pause, if any)
u64 elapsed() {
// Local copy deliberately shadows the member so the member isn't mutated.
u64 pausedTime = this->pausedTime;
if (paused)
pausedTime += std::chrono::duration_cast<Precision>(std::chrono::high_resolution_clock::now() - pauseTime).count();
return std::chrono::duration_cast<Precision>(std::chrono::high_resolution_clock::now() - startTime).count() - pausedTime;
}
// NOTE(review): pause() while already paused, or resume() without a prior
// pause(), corrupts pausedTime — confirm callers never interleave these.
void pause() {
paused = true;
pauseTime = std::chrono::high_resolution_clock::now();
}
void resume() {
paused = false;
pausedTime += std::chrono::duration_cast<Precision>(std::chrono::high_resolution_clock::now() - pauseTime).count();
}
};
}

View File

@@ -4,6 +4,8 @@
#include "../external/fmt/format.h"
#include <vector>
namespace Ember {
// Tensor recursive case
template<usize dimensionality>
@@ -25,6 +27,11 @@ namespace Ember {
subTensor.resize(restDims...);
}
// Sets every element to `value`, recursing through all sub-tensors.
void fill(const float value) {
for (auto& subTensor : data)
subTensor.fill(value);
}
usize size() const { return data.size(); }
auto begin() { return data.begin(); }
@@ -48,12 +55,16 @@ namespace Ember {
Tensor() = default;
// Wraps an existing flat float vector.
Tensor(const std::vector<float> &data) : data(data) {}
// Sized constructor with optional fill value. This replaces the old separate
// single-argument overload, which made one-argument construction ambiguous
// against this defaulted form.
explicit Tensor(const usize size, const float def = 0.0f) : data(size, def) {}
// Resizes the underlying storage; new elements are value-initialized to 0.
void resize(const usize size) {
	data.resize(size);
}
// Sets every element to `value`.
void fill(const float value) {
	std::fill(data.begin(), data.end(), value);
}
usize size() const { return data.size(); }
auto begin() { return data.begin(); }

View File

@@ -1,10 +1,15 @@
#pragma once
#include <algorithm>
#include <iostream>
#include <cassert>
#include <cstdint>
#include <vector>
#ifdef _WIN32
#define NOMINMAX
#include <windows.h>
#include <fcntl.h>
#include <io.h>
#endif
namespace Ember {
#define exitWithMsg(msg, code) \
@@ -24,4 +29,30 @@ std::exit(code); \
using i8 = int8_t;
using usize = size_t;
namespace internal {
// ANSI escape-code helpers for moving/clearing the terminal cursor,
// used to redraw the training progress display in place.
namespace cursor {
[[maybe_unused]] inline void clearAll(std::ostream& out = std::cout) { out << "\033[2J\033[H"; }
[[maybe_unused]] inline void clear(std::ostream& out = std::cout) { out << "\033[2K\r"; }
[[maybe_unused]] inline void clearDown(std::ostream& out = std::cout) { out << "\x1b[J"; }
[[maybe_unused]] inline void home(std::ostream& out = std::cout) { out << "\033[H"; }
[[maybe_unused]] inline void up(std::ostream& out = std::cout) { out << "\033[A"; }
[[maybe_unused]] inline void down(std::ostream& out = std::cout) { out << "\033[B"; }
[[maybe_unused]] inline void begin(std::ostream& out = std::cout) { out << "\033[1G"; }
[[maybe_unused]] inline void goTo(const usize x, const usize y, std::ostream& out = std::cout) { out << "\033[" << y << ";" << x << "H"; }
[[maybe_unused]] inline void hide(std::ostream& out = std::cout) { out << "\033[?25l"; }
[[maybe_unused]] inline void show(std::ostream& out = std::cout) { out << "\033[?25h"; }
}
// Switches the Windows console to UTF-8 so the Unicode box-drawing characters
// used by the progress bar render correctly; a no-op on other platforms.
struct UnicodeTerminalInitializer {
UnicodeTerminalInitializer() {
#ifdef _WIN32
SetConsoleOutputCP(CP_UTF8);
#endif
}
};
// One instance per translation unit runs the initializer before main().
static inline UnicodeTerminalInitializer unicodeTerminalInitializer;
}
}