Rework tensors, switch to C++23

Quinn
2025-10-29 22:35:12 -05:00
parent 1fa79d46c0
commit e293e0ea78
10 changed files with 143 additions and 137 deletions

View File

@@ -2,4 +2,6 @@
> A neural network trainer written in C++
All code is under the namespace Ember
More information will be added as development continues
Requires C++23, OpenMP, and OpenBLAS (Boost for debug builds)

View File

@@ -11,7 +11,7 @@ endif
# Compiler and flags
CXX := clang++
CXXFLAGS := -O3 -std=c++20 -flto -funroll-loops -DNDEBUG
CXXFLAGS := -O3 -std=c++23 -flto -funroll-loops -DNDEBUG
ifeq ($(OS),Windows_NT)
ARCH := $(PROCESSOR_ARCHITECTURE)
@@ -59,12 +59,12 @@ endif
# Debug build
.PHONY: debug
debug: CXXFLAGS = -O3 -std=c++20 -flto -fsanitize=address,undefined -fno-omit-frame-pointer -D_GLIBCXX_DEBUG -D_GLIBCXX_DEBUG_PEDANTIC -Wall -Wextra
debug: CXXFLAGS = -O3 -std=c++23 -flto -fsanitize=address,undefined -fno-omit-frame-pointer -D_GLIBCXX_DEBUG -D_GLIBCXX_DEBUG_PEDANTIC -Wall -Wextra
debug: all
# Debug build
.PHONY: profile
profile: CXXFLAGS = -O3 -std=c++20 -flto -funroll-loops -ggdb -fno-omit-frame-pointer -DNDEBUG
profile: CXXFLAGS = -O3 -std=c++23 -flto -funroll-loops -ggdb -fno-omit-frame-pointer -DNDEBUG
profile: all
# Force rebuild
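The only flag change in each target is the bump from -std=c++20 to -std=c++23; the reworked Tensor below depends on it for its multi-argument operator[]. As a minimal, self-contained check that the configured clang++ accepts the feature (the file name check_cxx23.cpp is hypothetical, not part of this build):

// check_cxx23.cpp -- compiles only under -std=c++23 or newer because of the
// multi-argument subscript operator, the feature the new Tensor relies on.
#include <cstddef>
#include <vector>

struct Mat {
    std::size_t cols;
    std::vector<float> data;
    float& operator[](std::size_t i, std::size_t j) { return data[i * cols + j]; }
};

int main() {
    Mat m{4, std::vector<float>(12, 0.0f)};
    m[2, 3] = 1.0f;                  // row-major index 2 * 4 + 3
    return m[2, 3] == 1.0f ? 0 : 1;
}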

View File

@@ -44,7 +44,7 @@ namespace Ember {
else
for (auto& v : values) v /= sum;
}
Tensor<1> Softmax::backward(const Layer& previous, const Tensor<1>& gradOutput) const {
Tensor<1> Softmax::backward([[maybe_unused]] const Layer& previous, const Tensor<1>& gradOutput) const {
const usize n = gradOutput.size();
Tensor<1> result(n);
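Annotating the unused previous parameter with [[maybe_unused]] keeps the -Wall -Wextra debug build warning-free. The hunk stops before the body; for reference, a hedged sketch of the standard softmax Jacobian-vector product such a backward() usually computes, written against the names already in scope (values holds the layer's softmax outputs):

// Sketch of the usual computation; the actual body is outside this hunk.
// dL/dx_i = y_i * (g_i - sum_j g_j * y_j), with y = values and g = gradOutput.
float dot = 0.0f;
for (usize j = 0; j < n; j++) dot += gradOutput[j] * values[j];
for (usize i = 0; i < n; i++) result[i] = values[i] * (gradOutput[i] - dot);
return result;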

View File

@@ -35,7 +35,7 @@ namespace Ember {
};
struct ComputeLayer : Layer {
BlasMatrix weights; // previousSize rows and size cols
Tensor<2> weights; // previousSize rows and size cols
Tensor<1> biases;
ComputeLayer() = delete;
@@ -48,7 +48,7 @@ namespace Ember {
this->weights.resize(size, previousSize);
}
virtual std::tuple<Tensor<1>, BlasMatrix, Tensor<1>> backward(const Layer& previous, const Tensor<1>& gradOutput) const = 0;
virtual std::tuple<Tensor<1>, Tensor<2>, Tensor<1>> backward(const Layer& previous, const Tensor<1>& gradOutput) const = 0;
};
struct ActivationLayer : Layer {
@@ -62,7 +62,7 @@ namespace Ember {
struct Input : internal::Layer {
explicit Input(const usize size) : Layer(size) {}
void forward(const Layer& previous) override {}
void forward([[maybe_unused]] const Layer& previous) override {}
std::unique_ptr<Layer> clone() override {
return std::make_unique<Input>(*this);
@@ -99,7 +99,7 @@ namespace Ember {
outputSize, // rows of W
inputSize, // cols of W
1.0f, // alpha
weights.data.data(), // W data
weights.ptr(), // W data
inputSize, // lda (leading dimension, number of cols)
previous.values.ptr(), // x vector
1, // incx
@@ -109,20 +109,20 @@ namespace Ember {
);
}
std::tuple<Tensor<1>, BlasMatrix, Tensor<1>> backward(const Layer& previous, const Tensor<1>& gradOutput) const override {
std::tuple<Tensor<1>, Tensor<2>, Tensor<1>> backward(const Layer& previous, const Tensor<1>& gradOutput) const override {
const usize inputSize = previous.size;
const usize outputSize = size;
Tensor<1> gradInput(inputSize, 0.0f);
BlasMatrix weightGrad(weights.rows, weights.cols);
Tensor<1> biasGrad(size, 0.0f);
Tensor<1> gradInput(inputSize);
Tensor<2> weightGrad(weights.dims());
Tensor<1> biasGrad(size);
// Compute gradients
for (usize curr = 0; curr < outputSize; curr++) {
biasGrad[curr] = gradOutput[curr];
for (usize prev = 0; prev < inputSize; prev++) {
gradInput[prev] += weights(curr, prev) * gradOutput[curr];
weightGrad(curr, prev) += previous.values[prev] * gradOutput[curr];
gradInput[prev] += weights[curr, prev] * gradOutput[curr];
weightGrad[curr, prev] += previous.values[prev] * gradOutput[curr];
}
}
@@ -134,9 +134,9 @@ namespace Ember {
}
std::string str() const override {
return fmt::format("Linear - {} input features and {} output features", weights.cols, size);
return fmt::format("Linear - {} input features and {} output features", weights.dim(1), size);
}
u64 numParams() const override { return weights.data.size() + biases.size(); }
u64 numParams() const override { return weights.size() + biases.size(); }
};
}
}
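In matrix form, the loops in backward() compute the standard linear-layer gradients, with g = gradOutput and x = previous.values:

\frac{\partial L}{\partial b} = g, \qquad
\frac{\partial L}{\partial W} = g\,x^{\top}, \qquad
\frac{\partial L}{\partial x} = W^{\top} g

The weight gradient is the outer product of the output gradient with the input, and the input gradient reuses the transposed weights, mirroring the Wx + b product that forward() evaluates with cblas_sgemv.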

View File

@@ -26,7 +26,7 @@ namespace Ember {
return gradients;
}
void Learner::applyGradients(const usize batchSize, const std::vector<BlasMatrix>& weightGradAccum, const std::vector<Tensor<1>>& biasGradAccum) {
void Learner::applyGradients(const usize batchSize, const std::vector<Tensor<2>>& weightGradAccum, const std::vector<Tensor<1>>& biasGradAccum) {
const float batchScalar = 1.0f / batchSize;
// Apply gradients to the optimizer
for (usize l = net.layers.size() - 1; l > 0; l--) {
@@ -61,7 +61,7 @@ namespace Ember {
std::pair<float, float> test{};
// Accumulators
std::vector<BlasMatrix> weightGradAccum(net.layers.size());
std::vector<Tensor<2>> weightGradAccum(net.layers.size());
std::vector<Tensor<1>> biasGradAccum(net.layers.size());
const u64 batchSize = dataLoader.batchSize;
@@ -118,7 +118,7 @@ namespace Ember {
for (usize i = 1; i < net.layers.size(); i++) {
if (auto* compLayer = dynamic_cast<internal::ComputeLayer*>(net.layers[i].get())) {
weightGradAccum[i].resize(compLayer->weights.rows, compLayer->weights.cols);
weightGradAccum[i].resize(compLayer->weights.dims());
biasGradAccum[i].resize(compLayer->biases.size());
computeLayers.push_back(compLayer);
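The 1.0f / batchSize scalar in applyGradients suggests the usual mini-batch average: the per-sample gradients accumulated in weightGradAccum and biasGradAccum are scaled down before the optimizer consumes them,

\bar g = \frac{1}{B} \sum_{i=1}^{B} g_i

so the effective step size does not grow with the batch size.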

View File

@@ -9,11 +9,11 @@
namespace Ember {
namespace internal {
struct Gradient {
BlasMatrix weightGrad;
Tensor<2> weightGrad;
Tensor<1> biasGrad;
Gradient() = default;
Gradient(const BlasMatrix& weightGrad, const Tensor<1>& biasGrad) : weightGrad(weightGrad), biasGrad(biasGrad) {}
Gradient(const Tensor<2>& weightGrad, const Tensor<1>& biasGrad) : weightGrad(weightGrad), biasGrad(biasGrad) {}
};
}
@@ -56,7 +56,7 @@ namespace Ember {
std::vector<internal::Gradient> backward(const Network& net, const std::vector<float>& target) const;
// Apply a gradient to the optimizer
void applyGradients(const usize batchSize, const std::vector<BlasMatrix>& weightGradAccum, const std::vector<Tensor<1>>& biasGradAccum);
void applyGradients(const usize batchSize, const std::vector<Tensor<2>>& weightGradAccum, const std::vector<Tensor<1>>& biasGradAccum);
// Main trainer functionality is through this function
// Trains a neural network

View File

@@ -11,13 +11,13 @@ namespace Ember {
if (!layer)
continue;
weightGradients[i].resize(layer->weights.rows, layer->weights.cols);
weightGradients[i].resize(layer->weights.dims());
biasGradients[i].resize(layer->biases.size());
}
}
void Optimizer::zeroGrad() {
for (BlasMatrix& grad : weightGradients)
for (auto& grad : weightGradients)
grad.fill(0);
for (Tensor<1>& grad : biasGradients)
@@ -29,7 +29,7 @@ namespace Ember {
double totalNormSq = 0.0;
// Weights gradients
for (const auto& layerGradients : weightGradients)
for (const float wg : layerGradients.data)
for (const float wg : layerGradients)
totalNormSq += wg * wg;
// Bias gradients
@@ -45,7 +45,7 @@ namespace Ember {
// Weights gradients
for (auto& layerGradients : weightGradients)
for (float& wg : layerGradients.data)
for (float& wg : layerGradients)
wg *= scale;
// Bias gradients
@@ -67,7 +67,7 @@ namespace Ember {
if (!layer)
continue;
weightVelocities[i].resize(layer->weights.rows, layer->weights.cols);
weightVelocities[i].resize(layer->weights.dims());
biasVelocities[i].resize(layer->biases.size());
}
}
@@ -85,15 +85,15 @@ namespace Ember {
assert(biasGradients[lIdx].size() == layer->biases.size());
// Update weights with momentum
for (usize i = 0; i < layer->weights.data.size(); i++) {
for (usize i = 0; i < layer->weights.size(); i++) {
weightVelocities[lIdx].data[i] = momentum * weightVelocities[lIdx].data[i] - lr * weightGradients[lIdx].data[i];
layer->weights.data[i] += weightVelocities[lIdx].data[i];
}
// Update biases with momentum
for (usize i = 0; i < layer->biases.size(); i++) {
biasVelocities[lIdx][i] = momentum * biasVelocities[lIdx][i] - lr * biasGradients[lIdx][i];
layer->biases[i] += biasVelocities[lIdx][i];
biasVelocities[lIdx].data[i] = momentum * biasVelocities[lIdx].data[i] - lr * biasGradients[lIdx].data[i];
layer->biases.data[i] += biasVelocities[lIdx].data[i];
}
}
}
@@ -118,9 +118,9 @@ namespace Ember {
if (!layer)
continue;
weightVelocities[i].resize(layer->weights.rows, layer->weights.cols);
weightVelocities[i].resize(layer->weights.dims());
biasVelocities[i].resize(layer->biases.size());
weightMomentum[i].resize(layer->weights.rows, layer->weights.cols);
weightMomentum[i].resize(layer->weights.dims());
biasMomentum[i].resize(layer->biases.size());
}
}
@@ -142,7 +142,7 @@ namespace Ember {
assert(biasGradients[lIdx].size() == layer->biases.size());
// Update weights
for (usize i = 0; i < layer->weights.data.size(); i++) {
for (usize i = 0; i < layer->weights.size(); i++) {
layer->weights.data[i] *= 1.0f - lr * decay;
weightMomentum[lIdx].data[i] = beta1 * weightMomentum[lIdx].data[i] + (1.0f - beta1) * weightGradients[lIdx].data[i];
@@ -157,16 +157,16 @@ namespace Ember {
// Update biases
for (usize i = 0; i < layer->biases.size(); i++) {
layer->biases[i] *= (1.0f - lr * decay);
layer->biases.data[i] *= (1.0f - lr * decay);
biasMomentum[lIdx][i] = beta1 * biasMomentum[lIdx][i] + (1.0f - beta1) * biasGradients[lIdx][i];
biasVelocities[lIdx][i] = beta2 * biasVelocities[lIdx][i] + (1.0f - beta2) * biasGradients[lIdx][i] * biasGradients[lIdx][i];
biasMomentum[lIdx].data[i] = beta1 * biasMomentum[lIdx].data[i] + (1.0f - beta1) * biasGradients[lIdx].data[i];
biasVelocities[lIdx].data[i] = beta2 * biasVelocities[lIdx].data[i] + (1.0f - beta2) * biasGradients[lIdx].data[i] * biasGradients[lIdx].data[i];
// Bias correction
const float mHat = biasMomentum[lIdx][i] / biasCorr1;
const float vHat = biasVelocities[lIdx][i] / biasCorr2;
const float mHat = biasMomentum[lIdx].data[i] / biasCorr1;
const float vHat = biasVelocities[lIdx].data[i] / biasCorr2;
layer->biases[i] -= lr * mHat / (std::sqrt(vHat) + epsilon);
layer->biases.data[i] -= lr * mHat / (std::sqrt(vHat) + epsilon);
}
}
}
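In update-rule form, the loops above implement classical SGD with momentum and an AdamW-style step (weight decay applied directly to the parameter, then bias-corrected first and second moments, with biasCorr1 = 1 - \beta_1^t and biasCorr2 = 1 - \beta_2^t):

v \leftarrow \mu v - \eta g, \qquad \theta \leftarrow \theta + v \qquad \text{(SGD with momentum)}

\theta \leftarrow \theta (1 - \eta \lambda), \quad
m \leftarrow \beta_1 m + (1 - \beta_1) g, \quad
v \leftarrow \beta_2 v + (1 - \beta_2) g^2, \quad
\hat m = \frac{m}{1 - \beta_1^t}, \quad
\hat v = \frac{v}{1 - \beta_2^t}, \quad
\theta \leftarrow \theta - \eta \frac{\hat m}{\sqrt{\hat v} + \epsilon} \qquad \text{(Adam with decoupled decay)}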

View File

@@ -9,7 +9,7 @@ namespace Ember {
struct Optimizer {
Network& net;
std::vector<BlasMatrix> weightGradients;
std::vector<Tensor<2>> weightGradients;
std::vector<Tensor<1>> biasGradients;
explicit Optimizer(Network& net);
@@ -29,7 +29,7 @@ namespace Ember {
namespace optimizers {
struct SGD : internal::Optimizer {
std::vector<BlasMatrix> weightVelocities;
std::vector<Tensor<2>> weightVelocities;
std::vector<Tensor<1>> biasVelocities;
float momentum;
@@ -49,9 +49,9 @@ namespace Ember {
float decay;
usize iteration = 0;
std::vector<BlasMatrix> weightVelocities;
std::vector<Tensor<2>> weightVelocities;
std::vector<Tensor<1>> biasVelocities;
std::vector<BlasMatrix> weightMomentum;
std::vector<Tensor<2>> weightMomentum;
std::vector<Tensor<1>> biasMomentum;
explicit Adam(Network& net, const float beta1 = 0.9f, const float beta2 = 0.999f, const float epsilon = 1e-08, const float decay = 0.01f);

View File

@@ -5,118 +5,108 @@
#include "../external/fmt/format.h"
#include <vector>
#include <array>
namespace Ember {
// Tensor recursive case
// Tensor currently has a rather foolish implementation since it doesn't flatten the memory
namespace internal {
template <typename T>
concept UsizeLike = std::is_same_v<std::decay_t<T>, usize>;
}
template<usize dimensionality>
struct Tensor {
static_assert(dimensionality > 1, "dimensionality must be >= 1");
std::vector<Tensor<dimensionality - 1>> data;
Tensor() = default;
// Resize with a variadic pack of sizes, e.g. tensor.resize(3, 4, 5);
template<typename... Args>
void resize(const usize firstDim, Args... restDims) {
static_assert(sizeof...(Args) == dimensionality - 1,
"Number of arguments to resize() must match tensor dimensionality");
data.resize(firstDim);
for (auto& subTensor : data)
subTensor.resize(restDims...);
}
void fill(const float value) {
for (auto& subTensor : data)
subTensor.fill(value);
}
usize size() const { return data.size(); }
auto begin() { return data.begin(); }
auto end() { return data.end(); }
auto begin() const { return data.begin(); }
auto end() const { return data.end(); }
Tensor<dimensionality - 1>& operator[](const usize idx) { return data[idx]; }
const Tensor<dimensionality - 1>& operator[](const usize idx) const { return data[idx]; }
Tensor& operator=(const Tensor& other) {
data = other.data;
return *this;
}
};
// Tensor base case
template<>
struct Tensor<1> {
std::array<usize, dimensionality> dimensions;
std::vector<float> data;
Tensor() = default;
Tensor(const std::vector<float> &data) : data(data) {}
explicit Tensor(const usize size, const float def = 0.0f) : data(size, def) {}
void resize(const usize size) {
template<internal::UsizeLike... Args>
explicit Tensor(const Args... args) {
static_assert(sizeof...(Args) == dimensionality, "Tensor must have the same size and dimensionality");
dimensions = { args... };
data.resize((args * ...));
}
explicit Tensor(const std::array<usize, dimensionality>& dimensions) : dimensions(dimensions) {
u64 size = 1;
for (const usize d : dimensions)
size *= d;
data.resize(size);
}
void fill(const float value) {
std::fill(data.begin(), data.end(), value);
Tensor(const std::vector<float>& input) requires (dimensionality == 1) {
dimensions[0] = input.size();
data = input;
}
usize size() const { return data.size(); }
auto begin() { return data.begin(); }
auto end() { return data.end(); }
auto begin() const { return data.begin(); }
auto end() const { return data.end(); }
auto ptr() { return data.data(); }
auto ptr() const { return data.data(); }
float& operator[](const usize idx) { return data[idx]; }
const float& operator[](const usize idx) const { return data[idx]; }
Tensor& operator=(const Tensor& other) = default;
friend std::ostream& operator<<(std::ostream& os, const Tensor<1>& t) {
os << "[";
for (usize i = 0; i < t.size(); i++) {
if (i < t.size() - 1)
os << fmt::format("{}, ", t.data[i]);
else
os << fmt::format("{}]", t.data[i]);
}
return os;
template<internal::UsizeLike... Args>
void resize(Args... args) {
static_assert(sizeof...(Args) == dimensionality, "Resized tensor must have the same dimensionality");
dimensions = { args... };
data.resize((args * ...));
}
};
struct BlasMatrix {
usize rows{};
usize cols{};
std::vector<float> data;
void resize(const std::array<usize, dimensionality>& newDims) {
u64 size = 1;
dimensions = newDims;
BlasMatrix() = default;
BlasMatrix(const usize rows, const usize cols) : rows(rows), cols(cols), data(rows * cols) {}
void resize(const usize rows, const usize cols) {
this->rows = rows;
this->cols = cols;
data.resize(rows * cols);
for (const usize d : dimensions)
size *= d;
data.resize(size);
}
float* ptr() { return data.data(); }
const float* ptr() const { return data.data(); }
usize size() const { return data.size(); }
auto begin() { return data.begin(); }
auto begin() const { return data.begin(); }
auto end() { return data.end(); }
auto end() const { return data.end(); }
void fill(const float value) {
for (float& f : data)
f = value;
}
// (i, j) access (row i, column j)
float& operator()(const usize i, const usize j) { return data[i * cols + j]; }
const float& operator()(const usize i, const usize j) const { return data[i * cols + j]; }
// Get the tensor's dimensions (shape)
auto dims() { return dimensions; }
const auto& dims() const { return dimensions; }
usize dim(const usize idx) const { return dimensions[idx]; }
template<typename... Args>
float& operator[](Args... args) {
static_assert(sizeof...(Args) == dimensionality, "Access must match tensor dimensionality");
std::array<usize, dimensionality> indices{ static_cast<usize>(args)... };
usize idx = 0;
usize stride = 1;
for (int i = dimensionality - 1; i >= 0; i--) {
idx += indices[i] * stride;
stride *= dimensions[i];
}
assert(idx < data.size());
return data[idx];
}
template<typename... Args>
const float& operator[](Args... args) const {
static_assert(sizeof...(Args) == dimensionality, "Access must match tensor dimensionality");
std::array<usize, dimensionality> indices{ static_cast<usize>(args)... };
usize idx = 0;
usize stride = 1;
for (int i = dimensionality - 1; i >= 0; i--) {
idx += indices[i] * stride;
stride *= dimensions[i];
}
assert(idx < data.size());
return data[idx];
}
};
}
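A minimal usage sketch of the reworked Tensor; the header path and the exact definition of Ember::usize are assumptions, the points of interest being the flat row-major storage and the C++23 multi-argument operator[]:

#include "tensor.h"   // assumed path to the Ember Tensor header shown above
#include <vector>

int main() {
    using Ember::usize;                       // assumed: Ember's unsigned index alias
    Ember::Tensor<2> w(usize{3}, usize{4});   // 3 x 4, one contiguous row-major buffer
    w.fill(0.5f);
    w[1, 2] = 2.0f;                           // C++23 multi-argument subscript

    Ember::Tensor<1> bias(std::vector<float>{0.1f, 0.2f, 0.3f, 0.4f});
    return (w.dim(1) == bias.size() && w.size() == 12u) ? 0 : 1;   // 0 on success
}

Flattening the storage is what lets Tensor<2> replace BlasMatrix for the layer weights while still handing weights.ptr() straight to OpenBLAS.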

View File

@@ -1,7 +1,6 @@
#pragma once
#include <iostream>
#include <cassert>
#include <cstdint>
#ifdef _WIN32
@@ -11,12 +10,27 @@
#include <io.h>
#endif
#ifndef NDEBUG
#include <boost/stacktrace.hpp>
#endif
#undef assert
namespace Ember {
#define exitWithMsg(msg, code) \
{ \
std::cout << "**ERROR** " << msg << std::endl; \
std::exit(code); \
}
#define exitWithMsg(msg, code) { \
std::cout << "**ERROR** " << msg << std::endl; \
std::exit(code); \
}
#ifndef NDEBUG
#define assert(x) \
if (!(x)) [[unlikely]] { \
std::cout << std::endl << std::endl << boost::stacktrace::stacktrace() << std::endl << "Assertion failed: " << #x << ", file " << __FILE__ << ", line " << __LINE__ << std::endl; \
std::terminate(); \
}
#else
#define assert(x) ;
#endif
using u64 = uint64_t;
using u32 = uint32_t;
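With this redefinition, a failing check in a debug build prints a Boost stacktrace ahead of the usual file/line message and then calls std::terminate; in release builds the macro expands to an empty statement. A small hypothetical call site:

// Hypothetical call site. In a debug build (NDEBUG not defined) a failure prints
// boost::stacktrace::stacktrace(), then
// "Assertion failed: !batch.empty(), file ..., line ...", then std::terminate().
// With NDEBUG defined the check disappears entirely.
std::vector<float> batch{};
assert(!batch.empty());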