diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp
index 0d0a5a1..ca647eb 100644
--- a/include/ShkyeraTensor.hpp
+++ b/include/ShkyeraTensor.hpp
@@ -1 +1,12 @@
-#include "src/Value.hpp"
+#pragma once
+
+#include "src/core/Type.hpp"
+#include "src/core/Value.hpp"
+#include "src/core/Vector.hpp"
+#include "src/nn/Activation.hpp"
+#include "src/nn/Layer.hpp"
+#include "src/nn/Loss.hpp"
+#include "src/nn/Module.hpp"
+#include "src/nn/Neuron.hpp"
+#include "src/nn/Optimizer.hpp"
+#include "src/nn/Sequential.hpp"
diff --git a/include/src/core/Type.hpp b/include/src/core/Type.hpp
new file mode 100644
index 0000000..07c805c
--- /dev/null
+++ b/include/src/core/Type.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+namespace shkyera::Type {
+
+using float32 = float;
+using float64 = double;
+using f32 = float;
+using f64 = double;
+
+} // namespace shkyera::Type
diff --git a/include/src/core/Utils.hpp b/include/src/core/Utils.hpp
new file mode 100644
index 0000000..943bdc2
--- /dev/null
+++ b/include/src/core/Utils.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <random>
+
+namespace shkyera::utils {
+
+std::random_device rand_dev;
+std::mt19937 generator(rand_dev());
+
+template <typename T> std::enable_if_t<std::is_floating_point_v<T>, T> sample(T from, T to) {
+    std::uniform_real_distribution<T> distribution(from, to);
+    return distribution(generator);
+}
+
+template <typename T> std::enable_if_t<std::is_floating_point_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
+    std::uniform_real_distribution<T> distribution(from, to);
+
+    std::vector<T> sampled(size);
+    for (size_t i = 0; i < size; i++) {
+        sampled[i] = distribution(generator);
+    }
+
+    return sampled;
+}
+
+template <typename T> std::enable_if_t<std::is_integral_v<T>, T> sample(T from, T to) {
+    std::uniform_int_distribution<T> distribution(from, to);
+    return distribution(generator);
+}
+
+template <typename T> std::enable_if_t<std::is_integral_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
+    std::uniform_int_distribution<T> distribution(from, to);
+
+    std::vector<T> sampled(size);
+    for (size_t i = 0; i < size; i++) {
+        sampled[i] = distribution(generator);
+    }
+
+    return sampled;
+}
+
+} // namespace shkyera::utils
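[review note] The four `sample` overloads dispatch on the numeric category through `std::enable_if_t`: floating-point types draw from `std::uniform_real_distribution`, integral types from `std::uniform_int_distribution`, each in a scalar and a bulk variant. A minimal usage sketch (hypothetical standalone file, assuming the headers are reachable from the repository root):

    #include <iostream>
    #include <vector>

    #include "include/src/core/Utils.hpp"

    int main() {
        // Scalar float in [-1, 1) -- resolves to the floating-point overload.
        float w = shkyera::utils::sample<float>(-1.0f, 1.0f);

        // Five ints in [0, 9] (inclusive) -- resolves to the integral overload.
        std::vector<int> idx = shkyera::utils::sample<int>(0, 9, 5);

        std::cout << w << ' ' << idx.size() << '\n';
    }

One caveat worth flagging: `rand_dev` and `generator` are non-inline globals defined in a header, so including `Utils.hpp` from more than one translation unit would violate the one-definition rule; marking them `inline` (C++17) would fix that.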
diff --git a/include/src/Value.hpp b/include/src/core/Value.hpp
similarity index 76%
rename from include/src/Value.hpp
rename to include/src/core/Value.hpp
index c2b6355..e37a2ca 100644
--- a/include/src/Value.hpp
+++ b/include/src/core/Value.hpp
@@ -1,14 +1,22 @@
+#pragma once
+
 #include <cmath>
 #include <functional>
 #include <memory>
 #include <unordered_set>
 #include <vector>
 
+#include "Type.hpp"
+
 namespace shkyera {
 
+template <typename T> class Optimizer;
 template <typename T> class Value;
 template <typename T> using ValuePtr = std::shared_ptr<Value<T>>;
 
+using Val32 = Value<Type::float32>;
+using Val64 = Value<Type::float64>;
+
 template <typename T> class Value : public std::enable_shared_from_this<Value<T>> {
   private:
     T _data = 0;
@@ -22,12 +30,16 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T
     std::vector<ValuePtr<T>> topologicalSort(std::vector<ValuePtr<T>> &sorted,
                                              std::unordered_set<Value<T> *> &visited);
 
   public:
+    friend class Optimizer<T>;
+
     static ValuePtr<T> create(T data);
 
     void backward();
-    T getGradient() { return _gradient; }
+    T getGradient();
 
     ValuePtr<T> tanh();
+    ValuePtr<T> relu();
+    ValuePtr<T> sigmoid();
     ValuePtr<T> exp();
     ValuePtr<T> pow(ValuePtr<T> exponent);
@@ -35,6 +47,7 @@
     template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a, ValuePtr<U> b);
     template <typename U> friend ValuePtr<U> operator*(ValuePtr<U> a, ValuePtr<U> b);
     template <typename U> friend ValuePtr<U> operator/(ValuePtr<U> a, ValuePtr<U> b);
+    template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a);
 
     template <typename U> friend std::ostream &operator<<(std::ostream &os, const ValuePtr<U> &value);
 };
@@ -43,6 +56,8 @@
 template <typename T> Value<T>::Value(T data) : _data(data) {}
 
 template <typename T> ValuePtr<T> Value<T>::create(T data) { return std::shared_ptr<Value<T>>(new Value<T>(data)); }
 
+template <typename T> T Value<T>::getGradient() { return _gradient; }
+
 template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
     ValuePtr<T> result = Value<T>::create(a->_data + b->_data);
     result->_children = {a, b};
@@ -54,6 +69,8 @@ template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
     return result;
 }
 
+template <typename T> ValuePtr<T> operator-(ValuePtr<T> a, ValuePtr<T> b) { return a + (-b); }
+
 template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {
     ValuePtr<T> result = Value<T>::create(a->_data * b->_data);
     result->_children = {a, b};
@@ -67,13 +84,39 @@
 template <typename T> ValuePtr<T> operator/(ValuePtr<T> a, ValuePtr<T> b) { return a * (b->pow(Value<T>::create(-1))); }
 
+template <typename T> ValuePtr<T> operator-(ValuePtr<T> a) { return Value<T>::create(-1) * a; }
+
 template <typename T> ValuePtr<T> Value<T>::tanh() {
     auto thisValue = this->shared_from_this();
 
     ValuePtr<T> result = Value<T>::create((std::exp(2 * thisValue->_data) - 1) / (std::exp(2 * thisValue->_data) + 1));
     result->_children = {thisValue};
     result->_backward = [thisValue, result]() {
-        thisValue->_gradient += (1 - (thisValue->_data * thisValue->_data)) * result->_gradient;
+        thisValue->_gradient += (1 - (result->_data * result->_data)) * result->_gradient;
+    };
+
+    return result;
+}
+
+template <typename T> ValuePtr<T> Value<T>::sigmoid() {
+    auto thisValue = this->shared_from_this();
+
+    ValuePtr<T> result = Value<T>::create(1 / (std::exp(-thisValue->_data) + 1));
+    result->_children = {thisValue};
+    result->_backward = [thisValue, result]() {
+        thisValue->_gradient += result->_data * (1 - result->_data) * result->_gradient;
+    };
+
+    return result;
+}
+
+template <typename T> ValuePtr<T> Value<T>::relu() {
+    auto thisValue = this->shared_from_this();
+
+    ValuePtr<T> result = Value<T>::create(_data > 0 ? _data : 0);
+    result->_children = {thisValue};
+    result->_backward = [thisValue, result]() {
+        thisValue->_gradient += (result->_data > 0 ? 1 : 0) * result->_gradient;
     };
 
     return result;
 }
@@ -103,11 +146,6 @@ template <typename T> ValuePtr<T> Value<T>::pow(ValuePtr<T> exponent) {
     return result;
 }
 
-template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
-    os << "Value(data=" << value->_data << ")";
-    return os;
-}
-
 template <typename T> std::vector<ValuePtr<T>> Value<T>::topologicalSort() {
     std::vector<ValuePtr<T>> sorted;
     std::unordered_set<Value<T> *> visited;
@@ -138,4 +176,9 @@ template <typename T> void Value<T>::backward() {
     }
 }
 
+template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
+    os << "Value(data=" << value->_data << ")";
+    return os;
+}
+
 } // namespace shkyera
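[review note] The one behavioral fix in Value.hpp is in `tanh`'s backward pass: the local derivative of tanh is 1 - tanh²(x), and `result->_data` holds tanh(x), so the gradient must be read from `result`, not from the pre-activation `thisValue->_data` as before. `sigmoid` follows the same pattern with σ'(x) = σ(x)(1 - σ(x)). A quick finite-difference sanity check (hypothetical snippet; assumes `backward()` seeds the output gradient with 1, as the removed test code implies):

    #include <cmath>
    #include <iostream>

    #include "include/src/core/Value.hpp"

    int main() {
        using namespace shkyera;

        // Analytic gradient through the graph: y = tanh(x) at x = 0.5.
        auto x = Val64::create(0.5);
        auto y = x->tanh();
        y->backward();

        // Central finite difference of tanh at the same point.
        double eps = 1e-6;
        double fd = (std::tanh(0.5 + eps) - std::tanh(0.5 - eps)) / (2 * eps);

        // Both should print roughly 0.7864 = 1 - tanh(0.5)^2.
        std::cout << x->getGradient() << " ~ " << fd << '\n';
    }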
diff --git a/include/src/core/Vector.hpp b/include/src/core/Vector.hpp
new file mode 100644
index 0000000..2e00234
--- /dev/null
+++ b/include/src/core/Vector.hpp
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <algorithm>
+
+#include "Type.hpp"
+#include "Value.hpp"
+
+namespace shkyera {
+
+template <typename T> class Vector;
+using Vec32 = Vector<Type::float32>;
+using Vec64 = Vector<Type::float64>;
+
+template <typename T> class Vector {
+  private:
+    std::vector<ValuePtr<T>> _values;
+
+  public:
+    Vector() = default;
+    Vector(std::vector<ValuePtr<T>> values);
+    static Vector<T> of(const std::vector<T> &values);
+
+    ValuePtr<T> dot(const Vector<T> &other) const;
+    ValuePtr<T> operator[](size_t index) const;
+
+    size_t size() const;
+
+    template <typename U> friend std::ostream &operator<<(std::ostream &os, const Vector<U> &vector);
+};
+
+template <typename T> Vector<T>::Vector(std::vector<ValuePtr<T>> values) { _values = values; }
+
+template <typename T> Vector<T> Vector<T>::of(const std::vector<T> &values) {
+    std::vector<ValuePtr<T>> valuePtrs;
+    valuePtrs.reserve(values.size());
+
+    std::for_each(values.begin(), values.end(),
+                  [&valuePtrs](const T &v) { valuePtrs.emplace_back(Value<T>::create(v)); });
+
+    return valuePtrs;
+}
+
+template <typename T> size_t Vector<T>::size() const { return _values.size(); }
+
+template <typename T> ValuePtr<T> Vector<T>::dot(const Vector<T> &other) const {
+    if (other.size() != size()) {
+        throw std::invalid_argument("Vectors need to be of the same size to compute the dot product. Sizes are " +
+                                    std::to_string(size()) + " and " + std::to_string(other.size()) + ".");
+    }
+
+    ValuePtr<T> result = Value<T>::create(0);
+    for (size_t i = 0; i < size(); ++i)
+        result = result + (_values[i] * other[i]);
+
+    return result;
+}
+
+template <typename T> ValuePtr<T> Vector<T>::operator[](size_t index) const { return _values[index]; }
+
+template <typename T> std::ostream &operator<<(std::ostream &os, const Vector<T> &vector) {
+    os << "Vector(size=" << vector.size() << ", data={";
+
+    for (const ValuePtr<T> val : vector._values)
+        os << val << ' ';
+
+    os << "})";
+    return os;
+}
+
+} // namespace shkyera
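[review note] `Vector` stores `ValuePtr`s rather than raw numbers, so `dot` is composed from the overloaded `+` and `*` and its result participates in autodiff like any other node; mismatched sizes throw instead of silently truncating. A short sketch of both behaviors (hypothetical):

    #include <iostream>

    #include "include/src/core/Vector.hpp"

    int main() {
        using namespace shkyera;

        auto a = Vec32::of({1, 2, 3});
        auto b = Vec32::of({4, 5, 6});

        auto d = a.dot(b); // 1*4 + 2*5 + 3*6 = 32, recorded in the graph
        d->backward();

        std::cout << d << '\n';                   // Value(data=32)
        std::cout << a[0]->getGradient() << '\n'; // expected d(dot)/d(a[0]) = b[0] = 4

        // a.dot(Vec32::of({1, 2})) would throw std::invalid_argument.
    }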
Sizes are " + + std::to_string(size()) + " and " + std::to_string(other.size()) + "."); + } + + ValuePtr result = Value::create(0); + for (size_t i = 0; i < size(); ++i) + result = result + (_values[i] * other[i]); + + return result; +} + +template ValuePtr Vector::operator[](size_t index) const { return _values[index]; } + +template std::ostream &operator<<(std::ostream &os, const Vector &vector) { + os << "Vector(size=" << vector.size() << ", data={"; + + for (const ValuePtr val : vector._values) + os << val << ' '; + + os << "})"; + return os; +} + +} // namespace shkyera diff --git a/include/src/nn/Activation.hpp b/include/src/nn/Activation.hpp new file mode 100644 index 0000000..f0085fd --- /dev/null +++ b/include/src/nn/Activation.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "../core/Type.hpp" +#include "../core/Value.hpp" + +namespace shkyera::Activation { + +template using Function = std::function(ValuePtr)>; + +template Function tanh = [](ValuePtr a) { return a->tanh(); }; +template Function relu = [](ValuePtr a) { return a->relu(); }; +template Function exp = [](ValuePtr a) { return a->exp(); }; +template Function sigmoid = [](ValuePtr a) { return a->sigmoid(); }; +template Function linear = [](ValuePtr a) { return a; }; + +} // namespace shkyera::Activation diff --git a/include/src/nn/Layer.hpp b/include/src/nn/Layer.hpp new file mode 100644 index 0000000..37c10f1 --- /dev/null +++ b/include/src/nn/Layer.hpp @@ -0,0 +1,60 @@ +#pragma once + +#include "../core/Type.hpp" +#include "Activation.hpp" +#include "Module.hpp" +#include "Neuron.hpp" + +namespace shkyera { + +template class Layer; +template using LayerPtr = std::shared_ptr>; + +using Layer32 = Layer; +using Layer64 = Layer; + +template class Layer : public Module { + private: + std::vector> _neurons; + + Layer(size_t input, size_t size, Activation::Function activation = Activation::relu); + + public: + static LayerPtr create(size_t input, size_t size, Activation::Function activation = Activation::relu); + + virtual Vector operator()(const Vector &x) const override; + virtual std::vector> parameters() const override; +}; + +template Layer::Layer(size_t input, size_t size, Activation::Function activation) { + _neurons.reserve(size); + for (size_t i = 0; i < size; ++i) { + _neurons.emplace_back(Neuron(input, activation)); + } +} + +template LayerPtr Layer::create(size_t input, size_t size, Activation::Function activation) { + return std::shared_ptr>(new Layer(input, size, activation)); +} + +template Vector Layer::operator()(const Vector &x) const { + std::vector> output(_neurons.size()); + + for (size_t i = 0; i < _neurons.size(); i++) { + output[i] = _neurons[i](x)[0]; + } + + return Vector(output); +} + +template std::vector> Layer::parameters() const { + std::vector> params; + for (const Neuron &n : _neurons) { + std::vector> neuronParams = n.parameters(); + params.insert(params.end(), neuronParams.begin(), neuronParams.end()); + } + + return params; +} + +} // namespace shkyera diff --git a/include/src/nn/Loss.hpp b/include/src/nn/Loss.hpp new file mode 100644 index 0000000..99fc4e8 --- /dev/null +++ b/include/src/nn/Loss.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "../core/Value.hpp" + +namespace shkyera::Loss { + +template using Function = std::function(Vector a, Vector b)>; + +using Function32 = Function; +using Function64 = Function; + +template +Function MSE = [](Vector a, Vector b) { + if (a.size() != b.size()) { + throw std::invalid_argument("Vectors need to be of the same size to compute the MSE loss. 
Sizes are " + + std::to_string(a.size()) + " and " + std::to_string(b.size()) + "."); + } + + ValuePtr loss = Value::create(0); + for (size_t i = 0; i < a.size(); ++i) { + loss = loss + ((a[i] - b[i])->pow(Value::create(2))); + } + + loss->backward(); + + return loss; +}; + +} // namespace shkyera::Loss diff --git a/include/src/nn/Module.hpp b/include/src/nn/Module.hpp new file mode 100644 index 0000000..3724266 --- /dev/null +++ b/include/src/nn/Module.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include "../core/Vector.hpp" + +namespace shkyera { + +template class Module; +template using ModulePtr = std::shared_ptr>; + +template class Module { + protected: + Module() = default; + + public: + Vector forward(const Vector &x) const { return (*this)(x); } + virtual Vector operator()(const Vector &x) const { return x; } + virtual std::vector> parameters() const { return {}; } +}; + +} // namespace shkyera diff --git a/include/src/nn/Neuron.hpp b/include/src/nn/Neuron.hpp new file mode 100644 index 0000000..223c39c --- /dev/null +++ b/include/src/nn/Neuron.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "../core/Type.hpp" +#include "../core/Utils.hpp" +#include "../core/Value.hpp" +#include "../core/Vector.hpp" +#include "Module.hpp" + +namespace shkyera { + +template class Neuron; +using Neuron32 = Neuron; +using Neuron64 = Neuron; + +template class Neuron { + private: + ValuePtr _bias; + Vector _weights; + std::function(ValuePtr)> _activation = [](ValuePtr a) { return a; }; + + public: + Neuron(size_t input); + Neuron(size_t input, std::function(ValuePtr)> activation); + + Vector operator()(const Vector &x) const; + std::vector> parameters() const; +}; + +template Neuron::Neuron(size_t input) { + auto weights = utils::sample(-1, 1, input); + + _weights = Vector::of(weights); + _bias = Value::create(utils::sample(-1, 1)); +} + +template +Neuron::Neuron(size_t input, std::function(ValuePtr)> activation) : Neuron(input) { + _activation = activation; +} + +template Vector Neuron::operator()(const Vector &x) const { + return Vector({_activation(_bias + _weights.dot(x))}); +} + +template std::vector> Neuron::parameters() const { + std::vector> params; + params.reserve(_weights.size() + 1); + + for (size_t i = 0; i < _weights.size(); ++i) + params.push_back(_weights[i]); + + params.push_back(_bias); + + return params; +} + +} // namespace shkyera diff --git a/include/src/nn/Optimizer.hpp b/include/src/nn/Optimizer.hpp new file mode 100644 index 0000000..4122c38 --- /dev/null +++ b/include/src/nn/Optimizer.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include + +#include "../core/Type.hpp" +#include "../core/Value.hpp" +#include "Module.hpp" + +namespace shkyera { + +using Optimizer32 = Optimizer; +using Optimizer64 = Optimizer; + +template class Optimizer { + private: + std::vector> _parameters; + T _learningRate; + + public: + Optimizer(std::vector> params, T learningRate); + + void resetGradient(); + void stepGradient(); +}; + +template +Optimizer::Optimizer(std::vector> params, T learningRate) : _learningRate(learningRate) { + _parameters = params; +} + +template void Optimizer::resetGradient() { + std::for_each(_parameters.begin(), _parameters.end(), [](ValuePtr val) { val->_gradient = 0; }); +} + +template void Optimizer::stepGradient() { + std::for_each(_parameters.begin(), _parameters.end(), + [this](ValuePtr val) { val->_data -= _learningRate * val->_gradient; }); +} + +} // namespace shkyera diff --git a/include/src/nn/Sequential.hpp b/include/src/nn/Sequential.hpp new file mode 100644 index 
diff --git a/include/src/nn/Sequential.hpp b/include/src/nn/Sequential.hpp
new file mode 100644
index 0000000..ed1da8c
--- /dev/null
+++ b/include/src/nn/Sequential.hpp
@@ -0,0 +1,76 @@
+#pragma once
+
+#include "../core/Type.hpp"
+#include "Activation.hpp"
+#include "Module.hpp"
+
+namespace shkyera {
+
+template <typename T> class Sequential;
+template <typename T> class SequentialBuilder;
+template <typename T> using SequentialPtr = std::shared_ptr<Sequential<T>>;
+
+using Sequential32 = Sequential<Type::float32>;
+using Sequential64 = Sequential<Type::float64>;
+using SequentialBuilder32 = SequentialBuilder<Type::float32>;
+using SequentialBuilder64 = SequentialBuilder<Type::float64>;
+
+template <typename T> class Sequential : public Module<T> {
+  private:
+    std::vector<ModulePtr<T>> _layers;
+
+    Sequential(const std::vector<ModulePtr<T>> &layers);
+
+  public:
+    static SequentialPtr<T> create(const std::vector<ModulePtr<T>> &layers);
+
+    virtual Vector<T> operator()(const Vector<T> &x) const override;
+    virtual std::vector<ValuePtr<T>> parameters() const override;
+};
+
+template <typename T> class SequentialBuilder {
+  private:
+    std::vector<ModulePtr<T>> _layers;
+
+    SequentialBuilder() = default;
+
+  public:
+    static SequentialBuilder<T> begin();
+
+    SequentialBuilder<T> add(ModulePtr<T> layer);
+    SequentialPtr<T> build();
+};
+
+template <typename T> Sequential<T>::Sequential(const std::vector<ModulePtr<T>> &layers) : _layers(layers) {}
+
+template <typename T> SequentialPtr<T> Sequential<T>::create(const std::vector<ModulePtr<T>> &layers) {
+    return std::shared_ptr<Sequential<T>>(new Sequential<T>(layers));
+}
+
+template <typename T> Vector<T> Sequential<T>::operator()(const Vector<T> &x) const {
+    Vector<T> out = (*_layers[0])(x);
+
+    std::for_each(_layers.begin() + 1, _layers.end(), [&out](ModulePtr<T> layer) { out = layer->forward(out); });
+
+    return out;
+}
+
+template <typename T> std::vector<ValuePtr<T>> Sequential<T>::parameters() const {
+    std::vector<ValuePtr<T>> params;
+
+    for (const ModulePtr<T> &l : _layers) {
+        std::vector<ValuePtr<T>> layerParams = l->parameters();
+        params.insert(params.end(), layerParams.begin(), layerParams.end());
+    }
+
+    return params;
+}
+
+template <typename T> SequentialBuilder<T> SequentialBuilder<T>::begin() { return SequentialBuilder<T>(); }
+template <typename T> SequentialBuilder<T> SequentialBuilder<T>::add(ModulePtr<T> layer) {
+    _layers.push_back(layer);
+    return *this;
+}
+template <typename T> SequentialPtr<T> SequentialBuilder<T>::build() { return Sequential<T>::create(_layers); }
+
+} // namespace shkyera
diff --git a/tests/main.cpp b/tests/main.cpp
index 4c55f16..5f898f9 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -3,16 +3,29 @@
 int main() {
     using namespace shkyera;
 
-    auto a = Value<float>::create(1);
-    auto b = a * a;
-    auto c = b / Value<float>::create(7);
-    auto d = c->tanh();
-    auto e = d->pow(Value<float>::create(2));
-    auto f = e->exp();
+    std::vector<Vec32> xs = {Vec32::of({0, 0}), Vec32::of({1, 0}), Vec32::of({0, 1}), Vec32::of({1, 1})};
+    std::vector<Vec32> ys = {Vec32::of({0}), Vec32::of({1}), Vec32::of({1}), Vec32::of({0})};
 
-    std::cerr << f << '\n';
-    f->backward();
+    // clang-format off
+    auto mlp = SequentialBuilder<Type::float32>::begin()
+                   .add(Layer32::create(2, 15, Activation::relu<Type::float32>))
+                   .add(Layer32::create(15, 5, Activation::relu<Type::float32>))
+                   .add(Layer32::create(5, 1, Activation::sigmoid<Type::float32>))
+                   .build();
+    // clang-format on
 
-    for (auto v : {a, b, c, d, e, f})
-        std::cerr << v->getGradient() << '\n';
+    auto optimizer = Optimizer<Type::float32>(mlp->parameters(), 0.1);
+    auto lossFunction = Loss::MSE<Type::float32>;
+
+    for (size_t epoch = 0; epoch < 1000; epoch++) {
+        optimizer.resetGradient();
+        for (size_t sample = 0; sample < xs.size(); ++sample) {
+
+            auto pred = mlp->forward(xs[sample]);
+            auto loss = lossFunction(pred, ys[sample]);
+
+            std::cerr << loss << '\n';
+        }
+        optimizer.stepGradient();
+    }
 }
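[review note] Two things about this training loop are easy to miss. First, `Loss::MSE` calls `loss->backward()` itself, so the inner loop accumulates gradients across all four XOR samples and `stepGradient()` then applies a single full-batch update per epoch; that is why `resetGradient()` sits at the top of the epoch rather than inside the sample loop. Second, nothing here prints the learned behavior; a small evaluation pass after the epoch loop would make the test observable (hypothetical addition, not part of this diff):

    // After training, each prediction should approach its XOR target.
    for (size_t sample = 0; sample < xs.size(); ++sample) {
        auto pred = mlp->forward(xs[sample]);
        std::cerr << xs[sample] << " -> " << pred[0] << '\n';
    }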