From ecebf0de67d26e6c706c88fe0dff45be525f21c9 Mon Sep 17 00:00:00 2001
From: "szewczyk.franciszek02"
Date: Mon, 6 Nov 2023 16:02:31 +0100
Subject: [PATCH 1/6] Activation and Neurons

---
 include/ShkyeraTensor.hpp |  4 ++++
 include/src/Neuron.hpp    | 36 ++++++++++++++++++++++++++++
 include/src/Utils.hpp     | 42 +++++++++++++++++++++++++++++++++
 include/src/Value.hpp     | 32 ++++++++++++++++++++++++-
 include/src/Vector.hpp    | 49 +++++++++++++++++++++++++++++++++++++++
 tests/main.cpp            | 14 ++++------
 6 files changed, 166 insertions(+), 11 deletions(-)
 create mode 100644 include/src/Neuron.hpp
 create mode 100644 include/src/Utils.hpp
 create mode 100644 include/src/Vector.hpp

diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp
index 0d0a5a1..7195781 100644
--- a/include/ShkyeraTensor.hpp
+++ b/include/ShkyeraTensor.hpp
@@ -1 +1,5 @@
+#pragma once
+
+#include "src/Neuron.hpp"
 #include "src/Value.hpp"
+#include "src/Vector.hpp"
diff --git a/include/src/Neuron.hpp b/include/src/Neuron.hpp
new file mode 100644
index 0000000..fe16eb1
--- /dev/null
+++ b/include/src/Neuron.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "Utils.hpp"
+#include "Value.hpp"
+#include "Vector.hpp"
+
+namespace shkyera {
+
+template <typename T> class Neuron {
+  private:
+    ValuePtr<T> _bias;
+    Vector<T> _weights;
+    std::function<ValuePtr<T>(ValuePtr<T>)> _activation = [](ValuePtr<T> a) { return a; };
+
+  public:
+    Neuron(size_t input);
+    Neuron(size_t input, std::function<ValuePtr<T>(ValuePtr<T>)> activation);
+
+    ValuePtr<T> operator()(const Vector<T> &x);
+};
+
+template <typename T> Neuron<T>::Neuron(size_t input) {
+    _bias = Value<T>::create(utils::sample<T>(-1, 1));
+    _weights = utils::sample<T>(-1, 1, input);
+}
+
+template <typename T>
+Neuron<T>::Neuron(size_t input, std::function<ValuePtr<T>(ValuePtr<T>)> activation) : Neuron(input) {
+    _activation = activation;
+}
+
+template <typename T> ValuePtr<T> Neuron<T>::operator()(const Vector<T> &x) {
+    return _activation(_bias + _weights.dot(x));
+}
+
+} // namespace shkyera
diff --git a/include/src/Utils.hpp b/include/src/Utils.hpp
new file mode 100644
index 0000000..943bdc2
--- /dev/null
+++ b/include/src/Utils.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <random>
+
+namespace shkyera::utils {
+
+std::random_device rand_dev;
+std::mt19937 generator(rand_dev());
+
+template <typename T> std::enable_if_t<std::is_floating_point_v<T>, T> sample(T from, T to) {
+    std::uniform_real_distribution<T> distribution(from, to);
+    return distribution(generator);
+}
+
+template <typename T> std::enable_if_t<std::is_floating_point_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
+    std::uniform_real_distribution<T> distribution(from, to);
+
+    std::vector<T> sampled(size);
+    for (size_t i = 0; i < size; i++) {
+        sampled[i] = distribution(generator);
+    }
+
+    return sampled;
+}
+
+template <typename T> std::enable_if_t<std::is_integral_v<T>, T> sample(T from, T to) {
+    std::uniform_int_distribution<T> distribution(from, to);
+    return distribution(generator);
+}
+
+template <typename T> std::enable_if_t<std::is_integral_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
+    std::uniform_int_distribution<T> distribution(from, to);
+
+    std::vector<T> sampled(size);
+    for (size_t i = 0; i < size; i++) {
+        sampled[i] = distribution(generator);
+    }
+
+    return sampled;
+}
+
+} // namespace shkyera::utils
diff --git a/include/src/Value.hpp b/include/src/Value.hpp
index c2b6355..0cb2ef0 100644
--- a/include/src/Value.hpp
+++ b/include/src/Value.hpp
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <cmath>
 #include <functional>
 #include <iostream>
@@ -25,9 +27,10 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T>> {
     static ValuePtr<T> create(T data);
 
     void backward();
-    T getGradient() { return _gradient; }
+    T getGradient();
 
     ValuePtr<T> tanh();
+    ValuePtr<T> relu();
     ValuePtr<T> exp();
     ValuePtr<T> pow(ValuePtr<T> exponent);
 
@@ -35,6 +38,7 @@
template class Value : public std::enable_shared_from_this template friend ValuePtr operator-(ValuePtr a, ValuePtr b); template friend ValuePtr operator*(ValuePtr a, ValuePtr b); template friend ValuePtr operator/(ValuePtr a, ValuePtr b); + template friend ValuePtr operator-(ValuePtr a); template friend std::ostream &operator<<(std::ostream &os, const ValuePtr &value); }; @@ -43,6 +47,8 @@ template Value::Value(T data) : _data(data) {} template ValuePtr Value::create(T data) { return std::shared_ptr>(new Value(data)); } +template T Value::getGradient() { return _gradient; } + template ValuePtr operator+(ValuePtr a, ValuePtr b) { ValuePtr result = Value::create(a->_data + b->_data); result->_children = {a, b}; @@ -54,6 +60,8 @@ template ValuePtr operator+(ValuePtr a, ValuePtr b) { return result; } +template ValuePtr operator-(ValuePtr a, ValuePtr b) { return a + (-b); } + template ValuePtr operator*(ValuePtr a, ValuePtr b) { ValuePtr result = Value::create(a->_data * b->_data); result->_children = {a, b}; @@ -67,6 +75,8 @@ template ValuePtr operator*(ValuePtr a, ValuePtr b) { template ValuePtr operator/(ValuePtr a, ValuePtr b) { return a * (b->pow(Value::create(-1))); } +template ValuePtr operator-(ValuePtr a) { return Value::create(-1) * a; } + template ValuePtr Value::tanh() { auto thisValue = this->shared_from_this(); @@ -79,6 +89,18 @@ template ValuePtr Value::tanh() { return result; } +template ValuePtr Value::relu() { + auto thisValue = this->shared_from_this(); + + ValuePtr result = Value::create(_data > 0 ? _data : 0); + result->_children = {thisValue}; + result->_backward = [thisValue, result]() { + thisValue->_gradient += (result->_data > 0 ? 1 : 0) * result->_gradient; + }; + + return result; +} + template ValuePtr Value::exp() { auto thisValue = this->shared_from_this(); @@ -138,4 +160,12 @@ template void Value::backward() { } } +namespace Activation { + +template std::function(ValuePtr)> tanh = [](ValuePtr a) { return a->tanh(); }; +template std::function(ValuePtr)> relu = [](ValuePtr a) { return a->relu(); }; +template std::function(ValuePtr)> exp = [](ValuePtr a) { return a->exp(); }; + +} // namespace Activation + } // namespace shkyera diff --git a/include/src/Vector.hpp b/include/src/Vector.hpp new file mode 100644 index 0000000..c0c2213 --- /dev/null +++ b/include/src/Vector.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include "Value.hpp" + +namespace shkyera { + +template class Vector { + private: + std::vector> _values; + + public: + Vector() = default; + Vector(const std::vector &values); + Vector(std::vector> values); + + ValuePtr dot(const Vector &other) const; + + ValuePtr operator[](size_t index) const; + + size_t size() const; +}; + +template Vector::Vector(const std::vector &values) { + _values.reserve(values.size()); + std::for_each(values.begin(), values.end(), [this](const T &val) { _values.push_back(Value::create(val)); }); +} + +template Vector::Vector(std::vector> values) { _values = values; } + +template size_t Vector::size() const { return _values.size(); } + +template ValuePtr Vector::dot(const Vector &other) const { + if (other.size() != size()) { + throw std::invalid_argument("Vectors need to be of the same size to compute the dot product. 
Sizes are " + + std::to_string(size()) + " and " + std::to_string(other.size()) + "."); + } + + ValuePtr result = Value::create(0); + for (size_t i = 0; i < size(); ++i) + result = result + (_values[i] * other[i]); + + return result; +} + +template ValuePtr Vector::operator[](size_t index) const { return _values[index]; } + +} // namespace shkyera diff --git a/tests/main.cpp b/tests/main.cpp index 4c55f16..2df5bc8 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -3,16 +3,10 @@ int main() { using namespace shkyera; - auto a = Value::create(1); - auto b = a * a; - auto c = b / Value::create(7); - auto d = c->tanh(); - auto e = d->pow(Value::create(2)); - auto f = e->exp(); + auto n = Neuron(5, Activation::tanh); + auto x = Vector({1, 2, 3, 4, 5}); - std::cerr << f << '\n'; - f->backward(); + auto a = n(x); - for (auto v : {a, b, c, d, e, f}) - std::cerr << v->getGradient() << '\n'; + std::cerr << a << '\n'; } From bb579f728f718a831958a8ba7e05ef8633e81805 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Mon, 6 Nov 2023 16:27:12 +0100 Subject: [PATCH 2/6] Layer definition --- include/ShkyeraTensor.hpp | 2 ++ include/src/Activation.hpp | 13 +++++++++++++ include/src/Layer.hpp | 33 +++++++++++++++++++++++++++++++++ include/src/Neuron.hpp | 4 ++-- include/src/Value.hpp | 16 ++++------------ include/src/Vector.hpp | 12 ++++++++++++ tests/main.cpp | 6 +++--- 7 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 include/src/Activation.hpp create mode 100644 include/src/Layer.hpp diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp index 7195781..d2d99ca 100644 --- a/include/ShkyeraTensor.hpp +++ b/include/ShkyeraTensor.hpp @@ -1,5 +1,7 @@ #pragma once +#include "src/Activation.hpp" +#include "src/Layer.hpp" #include "src/Neuron.hpp" #include "src/Value.hpp" #include "src/Vector.hpp" diff --git a/include/src/Activation.hpp b/include/src/Activation.hpp new file mode 100644 index 0000000..069e838 --- /dev/null +++ b/include/src/Activation.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include "Value.hpp" + +namespace shkyera::Activation { + +template using Function = std::function(ValuePtr)>; + +template Function tanh = [](ValuePtr a) { return a->tanh(); }; +template Function relu = [](ValuePtr a) { return a->relu(); }; +template Function exp = [](ValuePtr a) { return a->exp(); }; + +} // namespace shkyera::Activation diff --git a/include/src/Layer.hpp b/include/src/Layer.hpp new file mode 100644 index 0000000..b4cb993 --- /dev/null +++ b/include/src/Layer.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include "Activation.hpp" +#include "Neuron.hpp" + +namespace shkyera { + +template class Layer { + private: + std::vector> _neurons; + + public: + Layer(size_t size, size_t input, Activation::Function activation = Activation::relu); + + Vector operator()(const Vector &x) const; +}; + +template Layer::Layer(size_t size, size_t input, Activation::Function activation) { + _neurons.reserve(size); + for (size_t i = 0; i < size; ++i) { + _neurons.emplace_back(Neuron(input, activation)); + } +} + +template Vector Layer::operator()(const Vector &x) const { + std::vector> output(_neurons.size()); + for (size_t i = 0; i < _neurons.size(); i++) { + output[i] = _neurons[i](x); + } + return Vector(output); +} + +} // namespace shkyera diff --git a/include/src/Neuron.hpp b/include/src/Neuron.hpp index fe16eb1..2b872b7 100644 --- a/include/src/Neuron.hpp +++ b/include/src/Neuron.hpp @@ -16,7 +16,7 @@ template class Neuron { Neuron(size_t input); Neuron(size_t input, std::function(ValuePtr)> 
activation); - ValuePtr operator()(const Vector &x); + ValuePtr operator()(const Vector &x) const; }; template Neuron::Neuron(size_t input) { @@ -29,7 +29,7 @@ Neuron::Neuron(size_t input, std::function(ValuePtr)> activati _activation = activation; } -template ValuePtr Neuron::operator()(const Vector &x) { +template ValuePtr Neuron::operator()(const Vector &x) const { return _activation(_bias + _weights.dot(x)); } diff --git a/include/src/Value.hpp b/include/src/Value.hpp index 0cb2ef0..5fabe4b 100644 --- a/include/src/Value.hpp +++ b/include/src/Value.hpp @@ -125,11 +125,6 @@ template ValuePtr Value::pow(ValuePtr exponent) { return result; } -template std::ostream &operator<<(std::ostream &os, const ValuePtr &value) { - os << "Value(data=" << value->_data << ")"; - return os; -} - template std::vector> Value::topologicalSort() { std::vector> sorted; std::unordered_set *> visited; @@ -160,12 +155,9 @@ template void Value::backward() { } } -namespace Activation { - -template std::function(ValuePtr)> tanh = [](ValuePtr a) { return a->tanh(); }; -template std::function(ValuePtr)> relu = [](ValuePtr a) { return a->relu(); }; -template std::function(ValuePtr)> exp = [](ValuePtr a) { return a->exp(); }; - -} // namespace Activation +template std::ostream &operator<<(std::ostream &os, const ValuePtr &value) { + os << "Value(data=" << value->_data << ")"; + return os; +} } // namespace shkyera diff --git a/include/src/Vector.hpp b/include/src/Vector.hpp index c0c2213..1827cc7 100644 --- a/include/src/Vector.hpp +++ b/include/src/Vector.hpp @@ -20,6 +20,8 @@ template class Vector { ValuePtr operator[](size_t index) const; size_t size() const; + + template friend std::ostream &operator<<(std::ostream &os, const Vector &vector); }; template Vector::Vector(const std::vector &values) { @@ -46,4 +48,14 @@ template ValuePtr Vector::dot(const Vector &other) const { template ValuePtr Vector::operator[](size_t index) const { return _values[index]; } +template std::ostream &operator<<(std::ostream &os, const Vector &vector) { + os << "Vector(size=" << vector.size() << ", data={"; + + for (const ValuePtr val : vector._values) + os << val << ' '; + + os << "})"; + return os; +} + } // namespace shkyera diff --git a/tests/main.cpp b/tests/main.cpp index 2df5bc8..761f62f 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -3,10 +3,10 @@ int main() { using namespace shkyera; - auto n = Neuron(5, Activation::tanh); - auto x = Vector({1, 2, 3, 4, 5}); + auto x = Vector({2, 3}); + auto layer = Layer(5, 2, Activation::tanh); - auto a = n(x); + auto a = layer(x); std::cerr << a << '\n'; } From 4247f9def239d82919e75a6115ace400984953c9 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Mon, 6 Nov 2023 21:26:22 +0100 Subject: [PATCH 3/6] Abstracting optimization --- include/ShkyeraTensor.hpp | 3 +++ include/src/Layer.hpp | 24 +++++++++++++---- include/src/Loss.hpp | 26 ++++++++++++++++++ include/src/MLP.hpp | 56 +++++++++++++++++++++++++++++++++++++++ include/src/Module.hpp | 13 +++++++++ include/src/Neuron.hpp | 26 ++++++++++++++---- include/src/Vector.hpp | 18 ++++++++----- tests/main.cpp | 18 ++++++++++--- 8 files changed, 163 insertions(+), 21 deletions(-) create mode 100644 include/src/Loss.hpp create mode 100644 include/src/MLP.hpp create mode 100644 include/src/Module.hpp diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp index d2d99ca..afe41aa 100644 --- a/include/ShkyeraTensor.hpp +++ b/include/ShkyeraTensor.hpp @@ -2,6 +2,9 @@ #include "src/Activation.hpp" #include "src/Layer.hpp" 
+#include "src/Loss.hpp" +#include "src/MLP.hpp" +#include "src/Module.hpp" #include "src/Neuron.hpp" #include "src/Value.hpp" #include "src/Vector.hpp" diff --git a/include/src/Layer.hpp b/include/src/Layer.hpp index b4cb993..db4d6d2 100644 --- a/include/src/Layer.hpp +++ b/include/src/Layer.hpp @@ -1,21 +1,23 @@ #pragma once #include "Activation.hpp" +#include "Module.hpp" #include "Neuron.hpp" namespace shkyera { -template class Layer { +template class Layer : public Module { private: std::vector> _neurons; public: - Layer(size_t size, size_t input, Activation::Function activation = Activation::relu); + Layer(size_t input, size_t size, Activation::Function activation = Activation::relu); - Vector operator()(const Vector &x) const; + virtual Vector operator()(const Vector &x) const override; + virtual std::vector> paremeters() const override; }; -template Layer::Layer(size_t size, size_t input, Activation::Function activation) { +template Layer::Layer(size_t input, size_t size, Activation::Function activation) { _neurons.reserve(size); for (size_t i = 0; i < size; ++i) { _neurons.emplace_back(Neuron(input, activation)); @@ -24,10 +26,22 @@ template Layer::Layer(size_t size, size_t input, Activation::Fun template Vector Layer::operator()(const Vector &x) const { std::vector> output(_neurons.size()); + for (size_t i = 0; i < _neurons.size(); i++) { - output[i] = _neurons[i](x); + output[i] = _neurons[i](x)[0]; } + return Vector(output); } +template std::vector> Layer::parameters() const { + std::vector> params; + for (const Neuron &n : _neurons) { + std::vector> neuronParams = n.parameters(); + params.insert(params.end(), neuronParams.begin(), neuronParams.end()); + } + + return params; +} + } // namespace shkyera diff --git a/include/src/Loss.hpp b/include/src/Loss.hpp new file mode 100644 index 0000000..793a0c0 --- /dev/null +++ b/include/src/Loss.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include "Value.hpp" + +namespace shkyera::Loss { + +template using Function = std::function(Vector a, Vector b)>; + +template +Function MSE = [](Vector a, Vector b) { + if (a.size() != b.size()) { + throw std::invalid_argument("Vectors need to be of the same size to compute the MSE loss. Sizes are " + + std::to_string(a.size()) + " and " + std::to_string(b.size()) + "."); + } + + ValuePtr loss = Value::create(0); + for (size_t i = 0; i < a.size(); ++i) { + loss = loss + ((a[i] - b[i])->pow(Value::create(2))); + } + + loss->backward(); + + return loss; +}; + +} // namespace shkyera::Loss diff --git a/include/src/MLP.hpp b/include/src/MLP.hpp new file mode 100644 index 0000000..7dfc9ce --- /dev/null +++ b/include/src/MLP.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include "Activation.hpp" +#include "Layer.hpp" +#include "Module.hpp" +#include "Vector.hpp" + +namespace shkyera { + +template class MLP : public Module { + private: + std::vector> _layers; + + public: + MLP(size_t input, std::vector sizes, std::vector> activations); + + virtual Vector operator()(const Vector &x) const override; + virtual std::vector> paremeters() const override; +}; + +template +MLP::MLP(size_t input, std::vector sizes, std::vector> activations) { + if (sizes.size() == 0) + throw std::invalid_argument("MLP must be passed at least one size: an output size."); + if (sizes.size() != activations.size()) + throw std::invalid_argument( + "MLP constructor must be passed the vector of sizes of the same size as vector of activations. 
Sizes are " + + std::to_string(sizes.size()) + " and " + std::to_string(activations.size()) + "."); + + _layers.reserve(sizes.size()); + _layers.emplace_back(Layer(input, sizes[0], activations[0])); + + for (size_t i = 1; i < sizes.size(); ++i) { + _layers.emplace_back(Layer(sizes[i - 1], sizes[i], activations[i])); + } +} + +template Vector MLP::operator()(const Vector &x) const { + Vector out = _layers[0](x); + + std::for_each(_layers.begin() + 1, _layers.end(), [&out](Layer layer) { out = layer(out); }); + + return out; +} + +template std::vector> MLP::parameters() const { + std::vector> params; + for (const Layer &l : _layers) { + std::vector> layerParams = l.parameters(); + params.insert(params.end(), layerParams.begin(), layerParams.end()); + } + + return params; +} + +} // namespace shkyera diff --git a/include/src/Module.hpp b/include/src/Module.hpp new file mode 100644 index 0000000..a480619 --- /dev/null +++ b/include/src/Module.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include "Vector.hpp" + +namespace shkyera { + +template class Module { + public: + virtual Vector operator()(const Vector &x) const = 0; + virtual std::vector> parameters() const = 0; +}; + +} // namespace shkyera diff --git a/include/src/Neuron.hpp b/include/src/Neuron.hpp index 2b872b7..0d1e739 100644 --- a/include/src/Neuron.hpp +++ b/include/src/Neuron.hpp @@ -1,12 +1,13 @@ #pragma once +#include "Module.hpp" #include "Utils.hpp" #include "Value.hpp" #include "Vector.hpp" namespace shkyera { -template class Neuron { +template class Neuron : public Module { private: ValuePtr _bias; Vector _weights; @@ -16,12 +17,15 @@ template class Neuron { Neuron(size_t input); Neuron(size_t input, std::function(ValuePtr)> activation); - ValuePtr operator()(const Vector &x) const; + virtual Vector operator()(const Vector &x) const override; + virtual std::vector> parameters() const override; }; template Neuron::Neuron(size_t input) { + auto weights = utils::sample(-1, 1, input); + + _weights = Vector::of(weights); _bias = Value::create(utils::sample(-1, 1)); - _weights = utils::sample(-1, 1, input); } template @@ -29,8 +33,20 @@ Neuron::Neuron(size_t input, std::function(ValuePtr)> activati _activation = activation; } -template ValuePtr Neuron::operator()(const Vector &x) const { - return _activation(_bias + _weights.dot(x)); +template Vector Neuron::operator()(const Vector &x) const { + return Vector({_activation(_bias + _weights.dot(x))}); +} + +template std::vector> Neuron::parameters() const { + std::vector> params; + params.reserve(_weights.size() + 1); + + for (size_t i = 0; i < _weights.size(); ++i) + params.push_back(_weights[i]); + + params.push_back(_bias); + + return params; } } // namespace shkyera diff --git a/include/src/Vector.hpp b/include/src/Vector.hpp index 1827cc7..87c9bc5 100644 --- a/include/src/Vector.hpp +++ b/include/src/Vector.hpp @@ -12,11 +12,10 @@ template class Vector { public: Vector() = default; - Vector(const std::vector &values); Vector(std::vector> values); + static Vector of(const std::vector &values); ValuePtr dot(const Vector &other) const; - ValuePtr operator[](size_t index) const; size_t size() const; @@ -24,13 +23,18 @@ template class Vector { template friend std::ostream &operator<<(std::ostream &os, const Vector &vector); }; -template Vector::Vector(const std::vector &values) { - _values.reserve(values.size()); - std::for_each(values.begin(), values.end(), [this](const T &val) { _values.push_back(Value::create(val)); }); -} - template Vector::Vector(std::vector> values) { _values = 
values; } +template Vector Vector::of(const std::vector &values) { + std::vector> valuePtrs; + valuePtrs.reserve(values.size()); + + std::for_each(values.begin(), values.end(), + [&valuePtrs](const T &v) { valuePtrs.emplace_back(Value::create(v)); }); + + return valuePtrs; +} + template size_t Vector::size() const { return _values.size(); } template ValuePtr Vector::dot(const Vector &other) const { diff --git a/tests/main.cpp b/tests/main.cpp index 761f62f..3da3343 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -3,10 +3,20 @@ int main() { using namespace shkyera; - auto x = Vector({2, 3}); - auto layer = Layer(5, 2, Activation::tanh); + std::vector> xs = {Vector::of({0, 0}), Vector::of({1, 0}), + Vector::of({0, 1}), Vector::of({1, 1})}; + std::vector> ys = {Vector::of({0}), Vector::of({1}), Vector::of({1}), + Vector::of({0})}; - auto a = layer(x); + auto mlp = MLP(2, {3, 1}, {Activation::tanh, Activation::tanh}); + auto lossFunction = Loss::MSE; - std::cerr << a << '\n'; + for (size_t epoch = 0; epoch < 10; epoch++) { + for (size_t sample = 0; sample < xs.size(); ++sample) { + auto pred = mlp(xs[sample]); + auto loss = lossFunction(pred, ys[sample]); + + std::cerr << loss << '\n'; + } + } } From 6702a9bf6c3f4aa74eee3ee71ffee58bd72c8356 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Mon, 6 Nov 2023 23:17:51 +0100 Subject: [PATCH 4/6] Working XOR! --- include/ShkyeraTensor.hpp | 2 ++ include/src/Activation.hpp | 2 ++ include/src/Layer.hpp | 4 ++-- include/src/MLP.hpp | 4 ++-- include/src/Optimizer.hpp | 36 ++++++++++++++++++++++++++++++++++++ include/src/Type.hpp | 10 ++++++++++ include/src/Value.hpp | 18 +++++++++++++++++- tests/main.cpp | 21 ++++++++++++++------- 8 files changed, 85 insertions(+), 12 deletions(-) create mode 100644 include/src/Optimizer.hpp create mode 100644 include/src/Type.hpp diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp index afe41aa..4c6216c 100644 --- a/include/ShkyeraTensor.hpp +++ b/include/ShkyeraTensor.hpp @@ -6,5 +6,7 @@ #include "src/MLP.hpp" #include "src/Module.hpp" #include "src/Neuron.hpp" +#include "src/Optimizer.hpp" +#include "src/Type.hpp" #include "src/Value.hpp" #include "src/Vector.hpp" diff --git a/include/src/Activation.hpp b/include/src/Activation.hpp index 069e838..26e0e7e 100644 --- a/include/src/Activation.hpp +++ b/include/src/Activation.hpp @@ -9,5 +9,7 @@ template using Function = std::function(ValuePtr)>; template Function tanh = [](ValuePtr a) { return a->tanh(); }; template Function relu = [](ValuePtr a) { return a->relu(); }; template Function exp = [](ValuePtr a) { return a->exp(); }; +template Function sigmoid = [](ValuePtr a) { return a->sigmoid(); }; +template Function linear = [](ValuePtr a) { return a; }; } // namespace shkyera::Activation diff --git a/include/src/Layer.hpp b/include/src/Layer.hpp index db4d6d2..18b1867 100644 --- a/include/src/Layer.hpp +++ b/include/src/Layer.hpp @@ -14,7 +14,7 @@ template class Layer : public Module { Layer(size_t input, size_t size, Activation::Function activation = Activation::relu); virtual Vector operator()(const Vector &x) const override; - virtual std::vector> paremeters() const override; + virtual std::vector> parameters() const override; }; template Layer::Layer(size_t input, size_t size, Activation::Function activation) { @@ -36,7 +36,7 @@ template Vector Layer::operator()(const Vector &x) const { template std::vector> Layer::parameters() const { std::vector> params; - for (const Neuron &n : _neurons) { + for (const Neuron &n : _neurons) { 
std::vector> neuronParams = n.parameters(); params.insert(params.end(), neuronParams.begin(), neuronParams.end()); } diff --git a/include/src/MLP.hpp b/include/src/MLP.hpp index 7dfc9ce..9d19c3f 100644 --- a/include/src/MLP.hpp +++ b/include/src/MLP.hpp @@ -15,7 +15,7 @@ template class MLP : public Module { MLP(size_t input, std::vector sizes, std::vector> activations); virtual Vector operator()(const Vector &x) const override; - virtual std::vector> paremeters() const override; + virtual std::vector> parameters() const override; }; template @@ -45,7 +45,7 @@ template Vector MLP::operator()(const Vector &x) const { template std::vector> MLP::parameters() const { std::vector> params; - for (const Layer &l : _layers) { + for (const Layer &l : _layers) { std::vector> layerParams = l.parameters(); params.insert(params.end(), layerParams.begin(), layerParams.end()); } diff --git a/include/src/Optimizer.hpp b/include/src/Optimizer.hpp new file mode 100644 index 0000000..3b291c6 --- /dev/null +++ b/include/src/Optimizer.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include + +#include "Module.hpp" +#include "Value.hpp" + +namespace shkyera { + +template class Optimizer { + private: + std::vector> _parameters; + T _learningRate; + + public: + Optimizer(std::vector> params, T learningRate); + + void resetGradient(); + void stepGradient(); +}; + +template +Optimizer::Optimizer(std::vector> params, T learningRate) : _learningRate(learningRate) { + _parameters = params; +} + +template void Optimizer::resetGradient() { + std::for_each(_parameters.begin(), _parameters.end(), [](ValuePtr val) { val->_gradient = 0; }); +} + +template void Optimizer::stepGradient() { + std::for_each(_parameters.begin(), _parameters.end(), + [this](ValuePtr val) { val->_data -= _learningRate * val->_gradient; }); +} + +} // namespace shkyera diff --git a/include/src/Type.hpp b/include/src/Type.hpp new file mode 100644 index 0000000..07c805c --- /dev/null +++ b/include/src/Type.hpp @@ -0,0 +1,10 @@ +#pragma once + +namespace shkyera::Type { + +using float32 = float; +using float64 = double; +using f32 = float; +using f64 = double; + +} // namespace shkyera::Type diff --git a/include/src/Value.hpp b/include/src/Value.hpp index 5fabe4b..e427ed8 100644 --- a/include/src/Value.hpp +++ b/include/src/Value.hpp @@ -8,6 +8,7 @@ namespace shkyera { +template class Optimizer; template class Value; template using ValuePtr = std::shared_ptr>; @@ -24,6 +25,8 @@ template class Value : public std::enable_shared_from_this std::vector> topologicalSort(std::vector> &sorted, std::unordered_set *> &visited); public: + friend class Optimizer; + static ValuePtr create(T data); void backward(); @@ -31,6 +34,7 @@ template class Value : public std::enable_shared_from_this ValuePtr tanh(); ValuePtr relu(); + ValuePtr sigmoid(); ValuePtr exp(); ValuePtr pow(ValuePtr exponent); @@ -83,7 +87,19 @@ template ValuePtr Value::tanh() { ValuePtr result = Value::create((std::exp(2 * thisValue->_data) - 1) / (std::exp(2 * thisValue->_data) + 1)); result->_children = {thisValue}; result->_backward = [thisValue, result]() { - thisValue->_gradient += (1 - (thisValue->_data * thisValue->_data)) * result->_gradient; + thisValue->_gradient += (1 - (result->_data * result->_data)) * result->_gradient; + }; + + return result; +} + +template ValuePtr Value::sigmoid() { + auto thisValue = this->shared_from_this(); + + ValuePtr result = Value::create(1 / (std::exp(-thisValue->_data) + 1)); + result->_children = {thisValue}; + result->_backward = [thisValue, result]() { + 
thisValue->_gradient += result->_data * (1 - result->_data) * result->_gradient; }; return result; diff --git a/tests/main.cpp b/tests/main.cpp index 3da3343..bbf1cfb 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -3,20 +3,27 @@ int main() { using namespace shkyera; - std::vector> xs = {Vector::of({0, 0}), Vector::of({1, 0}), - Vector::of({0, 1}), Vector::of({1, 1})}; - std::vector> ys = {Vector::of({0}), Vector::of({1}), Vector::of({1}), - Vector::of({0})}; + using T = Type::float32; - auto mlp = MLP(2, {3, 1}, {Activation::tanh, Activation::tanh}); - auto lossFunction = Loss::MSE; + std::vector> xs = {Vector::of({0, 0}), Vector::of({1, 0}), Vector::of({0, 1}), + Vector::of({1, 1})}; + std::vector> ys = {Vector::of({0}), Vector::of({1}), Vector::of({1}), Vector::of({0})}; + + auto mlp = MLP(2, {5, 5, 1}, {Activation::relu, Activation::relu, Activation::sigmoid}); + auto optimizer = Optimizer(mlp.parameters(), 0.1); + auto lossFunction = Loss::MSE; + + for (size_t epoch = 0; epoch < 1000; epoch++) { + optimizer.resetGradient(); - for (size_t epoch = 0; epoch < 10; epoch++) { for (size_t sample = 0; sample < xs.size(); ++sample) { + auto pred = mlp(xs[sample]); auto loss = lossFunction(pred, ys[sample]); std::cerr << loss << '\n'; } + + optimizer.stepGradient(); } } From d3ea8b88a7ec00f52f4e40951ae72c495333d3b5 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Tue, 7 Nov 2023 22:52:01 +0100 Subject: [PATCH 5/6] Sequential module --- include/ShkyeraTensor.hpp | 20 ++++---- include/src/MLP.hpp | 56 --------------------- include/src/Module.hpp | 13 ----- include/src/{ => core}/Type.hpp | 0 include/src/{ => core}/Utils.hpp | 0 include/src/{ => core}/Value.hpp | 5 ++ include/src/{ => core}/Vector.hpp | 5 ++ include/src/{ => nn}/Activation.hpp | 3 +- include/src/{ => nn}/Layer.hpp | 15 +++++- include/src/{ => nn}/Loss.hpp | 5 +- include/src/nn/Module.hpp | 20 ++++++++ include/src/{ => nn}/Neuron.hpp | 17 ++++--- include/src/{ => nn}/Optimizer.hpp | 6 ++- include/src/nn/Sequential.hpp | 76 +++++++++++++++++++++++++++++ tests/main.cpp | 22 +++++---- 15 files changed, 164 insertions(+), 99 deletions(-) delete mode 100644 include/src/MLP.hpp delete mode 100644 include/src/Module.hpp rename include/src/{ => core}/Type.hpp (100%) rename include/src/{ => core}/Utils.hpp (100%) rename include/src/{ => core}/Value.hpp (98%) rename include/src/{ => core}/Vector.hpp (93%) rename include/src/{ => nn}/Activation.hpp (90%) rename include/src/{ => nn}/Layer.hpp (71%) rename include/src/{ => nn}/Loss.hpp (85%) create mode 100644 include/src/nn/Module.hpp rename include/src/{ => nn}/Neuron.hpp (75%) rename include/src/{ => nn}/Optimizer.hpp (85%) create mode 100644 include/src/nn/Sequential.hpp diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp index 4c6216c..ca647eb 100644 --- a/include/ShkyeraTensor.hpp +++ b/include/ShkyeraTensor.hpp @@ -1,12 +1,12 @@ #pragma once -#include "src/Activation.hpp" -#include "src/Layer.hpp" -#include "src/Loss.hpp" -#include "src/MLP.hpp" -#include "src/Module.hpp" -#include "src/Neuron.hpp" -#include "src/Optimizer.hpp" -#include "src/Type.hpp" -#include "src/Value.hpp" -#include "src/Vector.hpp" +#include "src/core/Type.hpp" +#include "src/core/Value.hpp" +#include "src/core/Vector.hpp" +#include "src/nn/Activation.hpp" +#include "src/nn/Layer.hpp" +#include "src/nn/Loss.hpp" +#include "src/nn/Module.hpp" +#include "src/nn/Neuron.hpp" +#include "src/nn/Optimizer.hpp" +#include "src/nn/Sequential.hpp" diff --git a/include/src/MLP.hpp 
b/include/src/MLP.hpp deleted file mode 100644 index 9d19c3f..0000000 --- a/include/src/MLP.hpp +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "Activation.hpp" -#include "Layer.hpp" -#include "Module.hpp" -#include "Vector.hpp" - -namespace shkyera { - -template class MLP : public Module { - private: - std::vector> _layers; - - public: - MLP(size_t input, std::vector sizes, std::vector> activations); - - virtual Vector operator()(const Vector &x) const override; - virtual std::vector> parameters() const override; -}; - -template -MLP::MLP(size_t input, std::vector sizes, std::vector> activations) { - if (sizes.size() == 0) - throw std::invalid_argument("MLP must be passed at least one size: an output size."); - if (sizes.size() != activations.size()) - throw std::invalid_argument( - "MLP constructor must be passed the vector of sizes of the same size as vector of activations. Sizes are " + - std::to_string(sizes.size()) + " and " + std::to_string(activations.size()) + "."); - - _layers.reserve(sizes.size()); - _layers.emplace_back(Layer(input, sizes[0], activations[0])); - - for (size_t i = 1; i < sizes.size(); ++i) { - _layers.emplace_back(Layer(sizes[i - 1], sizes[i], activations[i])); - } -} - -template Vector MLP::operator()(const Vector &x) const { - Vector out = _layers[0](x); - - std::for_each(_layers.begin() + 1, _layers.end(), [&out](Layer layer) { out = layer(out); }); - - return out; -} - -template std::vector> MLP::parameters() const { - std::vector> params; - for (const Layer &l : _layers) { - std::vector> layerParams = l.parameters(); - params.insert(params.end(), layerParams.begin(), layerParams.end()); - } - - return params; -} - -} // namespace shkyera diff --git a/include/src/Module.hpp b/include/src/Module.hpp deleted file mode 100644 index a480619..0000000 --- a/include/src/Module.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "Vector.hpp" - -namespace shkyera { - -template class Module { - public: - virtual Vector operator()(const Vector &x) const = 0; - virtual std::vector> parameters() const = 0; -}; - -} // namespace shkyera diff --git a/include/src/Type.hpp b/include/src/core/Type.hpp similarity index 100% rename from include/src/Type.hpp rename to include/src/core/Type.hpp diff --git a/include/src/Utils.hpp b/include/src/core/Utils.hpp similarity index 100% rename from include/src/Utils.hpp rename to include/src/core/Utils.hpp diff --git a/include/src/Value.hpp b/include/src/core/Value.hpp similarity index 98% rename from include/src/Value.hpp rename to include/src/core/Value.hpp index e427ed8..e37a2ca 100644 --- a/include/src/Value.hpp +++ b/include/src/core/Value.hpp @@ -6,12 +6,17 @@ #include #include +#include "Type.hpp" + namespace shkyera { template class Optimizer; template class Value; template using ValuePtr = std::shared_ptr>; +using Val32 = Value; +using Val64 = Value; + template class Value : public std::enable_shared_from_this> { private: T _data = 0; diff --git a/include/src/Vector.hpp b/include/src/core/Vector.hpp similarity index 93% rename from include/src/Vector.hpp rename to include/src/core/Vector.hpp index 87c9bc5..2e00234 100644 --- a/include/src/Vector.hpp +++ b/include/src/core/Vector.hpp @@ -2,10 +2,15 @@ #include +#include "Type.hpp" #include "Value.hpp" namespace shkyera { +template class Vector; +using Vec32 = Vector; +using Vec64 = Vector; + template class Vector { private: std::vector> _values; diff --git a/include/src/Activation.hpp b/include/src/nn/Activation.hpp similarity index 90% rename from 
include/src/Activation.hpp rename to include/src/nn/Activation.hpp index 26e0e7e..f0085fd 100644 --- a/include/src/Activation.hpp +++ b/include/src/nn/Activation.hpp @@ -1,6 +1,7 @@ #pragma once -#include "Value.hpp" +#include "../core/Type.hpp" +#include "../core/Value.hpp" namespace shkyera::Activation { diff --git a/include/src/Layer.hpp b/include/src/nn/Layer.hpp similarity index 71% rename from include/src/Layer.hpp rename to include/src/nn/Layer.hpp index 18b1867..37c10f1 100644 --- a/include/src/Layer.hpp +++ b/include/src/nn/Layer.hpp @@ -1,18 +1,27 @@ #pragma once +#include "../core/Type.hpp" #include "Activation.hpp" #include "Module.hpp" #include "Neuron.hpp" namespace shkyera { +template class Layer; +template using LayerPtr = std::shared_ptr>; + +using Layer32 = Layer; +using Layer64 = Layer; + template class Layer : public Module { private: std::vector> _neurons; - public: Layer(size_t input, size_t size, Activation::Function activation = Activation::relu); + public: + static LayerPtr create(size_t input, size_t size, Activation::Function activation = Activation::relu); + virtual Vector operator()(const Vector &x) const override; virtual std::vector> parameters() const override; }; @@ -24,6 +33,10 @@ template Layer::Layer(size_t input, size_t size, Activation::Fun } } +template LayerPtr Layer::create(size_t input, size_t size, Activation::Function activation) { + return std::shared_ptr>(new Layer(input, size, activation)); +} + template Vector Layer::operator()(const Vector &x) const { std::vector> output(_neurons.size()); diff --git a/include/src/Loss.hpp b/include/src/nn/Loss.hpp similarity index 85% rename from include/src/Loss.hpp rename to include/src/nn/Loss.hpp index 793a0c0..99fc4e8 100644 --- a/include/src/Loss.hpp +++ b/include/src/nn/Loss.hpp @@ -1,11 +1,14 @@ #pragma once -#include "Value.hpp" +#include "../core/Value.hpp" namespace shkyera::Loss { template using Function = std::function(Vector a, Vector b)>; +using Function32 = Function; +using Function64 = Function; + template Function MSE = [](Vector a, Vector b) { if (a.size() != b.size()) { diff --git a/include/src/nn/Module.hpp b/include/src/nn/Module.hpp new file mode 100644 index 0000000..3724266 --- /dev/null +++ b/include/src/nn/Module.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include "../core/Vector.hpp" + +namespace shkyera { + +template class Module; +template using ModulePtr = std::shared_ptr>; + +template class Module { + protected: + Module() = default; + + public: + Vector forward(const Vector &x) const { return (*this)(x); } + virtual Vector operator()(const Vector &x) const { return x; } + virtual std::vector> parameters() const { return {}; } +}; + +} // namespace shkyera diff --git a/include/src/Neuron.hpp b/include/src/nn/Neuron.hpp similarity index 75% rename from include/src/Neuron.hpp rename to include/src/nn/Neuron.hpp index 0d1e739..223c39c 100644 --- a/include/src/Neuron.hpp +++ b/include/src/nn/Neuron.hpp @@ -1,13 +1,18 @@ #pragma once +#include "../core/Type.hpp" +#include "../core/Utils.hpp" +#include "../core/Value.hpp" +#include "../core/Vector.hpp" #include "Module.hpp" -#include "Utils.hpp" -#include "Value.hpp" -#include "Vector.hpp" namespace shkyera { -template class Neuron : public Module { +template class Neuron; +using Neuron32 = Neuron; +using Neuron64 = Neuron; + +template class Neuron { private: ValuePtr _bias; Vector _weights; @@ -17,8 +22,8 @@ template class Neuron : public Module { Neuron(size_t input); Neuron(size_t input, std::function(ValuePtr)> activation); - virtual 
Vector operator()(const Vector &x) const override; - virtual std::vector> parameters() const override; + Vector operator()(const Vector &x) const; + std::vector> parameters() const; }; template Neuron::Neuron(size_t input) { diff --git a/include/src/Optimizer.hpp b/include/src/nn/Optimizer.hpp similarity index 85% rename from include/src/Optimizer.hpp rename to include/src/nn/Optimizer.hpp index 3b291c6..4122c38 100644 --- a/include/src/Optimizer.hpp +++ b/include/src/nn/Optimizer.hpp @@ -2,11 +2,15 @@ #include +#include "../core/Type.hpp" +#include "../core/Value.hpp" #include "Module.hpp" -#include "Value.hpp" namespace shkyera { +using Optimizer32 = Optimizer; +using Optimizer64 = Optimizer; + template class Optimizer { private: std::vector> _parameters; diff --git a/include/src/nn/Sequential.hpp b/include/src/nn/Sequential.hpp new file mode 100644 index 0000000..ed1da8c --- /dev/null +++ b/include/src/nn/Sequential.hpp @@ -0,0 +1,76 @@ +#pragma once + +#include "../core/Type.hpp" +#include "Activation.hpp" +#include "Module.hpp" + +namespace shkyera { + +template class Sequential; +template class SequentialBuilder; +template using SequentialPtr = std::shared_ptr>; + +using Sequential32 = Sequential; +using Sequential64 = Sequential; +using SequentialBuilder32 = SequentialBuilder; +using SequentialBuilder64 = SequentialBuilder; + +template class Sequential : public Module { + private: + std::vector> _layers; + + Sequential(const std::vector> &layers); + + public: + static SequentialPtr create(const std::vector> &layers); + + virtual Vector operator()(const Vector &x) const override; + virtual std::vector> parameters() const override; +}; + +template class SequentialBuilder { + private: + std::vector> _layers; + + SequentialBuilder() = default; + + public: + static SequentialBuilder begin(); + + SequentialBuilder add(ModulePtr layer); + SequentialPtr build(); +}; + +template Sequential::Sequential(const std::vector> &layers) : _layers(layers) {} + +template SequentialPtr Sequential::create(const std::vector> &layers) { + return std::shared_ptr>(new Sequential(layers)); +} + +template Vector Sequential::operator()(const Vector &x) const { + Vector out = (*_layers[0])(x); + + std::for_each(_layers.begin() + 1, _layers.end(), [&out](ModulePtr layer) { out = layer->forward(out); }); + + return out; +} + +template std::vector> Sequential::parameters() const { + std::vector> params; + + for (const ModulePtr &l : _layers) { + std::vector> layerParams = l->parameters(); + params.insert(params.end(), layerParams.begin(), layerParams.end()); + } + + return params; +} + +template SequentialBuilder SequentialBuilder::begin() { return SequentialBuilder(); } +template SequentialBuilder SequentialBuilder::add(ModulePtr layer) { + _layers.push_back(layer); + return *this; +} +template SequentialPtr SequentialBuilder::build() { return Sequential::create(_layers); } + +} // namespace shkyera diff --git a/tests/main.cpp b/tests/main.cpp index bbf1cfb..d21d44f 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -3,27 +3,29 @@ int main() { using namespace shkyera; - using T = Type::float32; + std::vector xs = {Vec32::of({0, 0}), Vec32::of({1, 0}), Vec32::of({0, 1}), Vec32::of({1, 1})}; + std::vector ys = {Vec32::of({0}), Vec32::of({1}), Vec32::of({1}), Vec32::of({0})}; - std::vector> xs = {Vector::of({0, 0}), Vector::of({1, 0}), Vector::of({0, 1}), - Vector::of({1, 1})}; - std::vector> ys = {Vector::of({0}), Vector::of({1}), Vector::of({1}), Vector::of({0})}; + // clang-format off + auto mlp = 
SequentialBuilder<Type::float32>::begin()
+                   .add(Layer32::create(2, 15, Activation::relu<Type::float32>))
+                   .add(Layer32::create(15, 5, Activation::relu<Type::float32>))
+                   .add(Layer32::create(5, 1, Activation::sigmoid<Type::float32>))
+                   .build();
+    // clang-format on
 
-    auto mlp = MLP<T>(2, {5, 5, 1}, {Activation::relu<T>, Activation::relu<T>, Activation::sigmoid<T>});
-    auto optimizer = Optimizer<T>(mlp.parameters(), 0.1);
-    auto lossFunction = Loss::MSE<T>;
+    auto optimizer = Optimizer32(mlp->parameters(), 0.1);
+    auto lossFunction = Loss::MSE<Type::float32>;
 
     for (size_t epoch = 0; epoch < 1000; epoch++) {
         optimizer.resetGradient();
-
         for (size_t sample = 0; sample < xs.size(); ++sample) {
-            auto pred = mlp(xs[sample]);
+            auto pred = mlp->forward(xs[sample]);
             auto loss = lossFunction(pred, ys[sample]);
 
             std::cerr << loss << '\n';
         }
-
         optimizer.stepGradient();
     }
 }

From 7f2207f34f5c00ee0940fcf9aaa4272512aa5134 Mon Sep 17 00:00:00 2001
From: "szewczyk.franciszek02"
Date: Tue, 7 Nov 2023 22:52:26 +0100
Subject: [PATCH 6/6] Explicit type of the optimizer

---
 tests/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/main.cpp b/tests/main.cpp
index d21d44f..5f898f9 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -14,7 +14,7 @@ int main() {
         .build();
     // clang-format on
 
-    auto optimizer = Optimizer32(mlp->parameters(), 0.1);
+    auto optimizer = Optimizer<Type::float32>(mlp->parameters(), 0.1);
     auto lossFunction = Loss::MSE<Type::float32>;
 
     for (size_t epoch = 0; epoch < 1000; epoch++) {
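
A note on the autograd core that the patches above assemble: each overloaded operator in Value.hpp records its operands in _children and captures a _backward closure, and Value::backward() topologically sorts the graph before running those closures in reverse, accumulating gradients by the chain rule. The sketch below exercises that core directly, with no layers involved, using only names the patches define (Value, Type::float32, create, pow, backward, getGradient); the expression, the expected derivatives, and the finite-difference epsilon are illustrative choices, not part of the patch series.

#include <cmath>
#include <iostream>

#include "ShkyeraTensor.hpp" // umbrella header, as laid out after [PATCH 5/6]

int main() {
    using namespace shkyera;

    // Build d = (a * b + b)^2 from scalar graph nodes.
    auto a = Value<Type::float32>::create(2.0f);
    auto b = Value<Type::float32>::create(3.0f);
    auto c = a * b + b;                                  // c = 9; b feeds two paths
    auto d = c->pow(Value<Type::float32>::create(2.0f)); // d = 81

    // Reverse pass: topological sort, then each node's _backward closure.
    d->backward();

    // Chain rule by hand: dd/da = 2c * b = 54; dd/db = 2c * (a + 1) = 54,
    // the (a + 1) reflecting that b's gradient accumulates from both paths.
    std::cout << "dd/da = " << a->getGradient() << '\n';
    std::cout << "dd/db = " << b->getGradient() << '\n';

    // Cross-check dd/da against a central finite difference.
    auto f = [](float x, float y) { return std::pow(x * y + y, 2.0f); };
    float eps = 1e-2f;
    std::cout << "numeric dd/da = " << (f(2.0f + eps, 3.0f) - f(2.0f - eps, 3.0f)) / (2.0f * eps) << '\n';
}

A disagreement between the analytic and numeric values usually points at a _backward closure reading the wrong node, which is exactly the slip [PATCH 4/6] fixes in tanh: the derivative was computed from thisValue->_data where it needs result->_data.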