From df29a680a681cc494a46795ca057b094b6bbb69b Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 13:14:47 +0100 Subject: [PATCH 1/9] Improved example --- examples/xor_nn.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/xor_nn.cpp b/examples/xor_nn.cpp index 89630ce..9836426 100644 --- a/examples/xor_nn.cpp +++ b/examples/xor_nn.cpp @@ -11,19 +11,19 @@ int main() { xs.push_back(Vec32::of({0, 0})); ys.push_back(Vec32::of({0})); xs.push_back(Vec32::of({1, 0})); ys.push_back(Vec32::of({1})); xs.push_back(Vec32::of({0, 1})); ys.push_back(Vec32::of({1})); - xs.push_back(Vec32::of({0, 0})); ys.push_back(Vec32::of({0})); + xs.push_back(Vec32::of({1, 1})); ys.push_back(Vec32::of({0})); auto mlp = SequentialBuilder::begin() .add(Linear32::create(2, 15)) .add(ReLU32::create()) .add(Dropout32::create(15, 5, 0.2)) - .add(ReLU32::create()) + .add(Tanh32::create()) .add(Linear32::create(5, 1)) .add(Sigmoid32::create()) .build(); // clang-format on - Optimizer32 optimizer = Optimizer(mlp->parameters(), 0.1); + Optimizer32 optimizer = Optimizer(mlp->parameters(), 0.2); Loss::Function32 lossFunction = Loss::MSE; // ------ TRAINING THE NETWORK ------- // From 21c666101b34415d4c568610872dbeb51aaad1df Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 13:15:03 +0100 Subject: [PATCH 2/9] Fixed xor example --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 50beefe..c6de2ac 100644 --- a/examples/README.md +++ b/examples/README.md @@ -27,5 +27,5 @@ Epoch: 100 Loss: 0.0371898 Vector(size=2, data={Value(data=0) Value(data=0) }) -> Value(data=0.115728)| True: Value(data=0) Vector(size=2, data={Value(data=1) Value(data=0) }) -> Value(data=0.93215) | True: Value(data=1) Vector(size=2, data={Value(data=0) Value(data=1) }) -> Value(data=0.937625)| True: Value(data=1) -Vector(size=2, data={Value(data=0) Value(data=0) }) -> 
Value(data=0.115728)| True: Value(data=0) +Vector(size=2, data={Value(data=1) Value(data=1) }) -> Value(data=0.115728)| True: Value(data=0) ``` From 1f49913c0cf83feceadf1b4ae859f8a09cbf1961 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:26:12 +0100 Subject: [PATCH 3/9] Softmax and Cross Entropy Loss --- examples/xor_classification.cpp | 50 ++++++++++++++ examples/{xor_nn.cpp => xor_regression.cpp} | 0 include/ShkyeraGrad.hpp | 1 + include/core/Value.hpp | 13 +++- include/core/Vector.hpp | 73 ++++++++++++++++++++- include/nn/Loss.hpp | 28 ++++++++ include/nn/activation/Softmax.hpp | 50 ++++++++++++++ include/nn/layers/Dropout.hpp | 7 +- 8 files changed, 215 insertions(+), 7 deletions(-) create mode 100644 examples/xor_classification.cpp rename examples/{xor_nn.cpp => xor_regression.cpp} (100%) create mode 100644 include/nn/activation/Softmax.hpp diff --git a/examples/xor_classification.cpp b/examples/xor_classification.cpp new file mode 100644 index 0000000..d4961c5 --- /dev/null +++ b/examples/xor_classification.cpp @@ -0,0 +1,50 @@ +#include "../include/ShkyeraGrad.hpp" + +int main() { + using namespace shkyera; + + // clang-format off + std::vector xs; + std::vector ys; + + // ---------- INPUT ----------- | -------- OUTPUT --------- // + xs.push_back(Vec32::of({0, 0})); ys.push_back(Vec32::of({1, 0})); + xs.push_back(Vec32::of({1, 0})); ys.push_back(Vec32::of({0, 1})); + xs.push_back(Vec32::of({0, 1})); ys.push_back(Vec32::of({0, 1})); + xs.push_back(Vec32::of({1, 1})); ys.push_back(Vec32::of({1, 0})); + + auto mlp = SequentialBuilder::begin() + .add(Linear32::create(2, 15)) + .add(ReLU32::create()) + .add(Dropout32::create(15, 5, 0.2)) + .add(Tanh32::create()) + .add(Linear32::create(5, 2)) + .add(Softmax32::create()) + .build(); + // clang-format on + + Optimizer32 optimizer = Optimizer(mlp->parameters(), 0.1); + Loss::Function32 lossFunction = Loss::CrossEntropy; + + // ------ TRAINING THE NETWORK ------- // + for (size_t epoch 
= 0; epoch < 200; epoch++) { + auto epochLoss = Val32::create(0); + + optimizer.reset(); + for (size_t sample = 0; sample < xs.size(); ++sample) { + Vec32 pred = mlp->forward(xs[sample]); + auto loss = lossFunction(pred, ys[sample]); + + epochLoss = epochLoss + loss; + } + optimizer.step(); + + std::cout << "Epoch: " << epoch + 1 << " Loss: " << epochLoss->getValue() / xs.size() << std::endl; + } + + // ------ VERIFYING THAT IT WORKS ------// + for (size_t sample = 0; sample < xs.size(); ++sample) { + Vec32 pred = mlp->forward(xs[sample]); + std::cout << xs[sample] << " -> " << pred << "\t| True: " << ys[sample] << std::endl; + } +} diff --git a/examples/xor_nn.cpp b/examples/xor_regression.cpp similarity index 100% rename from examples/xor_nn.cpp rename to examples/xor_regression.cpp diff --git a/include/ShkyeraGrad.hpp b/include/ShkyeraGrad.hpp index 47f785f..275e810 100644 --- a/include/ShkyeraGrad.hpp +++ b/include/ShkyeraGrad.hpp @@ -21,6 +21,7 @@ #include "nn/activation/Exp.hpp" #include "nn/activation/ReLU.hpp" #include "nn/activation/Sigmoid.hpp" +#include "nn/activation/Softmax.hpp" #include "nn/activation/Tanh.hpp" #include "nn/layers/Dropout.hpp" diff --git a/include/core/Value.hpp b/include/core/Value.hpp index 109099d..c7f9007 100644 --- a/include/core/Value.hpp +++ b/include/core/Value.hpp @@ -50,6 +50,7 @@ template class Value : public std::enable_shared_from_this ValuePtr relu(); ValuePtr sigmoid(); ValuePtr exp(); + ValuePtr log(); ValuePtr pow(ValuePtr exponent); template friend ValuePtr operator+(ValuePtr a, ValuePtr b); @@ -157,6 +158,16 @@ template ValuePtr Value::exp() { return result; } +template ValuePtr Value::log() { + auto thisValue = this->shared_from_this(); + + ValuePtr result = Value::create(std::log(_data)); + result->_children = {thisValue}; + result->_backward = [thisValue, result]() { thisValue->_gradient += (1 / thisValue->_data) * result->_gradient; }; + + return result; +} + template ValuePtr Value::pow(ValuePtr exponent) { 
auto thisValue = this->shared_from_this(); @@ -165,7 +176,7 @@ template ValuePtr Value::pow(ValuePtr exponent) { result->_backward = [thisValue, exponent, result]() { thisValue->_gradient += (exponent->_data * std::pow(thisValue->_data, exponent->_data - 1)) * result->_gradient; exponent->_gradient += - (std::pow(thisValue->_data, exponent->_data) * log(thisValue->_data)) * result->_gradient; + (std::pow(thisValue->_data, exponent->_data) * std::log(thisValue->_data)) * result->_gradient; }; return result; diff --git a/include/core/Vector.hpp b/include/core/Vector.hpp index 994f700..b13783a 100644 --- a/include/core/Vector.hpp +++ b/include/core/Vector.hpp @@ -25,14 +25,24 @@ template class Vector { public: Vector() = default; Vector(std::vector> values); - static Vector of(const std::vector &values); + static Vector of(const std::vector &values); ValuePtr dot(const Vector &other) const; - ValuePtr operator[](size_t index) const; - + ValuePtr sum() const; size_t size() const; template friend std::ostream &operator<<(std::ostream &os, const Vector &vector); + + template friend Vector operator/(Vector x, U val); + template friend Vector operator*(Vector x, U val); + template friend Vector operator/(Vector x, ValuePtr val); + template friend Vector operator*(Vector x, ValuePtr val); + Vector &operator/=(T val); + Vector &operator*=(T val); + Vector &operator/=(ValuePtr val); + Vector &operator*=(ValuePtr val); + + ValuePtr operator[](size_t index) const; }; template Vector::Vector(std::vector> values) { _values = values; } @@ -62,6 +72,63 @@ template ValuePtr Vector::dot(const Vector &other) const { return result; } +template ValuePtr Vector::sum() const { + auto sum = Value::create(0); + for (const auto &entry : _values) + sum = sum + entry; + return sum; +} + +template Vector operator/(Vector x, T val) { + x /= val; + return x; +} + +template Vector operator*(Vector x, T val) { + x *= val; + return x; +} + +template Vector operator/(Vector x, ValuePtr val) { + auto 
out = x; + for (size_t i = 0; i < out._values.size(); ++i) + out._values[i] = out._values[i] / val; + return out; +} + +template Vector operator*(Vector x, ValuePtr val) { + auto out = x; + for (size_t i = 0; i < out._values.size(); ++i) + out._values[i] = out._values[i] * val; + return out; +} + +template Vector &Vector::operator/=(T val) { + auto divisor = Value::create(val); + for (size_t i = 0; i < _values.size(); ++i) + _values[i] = _values[i] / divisor; + return *this; +} + +template Vector &Vector::operator*=(T val) { + auto divisor = Value::create(val); + for (size_t i = 0; i < _values.size(); ++i) + _values[i] = _values[i] * divisor; + return *this; +} + +template Vector &Vector::operator/=(ValuePtr val) { + for (size_t i = 0; i < _values.size(); ++i) + _values[i] = _values[i] / val; + return *this; +} + +template Vector &Vector::operator*=(ValuePtr val) { + for (size_t i = 0; i < _values.size(); ++i) + _values[i] = _values[i] * val; + return *this; +} + template ValuePtr Vector::operator[](size_t index) const { return _values[index]; } template std::ostream &operator<<(std::ostream &os, const Vector &vector) { diff --git a/include/nn/Loss.hpp b/include/nn/Loss.hpp index 122d018..9d598ae 100644 --- a/include/nn/Loss.hpp +++ b/include/nn/Loss.hpp @@ -57,4 +57,32 @@ Function MAE = [](Vector a, Vector b) { return loss; }; +template +Function CrossEntropy = [](Vector a, Vector b) { + if (a.size() != b.size()) { + throw std::invalid_argument( + "Vectors need to be of the same size to compute the Cross Entropy loss. Sizes are " + + std::to_string(a.size()) + " and " + std::to_string(b.size()) + "."); + } + + auto aSum = a.sum(); + auto bSum = b.sum(); + + if (aSum->getValue() < 0.99 || aSum->getValue() > 1.01 || bSum->getValue() < 0.99 || bSum->getValue() > 1.01) { + throw std::invalid_argument("To compute Cross Entropy Loss, both elements of each vector need to sum to 1(+/- " + "0.01). 
Currently, they sum to: " + + std::to_string(aSum->getValue()) + " and " + std::to_string(bSum->getValue()) + + "."); + } + + auto loss = Value::create(0); + for (size_t i = 0; i < a.size(); ++i) { + loss = loss - (b[i] * (a[i]->log())); + } + + loss->backward(); + + return loss; +}; + } // namespace shkyera::Loss diff --git a/include/nn/activation/Softmax.hpp b/include/nn/activation/Softmax.hpp new file mode 100644 index 0000000..6f80c09 --- /dev/null +++ b/include/nn/activation/Softmax.hpp @@ -0,0 +1,50 @@ +/** + * Copyright © 2023 Franciszek Szewczyk. None of the rights reserved. + * This code is released under the Beerware License. If you find this code useful or you appreciate the work, you are + * encouraged to buy the author a beer in return. + * Contact the author at szewczyk.franciszek02@gmail.com for inquiries and support. + */ + +#pragma once + +#include "Activation.hpp" + +namespace shkyera { + +template class Softmax; +using Softmax32 = Softmax; +using Softmax64 = Softmax; + +template class Softmax : public Activation { + public: + static std::shared_ptr> create(); + + virtual Vector operator()(const Vector &x) const override; +}; + +template std::shared_ptr> Softmax::create() { + return std::shared_ptr>(new Softmax()); +} + +template Vector Softmax::operator()(const Vector &x) const { + std::vector> out; + out.reserve(x.size()); + + auto maxValue = Value::create(x[0]->getValue()); + for (size_t i = 1; i < x.size(); ++i) + if (x[i]->getValue() > maxValue->getValue()) + maxValue = x[i]; + + auto sumExponentiated = Value::create(0); + for (size_t i = 0; i < x.size(); ++i) { + auto exponentiated = (x[i] - maxValue)->exp(); + out.emplace_back(exponentiated); + sumExponentiated = sumExponentiated + exponentiated; + } + + auto vectorizedOut = Vector(out) / sumExponentiated; + + return vectorizedOut; +} + +} // namespace shkyera diff --git a/include/nn/layers/Dropout.hpp b/include/nn/layers/Dropout.hpp index fb90769..c3a6063 100644 --- a/include/nn/layers/Dropout.hpp +++ 
b/include/nn/layers/Dropout.hpp @@ -45,15 +45,16 @@ template DropoutPtr Dropout::create(size_t input, size_t size template Vector Dropout::operator()(const Vector &x) const { std::vector> alteredInput; alteredInput.reserve(x.size()); - auto scaling = Value::create(1.0 / (1 - _dropout)); for (size_t i = 0; i < x.size(); ++i) - alteredInput.push_back(x[i] * scaling); + alteredInput.push_back(x[i]); std::vector indicesToRemove = utils::sample(0, x.size() - 1, _dropout * x.size(), false); for (size_t idxToRemove : indicesToRemove) alteredInput[idxToRemove] = Value::create(0); - return Linear::operator()(Vector(alteredInput)); + auto transformedInput = Vector(alteredInput) * static_cast(1.0 / (1 - _dropout)); + + return Linear::operator()(transformedInput); } } // namespace shkyera From e29b6945c07babe2206f56f2effc91a8759f3289 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:29:31 +0100 Subject: [PATCH 4/9] Updated workflows --- .github/workflows/linux.yml | 8 ++++++-- .github/workflows/macos.yml | 10 +++++++--- .github/workflows/windows.yml | 8 ++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index e9fbb73..e27b7d3 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -30,5 +30,9 @@ jobs: env: CXX: ${{matrix.conf.compiler}} run: | - g++ examples/scalars.cpp --std=c++17 - g++ examples/xor_nn.cpp --std=c++17 + g++ examples/scalars.cpp -O3 --std=c++17 + ./a.out + g++ examples/xor_classification.cpp --std=c++17 + ./a.out + g++ examples/xor_regression.cpp --std=c++17 + ./a.out diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 1501041..9cb4143 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -26,9 +26,13 @@ jobs: run: | g++ include/ShkyeraGrad.hpp --std=c++17 - - name: Build examples + - name: Build and run examples env: CXX: ${{matrix.conf.compiler}} run: | - g++ examples/scalars.cpp 
--std=c++17 - g++ examples/xor_nn.cpp --std=c++17 + g++ examples/scalars.cpp -O3 --std=c++17 + ./a.out + g++ examples/xor_classification.cpp --std=c++17 + ./a.out + g++ examples/xor_regression.cpp --std=c++17 + ./a.out diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index f734225..4213287 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -29,5 +29,9 @@ jobs: env: CXX: ${{matrix.conf.compiler}} run: | - g++ examples/scalars.cpp --std=c++17 - g++ examples/xor_nn.cpp --std=c++17 + g++ examples/scalars.cpp -O3 --std=c++17 + ./a.out + g++ examples/xor_classification.cpp --std=c++17 + ./a.out + g++ examples/xor_regression.cpp --std=c++17 + ./a.out From f8cdead05c5c1a448c0be1be6fb34de9ee3d5d6e Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:31:48 +0100 Subject: [PATCH 5/9] Updated windows workflow --- .github/workflows/windows.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 4213287..dc59aa0 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -29,9 +29,9 @@ jobs: env: CXX: ${{matrix.conf.compiler}} run: | - g++ examples/scalars.cpp -O3 --std=c++17 - ./a.out - g++ examples/xor_classification.cpp --std=c++17 - ./a.out - g++ examples/xor_regression.cpp --std=c++17 - ./a.out + g++ examples/scalars.cpp -O3 --std=c++17 -o out.exe + out.exe + g++ examples/xor_classification.cpp --std=c++17 -o out.exe + out.exe + g++ examples/xor_regression.cpp --std=c++17 -o out.exe + out.exe From ec8259196cf139cc25bf43f442b1516be2810985 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:32:20 +0100 Subject: [PATCH 6/9] Compilator optimization --- .github/workflows/linux.yml | 4 ++-- .github/workflows/macos.yml | 4 ++-- .github/workflows/windows.yml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/linux.yml 
b/.github/workflows/linux.yml index e27b7d3..10b4976 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -32,7 +32,7 @@ jobs: run: | g++ examples/scalars.cpp -O3 --std=c++17 ./a.out - g++ examples/xor_classification.cpp --std=c++17 + g++ examples/xor_classification.cpp -O3 --std=c++17 ./a.out - g++ examples/xor_regression.cpp --std=c++17 + g++ examples/xor_regression.cpp -O3 --std=c++17 ./a.out diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 9cb4143..7d8c246 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -32,7 +32,7 @@ jobs: run: | g++ examples/scalars.cpp -O3 --std=c++17 ./a.out - g++ examples/xor_classification.cpp --std=c++17 + g++ examples/xor_classification.cpp -O3 --std=c++17 ./a.out - g++ examples/xor_regression.cpp --std=c++17 + g++ examples/xor_regression.cpp -O3 --std=c++17 ./a.out diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index dc59aa0..973497e 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -31,7 +31,7 @@ jobs: run: | g++ examples/scalars.cpp -O3 --std=c++17 -o out.exe out.exe - g++ examples/xor_classification.cpp --std=c++17 -o out.exe + g++ examples/xor_classification.cpp -O3 --std=c++17 -o out.exe out.exe - g++ examples/xor_regression.cpp --std=c++17 -o out.exe + g++ examples/xor_regression.cpp -O3 --std=c++17 -o out.exe out.exe From f725591ccf4d45e5c97051ca283d92523a4539a7 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:34:37 +0100 Subject: [PATCH 7/9] Window update --- .github/workflows/windows.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 973497e..c04c638 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -29,9 +29,9 @@ jobs: env: CXX: ${{matrix.conf.compiler}} run: | - g++ examples/scalars.cpp -O3 --std=c++17 -o out.exe + g++ 
examples/scalars.cpp -O3 --std=c++17 -o out out.exe - g++ examples/xor_classification.cpp -O3 --std=c++17 -o out.exe + g++ examples/xor_classification.cpp -O3 --std=c++17 -o out out.exe - g++ examples/xor_regression.cpp -O3 --std=c++17 -o out.exe + g++ examples/xor_regression.cpp -O3 --std=c++17 -o out out.exe From d1bf876a5cce9bd024b2fb5a320edd361c1bbd2f Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:37:14 +0100 Subject: [PATCH 8/9] Windows out name --- .github/workflows/windows.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index c04c638..d42783c 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -29,9 +29,9 @@ jobs: env: CXX: ${{matrix.conf.compiler}} run: | - g++ examples/scalars.cpp -O3 --std=c++17 -o out + g++ -o out examples/scalars.cpp -O3 --std=c++17 out.exe - g++ examples/xor_classification.cpp -O3 --std=c++17 -o out + g++ -o out examples/xor_classification.cpp -O3 --std=c++17 out.exe - g++ examples/xor_regression.cpp -O3 --std=c++17 -o out + g++ -o out examples/xor_regression.cpp -O3 --std=c++17 out.exe From 67dfd6897f75756828409a55723e2a97790becc2 Mon Sep 17 00:00:00 2001 From: "szewczyk.franciszek02" Date: Wed, 8 Nov 2023 14:38:30 +0100 Subject: [PATCH 9/9] Remove running examples at Windows --- .github/workflows/windows.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index d42783c..260f5ac 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -30,8 +30,5 @@ jobs: CXX: ${{matrix.conf.compiler}} run: | g++ -o out examples/scalars.cpp -O3 --std=c++17 - out.exe g++ -o out examples/xor_classification.cpp -O3 --std=c++17 - out.exe g++ -o out examples/xor_regression.cpp -O3 --std=c++17 - out.exe