From fc8c0ec5919ad0d8cc30c3553537283b29f60086 Mon Sep 17 00:00:00 2001
From: "szewczyk.franciszek02" <szewczyk.franciszek02@gmail.com>
Date: Fri, 10 Nov 2023 01:02:52 +0100
Subject: [PATCH] Image class and MNIST example

---
 .github/workflows/linux.yml                     |  2 +-
 .github/workflows/macos.yml                     |  2 +-
 .github/workflows/windows.yml                   |  2 +-
 docs/tutorials/GetStarted.md                    |  2 +-
 examples/README.md                              |  2 +-
 examples/mnist.cpp                              | 79 +++++++++++++++++++
 examples/{xor_regression.cpp => xor.cpp}        |  0
 include/ShkyeraGrad.hpp                         |  1 +
 include/core/Image.hpp                          | 61 ++++++++++++++
 include/core/Value.hpp                          |  7 ++
 include/core/Vector.hpp                         | 28 +++++++
 include/external/{stb_image.hpp => stb_image.h} |  0
 include/nn/Loss.hpp                             |  8 +-
 include/nn/optimizers/Optimizer.hpp             |  3 +-
 14 files changed, 189 insertions(+), 8 deletions(-)
 create mode 100644 examples/mnist.cpp
 rename examples/{xor_regression.cpp => xor.cpp} (100%)
 create mode 100644 include/core/Image.hpp
 rename include/external/{stb_image.hpp => stb_image.h} (100%)

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index a80dc65..f848a43 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -32,5 +32,5 @@ jobs:
       run: |
         g++ examples/scalars.cpp -O3 --std=c++17
         ./a.out
-        g++ examples/xor_regression.cpp -O3 --std=c++17
+        g++ examples/xor.cpp -O3 --std=c++17
         ./a.out
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
index 29110d0..b2efec4 100644
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -32,5 +32,5 @@ jobs:
       run: |
         g++ examples/scalars.cpp -O3 --std=c++17
         ./a.out
-        g++ examples/xor_regression.cpp -O3 --std=c++17
+        g++ examples/xor.cpp -O3 --std=c++17
         ./a.out
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index f09efd6..4d3f197 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -30,4 +30,4 @@ jobs:
       CXX: ${{matrix.conf.compiler}}
     run: |
       g++ -o out examples/scalars.cpp -O3 --std=c++17
-      g++ -o out examples/xor_regression.cpp -O3 --std=c++17
+      g++ -o out examples/xor.cpp -O3 --std=c++17
diff --git a/docs/tutorials/GetStarted.md b/docs/tutorials/GetStarted.md
index e426608..f3cdaf2 100644
--- a/docs/tutorials/GetStarted.md
+++ b/docs/tutorials/GetStarted.md
@@ -233,7 +233,7 @@ for (size_t sample = 0; sample < xs.size(); ++sample) { // Go through ea
 }
 ```
 
-In case you got lost along the way, check out the `examples/xor_regression.cpp` file. It contains the exact same code and is ready to run :)
+In case you got lost along the way, check out the `examples/xor.cpp` file. It contains the exact same code and is ready to run :)
 
 ### Results
 
diff --git a/examples/README.md b/examples/README.md
index ff64fd9..42ee238 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -3,7 +3,7 @@
 To compile an example, simply run the following command:
 
 ```
-g++ --std=c++17 xor_nn.cpp
+g++ --std=c++17 xor.cpp
 ```
 
 Remember to replace the file name with the appropriate name :)
diff --git a/examples/mnist.cpp b/examples/mnist.cpp
new file mode 100644
index 0000000..2ea8be5
--- /dev/null
+++ b/examples/mnist.cpp
@@ -0,0 +1,79 @@
+#include <filesystem>
+#include <iostream>
+
+#include "../include/ShkyeraGrad.hpp"
+
+namespace fs = std::filesystem;
+using namespace shkyera;
+
+Dataset load(std::string directory) {
+    Dataset dataset;
+
+    std::cerr << "Loading [" << std::flush;
+    for (size_t digit = 0; digit < 10; ++digit) {
+        std::cerr << "▮" << std::flush;
+        for (const auto &entry : fs::directory_iterator(directory + std::to_string(digit))) {
+            Image image(entry.path().string());
+            auto target = Vec32::oneHotEncode(digit, 10);
+
+            // Rescale the 8-bit pixels to [0, 1] before storing the sample.
+            dataset.addSample(image.flatten<Type::float32>() / 255.0f, target);
+        }
+    }
+    std::cerr << "]" << std::endl;
+
+    return dataset;
+}
+
+int main() {
+    Dataset trainData = load("datasets/mnist/train/");
+    std::cerr << "Loaded training data." << std::endl;
+
+    DataLoader trainLoader(trainData, 16, true);
+
+    // clang-format off
+    auto mlp = SequentialBuilder32::begin()
+                    .add(Linear32::create(784, 100))
+                    .add(ReLU32::create())
+                    .add(Linear32::create(100, 50))
+                    .add(Sigmoid32::create())
+                    .add(Linear32::create(50, 10))
+                    .add(Softmax32::create())
+                    .build();
+    // clang-format on
+
+    auto optimizer = Adam32(mlp->parameters(), 0.01, 0.99);
+    auto lossFunction = Loss::CrossEntropy<Type::float32>;
+
+    for (size_t epoch = 0; epoch < 50; epoch++) {
+        float epochLoss = 0;
+        double epochAccuracy = 0;
+
+        for (const auto [x, y] : trainLoader) {
+            optimizer.reset();
+
+            auto pred = mlp->forward(x);
+
+            // Count how many digits in this batch were classified correctly.
+            double accuracy = 0;
+            for (size_t i = 0; i < pred.size(); ++i) {
+                size_t predictedDigit = pred[i].argMax();
+                size_t trueDigit = y[i].argMax();
+
+                if (predictedDigit == trueDigit)
+                    accuracy += 1;
+            }
+            accuracy /= pred.size();
+            epochAccuracy += accuracy;
+
+            auto loss = Loss::compute(lossFunction, pred, y);
+            epochLoss += loss->getValue();
+
+            optimizer.step();
+
+            std::cerr << "Loss: " << loss->getValue() << " Accuracy: " << accuracy << std::endl;
+        }
+        std::cerr << "Epoch: " << epoch + 1 << " Loss: " << epochLoss / trainLoader.getTotalBatches()
+                  << " Accuracy: " << epochAccuracy / trainLoader.getTotalBatches() << std::endl;
+    }
+}
diff --git a/examples/xor_regression.cpp b/examples/xor.cpp
similarity index 100%
rename from examples/xor_regression.cpp
rename to examples/xor.cpp
diff --git a/include/ShkyeraGrad.hpp b/include/ShkyeraGrad.hpp
index 2752afe..f506037 100644
--- a/include/ShkyeraGrad.hpp
+++ b/include/ShkyeraGrad.hpp
@@ -7,6 +7,7 @@
 
 #pragma once
 
+#include "core/Image.hpp"
 #include "core/Type.hpp"
 #include "core/Utils.hpp"
 #include "core/Value.hpp"
diff --git a/include/core/Image.hpp b/include/core/Image.hpp
new file mode 100644
index 0000000..5c8082b
--- /dev/null
+++ b/include/core/Image.hpp
@@ -0,0 +1,61 @@
+/**
+ * Copyright © 2023 Franciszek Szewczyk. None of the rights reserved.
+ * This code is released under the Beerware License. If you find this code useful or you appreciate the work, you are
+ * encouraged to buy the author a beer in return.
+ * Contact the author at szewczyk.franciszek02@gmail.com for inquiries and support.
+ */
+
+#pragma once
+
+#include <iostream>
+#include <vector>
+
+#define STB_IMAGE_IMPLEMENTATION
+#include "../external/stb_image.h"
+
+#include "Vector.hpp"
+
+namespace shkyera {
+
+class Image {
+  private:
+    std::vector<uint8_t> _data;
+
+  public:
+    Image() = default;
+    Image(std::string filename, bool grayscale = true);
+
+    template <typename T> Vector<T> flatten(size_t takeEvery = 1) const;
+};
+
+Image::Image(std::string filename, bool grayscale) {
+    int width, height, channels;
+    uint8_t *imageData = nullptr;
+
+    if (grayscale)
+        imageData = stbi_load(filename.c_str(), &width, &height, &channels, 1);
+    else
+        imageData = stbi_load(filename.c_str(), &width, &height, &channels, 3);
+
+    if (!imageData) {
+        std::cerr << "Error loading image: " << filename << std::endl;
+        return;
+    }
+
+    if (grayscale)
+        _data.assign(imageData, imageData + (width * height));
+    else
+        _data.assign(imageData, imageData + (width * height * 3));
+
+    stbi_image_free(imageData);
+}
+
+template <typename T> Vector<T> Image::flatten(size_t takeEvery) const {
+    std::vector<T> converted;
+    converted.reserve(_data.size());
+    for (size_t i = 0; i < _data.size(); i += takeEvery)
+        converted.push_back(static_cast<T>(_data[i]));
+    return Vector<T>::of(converted);
+}
+
+} // namespace shkyera
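The `Image` class above pairs naturally with the new `Vector` helpers. Below is a minimal usage sketch mirroring what `examples/mnist.cpp` does per file; the image path is hypothetical, and the `Vec32` alias and the scalar division are assumed from the example's own usage rather than shown in this patch:

```cpp
#include <iostream>

#include "include/ShkyeraGrad.hpp"

using namespace shkyera;

int main() {
    // Hypothetical file; images load as grayscale by default, one byte per pixel.
    Image digit("datasets/mnist/train/7/000001.png");

    // Convert the 8-bit pixels to float and rescale into [0, 1],
    // exactly as the MNIST example does before adding a sample.
    Vec32 input = digit.flatten<Type::float32>() / 255.0f;

    std::cout << "Flattened to " << input.size() << " values" << std::endl;
}
```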
diff --git a/include/core/Value.hpp b/include/core/Value.hpp
index 937fd69..d23cf1b 100644
--- a/include/core/Value.hpp
+++ b/include/core/Value.hpp
@@ -238,6 +238,13 @@ template <typename T> void Value<T>::backward() {
     for (auto val = sorted.rbegin(); val != sorted.rend(); val++) {
         (*val)->_backward();
     }
+
+    // Release the graph once the gradients have been propagated, so the
+    // intermediate Values can be freed and a stale backward pass cannot rerun.
+    for (auto s : sorted) {
+        s->_children = {};
+        s->_backward = []() {};
+    }
 }
 
 template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
diff --git a/include/core/Vector.hpp b/include/core/Vector.hpp
index 493a4bc..5c84766 100644
--- a/include/core/Vector.hpp
+++ b/include/core/Vector.hpp
@@ -8,6 +8,7 @@
 #pragma once
 
 #include <stdexcept>
+#include <limits>
 
 #include "Type.hpp"
 #include "Value.hpp"
@@ -30,10 +31,12 @@ template <typename T> class Vector {
 
     static Vector<T> of(const std::vector<T> &values);
     template <typename... Args> static Vector<T> of(const Args &...args);
+    static Vector<T> oneHotEncode(size_t index, size_t size);
 
     ValuePtr<T> dot(const Vector<T> &other) const;
    ValuePtr<T> sum() const;
     size_t size() const;
+    size_t argMax() const;
 
     template <typename U> friend std::ostream &operator<<(std::ostream &os, const Vector<U> &vector);
@@ -85,8 +88,33 @@ template <typename T> template <typename... Args> Vector<T> Vector<T>::of(const
     return Vector<T>(valuePtrs);
 }
 
+template <typename T> Vector<T> Vector<T>::oneHotEncode(size_t index, size_t size) {
+    std::vector<ValuePtr<T>> valuePtrs(size);
+
+    for (size_t i = 0; i < size; ++i) {
+        if (i == index)
+            valuePtrs[i] = Value<T>::create(1);
+        else
+            valuePtrs[i] = Value<T>::create(0);
+    }
+
+    return valuePtrs;
+}
+
 template <typename T> size_t Vector<T>::size() const { return _values.size(); }
 
+template <typename T> size_t Vector<T>::argMax() const {
+    T largest = std::numeric_limits<T>::lowest();
+    size_t largestIndex = 0;
+    for (size_t i = 0; i < _values.size(); ++i) {
+        if (_values[i]->getValue() > largest) {
+            largest = _values[i]->getValue();
+            largestIndex = i;
+        }
+    }
+    return largestIndex;
+}
+
 template <typename T> ValuePtr<T> Vector<T>::dot(const Vector<T> &other) const {
     if (other.size() != size()) {
         throw std::invalid_argument("Vectors need to be of the same size to compute the dot product. Sizes are " +
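The two new `Vector` helpers are inverses of each other for a one-hot vector, which is exactly how the MNIST example checks its predictions. A small sketch, assuming only the `Vec32` alias used elsewhere in the patch:

```cpp
#include <cassert>

#include "include/ShkyeraGrad.hpp"

using namespace shkyera;

int main() {
    // Digit 3 out of 10 classes -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].
    auto target = Vec32::oneHotEncode(3, 10);
    assert(target.size() == 10);

    // argMax scans the stored values, so it recovers the hot index.
    assert(target.argMax() == 3);
}
```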
Sizes are " + diff --git a/include/external/stb_image.hpp b/include/external/stb_image.h similarity index 100% rename from include/external/stb_image.hpp rename to include/external/stb_image.h diff --git a/include/nn/Loss.hpp b/include/nn/Loss.hpp index 735773a..29d7e2b 100644 --- a/include/nn/Loss.hpp +++ b/include/nn/Loss.hpp @@ -71,10 +71,13 @@ Function CrossEntropy = [](Vector a, Vector b) { std::to_string(aSum->getValue()) + " and " + std::to_string(bSum->getValue()) + "."); } - + auto eps = Value::create(1e-8); auto loss = Value::create(0); for (size_t i = 0; i < a.size(); ++i) { - loss = loss - (b[i] * (a[i]->log())); + if (a[i] < eps) + loss = loss - (b[i] * (eps->log())); + else + loss = loss - (b[i] * (a[i]->log())); } return loss; diff --git a/include/nn/optimizers/Optimizer.hpp b/include/nn/optimizers/Optimizer.hpp index 83cb994..87ceb84 100644 --- a/include/nn/optimizers/Optimizer.hpp +++ b/include/nn/optimizers/Optimizer.hpp @@ -36,7 +36,8 @@ Optimizer::Optimizer(std::vector> params, T learningRate) : _lear } template void Optimizer::reset() { - std::for_each(_parameters.begin(), _parameters.end(), [](ValuePtr val) { val->_gradient = 0; }); + for (ValuePtr &val : _parameters) + val->_gradient = 0; } template void Optimizer::step() {