Skip to content

Commit

Permalink
Merge pull request #11 from fszewczyk/sgd
Browse files Browse the repository at this point in the history
SGD with Momentum Optimizer
  • Loading branch information
fszewczyk authored Nov 8, 2023
2 parents cf3ce43 + e595b73 commit 0bff0af
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 1 deletion.
2 changes: 1 addition & 1 deletion examples/xor_classification.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ int main() {
.build();
// clang-format on

Adam32 optimizer = Adam<Type::float32>(mlp->parameters(), 0.01);
SGD32 optimizer = SGD<Type::float32>(mlp->parameters(), 0.1);
Loss::Function32 lossFunction = Loss::CrossEntropy<Type::float32>;

// ------ TRAINING THE NETWORK ------- //
Expand Down
1 change: 1 addition & 0 deletions include/ShkyeraGrad.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "nn/optimizers/Adam.hpp"
#include "nn/optimizers/Optimizer.hpp"
#include "nn/optimizers/SGD.hpp"

#include "nn/activation/Activation.hpp"
#include "nn/activation/Exp.hpp"
Expand Down
2 changes: 2 additions & 0 deletions include/core/Value.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace shkyera {

template <typename T> class Optimizer;
template <typename T> class Adam;
template <typename T> class SGD;

template <typename T> class Value;
template <typename T> using ValuePtr = std::shared_ptr<Value<T>>;
Expand All @@ -42,6 +43,7 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T>
public:
friend class Optimizer<T>;
friend class Adam<T>;
friend class SGD<T>;

static ValuePtr<T> create(T data);

Expand Down
63 changes: 63 additions & 0 deletions include/nn/optimizers/SGD.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
* Copyright © 2023 Franciszek Szewczyk. None of the rights reserved.
* This code is released under the Beerware License. If you find this code useful or you appreciate the work, you are
* encouraged to buy the author a beer in return.
* Contact the author at [email protected] for inquiries and support.
*/

#pragma once

#include <unordered_map>
#include <vector>

#include "../../core/Type.hpp"
#include "../../core/Value.hpp"
#include "../Module.hpp"
#include "Optimizer.hpp"

namespace shkyera {

template <typename T> class SGD;
// Convenience aliases for the supported floating-point precisions.
using SGD32 = SGD<Type::float32>;
// BUGFIX: SGD64 was mistakenly aliased to Type::float32 (copy-paste error);
// a "64" alias must refer to the double-precision instantiation.
using SGD64 = SGD<Type::float64>;

/**
 * Stochastic Gradient Descent optimizer with momentum.
 *
 * Inherits the parameter list and learning rate from Optimizer<T> and keeps
 * a per-parameter running moment used to smooth the raw gradients.
 */
template <typename T> class SGD : public Optimizer<T> {
  private:
    // Momentum coefficient (0.9 by default); weights the previous moment
    // against the fresh gradient in the running average.
    T _momentum;
    // Per-parameter running moment, keyed by the raw Value pointer.
    // Raw pointers are used purely as map keys; ownership stays with the
    // shared_ptrs held in Optimizer<T>::_parameters.
    std::unordered_map<Value<T> *, T> _moment;

    // Returns the stored moment for `v`, inserting a zero entry on first use.
    T getMoment(const ValuePtr<T> &v);

  public:
    // Constructs the optimizer over `params` with the given learning rate
    // and momentum coefficient.
    SGD(std::vector<ValuePtr<T>> params, T learningRate, T momentum = 0.9);

    // Applies one momentum-SGD update to every tracked parameter.
    void step() override;
};

/**
 * Builds an SGD optimizer over the given parameters.
 *
 * @param params       Parameters to be updated on each step().
 * @param learningRate Step size applied to the (smoothed) gradient.
 * @param momentum     Momentum coefficient for the running moment.
 */
template <typename T>
SGD<T>::SGD(std::vector<ValuePtr<T>> params, T learningRate, T momentum)
    : Optimizer<T>(params, learningRate), _momentum(momentum) {}

/**
 * Applies one SGD-with-momentum update to every tracked parameter:
 *   moment <- momentum * moment + (1 - momentum) * gradient
 *   data   <- data - learningRate * moment
 * The very first update of each parameter uses the raw gradient so the
 * initial step is not scaled down by (1 - momentum).
 *
 * BUGFIX: the previous implementation had two defects that together reduced
 * this to plain SGD with a frozen history:
 *  - `static bool initialized = false` was never set to true (the momentum
 *    branch was dead code) and, being `static`, would have been wrongly
 *    shared across every SGD instance;
 *  - `_moment.insert(...)` never overwrites an existing key, so the stored
 *    moment could never change after the first step.
 * First use is now detected per parameter via the map itself, and the moment
 * is written back with assignment so it actually updates.
 */
template <typename T> void SGD<T>::step() {
    for (const ValuePtr<T> &param : this->_parameters) {
        T gradient = param->getGradient();

        auto entry = _moment.find(param.get());
        T moment = (entry == _moment.end())
                       ? gradient // first update for this parameter
                       : _momentum * entry->second + (1 - _momentum) * gradient;
        _moment[param.get()] = moment;

        param->_data -= this->_learningRate * moment;
    }
}

/**
 * Looks up the running moment stored for `v`.
 *
 * On the first query for a parameter, a zero moment is registered and
 * returned; afterwards the previously stored value is returned unchanged.
 */
template <typename T> T SGD<T>::getMoment(const ValuePtr<T> &v) {
    // emplace is a no-op when the key already exists, so this is exactly
    // "find, or insert 0 if absent" in a single lookup.
    auto result = _moment.emplace(v.get(), 0);
    return result.first->second;
}

} // namespace shkyera

0 comments on commit 0bff0af

Please sign in to comment.