
Commit

Merge pull request #17 from fszewczyk/nag-optimizer
Nesterov Accelerated Gradient
fszewczyk authored Nov 9, 2023
2 parents a6e6b1a + b41c0f0 commit e0373a0
Showing 3 changed files with 59 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/ShkyeraGrad.hpp
@@ -18,6 +18,7 @@
#include "nn/Sequential.hpp"

#include "nn/optimizers/Adam.hpp"
#include "nn/optimizers/NAG.hpp"
#include "nn/optimizers/Optimizer.hpp"
#include "nn/optimizers/SGD.hpp"

2 changes: 2 additions & 0 deletions include/core/Value.hpp
@@ -23,6 +23,7 @@ namespace shkyera {
template <typename T> class Optimizer;
template <typename T> class Adam;
template <typename T> class SGD;
template <typename T> class NAG;

template <typename T> class Value;
template <typename T> using ValuePtr = std::shared_ptr<Value<T>>;
@@ -47,6 +48,7 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T>
friend class Optimizer<T>;
friend class Adam<T>;
friend class SGD<T>;
friend class NAG<T>;

static ValuePtr<T> create(T data);

56 changes: 56 additions & 0 deletions include/nn/optimizers/NAG.hpp
@@ -0,0 +1,56 @@
/**
* Copyright © 2023 Franciszek Szewczyk. None of the rights reserved.
* This code is released under the Beerware License. If you find this code useful or you appreciate the work, you are
* encouraged to buy the author a beer in return.
* Contact the author at [email protected] for inquiries and support.
*/

#pragma once

#include <unordered_map>
#include <vector>

#include "../../core/Type.hpp"
#include "../../core/Value.hpp"
#include "../Module.hpp"
#include "Optimizer.hpp"

namespace shkyera {

template <typename T> class NAG;
using NAG32 = NAG<Type::float32>;
using NAG64 = NAG<Type::float64>;

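/**
 * Nesterov Accelerated Gradient (NAG) optimizer. Keeps an exponential moving average of the
 * gradients and applies it, together with a momentum-scaled look-ahead term, on every step.
 */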
template <typename T> class NAG : public Optimizer<T> {
  private:
    T _momentum;
    std::vector<T> _moments;
    bool _initialized = false;

  public:
    NAG(std::vector<ValuePtr<T>> params, T learningRate, T momentum = 0.9);

    void step() override;
};

template <typename T>
NAG<T>::NAG(std::vector<ValuePtr<T>> params, T learningRate, T momentum) : Optimizer<T>(params, learningRate) {
    _momentum = momentum;
    _moments.resize(params.size(), 0);
}

template <typename T> void NAG<T>::step() {
    for (size_t i = 0; i < this->_parameters.size(); ++i) {
        const ValuePtr<T> &param = this->_parameters[i];

        T gradient = param->getGradient();
        // Seed the moving average with the raw gradient on the first step, then
        // decay it by the momentum coefficient on subsequent steps.
        T moment = _initialized ? _momentum * _moments[i] + (1 - _momentum) * gradient : gradient;

        // Nesterov-style update: the new moment plus a momentum-scaled contribution
        // of the previous moment acts as the look-ahead correction.
        param->_data -= this->_learningRate * (moment + _momentum * _moments[i]);

        _moments[i] = moment;
    }

    _initialized = true;
}

} // namespace shkyera
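
For context, here is a hypothetical usage sketch (not part of this commit). It drives the new optimizer through the API shown above: two scalar parameters are created, a NAG32 is constructed with an illustrative learning rate of 0.1 and the default momentum of 0.9, and step() is called once. The parameter values are arbitrary, and a real training loop would populate the gradients via a forward and backward pass before each step; only the constructor and step() from this diff are relied on. After the first step, the update applied to each parameter is data -= lr * (m_t + momentum * m_prev), where m_t = momentum * m_prev + (1 - momentum) * gradient.

// Hypothetical usage sketch, not part of the commit. Gradients are assumed to be
// populated elsewhere (e.g. by the library's backward pass) before step() is called.
#include <vector>

#include "ShkyeraGrad.hpp"

int main() {
    using namespace shkyera;

    // Two scalar parameters; Value<T>::create is the factory declared in core/Value.hpp.
    std::vector<ValuePtr<Type::float32>> params = {Value<Type::float32>::create(0.5f),
                                                   Value<Type::float32>::create(-1.2f)};

    // NAG32 is the float32 alias introduced by this commit; the learning rate of 0.1 is
    // illustrative, and the momentum is left at its default of 0.9.
    NAG32 optimizer(params, 0.1f);

    // In a training loop, a forward pass and gradient computation would precede each call.
    optimizer.step();

    return 0;
}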
