Merge pull request #5 from fszewczyk/neural-net

Sequential model
fszewczyk · Nov 7, 2023 · 08743a3 · 08743a3
2 parents df9f197 + 7f2207f
commit 08743a3
Show file tree

Hide file tree

Showing 13 changed files with 505 additions and 18 deletions.
diff --git a/include/ShkyeraTensor.hpp b/include/ShkyeraTensor.hpp
@@ -1 +1,12 @@
-#include "src/Value.hpp"
+#pragma once
+
+#include "src/core/Type.hpp"
+#include "src/core/Value.hpp"
+#include "src/core/Vector.hpp"
+#include "src/nn/Activation.hpp"
+#include "src/nn/Layer.hpp"
+#include "src/nn/Loss.hpp"
+#include "src/nn/Module.hpp"
+#include "src/nn/Neuron.hpp"
+#include "src/nn/Optimizer.hpp"
+#include "src/nn/Sequential.hpp"
diff --git a/include/src/core/Type.hpp b/include/src/core/Type.hpp
@@ -0,0 +1,10 @@
+#pragma once
+
+/// Floating-point scalar aliases used as element types by the rest of the library.
+namespace shkyera::Type {
+
+// Long spellings.
+using float32 = float;
+using float64 = double;
+
+// Short spellings; each names the same type as its long counterpart.
+using f32 = float32;
+using f64 = float64;
+
+} // namespace shkyera::Type
diff --git a/include/src/core/Utils.hpp b/include/src/core/Utils.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <random>
+
+namespace shkyera::utils {
+
+// Random source shared by every sample() overload below: a Mersenne Twister
+// seeded once from std::random_device. Both objects are `inline` so that
+// including this header from several translation units yields a single
+// definition of each instead of duplicate-symbol link errors.
+inline std::random_device rand_dev;
+inline std::mt19937 generator(rand_dev());
+
+/// Draws one value from std::uniform_real_distribution<T>(from, to).
+/// Selected by overload resolution for every non-integral T.
+template <typename T> std::enable_if_t<!std::is_integral_v<T>, T> sample(T from, T to) {
+    std::uniform_real_distribution<T> distribution(from, to);
+    return distribution(generator);
+}
+
+/// Draws `size` values from std::uniform_real_distribution<T>(from, to).
+/// Selected for every non-integral T; the result is empty when `size` is 0.
+template <typename T> std::enable_if_t<!std::is_integral_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
+    std::uniform_real_distribution<T> distribution(from, to);
+
+    std::vector<T> sampled(size);
+    for (size_t i = 0; i < size; i++) {
+        sampled[i] = distribution(generator);
+    }
+
+    return sampled;
+}
+
+/// Draws one value from std::uniform_int_distribution<T>(from, to).
+/// Selected for every integral T.
+template <typename T> std::enable_if_t<std::is_integral_v<T>, T> sample(T from, T to) {
+    std::uniform_int_distribution<T> distribution(from, to);
+    return distribution(generator);
+}
+
+/// Draws `size` values from std::uniform_int_distribution<T>(from, to).
+/// Selected for every integral T; the result is empty when `size` is 0.
+template <typename T> std::enable_if_t<std::is_integral_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
+    std::uniform_int_distribution<T> distribution(from, to);
+
+    std::vector<T> sampled(size);
+    for (size_t i = 0; i < size; i++) {
+        sampled[i] = distribution(generator);
+    }
+
+    return sampled;
+}
+
+} // namespace shkyera::utils
diff --git a/include/src/Value.hpp → include/src/core/Value.hpp b/include/src/Value.hpp → include/src/core/Value.hpp
@@ -1,14 +1,22 @@
+#pragma once
+
 #include <cmath>
 #include <iostream>
 #include <memory>
 #include <unordered_set>
 #include <vector>
 
+#include "Type.hpp"
+
 namespace shkyera {
 
+template <typename T> class Optimizer;
 template <typename T> class Value;
 template <typename T> using ValuePtr = std::shared_ptr<Value<T>>;
 
+// Shorthand names for Value instantiated with Type::float32 (float) and Type::float64 (double).
+using Val32 = Value<Type::float32>;
+using Val64 = Value<Type::float64>;
+
 template <typename T> class Value : public std::enable_shared_from_this<Value<T>> {
   private:
     T _data = 0;
@@ -22,19 +30,24 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T>
     std::vector<ValuePtr<T>> topologicalSort(std::vector<ValuePtr<T>> &sorted, std::unordered_set<Value<T> *> &visited);
 
   public:
+    friend class Optimizer<T>;
+
     static ValuePtr<T> create(T data);
 
     void backward();
-    T getGradient() { return _gradient; }
+    T getGradient();
 
     ValuePtr<T> tanh();
+    ValuePtr<T> relu();
+    ValuePtr<T> sigmoid();
     ValuePtr<T> exp();
     ValuePtr<T> pow(ValuePtr<T> exponent);
 
     template <typename U> friend ValuePtr<U> operator+(ValuePtr<U> a, ValuePtr<U> b);
     template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a, ValuePtr<U> b);
     template <typename U> friend ValuePtr<U> operator*(ValuePtr<U> a, ValuePtr<U> b);
     template <typename U> friend ValuePtr<U> operator/(ValuePtr<U> a, ValuePtr<U> b);
+    template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a);
 
     template <typename U> friend std::ostream &operator<<(std::ostream &os, const ValuePtr<U> &value);
 };
@@ -43,6 +56,8 @@ template <typename T> Value<T>::Value(T data) : _data(data) {}
 
 // Factory returning a shared_ptr-owned Value holding `data`.
 // NOTE(review): uses `new` rather than std::make_shared, presumably because the constructor is not public — confirm.
 template <typename T> ValuePtr<T> Value<T>::create(T data) { return std::shared_ptr<Value<T>>(new Value<T>(data)); }
 
+/// Returns the current value of this node's `_gradient` member, by value.
+template <typename T> T Value<T>::getGradient() {
+    return _gradient;
+}
+
 template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
     ValuePtr<T> result = Value<T>::create(a->_data + b->_data);
     result->_children = {a, b};
@@ -54,6 +69,8 @@ template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
     return result;
 }
 
+/// Subtraction node, expressed as `a + (-b)` so it reuses the addition and unary-negation operators.
+template <typename T> ValuePtr<T> operator-(ValuePtr<T> a, ValuePtr<T> b) {
+    ValuePtr<T> negated = -b;
+    return a + negated;
+}
+
 template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {
     ValuePtr<T> result = Value<T>::create(a->_data * b->_data);
     result->_children = {a, b};
@@ -67,13 +84,39 @@ template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {
 
 // Division node built as a * b^(-1), reusing operator* and Value::pow with a freshly created -1 exponent.
 template <typename T> ValuePtr<T> operator/(ValuePtr<T> a, ValuePtr<T> b) { return a * (b->pow(Value<T>::create(-1))); }
 
+/// Unary negation: multiplies `a` by a freshly created constant -1 node.
+template <typename T> ValuePtr<T> operator-(ValuePtr<T> a) {
+    ValuePtr<T> minusOne = Value<T>::create(-1);
+    return minusOne * a;
+}
+
 // Hyperbolic tangent node: the forward value is (e^(2x) - 1) / (e^(2x) + 1) with x = this->_data.
 // NOTE(review): std::exp(2x) overflows to infinity for large positive x, which makes the quotient
 // inf / inf = NaN; std::tanh from <cmath> would avoid that — confirm and consider switching.
 // NOTE(review): the closure stored in result->_backward captures `result` by shared_ptr, which
 // looks like a self-owning reference cycle — confirm.
 template <typename T> ValuePtr<T> Value<T>::tanh() {
     auto thisValue = this->shared_from_this();
 
     ValuePtr<T> result = Value<T>::create((std::exp(2 * thisValue->_data) - 1) / (std::exp(2 * thisValue->_data) + 1));
     result->_children = {thisValue};
     result->_backward = [thisValue, result]() {
-        thisValue->_gradient += (1 - (thisValue->_data * thisValue->_data)) * result->_gradient;
+        thisValue->_gradient += (1 - (result->_data * result->_data)) * result->_gradient; // d tanh(x)/dx = 1 - tanh(x)^2
+    };
+
+    return result;
+}
+
+/// Logistic sigmoid node: the forward value is 1 / (1 + e^(-x)) with x = this->_data.
+///
+/// The backward closure is stored inside `result`, so it refers to `result`
+/// only through a std::weak_ptr; capturing the shared_ptr itself would make
+/// the node own itself and never be freed.
+///
+/// @return a new node whose single child is this value.
+template <typename T> ValuePtr<T> Value<T>::sigmoid() {
+    auto thisValue = this->shared_from_this();
+
+    ValuePtr<T> result = Value<T>::create(1 / (std::exp(-thisValue->_data) + 1));
+    result->_children = {thisValue};
+    result->_backward = [thisValue, weakResult = std::weak_ptr<Value<T>>(result)]() {
+        const ValuePtr<T> output = weakResult.lock();
+        if (!output) {
+            return; // the output node is already gone; nothing to propagate
+        }
+        // d sigmoid(x)/dx = sigmoid(x) * (1 - sigmoid(x))
+        thisValue->_gradient += output->_data * (1 - output->_data) * output->_gradient;
+    };
+
+    return result;
+}
+
+template <typename T> ValuePtr<T> Value<T>::relu() {
+    auto thisValue = this->shared_from_this();
+
+    ValuePtr<T> result = Value<T>::create(_data > 0 ? _data : 0);
+    result->_children = {thisValue};
+    result->_backward = [thisValue, result]() {
+        thisValue->_gradient += (result->_data > 0 ? 1 : 0) * result->_gradient;
     };
 
     return result;
@@ -103,11 +146,6 @@ template <typename T> ValuePtr<T> Value<T>::pow(ValuePtr<T> exponent) {
     return result;
 }
 
-template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
-    os << "Value(data=" << value->_data << ")";
-    return os;
-}
-
 template <typename T> std::vector<ValuePtr<T>> Value<T>::topologicalSort() {
     std::vector<ValuePtr<T>> sorted;
     std::unordered_set<Value<T> *> visited;
@@ -138,4 +176,9 @@ template <typename T> void Value<T>::backward() {
     }
 }
 
+/// Streams a value as `Value(data=<data>)` and returns the stream so calls can be chained.
+template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
+    return os << "Value(data=" << value->_data << ")";
+}
+
 } // namespace shkyera
diff --git a/include/src/core/Vector.hpp b/include/src/core/Vector.hpp
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <exception>
+
+#include "Type.hpp"
+#include "Value.hpp"
+
+namespace shkyera {
+
+template <typename T> class Vector;
+
+// Shorthand names for the two floating-point element types from Type.hpp.
+using Vec32 = Vector<Type::float32>;
+using Vec64 = Vector<Type::float64>;
+
+/// Sequence of shared Value nodes with element access, size and dot product.
+template <typename T> class Vector {
+  private:
+    std::vector<ValuePtr<T>> _values;
+
+  public:
+    /// Creates an empty vector.
+    Vector() = default;
+    /// Wraps existing value nodes; intentionally implicit (Vector::of relies on the conversion).
+    Vector(std::vector<ValuePtr<T>> values);
+    /// Builds a vector by creating one new Value node per raw scalar.
+    static Vector<T> of(const std::vector<T> &values);
+
+    /// Number of stored elements.
+    size_t size() const;
+    /// Element access by position.
+    ValuePtr<T> operator[](size_t index) const;
+    /// Dot product with a vector of the same size.
+    ValuePtr<T> dot(const Vector<T> &other) const;
+
+    template <typename U> friend std::ostream &operator<<(std::ostream &os, const Vector<U> &vector);
+};
+
+/// Stores a copy of the given value nodes as this vector's contents.
+template <typename T> Vector<T>::Vector(std::vector<ValuePtr<T>> values) : _values(values) {}
+
+/// Creates one Value node per scalar in `values`, in order, and wraps them in a Vector.
+template <typename T> Vector<T> Vector<T>::of(const std::vector<T> &values) {
+    std::vector<ValuePtr<T>> nodes;
+    nodes.reserve(values.size());
+
+    for (const T &scalar : values) {
+        nodes.push_back(Value<T>::create(scalar));
+    }
+
+    return Vector<T>(nodes);
+}
+
+/// Returns how many value nodes this vector holds.
+template <typename T> size_t Vector<T>::size() const {
+    return _values.size();
+}
+
+/// Dot product of this vector and `other`, built as a chain of Value additions starting from a zero node.
+///
+/// @throws std::invalid_argument when the two vectors differ in size.
+// NOTE(review): this header includes <exception> but not <stdexcept> or <string>, which declare
+// std::invalid_argument and std::to_string — confirm they arrive through another include.
+template <typename T> ValuePtr<T> Vector<T>::dot(const Vector<T> &other) const {
+    const size_t length = size();
+    if (length != other.size()) {
+        throw std::invalid_argument("Vectors need to be of the same size to compute the dot product. Sizes are " +
+                                    std::to_string(length) + " and " + std::to_string(other.size()) + ".");
+    }
+
+    ValuePtr<T> accumulator = Value<T>::create(0);
+    for (size_t i = 0; i < length; ++i) {
+        accumulator = accumulator + (_values[i] * other[i]);
+    }
+
+    return accumulator;
+}
+
+/// Returns the node at `index`. The index is passed straight to std::vector::operator[], so no bounds check happens here.
+template <typename T> ValuePtr<T> Vector<T>::operator[](size_t index) const {
+    return _values[index];
+}
+
+/// Streams the vector as `Vector(size=N, data={v0 v1 ... })`; every element is followed by one space.
+template <typename T> std::ostream &operator<<(std::ostream &os, const Vector<T> &vector) {
+    os << "Vector(size=" << vector.size() << ", data={";
+
+    // Bind by reference: copying a shared_ptr per element would cost a refcount increment and decrement each time.
+    for (const ValuePtr<T> &val : vector._values)
+        os << val << ' ';
+
+    os << "})";
+    return os;
+}
+
+} // namespace shkyera
diff --git a/include/src/nn/Activation.hpp b/include/src/nn/Activation.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "../core/Type.hpp"
+#include "../core/Value.hpp"
+
+namespace shkyera::Activation {
+
+/// Type-erased activation: takes one value node and returns the resulting node.
+template <typename T> using Function = std::function<ValuePtr<T>(ValuePtr<T>)>;
+
+// Each of the following forwards to the Value member function of the same name.
+template <typename T> Function<T> tanh = [](ValuePtr<T> node) -> ValuePtr<T> { return node->tanh(); };
+template <typename T> Function<T> relu = [](ValuePtr<T> node) -> ValuePtr<T> { return node->relu(); };
+template <typename T> Function<T> exp = [](ValuePtr<T> node) -> ValuePtr<T> { return node->exp(); };
+template <typename T> Function<T> sigmoid = [](ValuePtr<T> node) -> ValuePtr<T> { return node->sigmoid(); };
+
+// Identity: hands the input node back unchanged.
+template <typename T> Function<T> linear = [](ValuePtr<T> node) -> ValuePtr<T> { return node; };
+
+} // namespace shkyera::Activation
diff --git a/include/src/nn/Layer.hpp b/include/src/nn/Layer.hpp
@@ -0,0 +1,60 @@
+#pragma once
+
+#include "../core/Type.hpp"
+#include "Activation.hpp"
+#include "Module.hpp"
+#include "Neuron.hpp"
+
+namespace shkyera {
+
+template <typename T> class Layer;
+template <typename T> using LayerPtr = std::shared_ptr<Layer<T>>;
+
+// Precision-specific shorthands. Layer64 is instantiated with float64 so that it
+// operates on Vector<Type::float64> inputs rather than duplicating Layer32.
+using Layer32 = Layer<Type::float32>;
+using Layer64 = Layer<Type::float64>;
+
+/// Module made of a list of neurons; calling it yields one output value per neuron.
+template <typename T> class Layer : public Module<T> {
+  private:
+    std::vector<Neuron<T>> _neurons;
+
+    // Private: instances are obtained through create().
+    Layer(size_t input, size_t size, Activation::Function<T> activation = Activation::relu<T>);
+
+  public:
+    /// Builds a shared Layer of `size` neurons, each constructed with `input` and `activation`.
+    static LayerPtr<T> create(size_t input, size_t size, Activation::Function<T> activation = Activation::relu<T>);
+
+    // `override` already implies virtual, so the keyword is not repeated.
+    Vector<T> operator()(const Vector<T> &x) const override;
+    std::vector<ValuePtr<T>> parameters() const override;
+};
+
+/// Fills the layer with `size` neurons, each constructed from `input` and `activation`.
+template <typename T> Layer<T>::Layer(size_t input, size_t size, Activation::Function<T> activation) {
+    _neurons.reserve(size);
+    for (size_t remaining = size; remaining > 0; --remaining) {
+        _neurons.push_back(Neuron<T>(input, activation));
+    }
+}
+
+/// Factory for shared layers. Uses `new` because the constructor is private, which rules out std::make_shared.
+template <typename T> LayerPtr<T> Layer<T>::create(size_t input, size_t size, Activation::Function<T> activation) {
+    Layer<T> *rawLayer = new Layer<T>(input, size, activation);
+    return LayerPtr<T>(rawLayer);
+}
+
+/// Applies every neuron to `x` and collects element 0 of each neuron's result, in neuron order.
+template <typename T> Vector<T> Layer<T>::operator()(const Vector<T> &x) const {
+    std::vector<ValuePtr<T>> activations;
+    activations.reserve(_neurons.size());
+
+    for (const Neuron<T> &neuron : _neurons) {
+        activations.push_back(neuron(x)[0]);
+    }
+
+    return Vector<T>(activations);
+}
+
+/// Concatenates the parameters() of every neuron, in neuron order, into one list.
+template <typename T> std::vector<ValuePtr<T>> Layer<T>::parameters() const {
+    std::vector<ValuePtr<T>> collected;
+    for (size_t i = 0; i < _neurons.size(); ++i) {
+        const std::vector<ValuePtr<T>> fromNeuron = _neurons[i].parameters();
+        collected.insert(collected.end(), fromNeuron.begin(), fromNeuron.end());
+    }
+
+    return collected;
+}
+
+} // namespace shkyera
diff --git a/include/src/nn/Loss.hpp b/include/src/nn/Loss.hpp
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <functional>
+#include <stdexcept>
+#include <string>
+
+#include "../core/Type.hpp"
+#include "../core/Value.hpp"
+#include "../core/Vector.hpp"
+
+namespace shkyera::Loss {
+
+/// Signature shared by loss functions: two vectors in, one loss node out.
+template <typename T> using Function = std::function<ValuePtr<T>(Vector<T>, Vector<T>)>;
+
+// Shorthands for the two floating-point element types.
+using Function32 = Function<Type::float32>;
+using Function64 = Function<Type::float64>;
+
+/// Mean squared error between `a` and `b`: (1/n) * sum over i of (a[i] - b[i])^2.
+///
+/// Besides returning the loss node, this calls backward() on it, so gradients
+/// have already been propagated when the function returns.
+///
+/// @throws std::invalid_argument when the two vectors differ in size.
+template <typename T>
+Function<T> MSE = [](Vector<T> a, Vector<T> b) {
+    if (a.size() != b.size()) {
+        throw std::invalid_argument("Vectors need to be of the same size to compute the MSE loss. Sizes are " +
+                                    std::to_string(a.size()) + " and " + std::to_string(b.size()) + ".");
+    }
+
+    ValuePtr<T> loss = Value<T>::create(0);
+    for (size_t i = 0; i < a.size(); ++i) {
+        loss = loss + ((a[i] - b[i])->pow(Value<T>::create(2)));
+    }
+
+    // Divide the summed squares by the element count so the result is a mean,
+    // not a sum; empty inputs skip the division to avoid 0/0.
+    if (a.size() > 0) {
+        loss = loss / Value<T>::create(static_cast<T>(a.size()));
+    }
+
+    loss->backward();
+
+    return loss;
+};
+
+} // namespace shkyera::Loss
diff --git a/include/src/nn/Module.hpp b/include/src/nn/Module.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "../core/Vector.hpp"
+
+namespace shkyera {
+
+template <typename T> class Module;
+template <typename T> using ModulePtr = std::shared_ptr<Module<T>>;
+
+/// Polymorphic base for callable network components.
+/// Construction is protected, so only derived classes can create instances.
+template <typename T> class Module {
+  protected:
+    Module() = default;
+
+  public:
+    // Virtual so that a derived module destroyed through a Module pointer or
+    // reference runs its own destructor.
+    virtual ~Module() = default;
+
+    /// Named equivalent of operator(): dispatches to the (possibly overridden) call operator.
+    Vector<T> forward(const Vector<T> &x) const { return (*this)(x); }
+    /// Default behaviour is the identity: returns a copy of `x`.
+    virtual Vector<T> operator()(const Vector<T> &x) const { return x; }
+    /// Default: an empty parameter list.
+    virtual std::vector<ValuePtr<T>> parameters() const { return {}; }
+};
+
+} // namespace shkyera