Skip to content

Commit

Permalink
Merge pull request #5 from fszewczyk/neural-net
Browse files Browse the repository at this point in the history
Sequential model
  • Loading branch information
fszewczyk authored Nov 7, 2023
2 parents df9f197 + 7f2207f commit 08743a3
Show file tree
Hide file tree
Showing 13 changed files with 505 additions and 18 deletions.
13 changes: 12 additions & 1 deletion include/ShkyeraTensor.hpp
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
#include "src/Value.hpp"
#pragma once

#include "src/core/Type.hpp"
#include "src/core/Value.hpp"
#include "src/core/Vector.hpp"
#include "src/nn/Activation.hpp"
#include "src/nn/Layer.hpp"
#include "src/nn/Loss.hpp"
#include "src/nn/Module.hpp"
#include "src/nn/Neuron.hpp"
#include "src/nn/Optimizer.hpp"
#include "src/nn/Sequential.hpp"
10 changes: 10 additions & 0 deletions include/src/core/Type.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#pragma once

/// Scalar type aliases used as template arguments throughout the library.
namespace shkyera::Type {

// Descriptive spellings for the built-in `float` and `double` types.
using float32 = float;
using float64 = double;

// Short spellings; each one names exactly the same type as its long form.
using f32 = float32;
using f64 = float64;

} // namespace shkyera::Type
42 changes: 42 additions & 0 deletions include/src/core/Utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include <random>

/// Random sampling helpers backed by one shared pseudo-random engine.
namespace shkyera::utils {

/// Entropy source, used to seed `generator`.
///
/// Declared `inline` (C++17) because this header defines the object at
/// namespace scope: without it, every translation unit including the header
/// would provide its own external definition, violating the one-definition
/// rule (typically surfacing as a duplicate-symbol link error).
inline std::random_device rand_dev;

/// Mersenne Twister engine shared by every `sample` call below.
inline std::mt19937 generator(rand_dev());

namespace detail {

/// Picks the distribution for `T`: `std::uniform_int_distribution` for
/// integral types, `std::uniform_real_distribution` for everything else.
template <typename T>
using UniformDistribution =
    std::conditional_t<std::is_integral_v<T>, std::uniform_int_distribution<T>, std::uniform_real_distribution<T>>;

} // namespace detail

/// Draws a single value uniformly between `from` and `to`.
///
/// @param from Lower bound (inclusive).
/// @param to   Upper bound: inclusive for integral `T`, exclusive otherwise
///             (these are the intervals of the underlying std distributions).
/// @return One sample drawn with the shared `generator`.
template <typename T> T sample(T from, T to) {
    detail::UniformDistribution<T> distribution(from, to);
    return distribution(generator);
}

/// Draws `size` values uniformly between `from` and `to`.
///
/// @param from Lower bound (inclusive).
/// @param to   Upper bound: inclusive for integral `T`, exclusive otherwise.
/// @param size Number of samples to draw; 0 yields an empty vector.
/// @return Vector of `size` samples, drawn in order from the shared `generator`.
template <typename T> std::vector<T> sample(T from, T to, size_t size) {
    detail::UniformDistribution<T> distribution(from, to);

    std::vector<T> sampled(size);
    for (T &slot : sampled) {
        slot = distribution(generator);
    }

    return sampled;
}

} // namespace shkyera::utils
57 changes: 50 additions & 7 deletions include/src/Value.hpp → include/src/core/Value.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
#pragma once

#include <cmath>
#include <iostream>
#include <memory>
#include <unordered_set>
#include <vector>

#include "Type.hpp"

namespace shkyera {

// Forward declarations: Optimizer is named by the friend declaration inside Value,
// and Value must be nameable by the ValuePtr alias before the class is defined.
template <typename T> class Optimizer;
template <typename T> class Value;
// Shared-ownership handle to a Value; the operators in this header take and return it.
template <typename T> using ValuePtr = std::shared_ptr<Value<T>>;

// Shorthand for the Type::float32 / Type::float64 instantiations of Value.
using Val32 = Value<Type::float32>;
using Val64 = Value<Type::float64>;

template <typename T> class Value : public std::enable_shared_from_this<Value<T>> {
private:
T _data = 0;
Expand All @@ -22,19 +30,24 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T>
std::vector<ValuePtr<T>> topologicalSort(std::vector<ValuePtr<T>> &sorted, std::unordered_set<Value<T> *> &visited);

public:
friend class Optimizer<T>;

static ValuePtr<T> create(T data);

void backward();
T getGradient() { return _gradient; }
T getGradient();

ValuePtr<T> tanh();
ValuePtr<T> relu();
ValuePtr<T> sigmoid();
ValuePtr<T> exp();
ValuePtr<T> pow(ValuePtr<T> exponent);

template <typename U> friend ValuePtr<U> operator+(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator*(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator/(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a);

template <typename U> friend std::ostream &operator<<(std::ostream &os, const ValuePtr<U> &value);
};
Expand All @@ -43,6 +56,8 @@ template <typename T> Value<T>::Value(T data) : _data(data) {}

/// Factory: allocates a new Value holding `data` and hands back its owning shared pointer.
template <typename T> ValuePtr<T> Value<T>::create(T data) {
    Value<T> *node = new Value<T>(data);
    return ValuePtr<T>(node);
}

/// Returns a copy of the gradient currently stored on this value.
template <typename T> T Value<T>::getGradient() {
    return this->_gradient;
}

template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
ValuePtr<T> result = Value<T>::create(a->_data + b->_data);
result->_children = {a, b};
Expand All @@ -54,6 +69,8 @@ template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
return result;
}

/// Subtraction, expressed as addition of the negated right-hand operand.
template <typename T> ValuePtr<T> operator-(ValuePtr<T> a, ValuePtr<T> b) {
    ValuePtr<T> negated = -b;
    return a + negated;
}

template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {
ValuePtr<T> result = Value<T>::create(a->_data * b->_data);
result->_children = {a, b};
Expand All @@ -67,13 +84,39 @@ template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {

/// Division, expressed as multiplication by `b` raised to the power -1.
template <typename T> ValuePtr<T> operator/(ValuePtr<T> a, ValuePtr<T> b) {
    ValuePtr<T> minusOne = Value<T>::create(-1);
    ValuePtr<T> reciprocal = b->pow(minusOne);
    return a * reciprocal;
}

/// Unary negation, expressed as multiplication by a freshly created -1 value.
template <typename T> ValuePtr<T> operator-(ValuePtr<T> a) {
    ValuePtr<T> minusOne = Value<T>::create(-1);
    return minusOne * a;
}

/// Hyperbolic tangent of this value.
///
/// @return A new value holding tanh(_data), with this value as its only child
///         and a backward step that propagates the gradient to this value.
template <typename T> ValuePtr<T> Value<T>::tanh() {
    auto thisValue = this->shared_from_this();

    // std::tanh saturates to +/-1 for large inputs; the hand-written
    // (exp(2x) - 1) / (exp(2x) + 1) form turns into inf / inf = NaN as soon as
    // exp(2x) overflows.
    ValuePtr<T> result = Value<T>::create(std::tanh(thisValue->_data));
    result->_children = {thisValue};
    // NOTE(review): `result` is captured by the closure stored on `result` itself,
    // which looks like a shared_ptr cycle — confirm how nodes are meant to be released.
    result->_backward = [thisValue, result]() {
        // d/dx tanh(x) = 1 - tanh(x)^2, so the local derivative is computed from
        // the *output* (result->_data), and accumulated exactly once.
        thisValue->_gradient += (1 - (result->_data * result->_data)) * result->_gradient;
    };

    return result;
}

/// Logistic sigmoid of this value: 1 / (exp(-x) + 1).
///
/// @return A new value holding the sigmoid of _data, with this value as its only
///         child and a backward step that adds s * (1 - s) * upstream gradient
///         to this value's gradient (s being the sigmoid output).
template <typename T> ValuePtr<T> Value<T>::sigmoid() {
    auto self = this->shared_from_this();

    const T activated = 1 / (std::exp(-self->_data) + 1);
    ValuePtr<T> out = Value<T>::create(activated);
    out->_children = {self};
    out->_backward = [self, out]() {
        // Local derivative written in terms of the already-computed output.
        const T localSlope = out->_data * (1 - out->_data);
        self->_gradient += localSlope * out->_gradient;
    };

    return out;
}

template <typename T> ValuePtr<T> Value<T>::relu() {
auto thisValue = this->shared_from_this();

ValuePtr<T> result = Value<T>::create(_data > 0 ? _data : 0);
result->_children = {thisValue};
result->_backward = [thisValue, result]() {
thisValue->_gradient += (result->_data > 0 ? 1 : 0) * result->_gradient;
};

return result;
Expand Down Expand Up @@ -103,11 +146,6 @@ template <typename T> ValuePtr<T> Value<T>::pow(ValuePtr<T> exponent) {
return result;
}

// Streams a value handle as "Value(data=<data>)" and returns the stream.
// NOTE(review): an identical definition of this operator<< template appears again
// further down in this view, after Value<T>::backward(). Presumably this is the
// pre-move copy shown by the diff — confirm that only one definition remains in
// the real header, since two would be a redefinition.
template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
os << "Value(data=" << value->_data << ")";
return os;
}

template <typename T> std::vector<ValuePtr<T>> Value<T>::topologicalSort() {
std::vector<ValuePtr<T>> sorted;
std::unordered_set<Value<T> *> visited;
Expand Down Expand Up @@ -138,4 +176,9 @@ template <typename T> void Value<T>::backward() {
}
}

/// Streams a value handle as "Value(data=<data>)".
///
/// @param os    Destination stream.
/// @param value Handle whose `_data` is printed; it is dereferenced unconditionally.
/// @return `os`, to allow chaining.
template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
    return os << "Value(data=" << value->_data << ")";
}

} // namespace shkyera
70 changes: 70 additions & 0 deletions include/src/core/Vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#pragma once

#include <algorithm>
#include <exception>
#include <stdexcept>
#include <string>
#include <utility>

#include "Type.hpp"
#include "Value.hpp"

namespace shkyera {

// Forward declaration so the aliases below can name Vector before the class is defined.
template <typename T> class Vector;
// Shorthand for the Type::float32 / Type::float64 instantiations of Vector.
using Vec32 = Vector<Type::float32>;
using Vec64 = Vector<Type::float64>;

/// Fixed sequence of value handles (`ValuePtr<T>`) with a dot product,
/// element access and stream output.
template <typename T> class Vector {
  public:
    /// Creates an empty vector.
    Vector() = default;

    /// Wraps existing value handles. Intentionally not `explicit`: `of` relies
    /// on the implicit conversion from `std::vector<ValuePtr<T>>`.
    Vector(std::vector<ValuePtr<T>> values);

    /// Builds a vector from plain numbers, creating one new value per element.
    static Vector<T> of(const std::vector<T> &values);

    /// Number of stored handles.
    size_t size() const;

    /// Handle stored at `index`; no bounds check is performed.
    ValuePtr<T> operator[](size_t index) const;

    /// Dot product with `other`; throws std::invalid_argument when sizes differ.
    ValuePtr<T> dot(const Vector<T> &other) const;

    template <typename U> friend std::ostream &operator<<(std::ostream &os, const Vector<U> &vector);

  private:
    std::vector<ValuePtr<T>> _values;
};

/// Takes ownership of the given handles.
///
/// `values` is already a by-value copy, so it is moved straight into the member
/// through the initializer list. The previous body default-constructed `_values`
/// and then copy-assigned, copying every shared_ptr a second time.
///
/// @param values Handles to store, in order.
template <typename T> Vector<T>::Vector(std::vector<ValuePtr<T>> values) : _values(std::move(values)) {}

/// Builds a Vector from plain numbers.
///
/// @param values Raw numbers; each one is wrapped in a newly created value.
/// @return Vector holding one handle per input element, in the same order.
template <typename T> Vector<T> Vector<T>::of(const std::vector<T> &values) {
    std::vector<ValuePtr<T>> wrapped;
    wrapped.reserve(values.size());

    for (const T &raw : values) {
        wrapped.emplace_back(Value<T>::create(raw));
    }

    // Converted to Vector<T> through the non-explicit constructor.
    return wrapped;
}

/// Number of value handles stored in this vector.
template <typename T> size_t Vector<T>::size() const {
    return this->_values.size();
}

/// Dot product of this vector with `other`.
///
/// @param other Vector with the same number of elements.
/// @return Value accumulating the element-wise products, starting from a
///         freshly created 0 value.
/// @throws std::invalid_argument if the two sizes differ.
template <typename T> ValuePtr<T> Vector<T>::dot(const Vector<T> &other) const {
    const size_t ownSize = size();
    const size_t otherSize = other.size();

    if (ownSize != otherSize) {
        throw std::invalid_argument("Vectors need to be of the same size to compute the dot product. Sizes are " +
                                    std::to_string(ownSize) + " and " + std::to_string(otherSize) + ".");
    }

    ValuePtr<T> accumulator = Value<T>::create(0);
    size_t index = 0;
    while (index < ownSize) {
        accumulator = accumulator + (_values[index] * other[index]);
        ++index;
    }

    return accumulator;
}

/// Returns a copy of the handle stored at `index`. The index is not range-checked.
template <typename T> ValuePtr<T> Vector<T>::operator[](size_t index) const {
    return this->_values[index];
}

/// Streams a vector as "Vector(size=<n>, data={<v0> <v1> ... })".
///
/// Each element is written with the ValuePtr stream operator, followed by a
/// single space.
///
/// @param os     Destination stream.
/// @param vector Vector to print.
/// @return `os`, to allow chaining.
template <typename T> std::ostream &operator<<(std::ostream &os, const Vector<T> &vector) {
    os << "Vector(size=" << vector.size() << ", data={";

    // Bind by const reference: iterating by value copied a shared_ptr (and
    // bumped its reference count) for every element just to print it.
    for (const ValuePtr<T> &val : vector._values)
        os << val << ' ';

    os << "})";
    return os;
}

} // namespace shkyera
16 changes: 16 additions & 0 deletions include/src/nn/Activation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../core/Type.hpp"
#include "../core/Value.hpp"

/// Activation functions packaged as callables that can be passed around by value.
namespace shkyera::Activation {

/// Signature shared by every activation: one value handle in, one value handle out.
template <typename T> using Function = std::function<ValuePtr<T>(ValuePtr<T>)>;

// Each of the first four forwards to the member function of the same name on the value.
template <typename T> Function<T> tanh = [](ValuePtr<T> input) -> ValuePtr<T> { return input->tanh(); };
template <typename T> Function<T> relu = [](ValuePtr<T> input) -> ValuePtr<T> { return input->relu(); };
template <typename T> Function<T> exp = [](ValuePtr<T> input) -> ValuePtr<T> { return input->exp(); };
template <typename T> Function<T> sigmoid = [](ValuePtr<T> input) -> ValuePtr<T> { return input->sigmoid(); };

// Identity: hands the input handle back unchanged.
template <typename T> Function<T> linear = [](ValuePtr<T> input) -> ValuePtr<T> { return input; };

} // namespace shkyera::Activation
60 changes: 60 additions & 0 deletions include/src/nn/Layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma once

#include "../core/Type.hpp"
#include "Activation.hpp"
#include "Module.hpp"
#include "Neuron.hpp"

namespace shkyera {

// Forward declaration so the aliases below can name Layer before the class is defined.
template <typename T> class Layer;
// Shared-ownership handle to a Layer; this is what Layer<T>::create returns.
template <typename T> using LayerPtr = std::shared_ptr<Layer<T>>;

// Shorthand for the single- and double-precision instantiations.
// Layer64 must use Type::float64; it previously aliased the float32 layer.
using Layer32 = Layer<Type::float32>;
using Layer64 = Layer<Type::float64>;

/// A Module made of a list of neurons that are all evaluated on the same input.
///
/// Instances are obtained through `create`; the constructor is private.
template <typename T> class Layer : public Module<T> {
  public:
    /// Builds a layer of `size` neurons, each constructed with `input` and
    /// `activation` (relu unless specified), and returns it as a shared pointer.
    static LayerPtr<T> create(size_t input, size_t size, Activation::Function<T> activation = Activation::relu<T>);

    /// Evaluates every neuron on `x`; element i of the result comes from neuron i.
    virtual Vector<T> operator()(const Vector<T> &x) const override;

    /// All neuron parameters, concatenated in neuron order.
    virtual std::vector<ValuePtr<T>> parameters() const override;

  private:
    Layer(size_t input, size_t size, Activation::Function<T> activation = Activation::relu<T>);

    std::vector<Neuron<T>> _neurons;
};

/// Fills the layer with `size` neurons.
///
/// @param input      Forwarded to each Neuron constructor (presumably the number
///                   of inputs per neuron — confirm against Neuron).
/// @param size       Number of neurons to create.
/// @param activation Forwarded to each Neuron constructor.
template <typename T> Layer<T>::Layer(size_t input, size_t size, Activation::Function<T> activation) {
    _neurons.reserve(size);

    for (size_t built = 0; built != size; ++built) {
        _neurons.push_back(Neuron<T>(input, activation));
    }
}

/// Factory for layers; forwards all three arguments to the private constructor.
///
/// @return Shared pointer owning the newly allocated layer.
template <typename T> LayerPtr<T> Layer<T>::create(size_t input, size_t size, Activation::Function<T> activation) {
    Layer<T> *layer = new Layer<T>(input, size, activation);
    return LayerPtr<T>(layer);
}

/// Forward pass: calls every neuron on `x` and keeps element 0 of each result.
///
/// @param x Input passed unchanged to each neuron.
/// @return Vector with one entry per neuron, in neuron order.
template <typename T> Vector<T> Layer<T>::operator()(const Vector<T> &x) const {
    std::vector<ValuePtr<T>> activations;
    activations.reserve(_neurons.size());

    for (const Neuron<T> &neuron : _neurons) {
        activations.push_back(neuron(x)[0]);
    }

    return Vector<T>(activations);
}

/// Gathers the parameters of every neuron into one flat list.
///
/// @return Each neuron's `parameters()` appended in neuron order.
template <typename T> std::vector<ValuePtr<T>> Layer<T>::parameters() const {
    std::vector<ValuePtr<T>> collected;

    for (size_t i = 0; i < _neurons.size(); ++i) {
        const std::vector<ValuePtr<T>> fromNeuron = _neurons[i].parameters();
        collected.insert(collected.end(), fromNeuron.begin(), fromNeuron.end());
    }

    return collected;
}

} // namespace shkyera
29 changes: 29 additions & 0 deletions include/src/nn/Loss.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once

#include "../core/Value.hpp"

namespace shkyera::Loss {

// Signature shared by losses: takes two vectors by value and yields a single value handle.
template <typename T> using Function = std::function<ValuePtr<T>(Vector<T> a, Vector<T> b)>;

// Shorthand for the Type::float32 / Type::float64 instantiations of Function.
using Function32 = Function<Type::float32>;
using Function64 = Function<Type::float64>;

/// Mean squared error between two vectors of equal size.
///
/// Computes sum((a[i] - b[i])^2) / n and then calls `backward()` on the result
/// before returning it, so invoking this function also triggers the backward pass.
///
/// @param a First vector.
/// @param b Second vector; must have the same size as `a`.
/// @return The loss value; a 0 value when both vectors are empty.
/// @throws std::invalid_argument if the sizes differ.
template <typename T>
Function<T> MSE = [](Vector<T> a, Vector<T> b) {
    if (a.size() != b.size()) {
        throw std::invalid_argument("Vectors need to be of the same size to compute the MSE loss. Sizes are " +
                                    std::to_string(a.size()) + " and " + std::to_string(b.size()) + ".");
    }

    ValuePtr<T> loss = Value<T>::create(0);
    for (size_t i = 0; i < a.size(); ++i) {
        loss = loss + ((a[i] - b[i])->pow(Value<T>::create(2)));
    }

    // Average over the element count: without this step the result is the *sum*
    // of squared errors, and both loss and gradients grow with the vector length.
    // Skipped for empty input to avoid dividing by zero.
    if (a.size() > 0) {
        loss = loss / Value<T>::create(static_cast<T>(a.size()));
    }

    loss->backward();

    return loss;
};

} // namespace shkyera::Loss
20 changes: 20 additions & 0 deletions include/src/nn/Module.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include "../core/Vector.hpp"

namespace shkyera {

// Forward declaration so the alias below can name Module before the class is defined.
template <typename T> class Module;
// Shared-ownership handle to a Module.
template <typename T> using ModulePtr = std::shared_ptr<Module<T>>;

/// Base class for anything that maps a Vector to a Vector and exposes parameters.
///
/// The default implementation is the identity with no parameters; derived
/// classes override `operator()` and `parameters()`.
template <typename T> class Module {
  protected:
    // Only derived classes may construct a Module.
    Module() = default;

  public:
    // Polymorphic base: the destructor must be virtual so that destroying a
    // derived object through a Module pointer or reference is well defined.
    virtual ~Module() = default;

    /// Named alias for the call operator; dispatches to the most-derived override.
    Vector<T> forward(const Vector<T> &x) const { return (*this)(x); }

    /// Applies the module to `x`. The base version returns `x` unchanged.
    virtual Vector<T> operator()(const Vector<T> &x) const { return x; }

    /// Values exposed as this module's parameters. The base version has none.
    virtual std::vector<ValuePtr<T>> parameters() const { return {}; }
};

} // namespace shkyera
Loading

0 comments on commit 08743a3

Please sign in to comment.