From d4debfbbeaf9f0659751960782c58a0a2ad56c91 Mon Sep 17 00:00:00 2001
From: "szewczyk.franciszek02"
Date: Thu, 9 Nov 2023 21:10:29 +0100
Subject: [PATCH] Updated example and README

---
 README.md                   | 40 ++++++++++++++++++++--------------------
 examples/xor_regression.cpp | 15 ++++++++++-----
 include/ShkyeraGrad.hpp     |  1 +
 include/core/Utils.hpp      |  2 ++
 4 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 6a8583a..b24e7ad 100644
--- a/README.md
+++ b/README.md
@@ -34,14 +34,17 @@ int main() {
     using namespace shkyera;
     using T = Type::float32;
 
-    std::vector<Vec32> xs;
-    std::vector<Vec32> ys;
-
-    // ---------- INPUT ----------- | -------- OUTPUT --------- //
-    xs.push_back(Vec32::of(0, 0)); ys.push_back(Vec32::of(0));
-    xs.push_back(Vec32::of(1, 0)); ys.push_back(Vec32::of(1));
-    xs.push_back(Vec32::of(0, 1)); ys.push_back(Vec32::of(1));
-    xs.push_back(Vec32::of(1, 1)); ys.push_back(Vec32::of(0));
+    // This is our XOR dataset. It maps from Vec32 to Vec32.
+    Dataset<Vec32, Vec32> data;
+    data.addSample(Vec32::of(0, 0), Vec32::of(0));
+    data.addSample(Vec32::of(0, 1), Vec32::of(1));
+    data.addSample(Vec32::of(1, 0), Vec32::of(1));
+    data.addSample(Vec32::of(1, 1), Vec32::of(0));
+
+    // This is the data loader; it will take care of batching.
+    size_t batchSize = 2;
+    bool shuffle = true;
+    DataLoader loader(data, batchSize, shuffle);
 
     auto network = SequentialBuilder<Type::float32>::begin()
                        .add(Linear32::create(2, 15))
@@ -52,29 +55,26 @@ int main() {
                        .add(Sigmoid32::create())
                        .build();
 
-
-    auto optimizer = Adam32(network->parameters(), 0.05);
+    auto optimizer = Adam32(network->parameters(), 0.1);
     auto lossFunction = Loss::MSE<T>;
 
     for (size_t epoch = 0; epoch < 100; epoch++) { // We train for 100 epochs
         auto epochLoss = Val32::create(0);
 
-        optimizer.reset();                                       // Reset the gradients
-        for (size_t sample = 0; sample < xs.size(); ++sample) {  // We go through each sample
-            Vec32 pred = network->forward(xs[sample]);           // We get some prediction
-            auto loss = lossFunction(pred, ys[sample]);          // And calculate its error
-
-            epochLoss = epochLoss + loss;                        // Store the loss for feedback
+        optimizer.reset();                                                // Reset the gradients
+        for (const auto &[x, y] : loader) {                               // For each batch
+            auto pred = network->forward(x);                              // We get some prediction
+            epochLoss = epochLoss + Loss::compute(lossFunction, pred, y); // And calculate its error
         }
         optimizer.step(); // Update the parameters
 
-        auto averageLoss = epochLoss / Val32::create(xs.size());
+        auto averageLoss = epochLoss / Val32::create(loader.getTotalBatches());
         std::cout << "Epoch: " << epoch + 1 << " Loss: " << averageLoss->getValue() << std::endl;
     }
 
-    for (size_t sample = 0; sample < xs.size(); ++sample) { // Go through each example
-        Vec32 pred = network->forward(xs[sample]);          // Predict result
-        std::cout << xs[sample] << " -> " << pred[0] << "\t| True: " << ys[sample][0] << std::endl;
+    for (auto &[x, y] : data) {          // Go through each example
+        auto pred = network->forward(x); // We get some prediction
+        std::cout << x << " -> " << pred[0] << "\t| True: " << y[0] << std::endl;
     }
 }
 ```
diff --git a/examples/xor_regression.cpp b/examples/xor_regression.cpp
index d3e77d1..7b1fe2b 100644
--- a/examples/xor_regression.cpp
+++ b/examples/xor_regression.cpp
@@ -5,13 +5,18 @@ int main() {
     using T = Type::float32;
 
     // clang-format off
+    // This is our XOR dataset. It maps from Vec32 to Vec32.
     Dataset<Vec32, Vec32> data;
-
     data.addSample(Vec32::of(0, 0), Vec32::of(0));
     data.addSample(Vec32::of(0, 1), Vec32::of(1));
     data.addSample(Vec32::of(1, 0), Vec32::of(1));
     data.addSample(Vec32::of(1, 1), Vec32::of(0));
 
+    // This is the data loader; it will take care of batching.
+    size_t batchSize = 2;
+    bool shuffle = true;
+    DataLoader loader(data, batchSize, shuffle);
+
     auto network = SequentialBuilder<Type::float32>::begin()
                        .add(Linear32::create(2, 15))
                        .add(ReLU32::create())
@@ -22,20 +27,20 @@
                        .build();
     // clang-format on
 
-    auto optimizer = Adam32(network->parameters(), 0.05);
+    auto optimizer = Adam32(network->parameters(), 0.1);
     auto lossFunction = Loss::MSE<T>;
 
     for (size_t epoch = 0; epoch < 100; epoch++) { // We train for 100 epochs
         auto epochLoss = Val32::create(0);
 
-        optimizer.reset(); // Reset the gradients
-        for (auto &[x, y] : data) {
+        optimizer.reset();                                                // Reset the gradients
+        for (const auto &[x, y] : loader) {                               // For each batch
             auto pred = network->forward(x);                              // We get some prediction
             epochLoss = epochLoss + Loss::compute(lossFunction, pred, y); // And calculate its error
         }
         optimizer.step(); // Update the parameters
 
-        auto averageLoss = epochLoss / Val32::create(data.size());
+        auto averageLoss = epochLoss / Val32::create(loader.getTotalBatches());
         std::cout << "Epoch: " << epoch + 1 << " Loss: " << averageLoss->getValue() << std::endl;
     }
 
diff --git a/include/ShkyeraGrad.hpp b/include/ShkyeraGrad.hpp
index 8df151e..2752afe 100644
--- a/include/ShkyeraGrad.hpp
+++ b/include/ShkyeraGrad.hpp
@@ -17,6 +17,7 @@
 #include "nn/Neuron.hpp"
 #include "nn/Sequential.hpp"
 
+#include "nn/data/DataLoader.hpp"
 #include "nn/data/Dataset.hpp"
 
 #include "nn/optimizers/AdaMax.hpp"
diff --git a/include/core/Utils.hpp b/include/core/Utils.hpp
index aeaa34d..d0ceaed 100644
--- a/include/core/Utils.hpp
+++ b/include/core/Utils.hpp
@@ -53,6 +53,8 @@ std::enable_if_t<std::is_floating_point_v<T>, std::vector<T>> sample(T from, T to, siz
     return sampled;
 }
 
+template <typename T> void shuffle(std::vector<T> &vec) { std::shuffle(vec.begin(), vec.end(), rand_dev); }
+
 template <typename Clock> auto startTimer() { return Clock::now(); }
 
 template <typename Clock>
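Note for reviewers: `nn/data/DataLoader.hpp` is included above but its implementation is not part of this diff, and the new `shuffle` helper in `core/Utils.hpp` presumably backs the loader's `shuffle = true` option. As a rough mental model only, a loader with the surface used here (per-epoch shuffling, iteration over input/target batches, and `getTotalBatches()` for loss averaging) could be sketched as below; `MiniDataLoader` and all of its members are hypothetical illustrations, not the library's actual API:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <random>
#include <utility>
#include <vector>

// Hypothetical stand-in for a batching data loader; names are illustrative.
template <typename Input, typename Target> class MiniDataLoader {
  public:
    MiniDataLoader(std::vector<std::pair<Input, Target>> samples, std::size_t batchSize, bool shuffle)
        : _samples(std::move(samples)), _batchSize(batchSize), _shuffle(shuffle) {}

    // Batches per epoch, counting a final partial batch (ceiling division).
    std::size_t getTotalBatches() const { return (_samples.size() + _batchSize - 1) / _batchSize; }

    // Materializes one epoch of batches; a real loader would iterate lazily.
    std::vector<std::pair<std::vector<Input>, std::vector<Target>>> batches() {
        if (_shuffle) {
            std::mt19937 gen{std::random_device{}()};
            std::shuffle(_samples.begin(), _samples.end(), gen); // reorder samples each epoch
        }
        std::vector<std::pair<std::vector<Input>, std::vector<Target>>> out(getTotalBatches());
        for (std::size_t i = 0; i < _samples.size(); ++i) {
            out[i / _batchSize].first.push_back(_samples[i].first);   // inputs of batch i / _batchSize
            out[i / _batchSize].second.push_back(_samples[i].second); // matching targets
        }
        return out;
    }

  private:
    std::vector<std::pair<Input, Target>> _samples;
    std::size_t _batchSize;
    bool _shuffle;
};

int main() {
    // Four XOR samples, batch size 2, shuffled each epoch.
    MiniDataLoader<std::vector<float>, float> loader(
        {{{0, 0}, 0}, {{0, 1}, 1}, {{1, 0}, 1}, {{1, 1}, 0}}, 2, true);

    for (const auto &[xs, ys] : loader.batches())
        std::cout << "batch of " << xs.size() << " samples, first target " << ys[0] << "\n";
}
```

With a batch size of 2, the four XOR samples form two batches per epoch, which is why the updated examples average `epochLoss` over `loader.getTotalBatches()` rather than over the individual sample count.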