diff --git a/md_docs_tutorials_Cheatsheet.html b/md_docs_tutorials_Cheatsheet.html index 352786e..ce459a0 100644 --- a/md_docs_tutorials_Cheatsheet.html +++ b/md_docs_tutorials_Cheatsheet.html @@ -89,8 +89,58 @@
Cheatsheet
-

This page contains all the info you need to develop your models using Shkyera Grad.

-
+

This page contains all the info you need to develop your models using Shkyera Grad.

+

+Types

+

Almost all of the classes in Shkyera Grad are implemented using templates. To simplify the creation of these objects, we introduced a standard way to instantiate them with floating-point template parameters, i.e.

+
Linear32 = Linear<float>
+
Optimizer32 = Optimizer<Type::float32>
+
Loss::MSE64 = Loss::MSE<double>
+
Adam64 = Adam<Type::f64>
+
+
{Class}32 = {Class}<Type::float32> = {Class}<float>
+
{Class}64 = {Class}<Type::float64> = {Class}<double>
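For example, the following two lines create the same kind of scalar, first with an explicit template parameter and then with the 32-bit alias (a minimal sketch; Val32 is the alias the tutorials use for a floating-point Value):

auto a = Value<Type::float32>::create(2.0); // explicit template parameter
auto b = Val32::create(2.0); // equivalent, using the 32-bit alias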
+

+Layers

+

Here's a full list of available layers:

+
auto linear = Linear32::create(inputSize, outputSize);
+
auto dropout = Dropout32::create(inputSize, outputSize, dropoutRate);
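Every layer is a Module, so you can also run one on a Vector directly. Below is a minimal sketch, assuming a single layer exposes the same forward interface as a full network:

auto layer = Linear32::create(2, 3); // 2 inputs, 3 outputs
auto input = Vec32::of({1, 2}); // input Vector with 2 entries
auto output = layer->forward(input); // output Vector with 3 entries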
+

+Optimizers

+

These are all the implemented optimizers:

+
auto simple = Optimizer32(network->parameters(), learningRate);
+
auto sgdWithMomentum = SGD32(network->parameters(), learningRate, /* momentum = */ 0.9);
+
auto adam = Adam32(network->parameters(), learningRate, /* beta1 = */ 0.9, /* beta2 = */ 0.999, /* epsilon = */ 1e-8);
+

+Loss functions

+

Optimization can be performed according to these predefined loss functions:

+
auto L1 = Loss::MAE32;
+
auto L2 = Loss::MSE32;
+
auto crossEntropy = Loss::CrossEntropy32;
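Each of these can be called directly on a prediction and a target Vector, just like in the training loop below (a sketch; pred and target are assumed to be Vectors of matching size):

auto loss = Loss::MSE32(pred, target); // a scalar Value holding the loss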
+

+Generic Training Loop

+

Simply copy-paste this code to quickly train your network:

+
using T = Type::float32; // feel free to change it to Type::float64
+
+
auto optimizer = Adam<T>(network->parameters(), 0.05);
+
auto lossFunction = Loss::MSE<T>;
+
+
for (size_t epoch = 0; epoch < 100; epoch++) {
+
auto epochLoss = Value<T>::create(0);
+
+
optimizer.reset();
+
for (size_t sample = 0; sample < xs.size(); ++sample) {
+
Vector<T> pred = network->forward(xs[sample]);
+
auto loss = lossFunction(pred, ys[sample]);
+
+
epochLoss = epochLoss + loss;
+
}
+
optimizer.step();
+
+
auto averageLoss = epochLoss / Value<T>::create(xs.size());
+
std::cout << "Epoch: " << epoch + 1 << " Loss: " << averageLoss->getValue() << std::endl;
+
}
+
diff --git a/md_docs_tutorials_GetStarted.html b/md_docs_tutorials_GetStarted.html index c9068c2..3f583d3 100644 --- a/md_docs_tutorials_GetStarted.html +++ b/md_docs_tutorials_GetStarted.html @@ -90,7 +90,7 @@

Hello! Let's get right into it. By the end of this guide, you will have created and trained your first neural network in Shkyera Grad!

-

+

Setup

This is easy: Shkyera Grad is a header-only library, so simply clone the repository into your project:

git clone https://github.com/fszewczyk/shkyera-grad.git
@@ -98,7 +98,7 @@

#include "shkyera-grad/include/ShkyeraGrad.hpp"

Now, you can use all the features of this small engine.

Note
Shkyera Grad is tested in C++17. Make sure your compiler supports this version.
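For reference, compiling a single-file project could look like this (assuming your code lives in main.cpp and the repository was cloned into the project root):

g++ -std=c++17 main.cpp -o main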
-

+

Scalars

Internally, Shkyera Grad always operates on individual scalars. For most purposes, you do not need to deal with them directly, but it's nice to understand how they work. Each scalar is wrapped inside a Value class. However, you should never instantiate objects of this type yourself. Instead, you should use the provided interface in the following way.

// Creates a floating-point scalar
@@ -138,7 +138,7 @@

a->getGradient(); // dc/da = 3.0
b->getGradient(); // dc/db = 2.0
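Putting it together, a self-contained sketch of this kind of computation looks as follows (using the Val32 alias and assuming multiplication of scalars goes through the same operator overloads as the addition and division used later in the training loop):

auto a = Val32::create(2.0);
auto b = Val32::create(3.0);
auto c = a * b; // c holds 6.0
c->backward(); // propagates gradients back to a and b
a->getGradient(); // dc/da = 3.0
b->getGradient(); // dc/db = 2.0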

If you want some refreshment on derivatives, check out this wonderful video.

-

+

Vector

Multiple scalars can be grouped together in a Vector to simplify operating on them. The input to any Module (more on these later) is a Vector. This abstraction provides some functionality that allows you to compute, for example, a dot product.

// The easiest way to create a Vector
@@ -157,7 +157,7 @@

auto d = a.dot(b); // d = 1 * 2 + 2 * 3 + 3 * 4 = 20
d->backward(); // You can compute the gradients of this result since it's a scalar!

Vectors are very useful since this is how both the input and the output data are represented. Each sample consists of an input Vector and a target output Vector.
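For instance, a single training sample could be represented as a pair of Vectors like this (a sketch mirroring the XOR dataset built later in this guide):

auto input = Vec32::of({1, 0}); // two input features
auto target = Vec32::of({1}); // one target value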

-

+

Sequential

Nice! You got the basics! Let's build a network. The best way to create a model is through the Sequential interface. Each function that transforms an input Vector into some output Vector is implemented as a Module. This includes neural layers as well as activation functions. Hey, even Sequential is a Module. This allows for creating complex structures while using a common, simple interface.

You can create your first neural network using SequentialBuilder in the following way.

@@ -170,7 +170,7 @@

.build(); // Don't forget to actually build your network

Warning
Remember that subsequent layers have to have matching input and output sizes.

Note
For the full list of available layers and activation functions, check out the Cheat Sheet.
-

+

Training

To train our network, we need to define an Optimizer that will optimize the parameters, as well as the Loss function that we will minimize. Shkyera Grad comes with a set of well-known optimizers and loss functions. Again, check out the Cheat Sheet for a complete list.

// Simple stochastic gradient descent optimizer with 0.01 learning rate
@@ -192,7 +192,7 @@

Loss::MSE<T> // Mean Squared Error
Loss::CrossEntropy<T> // Cross Entropy Loss - good for classification

They are implemented as lambda functions, not as objects, so they do not need to be instantiated.
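In practice, this means a loss function can be called like any other function (a minimal sketch; pred and target are assumed to be Vectors of matching size):

auto lossFunction = Loss::MSE<T>;
auto loss = lossFunction(pred, target); // the result is just another scalar Value
std::cout << loss->getValue() << std::endl; // so you can print it or combine it further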

-

+

Learning XOR

XOR (Exclusive OR) is a simple Boolean function that maps two values to one:

@@ -207,7 +207,7 @@

1 1 0
-

+

Let's define our dataset.

Here, we basically translate the table above into Vectors.

std::vector<Vec32> xs;
@@ -218,7 +218,7 @@

xs.push_back(Vec32::of({1, 0})); ys.push_back(Vec32::of({1}));
xs.push_back(Vec32::of({0, 1})); ys.push_back(Vec32::of({1}));
xs.push_back(Vec32::of({1, 1})); ys.push_back(Vec32::of({0}));
-

+

Neural Network

We define a simple neural network to predict this function. Our network has a total of three layers. It is a bit of overkill for this task, but we will use it for learning purposes.

auto network = SequentialBuilder<Type::float32>::begin()
@@ -229,7 +229,7 @@

.add(Linear32::create(5, 1))
.add(Sigmoid32::create())
.build();
-

+

Training Loop

Now, we just need to specify the optimizer and the loss function we want to use:

auto optimizer = Adam32(network->parameters(), 0.05);
@@ -250,7 +250,7 @@

auto averageLoss = epochLoss / Val32::create(xs.size());
std::cout << "Epoch: " << epoch + 1 << " Loss: " << averageLoss->getValue() << std::endl;
}
-

+

Verifying the results

After the training, let's inspect how our network behaves.

for (size_t sample = 0; sample < xs.size(); ++sample) { // Go through each example
@@ -258,7 +258,7 @@

std::cout << xs[sample] << " -> " << pred << "\t| True: " << ys[sample] << std::endl;
}

In case you got lost along the way, check out the examples/xor_regression.cpp file. It contains the exact same code and is ready to run :)

-

+

Results

Nice! After compiling and running this code (make sure to use C++17), you should see something like this:

Epoch: 1 Loss: 0.263062
diff --git a/navtreedata.js b/navtreedata.js index e9dd728..18a8e2a 100644 --- a/navtreedata.js +++ b/navtreedata.js @@ -25,20 +25,26 @@ var NAVTREE = [ [ "Shkyera Grad", "index.html", [ - [ "Cheatsheet", "md_docs_tutorials_Cheatsheet.html", null ], - [ "Get Started", "md_docs_tutorials_GetStarted.html", [ + [ "Cheatsheet", "md_docs_tutorials_Cheatsheet.html", [ [ "Usage", "index.html#autotoc_md0", null ], - [ "Setup", "md_docs_tutorials_GetStarted.html#autotoc_md3", null ], - [ "Scalars", "md_docs_tutorials_GetStarted.html#autotoc_md4", null ], - [ "Vector", "md_docs_tutorials_GetStarted.html#autotoc_md5", null ], - [ "Sequential", "md_docs_tutorials_GetStarted.html#autotoc_md6", null ], - [ "Training", "md_docs_tutorials_GetStarted.html#autotoc_md7", null ], - [ "Learning XOR", "md_docs_tutorials_GetStarted.html#autotoc_md8", [ - [ "Let's define our dataset.", "md_docs_tutorials_GetStarted.html#autotoc_md9", null ], - [ "Neural Network", "md_docs_tutorials_GetStarted.html#autotoc_md10", null ], - [ "Training Loop", "md_docs_tutorials_GetStarted.html#autotoc_md11", null ], - [ "Verifying the results", "md_docs_tutorials_GetStarted.html#autotoc_md12", null ], - [ "Results", "md_docs_tutorials_GetStarted.html#autotoc_md13", null ] + [ "Types", "md_docs_tutorials_Cheatsheet.html#autotoc_md2", null ], + [ "Layers", "md_docs_tutorials_Cheatsheet.html#autotoc_md3", null ], + [ "Optimizers", "md_docs_tutorials_Cheatsheet.html#autotoc_md4", null ], + [ "Loss functions", "md_docs_tutorials_Cheatsheet.html#autotoc_md5", null ], + [ "Generic Training Loop", "md_docs_tutorials_Cheatsheet.html#autotoc_md6", null ] + ] ], + [ "Get Started", "md_docs_tutorials_GetStarted.html", [ + [ "Setup", "md_docs_tutorials_GetStarted.html#autotoc_md8", null ], + [ "Scalars", "md_docs_tutorials_GetStarted.html#autotoc_md9", null ], + [ "Vector", "md_docs_tutorials_GetStarted.html#autotoc_md10", null ], + [ "Sequential", "md_docs_tutorials_GetStarted.html#autotoc_md11", null ], + [ "Training", "md_docs_tutorials_GetStarted.html#autotoc_md12", null ], + [ "Learning XOR", "md_docs_tutorials_GetStarted.html#autotoc_md13", [ + [ "Let's define our dataset.", "md_docs_tutorials_GetStarted.html#autotoc_md14", null ], + [ "Neural Network", "md_docs_tutorials_GetStarted.html#autotoc_md15", null ], + [ "Training Loop", "md_docs_tutorials_GetStarted.html#autotoc_md16", null ], + [ "Verifying the results", "md_docs_tutorials_GetStarted.html#autotoc_md17", null ], + [ "Results", "md_docs_tutorials_GetStarted.html#autotoc_md18", null ] ] ] ] ] ] ] diff --git a/navtreeindex0.js b/navtreeindex0.js index d3abc05..ee3001c 100644 --- a/navtreeindex0.js +++ b/navtreeindex0.js @@ -3,17 +3,22 @@ var NAVTREEINDEX0 = "index.html":[], "index.html#autotoc_md0":[0], "md_docs_tutorials_Cheatsheet.html":[0], +"md_docs_tutorials_Cheatsheet.html#autotoc_md2":[0,1], +"md_docs_tutorials_Cheatsheet.html#autotoc_md3":[0,2], +"md_docs_tutorials_Cheatsheet.html#autotoc_md4":[0,3], +"md_docs_tutorials_Cheatsheet.html#autotoc_md5":[0,4], +"md_docs_tutorials_Cheatsheet.html#autotoc_md6":[0,5], "md_docs_tutorials_GetStarted.html":[1], -"md_docs_tutorials_GetStarted.html#autotoc_md10":[1,6,1], -"md_docs_tutorials_GetStarted.html#autotoc_md11":[1,6,2], -"md_docs_tutorials_GetStarted.html#autotoc_md12":[1,6,3], -"md_docs_tutorials_GetStarted.html#autotoc_md13":[1,6,4], -"md_docs_tutorials_GetStarted.html#autotoc_md3":[1,1], -"md_docs_tutorials_GetStarted.html#autotoc_md4":[1,2], -"md_docs_tutorials_GetStarted.html#autotoc_md5":[1,3], 
-"md_docs_tutorials_GetStarted.html#autotoc_md6":[1,4], -"md_docs_tutorials_GetStarted.html#autotoc_md7":[1,5], -"md_docs_tutorials_GetStarted.html#autotoc_md8":[1,6], -"md_docs_tutorials_GetStarted.html#autotoc_md9":[1,6,0], +"md_docs_tutorials_GetStarted.html#autotoc_md10":[1,2], +"md_docs_tutorials_GetStarted.html#autotoc_md11":[1,3], +"md_docs_tutorials_GetStarted.html#autotoc_md12":[1,4], +"md_docs_tutorials_GetStarted.html#autotoc_md13":[1,5], +"md_docs_tutorials_GetStarted.html#autotoc_md14":[1,5,0], +"md_docs_tutorials_GetStarted.html#autotoc_md15":[1,5,1], +"md_docs_tutorials_GetStarted.html#autotoc_md16":[1,5,2], +"md_docs_tutorials_GetStarted.html#autotoc_md17":[1,5,3], +"md_docs_tutorials_GetStarted.html#autotoc_md18":[1,5,4], +"md_docs_tutorials_GetStarted.html#autotoc_md8":[1,0], +"md_docs_tutorials_GetStarted.html#autotoc_md9":[1,1], "pages.html":[] };