From ce11760f6f6e3fcecf7fd692d42b866650e20807 Mon Sep 17 00:00:00 2001
From: Dubslow
Date: Wed, 23 May 2018 12:59:18 -0500
Subject: [PATCH 1/2] Rewrite comments in training configuration to be more
 newbie friendly

---
 training/tf/configs/example.yaml | 39 ++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/training/tf/configs/example.yaml b/training/tf/configs/example.yaml
index a03b9d499..5e453b3fd 100644
--- a/training/tf/configs/example.yaml
+++ b/training/tf/configs/example.yaml
@@ -1,29 +1,50 @@
 %YAML 1.2
 ---
+# Training works as follows: one `step` means loading `batch_size`
+# samples into GPU memory, then doing gradient descent on those samples.
+# Leela training is done in discrete cycles (unlike A0's fancy
+# continuous-distributed setup), with the new weights being published
+# after each cycle. A cycle does `total_steps` training steps. The
+# samples are selected at random from `num_chunks` games (controlled
+# by the "shuffle buffer" of size `shuffle_size`). Only `train_ratio`
+# of the games are used for training, the remainder being used for testing.
+# Thus, the average number of times a given position is trained on
+# (the samples per position) is given by
+# `total_steps` * `batch_size` / (`num_chunks` * `train_ratio` * ply_per_game)
+
 name: 'kb1-64x6'                       # ideally no spaces
 gpu: 0                                 # gpu id to process on
 
 dataset:
   num_chunks: 100000                   # newest nof chunks to parse
-  train_ratio: 0.90                    # trainingset ratio
-  input: '/path/to/chunks/*/draw/'     # supports glob
+  train_ratio: 0.90                    # fraction of games for training (instead of testing)
+  input: '/path/to/chunks/*/draw/'     # path to data dir; supports glob
 
 training:
-  batch_size: 2048                     # training batch
-  total_steps: 140000                  # terminate after these steps
-  shuffle_size: 524288                 # size of the shuffle buffer
-  lr_values:                           # list of learning rates
+  batch_size: 2048                     # samples trained in one step
+  total_steps: 140000                  # number of training steps per publishing cycle
+  shuffle_size: 524288
+  lr_values:                           # learning rate schedule
     - 0.02
     - 0.002
     - 0.0005
-  lr_boundaries:                       # list of boundaries
+  lr_boundaries:                       # "boundaries" are in units of steps
     - 100000
     - 130000
-  policy_loss_weight: 1.0              # weight of policy loss
-  value_loss_weight: 1.0               # weight of value loss
+  policy_loss_weight: 1.0              # weight of policy head relative to regularization
+  value_loss_weight: 1.0               # weight of value head relative to regularization
   path: '/path/to/store/networks'      # network storage dir
 
 model:
   filters: 64
   residual_blocks: 6
 ...
+
+# `lr_values` is the list of learning rates, used according to the
+# `lr_boundaries` schedule, which controls how many training steps
+# each learning rate is used for.
+# `policy_loss_weight` and `value_loss_weight` determine how much
+# either the policy head or value head should be emphasized by the
+# gradient descent, relative to each other and to the regularization
+# (where regularization helps keep weights closer to 0, which helps
+# the network generalize better to novel positions).
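
A quick worked instance of the samples-per-position formula from the comment
block above, plugged through with this example config's own values. Note that
ply_per_game is not a config option; the 135-ply average game length used here
is an assumption for illustration only.

    # Worked example of the samples-per-position formula, using the
    # values from example.yaml. ply_per_game is NOT a config option;
    # 135 is an assumed average game length, purely illustrative.
    total_steps = 140_000
    batch_size = 2_048
    num_chunks = 100_000
    train_ratio = 0.90
    ply_per_game = 135  # assumption; varies with the actual game data

    samples_per_position = (total_steps * batch_size
                            / (num_chunks * train_ratio * ply_per_game))
    print(round(samples_per_position, 1))  # -> 23.6

So with these settings, each training position is seen roughly 24 times per
publishing cycle.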
From 7918c53e2a748e12de259d7f8ba412a0d77b58e9 Mon Sep 17 00:00:00 2001
From: Dubslow
Date: Thu, 24 May 2018 11:01:46 -0500
Subject: [PATCH 2/2] add lr boundary example

---
 training/tf/configs/example.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/training/tf/configs/example.yaml b/training/tf/configs/example.yaml
index 5e453b3fd..9194c649c 100644
--- a/training/tf/configs/example.yaml
+++ b/training/tf/configs/example.yaml
@@ -42,7 +42,8 @@ model:
 
 # `lr_values` is the list of learning rates, used according to the
 # `lr_boundaries` schedule, which controls how many training steps
-# each learning rate is used for.
+# each learning rate is used for. (At lr_bnd_1, switch from lr_1 to
+# lr_2. At lr_bnd_2, switch from lr_2 to lr_3, etc.)
 # `policy_loss_weight` and `value_loss_weight` determine how much
 # either the policy head or value head should be emphasized by the
 # gradient descent, relative to each other and to the regularization
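
A minimal sketch of the schedule behavior the second patch describes, assuming
a simple piecewise-constant lookup (the helper name and structure here are
hypothetical, not the actual training code):

    # Illustrative lookup only: lr_values[i] is used until the global
    # step count crosses lr_boundaries[i], then lr_values[i+1] takes over.
    def learning_rate_at(step, lr_values, lr_boundaries):
        for boundary, lr in zip(lr_boundaries, lr_values):
            if step < boundary:
                return lr
        return lr_values[-1]

    lr_values = [0.02, 0.002, 0.0005]   # from example.yaml
    lr_boundaries = [100_000, 130_000]

    assert learning_rate_at(99_999, lr_values, lr_boundaries) == 0.02
    assert learning_rate_at(115_000, lr_values, lr_boundaries) == 0.002
    assert learning_rate_at(135_000, lr_values, lr_boundaries) == 0.0005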
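Similarly, a hedged sketch of how the two loss weights combine, based only on
the comment's description; all names below are illustrative stand-ins, not the
real training graph.

    # Illustrative composition of the total loss, per the comments above.
    policy_loss_weight = 1.0   # from example.yaml
    value_loss_weight = 1.0

    def total_loss(policy_loss, value_loss, reg_term):
        # The two weights scale the heads relative to each other and to
        # the regularization term, which pulls weights toward 0 so the
        # network generalizes better to novel positions.
        return (policy_loss_weight * policy_loss
                + value_loss_weight * value_loss
                + reg_term)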