Merge pull request #96 from FluxML/dev
For a 0.1.8 release
ablaom authored Mar 3, 2021
2 parents 7e88534 + 4cee033 commit 0c09fc3
Showing 6 changed files with 68 additions and 51 deletions.
Project.toml (2 changes: 1 addition & 1 deletion)
@@ -1,7 +1,7 @@
name = "MLJFlux"
uuid = "094fc8d1-fd35-5302-93ea-dabda2abf845"
authors = ["Anthony D. Blaom <[email protected]>", "Ayush Shridhar <[email protected]>"]
version = "0.1.7"
version = "0.1.8"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
src/classifier.jl (10 changes: 6 additions & 4 deletions)
@@ -64,8 +64,9 @@ function MLJModelInterface.fit(model::NeuralNetworkClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])

cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
fitresult = (chain, levels)
@@ -123,8 +124,9 @@ function MLJModelInterface.update(model::NeuralNetworkClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])
if keep_chain
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
src/core.jl (39 changes: 30 additions & 9 deletions)
@@ -46,6 +46,24 @@ end
(::Mover{<:CPU1})(data) = Flux.cpu(data)
(::Mover{<:CUDALibs})(data) = Flux.gpu(data)

"""
Custom training loop. Here `loss_func` is the objective function to
optimise, `parameters` are the model parameters, `optimiser` is the
optimiser to be used, `X` (the input features) is a vector of arrays
whose last dimension is the batch size, and `y` is the corresponding
vector of targets, one entry per batch.
"""
function train!(loss_func, parameters, optimiser, X, y)
for i=1:length(X)
gs = Flux.gradient(parameters) do
training_loss = loss_func(X[i], y[i])
return training_loss
end
Flux.update!(optimiser, parameters, gs)
end
end
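# Hypothetical usage sketch of `train!` (illustration only, not part of this
# commit): the chain, optimiser and mini-batches below are invented, with each
# element of `Xbatches`/`ybatches` holding one mini-batch:
#
#     chain = Flux.Chain(Flux.Dense(4, 8, Flux.relu), Flux.Dense(8, 1))
#     loss_func(x, y) = Flux.mse(chain(x), y)
#     Xbatches = [rand(Float32, 4, 3) for _ in 1:5]   # last dimension = batch size
#     ybatches = [rand(Float32, 1, 3) for _ in 1:5]
#     train!(loss_func, Flux.params(chain), Flux.Optimise.ADAM(0.001),
#            Xbatches, ybatches)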


"""
fit!(chain,
optimiser,
@@ -54,8 +72,9 @@ end
lambda,
alpha,
verbosity,
data,
acceleration)
acceleration,
X,
y)
Optimize a Flux model `chain` using the regularization parameters
`lambda` (strength) and `alpha` (l2/l1 mix), where `loss(yhat, y)` is
@@ -65,7 +84,8 @@ target predictions `yhat` and target observations `y`.
Here `chain` is a `Flux.Chain` object, or other "Flux model" such that
`Flux.params(chain)` returns the parameters to be optimised.
The training `data` is a vector of tuples of the form `(X, y)` where:
The `X` argument is the training features and the `y` argument is the
target:
- `X` and `y` have type `Array{<:AbstractFloat}`
@@ -95,29 +115,30 @@ mutate the argument `chain`, depending on cpu <-> gpu movements.
"""
function fit!(chain, optimiser, loss, epochs,
lambda, alpha, verbosity, data, acceleration)
lambda, alpha, verbosity, acceleration, X, y)

# initialize and start progress meter:
meter = Progress(epochs+1, dt=0, desc="Optimising neural net:",
barglyphs=BarGlyphs("[=> ]"), barlen=25, color=:yellow)
verbosity != 1 || next!(meter)

move = Mover(acceleration)
data = move(data)
X = move(X)
y = move(y)
chain = move(chain)

loss_func(x, y) = loss(chain(x), y)

# initiate history:
prev_loss = mean(loss_func(data[i][1], data[i][2]) for i=1:length(data))
prev_loss = mean(loss_func(X[i], y[i]) for i=1:length(X))
history = [prev_loss,]

for i in 1:epochs
# Iterate over the data one mini-batch at a time, in the usual Flux fashion.
# @show i rand()
Flux.train!(loss_func, Flux.params(chain), data, optimiser)
train!(loss_func, Flux.params(chain), optimiser, X, y)
current_loss =
mean(loss_func(data[i][1], data[i][2]) for i=1:length(data))
mean(loss_func(X[i], y[i]) for i=1:length(X))
verbosity < 2 ||
@info "Loss is $(round(current_loss; sigdigits=4))"
push!(history, current_loss)
@@ -282,5 +303,5 @@ function collate(model, X, y)
row_batches = Base.Iterators.partition(1:nrows(y), model.batch_size)
Xmatrix = reformat(X)
ymatrix = reformat(y)
return [(_get(Xmatrix, b), _get(ymatrix, b)) for b in row_batches]
return [_get(Xmatrix, b) for b in row_batches], [_get(ymatrix, b) for b in row_batches]
end
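
For orientation, here is a minimal sketch (not part of this commit) of the data layout `collate` now returns and of the new `fit!` argument order. The toy table, target and chain below are invented for illustration, and `CPU1()` comes from ComputationalResources:

using MLJBase, MLJFlux, Flux
using ComputationalResources   # provides CPU1

Xmatrix = rand(Float32, 10, 4)
X = MLJBase.table(Xmatrix)
y = rand(Float32, 10)

model = MLJFlux.NeuralNetworkRegressor()
model.batch_size = 3

# `collate` now returns two parallel vectors of mini-batches, instead of a
# single vector of (X, y) tuples:
Xbatches, ybatches = MLJFlux.collate(model, X, y)
@assert length(Xbatches) == length(ybatches) == 4   # batch sizes 3, 3, 3, 1
@assert size(Xbatches[1]) == (4, 3)                 # features × batch size

# `fit!` now takes `acceleration` before the data, and the data as two
# separate arguments:
chain = Flux.Chain(Flux.Dense(4, 8, Flux.relu), Flux.Dense(8, 1), vec)
chain, history = MLJFlux.fit!(chain, Flux.Optimise.ADAM(0.001), Flux.mse,
                              5, 0, 0, 0, CPU1(), Xbatches, ybatches)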
src/image.jl (10 changes: 6 additions & 4 deletions)
@@ -70,8 +70,9 @@ function MLJModelInterface.fit(model::ImageClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])

# `optimiser` is now mutated

@@ -136,8 +137,9 @@ function MLJModelInterface.update(model::ImageClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])
if keep_chain
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
src/regressor.jl (12 changes: 8 additions & 4 deletions)
@@ -104,8 +104,10 @@ function MLJModelInterface.fit(model::Regressor, verbosity::Int, X, y)
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
#data,
model.acceleration,
data[1],
data[2])

# note: "state" part of `optimiser` is now mutated!

@@ -160,8 +162,10 @@ function MLJModelInterface.update(model::Regressor,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
#data,
model.acceleration,
data[1],
data[2])
if keep_chain
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
test/core.jl (46 changes: 17 additions & 29 deletions)
@@ -23,57 +23,42 @@ end
model = MLJFlux.NeuralNetworkRegressor()
model.batch_size= 3
@test MLJFlux.collate(model, X, y) ==
[(Xmatrix'[:,1:3], y[1:3]),
(Xmatrix'[:,4:6], y[4:6]),
(Xmatrix'[:,7:9], y[7:9]),
(Xmatrix'[:,10:10], y[10:10])]
([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]], [y[1:3], y[4:6], y[7:9], y[10:10]])

# NeuralNetworkClassifier:
y = categorical(['a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a'])
model = MLJFlux.NeuralNetworkClassifier()
model.batch_size = 3
data = MLJFlux.collate(model, X, y)
@test first.(data) ==
[Xmatrix'[:,1:3], Xmatrix'[:,4:6],
Xmatrix'[:,7:9], Xmatrix'[:,10:10]]
@test last.(data) ==
[[1 0 1; 0 1 0], [1 0 1; 0 1 0],
[1 1 0; 0 0 1], reshape([1; 0], (2,1))]

@test data == ([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]],
[[1 0 1; 0 1 0], [1 0 1; 0 1 0], [1 1 0; 0 0 1], reshape([1; 0], (2,1))])

# MultitargetNeuralNetworkRegressor:
ymatrix = rand(10, 2)
y = MLJBase.table(ymatrix) # a rowaccess table
model = MLJFlux.NeuralNetworkRegressor()
model.batch_size= 3
@test MLJFlux.collate(model, X, y) ==
[(Xmatrix'[:,1:3], ymatrix'[:,1:3]),
(Xmatrix'[:,4:6], ymatrix'[:,4:6]),
(Xmatrix'[:,7:9], ymatrix'[:,7:9]),
(Xmatrix'[:,10:10], ymatrix'[:,10:10])]
@test MLJFlux.collate(model, X, y) ==
([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]],
[ymatrix'[:,1:3], ymatrix'[:,4:6], ymatrix'[:,7:9], ymatrix'[:,10:10]])

y = Tables.columntable(y) # try a columnaccess table
@test MLJFlux.collate(model, X, y) ==
[(Xmatrix'[:,1:3], ymatrix'[:,1:3]),
(Xmatrix'[:,4:6], ymatrix'[:,4:6]),
(Xmatrix'[:,7:9], ymatrix'[:,7:9]),
(Xmatrix'[:,10:10], ymatrix'[:,10:10])]
([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]],
[ymatrix'[:,1:3], ymatrix'[:,4:6], ymatrix'[:,7:9], ymatrix'[:,10:10]])

# ImageClassifier
Xmatrix = coerce(rand(6, 6, 1, 10), GrayImage)
y = categorical(['a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a'])
model = MLJFlux.ImageClassifier(batch_size=2)

data = MLJFlux.collate(model, Xmatrix, y)
@test first.(data) ==
[Float32.(cat(Xmatrix[1], Xmatrix[2], dims=4)),
Float32.(cat(Xmatrix[3], Xmatrix[4], dims=4)),
Float32.(cat(Xmatrix[5], Xmatrix[6], dims=4)),
Float32.(cat(Xmatrix[7], Xmatrix[8], dims=4)),
Float32.(cat(Xmatrix[9], Xmatrix[10], dims=4)),
]
@test first.(data) == (Float32.(cat(Xmatrix[1], Xmatrix[2], dims=4)), [1 0;0 1])

expected_y = [[1 0;0 1], [1 1;0 0], [0 1; 1 0], [1 1;0 0], [0 1; 1 0]]
for i=1:5
@test Int.(last.(data)[i]) == expected_y[i]
@test Int.(last(data)[i]) == expected_y[i]
end

end
@@ -88,6 +73,9 @@ data = [(Xmatrix'[:,1:20], y[1:20]),
(Xmatrix'[:,41:60], y[41:60]),
(Xmatrix'[:,61:80], y[61:80]),
(Xmatrix'[:, 81:100], y[81:100])]

data = ([Xmatrix'[:,1:20], Xmatrix'[:,21:40], Xmatrix'[:,41:60], Xmatrix'[:,61:80], Xmatrix'[:,81:100]],
[y[1:20], y[21:40], y[41:60], y[61:80], y[81:100]])

# construct two chains with identical state, except one has
# dropout and the other does not:
@@ -114,15 +102,15 @@ epochs = 10

_chain_yes_drop, history = MLJFlux.fit!(chain_yes_drop,
Flux.Optimise.ADAM(0.001),
Flux.mse, epochs, 0, 0, 0, data, accel)
Flux.mse, epochs, 0, 0, 0, accel, data[1], data[2])

println()

Random.seed!(123)

_chain_no_drop, history = MLJFlux.fit!(chain_no_drop,
Flux.Optimise.ADAM(0.001),
Flux.mse, epochs, 0, 0, 0, data, accel)
Flux.mse, epochs, 0, 0, 0, accel, data[1], data[2])

# check chains have different behaviour after training:
@test !(_chain_yes_drop(test_input) ≈ _chain_no_drop(test_input))
