Merge pull request #96 from FluxML/dev
For a 0.1.8 release
ablaom authored Mar 3, 2021
2 parents 7e88534 + 4cee033 commit 0c09fc3
Showing 6 changed files with 68 additions and 51 deletions.
Project.toml (2 changes: 1 addition & 1 deletion)
@@ -1,7 +1,7 @@
name = "MLJFlux"
uuid = "094fc8d1-fd35-5302-93ea-dabda2abf845"
authors = ["Anthony D. Blaom <[email protected]>", "Ayush Shridhar <[email protected]>"]
version = "0.1.7"
version = "0.1.8"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
src/classifier.jl (10 changes: 6 additions & 4 deletions)
@@ -64,8 +64,9 @@ function MLJModelInterface.fit(model::NeuralNetworkClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])

cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
fitresult = (chain, levels)
@@ -123,8 +124,9 @@ function MLJModelInterface.update(model::NeuralNetworkClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])
if keep_chain
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
src/core.jl (39 changes: 30 additions & 9 deletions)
@@ -46,6 +46,24 @@ end
(::Mover{<:CPU1})(data) = Flux.cpu(data)
(::Mover{<:CUDALibs})(data) = Flux.gpu(data)

"""
Custom training loop. Here `loss_func` is the objective function to
optimise, `parameters` are the model parameters, `optimiser` is the
optimiser to be used, `X` (the input features) is a vector of arrays
whose last dimension is the batch size, and `y` is the corresponding
vector of targets, one entry per batch.
"""
function train!(loss_func, parameters, optimiser, X, y)
for i=1:length(X)
gs = Flux.gradient(parameters) do
training_loss = loss_func(X[i], y[i])
return training_loss
end
Flux.update!(optimiser, parameters, gs)
end
end
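# Hypothetical usage sketch of `train!` (illustration only, not part of this
# commit): the chain, optimiser and mini-batches below are invented, with each
# element of `Xbatches`/`ybatches` holding one mini-batch:
#
#     chain = Flux.Chain(Flux.Dense(4, 8, Flux.relu), Flux.Dense(8, 1))
#     loss_func(x, y) = Flux.mse(chain(x), y)
#     Xbatches = [rand(Float32, 4, 3) for _ in 1:5]   # last dimension = batch size
#     ybatches = [rand(Float32, 1, 3) for _ in 1:5]
#     train!(loss_func, Flux.params(chain), Flux.Optimise.ADAM(0.001),
#            Xbatches, ybatches)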


"""
fit!(chain,
optimiser,
@@ -54,8 +72,9 @@ end
lambda,
alpha,
verbosity,
data,
acceleration)
acceleration,
X,
y)
Optimize a Flux model `chain` using the regularization parameters
`lambda` (strength) and `alpha` (l2/l1 mix), where `loss(yhat, y)` is
@@ -65,7 +84,8 @@ target predictions `yhat` and target observations `y`.
Here `chain` is a `Flux.Chain` object, or other "Flux model" such that
`Flux.params(chain)` returns the parameters to be optimised.
The training `data` is a vector of tuples of the form `(X, y)` where:
The `X` argument is the training features and the `y` argument is the
target:
- `X` and `y` have type `Array{<:AbstractFloat}`
@@ -95,29 +115,30 @@ mutate the argument `chain`, depending on cpu <-> gpu movements.
"""
function fit!(chain, optimiser, loss, epochs,
lambda, alpha, verbosity, data, acceleration)
lambda, alpha, verbosity, acceleration, X, y)

# initialize and start progress meter:
meter = Progress(epochs+1, dt=0, desc="Optimising neural net:",
barglyphs=BarGlyphs("[=> ]"), barlen=25, color=:yellow)
verbosity != 1 || next!(meter)

move = Mover(acceleration)
data = move(data)
X = move(X)
y = move(y)
chain = move(chain)

loss_func(x, y) = loss(chain(x), y)

# initiate history:
prev_loss = mean(loss_func(data[i][1], data[i][2]) for i=1:length(data))
prev_loss = mean(loss_func(X[i], y[i]) for i=1:length(X))
history = [prev_loss,]

for i in 1:epochs
# Iterate over the data one mini-batch at a time, in the usual Flux fashion.
# @show i rand()
Flux.train!(loss_func, Flux.params(chain), data, optimiser)
train!(loss_func, Flux.params(chain), optimiser, X, y)
current_loss =
mean(loss_func(data[i][1], data[i][2]) for i=1:length(data))
mean(loss_func(X[i], y[i]) for i=1:length(X))
verbosity < 2 ||
@info "Loss is $(round(current_loss; sigdigits=4))"
push!(history, current_loss)
@@ -282,5 +303,5 @@ function collate(model, X, y)
row_batches = Base.Iterators.partition(1:nrows(y), model.batch_size)
Xmatrix = reformat(X)
ymatrix = reformat(y)
return [(_get(Xmatrix, b), _get(ymatrix, b)) for b in row_batches]
return [_get(Xmatrix, b) for b in row_batches], [_get(ymatrix, b) for b in row_batches]
end
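
For orientation, here is a minimal sketch (not part of this commit) of the data layout `collate` now returns and of the new `fit!` argument order. The toy table, target and chain below are invented for illustration, and `CPU1()` comes from ComputationalResources:

using MLJBase, MLJFlux, Flux
using ComputationalResources   # provides CPU1

Xmatrix = rand(Float32, 10, 4)
X = MLJBase.table(Xmatrix)
y = rand(Float32, 10)

model = MLJFlux.NeuralNetworkRegressor()
model.batch_size = 3

# `collate` now returns two parallel vectors of mini-batches, instead of a
# single vector of (X, y) tuples:
Xbatches, ybatches = MLJFlux.collate(model, X, y)
@assert length(Xbatches) == length(ybatches) == 4   # batch sizes 3, 3, 3, 1
@assert size(Xbatches[1]) == (4, 3)                 # features × batch size

# `fit!` now takes `acceleration` before the data, and the data as two
# separate arguments:
chain = Flux.Chain(Flux.Dense(4, 8, Flux.relu), Flux.Dense(8, 1), vec)
chain, history = MLJFlux.fit!(chain, Flux.Optimise.ADAM(0.001), Flux.mse,
                              5, 0, 0, 0, CPU1(), Xbatches, ybatches)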
src/image.jl (10 changes: 6 additions & 4 deletions)
@@ -70,8 +70,9 @@ function MLJModelInterface.fit(model::ImageClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])

# `optimiser` is now mutated

@@ -136,8 +137,9 @@ function MLJModelInterface.update(model::ImageClassifier,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
model.acceleration,
data[1],
data[2])
if keep_chain
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
src/regressor.jl (12 changes: 8 additions & 4 deletions)
@@ -104,8 +104,10 @@ function MLJModelInterface.fit(model::Regressor, verbosity::Int, X, y)
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
#data,
model.acceleration,
data[1],
data[2])

# note: "state" part of `optimiser` is now mutated!

@@ -160,8 +162,10 @@ function MLJModelInterface.update(model::Regressor,
model.lambda,
model.alpha,
verbosity,
data,
model.acceleration)
#data,
model.acceleration,
data[1],
data[2])
if keep_chain
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
test/core.jl (46 changes: 17 additions & 29 deletions)
@@ -23,57 +23,42 @@ end
model = MLJFlux.NeuralNetworkRegressor()
model.batch_size= 3
@test MLJFlux.collate(model, X, y) ==
[(Xmatrix'[:,1:3], y[1:3]),
(Xmatrix'[:,4:6], y[4:6]),
(Xmatrix'[:,7:9], y[7:9]),
(Xmatrix'[:,10:10], y[10:10])]
([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]], [y[1:3], y[4:6], y[7:9], y[10:10]])

# NeuralNetworkClassifier:
y = categorical(['a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a'])
model = MLJFlux.NeuralNetworkClassifier()
model.batch_size = 3
data = MLJFlux.collate(model, X, y)
@test first.(data) ==
[Xmatrix'[:,1:3], Xmatrix'[:,4:6],
Xmatrix'[:,7:9], Xmatrix'[:,10:10]]
@test last.(data) ==
[[1 0 1; 0 1 0], [1 0 1; 0 1 0],
[1 1 0; 0 0 1], reshape([1; 0], (2,1))]

@test data == ([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]],
[[1 0 1; 0 1 0], [1 0 1; 0 1 0], [1 1 0; 0 0 1], reshape([1; 0], (2,1))])

# MultitargetNeuralNetworkRegressor:
ymatrix = rand(10, 2)
y = MLJBase.table(ymatrix) # a rowaccess table
model = MLJFlux.NeuralNetworkRegressor()
model.batch_size= 3
@test MLJFlux.collate(model, X, y) ==
[(Xmatrix'[:,1:3], ymatrix'[:,1:3]),
(Xmatrix'[:,4:6], ymatrix'[:,4:6]),
(Xmatrix'[:,7:9], ymatrix'[:,7:9]),
(Xmatrix'[:,10:10], ymatrix'[:,10:10])]
@test MLJFlux.collate(model, X, y) ==
([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]],
[ymatrix'[:,1:3], ymatrix'[:,4:6], ymatrix'[:,7:9], ymatrix'[:,10:10]])

y = Tables.columntable(y) # try a columnaccess table
@test MLJFlux.collate(model, X, y) ==
[(Xmatrix'[:,1:3], ymatrix'[:,1:3]),
(Xmatrix'[:,4:6], ymatrix'[:,4:6]),
(Xmatrix'[:,7:9], ymatrix'[:,7:9]),
(Xmatrix'[:,10:10], ymatrix'[:,10:10])]
([Xmatrix'[:,1:3], Xmatrix'[:,4:6], Xmatrix'[:,7:9], Xmatrix'[:,10:10]],
[ymatrix'[:,1:3], ymatrix'[:,4:6], ymatrix'[:,7:9], ymatrix'[:,10:10]])

# ImageClassifier
Xmatrix = coerce(rand(6, 6, 1, 10), GrayImage)
y = categorical(['a', 'b', 'a', 'a', 'b', 'a', 'a', 'a', 'b', 'a'])
model = MLJFlux.ImageClassifier(batch_size=2)

data = MLJFlux.collate(model, Xmatrix, y)
@test first.(data) ==
[Float32.(cat(Xmatrix[1], Xmatrix[2], dims=4)),
Float32.(cat(Xmatrix[3], Xmatrix[4], dims=4)),
Float32.(cat(Xmatrix[5], Xmatrix[6], dims=4)),
Float32.(cat(Xmatrix[7], Xmatrix[8], dims=4)),
Float32.(cat(Xmatrix[9], Xmatrix[10], dims=4)),
]
@test first.(data) == (Float32.(cat(Xmatrix[1], Xmatrix[2], dims=4)), [1 0;0 1])

expected_y = [[1 0;0 1], [1 1;0 0], [0 1; 1 0], [1 1;0 0], [0 1; 1 0]]
for i=1:5
@test Int.(last.(data)[i]) == expected_y[i]
@test Int.(last(data)[i]) == expected_y[i]
end

end
@@ -88,6 +73,9 @@ data = [(Xmatrix'[:,1:20], y[1:20]),
(Xmatrix'[:,41:60], y[41:60]),
(Xmatrix'[:,61:80], y[61:80]),
(Xmatrix'[:, 81:100], y[81:100])]

data = ([Xmatrix'[:,1:20], Xmatrix'[:,21:40], Xmatrix'[:,41:60], Xmatrix'[:,61:80], Xmatrix'[:,81:100]],
[y[1:20], y[21:40], y[41:60], y[61:80], y[81:100]])

# construct two chains with identical state, except one has
# dropout and the other does not:
@@ -114,15 +102,15 @@ epochs = 10

_chain_yes_drop, history = MLJFlux.fit!(chain_yes_drop,
Flux.Optimise.ADAM(0.001),
Flux.mse, epochs, 0, 0, 0, data, accel)
Flux.mse, epochs, 0, 0, 0, accel, data[1], data[2])

println()

Random.seed!(123)

_chain_no_drop, history = MLJFlux.fit!(chain_no_drop,
Flux.Optimise.ADAM(0.001),
Flux.mse, epochs, 0, 0, 0, data, accel)
Flux.mse, epochs, 0, 0, 0, accel, data[1], data[2])

# check chains have different behaviour after training:
@test !(_chain_yes_drop(test_input) ≈ _chain_no_drop(test_input))
