Merge pull request #88 from FluxML/dev
For a 0.1.7 release
ablaom authored Feb 23, 2021
2 parents 601c38d + 3ce61c2 commit 03f17d2
Showing 11 changed files with 159 additions and 33 deletions.
16 changes: 16 additions & 0 deletions .buildkite/pipeline.yml
@@ -0,0 +1,16 @@
steps:
- label: "Julia v1"
plugins:
- JuliaCI/julia#v1:
version: "1"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
agents:
queue: "juliagpu"
cuda: "*"
timeout_in_minutes: 60

env:
JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
# SECRET_CODECOV_TOKEN: ""
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "MLJFlux"
uuid = "094fc8d1-fd35-5302-93ea-dabda2abf845"
authors = ["Anthony D. Blaom <[email protected]>", "Ayush Shridhar <[email protected]>"]
version = "0.1.6"
version = "0.1.7"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
24 changes: 19 additions & 5 deletions README.md
@@ -4,7 +4,22 @@ An interface to the Flux deep learning models for the
[MLJ](https://github.com/alan-turing-institute/MLJ.jl) machine
learning framework

[![Build Status](https://github.com/alan-turing-institute/MLJFlux.jl/workflows/CI/badge.svg)](https://github.com/alan-turing-institute/MLJFlux.jl/actions) [![Coverage Status](https://coveralls.io/repos/github/alan-turing-institute/MLJFlux.jl/badge.svg?branch=master)](https://coveralls.io/github/alan-turing-institute/MLJFlux.jl?branch=master)
| Branch | Julia | CPU CI | GPU CI | Coverage |
| -------- | ----- | ------ | ----- | -------- |
| `master` | v1 | [![Continuous Integration (CPU)][gha-img-master]][gha-url] | [![Continuous Integration (GPU)][buildkite-julia1-img-master]][buildkite-url] | [![Code Coverage][coveralls-img-master]][coveralls-url] |
| `dev` | v1 | [![Continuous Integration (CPU)][gha-img-dev]][gha-url] | [![Continuous Integration (GPU)][buildkite-julia1-img-dev]][buildkite-url] | [![Code Coverage][coveralls-img-dev]][coveralls-url] |

[gha-img-master]: https://github.com/FluxML/MLJFlux.jl/workflows/CI/badge.svg?branch=master "Continuous Integration (CPU)"
[gha-img-dev]: https://github.com/FluxML/MLJFlux.jl/workflows/CI/badge.svg?branch=dev "Continuous Integration (CPU)"
[gha-url]: https://github.com/FluxML/MLJFlux.jl/actions/workflows/ci.yml

[buildkite-julia1-img-master]: https://badge.buildkite.com/ae439e1f6ed6f178342a0ed166d0983de6ec1b72325e4e3e7e.svg?branch=master&step=Julia%20v1 "Continuous Integration (GPU)"
[buildkite-julia1-img-dev]: https://badge.buildkite.com/ae439e1f6ed6f178342a0ed166d0983de6ec1b72325e4e3e7e.svg?branch=dev&step=Julia%20v1 "Continuous Integration (GPU)"
[buildkite-url]: https://buildkite.com/julialang/mljflux-dot-jl

[coveralls-img-master]: https://coveralls.io/repos/github/alan-turing-institute/MLJFlux.jl/badge.svg?branch=master "Code Coverage"
[coveralls-img-dev]: https://coveralls.io/repos/github/alan-turing-institute/MLJFlux.jl/badge.svg?branch=dev "Code Coverage"
[coveralls-url]: https://github.com/FluxML/MLJFlux.jl/actions/workflows/ci.yml

MLJFlux makes it possible to apply the machine learning
meta-algorithms provided by MLJ - such as out-of-sample performance
@@ -203,7 +218,7 @@ All models share the following hyper-parameters:
7. `alpha`: The L2/L1 mix of regularization. Default = 0. Range = [0, 1]
8. `acceleration`: Use `CUDALibs()` for training on GPU; default is `CPU1()`.
8. `acceleration`: Use `CUDALibs()` for training on GPU; default is `CPU1()`.
9. `optimiser_changes_trigger_retraining`: True if fitting an
associated machine should trigger retraining from scratch whenever
@@ -244,7 +259,7 @@ function MLJFlux.build(nn::MyNetwork, n_in, n_out)
end
```
Note here that `n_in` and `n_out` depend on the size of the data (see
Note here that `n_in` and `n_out` depend on the size of the data (see
Table 1).
More generally, defining a new builder means defining a new struct
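The body of the `MyNetwork` example is collapsed in this diff view. As a separate, purely illustrative sketch (hypothetical builder name and field, not the README's own code), a builder for a single hidden layer could read:

```julia
using MLJFlux, Flux

# Hypothetical builder: the hidden-layer width is a field of the struct,
# so it becomes a tunable hyper-parameter of any model using this builder.
mutable struct MyHidden
    n_hidden::Int
end

function MLJFlux.build(builder::MyHidden, n_in, n_out)
    return Chain(Dense(n_in, builder.n_hidden, relu),
                 Dense(builder.n_hidden, n_out))
end
```

A model then receives the builder at construction time, as in `NeuralNetworkRegressor(builder=MyHidden(16))`.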
@@ -298,7 +313,7 @@ you *should* use MLJ loss functions in MLJ meta-algorithms.
We define a builder that builds a chain with six alternating
convolution and max-pool layers, and a final dense layer, which we
apply to the MNIST image dataset.
apply to the MNIST image dataset.
First we define a generic builder (working for any image size, color
or gray):
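The definition itself is collapsed in this diff view; what follows is only a sketch of what such a generic builder could look like (hypothetical struct name and fields, and it assumes the image-builder signature `MLJFlux.build(builder, n_in, n_out, n_channels)`, with `n_in` the image size as a tuple):

```julia
using MLJFlux, Flux

# Hypothetical builder: three Conv/MaxPool pairs (six layers) followed by a
# final Dense layer, working for any image size and any number of channels.
mutable struct MyConvBuilder
    filter_size::Int
    channels1::Int
    channels2::Int
    channels3::Int
end

make2d(x) = reshape(x, :, size(x)[end])   # image batch -> (features, batch) matrix

function MLJFlux.build(b::MyConvBuilder, n_in, n_out, n_channels)
    k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3
    p = div(k - 1, 2)                     # padding so convolutions preserve image size
    front = Chain(
        Conv((k, k), n_channels => c1, relu, pad=(p, p)),
        MaxPool((2, 2)),
        Conv((k, k), c1 => c2, relu, pad=(p, p)),
        MaxPool((2, 2)),
        Conv((k, k), c2 => c3, relu, pad=(p, p)),
        MaxPool((2, 2)),
        make2d)
    # infer the flattened dimension with a dummy forward pass:
    d = size(front(zeros(Float32, n_in..., n_channels, 1)), 1)
    return Chain(front, Dense(d, n_out))
end
```

The dummy forward pass is just one way to infer the input size of the final `Dense` layer without hard-coding the image dimensions.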
@@ -390,4 +405,3 @@ julia> evaluate!(mach,
│ misclassification_rate │ 0.0467 │ [0.0467] │
└────────────────────────┴───────────────┴────────────┘
```
13 changes: 9 additions & 4 deletions src/classifier.jl
@@ -67,7 +67,7 @@ function MLJModelInterface.fit(model::NeuralNetworkClassifier,
data,
model.acceleration)

cache = (deepcopy(model), data, history, n_input, n_output)
cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
fitresult = (chain, levels)
report = (training_losses=history, )

@@ -90,7 +90,7 @@ function MLJModelInterface.update(model::NeuralNetworkClassifier,
X,
y)

old_model, data, old_history, n_input, n_output = old_cache
old_model, data, old_history, n_input, n_output, optimiser = old_cache
old_chain, levels = old_fitresult

optimiser_flag = model.optimiser_changes_trigger_retraining &&
@@ -109,7 +109,12 @@ function MLJModelInterface.update(model::NeuralNetworkClassifier,
epochs = model.epochs
end

optimiser = deepcopy(model.optimiser)
# we only get to keep the optimiser "state" carried over from
# previous training if we're doing a warm restart and the user has not
# changed the optimiser hyper-parameter:
if !keep_chain || model.optimiser != old_model.optimiser
optimiser = deepcopy(model.optimiser)
end

chain, history = fit!(chain,
optimiser,
@@ -126,7 +131,7 @@ function MLJModelInterface.update(model::NeuralNetworkClassifier,
end

fitresult = (chain, levels)
cache = (deepcopy(model), data, history, n_input, n_output)
cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
report = (training_losses=history, )

return fitresult, cache, report
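The net effect of threading `optimiser` through the cache is that a warm restart can now reuse accumulated optimiser state (ADAM moments, momentum, and so on). A hedged sketch of the user-level behaviour this enables (hypothetical data and hyper-parameter values, not code from this repository):

```julia
using MLJ, MLJFlux
import Flux

X, y = @load_iris                    # any small classification task will do
clf  = NeuralNetworkClassifier(epochs=5, optimiser=Flux.ADAM(0.001))
mach = machine(clf, X, y)
fit!(mach)                           # cold start: trains for 5 epochs

clf.epochs = 10                      # only `epochs` changed, so the next fit! is a
fit!(mach)                           # warm restart and reuses the cached optimiser,
                                     # including any accumulated ADAM moments

clf.optimiser = Flux.ADAM(0.01)      # optimiser hyper-parameter changed: the
clf.epochs = 15                      # carried-over optimiser state is discarded and
fit!(mach)                           # a fresh copy of the new optimiser is used
```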
4 changes: 2 additions & 2 deletions src/core.jl
@@ -1,6 +1,6 @@
## EXPOSE OPTIMISERS TO MLJ (for eg, tuning)

# Here we: (i) Make the optimiser structs "transarent" so that their
# Here we: (i) Make the optimiser structs "transparent" so that their
# field values are exposed by calls to MLJ.params; and (ii) Overload
# `==` for optimisers, so that we can detect when their parameters
# remain unchanged on calls to MLJModelInterface.update methods.
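A sketch of the kind of overload meant in (ii) (illustrative only; MLJFlux defines this generically for its supported Flux optimisers rather than per type): hyper-parameter fields are compared and any mutable training state is ignored.

```julia
import Flux

# Illustrative only; assumes Flux 0.11's ADAM, whose fields are `eta`, `beta`
# and a mutable `state::IdDict`. Two ADAMs compare equal when their
# hyper-parameters agree, regardless of what state they have accumulated.
Base.:(==)(o1::Flux.ADAM, o2::Flux.ADAM) =
    o1.eta == o2.eta && o1.beta == o2.beta
```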
@@ -134,7 +134,7 @@ function fit!(chain, optimiser, loss, epochs,
verbosity != 1 || next!(meter)

end

return Flux.cpu(chain), history

end
15 changes: 11 additions & 4 deletions src/image.jl
@@ -73,7 +73,9 @@ function MLJModelInterface.fit(model::ImageClassifier,
data,
model.acceleration)

cache = deepcopy(model), data, history, n_input, n_output
# `optimiser` is now mutated

cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
fitresult = (chain, levels)

report = (training_losses=history, )
@@ -96,7 +98,7 @@ function MLJModelInterface.update(model::ImageClassifier,
X,
y)

old_model, data, old_history, n_input, n_output = old_cache
old_model, data, old_history, n_input, n_output, optimiser = old_cache
old_chain, levels = old_fitresult

optimiser_flag = model.optimiser_changes_trigger_retraining &&
@@ -120,7 +122,12 @@ function MLJModelInterface.update(model::ImageClassifier,
epochs = model.epochs
end

optimiser = deepcopy(model.optimiser)
# we only get to keep the optimiser "state" carried over from
# previous training if we're doing a warm restart and the user has not
# changed the optimiser hyper-parameter:
if !keep_chain || model.optimiser != old_model.optimiser
optimiser = deepcopy(model.optimiser)
end

chain, history = fit!(chain,
optimiser,
@@ -137,7 +144,7 @@ function MLJModelInterface.update(model::ImageClassifier,
end

fitresult = (chain, levels)
cache = (deepcopy(model), data, history, n_input, n_output)
cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
report = (training_losses=history, )

return fitresult, cache, report
34 changes: 23 additions & 11 deletions src/regressor.jl
@@ -20,7 +20,7 @@ function NeuralNetworkRegressor(; builder::B = Linear()
, optimiser_changes_trigger_retraining=false
, acceleration = CPU1()
) where {B,O,L}

model = NeuralNetworkRegressor{B,O,L}(builder
, optimiser
, loss
@@ -30,10 +30,10 @@ function NeuralNetworkRegressor(; builder::B = Linear()
, alpha
, optimiser_changes_trigger_retraining
, acceleration)

message = clean!(model)
isempty(message) || @warn message

return model
end

@@ -75,7 +75,7 @@ function MultitargetNeuralNetworkRegressor(; builder::B = Linear()

return model
end

const Regressor =
Union{NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor}

@@ -107,7 +107,9 @@ function MLJModelInterface.fit(model::Regressor, verbosity::Int, X, y)
data,
model.acceleration)

cache = (deepcopy(model), data, history, n_input, n_output)
# note: "state" part of `optimiser` is now mutated!

cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
fitresult = (chain, target_is_multivariate, target_column_names)
report = (training_losses=history,)

@@ -122,7 +124,11 @@ function MLJModelInterface.update(model::Regressor,
X,
y)

old_model, data, old_history, n_input, n_output = old_cache
# note: the `optimiser` in `old_cache` stores "state" (eg,
# momentum); the "state" part of the `optimiser` field of `model`
# and of `old_model` play no role

old_model, data, old_history, n_input, n_output, optimiser = old_cache
old_chain, target_is_multivariate, target_column_names = old_fitresult

optimiser_flag = model.optimiser_changes_trigger_retraining &&
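The "state" in question is the per-parameter memory a Flux optimiser accumulates as it is used; this is why the copy stored in the cache, rather than the model's `optimiser` field, is what matters for warm restarts. A minimal sketch (assuming Flux 0.11's implicit-parameter API and ADAM's mutable `state::IdDict` field):

```julia
import Flux

opt = Flux.ADAM(0.001)
W   = rand(Float32, 3, 3)
ps  = Flux.params(W)

@assert isempty(opt.state)             # no moments recorded yet

gs = Flux.gradient(() -> sum(W), ps)   # a trivial "loss", just to get gradients
Flux.Optimise.update!(opt, ps, gs)

@assert !isempty(opt.state)            # ADAM's moment estimates now live inside
                                       # the optimiser object itself
```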
@@ -140,7 +146,12 @@ function MLJModelInterface.update(model::Regressor,
epochs = model.epochs
end

optimiser = deepcopy(model.optimiser)
# we only get to keep the optimiser "state" carried over from
# previous training if we're doing a warm restart and the user has not
# changed the optimiser hyper-parameter:
if !keep_chain || model.optimiser != old_model.optimiser
optimiser = deepcopy(model.optimiser)
end

chain, history = fit!(chain,
optimiser,
@@ -155,8 +166,9 @@ function MLJModelInterface.update(model::Regressor,
# note: history[1] = old_history[end]
history = vcat(old_history[1:end-1], history)
end

fitresult = (chain, target_is_multivariate, target_column_names)
cache = (deepcopy(model), data, history, n_input, n_output)
cache = (deepcopy(model), data, history, n_input, n_output, optimiser)
report = (training_losses=history,)

return fitresult, cache, report
Expand All @@ -167,16 +179,16 @@ function MLJModelInterface.predict(model::Regressor, fitresult, Xnew_)

chain , target_is_multivariate, target_column_names = fitresult

Xnew_ = MLJModelInterface.matrix(Xnew_)
Xnew_ = MLJModelInterface.matrix(Xnew_)

if target_is_multivariate
ypred = [chain(values.(Xnew_[i, :]))
for i in 1:size(Xnew_, 1)]
for i in 1:size(Xnew_, 1)]
return MLJModelInterface.table(reduce(hcat, y for y in ypred)',
names=target_column_names)
else
return [chain(values.(Xnew_[i, :]))[1]
for i in 1:size(Xnew_, 1)]
for i in 1:size(Xnew_, 1)]
end
end

8 changes: 8 additions & 0 deletions test/classifier.jl
@@ -57,6 +57,14 @@ losses = []
push!(losses, first_last_training_loss[2])
yhat = MLJBase.predict(mach, rows=test);
@test mean(MLJBase.cross_entropy(yhat, y[test])) < 0.95*loss_baseline

optimisertest(MLJFlux.NeuralNetworkClassifier,
X,
y,
builder,
optimiser,
accel)

end

# check that different resources (CPU1, CUDALibs, etc.) give about the same loss:
10 changes: 8 additions & 2 deletions test/image.jl
@@ -53,6 +53,9 @@ losses = []
@test basictest(MLJFlux.ImageClassifier, images, labels,
model.builder, model.optimiser, 0.95, accel)

@test optimisertest(MLJFlux.ImageClassifier, images, labels,
model.builder, model.optimiser, accel)

end

# check that different resources (CPU1, CUDALibs, etc.) give about the same loss:
@@ -112,7 +115,7 @@ end

# check that different resources (CPU1, CUDALibs, etc.) give about the same loss:
reference = losses[1]
@test all(x->abs(x - reference)/reference < 1e-4, losses[2:end])
@test all(x->abs(x - reference)/reference < 1e-3, losses[2:end])


## BASIC IMAGE TESTS COLOR
@@ -139,7 +142,7 @@ losses = []
# tests update logic, etc (see test_utililites.jl):
@test basictest(MLJFlux.ImageClassifier, images, labels,
model.builder, model.optimiser, 0.95, accel)

@time fitresult, cache, _report = MLJBase.fit(model, 0, images, labels)
pred = MLJBase.predict(model, fitresult, images[1:6])
first_last_training_loss = _report[1][[1, end]]
@@ -153,6 +156,9 @@ losses = []
acceleration=accel)
fitresult, cache, _report = MLJBase.fit(model, 0, images, labels);

@test optimisertest(MLJFlux.ImageClassifier, images, labels,
model.builder, model.optimiser, accel)

end

# check that different resources (CPU1, CUDALibs, etc.) give about the same loss:
22 changes: 19 additions & 3 deletions test/regressor.jl
@@ -17,7 +17,7 @@ train, test = MLJBase.partition(1:N, 0.7)
@testset_accelerated "NeuralNetworkRegressor" accel begin

Random.seed!(123)

basictest(MLJFlux.NeuralNetworkRegressor,
X,
y,
@@ -38,6 +38,14 @@ train, test = MLJBase.partition(1:N, 0.7)
truth = y[test]
goal = 0.9*model.loss(truth .- mean(truth), 0)
@test model.loss(yhat, truth) < goal

optimisertest(MLJFlux.NeuralNetworkRegressor,
X,
y,
builder,
optimiser,
accel)

end

# check that different resources (CPU1, CUDALibs, etc.) give about the same loss:
@@ -53,7 +61,7 @@ losses = []
@testset_accelerated "MultitargetNeuralNetworkRegressor" accel begin

Random.seed!(123)

basictest(MLJFlux.MultitargetNeuralNetworkRegressor,
X,
y,
@@ -69,11 +77,19 @@ losses = []
fit(model, 0, MLJBase.selectrows(X, train), selectrows(y, train))
first_last_training_loss = rpt[1][[1, end]]
push!(losses, first_last_training_loss[2])
# @show first_last_training_loss
# @show first_last_training_loss
yhat = predict(model, fitresult, selectrows(X, test))
truth = ymatrix[test]
goal = 0.9*model.loss(truth .- mean(truth), 0)
@test model.loss(Tables.matrix(yhat), truth) < goal

optimisertest(MLJFlux.MultitargetNeuralNetworkRegressor,
X,
y,
builder,
optimiser,
accel)

end

# check that different resources (CPU1, CUDALibs, etc.) give about the same loss: