Merge pull request #277 from FluxML/dev
For a 0.6.0 release
ablaom authored Sep 29, 2024
2 parents aea9436 + 19d4275 commit de2b3c6
Showing 23 changed files with 1,608 additions and 390 deletions.
5 changes: 3 additions & 2 deletions Project.toml
@@ -1,7 +1,7 @@
name = "MLJFlux"
uuid = "094fc8d1-fd35-5302-93ea-dabda2abf845"
authors = ["Anthony D. Blaom <[email protected]>", "Ayush Shridhar <[email protected]>"]
version = "0.5.1"
version = "0.6.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
@@ -33,6 +33,7 @@ julia = "1.9"
[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
@@ -42,4 +43,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

[targets]
test = ["CUDA", "cuDNN", "LinearAlgebra", "MLJBase", "Random", "StableRNGs", "StatisticalMeasures", "StatsBase", "Test"]
test = ["CUDA", "cuDNN", "LinearAlgebra", "Logging", "MLJBase", "Random", "StableRNGs", "StatisticalMeasures", "StatsBase", "Test"]
21 changes: 12 additions & 9 deletions README.md
@@ -1,8 +1,12 @@
# MLJFlux
<div align="left">
<img width="490" alt="image" src="https://github.com/FluxML/MLJFlux.jl/assets/49572294/f28300e9-44cb-4d1a-8acf-8a682230be31">

An interface to the Flux deep learning models for the [MLJ](https://github.com/alan-turing-institute/MLJ.jl) machine learning framework

</div>

[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://fluxml.github.io/MLJFlux.jl/dev/)

An interface to the Flux deep learning models for the
[MLJ](https://github.com/alan-turing-institute/MLJ.jl) machine
learning framework.

| Branch | Julia | CPU CI | GPU CI | Coverage |
| -------- | ----- | ------ | ----- | -------- |
@@ -21,7 +25,6 @@ learning framework.
[coveralls-img-dev]: https://coveralls.io/repos/github/alan-turing-institute/MLJFlux.jl/badge.svg?branch=dev "Code Coverage"
[coveralls-url]: https://github.com/FluxML/MLJFlux.jl/actions/workflows/ci.yml

[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://fluxml.github.io/MLJFlux.jl/dev/)


## Code Snippet
@@ -56,10 +59,10 @@ Wrap in "iteration controls":
```julia
stop_conditions = [
Step(1), # Apply controls every epoch
NumberLimit(1000), # Don't train for more than 100 steps
Patience(4), # Stop after 5 iterations of deteriation in validation loss
NumberSinceBest(5), # Or if the best loss occurred 9 iterations ago
TimeLimit(30/60), # Or if 30 minutes passed
NumberLimit(1000), # Don't train for more than 1000 steps
Patience(4), # Stop after 4 iterations of deterioration in validation loss
NumberSinceBest(5), # Or if the best loss occurred 5 iterations ago
TimeLimit(30/60), # Or if 30 minutes have passed
]

validation_losses = []
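The snippet is truncated in this diff, but as a rough sketch of how such controls are typically consumed downstream — assuming MLJ's `IteratedModel` wrapper, and a hypothetical classifier `clf` with training data `X`, `y`, none of which appear in the lines shown above:

```julia
using MLJ, MLJFlux

# Hypothetical model and data, just to make the sketch self-contained:
clf = NeuralNetworkClassifier(epochs = 1, batch_size = 50)
X, y = make_moons(500)

# Record the out-of-sample loss each time the controls are applied
# (assumption: this is what `validation_losses` is collected for):
record_loss = WithLossDo(f = loss -> push!(validation_losses, loss))

iterated_clf = IteratedModel(
    model = clf,
    controls = vcat(stop_conditions, [record_loss]),
    resampling = Holdout(fraction_train = 0.7),
    measure = log_loss,
)

mach = machine(iterated_clf, X, y)
fit!(mach)   # trains until one of the stop conditions above fires
```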
16 changes: 11 additions & 5 deletions docs/src/extended_examples/MNIST/notebook.jl
@@ -3,12 +3,18 @@
# This tutorial is available as a Jupyter notebook or julia script
# [here](https://github.com/FluxML/MLJFlux.jl/tree/dev/docs/src/extended_examples/MNIST).

using Pkg #!md
const DIR = @__DIR__ #!md
Pkg.activate(DIR) #!md
Pkg.instantiate() #!md
# The following code block assumes the current directory contains `Manifest.toml` and
# `Project.toml` files tested for this demonstration, available
# [here](https://github.com/FluxML/MLJFlux.jl/tree/dev/docs/src/extended_examples/MNIST).
# Otherwise, you can try running `using Pkg; Pkg.activate(temp=true)` instead, and
# manually add the relevant packages to the temporary environment created.

using Pkg
const DIR = @__DIR__
Pkg.activate(DIR)
Pkg.instantiate()

# **Julia version** is assumed to be 1.10.*
# **Julia version** is assumed to be 1.10**

using MLJ
using Flux
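For reference, a minimal sketch of the temporary-environment alternative mentioned in the new comment block; the package list is an assumption and should be adjusted to match what the notebook actually imports:

```julia
using Pkg
Pkg.activate(temp = true)   # throw-away environment instead of the shipped Project.toml
Pkg.add(["MLJ", "MLJFlux", "Flux", "MLDatasets"])   # hypothetical list; adjust as needed
```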
12 changes: 9 additions & 3 deletions docs/src/extended_examples/spam_detection/notebook.jl
@@ -10,9 +10,15 @@
# **Warning.** This demo includes some non-idiomatic use of MLJ to allow use of the
# Flux.jl `Embedding` layer. It is not recommended for MLJ beginners.

using Pkg #!md
Pkg.activate(@__DIR__); #!md
Pkg.instantiate(); #!md
# The following code block assumes the current directory contains `Manifest.toml` and
# `Project.toml` files tested for this demonstration, available
# [here](https://github.com/FluxML/MLJFlux.jl/tree/dev/docs/src/extended_examples/spam_detection).
# Otherwise, you can try running `using Pkg; Pkg.activate(temp=true)` instead, and
# manually add the relevant packages to the temporary environment created.

using Pkg
Pkg.activate(@__DIR__);
Pkg.instantiate();

# ### Basic Imports
using MLJ
11 changes: 6 additions & 5 deletions src/MLJFlux.jl
@@ -1,7 +1,6 @@
module MLJFlux

export CUDALibs, CPU1

import Flux
using MLJModelInterface
using MLJModelInterface.ScientificTypesBase
@@ -17,22 +16,24 @@ import Metalhead
import Optimisers

include("utilities.jl")
const MMI=MLJModelInterface
const MMI = MLJModelInterface

include("encoders.jl")
include("entity_embedding.jl")
include("builders.jl")
include("metalhead.jl")
include("types.jl")
include("core.jl")
include("regressor.jl")
include("classifier.jl")
include("image.jl")
include("fit_utils.jl")
include("entity_embedding_utils.jl")
include("mlj_model_interface.jl")

export NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor
export NeuralNetworkClassifier, NeuralNetworkBinaryClassifier, ImageClassifier
export CUDALibs, CPU1

include("deprecated.jl")


end #module
end # module
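The `CUDALibs`/`CPU1` exports (now grouped with the model exports above) are the computational-resource types users pass to a model's `acceleration` hyperparameter. A quick sketch, assuming that hyperparameter behaves as in earlier MLJFlux releases:

```julia
using MLJFlux

clf_cpu = NeuralNetworkClassifier(acceleration = CPU1())       # default: single-threaded CPU
clf_gpu = NeuralNetworkClassifier(acceleration = CUDALibs())   # requires a working CUDA/cuDNN setup
```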
30 changes: 18 additions & 12 deletions src/classifier.jl
@@ -5,7 +5,6 @@
A private method that returns the shape of the input and output of the model for given
data `X` and `y`.
"""
function MLJFlux.shape(model::NeuralNetworkClassifier, X, y)
X = X isa Matrix ? Tables.table(X) : X
@@ -14,6 +13,7 @@ function MLJFlux.shape(model::NeuralNetworkClassifier, X, y)
n_input = Tables.schema(X).names |> length
return (n_input, n_output)
end
is_embedding_enabled(::NeuralNetworkClassifier) = true

# builds the end-to-end Flux chain needed, given the `model` and `shape`:
MLJFlux.build(
@@ -29,24 +29,28 @@ MLJFlux.fitresult(
model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier},
chain,
y,
) = (chain, MLJModelInterface.classes(y[1]))
ordinal_mappings = nothing,
embedding_matrices = nothing,
) = (chain, MLJModelInterface.classes(y[1]), ordinal_mappings, embedding_matrices)

function MLJModelInterface.predict(
model::NeuralNetworkClassifier,
fitresult,
Xnew,
)
chain, levels = fitresult
)
chain, levels, ordinal_mappings, _ = fitresult
Xnew = ordinal_encoder_transform(Xnew, ordinal_mappings) # what if Xnew is a matrix
X = reformat(Xnew)
probs = vcat([chain(tomat(X[:, i]))' for i in 1:size(X, 2)]...)
return MLJModelInterface.UnivariateFinite(levels, probs)
end


MLJModelInterface.metadata_model(
NeuralNetworkClassifier,
input_scitype=Union{AbstractMatrix{Continuous},Table(Continuous)},
target_scitype=AbstractVector{<:Finite},
load_path="MLJFlux.NeuralNetworkClassifier",
input_scitype = Union{AbstractMatrix{Continuous}, Table(Continuous, Finite)},
target_scitype = AbstractVector{<:Finite},
load_path = "MLJFlux.NeuralNetworkClassifier",
)

#### Binary Classifier
@@ -56,21 +60,23 @@ function MLJFlux.shape(model::NeuralNetworkBinaryClassifier, X, y)
n_input = Tables.schema(X).names |> length
return (n_input, 1) # n_output is always 1 for a binary classifier
end
is_embedding_enabled(::NeuralNetworkBinaryClassifier) = true

function MLJModelInterface.predict(
model::NeuralNetworkBinaryClassifier,
fitresult,
Xnew,
)
chain, levels = fitresult
)
chain, levels, ordinal_mappings, _ = fitresult
Xnew = ordinal_encoder_transform(Xnew, ordinal_mappings)
X = reformat(Xnew)
probs = vec(chain(X))
return MLJModelInterface.UnivariateFinite(levels, probs; augment = true)
end

MLJModelInterface.metadata_model(
NeuralNetworkBinaryClassifier,
input_scitype=Union{AbstractMatrix{Continuous},Table(Continuous)},
target_scitype=AbstractVector{<:Finite{2}},
load_path="MLJFlux.NeuralNetworkBinaryClassifier",
input_scitype = Union{AbstractMatrix{Continuous}, Table(Continuous, Finite)},
target_scitype = AbstractVector{<:Finite{2}},
load_path = "MLJFlux.NeuralNetworkBinaryClassifier",
)
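The widened `input_scitype` (now `Table(Continuous, Finite)`) means these classifiers accept tables mixing continuous and categorical columns, with the new `ordinal_mappings`/`embedding_matrices` machinery handling the categorical ones. A minimal sketch of the user-facing effect, using made-up data and default hyperparameters:

```julia
using MLJ, MLJFlux

# Toy column table with one Continuous and one Multiclass (<: Finite) feature:
X = (
    height = rand(100),
    colour = categorical(rand(["red", "green", "blue"], 100)),
)
y = categorical(rand(["yes", "no"], 100))

clf = NeuralNetworkClassifier(epochs = 5)
mach = machine(clf, X, y)
fit!(mach, verbosity = 0)   # categorical column is ordinally encoded (and, per this PR, embedded)
yhat = predict(mach, X)     # `UnivariateFinite` probabilistic predictions
```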
23 changes: 19 additions & 4 deletions src/core.jl
@@ -24,6 +24,8 @@ end
y,
) -> updated_chain, updated_optimiser_state, training_loss
**Private method.**
Update the parameters of a Flux `chain`, where:
- `model` is typically an `MLJFluxModel` instance, but could be any object such that
@@ -77,6 +79,8 @@ end
y,
) -> (updated_chain, updated_optimiser_state, history)
**Private method.**
Optimize a Flux model `chain`, where `(yhat, y) -> loss(yhat, y)` is the loss function
inferred from the `model`. Typically, `model` will be an `MLJFluxModel` instance, but it
could be any object such that `model.loss` is a Flux.jl loss function.
@@ -162,6 +166,8 @@ end
"""
gpu_isdead()
**Private method.**
Returns `true` if `acceleration=CUDALibs()` option is unavailable, and
false otherwise.
@@ -171,6 +177,8 @@ gpu_isdead() = Flux.gpu([1.0,]) isa Array
"""
nrows(X)
**Private method.**
Find the number of rows of `X`, where `X` is an `AbstractVector` or
Tables.jl table.
"""
@@ -268,15 +276,22 @@ input `X` and target `y` in the form required by
by `model.batch_size`.)
"""
function collate(model, X, y)
function collate(model, X, y, verbosity)
row_batches = Base.Iterators.partition(1:nrows(y), model.batch_size)
Xmatrix = reformat(X)
Xmatrix = _f32(reformat(X), verbosity)
ymatrix = reformat(y)
return [_get(Xmatrix, b) for b in row_batches], [_get(ymatrix, b) for b in row_batches]
end
function collate(model::NeuralNetworkBinaryClassifier, X, y)
function collate(model::NeuralNetworkBinaryClassifier, X, y, verbosity)
row_batches = Base.Iterators.partition(1:nrows(y), model.batch_size)
Xmatrix = reformat(X)
Xmatrix = _f32(reformat(X), verbosity)
yvec = (y .== classes(y)[2])' # convert to boolean
return [_get(Xmatrix, b) for b in row_batches], [_get(yvec, b) for b in row_batches]
end

_f32(x::AbstractArray{Float32}, verbosity) = x
function _f32(x::AbstractArray, verbosity)
verbosity > 0 && @info "MLJFlux: converting input data to Float32"
return Float32.(x)
end
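The new `_f32` helper means `collate` now converts non-`Float32` input to `Float32` before batching, with an `@info` message when `verbosity > 0`. A small sketch of the behaviour, taken directly from the definitions above (`_f32` is a private helper, so the qualified name is used):

```julia
import MLJFlux

X64 = rand(Float64, 4, 10)            # stand-in for the output of `reformat(X)`
X32 = MLJFlux._f32(X64, 1)            # verbosity > 0: logs the Float32 conversion
@assert eltype(X32) == Float32
@assert MLJFlux._f32(X32, 1) === X32  # already Float32: passed through without copying or logging
```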
