✨ Add basic docs skeleton and README integration
EssamWisam committed May 22, 2024
1 parent 3946a68 commit ee3e0b8
Showing 20 changed files with 842 additions and 50 deletions.
394 changes: 390 additions & 4 deletions docs/Manifest.toml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/Project.toml
@@ -2,4 +2,5 @@
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
23 changes: 19 additions & 4 deletions docs/make.jl
@@ -7,6 +7,7 @@ DocMeta.setdocmeta!(MLJFlux, :DocTestSetup, :(using MLJFlux); recursive=true)
makedocs(
sitename = "MLJFlux",
format = Documenter.HTML(;
collapselevel = 1,
assets = [
"assets/favicon.ico",
asset(
@@ -23,10 +24,24 @@
modules = [MLJFlux],
warnonly = true,
pages = ["Introduction" => "index.md",
"API"=> "api.md",
"Features" => Any[
"Tuning"=>"features/tuning.md",
"Early Stopping"=>"features/early.md",
"Interface"=> Any[
"Summary"=>"interface/Summary.md",
"Builders"=>"interface/Builders.md",
"Custom Builders"=>"interface/Custom Builders.md",
"Classification"=>"interface/Classification.md",
"Regression"=>"interface/Regression.md",
"Multi-Target Regression"=>"interface/Multitarget Regression.md",
"Image Classification"=>"interface/Image Classification.md",
],
"Workflow Examples" => Any[
"Incremental Training"=>"workflow examples/Incremental Training.md",
"Validation and Hyperparameter Tuning"=>"workflow examples/Hyperparameter Tuning.md",
"Early Stopping"=>"workflow examples/Early Stopping.md",
"Model Composition"=>"workflow examples/Composition.md",
],
"Tutorials"=>Any[
"MNIST Digits Classification"=>"full tutorials/MNIST.md",
"Boston House Prices Prediction"=>"full tutorials/Boston.md",
],
"Contributing" => "contributing.md",
"About" => "about.md"],
36 changes: 0 additions & 36 deletions docs/src/api.md

This file was deleted.

14 changes: 14 additions & 0 deletions docs/src/assets/themes/documenter-light.css
@@ -11716,4 +11716,18 @@ code.hljs {

.input.is-rounded, #documenter .docs-sidebar form.docs-search>input {
    margin: 1.5rem 0.0rem !important;
}

th, td {
    text-align: left !important;
}

summary {
    cursor: pointer;
    margin: 1rem 0rem;
}

details {
    margin-bottom: 1.5rem;
}
15 changes: 15 additions & 0 deletions docs/src/contributing.md
@@ -0,0 +1,15 @@
### Adding new models to MLJFlux (advanced)

This section is mainly for MLJFlux developers. It assumes familiarity
with the [MLJ model
API](https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/).

If one declares a new model type as a subtype of either
`MLJFlux.MLJFluxProbabilistic` or `MLJFlux.MLJFluxDeterministic`, then,
instead of defining new methods for `MLJModelInterface.fit` and
`MLJModelInterface.update`, one can make use of fallbacks by
implementing the lower-level methods `shape`, `build`, and
`fitresult`. See the [classifier source code](/src/classifier.jl) for
an example.

One still needs to implement a new `predict` method.
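
For orientation, here is a minimal sketch of that pattern. The argument lists below are assumptions modelled on the classifier source, not an authoritative API; verify them against [/src/classifier.jl](/src/classifier.jl):

```julia
import MLJFlux, Tables
import MLJModelInterface

# hypothetical new model opting into the `fit`/`update` fallbacks:
mutable struct MyClassifier <: MLJFlux.MLJFluxProbabilistic
    builder
    epochs::Int
end

# `shape`: extract from the data whatever dimensions the builder needs
# (here, the number of input features and of target classes):
MLJFlux.shape(model::MyClassifier, X, y) =
    (length(Tables.schema(X).names), length(MLJModelInterface.classes(y[1])))

# `build`: return a `Flux.Chain` for that shape, delegating to the builder:
MLJFlux.build(model::MyClassifier, rng, shape) =
    MLJFlux.build(model.builder, rng, shape...)

# `fitresult`: package whatever the (still hand-written) `predict` needs:
MLJFlux.fitresult(model::MyClassifier, chain, y) =
    (chain, MLJModelInterface.classes(y[1]))
```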
File renamed without changes.
96 changes: 96 additions & 0 deletions docs/src/full tutorials/MNIST.md
@@ -0,0 +1,96 @@
## Image Classification Example
An expanded version of this example, with early stopping and
snapshots, is available [here](/examples/mnist).

We define a builder that builds a chain with six alternating
convolution and max-pool layers, and a final dense layer, which we
apply to the MNIST image dataset.

First we define a generic builder (working for any image size, color
or gray):

```julia
using MLJ
using Flux
using MLDatasets

# helper function
function flatten(x::AbstractArray)
    return reshape(x, :, size(x)[end])
end

import MLJFlux
mutable struct MyConvBuilder
    filter_size::Int
    channels1::Int
    channels2::Int
    channels3::Int
end

function MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)

    k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3

    mod(k, 2) == 1 || error("`filter_size` must be odd.")

    # padding to preserve image size on convolution:
    p = div(k - 1, 2)

    front = Chain(
        Conv((k, k), n_channels => c1, pad=(p, p), relu),
        MaxPool((2, 2)),
        Conv((k, k), c1 => c2, pad=(p, p), relu),
        MaxPool((2, 2)),
        Conv((k, k), c2 => c3, pad=(p, p), relu),
        MaxPool((2, 2)),
        flatten)
    d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first
    return Chain(front, Dense(d, n_out))
end
```
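For the 28×28 MNIST images used below, `filter_size = 3` gives padding `p = 1`, so each convolution preserves the image size, while each max-pool halves it: 28 → 14 → 7 → 3. The final dense layer therefore receives `d = 3*3*c3` features per image (288 for the builder instantiated below, which has `c3 = 32`).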
Next, we load some of the MNIST data and check that the scientific
types conform to those in the table above:

```julia
N = 500
Xraw, yraw = MNIST.traindata();
Xraw = Xraw[:,:,1:N];
yraw = yraw[1:N];

scitype(Xraw)
```
```julia
scitype(yraw)
```

Inputs should have element scitype `GrayImage`:

```julia
X = coerce(Xraw, GrayImage);
```

For classifiers, target must have element scitype `<: Finite`:

```julia
y = coerce(yraw, Multiclass);
```

Instantiating an image classifier model:

```julia
ImageClassifier = @load ImageClassifier
clf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),
                      epochs=10,
                      loss=Flux.crossentropy)
```

And evaluating the accuracy of the model on a 30% holdout set:

```julia
mach = machine(clf, X, y)

evaluate!(mach,
          resampling=Holdout(rng=123, fraction_train=0.7),
          operation=predict_mode,
          measure=misclassification_rate)
```
26 changes: 20 additions & 6 deletions docs/src/index.md
@@ -2,7 +2,7 @@

A Julia package integrating deep learning Flux models with MLJ.

### Objectives
## Objectives

- Provide a user-friendly and high-level interface to fundamental [Flux](https://fluxml.ai/Flux.jl/stable/) deep learning models while still being extensible by supporting custom models written with Flux

@@ -15,7 +15,7 @@ A Julia package integrating deep learning Flux models with MLJ.

Also note that MLJFlux is limited to training models only when all training data fits into memory, though it still supports automatic batching of data.

### Installation
## Installation

```julia
import Pkg
Pkg.add(["MLJ", "MLJFlux", "Flux"])
```
You only need `Flux` if you want to build a custom architecture, or to experiment with different optimizers, loss functions, and activations.

### Quick Start
## Quick Start
First, load and instantiate the model:
```@example
using MLJ, Flux, MLJFlux
clf = NeuralNetworkClassifier(
optimiser=Flux.ADAM(0.01),
batch_size=8,
epochs=100,
acceleration=CUDALibs()
acceleration=CUDALibs() # For GPU support
)
# 3. Wrap it in a machine and fit
mach = machine(clf, X, y)
fit!(mach)

# 4. Evaluate the model
cv = CV(nfolds=5)
evaluate!(mach, resampling=cv, measure=accuracy)
```
As you can see, we were able to use MLJ functionality (i.e., cross-validation) with a Flux deep learning model. All arguments provided above also have defaults.

Notice that we were also able to define the neural network in a high-level fashion by only specifying the number of neurons per each hidden layer and the activation function. Meanwhile, `MLJFlux` was able to infer the input and output layer as well as use a suitable default for the loss function and output activation given the classification task.
Notice that we were also able to define the neural network in a high-level fashion by only specifying the number of neurons per hidden layer and the activation function. Meanwhile, `MLJFlux` was able to infer the input and output layers, as well as use suitable defaults for the loss function and output activation, given the classification task. Notice as well that we did not need to implement a training or prediction loop as we would in `Flux`.

### Flux or MLJFlux?
## Basic idea

As in the example above, any MLJFlux model has a `builder` hyperparameter:
an object encoding instructions for creating a neural network given the
data that the model eventually sees (e.g., the number of classes in a
classification problem). While each MLJFlux model has a simple default
builder, users may need to define custom builders to get optimal results,
and this will require familiarity with the [Flux
API](https://fluxml.ai/Flux.jl/stable/) for defining a neural network
chain.
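
For example, one of the canned builders documented in the Builders section of this manual can be swapped in for the default. A minimal sketch, assuming the `MLJFlux.MLP` builder and its `hidden` and `σ` keywords:

```julia
using MLJ, MLJFlux, Flux

NeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux

# an MLP builder with two hidden layers of 32 and 16 neurons:
clf = NeuralNetworkClassifier(builder=MLJFlux.MLP(hidden=(32, 16), σ=Flux.relu))
```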

In the future MLJFlux may provide a larger assortment of canned
builders. Pull requests introducing new ones are most welcome.

## Flux or MLJFlux?
[Flux](https://fluxml.ai/Flux.jl/stable/) is a deep learning framework in Julia that comes with everything you need to build deep learning models (i.e., GPU support, automatic differentiation, layers, activations, losses, optimizers, etc.). [MLJFlux](https://github.com/FluxML/MLJFlux.jl) wraps models built with Flux, providing a higher-level interface for building and training them. More importantly, it empowers Flux models by extending to them many common machine learning workflows made possible by MLJ, such as:

- **Estimating performance** of your model using a holdout set or other resampling strategy (e.g., cross-validation) as measured by one or more metrics (e.g., loss functions) that may not have been used in training
16 changes: 16 additions & 0 deletions docs/src/interface/Builders.md
@@ -0,0 +1,16 @@

```@docs
MLJFlux.Linear
```

```@docs
MLJFlux.Short
```

```@docs
MLJFlux.MLP
```

```@docs
MLJFlux.@builder
```
3 changes: 3 additions & 0 deletions docs/src/interface/Classification.md
@@ -0,0 +1,3 @@
```@docs
MLJFlux.NeuralNetworkClassifier
```
61 changes: 61 additions & 0 deletions docs/src/interface/Custom Builders.md
@@ -0,0 +1,61 @@
### Defining Custom Builders

Following is an example defining a new builder for creating a simple
fully-connected neural network with two hidden layers, with `n1` nodes
in the first hidden layer, and `n2` nodes in the second, for use in
any of the first three models in Table 1. The definition includes one
mutable struct and one method:

```julia
mutable struct MyBuilder <: MLJFlux.Builder
    n1::Int
    n2::Int
end

function MLJFlux.build(nn::MyBuilder, rng, n_in, n_out)
    init = Flux.glorot_uniform(rng)
    return Chain(Dense(n_in, nn.n1, init=init),
                 Dense(nn.n1, nn.n2, init=init),
                 Dense(nn.n2, n_out, init=init))
end
```

Note here that `n_in` and `n_out` depend on the size of the data (see
Table 1).
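
For example, assuming `NeuralNetworkRegressor` is among those first three models, the new builder is passed like any other hyperparameter; a minimal sketch:

```julia
using MLJ, MLJFlux, Flux

NeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux

# pass the custom builder just like any other hyperparameter:
model = NeuralNetworkRegressor(builder=MyBuilder(20, 10))
```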

For a concrete image classification example, see
[examples/mnist](examples/mnist).

More generally, defining a new builder means defining a new struct
sub-typing `MLJFlux.Builder` and defining a new `MLJFlux.build` method
with one of these signatures:

```julia
MLJFlux.build(builder::MyBuilder, rng, n_in, n_out)
MLJFlux.build(builder::MyBuilder, rng, n_in, n_out, n_channels) # for use with `ImageClassifier`
```

This method must return a `Flux.Chain` instance, `chain`, subject to the
following conditions:

- `chain(x)` must make sense:

- for any `x <: Array{<:AbstractFloat, 2}` of size `(n_in,
batch_size)` where `batch_size` is any integer (for use with one
of the first three model types); or

- for any `x <: Array{<:Float32, 4}` of size `(W, H, n_channels,
batch_size)`, where `(W, H) = n_in`, `n_channels` is 1 or 3, and
`batch_size` is any integer (for use with `ImageClassifier`)

- The object returned by `chain(x)` must be an `AbstractFloat` vector
of length `n_out`.
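
As a quick sanity check of the first condition, the builder can be exercised by hand. A minimal sketch, reusing `MyBuilder` from above with hypothetical sizes `n_in = 5` and `n_out = 2`:

```julia
using Flux, MLJFlux
import Random

chain = MLJFlux.build(MyBuilder(20, 10), Random.default_rng(), 5, 2)

x = rand(Float32, 5, 8)  # size (n_in, batch_size) == (5, 8)
size(chain(x))           # returns (2, 8), i.e. n_out per observation
```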

Alternatively, use `MLJFlux.@builder(neural_net)` to automatically create a builder for
any valid Flux chain expression `neural_net`, where the symbols `n_in`, `n_out`,
`n_channels` and `rng` can appear literally, with the interpretations explained above. For
example,

```julia
builder = MLJFlux.@builder Chain(Dense(n_in, 128), Dense(128, n_out, tanh))
```
3 changes: 3 additions & 0 deletions docs/src/interface/Image Classification.md
@@ -0,0 +1,3 @@
```@docs
MLJFlux.ImageClassifier
```
3 changes: 3 additions & 0 deletions docs/src/interface/Multitarget Regression.md
@@ -0,0 +1,3 @@
```@docs
MLJFlux.MultitargetNeuralNetworkRegressor
```
3 changes: 3 additions & 0 deletions docs/src/interface/Regression.md
@@ -0,0 +1,3 @@
```@docs
MLJFlux.NeuralNetworkRegressor
```