diff --git a/Project.toml b/Project.toml
index 08a6e6de6..140fb7cf3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Lux"
 uuid = "b2108857-7c20-44ae-9111-449ecde12c47"
 authors = ["Avik Pal and contributors"]
-version = "1.2.2"
+version = "1.2.3"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -95,7 +95,7 @@ MacroTools = "0.5.13"
 Markdown = "1.10"
 NCCL = "0.1.1"
 NNlib = "0.9.24"
-Optimisers = "0.3.3"
+Optimisers = "0.3.3, 0.4"
 Preferences = "1.4.3"
 Random = "1.10"
 Reactant = "0.2.4"
diff --git a/docs/Project.toml b/docs/Project.toml
index a48d7c818..7ef3bb514 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -50,7 +50,7 @@ LuxCore = "1"
 LuxLib = "1.3.4"
 LuxTestUtils = "1.5"
 MLDataDevices = "1.4"
-Optimisers = "0.3.3"
+Optimisers = "0.3.3, 0.4"
 Pkg = "1.10"
 Printf = "1.10"
 Random = "1.10"
diff --git a/examples/Basics/Project.toml b/examples/Basics/Project.toml
index f24bb08ca..4e44d404c 100644
--- a/examples/Basics/Project.toml
+++ b/examples/Basics/Project.toml
@@ -13,5 +13,5 @@ ComponentArrays = "0.15"
 ForwardDiff = "0.10"
 Lux = "1"
 LuxCUDA = "0.3"
-Optimisers = "0.3"
+Optimisers = "0.3.3, 0.4"
 Zygote = "0.6"
diff --git a/examples/ConvMixer/Project.toml b/examples/ConvMixer/Project.toml
index acfbd4758..d1ffac2cd 100644
--- a/examples/ConvMixer/Project.toml
+++ b/examples/ConvMixer/Project.toml
@@ -31,7 +31,7 @@ LuxCUDA = "0.3.2"
 MLDatasets = "0.7.14"
 MLUtils = "0.4.4"
 OneHotArrays = "0.2.5"
-Optimisers = "0.3.3"
+Optimisers = "0.4"
 PreferenceTools = "0.1.2"
 Printf = "1.10"
 ProgressBars = "1.5.1"
diff --git a/examples/ConvMixer/README.md b/examples/ConvMixer/README.md
index f16d8850d..f072c1074 100644
--- a/examples/ConvMixer/README.md
+++ b/examples/ConvMixer/README.md
@@ -17,7 +17,7 @@ julia --startup-file=no \
     --threads=auto \
     main.jl \
     --lr-max=0.05 \
-    --weight-decay=0.000005
+    --weight-decay=0.0001
 ```
 
 Here's an example of the output of the above command (on a V100 32GB GPU):
@@ -76,11 +76,7 @@ Flags
 
 ## Notes
 
-  1. Weight-Decay with Adam in Optimisers.jl works differently from `torch.optim.AdamW`,
-     so you might need to adjust the value of `--weight-decay` to get the same results.
-     Pytorch multiplies the weight decay with the learning rate, whereas in Optimisers.jl
-     the learning rate is decoupled from the weight decay.
-  2. To match the results from the original repo, we need more augmentation strategies, that
+  1. To match the results from the original repo, we need more augmentation strategies that
     are currently not implemented in DataAugmentation.jl.
-  3. Don't compare the reported timings in that repo against the numbers here. They time the
+  2. Don't compare the reported timings in that repo against the numbers here. They time the
     entire loop. We only time the training part of the loop.
diff --git a/examples/ConvMixer/main.jl b/examples/ConvMixer/main.jl
index 56ca4115f..03ddc63a5 100644
--- a/examples/ConvMixer/main.jl
+++ b/examples/ConvMixer/main.jl
@@ -22,17 +22,17 @@ function get_dataloaders(batchsize)
     cifar10_std = (0.2471, 0.2435, 0.2616)
 
     train_transform = RandomResizeCrop((32, 32)) |>
-                      Maybe(FlipX()) |>
+                      Maybe(FlipX{2}()) |>
                       ImageToTensor() |>
                       Normalize(cifar10_mean, cifar10_std)
 
     test_transform = ImageToTensor() |> Normalize(cifar10_mean, cifar10_std)
 
     trainset = TensorDataset(CIFAR10(:train), train_transform)
-    trainloader = DataLoader(trainset; batchsize, shuffle=true, buffer=true, parallel=true)
+    trainloader = DataLoader(trainset; batchsize, shuffle=true, parallel=true)
 
     testset = TensorDataset(CIFAR10(:test), test_transform)
-    testloader = DataLoader(testset; batchsize, shuffle=false, buffer=true, parallel=true)
+    testloader = DataLoader(testset; batchsize, shuffle=false, parallel=true)
 
     return trainloader, testloader
 end
diff --git a/examples/DDIM/Project.toml b/examples/DDIM/Project.toml
index 2f76e047c..4608d02b2 100644
--- a/examples/DDIM/Project.toml
+++ b/examples/DDIM/Project.toml
@@ -36,7 +36,7 @@ JLD2 = "0.4.48, 0.5"
 Lux = "1"
 LuxCUDA = "0.3"
 MLUtils = "0.4"
-Optimisers = " 0.3"
+Optimisers = "0.3, 0.4"
 ParameterSchedulers = "0.4.1"
 ProgressBars = "1"
 Random = "1.10"
diff --git a/examples/HyperNet/Project.toml b/examples/HyperNet/Project.toml
index 9213cd35f..da572377e 100644
--- a/examples/HyperNet/Project.toml
+++ b/examples/HyperNet/Project.toml
@@ -20,8 +20,8 @@ Lux = "1"
 LuxCUDA = "0.3"
 MLDatasets = "0.7"
 MLUtils = "0.4"
-OneHotArrays = "0.2"
-Optimisers = "0.3"
+OneHotArrays = "0.2.5"
+Optimisers = "0.3.3, 0.4"
 Setfield = "1"
 Statistics = "1"
 Zygote = "0.6"
diff --git a/examples/ImageNet/Project.toml b/examples/ImageNet/Project.toml
index 7abae248c..792a1341c 100644
--- a/examples/ImageNet/Project.toml
+++ b/examples/ImageNet/Project.toml
@@ -38,7 +38,7 @@ MLUtils = "0.4.4"
 MPI = "0.20.21"
 NCCL = "0.1.1"
 OneHotArrays = "0.2.5"
-Optimisers = "0.3.3"
+Optimisers = "0.3.3, 0.4"
 ParameterSchedulers = "0.4.2"
 Random = "1.10"
 Setfield = "1.1.1"
diff --git a/examples/NeuralODE/Project.toml b/examples/NeuralODE/Project.toml
index 69f354cfb..e9aa48aa6 100644
--- a/examples/NeuralODE/Project.toml
+++ b/examples/NeuralODE/Project.toml
@@ -20,8 +20,8 @@ Lux = "1"
 LuxCUDA = "0.3"
 MLDatasets = "0.7"
 MLUtils = "0.4"
-OneHotArrays = "0.2"
-Optimisers = "0.3"
+OneHotArrays = "0.2.5"
+Optimisers = "0.3.3, 0.4"
 OrdinaryDiffEqTsit5 = "1"
 SciMLSensitivity = "7.63"
 Statistics = "1"
diff --git a/examples/PINN2DPDE/Project.toml b/examples/PINN2DPDE/Project.toml
index 7b1a5787a..03e427a64 100644
--- a/examples/PINN2DPDE/Project.toml
+++ b/examples/PINN2DPDE/Project.toml
@@ -18,7 +18,7 @@ Lux = "1"
 LuxCUDA = "0.3.3"
 MLUtils = "0.4.4"
 OnlineStats = "1.7.1"
-Optimisers = "0.3.3"
+Optimisers = "0.3.3, 0.4"
 Printf = "1.10"
 Random = "1.10"
 Statistics = "1.10"
diff --git a/examples/PolynomialFitting/Project.toml b/examples/PolynomialFitting/Project.toml
index b607ee600..5cf0394ef 100644
--- a/examples/PolynomialFitting/Project.toml
+++ b/examples/PolynomialFitting/Project.toml
@@ -14,6 +14,6 @@ ADTypes = "1"
 CairoMakie = "0.12"
 Lux = "1"
 LuxCUDA = "0.3"
-Optimisers = "0.3"
+Optimisers = "0.3.3, 0.4"
 Statistics = "1"
 Zygote = "0.6"
diff --git a/examples/SimpleChains/Project.toml b/examples/SimpleChains/Project.toml
index 8d504559f..33304a7dc 100644
--- a/examples/SimpleChains/Project.toml
+++ b/examples/SimpleChains/Project.toml
@@ -16,7 +16,7 @@ Lux = "1"
 MLDatasets = "0.7.14"
MLUtils = "0.4" OneHotArrays = "0.2.5" -Optimisers = "0.3.2" +Optimisers = "0.3.3, 0.4" Random = "1" SimpleChains = "0.4.6" Zygote = "0.6.69" diff --git a/examples/SimpleRNN/Project.toml b/examples/SimpleRNN/Project.toml index 2bd4f5864..02dff511c 100644 --- a/examples/SimpleRNN/Project.toml +++ b/examples/SimpleRNN/Project.toml @@ -16,6 +16,6 @@ JLD2 = "0.5" Lux = "1" LuxCUDA = "0.3" MLUtils = "0.4" -Optimisers = "0.3" +Optimisers = "0.3.3, 0.4" Statistics = "1" Zygote = "0.6" diff --git a/lib/LuxCore/test/Project.toml b/lib/LuxCore/test/Project.toml index 6d3c3d7f7..1088992ba 100644 --- a/lib/LuxCore/test/Project.toml +++ b/lib/LuxCore/test/Project.toml @@ -15,6 +15,6 @@ EnzymeCore = "0.8.5" ExplicitImports = "1.9.0" Functors = "0.4.12" MLDataDevices = "1.0.0" -Optimisers = "0.3.3" +Optimisers = "0.3.3, 0.4" Random = "1.10" Test = "1.10" diff --git a/lib/MLDataDevices/Project.toml b/lib/MLDataDevices/Project.toml index 96bc0fd0c..4d4f67433 100644 --- a/lib/MLDataDevices/Project.toml +++ b/lib/MLDataDevices/Project.toml @@ -1,7 +1,7 @@ name = "MLDataDevices" uuid = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" authors = ["Avik Pal and contributors"] -version = "1.5.0" +version = "1.5.1" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -19,6 +19,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" Metal = "dde4c033-4e86-420c-a63e-0dd931031962" +OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f" Reactant = "3c362404-f566-11ee-1572-e11a4b42c853" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" @@ -36,6 +37,7 @@ MLDataDevicesFillArraysExt = "FillArrays" MLDataDevicesGPUArraysExt = "GPUArrays" MLDataDevicesMLUtilsExt = "MLUtils" MLDataDevicesMetalExt = ["GPUArrays", "Metal"] +MLDataDevicesOneHotArraysExt = "OneHotArrays" MLDataDevicesReactantExt = "Reactant" MLDataDevicesRecursiveArrayToolsExt = "RecursiveArrayTools" MLDataDevicesReverseDiffExt = "ReverseDiff" @@ -57,6 +59,7 @@ Functors = "0.4.8" GPUArrays = "10, 11" MLUtils = "0.4.4" Metal = "1" +OneHotArrays = "0.2.5" Preferences = "1.4" Random = "1.10" Reactant = "0.2.4" diff --git a/lib/MLDataDevices/ext/MLDataDevicesOneHotArraysExt.jl b/lib/MLDataDevices/ext/MLDataDevicesOneHotArraysExt.jl new file mode 100644 index 000000000..ceb6d6bde --- /dev/null +++ b/lib/MLDataDevices/ext/MLDataDevicesOneHotArraysExt.jl @@ -0,0 +1,17 @@ +module MLDataDevicesOneHotArraysExt + +using Adapt: Adapt +using MLDataDevices: MLDataDevices, Internal, ReactantDevice, CPUDevice +using OneHotArrays: OneHotArray + +for op in (:get_device, :get_device_type) + @eval Internal.$(op)(x::OneHotArray) = Internal.$(op)(x.indices) +end + +# Reactant doesn't pay very nicely with OneHotArrays at the moment +function Adapt.adapt_structure(dev::ReactantDevice, x::OneHotArray) + x_cpu = Adapt.adapt_structure(CPUDevice(), x) + return Adapt.adapt_storage(dev, convert(Array, x_cpu)) +end + +end diff --git a/lib/MLDataDevices/test/Project.toml b/lib/MLDataDevices/test/Project.toml index 9914e0f57..1fb732d37 100644 --- a/lib/MLDataDevices/test/Project.toml +++ b/lib/MLDataDevices/test/Project.toml @@ -9,6 +9,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" +OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" 
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" @@ -30,6 +31,7 @@ FillArrays = "1" ForwardDiff = "0.10.36" Functors = "0.4.8" MLUtils = "0.4" +OneHotArrays = "0.2.5" Pkg = "1.10" Random = "1.10" RecursiveArrayTools = "3.8" diff --git a/lib/MLDataDevices/test/misc_tests.jl b/lib/MLDataDevices/test/misc_tests.jl index d9b3f8bd4..42d27cf00 100644 --- a/lib/MLDataDevices/test/misc_tests.jl +++ b/lib/MLDataDevices/test/misc_tests.jl @@ -5,6 +5,8 @@ using ReverseDiff, Tracker, ForwardDiff using SparseArrays, FillArrays, Zygote, RecursiveArrayTools using Functors: Functors +const BACKEND_GROUP = lowercase(get(ENV, "BACKEND_GROUP", "none")) + @testset "Issues Patches" begin @testset "#10 patch" begin dev = CPUDevice() @@ -231,3 +233,23 @@ end g = Zygote.gradient(x -> cpu(gpu(x) * gpu(x))[1,2], Float32[1 2 3; 4 5 6; 7 8 9])[1] @test g isa Matrix{Float32} end + +@testset "OneHotArrays" begin + using OneHotArrays + + x = onehotbatch("abracadabra", 'a':'e', 'e') + @test get_device(x) isa CPUDevice + + gdev = gpu_device() + x_g = gdev(x) + @test get_device(x_g) isa parameterless_type(typeof(gdev)) + + if BACKEND_GROUP == "none" || BACKEND_GROUP == "reactant" + using Reactant + + rdev = reactant_device() + x_rd = rdev(x) + @test get_device(x_rd) isa ReactantDevice + @test x_rd isa Reactant.ConcreteRArray{Bool, 2} + end +end diff --git a/test/Project.toml b/test/Project.toml index 90ee23de9..1a2b73f0c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -65,7 +65,7 @@ MLUtils = "0.4.3" NNlib = "0.9.24" Octavian = "0.3.28" OneHotArrays = "0.2.5" -Optimisers = "0.3.3" +Optimisers = "0.3.3, 0.4" Pkg = "1.10" Preferences = "1.4.3" Random = "1.10"