diff --git a/.travis.yml b/.travis.yml index 59667b8..c882d67 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,13 +3,15 @@ os: - osx - linux julia: - - 0.6 + - 1.0 - nightly notifications: email: false matrix: allow_failures: - julia: nightly -script: - - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - julia -e 'Pkg.clone(pwd()); Pkg.build("LowRankModels"); Pkg.test("LowRankModels"; coverage=true)'; +after_success: + # push coverage results to Coveralls + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' + # push coverage results to Codecov + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..4f6670d --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,387 @@ +# This file is machine-generated - editing it directly is not advised + +[[Arpack]] +deps = ["BinaryProvider", "Libdl", "LinearAlgebra"] +git-tree-sha1 = "07a2c077bdd4b6d23a40342a8a108e2ee5e58ab6" +uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +version = "0.3.1" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[BinDeps]] +deps = ["Compat", "Libdl", "SHA", "URIParser"] +git-tree-sha1 = "12093ca6cdd0ee547c39b1870e0c9c3f154d9ca9" +uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee" +version = "0.8.10" + +[[BinaryProvider]] +deps = ["Libdl", "Logging", "SHA"] +git-tree-sha1 = "c7361ce8a2129f20b0e05a89f7070820cfed6648" +uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" +version = "0.5.6" + +[[CSV]] +deps = ["CategoricalArrays", "DataFrames", "Dates", "Mmap", "Parsers", "PooledArrays", "Profile", "Tables", "Unicode", "WeakRefStrings"] +git-tree-sha1 = "a7df9250dff3aba96436580dd6ac00d712364cab" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.5.9" + +[[Calculus]] +deps = ["Compat"] +git-tree-sha1 = "f60954495a7afcee4136f78d1d60350abd37a409" +uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" +version = "0.4.1" + +[[CategoricalArrays]] +deps = ["Compat", "Future", "JSON", "Missings", "Printf", "Reexport"] +git-tree-sha1 = "26601961df6afacdd16d67c1eec6cfe75e5ae9ab" +uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" +version = "0.5.4" + +[[CodecZlib]] +deps = ["BinaryProvider", "Libdl", "Test", "TranscodingStreams"] +git-tree-sha1 = "36bbf5374c661054d41410dc53ff752972583b9b" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.5.2" + +[[CommonSubexpressions]] +deps = ["Test"] +git-tree-sha1 = "efdaf19ab11c7889334ca247ff4c9f7c322817b0" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.2.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "84aa74986c5b9b898b0d1acaf3258741ee64754f" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "2.1.0" + +[[DataFrames]] +deps = ["CategoricalArrays", "Compat", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "StatsBase", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "def266a7e5eb6f633ef4c72633d2a328b9450052" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "0.19.0" + +[[DataStructures]] +deps = ["InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "0809951a1774dc724da22d26e4289bbaab77809a" +uuid = 
"864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.17.0" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffEqDiffTools]] +deps = ["LinearAlgebra", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "b992345a39b4d9681342ae795a8dacc100730182" +uuid = "01453d9d-ee7c-5054-8395-0335cb756afa" +version = "0.14.0" + +[[DiffResults]] +deps = ["Compat", "StaticArrays"] +git-tree-sha1 = "34a4a1e8be7bc99bc9c611b895b5baf37a80584c" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "0.0.4" + +[[DiffRules]] +deps = ["Random", "Test"] +git-tree-sha1 = "dc0869fb2f5b23466b32ea799bd82c76480167f7" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "0.0.10" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[EzXML]] +deps = ["BinaryProvider", "Libdl", "Printf"] +git-tree-sha1 = "724e13b7522563a18ae4a5cc4a9792ae3b0da3e6" +uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" +version = "0.9.3" + +[[FileIO]] +deps = ["Pkg"] +git-tree-sha1 = "351f001a78aa1b7ad2696e386e110b5abd071c71" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.0.7" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "Test"] +git-tree-sha1 = "9ab8f76758cbabba8d7f103c51dce7f73fcf8e92" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.6.3" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "InteractiveUtils", "LinearAlgebra", "NaNMath", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Test"] +git-tree-sha1 = "4c4d727f1b7e0092134fabfab6396b8945c1ea5b" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.3" + +[[Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[InvertedIndices]] +deps = ["Test"] +git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.0.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.0" + +[[LibGit2]] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LineSearches]] +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf", "Test"] +git-tree-sha1 = "54eb90e8dbe745d617c78dee1d6ae95c7f6f5779" +uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" +version = "7.0.1" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Missings]] +deps = ["SparseArrays", "Test"] +git-tree-sha1 = "f0719736664b4358aa9ec173077d4285775f8007" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.1" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[Mocking]] +deps = ["Compat", "Dates"] +git-tree-sha1 = "4bf69aaf823b119b034e091e16b18311aa191663" +uuid = 
"78c3b35d-d492-501b-9361-3d52fe80e533" +version = "0.5.7" + +[[NLSolversBase]] +deps = ["Calculus", "DiffEqDiffTools", "DiffResults", "Distributed", "ForwardDiff", "LinearAlgebra", "Random", "SparseArrays", "Test"] +git-tree-sha1 = "0c6f0e7f2178f78239cfb75310359eed10f2cacb" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.3.1" + +[[NMF]] +deps = ["LinearAlgebra", "Printf", "Random", "Statistics", "StatsBase", "Test"] +git-tree-sha1 = "7292dbfbc5501f9a2ae3cd748551c5e500de5f0f" +uuid = "6ef6ca0d-6ad7-5ff6-b225-e928bfa0a386" +version = "0.4.0" + +[[NaNMath]] +deps = ["Compat"] +git-tree-sha1 = "ce3b85e484a5d4c71dd5316215069311135fa9f2" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.2" + +[[Optim]] +deps = ["Calculus", "DiffEqDiffTools", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "Random", "SparseArrays", "StatsBase"] +git-tree-sha1 = "05efd63b639afdd3425909cd3af73fe9c1373cf3" +uuid = "429524aa-4258-5aef-a3af-852621145aeb" +version = "0.19.1" + +[[OrderedCollections]] +deps = ["Random", "Serialization", "Test"] +git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.1.0" + +[[Parameters]] +deps = ["Markdown", "OrderedCollections", "REPL", "Test"] +git-tree-sha1 = "70bdbfb2bceabb15345c0b54be4544813b3444e4" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.10.3" + +[[Parsers]] +deps = ["Dates", "Test"] +git-tree-sha1 = "db2b35dedab3c0e46dc15996d170af07a5ab91c9" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "0.3.6" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PooledArrays]] +git-tree-sha1 = "6e8c38927cb6e9ae144f7277c753714861b27d14" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "0.5.2" + +[[PositiveFactorizations]] +deps = ["LinearAlgebra", "Test"] +git-tree-sha1 = "957c3dd7c33895469760ce873082fbb6b3620641" +uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" +version = "0.2.2" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[Profile]] +deps = ["Printf"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" + +[[RData]] +deps = ["CategoricalArrays", "CodecZlib", "DataFrames", "Dates", "FileIO", "TimeZones"] +git-tree-sha1 = "3b0fc2f7df61b8890502851281c1eb0d2407d6ac" +uuid = "df47a6cb-8c03-5eed-afd8-b6050d6c41da" +version = "0.6.2" + +[[RDatasets]] +deps = ["CSV", "CodecZlib", "DataFrames", "FileIO", "Printf", "RData", "Reexport", "Test"] +git-tree-sha1 = "4d93a52b94397bf0a9477d04e7a151a688672695" +uuid = "ce6b1742-4840-55fa-b093-852dadbb1d8b" +version = "0.6.1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Reexport]] +deps = ["Pkg"] +git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "0.2.0" + +[[Requires]] +deps = ["Test"] +git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "0.5.2" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[ScikitLearnBase]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "7877e55c1523a4b336b433da39c8e8c08d2f221f" +uuid = "6e75b9c4-186b-50bd-896f-2d2496a4843e" +version = "0.5.0" + +[[Serialization]] +uuid = 
"9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["BinDeps", "BinaryProvider", "Libdl", "Test"] +git-tree-sha1 = "0b45dc2e45ed77f445617b99ff2adf0f5b0f23ea" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "0.7.2" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "db23bbf50064c582b6f2b9b043c8e7e98ea8c0c6" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "0.11.0" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsBase]] +deps = ["DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] +git-tree-sha1 = "2b6ca97be7ddfad5d9f16a13fe277d29f3d11c23" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.31.0" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.0" + +[[Tables]] +deps = ["DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "Requires", "TableTraits", "Test"] +git-tree-sha1 = "2e5d1a0d9b574ee2ed0c1a2fe32807de022376dd" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "0.2.9" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TimeZones]] +deps = ["Dates", "EzXML", "Mocking", "Printf", "Serialization", "Unicode"] +git-tree-sha1 = "859bfc1832ea52e413c96fa5c92130516db62bdb" +uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" +version = "0.9.1" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "a25d8e5a28c3b1b06d3859f30757d43106791919" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.4" + +[[URIParser]] +deps = ["Test", "Unicode"] +git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69" +uuid = "30578b45-9adc-5946-b283-645ec420af67" +version = "0.4.0" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[WeakRefStrings]] +deps = ["Random", "Test"] +git-tree-sha1 = "9a0bb82eede528debe631b642eeb48a631a69bc2" +uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" +version = "0.6.1" diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..7095f74 --- /dev/null +++ b/Project.toml @@ -0,0 +1,24 @@ +name = "LowRankModels" +uuid = "15d4e49f-4837-5ea3-a885-5b28bfa376dc" +keywords = ["matrix factorization", "missing data", "imputation"] +license = "MIT" +version = "1.0.0" + +[deps] +Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NMF = "6ef6ca0d-6ad7-5ff6-b225-e928bfa0a386" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ScikitLearnBase = "6e75b9c4-186b-50bd-896f-2d2496a4843e" +SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383" 
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" + +[extras] +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" + +[targets] +test = ["RDatasets"] diff --git a/README.md b/README.md index c9340c4..da7907d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ to construct a model suitable for a particular data set. In particular, it supports * using different loss functions for different columns of the data array, - which is useful when data types are heterogeneous + which is useful when data mutable structs are heterogeneous (eg, real, boolean, and ordinal columns); * fitting the model to only *some* of the entries in the table, which is useful for data tables with many missing (unobserved) entries; and * adding offsets and scalings to the model without destroying sparsity, @@ -47,7 +47,7 @@ The data is modeled as `X'*Y`, where `X` is a `k`x`m` matrix and `Y` is a `k`x`n minimize sum_{(i,j) in obs} losses[j]((X'*Y)[i,j], A[i,j]) + sum_i rx(X[:,i]) + sum_j ry(Y[:,j]) -The basic type used by LowRankModels.jl is the GLRM. To form a GLRM, +The basic mutable struct used by LowRankModels.jl is the GLRM. To form a GLRM, the user specifies * the data `A` (any `AbstractArray`, such as an array, a sparse matrix, or a data frame) @@ -68,7 +68,7 @@ see the discussion of [sparse matrices](https://github.com/madeleineudell/LowRan `X₀` and `Y₀` are initialization matrices that represent a starting guess for the optimization. -Losses and regularizers must be of type `Loss` and `Regularizer`, respectively, +Losses and regularizers must be of mutable struct `Loss` and `Regularizer`, respectively, and may be chosen from a list of supported losses and regularizers, which include Losses: @@ -99,7 +99,7 @@ Regularizers: Each of these losses and regularizers can be scaled (for example, to increase the importance of the loss relative to the regularizer) -by calling `scale!(loss, newscale)`. +by calling `mul!(loss, newscale)`. Users may also implement their own losses and regularizers, or adjust internal parameters of the losses and regularizers; see [losses.jl](https://github.com/madeleineudell/LowRankModels.jl/blob/src/losses.jl) and [regularizers.jl](https://github.com/madeleineudell/LowRankModels.jl/blob/master/src/regularizers.jl) for more details. @@ -151,7 +151,7 @@ Then initialize the model using GLRM(A,losses,rx,ry,k, obs=obs) If `A` is a DataFrame and you just want the model to ignore -any entry that is of type `NA`, you can use +any entry that is of mutable struct `NA`, you can use obs = observations(A) @@ -219,7 +219,7 @@ and ordinal HingeLoss loss for integer columns, a small amount of QuadLoss regularization, and scaling and adding an offset to the model as described [here](#scaling). It returns the column labels for the columns it fit, along with the model. -Right now, all other data types are ignored. +Right now, all other data mutable structs are ignored. `NaN` values are treated as missing values (`NA`s) and ignored in the fit. The full call signature is @@ -232,7 +232,7 @@ function GLRM(df::DataFrame, k::Int; You can modify the losses or regularizers, or turn off offsets or scaling, using these keyword arguments. 
-Or to specify a map from data types to losses, define a new loss_map from datatypes to losses (like probabilistic_losses, below): +Or to specify a map from data types to losses, define a new loss_map from datatypes to losses (like probabilistic_losses, below): ``` probabilistic_losses = Dict{Symbol, Any}( :real => QuadLoss, @@ -413,7 +413,7 @@ These functions should help you choose adequate regularization for your model. ## Regularization paths -* `regularization_path(glrm::GLRM; params=Params(), reg_params=logspace(2,-2,5), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path"))`: computes the train and test error for GLRMs varying the scaling of the regularization through any scaling factor in the array `reg_params`. +* `regularization_path(glrm::GLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path"))`: computes the train and test error for GLRMs varying the scaling of the regularization through any scaling factor in the array `reg_params`. ## Utilities diff --git a/REQUIRE b/REQUIRE index 9b243f4..b3be9ef 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,10 +1,9 @@ -julia 0.6 +julia 1.0 +LinearAlgebra +Random DataFrames StatsBase Optim NMF -Compat ScikitLearnBase -DataArrays -FactCheck -Missings +Arpack diff --git a/examples/censored.jl b/examples/censored.jl index 1a233c3..96213e2 100644 --- a/examples/censored.jl +++ b/examples/censored.jl @@ -1,6 +1,6 @@ -using DataFrames, LowRankModels, Compat +using LowRankModels, DataFrames, Random, StatsBase -srand(0) +Random.seed!(0) println("censored data example") # boolean example with only entries greater than threshold t observed @@ -11,7 +11,7 @@ A = rand(m,ktrue)*rand(ktrue,n) B = round.(Int, ktrue*rand(m,n) .>= A) # Bernoulli samples with probability proportional to A losses = fill(QuadLoss(),n) r = QuadReg(.1) -@compat obs = @compat Array{Tuple{Int,Int}}(0) +obs = Array{Tuple{Int,Int}}(undef,0) for i=1:m for j=1:n if B[i,j] == 1 @@ -21,27 +21,27 @@ for i=1:m end (train_observed_features, train_observed_examples, test_observed_features, test_observed_examples) = - get_train_and_test(obs, m, n, .2) + get_train_and_test(obs, m, n, .2) train_glrm = GLRM(B,losses,r,r,k, observed_features=train_observed_features, observed_examples=train_observed_examples,) test_glrm = GLRM(B,losses,r,r,k, observed_features=test_observed_features, observed_examples=test_observed_examples) -function censored_regularization_path(train_glrm::GLRM, test_glrm::GLRM; params=Params(), reg_params=logspace(2,-2,5), +function censored_regularization_path(train_glrm::GLRM, test_glrm::GLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path")) m,n = size(train_glrm.A) ntrain = sum(map(length, train_glrm.observed_features)) ntest = sum(map(length, test_glrm.observed_features)) - train_error = @compat Array{Float64}(length(reg_params)) - test_error = @compat Array{Float64}(length(reg_params)) - @compat solution = @compat Array{Tuple{Float64,Float64}}(length(reg_params)) - train_time = @compat Array{Float64}(length(reg_params)) + train_error = Array{Float64}(undef,length(reg_params)) + test_error = Array{Float64}(undef,length(reg_params)) + solution = Array{Tuple{Float64,Float64}}(undef, length(reg_params)) + train_time = Array{Float64}(undef, length(reg_params)) for iparam=1:length(reg_params) reg_param = reg_params[iparam] # evaluate
train and test error if verbose println("fitting train GLRM for reg_param $reg_param") end - scale!(train_glrm.rx, reg_param) - scale!(train_glrm.ry, reg_param) + mul!(train_glrm.rx, reg_param) + mul!(train_glrm.ry, reg_param) train_glrm.X, train_glrm.Y = randn(train_glrm.k,m), randn(train_glrm.k,n) X, Y, ch = fit!(train_glrm; params=params, ch=ch, verbose=verbose) train_time[iparam] = ch.times[end] @@ -56,10 +56,11 @@ function censored_regularization_path(train_glrm::GLRM, test_glrm::GLRM; params= return train_error, test_error, train_time, reg_params, solution end -train_error, test_error, train_time, reg_params, solution = +(train_error, test_error, train_time, reg_params, solution) = censored_regularization_path(train_glrm, test_glrm, params=ProxGradParams(1, max_iter=50, abs_tol=.001, min_stepsize=.1), - reg_params=logspace(2,-2,3)) + reg_params=exp10.(range(2, stop=-2, length=3)) ) + df = DataFrame(train_error = train_error, test_error = test_error, train_time = train_time, reg_param = reg_params, solution_1norm = [s[2] for s in solution]) println(df) diff --git a/examples/cross_validate.jl b/examples/cross_validate.jl index 4ced365..d576aca 100644 --- a/examples/cross_validate.jl +++ b/examples/cross_validate.jl @@ -1,7 +1,7 @@ -using DataFrames, LowRankModels +using DataFrames, LowRankModels, Random, SparseArrays println("cross validation example") -srand(5) +Random.seed!(5) do_cv = true do_cv_by_iter = true @@ -31,7 +31,7 @@ if do_reg_path println("Computing regularization path") params = Params(1.0, max_iter=50, abs_tol=.00001, min_stepsize=.01) train_error, test_error, train_time, reg_params = - regularization_path(glrm, params=params, reg_params=logspace(2,-2,15)) + regularization_path(glrm, params=params, reg_params=exp10.(range(2,stop=-2,length=15))) df = DataFrame(train_error = train_error, test_error = test_error, train_time = train_time, reg_param = reg_params) if do_plot diff --git a/examples/fit_rdataset.jl b/examples/fit_rdataset.jl index ce92501..6600a52 100644 --- a/examples/fit_rdataset.jl +++ b/examples/fit_rdataset.jl @@ -5,11 +5,11 @@ using LowRankModels # pick a data set df = RDatasets.dataset("psych", "msq") -# make a GLRM on the whole dataframe using type imputation +# make a GLRM on the whole dataframe using type imputation auto_glrm = GLRM(df, 3) -# now we'll try it without type imputation -# we'll just fit four of the columns, to try out all four data types +# now we'll try it without type imputation +# we'll just fit four of the columns, to try out all four data types dd = DataFrame([df[s] for s in [:TOD, :Scale, :Vigorous, :Wakeful]]) dd[end] = (dd[end].==1) datatypes = [:real, :cat, :ord, :bool] diff --git a/examples/precision_at_k.jl b/examples/precision_at_k.jl index 3ac86be..7976228 100644 --- a/examples/precision_at_k.jl +++ b/examples/precision_at_k.jl @@ -1,4 +1,4 @@ -using DataFrames, LowRankModels, Compat +using DataFrames, LowRankModels # boolean example with only entries greater than threshold t observed # ie, censored data @@ -9,7 +9,7 @@ println("max value of A is ",maximum(maximum(A))," which is less than $ktrue") B = round.(Int, ktrue*rand(m,n) .>= A) # Bernoulli samples with probability proportional to A losses = fill(QuadLoss(),n) r = QuadReg(.1) -obs = @compat Array{Tuple{Int,Int}}(0) +obs = Array{Tuple{Int,Int}}(undef, 0) for i=1:m for j=1:n if B[i,j] == 1 @@ -24,8 +24,9 @@ end train_glrm = GLRM(B,losses,r,r,k, observed_features=train_observed_features, observed_examples=train_observed_examples) train_error,
test_error, prec_at_k, train_time, reg_params, solution = - precision_at_k(train_glrm, test_observed_features, params=Params(1,200,.00001,.01), - reg_params=logspace(2,-2,9)) + precision_at_k(train_glrm, test_observed_features, + params=Params(1, max_iter=200, abs_tol=0.00001, min_stepsize=0.01), + reg_params=exp10.(range(2,stop=-2,length=9))) df = DataFrame(train_error = train_error, prec_at_k = prec_at_k, - train_time = train_time, reg_param = reg_params, solution_1norm = [s[2] for s in solution]) + train_time = train_time, reg_param = reg_params, solution_1norm = [s[2] for s in solution]); println(df) diff --git a/examples/sharetest.jl b/examples/sharetest.jl index 4a4c95c..87b2e6e 100644 --- a/examples/sharetest.jl +++ b/examples/sharetest.jl @@ -3,7 +3,7 @@ println("loading LowRankModels") function fit_pca(m,n,k) # matrix to encode - srand(1) + Random.seed!(1) A = randn(m,k)*randn(k,n) X=randn(k,m) Y=randn(k,n) @@ -19,5 +19,5 @@ function fit_pca(m,n,k) return A,X,Y,ch end -@everywhere srand(1) +@everywhere Random.seed!(1) fit_pca(100,100,50) \ No newline at end of file diff --git a/examples/simple_glrms.jl b/examples/simple_glrms.jl index 7ade066..e563ae6 100644 --- a/examples/simple_glrms.jl +++ b/examples/simple_glrms.jl @@ -100,7 +100,7 @@ function fit_soft_kmeans(m,n,k) end if true - srand(10) + Random.seed!(10) fit_pca(100,100,2) fit_pca_nucnorm(100,100,2) fit_pca_nucnorm_sparse(500,500,2,10000) diff --git a/src/LowRankModels.jl b/src/LowRankModels.jl index c2cd908..edc0191 100644 --- a/src/LowRankModels.jl +++ b/src/LowRankModels.jl @@ -2,11 +2,14 @@ __precompile__() module LowRankModels -using Compat -using Missings - -import Base: scale!, show -import StatsBase: fit!, mode +using Printf +using SharedArrays +using SparseArrays +using Random + +import LinearAlgebra: dot, norm, Diagonal, rmul!, mul! +import Base: show +import StatsBase: fit!, mode, mean, var, std # define losses, regularizers, convergence history include("domains.jl") diff --git a/src/algorithms/proxgrad.jl b/src/algorithms/proxgrad.jl index 626dfd3..98bf399 100644 --- a/src/algorithms/proxgrad.jl +++ b/src/algorithms/proxgrad.jl @@ -1,7 +1,7 @@ ### Proximal gradient method export ProxGradParams, fit! -type ProxGradParams<:AbstractParams +mutable struct ProxGradParams<:AbstractParams stepsize::Float64 # initial stepsize max_iter::Int # maximum number of outer iterations inner_iter_X::Int # how many prox grad steps to take on X before moving on to Y (and vice versa) @@ -42,7 +42,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; ry = glrm.ry X = glrm.X; Y = glrm.Y # check that we didn't initialize to zero (otherwise we will never move) - if vecnorm(Y) == 0 + if norm(Y) == 0 Y = .1*randn(k,d) end k = glrm.k @@ -62,7 +62,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; glrm.Y = randn(glrm.k, d) end - XY = @compat Array{Float64}((m, d)) + XY = Array{Float64}(undef, (m, d)) gemm!('T','N',1.0,X,Y,0.0,XY) # XY = X' * Y initial calculation # step size (will be scaled below to ensure it never exceeds 1/\|g\|_2 or so for any subproblem) @@ -116,7 +116,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; for inneri=1:params.inner_iter_X for e=1:m # for every example x_e == ve[e] - scale!(g, 0) # reset gradient to 0 + fill!(g, 0.)
# reset gradient to 0 # compute gradient of L with respect to Xᵢ as follows: # ∇{Xᵢ}L = Σⱼ dLⱼ(XᵢYⱼ)/dXᵢ for f in glrm.observed_features[e] @@ -141,11 +141,11 @@ function fit!(glrm::GLRM, params::ProxGradParams; ## prox step: Xᵢ = prox_rx(Xᵢ, α/l) prox!(rx[e],newve[e],stepsize) if row_objective(glrm, e, newve[e]) < obj_by_row[e] - copy!(ve[e], newve[e]) + copyto!(ve[e], newve[e]) alpharow[e] *= 1.05 break else # the stepsize was too big; undo and try again only smaller - copy!(newve[e], ve[e]) + copyto!(newve[e], ve[e]) alpharow[e] *= .7 if alpharow[e] < params.min_stepsize alpharow[e] = params.min_stepsize * 1.1 @@ -158,7 +158,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; end # inner iteration # STEP 2: Y update for inneri=1:params.inner_iter_Y - scale!(G, 0) + fill!(G, 0.) for f=1:n # compute gradient of L with respect to Yⱼ as follows: # ∇{Yⱼ}L = Σⱼ dLⱼ(XᵢYⱼ)/dYⱼ @@ -185,12 +185,12 @@ function fit!(glrm::GLRM, params::ProxGradParams; prox!(ry[f],newvf[f],stepsize) new_obj_by_col = col_objective(glrm, f, newvf[f]) if new_obj_by_col < obj_by_col[f] - copy!(vf[f], newvf[f]) + copyto!(vf[f], newvf[f]) alphacol[f] *= 1.05 obj_by_col[f] = new_obj_by_col break else - copy!(newvf[f], vf[f]) + copyto!(newvf[f], vf[f]) alphacol[f] *= .7 if alphacol[f] < params.min_stepsize alphacol[f] = params.min_stepsize * 1.1 diff --git a/src/algorithms/proxgrad_multithread.jl b/src/algorithms/proxgrad_multithread.jl index 0204a45..5f0ff33 100644 --- a/src/algorithms/proxgrad_multithread.jl +++ b/src/algorithms/proxgrad_multithread.jl @@ -1,7 +1,7 @@ ### Proximal gradient method export ProxGradParams, fit! -type ProxGradParams<:AbstractParams +mutable struct ProxGradParams<:AbstractParams stepsize::Float64 # initial stepsize max_iter::Int # maximum number of outer iterations inner_iter_X::Int # how many prox grad steps to take on X before moving on to Y (and vice versa) @@ -42,7 +42,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; ry = glrm.ry X = glrm.X; Y = glrm.Y # check that we didn't initialize to zero (otherwise we will never move) - if vecnorm(Y) == 0 + if norm(Y) == 0 Y = .1*randn(k,d) end k = glrm.k @@ -62,7 +62,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; glrm.Y = randn(glrm.k, d) end - XY = @compat Array{Float64}((m, d)) + XY = Array{Float64}(undef, (m, d)) gemm!('T','N',1.0,X,Y,0.0,XY) # XY = X' * Y initial calculation # step size (will be scaled below to ensure it never exceeds 1/\|g\|_2 or so for any subproblem) @@ -116,7 +116,8 @@ function fit!(glrm::GLRM, params::ProxGradParams; for inneri=1:params.inner_iter_X Threads.@threads for e=1:m # for every example x_e == ve[e] - scale!(g[Threads.threadid()], 0) # reset gradient to 0 + # for e=1:m # for every example x_e == ve[e] + g[Threads.threadid()] .= 0 # reset gradient to 0 # compute gradient of L with respect to Xᵢ as follows: # ∇{Xᵢ}L = Σⱼ dLⱼ(XᵢYⱼ)/dXᵢ for f in glrm.observed_features[e] @@ -141,11 +142,11 @@ function fit!(glrm::GLRM, params::ProxGradParams; ## prox step: Xᵢ = prox_rx(Xᵢ, α/l) prox!(rx[e],newve[e],stepsize) if row_objective(glrm, e, newve[e]) < obj_by_row[e] - copy!(ve[e], newve[e]) + copyto!(ve[e], newve[e]) alpharow[e] *= 1.05 # choose a more aggressive stepsize break else # the stepsize was too big; undo and try again only smaller - copy!(newve[e], ve[e]) + copyto!(newve[e], ve[e]) alpharow[e] *= .7 # choose a less aggressive stepsize if alpharow[e] < params.min_stepsize alpharow[e] = params.min_stepsize * 1.1 @@ -158,8 +159,9 @@ function fit!(glrm::GLRM, params::ProxGradParams; end # inner iteration 
# STEP 2: Y update for inneri=1:params.inner_iter_Y - scale!(G, 0) + G .= 0 Threads.@threads for f=1:n + # for f=1:n # compute gradient of L with respect to Yⱼ as follows: # ∇{Yⱼ}L = Σⱼ dLⱼ(XᵢYⱼ)/dYⱼ for e in glrm.observed_examples[f] @@ -185,12 +187,12 @@ function fit!(glrm::GLRM, params::ProxGradParams; prox!(ry[f],newvf[f],stepsize) new_obj_by_col = col_objective(glrm, f, newvf[f]) if new_obj_by_col < obj_by_col[f] - copy!(vf[f], newvf[f]) + copyto!(vf[f], newvf[f]) alphacol[f] *= 1.05 obj_by_col[f] = new_obj_by_col break else - copy!(newvf[f], vf[f]) + copyto!(newvf[f], vf[f]) alphacol[f] *= .7 if alphacol[f] < params.min_stepsize alphacol[f] = params.min_stepsize * 1.1 diff --git a/src/algorithms/quad_streaming.jl b/src/algorithms/quad_streaming.jl index 5b7a55a..8408645 100644 --- a/src/algorithms/quad_streaming.jl +++ b/src/algorithms/quad_streaming.jl @@ -4,7 +4,7 @@ export StreamingParams, streaming_fit!, streaming_impute! -type StreamingParams<:AbstractParams +mutable struct StreamingParams<:AbstractParams T0::Int # number of rows to use to initialize Y before streaming begins stepsize::Float64 # stepsize (inverse of memory) Y_update_interval::Int # how often to prox Y @@ -139,7 +139,7 @@ function streaming_impute!(glrm::GLRM, params::StreamingParams=StreamingParams() end """ Constructs new GLRM on subset of rows of the data from input glrm """ -function keep_rows(glrm, r::Range{Int}) +function keep_rows(glrm, r::UnitRange{Int}) @assert maximum(r) <= size(glrm.A, 1) obs = flatten_observations(glrm.observed_features) first_row = minimum(r) diff --git a/src/algorithms/sparse_proxgrad.jl b/src/algorithms/sparse_proxgrad.jl index bc44b07..e0702cb 100644 --- a/src/algorithms/sparse_proxgrad.jl +++ b/src/algorithms/sparse_proxgrad.jl @@ -1,7 +1,7 @@ ### Proximal gradient method export SparseProxGradParams, fit! 
-type SparseProxGradParams<:AbstractParams +mutable struct SparseProxGradParams<:AbstractParams stepsize::Float64 # initial stepsize max_iter::Int # maximum number of outer iterations inner_iter::Int # how many prox grad steps to take on X before moving on to Y (and vice versa) @@ -60,7 +60,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; # STEP 1: X update for inneri=1:params.inner_iter for e=1:m # doing this means looping over XY in row-major order, but otherwise we couldn't parallelize over Xᵢs - scale!(g, 0)# reset gradient to 0 + fill!(g, 0)# reset gradient to 0 # compute gradient of L with respect to Xᵢ as follows: # ∇{Xᵢ}L = Σⱼ dLⱼ(XᵢYⱼ)/dXᵢ for f in glrm.observed_features[e] @@ -71,7 +71,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; end # take a proximal gradient step l = length(glrm.observed_features[e]) + 1 - scale!(g, -alpha/l) + rmul!(g, -alpha/l) ## gradient step: Xᵢ += -(α/l) * ∇{Xᵢ}L axpy!(1,g,ve[e]) ## prox step: Xᵢ = prox_rx(Xᵢ, α/l) @@ -81,7 +81,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; # STEP 2: Y update for inneri=1:params.inner_iter for f=1:n - scale!(g, 0) # reset gradient to 0 + fill!(g, 0) # reset gradient to 0 # compute gradient of L with respect to Yⱼ as follows: # ∇{Yⱼ}L = Σⱼ dLⱼ(XᵢYⱼ)/dYⱼ for e in glrm.observed_examples[f] @@ -91,7 +91,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; end # take a proximal gradient step l = length(glrm.observed_examples[f]) + 1 - scale!(g, -alpha/l) + rmul!(g, -alpha/l) ## gradient step: Yⱼ += -(α/l) * ∇{Yⱼ}L axpy!(1,g,vf[f]) ## prox step: Yⱼ = prox_ryⱼ(Yⱼ, α/l) diff --git a/src/convergence.jl b/src/convergence.jl index 8694cfc..bcaf039 100644 --- a/src/convergence.jl +++ b/src/convergence.jl @@ -1,6 +1,6 @@ export ConvergenceHistory, update_ch!
-type ConvergenceHistory +mutable struct ConvergenceHistory name::AbstractString objective::Array dual_objective::Array diff --git a/src/cross_validate.jl b/src/cross_validate.jl index 4ae6409..5f0019e 100644 --- a/src/cross_validate.jl +++ b/src/cross_validate.jl @@ -17,10 +17,10 @@ function cross_validate(glrm::AbstractGLRM; obs = flatten_observations(glrm.observed_features) if verbose println("computing CV folds") end folds = getfolds(obs, nfolds, size(glrm.A)..., do_check = do_obs_check) - train_glrms = Array{typeof(glrm)}(nfolds) - test_glrms = Array{typeof(glrm)}(nfolds) - train_error = @compat Array{Float64}(nfolds) - test_error = @compat Array{Float64}(nfolds) + train_glrms = Array{typeof(glrm)}(undef, nfolds) + test_glrms = Array{typeof(glrm)}(undef, nfolds) + train_error = Array{Float64}(undef, nfolds) + test_error = Array{Float64}(undef, nfolds) for ifold=1:use_folds if verbose println("\nforming train and test GLRM for fold $ifold") end train_observed_features, train_observed_examples, test_observed_features, test_observed_examples = folds[ifold] @@ -52,12 +52,12 @@ function cross_validate(glrm::AbstractGLRM; return train_error, test_error, train_glrms, test_glrms end -@compat function getfolds(obs::Array{Tuple{Int,Int},1}, nfolds, m, n; ntrials = 5, do_check = true) +function getfolds(obs::Array{Tuple{Int,Int},1}, nfolds, m, n; ntrials = 5, do_check = true) # partition elements of obs into nfolds groups - groups = @compat Array{Int}(size(obs)) + groups = Array{Int}(undef, size(obs)) rand!(groups, 1:nfolds) # fill an array with random 1 through N # create the training and testing observations for each fold - folds = @compat Array{Tuple}(nfolds) + folds = Array{Tuple}(undef, nfolds) for itrial = 1:ntrials enough_observations = 0 for ifold=1:nfolds @@ -89,7 +89,7 @@ end function get_train_and_test(obs, m, n, holdout_proportion=.1) # generate random uniform number for each observation - groups = @compat Array{Float64}(size(obs)) + groups = Array{Float64}(undef, size(obs)) rand!(groups) # create the training and testing observations @@ -103,7 +103,7 @@ function get_train_and_test(obs, m, n, holdout_proportion=.1) end function flatten_observations(observed_features::ObsArray) - obs = @compat Array{Tuple{Int,Int}}(0) + obs = Array{Tuple{Int,Int}}(undef, 0) for (i, features_in_example_i) in enumerate(observed_features) for j in features_in_example_i push!(obs, (i,j)) @@ -124,7 +124,7 @@ function flatten(x, y) end y end -flatten{T}(x::Array{T})=flatten(x,Array(T, 0)) +flatten(x::Array{T}) where T=flatten(x,Array(T, 0)) function flattenarray(x, y) if typeof(x)<:Array @@ -136,7 +136,7 @@ function flattenarray(x, y) end y end -flattenarray{T}(x::Array{T})=flattenarray(x,Array(T, 0)) +flattenarray(x::Array{T}) where T=flattenarray(x,Array(T, 0)) function cv_by_iter(glrm::AbstractGLRM, holdout_proportion=.1, params=Params(100,max_iter=1,abs_tol=.01,min_stepsize=.01), @@ -163,8 +163,8 @@ function cv_by_iter(glrm::AbstractGLRM, holdout_proportion=.1, niters = params.max_iter params.max_iter = 1 - train_error = @compat Array{Float64}(niters) - test_error = @compat Array{Float64}(niters) + train_error = Array{Float64}(undef, niters) + test_error = Array{Float64}(undef, niters) if verbose @printf("%12s%12s%12s\n", "train error", "test error", "time") t0 = time() @@ -181,7 +181,7 @@ function cv_by_iter(glrm::AbstractGLRM, holdout_proportion=.1, return train_error, test_error end -function regularization_path(glrm::AbstractGLRM; params=Params(), reg_params=logspace(2,-2,5), +function 
regularization_path(glrm::AbstractGLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path")) if verbose println("flattening observations") end @@ -211,16 +211,16 @@ end # For each value of the regularization parameter, # compute the training error, ie, average error (sum over (i,j) in train_glrm.obs of L_j(A_ij, x_i y_j)) # and the test error, ie, average error (sum over (i,j) in test_glrm.obs of L_j(A_ij, x_i y_j)) -function regularization_path(train_glrm::AbstractGLRM, test_glrm::AbstractGLRM; params=Params(), reg_params=logspace(2,-2,5), +function regularization_path(train_glrm::AbstractGLRM, test_glrm::AbstractGLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path")) - train_error = @compat Array{Float64}(length(reg_params)) - test_error = @compat Array{Float64}(length(reg_params)) + train_error = Array{Float64}(undef, length(reg_params)) + test_error = Array{Float64}(undef, length(reg_params)) ntrain = sum(map(length, train_glrm.observed_features)) ntest = sum(map(length, test_glrm.observed_features)) if verbose println("training model on $ntrain samples and testing on $ntest") end @show params - train_time = @compat Array{Float64}(length(reg_params)) + train_time = Array{Float64}(undef, length(reg_params)) for iparam=1:length(reg_params) reg_param = reg_params[iparam] # evaluate train and test error @@ -240,18 +240,18 @@ function regularization_path(train_glrm::AbstractGLRM, test_glrm::AbstractGLRM; end -function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params(), reg_params=logspace(2,-2,5), +function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path"), kprec=10) m,n = size(train_glrm.A) ntrain = sum(map(length, train_glrm.observed_features)) ntest = sum(map(length, test_observed_features)) train_observed_features = train_glrm.observed_features - train_error = @compat Array{Float64}(length(reg_params)) - test_error = @compat Array{Float64}(length(reg_params)) - prec_at_k = @compat Array{Float64}(length(reg_params)) - @compat solution = @compat Array{Tuple{Float64,Float64}}(length(reg_params)) - train_time = @compat Array{Float64}(length(reg_params)) + train_error = Array{Float64}(undef, length(reg_params)) + test_error = Array{Float64}(undef, length(reg_params)) + prec_at_k = Array{Float64}(undef, length(reg_params)) + solution = Array{Tuple{Float64,Float64}}(undef, length(reg_params)) + train_time = Array{Float64}(undef, length(reg_params)) test_glrm = GLRM(train_glrm.A, train_glrm.losses, train_glrm.rx, train_glrm.ry, train_glrm.k, X=copy(train_glrm.X), Y=copy(train_glrm.Y), observed_features = test_observed_features) @@ -259,8 +259,8 @@ function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params( reg_param = reg_params[iparam] # evaluate train error if verbose println("fitting train GLRM for reg_param $reg_param") end - scale!(train_glrm.rx, reg_param) - scale!(train_glrm.ry, reg_param) + mul!(train_glrm.rx, reg_param) + mul!(train_glrm.ry, reg_param) train_glrm.X, train_glrm.Y = randn(train_glrm.k,m), randn(train_glrm.k,n) # this bypasses the error checking in GLRM(). Risky. 
X, Y, ch = fit!(train_glrm, params, ch=ch, verbose=verbose) train_time[iparam] = ch.times[end] @@ -296,8 +296,8 @@ function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params( end end prec_at_k[iparam] = true_pos / (true_pos + false_pos) - if verbose println("\prec_at_k: $(prec_at_k[iparam])") end - solution[iparam] = (sum(X)+sum(Y), sum(abs(X))+sum(abs(Y))) + if verbose println("\tprec_at_k: $(prec_at_k[iparam])") end + solution[iparam] = (sum(X)+sum(Y), sum(abs.(X))+sum(abs.(Y))) if verbose println("\tsum of solution, one norm of solution: $(solution[iparam])") end end return train_error, test_error, prec_at_k, train_time, reg_params, solution diff --git a/src/domains.jl b/src/domains.jl index 6d90c77..83fb0f4 100644 --- a/src/domains.jl +++ b/src/domains.jl @@ -18,21 +18,21 @@ export Domain, # the abstract type RealDomain, BoolDomain, OrdinalDomain, PeriodicDomain, CountDomain, CategoricalDomain, # the domains copy -@compat abstract type Domain end +abstract type Domain end ########################################## REALS ########################################## # Real data can take values from ℜ -immutable RealDomain<:Domain +struct RealDomain<:Domain end ########################################## BOOLS ########################################## # Boolean data should take values from {true, false} -immutable BoolDomain<:Domain +struct BoolDomain<:Domain end ########################################## ORDINALS ########################################## # Ordinal data should take integer values ranging from `min` to `max` -immutable OrdinalDomain<:Domain +struct OrdinalDomain<:Domain min::Int max::Int function OrdinalDomain(min, max) @@ -45,7 +45,7 @@ end ########################################## ORDINALS ########################################## # Categorical data should take integer values ranging from 1 to `max` -immutable CategoricalDomain<:Domain +struct CategoricalDomain<:Domain min::Int max::Int end @@ -53,12 +53,12 @@ CategoricalDomain(m::Int) = CategoricalDomain(1,m) ########################################## PERIODIC ########################################## # Periodic data can take values from ℜ, but given a period T, we should have error_metric(a,a+T) = 0 -immutable PeriodicDomain<:Domain +struct PeriodicDomain<:Domain T::Float64 # the period end ########################################## COUNTS ########################################## # Count data can take values over ℕ, which we approximate as {0, 1, 2 ... `max_count`} -immutable CountDomain<:Domain +struct CountDomain<:Domain max_count::Int # the biggest possible count end diff --git a/src/evaluate_fit.jl b/src/evaluate_fit.jl index 68e28ec..5c58b59 100644 --- a/src/evaluate_fit.jl +++ b/src/evaluate_fit.jl @@ -59,8 +59,7 @@ function objective(glrm::GLRM, X::Array{Float64,2}, Y::Array{Float64,2}; yidxs = get_yidxs(glrm.losses), kwargs...) @assert(size(Y)==(glrm.k,yidxs[end][end])) @assert(size(X)==(glrm.k,size(glrm.A,1))) - XY = @compat Array{Float64}((size(X,2), size(Y,2))) - XY = Array{Float64}((size(X,2), size(Y,2))) + XY = Array{Float64}(undef, (size(X,2), size(Y,2))) if sparse # Calculate X'*Y only at observed entries of A m,n = size(glrm.A) @@ -148,7 +147,7 @@ function error_metric(glrm::AbstractGLRM, XY::Array{Float64,2}, domains::Array{D end # The user can also pass in X and Y and `error_metric` will compute XY for them function error_metric(glrm::AbstractGLRM, X::Array{Float64,2}, Y::Array{Float64,2}, domains::Array{Domain,1}=Domain[l.domain for l in glrm.losses]; kwargs...) 
- XY = @compat Array{Float64}((size(X,2), size(Y,2))) + XY = Array{Float64}(undef, (size(X,2), size(Y,2))) gemm!('T','N',1.0,X,Y,0.0,XY) error_metric(glrm, XY, domains; kwargs...) end diff --git a/src/fit.jl b/src/fit.jl index f026209..87d2544 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -1,11 +1,11 @@ export fit, fit!, Params ### PARAMETERS TYPE -@compat abstract type AbstractParams end +abstract type AbstractParams end Params(args...; kwargs...) = ProxGradParams(args...; kwargs...) # default in-place fitting uses proximal gradient method -@compat function fit!(glrm::AbstractGLRM; kwargs...) +function fit!(glrm::AbstractGLRM; kwargs...) kwdict = Dict(kwargs) if :params in keys(kwdict) return fit!(glrm, kwdict[:params]; kwargs...) @@ -22,8 +22,8 @@ end # fit without modifying the glrm object function fit(glrm::AbstractGLRM, args...; kwargs...) - X0 = @compat Array{Float64}(size(glrm.X)) - Y0 = @compat Array{Float64}(size(glrm.Y)) + X0 = Array{Float64}(undef, size(glrm.X)) + Y0 = Array{Float64}(undef, size(glrm.Y)) copy!(X0, glrm.X); copy!(Y0, glrm.Y) X,Y,ch = fit!(glrm, args...; kwargs...) copy!(glrm.X, X0); copy!(glrm.Y, Y0) diff --git a/src/fit_dataframe.jl b/src/fit_dataframe.jl index 3a62310..0eaa53c 100644 --- a/src/fit_dataframe.jl +++ b/src/fit_dataframe.jl @@ -1,12 +1,10 @@ +# ======================================== +# REVIEW THIS IN LIGHT OF NEW DATAFRAMES +# ======================================== + import Base: isnan -import DataArrays: DataArray, isna, dropna, NA, NAtype -if VERSION < v"0.6.0-pre" - import DataArrays: array -else - import DataArrays: isnan -end import DataFrames: DataFrame, ncol, convert -import Missings: Missing, missing, ismissing + export GLRM, observations, expand_categoricals!, NaNs_to_NAs!, NAs_to_0s!, NaNs_to_Missing!, ismissing_vec @@ -31,7 +29,7 @@ function GLRM(df::DataFrame, k::Int, datatypes::Array{Symbol,1}; end # validate input # XXX check for undefined entry? - for dt in datatypes + for dt in datatypes if !(dt in keys(loss_map)) error("data types must be either :real, :bool, :ord, or :cat, not $dt") end @@ -118,12 +116,9 @@ function map_to_numbers!(df, j::Int, datatype::Symbol) return df[j] end -function getval{T}(x::Nullable{T}) - x.value -end -function getval{T<:Number}(x::T) - x -end +getval(x::Union{T, Nothing}) where T = x.value +getval(x::T) where T<:Number = x + function map_to_numbers!(df, j::Int, loss::Type{QuadLoss}) if all(xi -> is_number_or_null(xi), df[j][!ismissing_vec(df[j])]) @@ -206,11 +201,11 @@ function pick_loss(l::Type{MultinomialOrdinalLoss}, col) end end -observations(da::DataArray) = df_observations(da) +observations(da::Array{Union{T, Missing}}) where T = df_observations(da) observations(df::DataFrame) = df_observations(df) # isnan -> ismissing function df_observations(da) - obs = @compat Tuple{Int, Int}[] + obs = Tuple{Int, Int}[] m,n = size(da) for j=1:n # follow column-major order. First element of index in innermost loop for i=1:m @@ -271,32 +266,7 @@ end # convert NaNs to NAs # isnan(x::NAtype) = false isnan(x::AbstractString) = false -isnan{T}(x::Nullable{T}) = isnan(x.value) - -# letting these two functions be here for now, just to catch bugs. -function NaNs_to_NAs!(df::DataFrame) - m,n = size(df) - for j=1:n # follow column-major order. First element of index in innermost loop - for i=1:m - if !isna(df[i,j]) && isnan(df[i,j]) - df[i,j] = NA - end - end - end - return df -end - -function NAs_to_0s!(df::DataFrame) - m,n = size(df) - for j=1:n # follow column-major order.
First element of index in innermost loop - for i=1:m - if isna(df[i,j]) - df[i,j] = 0 - end - end - end - return df -end +isnan(x::Union{T, Nothing}) where T = isnan(x.value) # same functionality as above. function NaNs_to_Missing!(df::DataFrame) @@ -308,6 +278,4 @@ function NaNs_to_Missing!(df::DataFrame) end # ismissing(Array{Union{T,Missing},1}) doesn't exist. -function ismissing_vec(V::Array{Any,1}) - return Bool[ismissing(x) for x in V[:]] -end +ismissing_vec(V::Array{Any,1}) = Bool[ismissing(x) for x in V[:]] diff --git a/src/fit_dataframe_w_type_imputation.jl b/src/fit_dataframe_w_type_imputation.jl index 4420899..41ea9de 100644 --- a/src/fit_dataframe_w_type_imputation.jl +++ b/src/fit_dataframe_w_type_imputation.jl @@ -1,14 +1,7 @@ import Base: isnan -import DataArrays: DataArray, isna, dropna, NA, NAtype -if VERSION < v"0.6.0" - import DataArrays: array -else - import DataArrays: isnan -end import DataFrames: DataFrame, ncol, convert export GLRM -import Missings: Missing, missing, ismissing # TODO: identify categoricals automatically from PooledDataArray columns @@ -99,6 +92,8 @@ function get_loss_types(df::DataFrame) try maxs[j] = maximum(skipmissing(col)) mins[j] = minimum(skipmissing(col)) + catch + nothing end end @@ -146,8 +141,10 @@ function get_ordinals(df::DataFrame) for i in 1:nord col = df[ord_idx[i]] try - maxs[i] = maximum(dropna(col)) - mins[i] = minimum(dropna(col)) + maxs[i] = maximum(dropmissing(col)) + mins[i] = minimum(dropmissing(col)) + catch + nothing end end diff --git a/src/glrm.jl b/src/glrm.jl index 92fe22a..b97bbf7 100644 --- a/src/glrm.jl +++ b/src/glrm.jl @@ -1,15 +1,15 @@ -import Base: size, axpy! -import Base.LinAlg.scale! -import Base.BLAS: gemm! -@compat abstract type AbstractGLRM end +import LinearAlgebra: size, axpy! +import LinearAlgebra.BLAS: gemm! + +abstract type AbstractGLRM end export AbstractGLRM, GLRM, getindex, size, scale_regularizer! -@compat const ObsArray = @compat(Union{Array{Array{Int,1},1}, Array{UnitRange{Int},1}}) +const ObsArray = Union{Array{Array{Int,1},1}, Array{UnitRange{Int},1}} ### GLRM TYPE -type GLRM<:AbstractGLRM +mutable struct GLRM<:AbstractGLRM A # The data table losses::Array{Loss,1} # array of loss functions rx::Array{Regularizer,1} # Array of regularizers to be applied to each column of X @@ -84,7 +84,7 @@ parameter_estimate(glrm::GLRM) = (glrm.X, glrm.Y) function scale_regularizer!(glrm::GLRM, newscale::Number) - scale!(glrm.rx, newscale) - scale!(glrm.ry, newscale) + mul!(glrm.rx, newscale) + mul!(glrm.ry, newscale) return glrm end diff --git a/src/impute_and_err.jl b/src/impute_and_err.jl index acf3052..ee8d8a3 100644 --- a/src/impute_and_err.jl +++ b/src/impute_and_err.jl @@ -27,11 +27,11 @@ export impute, error_metric, errors # function for general use -roundcutoff{T<:Number}(x,a::T,b::T) = T(min(max(round(x),a),b)) +roundcutoff(x,a::T,b::T) where T<:Number = T(min(max(round(x),a),b)) # Error metrics for general use squared_error(a_imputed::Number, a::Number) = (a_imputed-a)^2 -misclassification{T}(a_imputed::T, a::T) = float(!(a_imputed==a)) # return 0.0 if equal, 1.0 else +misclassification(a_imputed::T, a::T) where T = float(!(a_imputed==a)) # return 0.0 if equal, 1.0 else # use the default loss domain imputation if no domain provided impute(l::Loss, u::Float64) = impute(l.domain, l, u) @@ -79,7 +79,7 @@ function impute(D::OrdinalDomain, l::WeightedHingeLoss, u::Float64) a_imputed = (u>0 ? 
ceil(1/u) : floor(1/u)) roundcutoff(a_imputed, D.min, D.max) end -impute(D::OrdinalDomain, l::OrdisticLoss, u::AbstractArray) = indmin(u.^2) +impute(D::OrdinalDomain, l::OrdisticLoss, u::AbstractArray) = argmin(u.^2) # MultinomialOrdinalLoss # l(u, a) = -log(p(u, a)) @@ -93,12 +93,12 @@ function impute(D::OrdinalDomain, l::MultinomialOrdinalLoss, u::AbstractArray) enforce_MNLOrdRules!(u) eu = exp.(u) p = [1-eu[1], -diff(eu)..., eu[end]] - return indmax(p) + return argmax(p) end # generic method function impute(D::OrdinalDomain, l::Loss, u::AbstractArray) - (D.min:D.max)[indmin([evaluate(l, u, i) for i in D.min:D.max])] + (D.min:D.max)[argmin([evaluate(l, u, i) for i in D.min:D.max])] end function error_metric(D::OrdinalDomain, l::Loss, u::Float64, a::Number) @@ -109,8 +109,8 @@ end ########################################## CATEGORICALS ########################################## # Categorical data should take integer values ranging from 1 to `max` -impute(D::CategoricalDomain, l::MultinomialLoss, u::Array{Float64}) = indmax(u) -impute(D::CategoricalDomain, l::OvALoss, u::Array{Float64}) = indmax(u) +impute(D::CategoricalDomain, l::MultinomialLoss, u::Array{Float64}) = argmax(u) +impute(D::CategoricalDomain, l::OvALoss, u::Array{Float64}) = argmax(u) function error_metric(D::CategoricalDomain, l::Loss, u::Array{Float64}, a::Number) a_imputed = impute(D, l, u) @@ -144,14 +144,13 @@ end #################################################################################### # Use impute and error_metric over arrays -function impute{DomainSubtype<:Domain,LossSubtype<:Loss}( - domains::Array{DomainSubtype,1}, +function impute(domains::Array{DomainSubtype,1}, losses::Array{LossSubtype,1}, - U::Array{Float64,2}) + U::Array{Float64,2}) where {DomainSubtype<:Domain, LossSubtype<:Loss} m, d = size(U) n = length(losses) yidxs = get_yidxs(losses) - A_imputed = Array{Number}((m, n)); + A_imputed = Array{Number}(undef, (m, n)); for f in 1:n for i in 1:m if length(yidxs[f]) > 1 @@ -163,7 +162,7 @@ function impute{DomainSubtype<:Domain,LossSubtype<:Loss}( end return A_imputed end -function impute{LossSubtype<:Loss}(losses::Array{LossSubtype,1}, U::Array{Float64,2}) +function impute(losses::Array{LossSubtype,1}, U::Array{Float64,2}) where LossSubtype<:Loss domains = Domain[l.domain for l in losses] impute(domains, losses, U) end diff --git a/src/initialize.jl b/src/initialize.jl index de0ef09..e4ba4ca 100644 --- a/src/initialize.jl +++ b/src/initialize.jl @@ -1,6 +1,6 @@ -import StatsBase.sample, StatsBase.wsample +import StatsBase: sample, wsample export init_kmeanspp!, init_svd!, init_nndsvd! 
-import NMF.nndsvd +import Arpack: svds # kmeans++ initialization, but with missing data # we make sure never to look at "unobserved" entries in A @@ -74,7 +74,7 @@ function init_svd!(glrm::GLRM; offset=true, scale=true, TOL = 1e-10) end end else - error("No default mapping to real valued matrix for domains of type $type(glrm.losses[f].domain)") + error("No default mapping to real valued matrix for domains of type $typeof(glrm.losses[f].domain)") end end end @@ -95,7 +95,7 @@ function init_svd!(glrm::GLRM; offset=true, scale=true, TOL = 1e-10) if stds[j] < TOL || isnan(stds[j]) stds[j] = 1 end - Astd[glrm.observed_examples[f],j] = Areal[glrm.observed_examples[f],j] - means[j] + Astd[glrm.observed_examples[f],j] = Areal[glrm.observed_examples[f],j] .- means[j] end end if offset @@ -126,49 +126,9 @@ function init_svd!(glrm::GLRM; offset=true, scale=true, TOL = 1e-10) @assert(size(glrm.X, 2) >= m) @assert(size(glrm.Y, 1) >= k) @assert(size(glrm.Y, 2) >= d) - glrm.X[1:k,1:m] = Diagonal(sqrt.(ASVD[:S]))*ASVD[:U]' # recall X is transposed as per column major order. - glrm.Y[1:k,1:d] = Diagonal(sqrt.(ASVD[:S]))*ASVD[:Vt]*Diagonal(stds) + glrm.X[1:k,1:m] = Diagonal(sqrt.(ASVD.S))*ASVD.U' # recall X is transposed as per column major order. + glrm.Y[1:k,1:d] = Diagonal(sqrt.(ASVD.S))*ASVD.Vt*Diagonal(stds) return glrm end -function init_nndsvd!(glrm::GLRM; scale::Bool=true, zeroh::Bool=false, - variant::Symbol=:std, max_iters::Int=0) - # NNDSVD initialization: - # Boutsidis C, Gallopoulos E (2007). SVD based initialization: A head - # start for nonnegative matrix factorization. Pattern Recognition - m,n = size(glrm.A) - - # only initialize based on observed entries - A_init = zeros(m,n) - for i = 1:n - A_init[glrm.observed_examples[i],i] = glrm.A[glrm.observed_examples[i],i] - end - - # scale all columns by the Loss.scale parameter - if scale - for i = 1:n - A_init[:,i] .*= glrm.losses[i].scale - end - end - - # run the first nndsvd initialization - W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) - glrm.X = W' - glrm.Y = H - - # If max_iters>0 do a soft impute for the missing entries of A. - # Iterate: Estimate missing entries of A with W*H - # Update (W,H) nndsvd estimate based on new A - for iter = 1:max_iters - # Update missing entries of A_init - for j = 1:n - for i = setdiff(1:m,glrm.observed_examples[j]) - A_init[i,j] = dot(glrm.X[:,i],glrm.Y[:,j]) - end - end - # Re-estimate W and H - W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) - glrm.X = W' - glrm.Y = H - end -end +include("initialize_nmf.jl") diff --git a/src/initialize_nmf.jl b/src/initialize_nmf.jl new file mode 100644 index 0000000..cd1ce66 --- /dev/null +++ b/src/initialize_nmf.jl @@ -0,0 +1,43 @@ +import NMF.nndsvd + +function init_nndsvd!(glrm::GLRM; scale::Bool=true, zeroh::Bool=false, + variant::Symbol=:std, max_iters::Int=0) + # NNDSVD initialization: + # Boutsidis C, Gallopoulos E (2007). SVD based initialization: A head + # start for nonnegative matrix factorization. Pattern Recognition + m,n = size(glrm.A) + + # only initialize based on observed entries + A_init = zeros(m,n) + for i = 1:n + A_init[glrm.observed_examples[i],i] = glrm.A[glrm.observed_examples[i],i] + end + + # scale all columns by the Loss.scale parameter + if scale + for i = 1:n + A_init[:,i] .*= glrm.losses[i].scale + end + end + + # run the first nndsvd initialization + W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) + glrm.X = W' + glrm.Y = H + + # If max_iters>0 do a soft impute for the missing entries of A. 
+ # Iterate: Estimate missing entries of A with W*H + # Update (W,H) nndsvd estimate based on new A + for iter = 1:max_iters + # Update missing entries of A_init + for j = 1:n + for i = setdiff(1:m,glrm.observed_examples[j]) + A_init[i,j] = dot(glrm.X[:,i],glrm.Y[:,j]) + end + end + # Re-estimate W and H + W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) + glrm.X = W' + glrm.Y = H + end +end diff --git a/src/losses.jl b/src/losses.jl index fb1a116..2d1d595 100644 --- a/src/losses.jl +++ b/src/losses.jl @@ -35,7 +35,7 @@ # `impute(d::Domain, l::my_loss_type, u::Array{Float64})` (in impute_and_err.jl) # Finds a = argmin l(u,a), the most likely value for an observation given a parameter u -import Base: scale!, *, convert +import Base: *, convert import Optim: optimize, LBFGS export Loss, DiffLoss, ClassificationLoss, SingleDimLoss, # categories of Losses @@ -46,22 +46,22 @@ export Loss, MultinomialLoss, OvALoss, # losses for predicting nominals (categoricals) PeriodicLoss, # losses for predicting periodic variables evaluate, grad, M_estimator, # methods on losses - avgerror, scale, scale!, *, + avgerror, scale, mul!, *, embedding_dim, get_yidxs, datalevels, domain -@compat abstract type Loss end +abstract type Loss end # a DiffLoss is one in which l(u,a) = f(u-a) AND argmin f(x) = 0 # for example, QuadLoss(u,a)=(u-a)² and we can write f(x)=x² and x=u-a -@compat abstract type DiffLoss<:Loss end +abstract type DiffLoss<:Loss end # a ClassificationLoss is one in which observed values are true = 1 or false = 0 = -1 AND argmin_a L(u,a) = u>=0 ? true : false -@compat abstract type ClassificationLoss<:Loss end +abstract type ClassificationLoss<:Loss end # Single Dimensional losses are DiffLosses or ClassificationLosses, which allow optimized evaluate and grad functions -@compat const SingleDimLoss = Union{DiffLoss, ClassificationLoss} +const SingleDimLoss = Union{DiffLoss, ClassificationLoss} -scale!(l::Loss, newscale::Number) = (l.scale = newscale; l) +mul!(l::Loss, newscale::Number) = (l.scale = newscale; l) scale(l::Loss) = l.scale -*(newscale::Number, l::Loss) = (newl = copy(l); scale!(newl, newscale)) -*(l::Loss, newscale::Number) = (newl = copy(l); scale!(newl, newscale)) +*(newscale::Number, l::Loss) = (newl = copy(l); mul!(newl, newscale)) +*(l::Loss, newscale::Number) = (newl = copy(l); mul!(newl, newscale)) domain(l::Loss) = l.domain @@ -70,17 +70,17 @@ domain(l::Loss) = l.domain # default number of columns # number of columns is higher for multidimensional losses embedding_dim(l::Loss) = 1 -embedding_dim{LossSubtype<:Loss}(l::Array{LossSubtype,1}) = sum(map(embedding_dim, l)) +embedding_dim(l::Array{LossSubtype,1}) where LossSubtype<:Loss = sum(map(embedding_dim, l)) # find spans of loss functions (for multidimensional losses) -function get_yidxs{LossSubtype<:Loss}(losses::Array{LossSubtype,1}) +function get_yidxs(losses::Array{LossSubtype,1}) where LossSubtype<:Loss n = length(losses) ds = map(embedding_dim, losses) d = sum(ds) featurestartidxs = cumsum(append!([1], ds)) # find which columns of Y map to which columns of A (for multidimensional losses) - U = Union{Range{Int}, Int} - @compat yidxs = Array{U}(n) + U = Union{UnitRange{Int}, Int} + yidxs = Array{U}(undef, n) for f = 1:n if ds[f] == 1 @@ -135,7 +135,7 @@ end ########################################## QUADRATIC ########################################## # f: ℜxℜ -> ℜ -type QuadLoss<:DiffLoss +mutable struct QuadLoss<:DiffLoss scale::Float64 domain::Domain end @@ -149,7 +149,7 @@ M_estimator(l::QuadLoss, 
a::AbstractArray) = mean(a) ########################################## L1 ########################################## # f: ℜxℜ -> ℜ -type L1Loss<:DiffLoss +mutable struct L1Loss<:DiffLoss scale::Float64 domain::Domain end @@ -163,7 +163,7 @@ M_estimator(l::L1Loss, a::AbstractArray) = median(a) ########################################## HUBER ########################################## # f: ℜxℜ -> ℜ -type HuberLoss<:DiffLoss +mutable struct HuberLoss<:DiffLoss scale::Float64 domain::Domain crossover::Float64 # where QuadLoss loss ends and linear loss begins; =1 for standard HuberLoss @@ -183,7 +183,7 @@ grad(l::HuberLoss,u::Float64,a::Number) = abs(u-a)>l.crossover ? sign(u-a)*l.sca # define (u)_+ = max(u,0), (u)_- = max(-u,0) so (u)_+ + (u)_- = |u| # f(u,a) = { quantile (a - u)_+ + (1-quantile) (a - u)_- # fits the `quantile`th quantile of the distribution -type QuantileLoss<:DiffLoss +mutable struct QuantileLoss<:DiffLoss scale::Float64 domain::Domain quantile::Float64 # fit the alphath quantile @@ -206,7 +206,7 @@ M_estimator(l::QuantileLoss, a::AbstractArray) = quantile(a, l.quantile) # f: ℜxℜ -> ℜ # f(u,a) = w * (1 - cos((a-u)*(2*pi)/T)) # this measures how far away u and a are on a circle of circumference T. -type PeriodicLoss<:DiffLoss +mutable struct PeriodicLoss<:DiffLoss T::Float64 # the length of the period scale::Float64 domain::Domain @@ -228,14 +228,14 @@ end # BEWARE: # 1) this is a reparametrized poisson: we parametrize the mean as exp(u) so that u can take any real value and still produce a positive mean # 2) THIS LOSS MAY CAUSE MODEL INSTABLITY AND DIFFICULTY FITTING. -type PoissonLoss<:Loss +mutable struct PoissonLoss<:Loss scale::Float64 domain::Domain end PoissonLoss(max_count=2^31::Int; domain=CountDomain(max_count)::Domain) = PoissonLoss(1.0, domain) function evaluate(l::PoissonLoss, u::Float64, a::Number) - l.scale*(exp(u) - a*u + (a==0? 0 : a*(log(a)-1))) # log(a!) ~ a==0? 0 : a*(log(a)-1) + l.scale*(exp(u) - a*u + (a==0 ? 0 : a*(log(a)-1))) # log(a!) ~ a==0 ? 0 : a*(log(a)-1) end grad(l::PoissonLoss, u::Float64, a::Number) = l.scale*(exp(u) - a) @@ -244,7 +244,7 @@ M_estimator(l::PoissonLoss, a::AbstractArray) = log(mean(a)) ########################################## ORDINAL HINGE ########################################## # f: ℜx{min, min+1... 
max-1, max} -> ℜ -type OrdinalHingeLoss<:Loss +mutable struct OrdinalHingeLoss<:Loss min::Integer max::Integer scale::Float64 @@ -295,7 +295,7 @@ M_estimator(l::OrdinalHingeLoss, a::AbstractArray) = median(a) ########################################## LOGISTIC ########################################## # f: ℜx{-1,1}-> ℜ -type LogisticLoss<:ClassificationLoss +mutable struct LogisticLoss<:ClassificationLoss scale::Float64 domain::Domain end @@ -314,7 +314,7 @@ end # f: ℜx{-1,1} -> ℜ # f(u,a) = { w * max(1-a*u, 0) for a = -1 # = { c * w * max(1-a*u, 0) for a = 1 -type WeightedHingeLoss<:ClassificationLoss +mutable struct WeightedHingeLoss<:ClassificationLoss scale::Float64 domain::Domain case_weight_ratio::Float64 # >1 for trues to have more confidence than falses, <1 for opposite @@ -357,7 +357,7 @@ end # often known as the softmax function # f(u, a) = exp(u[a]) / (sum_{a'} exp(u[a'])) # = 1 / (sum_{a'} exp(u[a'] - u[a])) -type MultinomialLoss<:Loss +mutable struct MultinomialLoss<:Loss max::Integer scale::Float64 domain::Domain @@ -416,7 +416,7 @@ end ########################################## One vs All loss ########################################## # f: ℜx{1, 2, ..., max-1, max} -> ℜ -type OvALoss<:Loss +mutable struct OvALoss<:Loss max::Integer bin_loss::Loss scale::Float64 @@ -459,7 +459,7 @@ end ########################################## Bigger vs Smaller loss ########################################## # f: ℜx{1, 2, ..., max-1} -> ℜ -type BvSLoss<:Loss +mutable struct BvSLoss<:Loss max::Integer bin_loss::Loss scale::Float64 @@ -505,7 +505,7 @@ end # f computes the (negative log likelihood of the) multinomial logit, # often known as the softmax function # f(u, a) = exp(u[a]) / (sum_{a'} exp(u[a'])) -type OrdisticLoss<:Loss +mutable struct OrdisticLoss<:Loss max::Integer scale::Float64 domain::Domain @@ -577,7 +577,7 @@ end # the most probable value a is the index of the first # positive entry of u -type MultinomialOrdinalLoss<:Loss +mutable struct MultinomialOrdinalLoss<:Loss max::Integer scale::Float64 domain::Domain @@ -651,7 +651,7 @@ end #Optimized vector evaluate on single-dimensional losses function evaluate(l::SingleDimLoss, u::Vector{Float64}, a::AbstractVector) losseval = (x::Float64, y::Number) -> evaluate(l, x, y) - mapped = zeros(u) + mapped = fill!(similar(u),0.) map!(losseval, mapped, u, a) reduce(+, mapped) end @@ -680,7 +680,7 @@ end # Optimized vector grad on single-dimensional losses function grad(l::SingleDimLoss, u::Vector{Float64}, a::AbstractVector) lossgrad = (x::Float64,y::Number) -> grad(l, x, y) - mapped = zeros(u) + mapped = fill!(similar(u),0.) 
map!(lossgrad, mapped, u, a) end diff --git a/src/modify_glrm.jl b/src/modify_glrm.jl index 4310b9e..5e76d38 100644 --- a/src/modify_glrm.jl +++ b/src/modify_glrm.jl @@ -42,10 +42,10 @@ function equilibrate_variance!(glrm::AbstractGLRM, columns_to_scale = 1:size(glr end if varlossi > 0 # rescale the losses and regularizers for each column by the inverse of the empirical variance - scale!(glrm.losses[i], scale(glrm.losses[i])/varlossi) + mul!(glrm.losses[i], scale(glrm.losses[i])/varlossi) end if varregi > 0 - scale!(glrm.ry[i], scale(glrm.ry[i])/varregi) + mul!(glrm.ry[i], scale(glrm.ry[i])/varregi) end end return glrm @@ -62,19 +62,19 @@ function prob_scale!(glrm, columns_to_scale = 1:size(glrm.A,2)) if typeof(glrm.losses[i]) == QuadLoss && length(nomissing) > 0 varlossi = var(skipmissing(glrm.A[i])) # estimate the variance if varlossi > TOL - scale!(glrm.losses[i], 1/(2*varlossi)) # this is the correct -loglik of gaussian with variance fixed at estimate + mul!(glrm.losses[i], 1/(2*varlossi)) # this is the correct -loglik of gaussian with variance fixed at estimate else warn("column $i has a variance of $varlossi; not scaling it to avoid dividing by zero.") end elseif typeof(glrm.losses[i]) == HuberLoss && length(nomissing) > 0 varlossi = avgerror(glrm.losses[i], glrm.A[i]) # estimate the width of the distribution if varlossi > TOL - scale!(glrm.losses[i], 1/(2*varlossi)) # this is not the correct -loglik of huber with estimates for variance and mean of poisson, but that's probably ok + mul!(glrm.losses[i], 1/(2*varlossi)) # this is not the correct -loglik of huber with estimates for variance and mean of poisson, but that's probably ok else warn("column $i has a variance of $varlossi; not scaling it to avoid dividing by zero.") end else # none of the other distributions have any free parameters to estimate, so this is the correct -loglik - scale!(glrm.losses[i], 1) + mul!(glrm.losses[i], 1) end end return glrm diff --git a/src/regularizers.jl b/src/regularizers.jl index 7013f8a..b80254b 100644 --- a/src/regularizers.jl +++ b/src/regularizers.jl @@ -3,7 +3,7 @@ # the abstract type Regularizer. # Regularizers should implement `evaluate` and `prox`. 
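(Aside, not part of the patch: as the comment above says, a regularizer only needs `evaluate` and `prox`. A minimal sketch of a custom regularizer under the post-0.7 API introduced in this file; the name MyAbsReg is hypothetical and simply mirrors OneReg.)

    mutable struct MyAbsReg<:Regularizer
        scale::Float64
    end
    MyAbsReg() = MyAbsReg(1.0)
    evaluate(r::MyAbsReg, a::AbstractArray) = r.scale*sum(abs, a)
    # prox of the scaled l1 norm is soft-thresholding
    prox(r::MyAbsReg, u::AbstractArray, alpha::Number) = sign.(u) .* max.(abs.(u) .- alpha*r.scale, 0)
    mul!(r::MyAbsReg, newscale::Number) = (r.scale = newscale; r)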
-import Base.scale!, Base.* +import Base: * export Regularizer, ProductRegularizer, # abstract types # concrete regularizers @@ -19,7 +19,7 @@ export Regularizer, ProductRegularizer, # abstract types # methods on regularizers prox!, prox, # utilities - scale, scale!, * + scale, mul!, * # numerical tolerance TOL = 1e-12 @@ -27,17 +27,17 @@ TOL = 1e-12 # regularizers # regularizers r should have the method `prox` defined such that # prox(r)(u,alpha) = argmin_x( alpha r(x) + 1/2 \|x - u\|_2^2) -@compat abstract type Regularizer end -@compat abstract type MatrixRegularizer <: LowRankModels.Regularizer end +abstract type Regularizer end +abstract type MatrixRegularizer <: LowRankModels.Regularizer end # default inplace prox operator (slower than if inplace prox is implemented) prox!(r::Regularizer,u::AbstractArray,alpha::Number) = (v = prox(r,u,alpha); @simd for i=1:length(u) @inbounds u[i]=v[i] end; u) # default scaling scale(r::Regularizer) = r.scale -scale!(r::Regularizer, newscale::Number) = (r.scale = newscale; r) -scale!(rs::Array{Regularizer}, newscale::Number) = (for r in rs scale!(r, newscale) end; rs) -*(newscale::Number, r::Regularizer) = (newr = typeof(r)(); scale!(newr, scale(r)*newscale); newr) +mul!(r::Regularizer, newscale::Number) = (r.scale = newscale; r) +mul!(rs::Array{Regularizer}, newscale::Number) = (for r in rs mul!(r, newscale) end; rs) +*(newscale::Number, r::Regularizer) = (newr = typeof(r)(); mul!(newr, scale(r)*newscale); newr) ## utilities @@ -49,12 +49,12 @@ function allnonneg(a::AbstractArray) end ## QuadLoss regularization -type QuadReg<:Regularizer +mutable struct QuadReg<:Regularizer scale::Float64 end QuadReg() = QuadReg(1) prox(r::QuadReg,u::AbstractArray,alpha::Number) = 1/(1+2*alpha*r.scale)*u -prox!(r::QuadReg,u::Array{Float64},alpha::Number) = scale!(u, 1/(1+2*alpha*r.scale)) +prox!(r::QuadReg,u::Array{Float64},alpha::Number) = rmul!(u, 1/(1+2*alpha*r.scale)) evaluate(r::QuadReg,a::AbstractArray) = r.scale*sum(abs2, a) ## constrained QuadLoss regularization @@ -65,18 +65,18 @@ evaluate(r::QuadReg,a::AbstractArray) = r.scale*sum(abs2, a) ## constraining the maxnorm of XY to be <= mu is achieved ## by setting glrm.rx = QuadConstraint(sqrt(mu)) ## and the same for every element of glrm.ry -type QuadConstraint<:Regularizer +mutable struct QuadConstraint<:Regularizer max_2norm::Float64 end QuadConstraint() = QuadConstraint(1) prox(r::QuadConstraint,u::AbstractArray,alpha::Number) = (r.max_2norm)/norm(u)*u -prox!(r::QuadConstraint,u::Array{Float64},alpha::Number) = scale!(u, (r.max_2norm)/norm(u)) +prox!(r::QuadConstraint,u::Array{Float64},alpha::Number) = mul!(u, (r.max_2norm)/norm(u)) evaluate(r::QuadConstraint,u::AbstractArray) = norm(u) > r.max_2norm + TOL ? 
Inf : 0 scale(r::QuadConstraint) = 1 -scale!(r::QuadConstraint, newscale::Number) = 1 +mul!(r::QuadConstraint, newscale::Number) = 1 ## one norm regularization -type OneReg<:Regularizer +mutable struct OneReg<:Regularizer scale::Float64 end OneReg() = OneReg(1) @@ -89,17 +89,17 @@ evaluate(r::OneReg,a::AbstractArray) = r.scale*sum(abs,a) ## no regularization -type ZeroReg<:Regularizer +mutable struct ZeroReg<:Regularizer end prox(r::ZeroReg,u::AbstractArray,alpha::Number) = u prox!(r::ZeroReg,u::Array{Float64},alpha::Number) = u evaluate(r::ZeroReg,a::AbstractArray) = 0 scale(r::ZeroReg) = 0 -scale!(r::ZeroReg, newscale::Number) = 0 +mul!(r::ZeroReg, newscale::Number) = 0 ## indicator of the nonnegative orthant ## (enforces nonnegativity, eg for nonnegative matrix factorization) -type NonNegConstraint<:Regularizer +mutable struct NonNegConstraint<:Regularizer end prox(r::NonNegConstraint,u::AbstractArray,alpha::Number=1) = broadcast(max,u,0) prox!(r::NonNegConstraint,u::Array{Float64},alpha::Number=1) = (@simd for i=1:length(u) @inbounds u[i] = max(u[i], 0) end; u) @@ -112,11 +112,11 @@ function evaluate(r::NonNegConstraint,a::AbstractArray) return 0 end scale(r::NonNegConstraint) = 1 -scale!(r::NonNegConstraint, newscale::Number) = 1 +mul!(r::NonNegConstraint, newscale::Number) = 1 ## one norm regularization restricted to nonnegative orthant ## (enforces nonnegativity, in addition to one norm regularization) -type NonNegOneReg<:Regularizer +mutable struct NonNegOneReg<:Regularizer scale::Float64 end NonNegOneReg() = NonNegOneReg(1) @@ -136,17 +136,17 @@ function evaluate(r::NonNegOneReg,a::AbstractArray) return r.scale*sum(a) end scale(r::NonNegOneReg) = 1 -scale!(r::NonNegOneReg, newscale::Number) = 1 +mul!(r::NonNegOneReg, newscale::Number) = 1 ## Quadratic regularization restricted to nonnegative domain ## (Enforces nonnegativity alongside quadratic regularization) -type NonNegQuadReg +mutable struct NonNegQuadReg scale::Float64 end NonNegQuadReg() = NonNegQuadReg(1) prox(r::NonNegQuadReg,u::AbstractArray,alpha::Number) = max.(1/(1+2*alpha*r.scale)*u, 0) prox!(r::NonNegQuadReg,u::AbstractArray,alpha::Number) = begin - scale!(u, 1/(1+2*alpha*r.scale)) + mul!(u, 1/(1+2*alpha*r.scale)) maxval = maximum(u) clamp!(u, 0, maxval) end @@ -161,7 +161,7 @@ end ## indicator of the last entry being equal to 1 ## (allows an unpenalized offset term into the glrm when used in conjunction with lastentry_unpenalized) -type lastentry1<:Regularizer +mutable struct lastentry1<:Regularizer r::Regularizer end lastentry1() = lastentry1(ZeroReg()) @@ -172,11 +172,11 @@ prox!(r::lastentry1,u::AbstractArray{Float64,2},alpha::Number=1) = (prox!(r.r,vi evaluate(r::lastentry1,a::AbstractArray{Float64,1}) = (a[end]==1 ? evaluate(r.r,a[1:end-1]) : Inf) evaluate(r::lastentry1,a::AbstractArray{Float64,2}) = (all(a[end,:].==1) ? 
evaluate(r.r,a[1:end-1,:]) : Inf) scale(r::lastentry1) = scale(r.r) -scale!(r::lastentry1, newscale::Number) = scale!(r.r, newscale) +mul!(r::lastentry1, newscale::Number) = mul!(r.r, newscale) ## makes the last entry unpenalized ## (allows an unpenalized offset term into the glrm when used in conjunction with lastentry1) -type lastentry_unpenalized<:Regularizer +mutable struct lastentry_unpenalized<:Regularizer r::Regularizer end lastentry_unpenalized() = lastentry_unpenalized(ZeroReg()) @@ -187,11 +187,11 @@ prox(r::lastentry_unpenalized,u::AbstractArray{Float64,2},alpha::Number=1) = [pr prox!(r::lastentry_unpenalized,u::AbstractArray{Float64,2},alpha::Number=1) = (prox!(r.r,view(u,1:size(u,1)-1,:),alpha); u) evaluate(r::lastentry_unpenalized,a::AbstractArray{Float64,2}) = evaluate(r.r,a[1:end-1,:]) scale(r::lastentry_unpenalized) = scale(r.r) -scale!(r::lastentry_unpenalized, newscale::Number) = scale!(r.r, newscale) +mul!(r::lastentry_unpenalized, newscale::Number) = mul!(r.r, newscale) ## fixes the values of the first n elements of the column to be y ## optionally regularizes the last k-n elements with regularizer r -type fixed_latent_features<:Regularizer +mutable struct fixed_latent_features<:Regularizer r::Regularizer y::Array{Float64,1} # the values of the fixed latent features n::Int # length of y @@ -208,11 +208,11 @@ function prox!(r::fixed_latent_features,u::Array{Float64},alpha::Number) end evaluate(r::fixed_latent_features, a::AbstractArray) = a[1:r.n]==r.y ? evaluate(r.r, a[(r.n+1):end]) : Inf scale(r::fixed_latent_features) = scale(r.r) -scale!(r::fixed_latent_features, newscale::Number) = scale!(r.r, newscale) +mul!(r::fixed_latent_features, newscale::Number) = mul!(r.r, newscale) ## fixes the values of the last n elements of the column to be y ## optionally regularizes the first k-n elements with regularizer r -type fixed_last_latent_features<:Regularizer +mutable struct fixed_last_latent_features<:Regularizer r::Regularizer y::Array{Float64,1} # the values of the fixed latent features n::Int # length of y @@ -229,14 +229,14 @@ function prox!(r::fixed_last_latent_features,u::Array{Float64},alpha::Number) end evaluate(r::fixed_last_latent_features, a::AbstractArray) = a[length(a)-r.n+1:end]==r.y ? 
evaluate(r.r, a[1:length(a)-r.n]) : Inf scale(r::fixed_last_latent_features) = scale(r.r) -scale!(r::fixed_last_latent_features, newscale::Number) = scale!(r.r, newscale) +mul!(r::fixed_last_latent_features, newscale::Number) = mul!(r.r, newscale) ## indicator of 1-sparse vectors ## (enforces that exact 1 entry is nonzero, eg for orthogonal NNMF) -type OneSparseConstraint<:Regularizer +mutable struct OneSparseConstraint<:Regularizer end -prox(r::OneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = indmax(u); v=zeros(size(u)); v[idx]=u[idx]; v) -prox!(r::OneSparseConstraint, u::Array, alpha::Number=0) = (idx = indmax(u); ui = u[idx]; scale!(u,0); u[idx]=ui; u) +prox(r::OneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = argmax(u); v=zeros(size(u)); v[idx]=u[idx]; v) +prox!(r::OneSparseConstraint, u::Array, alpha::Number=0) = (idx = argmax(u); ui = u[idx]; mul!(u,0); u[idx]=ui; u) function evaluate(r::OneSparseConstraint, a::AbstractArray) oneflag = false for ai in a @@ -253,10 +253,10 @@ function evaluate(r::OneSparseConstraint, a::AbstractArray) return 0 end scale(r::OneSparseConstraint) = 1 -scale!(r::OneSparseConstraint, newscale::Number) = 1 +mul!(r::OneSparseConstraint, newscale::Number) = 1 ## Indicator of k-sparse vectors -type KSparseConstraint<:Regularizer +mutable struct KSparseConstraint<:Regularizer k::Int end function evaluate(r::KSparseConstraint, a::AbstractArray) @@ -286,17 +286,17 @@ function prox!(r::KSparseConstraint, u::Array, alpha::Number) k = r.k ids = selectperm(u, 1:k, by=abs, rev=true) vals = u[ids] - scale!(u,0) + mul!(u,0) u[ids] = vals u end ## indicator of 1-sparse unit vectors ## (enforces that exact 1 entry is 1 and all others are zero, eg for kmeans) -type UnitOneSparseConstraint<:Regularizer +mutable struct UnitOneSparseConstraint<:Regularizer end -prox(r::UnitOneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = indmax(u); v=zeros(size(u)); v[idx]=1; v) -prox!(r::UnitOneSparseConstraint, u::Array, alpha::Number=0) = (idx = indmax(u); scale!(u,0); u[idx]=1; u) +prox(r::UnitOneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = argmax(u); v=zeros(size(u)); v[idx]=1; v) +prox!(r::UnitOneSparseConstraint, u::Array, alpha::Number=0) = (idx = argmax(u); mul!(u,0); u[idx]=1; u) function evaluate(r::UnitOneSparseConstraint, a::AbstractArray) oneflag = false @@ -316,12 +316,12 @@ function evaluate(r::UnitOneSparseConstraint, a::AbstractArray) return 0 end scale(r::UnitOneSparseConstraint) = 1 -scale!(r::UnitOneSparseConstraint, newscale::Number) = 1 +mul!(r::UnitOneSparseConstraint, newscale::Number) = 1 ## indicator of vectors in the simplex: nonnegative vectors with unit l1 norm ## (eg for QuadLoss mixtures, ie soft kmeans) ## prox for the simplex is derived by Chen and Ye in [this paper](http://arxiv.org/pdf/1101.6081v2.pdf) -type SimplexConstraint<:Regularizer +mutable struct SimplexConstraint<:Regularizer end function prox(r::SimplexConstraint, u::AbstractArray, alpha::Number=0) n = length(u) @@ -334,7 +334,7 @@ function prox(r::SimplexConstraint, u::AbstractArray, alpha::Number=0) break end end - max.(u - t, 0) + max.(u .- t, 0) end function evaluate(r::SimplexConstraint,a::AbstractArray) # check it's a unit vector @@ -346,7 +346,7 @@ function evaluate(r::SimplexConstraint,a::AbstractArray) return 0 end scale(r::SimplexConstraint) = 1 -scale!(r::SimplexConstraint, newscale::Number) = 1 +mul!(r::SimplexConstraint, newscale::Number) = 1 ## ordinal regularizer ## a block regularizer which @@ -354,13 +354,13 @@ 
scale!(r::SimplexConstraint, newscale::Number) = 1 # 2) forces the last entry of each column to be increasing # 3) applies an internal regularizer to the first k-1 entries of each column ## should always be used in conjunction with lastentry1 regularization on x -type OrdinalReg<:Regularizer +mutable struct OrdinalReg<:Regularizer r::Regularizer end OrdinalReg() = OrdinalReg(ZeroReg()) prox(r::OrdinalReg,u::AbstractArray,alpha::Number) = (uc = copy(u); prox!(r,uc,alpha)) function prox!(r::OrdinalReg,u::AbstractArray,alpha::Number) - um = mean(u[1:end-1, :], 2) + um = mean(u[1:end-1, :], dims=2) prox!(r.r,um,alpha) for i=1:size(u,1)-1 for j=1:size(u,2) @@ -378,18 +378,18 @@ function prox!(r::OrdinalReg,u::AbstractArray,alpha::Number) end evaluate(r::OrdinalReg,a::AbstractArray) = evaluate(r.r,a[1:end-1,1]) scale(r::OrdinalReg) = scale(r.r) -scale!(r::OrdinalReg, newscale::Number) = scale!(r.r, newscale) +mul!(r::OrdinalReg, newscale::Number) = mul!(r.r, newscale) # make sure we don't add two offsets cuz that's weird lastentry_unpenalized(r::OrdinalReg) = r -type MNLOrdinalReg<:Regularizer +mutable struct MNLOrdinalReg<:Regularizer r::Regularizer end MNLOrdinalReg() = MNLOrdinalReg(ZeroReg()) prox(r::MNLOrdinalReg,u::AbstractArray,alpha::Number) = (uc = copy(u); prox!(r,uc,alpha)) function prox!(r::MNLOrdinalReg,u::AbstractArray,alpha::Number; TOL=1e-3) - um = mean(u[1:end-1, :], 2) + um = mean(u[1:end-1, :], dims=2) prox!(r.r,um,alpha) for i=1:size(u,1)-1 for j=1:size(u,2) @@ -405,12 +405,12 @@ function prox!(r::MNLOrdinalReg,u::AbstractArray,alpha::Number; TOL=1e-3) end evaluate(r::MNLOrdinalReg,a::AbstractArray) = evaluate(r.r,a[1:end-1,1]) scale(r::MNLOrdinalReg) = scale(r.r) -scale!(r::MNLOrdinalReg, newscale::Number) = scale!(r.r, newscale) +mul!(r::MNLOrdinalReg, newscale::Number) = mul!(r.r, newscale) # make sure we don't add two offsets cuz that's weird lastentry_unpenalized(r::MNLOrdinalReg) = r ## Quadratic regularization with non-zero mean -type RemQuadReg<:Regularizer +mutable struct RemQuadReg<:Regularizer scale::Float64 m::Array{Float64, 1} end @@ -419,7 +419,7 @@ prox(r::RemQuadReg, u::AbstractArray, alpha::Number) = (u + 2 * alpha * r.scale * r.m) / (1 + 2 * alpha * r.scale) prox!(r::RemQuadReg, u::Array{Float64}, alpha::Number) = begin broadcast!(.+, u, u, 2 * alpha * r.scale * r.m) - scale!(u, 1 / (1 + 2 * alpha * r.scale)) + mul!(u, 1 / (1 + 2 * alpha * r.scale)) end evaluate(r::RemQuadReg, a::AbstractArray) = r.scale * sum(abs2, a - r.m) diff --git a/src/rsvd.jl b/src/rsvd.jl index fcc5859..0e5e656 100644 --- a/src/rsvd.jl +++ b/src/rsvd.jl @@ -1,6 +1,6 @@ #### randomized SVD (from Jiahao Chen, based on http://arxiv.org/pdf/0909.4061.pdf) -import Base.LinAlg.SVD +import LinearAlgebra: SVD #The simplest possible randomized svd #Inputs diff --git a/src/sample.jl b/src/sample.jl index 9ef3bd2..89e7820 100644 --- a/src/sample.jl +++ b/src/sample.jl @@ -21,7 +21,7 @@ # DataTypes are assigned to each column of the data and are not part of the low-rank model itself, they just serve # as a way to evaluate the performance of the low-rank model. -import StatsBase: sample, WeightVec +import StatsBase: sample, Weights export sample, sample_missing ########################################## REALS ########################################## @@ -42,7 +42,7 @@ end # This is fast and works for any loss. 
function sample(D::BoolDomain, l::Loss, u::AbstractArray) prob = exp.(-[evaluate(l, u, i) for i in (true, false)]) - return sample(WeightVec(prob)) + return sample(Weights(prob)) end ########################################## ORDINALS ########################################## @@ -59,14 +59,14 @@ end # generic method function sample(D::OrdinalDomain, l::Loss, u::AbstractArray) prob = exp.(-[evaluate(l, u, i) for i in D.min:D.max]) - return sample(WeightVec(prob)) + return sample(Weights(prob)) end ########################################## CATEGORICALS ########################################## # Categorical data should take integer values ranging from 1 to `max` function sample(D::CategoricalDomain, l::MultinomialLoss, u::Array{Float64}) - return sample(WeightVec(exp.(u))) + return sample(Weights(exp.(u))) end # sample(D::CategoricalDomain, l::OvALoss, u::Array{Float64}) = ?? @@ -74,7 +74,7 @@ end # generic method function sample(D::CategoricalDomain, l::Loss, u::AbstractArray) prob = exp.(-[evaluate(l, u, i) for i in D.min:D.max]) - return sample(WeightVec(prob)) + return sample(Weights(prob)) end ########################################## PERIODIC ########################################## @@ -91,10 +91,10 @@ sample(D::CountDomain, l::Loss, u::Float64) = sample(OrdinalDomain(0,D.max_count #################################################################################### # Use impute and error_metric over arrays -function sample{DomainSubtype<:Domain,LossSubtype<:Loss}( +function sample( domains::Array{DomainSubtype,1}, losses::Array{LossSubtype,1}, - U::Array{Float64,2}) # U = X'*Y + U::Array{Float64,2}) where {DomainSubtype<:Domain,LossSubtype<:Loss} m, d = size(U) n = length(losses) yidxs = get_yidxs(losses) @@ -134,7 +134,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true # even if all data for some real loss take integer values for j=1:n if isa(domains[j], RealDomain) && isa(glrm.A[j], DataArray{Int64,1}) - domains[j] = OrdinalDomain(minimum(dropna(glrm.A[j])), maximum(dropna(glrm.A[j]))) + domains[j] = OrdinalDomain(minimum(dropmissing(glrm.A[j])), maximum(dropmissing(glrm.A[j]))) end end A_sampled = copy(glrm.A); @@ -151,7 +151,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true return A_sampled end -function sample{LossSubtype<:Loss}(losses::Array{LossSubtype,1}, U::Array{Float64,2}) +function sample(losses::Array{LossSubtype,1}, U::Array{Float64,2}) where LossSubtype<:Loss domains = Domain[domain(l) for l in losses] sample(domains, losses, U) end diff --git a/src/scikitlearn.jl b/src/scikitlearn.jl index a9e6fbb..f60f412 100644 --- a/src/scikitlearn.jl +++ b/src/scikitlearn.jl @@ -13,7 +13,7 @@ export SkGLRM, PCA, QPCA, NNMF, KMeans, RPCA # max_iter are hyperparameters and need to be visible/changeable by # set_params for grid-search. # There are other ways of setting it up, but this seems like the simplest. -type SkGLRM <: ScikitLearnBase.BaseEstimator +mutable struct SkGLRM <: ScikitLearnBase.BaseEstimator # Hyperparameters: those will be passed to GLRM, so it doesn't matter if # they're not typed. fit_params # if fit_params != nothing, it has priority over abs_tol, etc. @@ -51,17 +51,22 @@ function do_fit!(skglrm::SkGLRM, glrm::GLRM) fit!(glrm, fit_params; verbose=skglrm.verbose) end +function ind2sub(a, i) + i2s[i] +end + function build_glrm(skglrm::SkGLRM, X, missing_values) k = skglrm.k == -1 ? 
size(X, 2) : skglrm.k - obs = [ind2sub(missing_values, x) for x in find(.!missing_values)] + i2s = CartesianIndices(missing_values) + obs = [i2s[x] for x in (LinearIndices(.!missing_values))[findall(.!missing_values)] ] rx, ry = skglrm.rx, skglrm.ry if skglrm.rx_scale !== nothing rx = copy(rx) - scale!(rx, skglrm.rx_scale) + mul!(rx, skglrm.rx_scale) end if skglrm.ry_scale !== nothing ry = copy(ry) - scale!(ry, skglrm.ry_scale) + mul!(ry, skglrm.ry_scale) end GLRM(X, skglrm.loss, rx, ry, k; obs=obs) end @@ -105,7 +110,7 @@ ScikitLearnBase.inverse_transform(skglrm::SkGLRM, X) = X * skglrm.glrm.Y function ScikitLearnBase.predict(km::SkGLRM, X) X2 = ScikitLearnBase.transform(km, X) # This performs the "argmax" over the columns to get the cluster # - return mapslices(indmax, X2, 2)[:] + return mapslices(argmax, X2, 2)[:] end ################################################################################ diff --git a/src/shareglrm.jl b/src/shareglrm.jl index f9f88b6..0fa255f 100644 --- a/src/shareglrm.jl +++ b/src/shareglrm.jl @@ -1,12 +1,12 @@ -import Base: size, axpy! -import Base.LinAlg: scale! -import Base.BLAS: gemm! -import Base: shmem_rand, shmem_randn + +import LinearAlgebra: size, axpy! +import LinearAlgebra.BLAS: gemm! +#import Base: shmem_rand, shmem_randn export ShareGLRM, share ### GLRM TYPE -type ShareGLRM{L<:Loss, R<:Regularizer}<:AbstractGLRM +mutable struct ShareGLRM{L<:Loss, R<:Regularizer}<:AbstractGLRM A::SharedArray # The data table transformed into a coded array losses::Array{L,1} # array of loss functions rx::Regularizer # The regularization to be applied to each row of Xᵀ (column of X) diff --git a/src/simple_glrms.jl b/src/simple_glrms.jl index dfba068..91e7648 100644 --- a/src/simple_glrms.jl +++ b/src/simple_glrms.jl @@ -29,7 +29,7 @@ end function kmeans(A::AbstractArray, k::Int; kwargs...) loss = QuadLoss() ry = ZeroReg() - rx = UnitOneSparseConstraint() + rx = UnitOneSparseConstraint() return GLRM(A,loss,rx,ry,k; kwargs...) end @@ -39,4 +39,4 @@ function rpca(A::AbstractArray, k::Int; scale=1.0::Float64, kwargs...) loss = HuberLoss() r = QuadReg(scale) return GLRM(A,loss,r,r,k; kwargs...) -end \ No newline at end of file +end diff --git a/src/utilities/conveniencemethods.jl b/src/utilities/conveniencemethods.jl index 2a281ea..38e7e51 100644 --- a/src/utilities/conveniencemethods.jl +++ b/src/utilities/conveniencemethods.jl @@ -7,7 +7,7 @@ export copy, copy_estimate, GLRM for T in :[Loss, Regularizer, AbstractGLRM].args @eval function copy(r::$T) - fieldvals = [getfield(r, f) for f in fieldnames(r)] + fieldvals = [getfield(r, f) for f in fieldnames(typeof(r))] return typeof(r)(fieldvals...) end end @@ -18,7 +18,7 @@ function copy_estimate(g::GLRM) g.observed_features,g.observed_examples, copy(g.X),copy(g.Y)) end -# domains are immutable, so this is ok +# domains are struct, so this is ok copy(d::Domain) = d ############################################################## diff --git a/src/utilities/deprecated.jl b/src/utilities/deprecated.jl index a411687..a7eb735 100644 --- a/src/utilities/deprecated.jl +++ b/src/utilities/deprecated.jl @@ -1,6 +1,6 @@ -using Base.depwarn +using Base: depwarn -@compat Base.@deprecate GLRM(A::AbstractArray, obs::Array{Tuple{Int, Int}, 1}, args...; kwargs...) GLRM(A, args...; obs = obs, kwargs...) +Base.@deprecate GLRM(A::AbstractArray, obs::Array{Tuple{Int, Int}, 1}, args...; kwargs...) GLRM(A, args...; obs = obs, kwargs...) 
Base.@deprecate ProxGradParams(s::Number,m::Int,c::Float64,ms::Float64) ProxGradParams(s, max_iter=m, abs_tol=c, min_stepsize=ms) diff --git a/test/basic_functionality.jl b/test/basic_functionality.jl index e00b448..2bec12c 100644 --- a/test/basic_functionality.jl +++ b/test/basic_functionality.jl @@ -1,6 +1,6 @@ using LowRankModels # tests basic functionality of glrm.jl -srand(1); +Random.seed!(1); m,n,k,s = 100,100,5,100*100; # matrix to encode X_real, Y_real = randn(m,k), randn(k,n); @@ -12,7 +12,7 @@ glrm = GLRM(A,losses,rx,ry,5, scale=false, offset=false, X=randn(k,m), Y=randn(k p = Params(1, max_iter=200, abs_tol=0.0000001, min_stepsize=0.001) @time X,Y,ch = fit!(glrm, params=p); Ah = X'*Y; -p.abs_tol > abs(vecnorm(A-Ah)^2 - ch.objective[end]) +p.abs_tol > abs(norm(A-Ah)^2 - ch.objective[end]) function validate_folds(trf,tre,tsf,tse) for i=1:length(trf) diff --git a/test/err_test.jl b/test/err_test.jl index 0a8b77d..9b0fb3b 100644 --- a/test/err_test.jl +++ b/test/err_test.jl @@ -1,5 +1,5 @@ using LowRankModels -srand(1); +Random.seed!(1); test_losses = Loss[ QuadLoss(), diff --git a/test/hello_world.jl b/test/hello_world.jl index 4ff2c96..05f1477 100644 --- a/test/hello_world.jl +++ b/test/hello_world.jl @@ -1,23 +1,23 @@ using LowRankModels # loss types to test real_loss_types = [QuadLoss, HuberLoss] bool_loss_types = [HingeLoss] ordinal_loss_types = [OrdinalHingeLoss, BvSLoss] categorical_loss_types = [MultinomialLoss, OvALoss] #instantiate losses ncat = 4 # maximum categorical levels nord = 5 # maximum ordinal levels real_losses = [l() for l in real_loss_types] bool_losses = [l() for l in bool_loss_types] ordinal_losses = [l(rand(3:nord)) for l in ordinal_loss_types] categorical_losses = [l(rand(3:ncat)) for l in categorical_loss_types] losses = [real_losses..., bool_losses..., ordinal_losses..., categorical_losses...]
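(Aside, not part of the patch: after this migration, in-place loss rescaling uses mul! where scale! was used before. Note that, as defined in losses.jl above, mul!(l, s) assigns l.scale = s rather than multiplying, and s * l returns a copy whose scale is set the same way.)

    l = QuadLoss()
    mul!(l, 0.5)    # sets l.scale to 0.5 in place (the replacement for scale!)
    l2 = 2.0 * l    # copy of l whose scale is set to 2.0 via mul!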
# scale losses for different columns for loss in losses - scale!(loss, rand()) + mul!(loss, rand()) end # regularizers to test diff --git a/test/init_test.jl b/test/init_test.jl index 837bd80..e53d79a 100644 --- a/test/init_test.jl +++ b/test/init_test.jl @@ -1,4 +1,4 @@ -using Base.Test +using Test using LowRankModels ## Tests for NNDSVD initialization diff --git a/test/loss_test.jl b/test/loss_test.jl index feb2f7a..03b3d18 100644 --- a/test/loss_test.jl +++ b/test/loss_test.jl @@ -1,7 +1,7 @@ using LowRankModels # test losses in losses.jl -srand(1); +Random.seed!(1); losses = [ QuadLoss(), diff --git a/test/mult_reg.jl b/test/mult_reg.jl index 3abc649..5159c2b 100644 --- a/test/mult_reg.jl +++ b/test/mult_reg.jl @@ -1,5 +1,5 @@ using LowRankModels -using Base.Test +using Test # Parameters n = 200 @@ -9,7 +9,7 @@ eta = 0.01 delta = 1e-3 # Generate problem -srand(1) +Random.seed!(1) Um = randn(r, n) Vm = randn(r, m) U = Um .+ sqrt(eta) * randn(r, n) diff --git a/test/prob_tests/BvSLoss.jl b/test/prob_tests/BvSLoss.jl index 40caedb..f38e902 100644 --- a/test/prob_tests/BvSLoss.jl +++ b/test/prob_tests/BvSLoss.jl @@ -1,10 +1,11 @@ -using LowRankModels +using LowRankModels, Random import StatsBase: sample, Weights +import LinearAlgebra: norm # test ordistic loss ## generate data -srand(1); +Random.seed!(1); m,n,k = 100,100,3; kfit = k+1 nlevels = 5; # number of levels @@ -20,7 +21,7 @@ for j=1:n # this scheme doesn't work to ensure uniform sampling T_real[:,j] = sort(T_real[:,j]) end -signedsums = @compat Array{Float64}(d, nlevels) +signedsums = Array{Float64}(undef, d, nlevels) for i=1:d for j=1:nlevels signedsums[i,j] = i