diff --git a/.travis.yml b/.travis.yml index 59667b8..c882d67 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,13 +3,15 @@ os: - osx - linux julia: - - 0.6 + - 1.0 - nightly notifications: email: false matrix: allow_failures: - julia: nightly -script: - - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - julia -e 'Pkg.clone(pwd()); Pkg.build("LowRankModels"); Pkg.test("LowRankModels"; coverage=true)'; +after_success: + # push coverage results to Coveralls + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())' + # push coverage results to Codecov + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())' diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..4f6670d --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,387 @@ +# This file is machine-generated - editing it directly is not advised + +[[Arpack]] +deps = ["BinaryProvider", "Libdl", "LinearAlgebra"] +git-tree-sha1 = "07a2c077bdd4b6d23a40342a8a108e2ee5e58ab6" +uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +version = "0.3.1" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[BinDeps]] +deps = ["Compat", "Libdl", "SHA", "URIParser"] +git-tree-sha1 = "12093ca6cdd0ee547c39b1870e0c9c3f154d9ca9" +uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee" +version = "0.8.10" + +[[BinaryProvider]] +deps = ["Libdl", "Logging", "SHA"] +git-tree-sha1 = "c7361ce8a2129f20b0e05a89f7070820cfed6648" +uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" +version = "0.5.6" + +[[CSV]] +deps = ["CategoricalArrays", "DataFrames", "Dates", "Mmap", "Parsers", "PooledArrays", "Profile", "Tables", "Unicode", "WeakRefStrings"] +git-tree-sha1 = "a7df9250dff3aba96436580dd6ac00d712364cab" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.5.9" + +[[Calculus]] +deps = ["Compat"] +git-tree-sha1 = "f60954495a7afcee4136f78d1d60350abd37a409" +uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" +version = "0.4.1" + +[[CategoricalArrays]] +deps = ["Compat", "Future", "JSON", "Missings", "Printf", "Reexport"] +git-tree-sha1 = "26601961df6afacdd16d67c1eec6cfe75e5ae9ab" +uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" +version = "0.5.4" + +[[CodecZlib]] +deps = ["BinaryProvider", "Libdl", "Test", "TranscodingStreams"] +git-tree-sha1 = "36bbf5374c661054d41410dc53ff752972583b9b" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.5.2" + +[[CommonSubexpressions]] +deps = ["Test"] +git-tree-sha1 = "efdaf19ab11c7889334ca247ff4c9f7c322817b0" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.2.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "84aa74986c5b9b898b0d1acaf3258741ee64754f" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "2.1.0" + +[[DataFrames]] +deps = ["CategoricalArrays", "Compat", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "StatsBase", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "def266a7e5eb6f633ef4c72633d2a328b9450052" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "0.19.0" + +[[DataStructures]] +deps = ["InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "0809951a1774dc724da22d26e4289bbaab77809a" +uuid = 
"864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.17.0" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffEqDiffTools]] +deps = ["LinearAlgebra", "SparseArrays", "StaticArrays"] +git-tree-sha1 = "b992345a39b4d9681342ae795a8dacc100730182" +uuid = "01453d9d-ee7c-5054-8395-0335cb756afa" +version = "0.14.0" + +[[DiffResults]] +deps = ["Compat", "StaticArrays"] +git-tree-sha1 = "34a4a1e8be7bc99bc9c611b895b5baf37a80584c" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "0.0.4" + +[[DiffRules]] +deps = ["Random", "Test"] +git-tree-sha1 = "dc0869fb2f5b23466b32ea799bd82c76480167f7" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "0.0.10" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[EzXML]] +deps = ["BinaryProvider", "Libdl", "Printf"] +git-tree-sha1 = "724e13b7522563a18ae4a5cc4a9792ae3b0da3e6" +uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" +version = "0.9.3" + +[[FileIO]] +deps = ["Pkg"] +git-tree-sha1 = "351f001a78aa1b7ad2696e386e110b5abd071c71" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.0.7" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "Test"] +git-tree-sha1 = "9ab8f76758cbabba8d7f103c51dce7f73fcf8e92" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.6.3" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "InteractiveUtils", "LinearAlgebra", "NaNMath", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Test"] +git-tree-sha1 = "4c4d727f1b7e0092134fabfab6396b8945c1ea5b" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.3" + +[[Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[InvertedIndices]] +deps = ["Test"] +git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.0.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.0" + +[[LibGit2]] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LineSearches]] +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf", "Test"] +git-tree-sha1 = "54eb90e8dbe745d617c78dee1d6ae95c7f6f5779" +uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" +version = "7.0.1" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Missings]] +deps = ["SparseArrays", "Test"] +git-tree-sha1 = "f0719736664b4358aa9ec173077d4285775f8007" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.1" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[Mocking]] +deps = ["Compat", "Dates"] +git-tree-sha1 = "4bf69aaf823b119b034e091e16b18311aa191663" +uuid = 
"78c3b35d-d492-501b-9361-3d52fe80e533" +version = "0.5.7" + +[[NLSolversBase]] +deps = ["Calculus", "DiffEqDiffTools", "DiffResults", "Distributed", "ForwardDiff", "LinearAlgebra", "Random", "SparseArrays", "Test"] +git-tree-sha1 = "0c6f0e7f2178f78239cfb75310359eed10f2cacb" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.3.1" + +[[NMF]] +deps = ["LinearAlgebra", "Printf", "Random", "Statistics", "StatsBase", "Test"] +git-tree-sha1 = "7292dbfbc5501f9a2ae3cd748551c5e500de5f0f" +uuid = "6ef6ca0d-6ad7-5ff6-b225-e928bfa0a386" +version = "0.4.0" + +[[NaNMath]] +deps = ["Compat"] +git-tree-sha1 = "ce3b85e484a5d4c71dd5316215069311135fa9f2" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.2" + +[[Optim]] +deps = ["Calculus", "DiffEqDiffTools", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "Random", "SparseArrays", "StatsBase"] +git-tree-sha1 = "05efd63b639afdd3425909cd3af73fe9c1373cf3" +uuid = "429524aa-4258-5aef-a3af-852621145aeb" +version = "0.19.1" + +[[OrderedCollections]] +deps = ["Random", "Serialization", "Test"] +git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.1.0" + +[[Parameters]] +deps = ["Markdown", "OrderedCollections", "REPL", "Test"] +git-tree-sha1 = "70bdbfb2bceabb15345c0b54be4544813b3444e4" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.10.3" + +[[Parsers]] +deps = ["Dates", "Test"] +git-tree-sha1 = "db2b35dedab3c0e46dc15996d170af07a5ab91c9" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "0.3.6" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PooledArrays]] +git-tree-sha1 = "6e8c38927cb6e9ae144f7277c753714861b27d14" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "0.5.2" + +[[PositiveFactorizations]] +deps = ["LinearAlgebra", "Test"] +git-tree-sha1 = "957c3dd7c33895469760ce873082fbb6b3620641" +uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" +version = "0.2.2" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[Profile]] +deps = ["Printf"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" + +[[RData]] +deps = ["CategoricalArrays", "CodecZlib", "DataFrames", "Dates", "FileIO", "TimeZones"] +git-tree-sha1 = "3b0fc2f7df61b8890502851281c1eb0d2407d6ac" +uuid = "df47a6cb-8c03-5eed-afd8-b6050d6c41da" +version = "0.6.2" + +[[RDatasets]] +deps = ["CSV", "CodecZlib", "DataFrames", "FileIO", "Printf", "RData", "Reexport", "Test"] +git-tree-sha1 = "4d93a52b94397bf0a9477d04e7a151a688672695" +uuid = "ce6b1742-4840-55fa-b093-852dadbb1d8b" +version = "0.6.1" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Reexport]] +deps = ["Pkg"] +git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "0.2.0" + +[[Requires]] +deps = ["Test"] +git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "0.5.2" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[ScikitLearnBase]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "7877e55c1523a4b336b433da39c8e8c08d2f221f" +uuid = "6e75b9c4-186b-50bd-896f-2d2496a4843e" +version = "0.5.0" + +[[Serialization]] +uuid = 
"9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures", "Random", "Test"] +git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "0.3.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["BinDeps", "BinaryProvider", "Libdl", "Test"] +git-tree-sha1 = "0b45dc2e45ed77f445617b99ff2adf0f5b0f23ea" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "0.7.2" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "db23bbf50064c582b6f2b9b043c8e7e98ea8c0c6" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "0.11.0" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsBase]] +deps = ["DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] +git-tree-sha1 = "2b6ca97be7ddfad5d9f16a13fe277d29f3d11c23" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.31.0" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.0" + +[[Tables]] +deps = ["DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "Requires", "TableTraits", "Test"] +git-tree-sha1 = "2e5d1a0d9b574ee2ed0c1a2fe32807de022376dd" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "0.2.9" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TimeZones]] +deps = ["Dates", "EzXML", "Mocking", "Printf", "Serialization", "Unicode"] +git-tree-sha1 = "859bfc1832ea52e413c96fa5c92130516db62bdb" +uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" +version = "0.9.1" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "a25d8e5a28c3b1b06d3859f30757d43106791919" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.4" + +[[URIParser]] +deps = ["Test", "Unicode"] +git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69" +uuid = "30578b45-9adc-5946-b283-645ec420af67" +version = "0.4.0" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[WeakRefStrings]] +deps = ["Random", "Test"] +git-tree-sha1 = "9a0bb82eede528debe631b642eeb48a631a69bc2" +uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" +version = "0.6.1" diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..7095f74 --- /dev/null +++ b/Project.toml @@ -0,0 +1,24 @@ +name = "LowRankModels" +uuid = "15d4e49f-4837-5ea3-a885-5b28bfa376dc" +keywords = ["matrix factorization", "missing data", "imputation"] +license = "MIT" +version = "1.0.0" + +[deps] +Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NMF = "6ef6ca0d-6ad7-5ff6-b225-e928bfa0a386" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ScikitLearnBase = "6e75b9c4-186b-50bd-896f-2d2496a4843e" +SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383" 
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" + +[extras] +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" + +[targets] +test = ["RDatasets"] diff --git a/README.md b/README.md index c9340c4..da7907d 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ to construct a model suitable for a particular data set. In particular, it supports * using different loss functions for different columns of the data array, - which is useful when data types are heterogeneous + which is useful when data mutable structs are heterogeneous (eg, real, boolean, and ordinal columns); * fitting the model to only *some* of the entries in the table, which is useful for data tables with many missing (unobserved) entries; and * adding offsets and scalings to the model without destroying sparsity, @@ -47,7 +47,7 @@ The data is modeled as `X'*Y`, where `X` is a `k`x`m` matrix and `Y` is a `k`x`n minimize sum_{(i,j) in obs} losses[j]((X'*Y)[i,j], A[i,j]) + sum_i rx(X[:,i]) + sum_j ry(Y[:,j]) -The basic type used by LowRankModels.jl is the GLRM. To form a GLRM, +The basic mutable struct used by LowRankModels.jl is the GLRM. To form a GLRM, the user specifies * the data `A` (any `AbstractArray`, such as an array, a sparse matrix, or a data frame) @@ -68,7 +68,7 @@ see the discussion of [sparse matrices](https://github.com/madeleineudell/LowRan `X₀` and `Y₀` are initialization matrices that represent a starting guess for the optimization. -Losses and regularizers must be of type `Loss` and `Regularizer`, respectively, +Losses and regularizers must be of mutable struct `Loss` and `Regularizer`, respectively, and may be chosen from a list of supported losses and regularizers, which include Losses: @@ -99,7 +99,7 @@ Regularizers: Each of these losses and regularizers can be scaled (for example, to increase the importance of the loss relative to the regularizer) -by calling `scale!(loss, newscale)`. +by calling `mul!(loss, newscale)`. Users may also implement their own losses and regularizers, or adjust internal parameters of the losses and regularizers; see [losses.jl](https://github.com/madeleineudell/LowRankModels.jl/blob/src/losses.jl) and [regularizers.jl](https://github.com/madeleineudell/LowRankModels.jl/blob/master/src/regularizers.jl) for more details. @@ -151,7 +151,7 @@ Then initialize the model using GLRM(A,losses,rx,ry,k, obs=obs) If `A` is a DataFrame and you just want the model to ignore -any entry that is of type `NA`, you can use +any entry that is of mutable struct `NA`, you can use obs = observations(A) @@ -219,7 +219,7 @@ and ordinal HingeLoss loss for integer columns, a small amount of QuadLoss regularization, and scaling and adding an offset to the model as described [here](#scaling). It returns the column labels for the columns it fit, along with the model. -Right now, all other data types are ignored. +Right now, all other data mutable structs are ignored. `NaN` values are treated as missing values (`NA`s) and ignored in the fit. The full call signature is @@ -232,7 +232,7 @@ function GLRM(df::DataFrame, k::Int; You can modify the losses or regularizers, or turn off offsets or scaling, using these keyword arguments. 
-Or to specify a map from data types to losses, define a new loss_map from datatypes to losses (like probabilistic_losses, below): +Or to specify a map from data types to losses, define a new loss_map from datatypes to losses (like probabilistic_losses, below): ``` probabilistic_losses = Dict{Symbol, Any}( :real => QuadLoss, @@ -413,7 +413,7 @@ These functions should help you choose adequate regularization for your model. ## Regularization paths -* `regularization_path(glrm::GLRM; params=Params(), reg_params=logspace(2,-2,5), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path"))`: computes the train and test error for GLRMs varying the scaling of the regularization through any scaling factor in the array `reg_params`. +* `regularization_path(glrm::GLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path"))`: computes the train and test error for GLRMs varying the scaling of the regularization through any scaling factor in the array `reg_params`. ## Utilities diff --git a/REQUIRE b/REQUIRE index 9b243f4..b3be9ef 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,10 +1,9 @@ -julia 0.6 +julia 1.0 +LinearAlgebra +Random DataFrames StatsBase Optim NMF -Compat ScikitLearnBase -DataArrays -FactCheck -Missings +Arpack diff --git a/examples/censored.jl b/examples/censored.jl index 1a233c3..96213e2 100644 --- a/examples/censored.jl +++ b/examples/censored.jl @@ -1,6 +1,6 @@ -using DataFrames, LowRankModels, Compat +using LowRankModels, DataFrames, Random, StatsBase -srand(0) +Random.seed!(0) println("censored data example") # boolean example with only entries greater than threshold t observed @@ -11,7 +11,7 @@ A = rand(m,ktrue)*rand(ktrue,n) B = round.(Int, ktrue*rand(m,n) .>= A) # Bernoulli samples with probability proportional to A losses = fill(QuadLoss(),n) r = QuadReg(.1) -@compat obs = @compat Array{Tuple{Int,Int}}(0) +obs = Array{Tuple{Int,Int}}(undef,0) for i=1:m for j=1:n if B[i,j] == 1 @@ -21,27 +21,27 @@ for i=1:m end (train_observed_features, train_observed_examples, test_observed_features, test_observed_examples) = - get_train_and_test(obs, m, n, .2) + get_train_and_test(obs, m, n, .2) train_glrm = GLRM(B,losses,r,r,k, observed_features=train_observed_features, observed_examples=train_observed_examples,) test_glrm = GLRM(B,losses,r,r,k, observed_features=test_observed_features, observed_examples=test_observed_examples) -function censored_regularization_path(train_glrm::GLRM, test_glrm::GLRM; params=Params(), reg_params=logspace(2,-2,5), +function censored_regularization_path(train_glrm::GLRM, test_glrm::GLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path")) m,n = size(train_glrm.A) ntrain = sum(map(length, train_glrm.observed_features)) ntest = sum(map(length, test_glrm.observed_features)) - train_error = @compat Array{Float64}(length(reg_params)) - test_error = @compat Array{Float64}(length(reg_params)) - @compat solution = @compat Array{Tuple{Float64,Float64}}(length(reg_params)) - train_time = @compat Array{Float64}(length(reg_params)) + train_error = Array{Float64}(undef,length(reg_params)) + test_error = Array{Float64}(undef,length(reg_params)) + solution = Array{Tuple{Float64,Float64}}(undef, length(reg_params)) + train_time = Array{Float64}(undef, length(reg_params)) for iparam=1:length(reg_params) reg_param = reg_params[iparam] # evaluate
train and test error if verbose println("fitting train GLRM for reg_param $reg_param") end - scale!(train_glrm.rx, reg_param) - scale!(train_glrm.ry, reg_param) + mul!(train_glrm.rx, reg_param) + mul!(train_glrm.ry, reg_param) train_glrm.X, train_glrm.Y = randn(train_glrm.k,m), randn(train_glrm.k,n) X, Y, ch = fit!(train_glrm; params=params, ch=ch, verbose=verbose) train_time[iparam] = ch.times[end] @@ -56,10 +56,11 @@ function censored_regularization_path(train_glrm::GLRM, test_glrm::GLRM; params= return train_error, test_error, train_time, reg_params, solution end -train_error, test_error, train_time, reg_params, solution = +(train_error, test_error, train_time, reg_params, solution) = censored_regularization_path(train_glrm, test_glrm, params=ProxGradParams(1, max_iter=50, abs_tol=.001, min_stepsize=.1), - reg_params=logspace(2,-2,3)) + reg_params=exp10.(range(2, stop=-2, length=3)) ) + df = DataFrame(train_error = train_error, test_error = test_error, train_time = train_time, reg_param = reg_params, solution_1norm = [s[2] for s in solution]) println(df) diff --git a/examples/cross_validate.jl b/examples/cross_validate.jl index 4ced365..d576aca 100644 --- a/examples/cross_validate.jl +++ b/examples/cross_validate.jl @@ -1,7 +1,7 @@ -using DataFrames, LowRankModels +using DataFrames, LowRankModels, Random, SparseArrays println("cross validation example") -srand(5) +Random.seed!(5) do_cv = true do_cv_by_iter = true @@ -31,7 +31,7 @@ if do_reg_path println("Computing regularization path") params = Params(1.0, max_iter=50, abs_tol=.00001, min_stepsize=.01) train_error, test_error, train_time, reg_params = - regularization_path(glrm, params=params, reg_params=logspace(2,-2,15)) + regularization_path(glrm, params=params, reg_params=exp10.(range(2,stop=-2,length=15))) df = DataFrame(train_error = train_error, test_error = test_error, train_time = train_time, reg_param = reg_params) if do_plot diff --git a/examples/fit_rdataset.jl b/examples/fit_rdataset.jl index ce92501..6600a52 100644 --- a/examples/fit_rdataset.jl +++ b/examples/fit_rdataset.jl @@ -5,11 +5,11 @@ using LowRankModels # pick a data set df = RDatasets.dataset("psych", "msq") -# make a GLRM on the whole dataframe using type imputation +# make a GLRM on the whole dataframe using type imputation auto_glrm = GLRM(df, 3) -# now we'll try it without type imputation -# we'll just fit four of the columns, to try out all four data types +# now we'll try it without type imputation +# we'll just fit four of the columns, to try out all four data types dd = DataFrame([df[s] for s in [:TOD, :Scale, :Vigorous, :Wakeful]]) dd[end] = (dd[end].==1) datatypes = [:real, :cat, :ord, :bool] diff --git a/examples/precision_at_k.jl b/examples/precision_at_k.jl index 3ac86be..7976228 100644 --- a/examples/precision_at_k.jl +++ b/examples/precision_at_k.jl @@ -1,4 +1,4 @@ -using DataFrames, LowRankModels, Compat +using DataFrames, LowRankModels # boolean example with only entries greater than threshold t observed # ie, censored data @@ -9,7 +9,7 @@ println("max value of A is ",maximum(maximum(A))," which is less than $ktrue") B = round.(Int, ktrue*rand(m,n) .>= A) # Bernoulli samples with probability proportional to A losses = fill(QuadLoss(),n) r = QuadReg(.1) -obs = @compat Array{Tuple{Int,Int}}(0) +obs = Array{Tuple{Int,Int}}(undef, 0) for i=1:m for j=1:n if B[i,j] == 1 @@ -24,8 +24,9 @@ end train_glrm = GLRM(B,losses,r,r,k, observed_features=train_observed_features, observed_examples=train_observed_examples) train_error,
test_error, prec_at_k, train_time, reg_params, solution = - precision_at_k(train_glrm, test_observed_features, params=Params(1,200,.00001,.01), - reg_params=logspace(2,-2,9)) + precision_at_k(train_glrm, test_observed_features, + params=Params(1, max_iter=200, abs_tol=0.00001, min_stepsize=0.01), + reg_params=exp10.(range(2,stop=-2,length=9))) df = DataFrame(train_error = train_error, prec_at_k = prec_at_k, - train_time = train_time, reg_param = reg_params, solution_1norm = [s[2] for s in solution]) + train_time = train_time, reg_param = reg_params, solution_1norm = [s[2] for s in solution]); println(df) diff --git a/examples/sharetest.jl b/examples/sharetest.jl index 4a4c95c..87b2e6e 100644 --- a/examples/sharetest.jl +++ b/examples/sharetest.jl @@ -3,7 +3,7 @@ println("loading LowRankModels") function fit_pca(m,n,k) # matrix to encode - srand(1) + Random.seed!(1) A = randn(m,k)*randn(k,n) X=randn(k,m) Y=randn(k,n) @@ -19,5 +19,5 @@ function fit_pca(m,n,k) return A,X,Y,ch end -@everywhere srand(1) +@everywhere Random.seed!(1) fit_pca(100,100,50) \ No newline at end of file diff --git a/examples/simple_glrms.jl b/examples/simple_glrms.jl index 7ade066..e563ae6 100644 --- a/examples/simple_glrms.jl +++ b/examples/simple_glrms.jl @@ -100,7 +100,7 @@ function fit_soft_kmeans(m,n,k) end if true - srand(10) + Random.seed!(10) fit_pca(100,100,2) fit_pca_nucnorm(100,100,2) fit_pca_nucnorm_sparse(500,500,2,10000) diff --git a/src/LowRankModels.jl b/src/LowRankModels.jl index c2cd908..edc0191 100644 --- a/src/LowRankModels.jl +++ b/src/LowRankModels.jl @@ -2,11 +2,14 @@ __precompile__() module LowRankModels -using Compat -using Missings - -import Base: scale!, show -import StatsBase: fit!, mode +using Printf +using SharedArrays +using SparseArrays +using Random + +import LinearAlgebra: dot, norm, Diagonal, rmul!, mul! +import Base: show +import StatsBase: fit!, mode, mean, var, std # define losses, regularizers, convergence history include("domains.jl") diff --git a/src/algorithms/proxgrad.jl b/src/algorithms/proxgrad.jl index 626dfd3..98bf399 100644 --- a/src/algorithms/proxgrad.jl +++ b/src/algorithms/proxgrad.jl @@ -1,7 +1,7 @@ ### Proximal gradient method export ProxGradParams, fit! -type ProxGradParams<:AbstractParams +mutable struct ProxGradParams<:AbstractParams stepsize::Float64 # initial stepsize max_iter::Int # maximum number of outer iterations inner_iter_X::Int # how many prox grad steps to take on X before moving on to Y (and vice versa) @@ -42,7 +42,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; ry = glrm.ry X = glrm.X; Y = glrm.Y # check that we didn't initialize to zero (otherwise we will never move) - if vecnorm(Y) == 0 + if norm(Y) == 0 Y = .1*randn(k,d) end k = glrm.k @@ -62,7 +62,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; glrm.Y = randn(glrm.k, d) end - XY = @compat Array{Float64}((m, d)) + XY = Array{Float64}(undef, (m, d)) gemm!('T','N',1.0,X,Y,0.0,XY) # XY = X' * Y initial calculation # step size (will be scaled below to ensure it never exceeds 1/\|g\|_2 or so for any subproblem) @@ -116,7 +116,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; for inneri=1:params.inner_iter_X for e=1:m # for every example x_e == ve[e] - scale!(g, 0) # reset gradient to 0 + fill!(g, 0.)
# reset gradient to 0 # compute gradient of L with respect to Xᵢ as follows: # ∇{Xᵢ}L = Σⱼ dLⱼ(XᵢYⱼ)/dXᵢ for f in glrm.observed_features[e] @@ -141,11 +141,11 @@ function fit!(glrm::GLRM, params::ProxGradParams; ## prox step: Xᵢ = prox_rx(Xᵢ, α/l) prox!(rx[e],newve[e],stepsize) if row_objective(glrm, e, newve[e]) < obj_by_row[e] - copy!(ve[e], newve[e]) + copyto!(ve[e], newve[e]) alpharow[e] *= 1.05 break else # the stepsize was too big; undo and try again only smaller - copy!(newve[e], ve[e]) + copyto!(newve[e], ve[e]) alpharow[e] *= .7 if alpharow[e] < params.min_stepsize alpharow[e] = params.min_stepsize * 1.1 @@ -158,7 +158,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; end # inner iteration # STEP 2: Y update for inneri=1:params.inner_iter_Y - scale!(G, 0) + fill!(G, 0.) for f=1:n # compute gradient of L with respect to Yⱼ as follows: # ∇{Yⱼ}L = Σⱼ dLⱼ(XᵢYⱼ)/dYⱼ @@ -185,12 +185,12 @@ function fit!(glrm::GLRM, params::ProxGradParams; prox!(ry[f],newvf[f],stepsize) new_obj_by_col = col_objective(glrm, f, newvf[f]) if new_obj_by_col < obj_by_col[f] - copy!(vf[f], newvf[f]) + copyto!(vf[f], newvf[f]) alphacol[f] *= 1.05 obj_by_col[f] = new_obj_by_col break else - copy!(newvf[f], vf[f]) + copyto!(newvf[f], vf[f]) alphacol[f] *= .7 if alphacol[f] < params.min_stepsize alphacol[f] = params.min_stepsize * 1.1 diff --git a/src/algorithms/proxgrad_multithread.jl b/src/algorithms/proxgrad_multithread.jl index 0204a45..5f0ff33 100644 --- a/src/algorithms/proxgrad_multithread.jl +++ b/src/algorithms/proxgrad_multithread.jl @@ -1,7 +1,7 @@ ### Proximal gradient method export ProxGradParams, fit! -type ProxGradParams<:AbstractParams +mutable struct ProxGradParams<:AbstractParams stepsize::Float64 # initial stepsize max_iter::Int # maximum number of outer iterations inner_iter_X::Int # how many prox grad steps to take on X before moving on to Y (and vice versa) @@ -42,7 +42,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; ry = glrm.ry X = glrm.X; Y = glrm.Y # check that we didn't initialize to zero (otherwise we will never move) - if vecnorm(Y) == 0 + if norm(Y) == 0 Y = .1*randn(k,d) end k = glrm.k @@ -62,7 +62,7 @@ function fit!(glrm::GLRM, params::ProxGradParams; glrm.Y = randn(glrm.k, d) end - XY = @compat Array{Float64}((m, d)) + XY = Array{Float64}(undef, (m, d)) gemm!('T','N',1.0,X,Y,0.0,XY) # XY = X' * Y initial calculation # step size (will be scaled below to ensure it never exceeds 1/\|g\|_2 or so for any subproblem) @@ -116,7 +116,8 @@ function fit!(glrm::GLRM, params::ProxGradParams; for inneri=1:params.inner_iter_X Threads.@threads for e=1:m # for every example x_e == ve[e] - scale!(g[Threads.threadid()], 0) # reset gradient to 0 + # for e=1:m # for every example x_e == ve[e] + g[Threads.threadid()] .= 0 # reset gradient to 0 # compute gradient of L with respect to Xᵢ as follows: # ∇{Xᵢ}L = Σⱼ dLⱼ(XᵢYⱼ)/dXᵢ for f in glrm.observed_features[e] @@ -141,11 +142,11 @@ function fit!(glrm::GLRM, params::ProxGradParams; ## prox step: Xᵢ = prox_rx(Xᵢ, α/l) prox!(rx[e],newve[e],stepsize) if row_objective(glrm, e, newve[e]) < obj_by_row[e] - copy!(ve[e], newve[e]) + copyto!(ve[e], newve[e]) alpharow[e] *= 1.05 # choose a more aggressive stepsize break else # the stepsize was too big; undo and try again only smaller - copy!(newve[e], ve[e]) + copyto!(newve[e], ve[e]) alpharow[e] *= .7 # choose a less aggressive stepsize if alpharow[e] < params.min_stepsize alpharow[e] = params.min_stepsize * 1.1 @@ -158,8 +159,9 @@ function fit!(glrm::GLRM, params::ProxGradParams; end # inner iteration 
# STEP 2: Y update for inneri=1:params.inner_iter_Y - scale!(G, 0) + G .= 0 Threads.@threads for f=1:n + # for f=1:n # compute gradient of L with respect to Yⱼ as follows: # ∇{Yⱼ}L = Σⱼ dLⱼ(XᵢYⱼ)/dYⱼ for e in glrm.observed_examples[f] @@ -185,12 +187,12 @@ function fit!(glrm::GLRM, params::ProxGradParams; prox!(ry[f],newvf[f],stepsize) new_obj_by_col = col_objective(glrm, f, newvf[f]) if new_obj_by_col < obj_by_col[f] - copy!(vf[f], newvf[f]) + copyto!(vf[f], newvf[f]) alphacol[f] *= 1.05 obj_by_col[f] = new_obj_by_col break else - copy!(newvf[f], vf[f]) + copyto!(newvf[f], vf[f]) alphacol[f] *= .7 if alphacol[f] < params.min_stepsize alphacol[f] = params.min_stepsize * 1.1 diff --git a/src/algorithms/quad_streaming.jl b/src/algorithms/quad_streaming.jl index 5b7a55a..8408645 100644 --- a/src/algorithms/quad_streaming.jl +++ b/src/algorithms/quad_streaming.jl @@ -4,7 +4,7 @@ export StreamingParams, streaming_fit!, streaming_impute! -type StreamingParams<:AbstractParams +mutable struct StreamingParams<:AbstractParams T0::Int # number of rows to use to initialize Y before streaming begins stepsize::Float64 # stepsize (inverse of memory) Y_update_interval::Int # how often to prox Y @@ -139,7 +139,7 @@ function streaming_impute!(glrm::GLRM, params::StreamingParams=StreamingParams() end """ Constructs new GLRM on subset of rows of the data from input glrm """ -function keep_rows(glrm, r::Range{Int}) +function keep_rows(glrm, r::UnitRange{Int}) @assert maximum(r) <= size(glrm.A, 1) obs = flatten_observations(glrm.observed_features) first_row = minimum(r) diff --git a/src/algorithms/sparse_proxgrad.jl b/src/algorithms/sparse_proxgrad.jl index bc44b07..e0702cb 100644 --- a/src/algorithms/sparse_proxgrad.jl +++ b/src/algorithms/sparse_proxgrad.jl @@ -1,7 +1,7 @@ ### Proximal gradient method export SparseProxGradParams, fit! 
-type SparseProxGradParams<:AbstractParams +mutable struct SparseProxGradParams<:AbstractParams stepsize::Float64 # initial stepsize max_iter::Int # maximum number of outer iterations inner_iter::Int # how many prox grad steps to take on X before moving on to Y (and vice versa) @@ -60,7 +60,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; # STEP 1: X update for inneri=1:params.inner_iter for e=1:m # doing this means looping over XY in row-major order, but otherwise we couldn't parallelize over Xᵢs - scale!(g, 0)# reset gradient to 0 + fill!(g, 0)# reset gradient to 0 # compute gradient of L with respect to Xᵢ as follows: # ∇{Xᵢ}L = Σⱼ dLⱼ(XᵢYⱼ)/dXᵢ for f in glrm.observed_features[e] @@ -71,7 +71,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; end # take a proximal gradient step l = length(glrm.observed_features[e]) + 1 - scale!(g, -alpha/l) + rmul!(g, -alpha/l) ## gradient step: Xᵢ += -(α/l) * ∇{Xᵢ}L axpy!(1,g,ve[e]) ## prox step: Xᵢ = prox_rx(Xᵢ, α/l) @@ -81,7 +81,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; # STEP 2: Y update for inneri=1:params.inner_iter for f=1:n - scale!(g, 0) # reset gradient to 0 + fill!(g, 0) # reset gradient to 0 # compute gradient of L with respect to Yⱼ as follows: # ∇{Yⱼ}L = Σⱼ dLⱼ(XᵢYⱼ)/dYⱼ for e in glrm.observed_examples[f] @@ -91,7 +91,7 @@ function fit!(glrm::GLRM, params::SparseProxGradParams; end # take a proximal gradient step l = length(glrm.observed_examples[f]) + 1 - scale!(g, -alpha/l) + rmul!(g, -alpha/l) ## gradient step: Yⱼ += -(α/l) * ∇{Yⱼ}L axpy!(1,g,vf[f]) ## prox step: Yⱼ = prox_ryⱼ(Yⱼ, α/l) diff --git a/src/convergence.jl b/src/convergence.jl index 8694cfc..bcaf039 100644 --- a/src/convergence.jl +++ b/src/convergence.jl @@ -1,6 +1,6 @@ export ConvergenceHistory, update_ch!
-type ConvergenceHistory +mutable struct ConvergenceHistory name::AbstractString objective::Array dual_objective::Array diff --git a/src/cross_validate.jl b/src/cross_validate.jl index 4ae6409..5f0019e 100644 --- a/src/cross_validate.jl +++ b/src/cross_validate.jl @@ -17,10 +17,10 @@ function cross_validate(glrm::AbstractGLRM; obs = flatten_observations(glrm.observed_features) if verbose println("computing CV folds") end folds = getfolds(obs, nfolds, size(glrm.A)..., do_check = do_obs_check) - train_glrms = Array{typeof(glrm)}(nfolds) - test_glrms = Array{typeof(glrm)}(nfolds) - train_error = @compat Array{Float64}(nfolds) - test_error = @compat Array{Float64}(nfolds) + train_glrms = Array{typeof(glrm)}(undef, nfolds) + test_glrms = Array{typeof(glrm)}(undef, nfolds) + train_error = Array{Float64}(undef, nfolds) + test_error = Array{Float64}(undef, nfolds) for ifold=1:use_folds if verbose println("\nforming train and test GLRM for fold $ifold") end train_observed_features, train_observed_examples, test_observed_features, test_observed_examples = folds[ifold] @@ -52,12 +52,12 @@ function cross_validate(glrm::AbstractGLRM; return train_error, test_error, train_glrms, test_glrms end -@compat function getfolds(obs::Array{Tuple{Int,Int},1}, nfolds, m, n; ntrials = 5, do_check = true) +function getfolds(obs::Array{Tuple{Int,Int},1}, nfolds, m, n; ntrials = 5, do_check = true) # partition elements of obs into nfolds groups - groups = @compat Array{Int}(size(obs)) + groups = Array{Int}(undef, size(obs)) rand!(groups, 1:nfolds) # fill an array with random 1 through N # create the training and testing observations for each fold - folds = @compat Array{Tuple}(nfolds) + folds = Array{Tuple}(undef, nfolds) for itrial = 1:ntrials enough_observations = 0 for ifold=1:nfolds @@ -89,7 +89,7 @@ end function get_train_and_test(obs, m, n, holdout_proportion=.1) # generate random uniform number for each observation - groups = @compat Array{Float64}(size(obs)) + groups = Array{Float64}(undef, size(obs)) rand!(groups) # create the training and testing observations @@ -103,7 +103,7 @@ function get_train_and_test(obs, m, n, holdout_proportion=.1) end function flatten_observations(observed_features::ObsArray) - obs = @compat Array{Tuple{Int,Int}}(0) + obs = Array{Tuple{Int,Int}}(undef, 0) for (i, features_in_example_i) in enumerate(observed_features) for j in features_in_example_i push!(obs, (i,j)) @@ -124,7 +124,7 @@ function flatten(x, y) end y end -flatten{T}(x::Array{T})=flatten(x,Array(T, 0)) +flatten(x::Array{T}) where T=flatten(x,Array(T, 0)) function flattenarray(x, y) if typeof(x)<:Array @@ -136,7 +136,7 @@ function flattenarray(x, y) end y end -flattenarray{T}(x::Array{T})=flattenarray(x,Array(T, 0)) +flattenarray(x::Array{T}) where T=flattenarray(x,Array(T, 0)) function cv_by_iter(glrm::AbstractGLRM, holdout_proportion=.1, params=Params(100,max_iter=1,abs_tol=.01,min_stepsize=.01), @@ -163,8 +163,8 @@ function cv_by_iter(glrm::AbstractGLRM, holdout_proportion=.1, niters = params.max_iter params.max_iter = 1 - train_error = @compat Array{Float64}(niters) - test_error = @compat Array{Float64}(niters) + train_error = Array{Float64}(undef, niters) + test_error = Array{Float64}(undef, niters) if verbose @printf("%12s%12s%12s\n", "train error", "test error", "time") t0 = time() @@ -181,7 +181,7 @@ function cv_by_iter(glrm::AbstractGLRM, holdout_proportion=.1, return train_error, test_error end -function regularization_path(glrm::AbstractGLRM; params=Params(), reg_params=logspace(2,-2,5), +function 
regularization_path(glrm::AbstractGLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path")) if verbose println("flattening observations") end @@ -211,16 +211,16 @@ end # For each value of the regularization parameter, # compute the training error, ie, average error (sum over (i,j) in train_glrm.obs of L_j(A_ij, x_i y_j)) # and the test error, ie, average error (sum over (i,j) in test_glrm.obs of L_j(A_ij, x_i y_j)) -function regularization_path(train_glrm::AbstractGLRM, test_glrm::AbstractGLRM; params=Params(), reg_params=logspace(2,-2,5), +function regularization_path(train_glrm::AbstractGLRM, test_glrm::AbstractGLRM; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path")) - train_error = @compat Array{Float64}(length(reg_params)) - test_error = @compat Array{Float64}(length(reg_params)) + train_error = Array{Float64}(undef, length(reg_params)) + test_error = Array{Float64}(undef, length(reg_params)) ntrain = sum(map(length, train_glrm.observed_features)) ntest = sum(map(length, test_glrm.observed_features)) if verbose println("training model on $ntrain samples and testing on $ntest") end @show params - train_time = @compat Array{Float64}(length(reg_params)) + train_time = Array{Float64}(undef, length(reg_params)) for iparam=1:length(reg_params) reg_param = reg_params[iparam] # evaluate train and test error @@ -240,18 +240,18 @@ function regularization_path(train_glrm::AbstractGLRM, test_glrm::AbstractGLRM; end -function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params(), reg_params=logspace(2,-2,5), +function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params(), reg_params=exp10.(range(2,stop=-2,length=5)), holdout_proportion=.1, verbose=true, ch::ConvergenceHistory=ConvergenceHistory("reg_path"), kprec=10) m,n = size(train_glrm.A) ntrain = sum(map(length, train_glrm.observed_features)) ntest = sum(map(length, test_observed_features)) train_observed_features = train_glrm.observed_features - train_error = @compat Array{Float64}(length(reg_params)) - test_error = @compat Array{Float64}(length(reg_params)) - prec_at_k = @compat Array{Float64}(length(reg_params)) - @compat solution = @compat Array{Tuple{Float64,Float64}}(length(reg_params)) - train_time = @compat Array{Float64}(length(reg_params)) + train_error = Array{Float64}(undef, length(reg_params)) + test_error = Array{Float64}(undef, length(reg_params)) + prec_at_k = Array{Float64}(undef, length(reg_params)) + solution = Array{Tuple{Float64,Float64}}(undef, length(reg_params)) + train_time = Array{Float64}(undef, length(reg_params)) test_glrm = GLRM(train_glrm.A, train_glrm.losses, train_glrm.rx, train_glrm.ry, train_glrm.k, X=copy(train_glrm.X), Y=copy(train_glrm.Y), observed_features = test_observed_features) @@ -259,8 +259,8 @@ function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params( reg_param = reg_params[iparam] # evaluate train error if verbose println("fitting train GLRM for reg_param $reg_param") end - scale!(train_glrm.rx, reg_param) - scale!(train_glrm.ry, reg_param) + mul!(train_glrm.rx, reg_param) + mul!(train_glrm.ry, reg_param) train_glrm.X, train_glrm.Y = randn(train_glrm.k,m), randn(train_glrm.k,n) # this bypasses the error checking in GLRM(). Risky. 
X, Y, ch = fit!(train_glrm, params, ch=ch, verbose=verbose) train_time[iparam] = ch.times[end] @@ -296,8 +296,8 @@ function precision_at_k(train_glrm::GLRM, test_observed_features; params=Params( end end prec_at_k[iparam] = true_pos / (true_pos + false_pos) - if verbose println("\prec_at_k: $(prec_at_k[iparam])") end - solution[iparam] = (sum(X)+sum(Y), sum(abs(X))+sum(abs(Y))) + if verbose println("\tprec_at_k: $(prec_at_k[iparam])") end + solution[iparam] = (sum(X)+sum(Y), sum(abs.(X))+sum(abs.(Y))) if verbose println("\tsum of solution, one norm of solution: $(solution[iparam])") end end return train_error, test_error, prec_at_k, train_time, reg_params, solution diff --git a/src/domains.jl b/src/domains.jl index 6d90c77..83fb0f4 100644 --- a/src/domains.jl +++ b/src/domains.jl @@ -18,21 +18,21 @@ export Domain, # the abstract type RealDomain, BoolDomain, OrdinalDomain, PeriodicDomain, CountDomain, CategoricalDomain, # the domains copy -@compat abstract type Domain end +abstract type Domain end ########################################## REALS ########################################## # Real data can take values from ℜ -immutable RealDomain<:Domain +struct RealDomain<:Domain end ########################################## BOOLS ########################################## # Boolean data should take values from {true, false} -immutable BoolDomain<:Domain +struct BoolDomain<:Domain end ########################################## ORDINALS ########################################## # Ordinal data should take integer values ranging from `min` to `max` -immutable OrdinalDomain<:Domain +struct OrdinalDomain<:Domain min::Int max::Int function OrdinalDomain(min, max) @@ -45,7 +45,7 @@ end ########################################## ORDINALS ########################################## # Categorical data should take integer values ranging from 1 to `max` -immutable CategoricalDomain<:Domain +struct CategoricalDomain<:Domain min::Int max::Int end @@ -53,12 +53,12 @@ CategoricalDomain(m::Int) = CategoricalDomain(1,m) ########################################## PERIODIC ########################################## # Periodic data can take values from ℜ, but given a period T, we should have error_metric(a,a+T) = 0 -immutable PeriodicDomain<:Domain +struct PeriodicDomain<:Domain T::Float64 # the period end ########################################## COUNTS ########################################## # Count data can take values over ℕ, which we approximate as {0, 1, 2 ... `max_count`} -immutable CountDomain<:Domain +struct CountDomain<:Domain max_count::Int # the biggest possible count end diff --git a/src/evaluate_fit.jl b/src/evaluate_fit.jl index 68e28ec..5c58b59 100644 --- a/src/evaluate_fit.jl +++ b/src/evaluate_fit.jl @@ -59,8 +59,7 @@ function objective(glrm::GLRM, X::Array{Float64,2}, Y::Array{Float64,2}; yidxs = get_yidxs(glrm.losses), kwargs...) @assert(size(Y)==(glrm.k,yidxs[end][end])) @assert(size(X)==(glrm.k,size(glrm.A,1))) - XY = @compat Array{Float64}((size(X,2), size(Y,2))) - XY = Array{Float64}((size(X,2), size(Y,2))) + XY = Array{Float64}(undef, (size(X,2), size(Y,2))) if sparse # Calculate X'*Y only at observed entries of A m,n = size(glrm.A) @@ -148,7 +147,7 @@ function error_metric(glrm::AbstractGLRM, XY::Array{Float64,2}, domains::Array{D end # The user can also pass in X and Y and `error_metric` will compute XY for them function error_metric(glrm::AbstractGLRM, X::Array{Float64,2}, Y::Array{Float64,2}, domains::Array{Domain,1}=Domain[l.domain for l in glrm.losses]; kwargs...) 
- XY = @compat Array{Float64}((size(X,2), size(Y,2))) + XY = Array{Float64}(undef, (size(X,2), size(Y,2))) gemm!('T','N',1.0,X,Y,0.0,XY) error_metric(glrm, XY, domains; kwargs...) end diff --git a/src/fit.jl b/src/fit.jl index f026209..87d2544 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -1,11 +1,11 @@ export fit, fit!, Params ### PARAMETERS TYPE -@compat abstract type AbstractParams end +abstract type AbstractParams end Params(args...; kwargs...) = ProxGradParams(args...; kwargs...) # default in-place fitting uses proximal gradient method -@compat function fit!(glrm::AbstractGLRM; kwargs...) +function fit!(glrm::AbstractGLRM; kwargs...) kwdict = Dict(kwargs) if :params in keys(kwdict) return fit!(glrm, kwdict[:params]; kwargs...) @@ -22,8 +22,8 @@ end # fit without modifying the glrm object function fit(glrm::AbstractGLRM, args...; kwargs...) - X0 = @compat Array{Float64}(size(glrm.X)) - Y0 = @compat Array{Float64}(size(glrm.Y)) + X0 = Array{Float64}(undef, size(glrm.X)) + Y0 = Array{Float64}(undef, size(glrm.Y)) copy!(X0, glrm.X); copy!(Y0, glrm.Y) X,Y,ch = fit!(glrm, args...; kwargs...) copy!(glrm.X, X0); copy!(glrm.Y, Y0) diff --git a/src/fit_dataframe.jl b/src/fit_dataframe.jl index 3a62310..0eaa53c 100644 --- a/src/fit_dataframe.jl +++ b/src/fit_dataframe.jl @@ -1,12 +1,10 @@ +# ======================================== +# REVIEW THIS IN LIGHT OF NEW DATAFRAMES +# ======================================== + import Base: isnan -import DataArrays: DataArray, isna, dropna, NA, NAtype -if VERSION < v"0.6.0-pre" - import DataArrays: array -else - import DataArrays: isnan -end import DataFrames: DataFrame, ncol, convert -import Missings: Missing, missing, ismissing + export GLRM, observations, expand_categoricals!, NaNs_to_NAs!, NAs_to_0s!, NaNs_to_Missing!, ismissing_vec @@ -31,7 +29,7 @@ function GLRM(df::DataFrame, k::Int, datatypes::Array{Symbol,1}; end # validate input # XXX check for undefined entry? - for dt in datatypes + for dt in datatypes if !(dt in keys(loss_map)) error("data types must be either :real, :bool, :ord, or :cat, not $dt") end @@ -118,12 +116,9 @@ function map_to_numbers!(df, j::Int, datatype::Symbol) return df[j] end -function getval{T}(x::Nullable{T}) - x.value -end -function getval{T<:Number}(x::T) - x -end +getval(x::Union{T, Nothing}) where T = x.value +getval(x::T) where T<:Number = x + function map_to_numbers!(df, j::Int, loss::Type{QuadLoss}) if all(xi -> is_number_or_null(xi), df[j][!ismissing_vec(df[j])]) @@ -206,11 +201,11 @@ function pick_loss(l::Type{MultinomialOrdinalLoss}, col) end end -observations(da::DataArray) = df_observations(da) +observations(da::Array{Union{T, Missing}}) where T = df_observations(da) observations(df::DataFrame) = df_observations(df) # isnan -> ismissing function df_observations(da) - obs = @compat Tuple{Int, Int}[] + obs = Tuple{Int, Int}[] m,n = size(da) for j=1:n # follow column-major order. First element of index in innermost loop for i=1:m @@ -271,32 +266,7 @@ end # convert NaNs to NAs # isnan(x::NAtype) = false isnan(x::AbstractString) = false -isnan{T}(x::Nullable{T}) = isnan(x.value) - -# letting these two functions be here for now, just to catch bugs. -function NaNs_to_NAs!(df::DataFrame) - m,n = size(df) - for j=1:n # follow column-major order. First element of index in innermost loop - for i=1:m - if !isna(df[i,j]) && isnan(df[i,j]) - df[i,j] = NA - end - end - end - return df -end - -function NAs_to_0s!(df::DataFrame) - m,n = size(df) - for j=1:n # follow column-major order.
First element of index in innermost loop - for i=1:m - if isna(df[i,j]) - df[i,j] = 0 - end - end - end - return df -end +isnan(x::Union{T, Nothing}) where T = isnan(x.value) # same functionality as above. function NaNs_to_Missing!(df::DataFrame) @@ -308,6 +278,4 @@ function NaNs_to_Missing!(df::DataFrame) end # ismissing(Array{Union{T,Missing},1}) doesn't exist. -function ismissing_vec(V::Array{Any,1}) - return Bool[ismissing(x) for x in V[:]] -end +ismissing_vec(V::Array{Any,1}) = Bool[ismissing(x) for x in V[:]] diff --git a/src/fit_dataframe_w_type_imputation.jl b/src/fit_dataframe_w_type_imputation.jl index 4420899..41ea9de 100644 --- a/src/fit_dataframe_w_type_imputation.jl +++ b/src/fit_dataframe_w_type_imputation.jl @@ -1,14 +1,7 @@ import Base: isnan -import DataArrays: DataArray, isna, dropna, NA, NAtype -if VERSION < v"0.6.0" - import DataArrays: array -else - import DataArrays: isnan -end import DataFrames: DataFrame, ncol, convert export GLRM -import Missings: Missing, missing, ismissing # TODO: identify categoricals automatically from PooledDataArray columns @@ -99,6 +92,8 @@ function get_loss_types(df::DataFrame) try maxs[j] = maximum(skipmissing(col)) mins[j] = minimum(skipmissing(col)) + catch + nothing end end @@ -146,8 +141,10 @@ function get_ordinals(df::DataFrame) for i in 1:nord col = df[ord_idx[i]] try - maxs[i] = maximum(dropna(col)) - mins[i] = minimum(dropna(col)) + maxs[i] = maximum(dropmissing(col)) + mins[i] = minimum(dropmissing(col)) + catch + nothing end end diff --git a/src/glrm.jl b/src/glrm.jl index 92fe22a..b97bbf7 100644 --- a/src/glrm.jl +++ b/src/glrm.jl @@ -1,15 +1,15 @@ -import Base: size, axpy! -import Base.LinAlg.scale! -import Base.BLAS: gemm! -@compat abstract type AbstractGLRM end +import LinearAlgebra: size, axpy! +import LinearAlgebra.BLAS: gemm! + +abstract type AbstractGLRM end export AbstractGLRM, GLRM, getindex, size, scale_regularizer! -@compat const ObsArray = @compat(Union{Array{Array{Int,1},1}, Array{UnitRange{Int},1}}) +const ObsArray = Union{Array{Array{Int,1},1}, Array{UnitRange{Int},1}} ### GLRM TYPE -type GLRM<:AbstractGLRM +mutable struct GLRM<:AbstractGLRM A # The data table losses::Array{Loss,1} # array of loss functions rx::Array{Regularizer,1} # Array of regularizers to be applied to each column of X @@ -84,7 +84,7 @@ parameter_estimate(glrm::GLRM) = (glrm.X, glrm.Y) function scale_regularizer!(glrm::GLRM, newscale::Number) - scale!(glrm.rx, newscale) - scale!(glrm.ry, newscale) + mul!(glrm.rx, newscale) + mul!(glrm.ry, newscale) return glrm end diff --git a/src/impute_and_err.jl b/src/impute_and_err.jl index acf3052..ee8d8a3 100644 --- a/src/impute_and_err.jl +++ b/src/impute_and_err.jl @@ -27,11 +27,11 @@ export impute, error_metric, errors # function for general use -roundcutoff{T<:Number}(x,a::T,b::T) = T(min(max(round(x),a),b)) +roundcutoff(x,a::T,b::T) where T<:Number = T(min(max(round(x),a),b)) # Error metrics for general use squared_error(a_imputed::Number, a::Number) = (a_imputed-a)^2 -misclassification{T}(a_imputed::T, a::T) = float(!(a_imputed==a)) # return 0.0 if equal, 1.0 else +misclassification(a_imputed::T, a::T) where T = float(!(a_imputed==a)) # return 0.0 if equal, 1.0 else # use the default loss domain imputation if no domain provided impute(l::Loss, u::Float64) = impute(l.domain, l, u) @@ -79,7 +79,7 @@ function impute(D::OrdinalDomain, l::WeightedHingeLoss, u::Float64) a_imputed = (u>0 ? 
ceil(1/u) : floor(1/u)) roundcutoff(a_imputed, D.min, D.max) end -impute(D::OrdinalDomain, l::OrdisticLoss, u::AbstractArray) = indmin(u.^2) +impute(D::OrdinalDomain, l::OrdisticLoss, u::AbstractArray) = argmin(u.^2) # MultinomialOrdinalLoss # l(u, a) = -log(p(u, a)) @@ -93,12 +93,12 @@ function impute(D::OrdinalDomain, l::MultinomialOrdinalLoss, u::AbstractArray) enforce_MNLOrdRules!(u) eu = exp.(u) p = [1-eu[1], -diff(eu)..., eu[end]] - return indmax(p) + return argmax(p) end # generic method function impute(D::OrdinalDomain, l::Loss, u::AbstractArray) - (D.min:D.max)[indmin([evaluate(l, u, i) for i in D.min:D.max])] + (D.min:D.max)[argmin([evaluate(l, u, i) for i in D.min:D.max])] end function error_metric(D::OrdinalDomain, l::Loss, u::Float64, a::Number) @@ -109,8 +109,8 @@ end ########################################## CATEGORICALS ########################################## # Categorical data should take integer values ranging from 1 to `max` -impute(D::CategoricalDomain, l::MultinomialLoss, u::Array{Float64}) = indmax(u) -impute(D::CategoricalDomain, l::OvALoss, u::Array{Float64}) = indmax(u) +impute(D::CategoricalDomain, l::MultinomialLoss, u::Array{Float64}) = argmax(u) +impute(D::CategoricalDomain, l::OvALoss, u::Array{Float64}) = argmax(u) function error_metric(D::CategoricalDomain, l::Loss, u::Array{Float64}, a::Number) a_imputed = impute(D, l, u) @@ -144,14 +144,13 @@ end #################################################################################### # Use impute and error_metric over arrays -function impute{DomainSubtype<:Domain,LossSubtype<:Loss}( - domains::Array{DomainSubtype,1}, +function impute(domains::Array{DomainSubtype,1}, losses::Array{LossSubtype,1}, - U::Array{Float64,2}) + U::Array{Float64,2}) where {DomainSubtype<:Domain, LossSubtype<:Loss} m, d = size(U) n = length(losses) yidxs = get_yidxs(losses) - A_imputed = Array{Number}((m, n)); + A_imputed = Array{Number}(undef, (m, n)); for f in 1:n for i in 1:m if length(yidxs[f]) > 1 @@ -163,7 +162,7 @@ function impute{DomainSubtype<:Domain,LossSubtype<:Loss}( end return A_imputed end -function impute{LossSubtype<:Loss}(losses::Array{LossSubtype,1}, U::Array{Float64,2}) +function impute(losses::Array{LossSubtype,1}, U::Array{Float64,2}) where LossSubtype<:Loss domains = Domain[l.domain for l in losses] impute(domains, losses, U) end diff --git a/src/initialize.jl b/src/initialize.jl index de0ef09..e4ba4ca 100644 --- a/src/initialize.jl +++ b/src/initialize.jl @@ -1,6 +1,6 @@ -import StatsBase.sample, StatsBase.wsample +import StatsBase: sample, wsample export init_kmeanspp!, init_svd!, init_nndsvd! 
-import NMF.nndsvd +import Arpack: svds # kmeans++ initialization, but with missing data # we make sure never to look at "unobserved" entries in A @@ -74,7 +74,7 @@ function init_svd!(glrm::GLRM; offset=true, scale=true, TOL = 1e-10) end end else - error("No default mapping to real valued matrix for domains of type $type(glrm.losses[f].domain)") + error("No default mapping to real valued matrix for domains of type $typeof(glrm.losses[f].domain)") end end end @@ -95,7 +95,7 @@ function init_svd!(glrm::GLRM; offset=true, scale=true, TOL = 1e-10) if stds[j] < TOL || isnan(stds[j]) stds[j] = 1 end - Astd[glrm.observed_examples[f],j] = Areal[glrm.observed_examples[f],j] - means[j] + Astd[glrm.observed_examples[f],j] = Areal[glrm.observed_examples[f],j] .- means[j] end end if offset @@ -126,49 +126,9 @@ function init_svd!(glrm::GLRM; offset=true, scale=true, TOL = 1e-10) @assert(size(glrm.X, 2) >= m) @assert(size(glrm.Y, 1) >= k) @assert(size(glrm.Y, 2) >= d) - glrm.X[1:k,1:m] = Diagonal(sqrt.(ASVD[:S]))*ASVD[:U]' # recall X is transposed as per column major order. - glrm.Y[1:k,1:d] = Diagonal(sqrt.(ASVD[:S]))*ASVD[:Vt]*Diagonal(stds) + glrm.X[1:k,1:m] = Diagonal(sqrt.(ASVD.S))*ASVD.U' # recall X is transposed as per column major order. + glrm.Y[1:k,1:d] = Diagonal(sqrt.(ASVD.S))*ASVD.Vt*Diagonal(stds) return glrm end -function init_nndsvd!(glrm::GLRM; scale::Bool=true, zeroh::Bool=false, - variant::Symbol=:std, max_iters::Int=0) - # NNDSVD initialization: - # Boutsidis C, Gallopoulos E (2007). SVD based initialization: A head - # start for nonnegative matrix factorization. Pattern Recognition - m,n = size(glrm.A) - - # only initialize based on observed entries - A_init = zeros(m,n) - for i = 1:n - A_init[glrm.observed_examples[i],i] = glrm.A[glrm.observed_examples[i],i] - end - - # scale all columns by the Loss.scale parameter - if scale - for i = 1:n - A_init[:,i] .*= glrm.losses[i].scale - end - end - - # run the first nndsvd initialization - W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) - glrm.X = W' - glrm.Y = H - - # If max_iters>0 do a soft impute for the missing entries of A. - # Iterate: Estimate missing entries of A with W*H - # Update (W,H) nndsvd estimate based on new A - for iter = 1:max_iters - # Update missing entries of A_init - for j = 1:n - for i = setdiff(1:m,glrm.observed_examples[j]) - A_init[i,j] = dot(glrm.X[:,i],glrm.Y[:,j]) - end - end - # Re-estimate W and H - W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) - glrm.X = W' - glrm.Y = H - end -end +include("initialize_nmf.jl") diff --git a/src/initialize_nmf.jl b/src/initialize_nmf.jl new file mode 100644 index 0000000..cd1ce66 --- /dev/null +++ b/src/initialize_nmf.jl @@ -0,0 +1,43 @@ +import NMF.nndsvd + +function init_nndsvd!(glrm::GLRM; scale::Bool=true, zeroh::Bool=false, + variant::Symbol=:std, max_iters::Int=0) + # NNDSVD initialization: + # Boutsidis C, Gallopoulos E (2007). SVD based initialization: A head + # start for nonnegative matrix factorization. Pattern Recognition + m,n = size(glrm.A) + + # only initialize based on observed entries + A_init = zeros(m,n) + for i = 1:n + A_init[glrm.observed_examples[i],i] = glrm.A[glrm.observed_examples[i],i] + end + + # scale all columns by the Loss.scale parameter + if scale + for i = 1:n + A_init[:,i] .*= glrm.losses[i].scale + end + end + + # run the first nndsvd initialization + W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) + glrm.X = W' + glrm.Y = H + + # If max_iters>0 do a soft impute for the missing entries of A. 
+ # Iterate: Estimate missing entries of A with W*H + # Update (W,H) nndsvd estimate based on new A + for iter = 1:max_iters + # Update missing entries of A_init + for j = 1:n + for i = setdiff(1:m,glrm.observed_examples[j]) + A_init[i,j] = dot(glrm.X[:,i],glrm.Y[:,j]) + end + end + # Re-estimate W and H + W,H = nndsvd(A_init, glrm.k, zeroh=zeroh, variant=variant) + glrm.X = W' + glrm.Y = H + end +end diff --git a/src/losses.jl b/src/losses.jl index fb1a116..2d1d595 100644 --- a/src/losses.jl +++ b/src/losses.jl @@ -35,7 +35,7 @@ # `impute(d::Domain, l::my_loss_type, u::Array{Float64})` (in impute_and_err.jl) # Finds a = argmin l(u,a), the most likely value for an observation given a parameter u -import Base: scale!, *, convert +import Base: *, convert import Optim: optimize, LBFGS export Loss, DiffLoss, ClassificationLoss, SingleDimLoss, # categories of Losses @@ -46,22 +46,22 @@ export Loss, MultinomialLoss, OvALoss, # losses for predicting nominals (categoricals) PeriodicLoss, # losses for predicting periodic variables evaluate, grad, M_estimator, # methods on losses - avgerror, scale, scale!, *, + avgerror, scale, mul!, *, embedding_dim, get_yidxs, datalevels, domain -@compat abstract type Loss end +abstract type Loss end # a DiffLoss is one in which l(u,a) = f(u-a) AND argmin f(x) = 0 # for example, QuadLoss(u,a)=(u-a)² and we can write f(x)=x² and x=u-a -@compat abstract type DiffLoss<:Loss end +abstract type DiffLoss<:Loss end # a ClassificationLoss is one in which observed values are true = 1 or false = 0 = -1 AND argmin_a L(u,a) = u>=0 ? true : false -@compat abstract type ClassificationLoss<:Loss end +abstract type ClassificationLoss<:Loss end # Single Dimensional losses are DiffLosses or ClassificationLosses, which allow optimized evaluate and grad functions -@compat const SingleDimLoss = Union{DiffLoss, ClassificationLoss} +const SingleDimLoss = Union{DiffLoss, ClassificationLoss} -scale!(l::Loss, newscale::Number) = (l.scale = newscale; l) +mul!(l::Loss, newscale::Number) = (l.scale = newscale; l) scale(l::Loss) = l.scale -*(newscale::Number, l::Loss) = (newl = copy(l); scale!(newl, newscale)) -*(l::Loss, newscale::Number) = (newl = copy(l); scale!(newl, newscale)) +*(newscale::Number, l::Loss) = (newl = copy(l); mul!(newl, newscale)) +*(l::Loss, newscale::Number) = (newl = copy(l); mul!(newl, newscale)) domain(l::Loss) = l.domain @@ -70,17 +70,17 @@ domain(l::Loss) = l.domain # default number of columns # number of columns is higher for multidimensional losses embedding_dim(l::Loss) = 1 -embedding_dim{LossSubtype<:Loss}(l::Array{LossSubtype,1}) = sum(map(embedding_dim, l)) +embedding_dim(l::Array{LossSubtype,1}) where LossSubtype<:Loss = sum(map(embedding_dim, l)) # find spans of loss functions (for multidimensional losses) -function get_yidxs{LossSubtype<:Loss}(losses::Array{LossSubtype,1}) +function get_yidxs(losses::Array{LossSubtype,1}) where LossSubtype<:Loss n = length(losses) ds = map(embedding_dim, losses) d = sum(ds) featurestartidxs = cumsum(append!([1], ds)) # find which columns of Y map to which columns of A (for multidimensional losses) - U = Union{Range{Int}, Int} - @compat yidxs = Array{U}(n) + U = Union{UnitRange{Int}, Int} + yidxs = Array{U}(undef, n) for f = 1:n if ds[f] == 1 @@ -135,7 +135,7 @@ end ########################################## QUADRATIC ########################################## # f: ℜxℜ -> ℜ -type QuadLoss<:DiffLoss +mutable struct QuadLoss<:DiffLoss scale::Float64 domain::Domain end @@ -149,7 +149,7 @@ M_estimator(l::QuadLoss, 
a::AbstractArray) = mean(a) ########################################## L1 ########################################## # f: ℜxℜ -> ℜ -type L1Loss<:DiffLoss +mutable struct L1Loss<:DiffLoss scale::Float64 domain::Domain end @@ -163,7 +163,7 @@ M_estimator(l::L1Loss, a::AbstractArray) = median(a) ########################################## HUBER ########################################## # f: ℜxℜ -> ℜ -type HuberLoss<:DiffLoss +mutable struct HuberLoss<:DiffLoss scale::Float64 domain::Domain crossover::Float64 # where QuadLoss loss ends and linear loss begins; =1 for standard HuberLoss @@ -183,7 +183,7 @@ grad(l::HuberLoss,u::Float64,a::Number) = abs(u-a)>l.crossover ? sign(u-a)*l.sca # define (u)_+ = max(u,0), (u)_- = max(-u,0) so (u)_+ + (u)_- = |u| # f(u,a) = { quantile (a - u)_+ + (1-quantile) (a - u)_- # fits the `quantile`th quantile of the distribution -type QuantileLoss<:DiffLoss +mutable struct QuantileLoss<:DiffLoss scale::Float64 domain::Domain quantile::Float64 # fit the alphath quantile @@ -206,7 +206,7 @@ M_estimator(l::QuantileLoss, a::AbstractArray) = quantile(a, l.quantile) # f: ℜxℜ -> ℜ # f(u,a) = w * (1 - cos((a-u)*(2*pi)/T)) # this measures how far away u and a are on a circle of circumference T. -type PeriodicLoss<:DiffLoss +mutable struct PeriodicLoss<:DiffLoss T::Float64 # the length of the period scale::Float64 domain::Domain @@ -228,14 +228,14 @@ end # BEWARE: # 1) this is a reparametrized poisson: we parametrize the mean as exp(u) so that u can take any real value and still produce a positive mean # 2) THIS LOSS MAY CAUSE MODEL INSTABLITY AND DIFFICULTY FITTING. -type PoissonLoss<:Loss +mutable struct PoissonLoss<:Loss scale::Float64 domain::Domain end PoissonLoss(max_count=2^31::Int; domain=CountDomain(max_count)::Domain) = PoissonLoss(1.0, domain) function evaluate(l::PoissonLoss, u::Float64, a::Number) - l.scale*(exp(u) - a*u + (a==0? 0 : a*(log(a)-1))) # log(a!) ~ a==0? 0 : a*(log(a)-1) + l.scale*(exp(u) - a*u + (a==0 ? 0 : a*(log(a)-1))) # log(a!) ~ a==0 ? 0 : a*(log(a)-1) end grad(l::PoissonLoss, u::Float64, a::Number) = l.scale*(exp(u) - a) @@ -244,7 +244,7 @@ M_estimator(l::PoissonLoss, a::AbstractArray) = log(mean(a)) ########################################## ORDINAL HINGE ########################################## # f: ℜx{min, min+1... 
max-1, max} -> ℜ -type OrdinalHingeLoss<:Loss +mutable struct OrdinalHingeLoss<:Loss min::Integer max::Integer scale::Float64 @@ -295,7 +295,7 @@ M_estimator(l::OrdinalHingeLoss, a::AbstractArray) = median(a) ########################################## LOGISTIC ########################################## # f: ℜx{-1,1}-> ℜ -type LogisticLoss<:ClassificationLoss +mutable struct LogisticLoss<:ClassificationLoss scale::Float64 domain::Domain end @@ -314,7 +314,7 @@ end # f: ℜx{-1,1} -> ℜ # f(u,a) = { w * max(1-a*u, 0) for a = -1 # = { c * w * max(1-a*u, 0) for a = 1 -type WeightedHingeLoss<:ClassificationLoss +mutable struct WeightedHingeLoss<:ClassificationLoss scale::Float64 domain::Domain case_weight_ratio::Float64 # >1 for trues to have more confidence than falses, <1 for opposite @@ -357,7 +357,7 @@ end # often known as the softmax function # f(u, a) = exp(u[a]) / (sum_{a'} exp(u[a'])) # = 1 / (sum_{a'} exp(u[a'] - u[a])) -type MultinomialLoss<:Loss +mutable struct MultinomialLoss<:Loss max::Integer scale::Float64 domain::Domain @@ -416,7 +416,7 @@ end ########################################## One vs All loss ########################################## # f: ℜx{1, 2, ..., max-1, max} -> ℜ -type OvALoss<:Loss +mutable struct OvALoss<:Loss max::Integer bin_loss::Loss scale::Float64 @@ -459,7 +459,7 @@ end ########################################## Bigger vs Smaller loss ########################################## # f: ℜx{1, 2, ..., max-1} -> ℜ -type BvSLoss<:Loss +mutable struct BvSLoss<:Loss max::Integer bin_loss::Loss scale::Float64 @@ -505,7 +505,7 @@ end # f computes the (negative log likelihood of the) multinomial logit, # often known as the softmax function # f(u, a) = exp(u[a]) / (sum_{a'} exp(u[a'])) -type OrdisticLoss<:Loss +mutable struct OrdisticLoss<:Loss max::Integer scale::Float64 domain::Domain @@ -577,7 +577,7 @@ end # the most probable value a is the index of the first # positive entry of u -type MultinomialOrdinalLoss<:Loss +mutable struct MultinomialOrdinalLoss<:Loss max::Integer scale::Float64 domain::Domain @@ -651,7 +651,7 @@ end #Optimized vector evaluate on single-dimensional losses function evaluate(l::SingleDimLoss, u::Vector{Float64}, a::AbstractVector) losseval = (x::Float64, y::Number) -> evaluate(l, x, y) - mapped = zeros(u) + mapped = fill!(similar(u),0.) map!(losseval, mapped, u, a) reduce(+, mapped) end @@ -680,7 +680,7 @@ end # Optimized vector grad on single-dimensional losses function grad(l::SingleDimLoss, u::Vector{Float64}, a::AbstractVector) lossgrad = (x::Float64,y::Number) -> grad(l, x, y) - mapped = zeros(u) + mapped = fill!(similar(u),0.) 
map!(lossgrad, mapped, u, a) end diff --git a/src/modify_glrm.jl b/src/modify_glrm.jl index 4310b9e..5e76d38 100644 --- a/src/modify_glrm.jl +++ b/src/modify_glrm.jl @@ -42,10 +42,10 @@ function equilibrate_variance!(glrm::AbstractGLRM, columns_to_scale = 1:size(glr end if varlossi > 0 # rescale the losses and regularizers for each column by the inverse of the empirical variance - scale!(glrm.losses[i], scale(glrm.losses[i])/varlossi) + mul!(glrm.losses[i], scale(glrm.losses[i])/varlossi) end if varregi > 0 - scale!(glrm.ry[i], scale(glrm.ry[i])/varregi) + mul!(glrm.ry[i], scale(glrm.ry[i])/varregi) end end return glrm @@ -62,19 +62,19 @@ function prob_scale!(glrm, columns_to_scale = 1:size(glrm.A,2)) if typeof(glrm.losses[i]) == QuadLoss && length(nomissing) > 0 varlossi = var(skipmissing(glrm.A[i])) # estimate the variance if varlossi > TOL - scale!(glrm.losses[i], 1/(2*varlossi)) # this is the correct -loglik of gaussian with variance fixed at estimate + mul!(glrm.losses[i], 1/(2*varlossi)) # this is the correct -loglik of gaussian with variance fixed at estimate else warn("column $i has a variance of $varlossi; not scaling it to avoid dividing by zero.") end elseif typeof(glrm.losses[i]) == HuberLoss && length(nomissing) > 0 varlossi = avgerror(glrm.losses[i], glrm.A[i]) # estimate the width of the distribution if varlossi > TOL - scale!(glrm.losses[i], 1/(2*varlossi)) # this is not the correct -loglik of huber with estimates for variance and mean of poisson, but that's probably ok + mul!(glrm.losses[i], 1/(2*varlossi)) # this is not the correct -loglik of huber with estimates for variance and mean of poisson, but that's probably ok else warn("column $i has a variance of $varlossi; not scaling it to avoid dividing by zero.") end else # none of the other distributions have any free parameters to estimate, so this is the correct -loglik - scale!(glrm.losses[i], 1) + mul!(glrm.losses[i], 1) end end return glrm diff --git a/src/regularizers.jl b/src/regularizers.jl index 7013f8a..b80254b 100644 --- a/src/regularizers.jl +++ b/src/regularizers.jl @@ -3,7 +3,7 @@ # the abstract type Regularizer. # Regularizers should implement `evaluate` and `prox`. 
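(Aside, not part of the patch: as the comment above says, a regularizer only needs `evaluate` and `prox`. A minimal sketch of a custom regularizer under the post-0.7 API introduced in this file; the name MyAbsReg is hypothetical and simply mirrors OneReg.)

    mutable struct MyAbsReg<:Regularizer
        scale::Float64
    end
    MyAbsReg() = MyAbsReg(1.0)
    evaluate(r::MyAbsReg, a::AbstractArray) = r.scale*sum(abs, a)
    # prox of the scaled l1 norm is soft-thresholding
    prox(r::MyAbsReg, u::AbstractArray, alpha::Number) = sign.(u) .* max.(abs.(u) .- alpha*r.scale, 0)
    mul!(r::MyAbsReg, newscale::Number) = (r.scale = newscale; r)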
-import Base.scale!, Base.* +import Base: * export Regularizer, ProductRegularizer, # abstract types # concrete regularizers @@ -19,7 +19,7 @@ export Regularizer, ProductRegularizer, # abstract types # methods on regularizers prox!, prox, # utilities - scale, scale!, * + scale, mul!, * # numerical tolerance TOL = 1e-12 @@ -27,17 +27,17 @@ TOL = 1e-12 # regularizers # regularizers r should have the method `prox` defined such that # prox(r)(u,alpha) = argmin_x( alpha r(x) + 1/2 \|x - u\|_2^2) -@compat abstract type Regularizer end -@compat abstract type MatrixRegularizer <: LowRankModels.Regularizer end +abstract type Regularizer end +abstract type MatrixRegularizer <: LowRankModels.Regularizer end # default inplace prox operator (slower than if inplace prox is implemented) prox!(r::Regularizer,u::AbstractArray,alpha::Number) = (v = prox(r,u,alpha); @simd for i=1:length(u) @inbounds u[i]=v[i] end; u) # default scaling scale(r::Regularizer) = r.scale -scale!(r::Regularizer, newscale::Number) = (r.scale = newscale; r) -scale!(rs::Array{Regularizer}, newscale::Number) = (for r in rs scale!(r, newscale) end; rs) -*(newscale::Number, r::Regularizer) = (newr = typeof(r)(); scale!(newr, scale(r)*newscale); newr) +mul!(r::Regularizer, newscale::Number) = (r.scale = newscale; r) +mul!(rs::Array{Regularizer}, newscale::Number) = (for r in rs mul!(r, newscale) end; rs) +*(newscale::Number, r::Regularizer) = (newr = typeof(r)(); mul!(newr, scale(r)*newscale); newr) ## utilities @@ -49,12 +49,12 @@ function allnonneg(a::AbstractArray) end ## QuadLoss regularization -type QuadReg<:Regularizer +mutable struct QuadReg<:Regularizer scale::Float64 end QuadReg() = QuadReg(1) prox(r::QuadReg,u::AbstractArray,alpha::Number) = 1/(1+2*alpha*r.scale)*u -prox!(r::QuadReg,u::Array{Float64},alpha::Number) = scale!(u, 1/(1+2*alpha*r.scale)) +prox!(r::QuadReg,u::Array{Float64},alpha::Number) = rmul!(u, 1/(1+2*alpha*r.scale)) evaluate(r::QuadReg,a::AbstractArray) = r.scale*sum(abs2, a) ## constrained QuadLoss regularization @@ -65,18 +65,18 @@ evaluate(r::QuadReg,a::AbstractArray) = r.scale*sum(abs2, a) ## constraining the maxnorm of XY to be <= mu is achieved ## by setting glrm.rx = QuadConstraint(sqrt(mu)) ## and the same for every element of glrm.ry -type QuadConstraint<:Regularizer +mutable struct QuadConstraint<:Regularizer max_2norm::Float64 end QuadConstraint() = QuadConstraint(1) prox(r::QuadConstraint,u::AbstractArray,alpha::Number) = (r.max_2norm)/norm(u)*u -prox!(r::QuadConstraint,u::Array{Float64},alpha::Number) = scale!(u, (r.max_2norm)/norm(u)) +prox!(r::QuadConstraint,u::Array{Float64},alpha::Number) = mul!(u, (r.max_2norm)/norm(u)) evaluate(r::QuadConstraint,u::AbstractArray) = norm(u) > r.max_2norm + TOL ? 
Inf : 0 scale(r::QuadConstraint) = 1 -scale!(r::QuadConstraint, newscale::Number) = 1 +mul!(r::QuadConstraint, newscale::Number) = 1 ## one norm regularization -type OneReg<:Regularizer +mutable struct OneReg<:Regularizer scale::Float64 end OneReg() = OneReg(1) @@ -89,17 +89,17 @@ evaluate(r::OneReg,a::AbstractArray) = r.scale*sum(abs,a) ## no regularization -type ZeroReg<:Regularizer +mutable struct ZeroReg<:Regularizer end prox(r::ZeroReg,u::AbstractArray,alpha::Number) = u prox!(r::ZeroReg,u::Array{Float64},alpha::Number) = u evaluate(r::ZeroReg,a::AbstractArray) = 0 scale(r::ZeroReg) = 0 -scale!(r::ZeroReg, newscale::Number) = 0 +mul!(r::ZeroReg, newscale::Number) = 0 ## indicator of the nonnegative orthant ## (enforces nonnegativity, eg for nonnegative matrix factorization) -type NonNegConstraint<:Regularizer +mutable struct NonNegConstraint<:Regularizer end prox(r::NonNegConstraint,u::AbstractArray,alpha::Number=1) = broadcast(max,u,0) prox!(r::NonNegConstraint,u::Array{Float64},alpha::Number=1) = (@simd for i=1:length(u) @inbounds u[i] = max(u[i], 0) end; u) @@ -112,11 +112,11 @@ function evaluate(r::NonNegConstraint,a::AbstractArray) return 0 end scale(r::NonNegConstraint) = 1 -scale!(r::NonNegConstraint, newscale::Number) = 1 +mul!(r::NonNegConstraint, newscale::Number) = 1 ## one norm regularization restricted to nonnegative orthant ## (enforces nonnegativity, in addition to one norm regularization) -type NonNegOneReg<:Regularizer +mutable struct NonNegOneReg<:Regularizer scale::Float64 end NonNegOneReg() = NonNegOneReg(1) @@ -136,17 +136,17 @@ function evaluate(r::NonNegOneReg,a::AbstractArray) return r.scale*sum(a) end scale(r::NonNegOneReg) = 1 -scale!(r::NonNegOneReg, newscale::Number) = 1 +mul!(r::NonNegOneReg, newscale::Number) = 1 ## Quadratic regularization restricted to nonnegative domain ## (Enforces nonnegativity alongside quadratic regularization) -type NonNegQuadReg +mutable struct NonNegQuadReg scale::Float64 end NonNegQuadReg() = NonNegQuadReg(1) prox(r::NonNegQuadReg,u::AbstractArray,alpha::Number) = max.(1/(1+2*alpha*r.scale)*u, 0) prox!(r::NonNegQuadReg,u::AbstractArray,alpha::Number) = begin - scale!(u, 1/(1+2*alpha*r.scale)) + mul!(u, 1/(1+2*alpha*r.scale)) maxval = maximum(u) clamp!(u, 0, maxval) end @@ -161,7 +161,7 @@ end ## indicator of the last entry being equal to 1 ## (allows an unpenalized offset term into the glrm when used in conjunction with lastentry_unpenalized) -type lastentry1<:Regularizer +mutable struct lastentry1<:Regularizer r::Regularizer end lastentry1() = lastentry1(ZeroReg()) @@ -172,11 +172,11 @@ prox!(r::lastentry1,u::AbstractArray{Float64,2},alpha::Number=1) = (prox!(r.r,vi evaluate(r::lastentry1,a::AbstractArray{Float64,1}) = (a[end]==1 ? evaluate(r.r,a[1:end-1]) : Inf) evaluate(r::lastentry1,a::AbstractArray{Float64,2}) = (all(a[end,:].==1) ? 
evaluate(r.r,a[1:end-1,:]) : Inf) scale(r::lastentry1) = scale(r.r) -scale!(r::lastentry1, newscale::Number) = scale!(r.r, newscale) +mul!(r::lastentry1, newscale::Number) = mul!(r.r, newscale) ## makes the last entry unpenalized ## (allows an unpenalized offset term into the glrm when used in conjunction with lastentry1) -type lastentry_unpenalized<:Regularizer +mutable struct lastentry_unpenalized<:Regularizer r::Regularizer end lastentry_unpenalized() = lastentry_unpenalized(ZeroReg()) @@ -187,11 +187,11 @@ prox(r::lastentry_unpenalized,u::AbstractArray{Float64,2},alpha::Number=1) = [pr prox!(r::lastentry_unpenalized,u::AbstractArray{Float64,2},alpha::Number=1) = (prox!(r.r,view(u,1:size(u,1)-1,:),alpha); u) evaluate(r::lastentry_unpenalized,a::AbstractArray{Float64,2}) = evaluate(r.r,a[1:end-1,:]) scale(r::lastentry_unpenalized) = scale(r.r) -scale!(r::lastentry_unpenalized, newscale::Number) = scale!(r.r, newscale) +mul!(r::lastentry_unpenalized, newscale::Number) = mul!(r.r, newscale) ## fixes the values of the first n elements of the column to be y ## optionally regularizes the last k-n elements with regularizer r -type fixed_latent_features<:Regularizer +mutable struct fixed_latent_features<:Regularizer r::Regularizer y::Array{Float64,1} # the values of the fixed latent features n::Int # length of y @@ -208,11 +208,11 @@ function prox!(r::fixed_latent_features,u::Array{Float64},alpha::Number) end evaluate(r::fixed_latent_features, a::AbstractArray) = a[1:r.n]==r.y ? evaluate(r.r, a[(r.n+1):end]) : Inf scale(r::fixed_latent_features) = scale(r.r) -scale!(r::fixed_latent_features, newscale::Number) = scale!(r.r, newscale) +mul!(r::fixed_latent_features, newscale::Number) = mul!(r.r, newscale) ## fixes the values of the last n elements of the column to be y ## optionally regularizes the first k-n elements with regularizer r -type fixed_last_latent_features<:Regularizer +mutable struct fixed_last_latent_features<:Regularizer r::Regularizer y::Array{Float64,1} # the values of the fixed latent features n::Int # length of y @@ -229,14 +229,14 @@ function prox!(r::fixed_last_latent_features,u::Array{Float64},alpha::Number) end evaluate(r::fixed_last_latent_features, a::AbstractArray) = a[length(a)-r.n+1:end]==r.y ? 
evaluate(r.r, a[1:length(a)-r.n]) : Inf scale(r::fixed_last_latent_features) = scale(r.r) -scale!(r::fixed_last_latent_features, newscale::Number) = scale!(r.r, newscale) +mul!(r::fixed_last_latent_features, newscale::Number) = mul!(r.r, newscale) ## indicator of 1-sparse vectors ## (enforces that exact 1 entry is nonzero, eg for orthogonal NNMF) -type OneSparseConstraint<:Regularizer +mutable struct OneSparseConstraint<:Regularizer end -prox(r::OneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = indmax(u); v=zeros(size(u)); v[idx]=u[idx]; v) -prox!(r::OneSparseConstraint, u::Array, alpha::Number=0) = (idx = indmax(u); ui = u[idx]; scale!(u,0); u[idx]=ui; u) +prox(r::OneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = argmax(u); v=zeros(size(u)); v[idx]=u[idx]; v) +prox!(r::OneSparseConstraint, u::Array, alpha::Number=0) = (idx = argmax(u); ui = u[idx]; mul!(u,0); u[idx]=ui; u) function evaluate(r::OneSparseConstraint, a::AbstractArray) oneflag = false for ai in a @@ -253,10 +253,10 @@ function evaluate(r::OneSparseConstraint, a::AbstractArray) return 0 end scale(r::OneSparseConstraint) = 1 -scale!(r::OneSparseConstraint, newscale::Number) = 1 +mul!(r::OneSparseConstraint, newscale::Number) = 1 ## Indicator of k-sparse vectors -type KSparseConstraint<:Regularizer +mutable struct KSparseConstraint<:Regularizer k::Int end function evaluate(r::KSparseConstraint, a::AbstractArray) @@ -286,17 +286,17 @@ function prox!(r::KSparseConstraint, u::Array, alpha::Number) k = r.k ids = selectperm(u, 1:k, by=abs, rev=true) vals = u[ids] - scale!(u,0) + mul!(u,0) u[ids] = vals u end ## indicator of 1-sparse unit vectors ## (enforces that exact 1 entry is 1 and all others are zero, eg for kmeans) -type UnitOneSparseConstraint<:Regularizer +mutable struct UnitOneSparseConstraint<:Regularizer end -prox(r::UnitOneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = indmax(u); v=zeros(size(u)); v[idx]=1; v) -prox!(r::UnitOneSparseConstraint, u::Array, alpha::Number=0) = (idx = indmax(u); scale!(u,0); u[idx]=1; u) +prox(r::UnitOneSparseConstraint, u::AbstractArray, alpha::Number=0) = (idx = argmax(u); v=zeros(size(u)); v[idx]=1; v) +prox!(r::UnitOneSparseConstraint, u::Array, alpha::Number=0) = (idx = argmax(u); mul!(u,0); u[idx]=1; u) function evaluate(r::UnitOneSparseConstraint, a::AbstractArray) oneflag = false @@ -316,12 +316,12 @@ function evaluate(r::UnitOneSparseConstraint, a::AbstractArray) return 0 end scale(r::UnitOneSparseConstraint) = 1 -scale!(r::UnitOneSparseConstraint, newscale::Number) = 1 +mul!(r::UnitOneSparseConstraint, newscale::Number) = 1 ## indicator of vectors in the simplex: nonnegative vectors with unit l1 norm ## (eg for QuadLoss mixtures, ie soft kmeans) ## prox for the simplex is derived by Chen and Ye in [this paper](http://arxiv.org/pdf/1101.6081v2.pdf) -type SimplexConstraint<:Regularizer +mutable struct SimplexConstraint<:Regularizer end function prox(r::SimplexConstraint, u::AbstractArray, alpha::Number=0) n = length(u) @@ -334,7 +334,7 @@ function prox(r::SimplexConstraint, u::AbstractArray, alpha::Number=0) break end end - max.(u - t, 0) + max.(u .- t, 0) end function evaluate(r::SimplexConstraint,a::AbstractArray) # check it's a unit vector @@ -346,7 +346,7 @@ function evaluate(r::SimplexConstraint,a::AbstractArray) return 0 end scale(r::SimplexConstraint) = 1 -scale!(r::SimplexConstraint, newscale::Number) = 1 +mul!(r::SimplexConstraint, newscale::Number) = 1 ## ordinal regularizer ## a block regularizer which @@ -354,13 +354,13 @@ 
scale!(r::SimplexConstraint, newscale::Number) = 1 # 2) forces the last entry of each column to be increasing # 3) applies an internal regularizer to the first k-1 entries of each column ## should always be used in conjunction with lastentry1 regularization on x -type OrdinalReg<:Regularizer +mutable struct OrdinalReg<:Regularizer r::Regularizer end OrdinalReg() = OrdinalReg(ZeroReg()) prox(r::OrdinalReg,u::AbstractArray,alpha::Number) = (uc = copy(u); prox!(r,uc,alpha)) function prox!(r::OrdinalReg,u::AbstractArray,alpha::Number) - um = mean(u[1:end-1, :], 2) + um = mean(u[1:end-1, :], dims=2) prox!(r.r,um,alpha) for i=1:size(u,1)-1 for j=1:size(u,2) @@ -378,18 +378,18 @@ function prox!(r::OrdinalReg,u::AbstractArray,alpha::Number) end evaluate(r::OrdinalReg,a::AbstractArray) = evaluate(r.r,a[1:end-1,1]) scale(r::OrdinalReg) = scale(r.r) -scale!(r::OrdinalReg, newscale::Number) = scale!(r.r, newscale) +mul!(r::OrdinalReg, newscale::Number) = mul!(r.r, newscale) # make sure we don't add two offsets cuz that's weird lastentry_unpenalized(r::OrdinalReg) = r -type MNLOrdinalReg<:Regularizer +mutable struct MNLOrdinalReg<:Regularizer r::Regularizer end MNLOrdinalReg() = MNLOrdinalReg(ZeroReg()) prox(r::MNLOrdinalReg,u::AbstractArray,alpha::Number) = (uc = copy(u); prox!(r,uc,alpha)) function prox!(r::MNLOrdinalReg,u::AbstractArray,alpha::Number; TOL=1e-3) - um = mean(u[1:end-1, :], 2) + um = mean(u[1:end-1, :], dims=2) prox!(r.r,um,alpha) for i=1:size(u,1)-1 for j=1:size(u,2) @@ -405,12 +405,12 @@ function prox!(r::MNLOrdinalReg,u::AbstractArray,alpha::Number; TOL=1e-3) end evaluate(r::MNLOrdinalReg,a::AbstractArray) = evaluate(r.r,a[1:end-1,1]) scale(r::MNLOrdinalReg) = scale(r.r) -scale!(r::MNLOrdinalReg, newscale::Number) = scale!(r.r, newscale) +mul!(r::MNLOrdinalReg, newscale::Number) = mul!(r.r, newscale) # make sure we don't add two offsets cuz that's weird lastentry_unpenalized(r::MNLOrdinalReg) = r ## Quadratic regularization with non-zero mean -type RemQuadReg<:Regularizer +mutable struct RemQuadReg<:Regularizer scale::Float64 m::Array{Float64, 1} end @@ -419,7 +419,7 @@ prox(r::RemQuadReg, u::AbstractArray, alpha::Number) = (u + 2 * alpha * r.scale * r.m) / (1 + 2 * alpha * r.scale) prox!(r::RemQuadReg, u::Array{Float64}, alpha::Number) = begin broadcast!(.+, u, u, 2 * alpha * r.scale * r.m) - scale!(u, 1 / (1 + 2 * alpha * r.scale)) + mul!(u, 1 / (1 + 2 * alpha * r.scale)) end evaluate(r::RemQuadReg, a::AbstractArray) = r.scale * sum(abs2, a - r.m) diff --git a/src/rsvd.jl b/src/rsvd.jl index fcc5859..0e5e656 100644 --- a/src/rsvd.jl +++ b/src/rsvd.jl @@ -1,6 +1,6 @@ #### randomized SVD (from Jiahao Chen, based on http://arxiv.org/pdf/0909.4061.pdf) -import Base.LinAlg.SVD +import LinearAlgebra: SVD #The simplest possible randomized svd #Inputs diff --git a/src/sample.jl b/src/sample.jl index 9ef3bd2..89e7820 100644 --- a/src/sample.jl +++ b/src/sample.jl @@ -21,7 +21,7 @@ # DataTypes are assigned to each column of the data and are not part of the low-rank model itself, they just serve # as a way to evaluate the performance of the low-rank model. -import StatsBase: sample, WeightVec +import StatsBase: sample, Weights export sample, sample_missing ########################################## REALS ########################################## @@ -42,7 +42,7 @@ end # This is fast and works for any loss. 
function sample(D::BoolDomain, l::Loss, u::AbstractArray) prob = exp.(-[evaluate(l, u, i) for i in (true, false)]) - return sample(WeightVec(prob)) + return sample(Weights(prob)) end ########################################## ORDINALS ########################################## @@ -59,14 +59,14 @@ end # generic method function sample(D::OrdinalDomain, l::Loss, u::AbstractArray) prob = exp.(-[evaluate(l, u, i) for i in D.min:D.max]) - return sample(WeightVec(prob)) + return sample(Weights(prob)) end ########################################## CATEGORICALS ########################################## # Categorical data should take integer values ranging from 1 to `max` function sample(D::CategoricalDomain, l::MultinomialLoss, u::Array{Float64}) - return sample(WeightVec(exp.(u))) + return sample(Weights(exp.(u))) end # sample(D::CategoricalDomain, l::OvALoss, u::Array{Float64}) = ?? @@ -74,7 +74,7 @@ end # generic method function sample(D::CategoricalDomain, l::Loss, u::AbstractArray) prob = exp.(-[evaluate(l, u, i) for i in D.min:D.max]) - return sample(WeightVec(prob)) + return sample(Weights(prob)) end ########################################## PERIODIC ########################################## @@ -91,10 +91,10 @@ sample(D::CountDomain, l::Loss, u::Float64) = sample(OrdinalDomain(0,D.max_count #################################################################################### # Use impute and error_metric over arrays -function sample{DomainSubtype<:Domain,LossSubtype<:Loss}( +function sample( domains::Array{DomainSubtype,1}, losses::Array{LossSubtype,1}, - U::Array{Float64,2}) # U = X'*Y + U::Array{Float64,2}) where {DomainSubtype<:Domain,LossSubtype<:Loss} m, d = size(U) n = length(losses) yidxs = get_yidxs(losses) @@ -134,7 +134,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true # even if all data for some real loss take integer values for j=1:n if isa(domains[j], RealDomain) && isa(glrm.A[j], DataArray{Int64,1}) - domains[j] = OrdinalDomain(minimum(dropna(glrm.A[j])), maximum(dropna(glrm.A[j]))) + domains[j] = OrdinalDomain(minimum(dropmissing(glrm.A[j])), maximum(dropmissing(glrm.A[j]))) end end A_sampled = copy(glrm.A); @@ -151,7 +151,7 @@ function sample(glrm::GLRM, do_sample::Function=all_entries, is_dense::Bool=true return A_sampled end -function sample{LossSubtype<:Loss}(losses::Array{LossSubtype,1}, U::Array{Float64,2}) +function sample(losses::Array{LossSubtype,1}, U::Array{Float64,2}) where LossSubtype<:Loss domains = Domain[domain(l) for l in losses] sample(domains, losses, U) end diff --git a/src/scikitlearn.jl b/src/scikitlearn.jl index a9e6fbb..f60f412 100644 --- a/src/scikitlearn.jl +++ b/src/scikitlearn.jl @@ -13,7 +13,7 @@ export SkGLRM, PCA, QPCA, NNMF, KMeans, RPCA # max_iter are hyperparameters and need to be visible/changeable by # set_params for grid-search. # There are other ways of setting it up, but this seems like the simplest. -type SkGLRM <: ScikitLearnBase.BaseEstimator +mutable struct SkGLRM <: ScikitLearnBase.BaseEstimator # Hyperparameters: those will be passed to GLRM, so it doesn't matter if # they're not typed. fit_params # if fit_params != nothing, it has priority over abs_tol, etc. @@ -51,17 +51,22 @@ function do_fit!(skglrm::SkGLRM, glrm::GLRM) fit!(glrm, fit_params; verbose=skglrm.verbose) end +function ind2sub(a, i) + i2s[i] +end + function build_glrm(skglrm::SkGLRM, X, missing_values) k = skglrm.k == -1 ? 
size(X, 2) : skglrm.k - obs = [ind2sub(missing_values, x) for x in find(.!missing_values)] + i2s = CartesianIndices(missing_values) + obs = [i2s[x] for x in (LinearIndices(.!missing_values))[findall(.!missing_values)] ] rx, ry = skglrm.rx, skglrm.ry if skglrm.rx_scale !== nothing rx = copy(rx) - scale!(rx, skglrm.rx_scale) + mul!(rx, skglrm.rx_scale) end if skglrm.ry_scale !== nothing ry = copy(ry) - scale!(ry, skglrm.ry_scale) + mul!(ry, skglrm.ry_scale) end GLRM(X, skglrm.loss, rx, ry, k; obs=obs) end @@ -105,7 +110,7 @@ ScikitLearnBase.inverse_transform(skglrm::SkGLRM, X) = X * skglrm.glrm.Y function ScikitLearnBase.predict(km::SkGLRM, X) X2 = ScikitLearnBase.transform(km, X) # This performs the "argmax" over the columns to get the cluster # - return mapslices(indmax, X2, 2)[:] + return mapslices(argmax, X2, 2)[:] end ################################################################################ diff --git a/src/shareglrm.jl b/src/shareglrm.jl index f9f88b6..0fa255f 100644 --- a/src/shareglrm.jl +++ b/src/shareglrm.jl @@ -1,12 +1,12 @@ -import Base: size, axpy! -import Base.LinAlg: scale! -import Base.BLAS: gemm! -import Base: shmem_rand, shmem_randn + +import LinearAlgebra: size, axpy! +import LinearAlgebra.BLAS: gemm! +#import Base: shmem_rand, shmem_randn export ShareGLRM, share ### GLRM TYPE -type ShareGLRM{L<:Loss, R<:Regularizer}<:AbstractGLRM +mutable struct ShareGLRM{L<:Loss, R<:Regularizer}<:AbstractGLRM A::SharedArray # The data table transformed into a coded array losses::Array{L,1} # array of loss functions rx::Regularizer # The regularization to be applied to each row of Xᵀ (column of X) diff --git a/src/simple_glrms.jl b/src/simple_glrms.jl index dfba068..91e7648 100644 --- a/src/simple_glrms.jl +++ b/src/simple_glrms.jl @@ -29,7 +29,7 @@ end function kmeans(A::AbstractArray, k::Int; kwargs...) loss = QuadLoss() ry = ZeroReg() - rx = UnitOneSparseConstraint() + rx = UnitOneSparseConstraint() return GLRM(A,loss,rx,ry,k; kwargs...) end @@ -39,4 +39,4 @@ function rpca(A::AbstractArray, k::Int; scale=1.0::Float64, kwargs...) loss = HuberLoss() r = QuadReg(scale) return GLRM(A,loss,r,r,k; kwargs...) -end \ No newline at end of file +end diff --git a/src/utilities/conveniencemethods.jl b/src/utilities/conveniencemethods.jl index 2a281ea..38e7e51 100644 --- a/src/utilities/conveniencemethods.jl +++ b/src/utilities/conveniencemethods.jl @@ -7,7 +7,7 @@ export copy, copy_estimate, GLRM for T in :[Loss, Regularizer, AbstractGLRM].args @eval function copy(r::$T) - fieldvals = [getfield(r, f) for f in fieldnames(r)] + fieldvals = [getfield(r, f) for f in fieldnames(typeof(r))] return typeof(r)(fieldvals...) end end @@ -18,7 +18,7 @@ function copy_estimate(g::GLRM) g.observed_features,g.observed_examples, copy(g.X),copy(g.Y)) end -# domains are immutable, so this is ok +# domains are struct, so this is ok copy(d::Domain) = d ############################################################## diff --git a/src/utilities/deprecated.jl b/src/utilities/deprecated.jl index a411687..a7eb735 100644 --- a/src/utilities/deprecated.jl +++ b/src/utilities/deprecated.jl @@ -1,6 +1,6 @@ -using Base.depwarn +using Base: depwarn -@compat Base.@deprecate GLRM(A::AbstractArray, obs::Array{Tuple{Int, Int}, 1}, args...; kwargs...) GLRM(A, args...; obs = obs, kwargs...) +Base.@deprecate GLRM(A::AbstractArray, obs::Array{Tuple{Int, Int}, 1}, args...; kwargs...) GLRM(A, args...; obs = obs, kwargs...) 
Base.@deprecate ProxGradParams(s::Number,m::Int,c::Float64,ms::Float64) ProxGradParams(s, max_iter=m, abs_tol=c, min_stepsize=ms) diff --git a/test/basic_functionality.jl b/test/basic_functionality.jl index e00b448..2bec12c 100644 --- a/test/basic_functionality.jl +++ b/test/basic_functionality.jl @@ -1,6 +1,6 @@ using LowRankModels # tests basic functionality of glrm.jl -srand(1); +Random.seed!(1); m,n,k,s = 100,100,5,100*100; # matrix to encode X_real, Y_real = randn(m,k), randn(k,n); @@ -12,7 +12,7 @@ glrm = GLRM(A,losses,rx,ry,5, scale=false, offset=false, X=randn(k,m), Y=randn(k p = Params(1, max_iter=200, abs_tol=0.0000001, min_stepsize=0.001) @time X,Y,ch = fit!(glrm, params=p); Ah = X'*Y; -p.abs_tol > abs(vecnorm(A-Ah)^2 - ch.objective[end]) +p.abs_tol > abs(norm(A-Ah)^2 - ch.objective[end]) function validate_folds(trf,tre,tsf,tse) for i=1:length(trf) diff --git a/test/err_test.jl b/test/err_test.jl index 0a8b77d..9b0fb3b 100644 --- a/test/err_test.jl +++ b/test/err_test.jl @@ -1,5 +1,5 @@ using LowRankModels -srand(1); +Random.seed!(1); test_losses = Loss[ QuadLoss(), diff --git a/test/hello_world.jl b/test/hello_world.jl index 4ff2c96..05f1477 100644 --- a/test/hello_world.jl +++ b/test/hello_world.jl @@ -1,23 +1,23 @@ using LowRankModels # loss types to test real_loss_types = [QuadLoss, HuberLoss] bool_loss_types = [HingeLoss] ordinal_loss_types = [OrdinalHingeLoss, BvSLoss] categorical_loss_types = [MultinomialLoss, OvALoss] #instantiate losses ncat = 4 # maximum categorical levels nord = 5 # maximum ordinal levels real_losses = [l() for l in real_loss_types] bool_losses = [l() for l in bool_loss_types] ordinal_losses = [l(rand(3:nord)) for l in ordinal_loss_types] categorical_losses = [l(rand(3:ncat)) for l in categorical_loss_types] losses = [real_losses..., bool_losses..., ordinal_losses..., categorical_losses...]
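(Aside, not part of the patch: after this migration, in-place loss rescaling uses mul! where scale! was used before. Note that, as defined in losses.jl above, mul!(l, s) assigns l.scale = s rather than multiplying, and s * l returns a copy whose scale is set the same way.)

    l = QuadLoss()
    mul!(l, 0.5)    # sets l.scale to 0.5 in place (the replacement for scale!)
    l2 = 2.0 * l    # copy of l whose scale is set to 2.0 via mul!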
# scale losses for different columns for loss in losses - scale!(loss, rand()) + mul!(loss, rand()) end # regularizers to test diff --git a/test/init_test.jl b/test/init_test.jl index 837bd80..e53d79a 100644 --- a/test/init_test.jl +++ b/test/init_test.jl @@ -1,4 +1,4 @@ -using Base.Test +using Test using LowRankModels ## Tests for NNDSVD initialization diff --git a/test/loss_test.jl b/test/loss_test.jl index feb2f7a..03b3d18 100644 --- a/test/loss_test.jl +++ b/test/loss_test.jl @@ -1,7 +1,7 @@ using LowRankModels # test losses in losses.jl -srand(1); +Random.seed!(1); losses = [ QuadLoss(), diff --git a/test/mult_reg.jl b/test/mult_reg.jl index 3abc649..5159c2b 100644 --- a/test/mult_reg.jl +++ b/test/mult_reg.jl @@ -1,5 +1,5 @@ using LowRankModels -using Base.Test +using Test # Parameters n = 200 @@ -9,7 +9,7 @@ eta = 0.01 delta = 1e-3 # Generate problem -srand(1) +Random.seed!(1) Um = randn(r, n) Vm = randn(r, m) U = Um .+ sqrt(eta) * randn(r, n) diff --git a/test/prob_tests/BvSLoss.jl b/test/prob_tests/BvSLoss.jl index 40caedb..f38e902 100644 --- a/test/prob_tests/BvSLoss.jl +++ b/test/prob_tests/BvSLoss.jl @@ -1,10 +1,11 @@ -using LowRankModels +using LowRankModels, Random import StatsBase: sample, Weights +import LinearAlgebra: norm # test ordistic loss ## generate data -srand(1); +Random.seed!(1); m,n,k = 100,100,3; kfit = k+1 nlevels = 5; # number of levels @@ -20,7 +21,7 @@ for j=1:n # this scheme doesn't work to ensure uniform sampling T_real[:,j] = sort(T_real[:,j]) end -signedsums = @compat Array{Float64}(d, nlevels) +signedsums = Array{Float64}(undef, d, nlevels) for i=1:d for j=1:nlevels signedsums[i,j] = i