diff --git a/.codecov.yml b/.codecov.yml
index aa8c4305..77d8b35c 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -6,3 +6,5 @@ coverage:
     project:
       default:
         only_pulls: true # Only show the `codecov/project` commit status on pull requests.
+ignore:
+  - "src/solvers/lib/*.jl"
diff --git a/Project.toml b/Project.toml
index d6743ec9..46911515 100644
--- a/Project.toml
+++ b/Project.toml
@@ -2,7 +2,6 @@ name = "SparseArrays"
 uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [deps]
-DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -16,4 +15,4 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Dates", "Test", "Printf", "InteractiveUtils"]
+test = ["Dates", "InteractiveUtils", "Printf", "Test"]
diff --git a/README.md b/README.md
index 2e4b8936..3eeb0af6 100644
--- a/README.md
+++ b/README.md
@@ -19,3 +19,15 @@
 This package ships as part of the Julia stdlib.
 
 SparseArrays.jl provides functionality for working with sparse arrays in Julia.
+
+## Using development versions of this package
+
+To use a newer version of this package, you need to build Julia from scratch. The build process is the same as any other build except that `DEPS_GIT="SparseArrays"` should be passed to `make` i.e. `make DEPS_GIT="SparseArrays"`. Then after the build is complete, the git repo in `stdlib/SparseArrays` can be used to check out the desired commit/branch. Alternatively, you need to change the commit used in `stdlib/SparseArrays.version`. It is also possible to do both to get an up to date git repo directly. There is no need to rebuild julia in case of changes in `stdlib/SparseArrays` (or `SparseArrays-<sha1>` if `DEPS_GIT` was not used) as the package is not in the sysimage but having a git repo is convenient.
+
+It's also possible to load a development version of the package using [the trick used in the Section named "Using the development version of Pkg.jl" in the `Pkg.jl` repo](https://github.com/JuliaLang/Pkg.jl#using-the-development-version-of-pkgjl), but the capabilities are limited as all other packages will depend on the stdlib version of the package and will not work with the modified package. 
+
+The main environment may become inconsistent so you might need to run `Pkg.instantiate()` and/or `Pkg.resolve()` in the main or project environments if Julia complains about missing `Serialization.jl` in this package's dependencies. 
+
+For older (1.8 and before) `SuiteSparse.jl` needs to be bumped too.
+
+
diff --git a/docs/src/index.md b/docs/src/index.md
index b44c206b..9cfafbc4 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -35,10 +35,13 @@ one place over.
 All operations on sparse matrices are carefully implemented to exploit the CSC data structure
 for performance, and to avoid expensive operations.
 
-If you have data in CSC format from a different application or library, and wish to import it
-in Julia, make sure that you use 1-based indexing. The row indices in every column need to be
-sorted. If your `SparseMatrixCSC` object contains unsorted row indices, one quick way to sort
-them is by doing a double transpose.
+If you have data in CSC format from a different application or
+library, and wish to import it in Julia, make sure that you use
+1-based indexing. The row indices in every column need to be sorted,
+and if they are not, the matrix will display incorrectly.  If your
+`SparseMatrixCSC` object contains unsorted row indices, one quick way
+to sort them is by doing a double transpose. Since the transpose operation
+is lazy, make a copy to materialize each transpose.
 
 In some applications, it is convenient to store explicit zero values in a `SparseMatrixCSC`. These
 *are* accepted by functions in `Base` (but there is no guarantee that they will be preserved in
@@ -242,7 +245,7 @@ Several other Julia packages provide sparse matrix implementations that should b
 
 1. [SuiteSparseGraphBLAS.jl](https://github.com/JuliaSparse/SuiteSparseGraphBLAS.jl) is a wrapper over the fast, multithreaded SuiteSparse:GraphBLAS C library. On CPU this is typically the fastest option, often significantly outperforming MKLSparse.
 
-2. [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) exposes the [CUSPARSE](https://docs.nvidia.com/cuda/cusparse/index.html) library for GPU sparse matrix operations. 
+2. [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) exposes the [CUSPARSE](https://docs.nvidia.com/cuda/cusparse/index.html) library for GPU sparse matrix operations.
 
 3. [SparseMatricesCSR.jl](https://github.com/gridap/SparseMatricesCSR.jl) provides a Julia native implementation of the Compressed Sparse Rows (CSR) format.
 
diff --git a/src/SparseArrays.jl b/src/SparseArrays.jl
index 4b49eb7f..9264480c 100644
--- a/src/SparseArrays.jl
+++ b/src/SparseArrays.jl
@@ -32,6 +32,16 @@ export AbstractSparseArray, AbstractSparseMatrix, AbstractSparseVector,
     sprand, sprandn, spzeros, nnz, permute, findnz,  fkeep!, ftranspose!,
     sparse_hcat, sparse_vcat, sparse_hvcat
 
+# helper function needed in sparsematrix, sparsevector and higherorderfns
+# `iszero` and `!iszero` don't guarantee to return a boolean but we need one that does
+# to remove the handle the structure of the array.
+@inline _iszero(x) = iszero(x) === true
+@inline _iszero(x::Number) = Base.iszero(x)
+@inline _iszero(x::AbstractArray) = Base.iszero(x)
+@inline _isnotzero(x) = iszero(x) !== true # like `!iszero(x)`, but handles `x::Missing`
+@inline _isnotzero(x::Number) = !iszero(x)
+@inline _isnotzero(x::AbstractArray) = !iszero(x)
+
 include("abstractsparse.jl")
 include("sparsematrix.jl")
 include("sparseconvert.jl")
diff --git a/src/abstractsparse.jl b/src/abstractsparse.jl
index e4af4db2..40360b12 100644
--- a/src/abstractsparse.jl
+++ b/src/abstractsparse.jl
@@ -75,13 +75,13 @@ end
 
 # The following two methods should be overloaded by concrete types to avoid
 # allocating the I = findall(...)
-_sparse_findnextnz(v::AbstractSparseArray, i) = (I = findall(!iszero, v); n = searchsortedfirst(I, i); n<=length(I) ? I[n] : nothing)
-_sparse_findprevnz(v::AbstractSparseArray, i) = (I = findall(!iszero, v); n = searchsortedlast(I, i);  !iszero(n)   ? I[n] : nothing)
+_sparse_findnextnz(v::AbstractSparseArray, i) = (I = findall(_isnotzero, v); n = searchsortedfirst(I, i); n<=length(I) ? I[n] : nothing)
+_sparse_findprevnz(v::AbstractSparseArray, i) = (I = findall(_isnotzero, v); n = searchsortedlast(I, i);  _isnotzero(n) ? I[n] : nothing)
 
 function findnext(f::Function, v::AbstractSparseArray, i)
     # short-circuit the case f == !iszero because that avoids
     # allocating e.g. zero(BigInt) for the f(zero(...)) test.
-    if nnz(v) == length(v) || (f != (!iszero) && f(zero(eltype(v))))
+    if nnz(v) == length(v) || (f != (!iszero) && f != _isnotzero && f(zero(eltype(v))))
         return invoke(findnext, Tuple{Function,Any,Any}, f, v, i)
     end
     j = _sparse_findnextnz(v, i)
@@ -94,7 +94,7 @@ end
 function findprev(f::Function, v::AbstractSparseArray, i)
     # short-circuit the case f == !iszero because that avoids
     # allocating e.g. zero(BigInt) for the f(zero(...)) test.
-    if nnz(v) == length(v) || (f != (!iszero) && f(zero(eltype(v))))
+    if nnz(v) == length(v) || (f != (!iszero) && f != _isnotzero && f(zero(eltype(v))))
         return invoke(findprev, Tuple{Function,Any,Any}, f, v, i)
     end
     j = _sparse_findprevnz(v, i)
@@ -133,3 +133,54 @@ Equivalent to `zip(findnz(A)...)` but does not allocated
 function iternz end
 
 widelength(x::AbstractSparseArray) = prod(Int64.(size(x)))
+
+
+const _restore_scalar_indexing = Expr[]
+const _destroy_scalar_indexing = Expr[]
+"""
+    @RCI f
+
+records the function `f` to be overwritten (and restored) with `allowscalar(::Bool)`. This is an
+experimental feature.
+
+Note that it will evaluate the function in the top level of the package. The original code for `f`
+is stored in `_restore_scalar_indexing` and a function that has the same definition as `f` but 
+returns an error is stored in `_destroy_scalar_indexing`.
+"""
+macro RCI(exp)
+    # evaluate to not push any broken code in the arrays when developping this package. 
+    # also ensures that restore has the exact same effect.
+    @eval $exp
+    if length(exp.args) == 2 && exp.head ∈ (:function, :(=))
+        push!(_restore_scalar_indexing, exp)
+        push!(_destroy_scalar_indexing,
+            Expr(exp.head,
+            exp.args[1],
+            :(error("scalar indexing was turned off"))))
+    else
+        error("can't parse expression")
+    end
+    return
+end
+
+"""
+    allowscalar(::Bool)
+
+An experimental function that allows one to disable and re-enable scalar indexing for sparse matrices and vectors. 
+
+`allowscalar(false)` will disable scalar indexing for sparse matrices and vectors. 
+`allowscalar(true)` will restore the original scalar indexing functionality.
+
+Since this function overwrites existing definitions, it will lead to recompilation. It is useful mainly when testing
+code for devices such as [GPUs](https://cuda.juliagpu.org/stable/usage/workflow/), where the presence of scalar indexing can lead to substantial slowdowns. 
+Disabling scalar indexing during such tests can help identify performance bottlenecks quickly.
+"""
+allowscalar(p::Bool) = if p
+    for i in _restore_scalar_indexing
+        @eval $i
+    end
+else
+    for i in _destroy_scalar_indexing
+        @eval $i
+    end
+end
diff --git a/src/higherorderfns.jl b/src/higherorderfns.jl
index 8381bc82..628b84cb 100644
--- a/src/higherorderfns.jl
+++ b/src/higherorderfns.jl
@@ -9,7 +9,8 @@ import Base: map, map!, broadcast, copy, copyto!
 using Base: front, tail, to_shape
 using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrixCSC,
                       AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange, spzeros,
-                      SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros, rowvals, getcolptr, widelength
+                      SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros,
+                      rowvals, getcolptr, widelength, _iszero, _isnotzero
 using Base.Broadcast: BroadcastStyle, Broadcasted, flatten
 using LinearAlgebra
 
@@ -202,9 +203,6 @@ end
 # helper functions for map[!]/broadcast[!] entry points (and related methods below)
 @inline _sumnnzs(A) = nnz(A)
 @inline _sumnnzs(A, Bs...) = nnz(A) + _sumnnzs(Bs...)
-@inline _iszero(x) = x == 0
-@inline _iszero(x::Number) = Base.iszero(x)
-@inline _iszero(x::AbstractArray) = Base.iszero(x)
 @inline _zeros_eltypes(A) = (zero(eltype(A)),)
 @inline _zeros_eltypes(A, Bs...) = (zero(eltype(A)), _zeros_eltypes(Bs...)...)
 @inline _promote_indtype(A) = indtype(A)
@@ -244,7 +242,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
         setcolptr!(C, j, Ck)
         for Ak in colrange(A, j)
             Cx = f(storedvals(A)[Ak])
-            if !_iszero(Cx)
+            if _isnotzero(Cx)
                 Ck > spaceC && (spaceC = expandstorage!(C, Ck + nnz(A) - (Ak - 1)))
                 storedinds(C)[Ck] = storedinds(A)[Ak]
                 storedvals(C)[Ck] = Cx
@@ -325,7 +323,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVe
             # cases are equally or more likely than the Ai < Bi and Bi < Ai cases. Hence
             # the ordering of the conditional chain above differs from that in the
             # corresponding broadcast code (below).
-            if !_iszero(Cx)
+            if _isnotzero(Cx)
                 Ck > spaceC && (spaceC = expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1))))
                 storedinds(C)[Ck] = Ci
                 storedvals(C)[Ck] = Cx
@@ -386,7 +384,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N})
         while activerow < rowsentinel
             vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
             Cx = f(vals...)
-            if !_iszero(Cx)
+            if _isnotzero(Cx)
                 Ck > spaceC && (spaceC = expandstorage!(C, Int(min(widelength(C), Ck + _sumnnzs(As...) - (sum(ks) - N)))))
                 storedinds(C)[Ck] = activerow
                 storedvals(C)[Ck] = Cx
@@ -477,7 +475,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where
             bccolrangejA = numcols(A) == 1 ? colrange(A, 1) : colrange(A, j)
             for Ak in bccolrangejA
                 Cx = f(storedvals(A)[Ak])
-                if !_iszero(Cx)
+                if _isnotzero(Cx)
                     Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A)))
                     storedinds(C)[Ck] = storedinds(A)[Ak]
                     storedvals(C)[Ck] = Cx
@@ -496,7 +494,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where
             # contains a nonzero value x but f(Ax) is nonetheless zero, so we need store
             # nothing in C's jth column. if to the contrary fofAx is nonzero, then we must
             # densely populate C's jth column with fofAx.
-            if !_iszero(fofAx)
+            if _isnotzero(fofAx)
                 for Ci::indtype(C) in 1:numrows(C)
                     Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A)))
                     storedinds(C)[Ck] = Ci
@@ -599,7 +597,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
                 # pattern) the Ai < Bi and Bi < Ai cases are equally or more likely than the
                 # Ai == Bi and termination cases. Hence the ordering of the conditional
                 # chain above differs from that in the corresponding map code.
-                if !_iszero(Cx)
+                if _isnotzero(Cx)
                     Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                     storedinds(C)[Ck] = Ci
                     storedvals(C)[Ck] = Cx
@@ -616,7 +614,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
             Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
             Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
             Cx = f(Ax, Bx)
-            if !_iszero(Cx)
+            if _isnotzero(Cx)
                 for Ci::indtype(C) in 1:numrows(C)
                     Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                     storedinds(C)[Ck] = Ci
@@ -638,7 +636,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
                 # B's jth column without storing every entry in C's jth column
                 while Bk < stopBk
                     Cx = f(Ax, storedvals(B)[Bk])
-                    if !_iszero(Cx)
+                    if _isnotzero(Cx)
                         Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                         storedinds(C)[Ck] = storedinds(B)[Bk]
                         storedvals(C)[Ck] = Cx
@@ -657,7 +655,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
                     else
                         Cx = fvAzB
                     end
-                    if !_iszero(Cx)
+                    if _isnotzero(Cx)
                         Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                         storedinds(C)[Ck] = Ci
                         storedvals(C)[Ck] = Cx
@@ -679,7 +677,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
                 # A's jth column without storing every entry in C's jth column
                 while Ak < stopAk
                     Cx = f(storedvals(A)[Ak], Bx)
-                    if !_iszero(Cx)
+                    if _isnotzero(Cx)
                         Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                         storedinds(C)[Ck] = storedinds(A)[Ak]
                         storedvals(C)[Ck] = Cx
@@ -698,7 +696,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
                     else
                         Cx = fzAvB
                     end
-                    if !_iszero(Cx)
+                    if _isnotzero(Cx)
                         Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
                         storedinds(C)[Ck] = Ci
                         storedvals(C)[Ck] = Cx
@@ -888,7 +886,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMa
             while activerow < rowsentinel
                 args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
                 Cx = f(args...)
-                if !_iszero(Cx)
+                if _isnotzero(Cx)
                     Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As)))
                     storedinds(C)[Ck] = activerow
                     storedvals(C)[Ck] = Cx
@@ -905,7 +903,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMa
                 else
                     Cx = defaultCx
                 end
-                if !_iszero(Cx)
+                if _isnotzero(Cx)
                     Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As)))
                     storedinds(C)[Ck] = Ci
                     storedvals(C)[Ck] = Cx
diff --git a/src/linalg.jl b/src/linalg.jl
index 357d468b..a1f675d1 100644
--- a/src/linalg.jl
+++ b/src/linalg.jl
@@ -341,7 +341,7 @@ function dot(x::AbstractVector, A::AbstractSparseMatrixCSC, y::AbstractVector)
     nzvals = getnzval(A)
     @inbounds for col in 1:n
         ycol = y[col]
-        if !iszero(ycol)
+        if _isnotzero(ycol)
             temp = zero(T)
             for k in nzrange(A, col)
                 temp += adjoint(x[rvals[k]]) * nzvals[k]
@@ -1450,7 +1450,7 @@ kron(A::SparseVectorUnion, B::AdjOrTransSparseVectorUnion) = A .* B
 
 ## det, inv, cond
 
-inv(A::AbstractSparseMatrixCSC) = error("The inverse of a sparse matrix can often be dense and can cause the computer to run out of memory. If you are sure you have enough memory, please convert your matrix to a dense matrix, e.g. by calling `Matrix`.")
+inv(A::AbstractSparseMatrixCSC) = error("The inverse of a sparse matrix can often be dense and can cause the computer to run out of memory. If you are sure you have enough memory, please either convert your matrix to a dense matrix, e.g. by calling `Matrix` or if `A` can be factorized, use `\\` on the dense identity matrix, e.g. `A \\ Matrix{eltype(A)}(I, size(A)...)` restrictions of `\\` on sparse lhs applies. Altenatively, `A\\b` is generally preferable to `inv(A)*b`")
 
 # TODO
 
diff --git a/src/solvers/cholmod.jl b/src/solvers/cholmod.jl
index 64a4c2a7..4881923e 100644
--- a/src/solvers/cholmod.jl
+++ b/src/solvers/cholmod.jl
@@ -138,7 +138,7 @@ macro cholmod_param(kwarg, code)
     value = kwarg.args[2]
 
     common_param = # Read `common.param`
-        Expr(:., :(COMMONS[Threads.threadid()][]), QuoteNode(param))
+        Expr(:., :(task_local_storage(:cholmod_common)[]), QuoteNode(param))
 
     return quote
         default_value = $common_param
@@ -151,7 +151,18 @@ macro cholmod_param(kwarg, code)
     end
 end
 
-const COMMONS = Vector{Ref{cholmod_common}}()
+function newcommon(; print = 0)
+    common = finalizer(cholmod_l_finish, Ref(cholmod_common()))
+    result = cholmod_l_start(common)
+    @assert result == TRUE "failed to run `cholmod_l_start`!"
+    common[].print = 0  # no printing from CHOLMOD by default
+    common[].error_handler = @cfunction(error_handler, Cvoid, (Cint, Cstring, Cint, Cstring))
+    return common
+end
+
+function getcommon()
+    return get!(newcommon, task_local_storage(), :cholmod_common)::Ref{cholmod_common}
+end
 
 const BUILD_VERSION = VersionNumber(CHOLMOD_MAIN_VERSION, CHOLMOD_SUB_VERSION, CHOLMOD_SUBSUB_VERSION)
 
@@ -218,20 +229,6 @@ function __init__()
                  """
         end
 
-        ### Initiate CHOLMOD
-        ### common controls the type of factorization and keeps pointers
-        ### to temporary memory. We need to manage a copy for each thread.
-        nt = Threads.nthreads()
-        resize!(COMMONS, nt)
-        errorhandler = @cfunction(error_handler, Cvoid, (Cint, Cstring, Cint, Cstring))
-        for i in 1:nt
-            COMMONS[i] = Ref(cholmod_common())
-            result = cholmod_l_start(COMMONS[i])
-            @assert result == TRUE "failed to run `cholmod_l_start`!"
-            COMMONS[i][].print = 0  # no printing from CHOLMOD by default
-            COMMONS[i][].error_handler = errorhandler
-        end
-
         # Register gc tracked allocator if CHOLMOD is new enough
         if current_version >= v"3.0.0"
             cnfg = cglobal((:SuiteSparse_config, :libsuitesparseconfig), Ptr{Cvoid})
@@ -390,35 +387,35 @@ Factor(FC::FactorComponent) = FC.F
 
 # Dense wrappers
 function allocate_dense(m::Integer, n::Integer, d::Integer, ::Type{Tv}) where {Tv<:VTypes}
-    Dense{Tv}(cholmod_l_allocate_dense(m, n, d, xtyp(Tv), COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_allocate_dense(m, n, d, xtyp(Tv), getcommon()))
 end
 
 function free!(p::Ptr{cholmod_dense})
-    cholmod_l_free_dense(Ref(p), COMMONS[Threads.threadid()]) == TRUE
+    cholmod_l_free_dense(Ref(p), getcommon()) == TRUE
 end
 
 function zeros(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense{Tv}(cholmod_l_zeros(m, n, xtyp(Tv), COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_zeros(m, n, xtyp(Tv), getcommon()))
 end
 zeros(m::Integer, n::Integer) = zeros(m, n, Float64)
 
 function ones(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense{Tv}(cholmod_l_ones(m, n, xtyp(Tv), COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_ones(m, n, xtyp(Tv), getcommon()))
 end
 ones(m::Integer, n::Integer) = ones(m, n, Float64)
 
 function eye(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense{Tv}(cholmod_l_eye(m, n, xtyp(Tv), COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_eye(m, n, xtyp(Tv), getcommon()))
 end
 eye(m::Integer, n::Integer) = eye(m, n, Float64)
 eye(n::Integer) = eye(n, n, Float64)
 
 function copy(A::Dense{Tv}) where Tv<:VTypes
-    Dense{Tv}(cholmod_l_copy_dense(A, COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_copy_dense(A, getcommon()))
 end
 
 function sort!(S::Sparse{Tv}) where Tv<:VTypes
-    cholmod_l_sort(S, COMMONS[Threads.threadid()])
+    cholmod_l_sort(S, getcommon())
     return S
 end
 
@@ -431,93 +428,93 @@ function norm_dense(D::Dense{Tv}, p::Integer) where Tv<:VTypes
     elseif p != 0 && p != 1
         throw(ArgumentError("second argument must be either 0 (Inf norm), 1, or 2"))
     end
-    cholmod_l_norm_dense(D, p, COMMONS[Threads.threadid()])
+    cholmod_l_norm_dense(D, p, getcommon())
 end
 
 function check_dense(A::Dense{Tv}) where Tv<:VTypes
-    cholmod_l_check_dense(pointer(A), COMMONS[Threads.threadid()]) != 0
+    cholmod_l_check_dense(pointer(A), getcommon()) != 0
 end
 
 # Non-Dense wrappers
 function allocate_sparse(nrow::Integer, ncol::Integer, nzmax::Integer,
         sorted::Bool, packed::Bool, stype::Integer, ::Type{Tv}) where {Tv<:VTypes}
     Sparse{Tv}(cholmod_l_allocate_sparse(nrow, ncol, nzmax, sorted, packed, stype,
-                                         xtyp(Tv), COMMONS[Threads.threadid()]))
+                                         xtyp(Tv), getcommon()))
 end
 
 function free!(ptr::Ptr{cholmod_sparse})
-    cholmod_l_free_sparse(Ref(ptr), COMMONS[Threads.threadid()]) == TRUE
+    cholmod_l_free_sparse(Ref(ptr), getcommon()) == TRUE
 end
 
 function free!(ptr::Ptr{cholmod_factor})
     # Warning! Important that finalizer doesn't modify the global Common struct.
-    cholmod_l_free_factor(Ref(ptr), COMMONS[Threads.threadid()]) == TRUE
+    cholmod_l_free_factor(Ref(ptr), getcommon()) == TRUE
 end
 
 function aat(A::Sparse{Tv}, fset::Vector{SuiteSparse_long}, mode::Integer) where Tv<:VRealTypes
-    Sparse{Tv}(cholmod_l_aat(A, fset, length(fset), mode, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_aat(A, fset, length(fset), mode, getcommon()))
 end
 
 function sparse_to_dense(A::Sparse{Tv}) where Tv<:VTypes
-    Dense{Tv}(cholmod_l_sparse_to_dense(A, COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_sparse_to_dense(A, getcommon()))
 end
 function dense_to_sparse(D::Dense{Tv}, ::Type{SuiteSparse_long}) where Tv<:VTypes
-    Sparse{Tv}(cholmod_l_dense_to_sparse(D, true, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_dense_to_sparse(D, true, getcommon()))
 end
 
 function factor_to_sparse!(F::Factor{Tv}) where Tv<:VTypes
     ss = unsafe_load(pointer(F))
     ss.xtype == CHOLMOD_PATTERN && throw(CHOLMODException("only numeric factors are supported"))
-    Sparse{Tv}(cholmod_l_factor_to_sparse(F, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_factor_to_sparse(F, getcommon()))
 end
 
 function change_factor!(F::Factor{Tv}, to_ll::Bool, to_super::Bool, to_packed::Bool,
                         to_monotonic::Bool) where Tv<:VTypes
-    cholmod_l_change_factor(xtyp(Tv), to_ll, to_super, to_packed, to_monotonic, F, COMMONS[Threads.threadid()]) == TRUE
+    cholmod_l_change_factor(xtyp(Tv), to_ll, to_super, to_packed, to_monotonic, F, getcommon()) == TRUE
 end
 
 function check_sparse(A::Sparse{Tv}) where Tv<:VTypes
-    cholmod_l_check_sparse(A, COMMONS[Threads.threadid()]) != 0
+    cholmod_l_check_sparse(A, getcommon()) != 0
 end
 
 function check_factor(F::Factor{Tv}) where Tv<:VTypes
-    cholmod_l_check_factor(F, COMMONS[Threads.threadid()]) != 0
+    cholmod_l_check_factor(F, getcommon()) != 0
 end
 
-nnz(A::Sparse{<:VTypes}) = cholmod_l_nnz(A, COMMONS[Threads.threadid()])
+nnz(A::Sparse{<:VTypes}) = cholmod_l_nnz(A, getcommon())
 
 function speye(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Sparse{Tv}(cholmod_l_speye(m, n, xtyp(Tv), COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_speye(m, n, xtyp(Tv), getcommon()))
 end
 
 function spzeros(m::Integer, n::Integer, nzmax::Integer, ::Type{Tv}) where Tv<:VTypes
-    Sparse{Tv}(cholmod_l_spzeros(m, n, nzmax, xtyp(Tv), COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_spzeros(m, n, nzmax, xtyp(Tv), getcommon()))
 end
 
 function transpose_(A::Sparse{Tv}, values::Integer) where Tv<:VTypes
-    Sparse{Tv}(cholmod_l_transpose(A, values, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_transpose(A, values, getcommon()))
 end
 
 function copy(F::Factor{Tv}) where Tv<:VTypes
-    Factor{Tv}(cholmod_l_copy_factor(F, COMMONS[Threads.threadid()]))
+    Factor{Tv}(cholmod_l_copy_factor(F, getcommon()))
 end
 function copy(A::Sparse{Tv}) where Tv<:VTypes
-    Sparse{Tv}(cholmod_l_copy_sparse(A, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_copy_sparse(A, getcommon()))
 end
 function copy(A::Sparse{Tv}, stype::Integer, mode::Integer) where Tv<:VRealTypes
-    Sparse{Tv}(cholmod_l_copy(A, stype, mode, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_copy(A, stype, mode, getcommon()))
 end
 
 function print_sparse(A::Sparse{Tv}, name::String) where Tv<:VTypes
     isascii(name) || error("non-ASCII name: $name")
     @cholmod_param print = 3 begin
-        cholmod_l_print_sparse(A, name, COMMONS[Threads.threadid()])
+        cholmod_l_print_sparse(A, name, getcommon())
     end
     nothing
 end
 function print_factor(F::Factor{Tv}, name::String) where Tv<:VTypes
     @cholmod_param print = 3 begin
-        cholmod_l_print_factor(F, name, COMMONS[Threads.threadid()])
+        cholmod_l_print_factor(F, name, getcommon())
     end
     nothing
 end
@@ -529,18 +526,18 @@ function ssmult(A::Sparse{Tv}, B::Sparse{Tv}, stype::Integer,
     if lA.ncol != lB.nrow
         throw(DimensionMismatch("inner matrix dimensions do not fit"))
     end
-    Sparse{Tv}(cholmod_l_ssmult(A, B, stype, values, sorted, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_ssmult(A, B, stype, values, sorted, getcommon()))
 end
 
 function norm_sparse(A::Sparse{Tv}, norm::Integer) where Tv<:VTypes
     if norm != 0 && norm != 1
         throw(ArgumentError("norm argument must be either 0 or 1"))
     end
-    cholmod_l_norm_sparse(A, norm, COMMONS[Threads.threadid()])
+    cholmod_l_norm_sparse(A, norm, getcommon())
 end
 
 function horzcat(A::Sparse{Tv}, B::Sparse{Tv}, values::Bool) where Tv<:VRealTypes
-    Sparse{Tv}(cholmod_l_horzcat(A, B, values, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_horzcat(A, B, values, getcommon()))
 end
 
 function scale!(S::Dense{Tv}, scale::Integer, A::Sparse{Tv}) where Tv<:VRealTypes
@@ -567,7 +564,7 @@ function scale!(S::Dense{Tv}, scale::Integer, A::Sparse{Tv}) where Tv<:VRealType
     end
 
     sA = unsafe_load(pointer(A))
-    cholmod_l_scale(S, scale, A, COMMONS[Threads.threadid()])
+    cholmod_l_scale(S, scale, A, getcommon())
     A
 end
 
@@ -579,12 +576,12 @@ function sdmult!(A::Sparse{Tv}, transpose::Bool,
     if nc != size(X, 1)
         throw(DimensionMismatch("incompatible dimensions, $nc and $(size(X,1))"))
     end
-    cholmod_l_sdmult(A, transpose, Ref(α), Ref(β), X, Y, COMMONS[Threads.threadid()])
+    cholmod_l_sdmult(A, transpose, Ref(α), Ref(β), X, Y, getcommon())
     Y
 end
 
 function vertcat(A::Sparse{Tv}, B::Sparse{Tv}, values::Bool) where Tv<:VRealTypes
-    Sparse{Tv}(cholmod_l_vertcat(A, B, values, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_vertcat(A, B, values, getcommon()))
 end
 
 function symmetry(A::Sparse{Tv}, option::Integer) where Tv<:VTypes
@@ -593,27 +590,27 @@ function symmetry(A::Sparse{Tv}, option::Integer) where Tv<:VTypes
     nzoffdiag = Ref{SuiteSparse_long}()
     nzdiag = Ref{SuiteSparse_long}()
     rv = cholmod_l_symmetry(A, option, xmatched, pmatched,
-                            nzoffdiag, nzdiag, COMMONS[Threads.threadid()])
+                            nzoffdiag, nzdiag, getcommon())
     rv, xmatched[], pmatched[], nzoffdiag[], nzdiag[]
 end
 
 # For analyze, analyze_p, and factorize_p!, the Common argument must be
 # supplied in order to control if the factorization is LLt or LDLt
 function analyze(A::Sparse{Tv}) where Tv<:VTypes
-    Factor{Tv}(cholmod_l_analyze(A, COMMONS[Threads.threadid()]))
+    Factor{Tv}(cholmod_l_analyze(A, getcommon()))
 end
 function analyze_p(A::Sparse{Tv}, perm::Vector{SuiteSparse_long}) where Tv<:VTypes
     length(perm) != size(A,1) && throw(BoundsError())
-    Factor{Tv}(cholmod_l_analyze_p(A, perm, C_NULL, 0, COMMONS[Threads.threadid()]))
+    Factor{Tv}(cholmod_l_analyze_p(A, perm, C_NULL, 0, getcommon()))
 end
 function factorize!(A::Sparse{Tv}, F::Factor{Tv}) where Tv<:VTypes
-    cholmod_l_factorize(A, F, COMMONS[Threads.threadid()])
+    cholmod_l_factorize(A, F, getcommon())
     F
 end
 function factorize_p!(A::Sparse{Tv}, β::Real, F::Factor{Tv}) where Tv<:VTypes
     # note that β is passed as a complex number (double beta[2]),
     # but the CHOLMOD manual says that only beta[0] (real part) is used
-    cholmod_l_factorize_p(A, Ref{Cdouble}(β), C_NULL, 0, F, COMMONS[Threads.threadid()])
+    cholmod_l_factorize_p(A, Ref{Cdouble}(β), C_NULL, 0, F, getcommon())
     F
 end
 
@@ -630,7 +627,7 @@ function solve(sys::Integer, F::Factor{Tv}, B::Dense{Tv}) where Tv<:VTypes
             throw(LinearAlgebra.ZeroPivotException(s.minor))
         end
     end
-    Dense{Tv}(cholmod_l_solve(sys, F, B, COMMONS[Threads.threadid()]))
+    Dense{Tv}(cholmod_l_solve(sys, F, B, getcommon()))
 end
 
 function spsolve(sys::Integer, F::Factor{Tv}, B::Sparse{Tv}) where Tv<:VTypes
@@ -638,12 +635,12 @@ function spsolve(sys::Integer, F::Factor{Tv}, B::Sparse{Tv}) where Tv<:VTypes
         throw(DimensionMismatch("LHS and RHS should have the same number of rows. " *
             "LHS has $(size(F,1)) rows, but RHS has $(size(B,1)) rows."))
     end
-    Sparse{Tv}(cholmod_l_spsolve(sys, F, B, COMMONS[Threads.threadid()]))
+    Sparse{Tv}(cholmod_l_spsolve(sys, F, B, getcommon()))
 end
 
 # Autodetects the types
 function read_sparse(file::Libc.FILE, ::Type{SuiteSparse_long})
-    Sparse(cholmod_l_read_sparse(file.ptr, COMMONS[Threads.threadid()]))
+    Sparse(cholmod_l_read_sparse(file.ptr, getcommon()))
 end
 
 function read_sparse(file::IO, T)
@@ -1418,7 +1415,7 @@ function lowrankupdowndate!(F::Factor{Tv}, C::Sparse{Tv}, update::Cint) where Tv
     if lF.n != lC.nrow
         throw(DimensionMismatch("matrix dimensions do not fit"))
     end
-    cholmod_l_updown(update, C, F, COMMONS[Threads.threadid()])
+    cholmod_l_updown(update, C, F, getcommon())
     F
 end
 
diff --git a/src/solvers/spqr.jl b/src/solvers/spqr.jl
index 9272c528..5160d8b1 100644
--- a/src/solvers/spqr.jl
+++ b/src/solvers/spqr.jl
@@ -2,7 +2,7 @@
 
 module SPQR
 
-import Base: \
+import Base: \, *
 using Base: require_one_based_indexing
 using LinearAlgebra
 using ..LibSuiteSparse: SuiteSparseQR_C
@@ -66,7 +66,7 @@ function _qr!(ordering::Integer, tol::Real, econ::Integer, getCTX::Integer,
         H,              # m-by-nh Householder vectors
         HPinv,          # size m row permutation
         HTau,           # 1-by-nh Householder coefficients
-        CHOLMOD.COMMONS[Threads.threadid()]) # /* workspace and parameters */
+        CHOLMOD.getcommon()) # /* workspace and parameters */
 
     if rnk < 0
         error("Sparse QR factorization failed")
@@ -83,7 +83,7 @@ function _qr!(ordering::Integer, tol::Real, econ::Integer, getCTX::Integer,
         # Free memory allocated by SPQR. This call will make sure that the
         # correct deallocator function is called and that the memory count in
         # the common struct is updated
-        cholmod_l_free(n, sizeof(CHOLMOD.SuiteSparse_long), e, CHOLMOD.COMMONS[Threads.threadid()])
+        cholmod_l_free(n, sizeof(CHOLMOD.SuiteSparse_long), e, CHOLMOD.getcommon())
     end
     hpinv = HPinv[]
     if hpinv == C_NULL
@@ -96,7 +96,7 @@ function _qr!(ordering::Integer, tol::Real, econ::Integer, getCTX::Integer,
         # Free memory allocated by SPQR. This call will make sure that the
         # correct deallocator function is called and that the memory count in
         # the common struct is updated
-        cholmod_l_free(m, sizeof(CHOLMOD.SuiteSparse_long), hpinv, CHOLMOD.COMMONS[Threads.threadid()])
+        cholmod_l_free(m, sizeof(CHOLMOD.SuiteSparse_long), hpinv, CHOLMOD.getcommon())
     end
 
     return rnk, _E, _HPinv
@@ -167,11 +167,7 @@ julia> A = sparse([1,2,3,4], [1,1,2,2], [1.0,1.0,1.0,1.0])
 julia> qr(A)
 SparseArrays.SPQR.QRSparse{Float64, Int64}
 Q factor:
-4×4 SparseArrays.SPQR.QRSparseQ{Float64, Int64}:
- -0.707107   0.0        0.0       -0.707107
-  0.0       -0.707107  -0.707107   0.0
-  0.0       -0.707107   0.707107   0.0
- -0.707107   0.0        0.0        0.707107
+4×4 SparseArrays.SPQR.QRSparseQ{Float64, Int64}
 R factor:
 2×2 SparseMatrixCSC{Float64, Int64} with 2 stored entries:
  -1.41421    ⋅
@@ -284,6 +280,9 @@ function LinearAlgebra.rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRSparseQ})
     return A
 end
 
+(*)(Q::QRSparseQ, B::SparseMatrixCSC) = sparse(Q) * B
+(*)(A::SparseMatrixCSC, Q::QRSparseQ) = A * sparse(Q)
+
 @inline function Base.getproperty(F::QRSparse, d::Symbol)
     if d === :Q
         return QRSparseQ(F.factors, F.τ, size(F, 2))
@@ -312,6 +311,9 @@ function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, F::QRSparse)
     println(io, "\nColumn permutation:")
     show(io, mime, F.pcol)
 end
+function Base.show(io::IO, ::MIME{Symbol("text/plain")}, Q::QRSparseQ)
+    summary(io, Q)
+end
 
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns
@@ -324,7 +326,7 @@ _ret_size(F::QRSparse, b::AbstractVector) = (size(F, 2),)
 _ret_size(F::QRSparse, B::AbstractMatrix) = (size(F, 2), size(B, 2))
 
 LinearAlgebra.rank(F::QRSparse) = reduce(max, view(rowvals(F.R), 1:nnz(F.R)), init = eltype(rowvals(F.R))(0))
-LinearAlgebra.rank(S::SparseMatrixCSC) = rank(qr(S))
+LinearAlgebra.rank(S::SparseMatrixCSC; tol=_default_tol(S)) = rank(qr(S; tol))
 
 function (\)(F::QRSparse{T}, B::VecOrMat{Complex{T}}) where T<:LinearAlgebra.BlasReal
 # |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
diff --git a/src/solvers/umfpack.jl b/src/solvers/umfpack.jl
index 3c62ca1b..ca58e698 100644
--- a/src/solvers/umfpack.jl
+++ b/src/solvers/umfpack.jl
@@ -9,7 +9,7 @@ using LinearAlgebra
 import LinearAlgebra: Factorization, checksquare, det, logabsdet, lu, lu!, ldiv!
 
 using SparseArrays
-using SparseArrays: getcolptr
+using SparseArrays: getcolptr, AbstractSparseMatrixCSC
 import SparseArrays: nnz
 
 import Serialization: AbstractSerializer, deserialize
@@ -41,7 +41,22 @@ import ..LibSuiteSparse:
     ## Sizes of Control and Info arrays for returning information from solver
     UMFPACK_INFO,
     UMFPACK_CONTROL,
+    # index of the control arrays in ZERO BASED indexing
     UMFPACK_PRL,
+    UMFPACK_DENSE_ROW,
+    UMFPACK_DENSE_COL,
+    UMFPACK_BLOCK_SIZE,
+    UMFPACK_ORDERING,
+    UMFPACK_FIXQ,
+    UMFPACK_AMD_DENSE,
+    UMFPACK_AGGRESSIVE,
+    UMFPACK_SINGLETONS,
+    UMFPACK_ALLOC_INIT,
+    UMFPACK_SYM_PIVOT_TOLERANCE,
+    UMFPACK_SCALE,
+    UMFPACK_FRONT_ALLOC_INIT,
+    UMFPACK_DROPTOL,
+    UMFPACK_IRSTEP,
     ## Status codes
     UMFPACK_OK,
     UMFPACK_WARNING_singular_matrix,
@@ -60,6 +75,23 @@ import ..LibSuiteSparse:
     UMFPACK_ERROR_file_IO,
     UMFPACK_ERROR_ordering_failed
 
+# Julia uses one based indexing so here we are
+const JL_UMFPACK_PRL = UMFPACK_PRL + 1
+const JL_UMFPACK_DENSE_ROW = UMFPACK_DENSE_ROW + 1
+const JL_UMFPACK_DENSE_COL = UMFPACK_DENSE_COL + 1
+const JL_UMFPACK_BLOCK_SIZE = UMFPACK_BLOCK_SIZE + 1
+const JL_UMFPACK_ORDERING = UMFPACK_ORDERING + 1
+const JL_UMFPACK_FIXQ = UMFPACK_FIXQ + 1
+const JL_UMFPACK_AMD_DENSE = UMFPACK_AMD_DENSE + 1
+const JL_UMFPACK_AGGRESSIVE = UMFPACK_AGGRESSIVE + 1
+const JL_UMFPACK_SINGLETONS = UMFPACK_SINGLETONS + 1
+const JL_UMFPACK_ALLOC_INIT = UMFPACK_ALLOC_INIT + 1
+const JL_UMFPACK_SYM_PIVOT_TOLERANCE = UMFPACK_SYM_PIVOT_TOLERANCE + 1
+const JL_UMFPACK_SCALE = UMFPACK_SCALE + 1
+const JL_UMFPACK_FRONT_ALLOC_INIT = UMFPACK_FRONT_ALLOC_INIT + 1
+const JL_UMFPACK_DROPTOL = UMFPACK_DROPTOL + 1
+const JL_UMFPACK_IRSTEP = UMFPACK_IRSTEP + 1
+
 struct MatrixIllConditionedException <: Exception
     msg::String
 end
@@ -119,11 +151,6 @@ const UMFVTypes = Union{Float64,ComplexF64}
 
 ## UMFPACK
 
-# the control and info arrays
-const umf_ctrl = Vector{Float64}(undef, UMFPACK_CONTROL)
-umfpack_dl_defaults(umf_ctrl)
-const umf_info = Vector{Float64}(undef, UMFPACK_INFO)
-
 function show_umf_ctrl(control::Vector{Float64}, level::Real = 2.0)
     old_prt::Float64 = control[1]
     control[1] = Float64(level)
@@ -131,7 +158,7 @@ function show_umf_ctrl(control::Vector{Float64}, level::Real = 2.0)
     control[1] = old_prt
 end
 
-function show_umf_info(control::Vector{Float64}, info::Vector{Float64}=umf_info, level::Real = 2.0)
+function show_umf_info(control::Vector{Float64}, info::Vector{Float64}, level::Real = 2.0)
     old_prt::Float64 = control[1]
     control[1] = Float64(level)
     umfpack_dl_report_info(control, info)
@@ -140,21 +167,28 @@ end
 
 
 
-
 """
 Working space for Umfpack so `ldiv!` doesn't allocate.
 
-To use multiple threads, each thread should have their own workspace that can be allocated using `Base.similar(::UmfpackWS)` and passed as a kwarg to `ldiv!`.
-Alternativly see `duplicate(::UmfpackLU)`
+To use multiple threads, each thread should have their own workspace that can be allocated using `Base.similar(::UmfpackWS)`
+and passed as a kwarg to `ldiv!`. Alternativly see `copy(::UmfpackLU)`. The constructor is overloaded so to create appropriate
+sized working space given the lu factorization or the sparse matrix and if refinement is on.
 """
 struct UmfpackWS{T<:UMFITypes}
     Wi::Vector{T}
     W::Vector{Float64}
 end
 
-# TODO, this actually doesn't need to be this big if iter refinement is off
-UmfpackWS(S::SparseMatrixCSC{Float64, T}) where T = UmfpackWS{T}(Vector{T}(undef, size(S, 2)), Vector{Float64}(undef, 5 * size(S, 2)))
-UmfpackWS(S::SparseMatrixCSC{ComplexF64, T}) where T = UmfpackWS{T}(Vector{T}(undef, size(S, 2)), Vector{Float64}(undef, 10 * size(S, 2)))
+UmfpackWS(S::AbstractSparseMatrixCSC{Tv,Ti}, refinement::Bool) where {Tv,Ti} = UmfpackWS{Ti}(
+    Vector{Ti}(undef, size(S, 2)),
+    Vector{Float64}(undef, workspace_W_size(S, refinement)))
+
+function Base.resize!(W::UmfpackWS, S, refinement::Bool; expand_only=false)
+    (!expand_only || length(W.Wi) < size(S, 2)) && resize!(W.Wi, size(S, 2))
+    ws = workspace_W_size(S, refinement)
+    (!expand_only || length(W.W) < ws) && resize!(W.W, ws)
+    return 
+end
 
 Base.similar(w::UmfpackWS) = UmfpackWS(similar(w.Wi), similar(w.W))
 
@@ -174,10 +208,33 @@ mutable struct UmfpackLU{Tv<:UMFVTypes,Ti<:UMFITypes} <: Factorization{Tv}
     lock::ReentrantLock
 end
 
+workspace_W_size(F::UmfpackLU) = workspace_W_size(F, has_refinement(F))
+workspace_W_size(S::Union{UmfpackLU{<:AbstractFloat}, AbstractSparseMatrixCSC{<:AbstractFloat}}, refinement::Bool) = refinement ? 5 * size(S, 2) : size(S, 2)
+workspace_W_size(S::Union{UmfpackLU{<:Complex}, AbstractSparseMatrixCSC{<:Complex}}, refinement::Bool) = refinement ? 10 * size(S, 2) : 4 * size(S, 2)
+
+const ATLU = Union{Transpose{<:Any, <:UmfpackLU}, Adjoint{<:Any, <:UmfpackLU}}
+has_refinement(F::ATLU) = has_refinement(F.parent)
+has_refinement(F::UmfpackLU) = has_refinement(F.control)
+has_refinement(control::AbstractVector) = control[JL_UMFPACK_IRSTEP] > 0
+
+# auto magick resize, should this only expand and not shrink?
+getworkspace(F::UmfpackLU) = @lock F.lock begin
+        resize!(F.workspace, F, has_refinement(F); expand_only=true)
+        F.workspace
+    end
+
+UmfpackWS(F::UmfpackLU{Tv, Ti}, refinement::Bool=has_refinement(F)) where {Tv, Ti} = UmfpackWS(
+        Vector{Ti}(undef, size(F, 2)),
+        Vector{Float64}(undef, workspace_W_size(F, refinement)))
+UmfpackWS(F::ATLU, refinement::Bool=has_refinement(F)) = UmfpackWS(F.parent, refinement)
+    
 """
-A shallow copy of UmfpackLU to use simultaniously
+    copy(F::UmfpackLU, [ws::UmfpackWS]) -> UmfpackLU
+A shallow copy of UmfpackLU to use in multithreaded applications. This function duplicates the working space, control and locks.
+It can also take transposed or adjoint `UmfpackLU`s.
 """
-duplicate(F::UmfpackLU) = UmfpackLU(
+# Not using simlar helps if the actual needed size has changed as it would need to be resized again
+Base.copy(F::UmfpackLU, ws=UmfpackWS(F)) = UmfpackLU(
     F.symbolic,
     F.numeric,
     F.m, F.n,
@@ -185,45 +242,42 @@ duplicate(F::UmfpackLU) = UmfpackLU(
     F.rowval,
     F.nzval,
     F.status,
-    similar(F.workspace),
+    ws,
     copy(F.control),
     copy(F.info),
     ReentrantLock())
+Base.copy(F::T, ws=UmfpackWS(F)) where {T <: ATLU} = T(copy(F.parent, ws))
 
 Base.adjoint(F::UmfpackLU) = Adjoint(F)
 Base.transpose(F::UmfpackLU) = Transpose(F)
 
-function Base.lock(F::UmfpackLU)
-    if !trylock(F.lock)
-        @info """waiting for UmfpackLU's lock, it's safe to ignore this message.
-        see the documentation for Umfpack""" maxlog=1
-        lock(F.lock)
-    end
-end
-@inline Base.trylock(F::UmfpackLU) = trylock(F.lock)
-@inline Base.unlock(F::UmfpackLU) = unlock(F.lock)
-
-function show_umf_ctrl(F::UmfpackLU, level::Real = 2.0)
+function Base.lock(f::Function, F::UmfpackLU)
     lock(F)
     try
-        show_umf_ctrl(F.control, level)
+        f()
     finally
         unlock(F)
     end
 end
-
-function show_umf_info(F::UmfpackLU, level::Real = 2.0)
-    lock(F)
-    try
-        show_umf_info(F.control, F.info, level)
-    finally
-        unlock(F)
-    end
+Base.lock(F::UmfpackLU) = if !trylock(F.lock)
+    @info """waiting for UmfpackLU's lock, it's safe to ignore this message.
+    see the documentation for Umfpack""" maxlog = 1
+    lock(F.lock)
 end
 
+@inline Base.trylock(F::UmfpackLU) = trylock(F.lock)
+@inline Base.unlock(F::UmfpackLU) = unlock(F.lock)
+
+show_umf_ctrl(F::UmfpackLU, level::Real=2.0) =
+    @lock F show_umf_ctrl(F.control, level)
+
+
+show_umf_info(F::UmfpackLU, level::Real=2.0) =
+    @lock F show_umf_info(F.control, F.info, level)
+
 
 """
-    lu(A::SparseMatrixCSC; check = true) -> F::UmfpackLU
+    lu(A::AbstractSparseMatrixCSC; check = true, q = nothing, control = get_umfpack_control()) -> F::UmfpackLU
 
 Compute the LU factorization of a sparse matrix `A`.
 
@@ -235,6 +289,15 @@ When `check = true`, an error is thrown if the decomposition fails.
 When `check = false`, responsibility for checking the decomposition's
 validity (via [`issuccess`](@ref)) lies with the user.
 
+The permutation `q` can either be a permutation vector or `nothing`. If no permutation vector
+is proveded or `q` is `nothing`, UMFPACK's default is used. If the permutation is not zero based, a
+zero based copy is made.
+
+The `control` vector default to the package's default configs for umfpacks but can be changed passing a
+vector of length `UMFPACK_CONTROL`. See the UMFPACK manual for possible configurations. The corresponding
+variables are named `JL_UMFPACK_` since julia uses one based indexing.
+
+
 The individual components of the factorization `F` can be accessed by indexing:
 
 | Component | Description                         |
@@ -249,7 +312,7 @@ The individual components of the factorization `F` can be accessed by indexing:
 The relation between `F` and `A` is
 
 `F.L*F.U == (F.Rs .* A)[F.p, F.q]`
-
+ 
 `F` further supports the following functions:
 
 - [`\\`](@ref)
@@ -258,7 +321,7 @@ The relation between `F` and `A` is
 See also [`lu!`](@ref)
 
 !!! note
-    `lu(A::SparseMatrixCSC)` uses the UMFPACK[^ACM832] library that is part of
+    `lu(A::AbstractSparseMatrixCSC)` uses the UMFPACK[^ACM832] library that is part of
     [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse).
     As this library only supports sparse matrices with [`Float64`](@ref) or
     `ComplexF64` elements, `lu` converts `A` into a copy that is of type
@@ -266,58 +329,67 @@ See also [`lu!`](@ref)
 
 [^ACM832]: Davis, Timothy A. (2004b). Algorithm 832: UMFPACK V4.3---an Unsymmetric-Pattern Multifrontal Method. ACM Trans. Math. Softw., 30(2), 196–199. [doi:10.1145/992200.992206](https://doi.org/10.1145/992200.992206)
 """
-function lu(S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool = true)
+function lu(S::AbstractSparseMatrixCSC{Tv, Ti};
+    check::Bool = true, q=nothing, control=get_umfpack_control(Tv, Ti)) where
+    {Tv<:UMFVTypes,Ti<:UMFITypes}
+
     zerobased = getcolptr(S)[1] == 0
     res = UmfpackLU(C_NULL, C_NULL, size(S, 1), size(S, 2),
                     zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S)),
                     zerobased ? copy(rowvals(S)) : decrement(rowvals(S)),
-                    copy(nonzeros(S)), 0, UmfpackWS(S),
-                    copy(umf_ctrl),
-                    copy(umf_info),
+                    copy(nonzeros(S)), 0, UmfpackWS(S, has_refinement(control)),
+                    copy(control),
+                    Vector{Float64}(undef, UMFPACK_INFO),
                     ReentrantLock())
 
-    finalizer(umfpack_free_symbolic, res)
-    umfpack_numeric!(res)
+    finalizer(umfpack_free_symbolic_nl, res)
+    umfpack_numeric!(res; q)
     check && (issuccess(res) || throw(LinearAlgebra.SingularException(0)))
     return res
 end
-lu(A::SparseMatrixCSC{<:Union{Float16,Float32},Ti};
+lu(A::AbstractSparseMatrixCSC{<:Union{Float16,Float32},Ti};
    check::Bool = true) where {Ti<:UMFITypes} =
     lu(convert(SparseMatrixCSC{Float64,Ti}, A); check = check)
-lu(A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
+lu(A::AbstractSparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
    check::Bool = true) where {Ti<:UMFITypes} =
     lu(convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check)
-lu(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
+lu(A::Union{AbstractSparseMatrixCSC{T},AbstractSparseMatrixCSC{Complex{T}}};
    check::Bool = true) where {T<:AbstractFloat} =
     throw(ArgumentError(string("matrix type ", typeof(A), "not supported. ",
     "Try lu(convert(SparseMatrixCSC{Float64/ComplexF64,Int}, A)) for ",
     "sparse floating point LU using UMFPACK or lu(Array(A)) for generic ",
     "dense LU.")))
-lu(A::SparseMatrixCSC; check::Bool = true) = lu(float(A); check = check)
+lu(A::AbstractSparseMatrixCSC; check::Bool = true) = lu(float(A); check = check)
 
 # We could do this as lu(A') = lu(A)' with UMFPACK, but the user could want to do one over the other
-lu(A::Union{Adjoint{T, S}, Transpose{T, S}}; check::Bool = true) where {T<:UMFVTypes, S<:SparseMatrixCSC{T}} =
+lu(A::Union{Adjoint{T, S}, Transpose{T, S}}; check::Bool = true) where {T<:UMFVTypes, S<:AbstractSparseMatrixCSC{T}} =
 lu(copy(A); check)
 
 """
-    lu!(F::UmfpackLU, A::SparseMatrixCSC; check=true) -> F::UmfpackLU
+    lu!(F::UmfpackLU, A::AbstractSparseMatrixCSC; check=true, reuse_symbolic=true, q=nothing) -> F::UmfpackLU
 
 Compute the LU factorization of a sparse matrix `A`, reusing the symbolic
-factorization of an already existing LU factorization stored in `F`. The
-sparse matrix `A` must have an identical nonzero pattern as the matrix used
-to create the LU factorization `F`, otherwise an error is thrown.
+factorization of an already existing LU factorization stored in `F`.
+Unless `reuse_symbolic` is set to false, the sparse matrix `A` must have an
+identical nonzero pattern as the matrix used to create the LU factorization `F`,
+otherwise an error is thrown. If the size of `A` and `F` differ, all vectors will
+be resized accordingly.
 
 When `check = true`, an error is thrown if the decomposition fails.
 When `check = false`, responsibility for checking the decomposition's
 validity (via [`issuccess`](@ref)) lies with the user.
 
+The permutation `q` can either be a permutation vector or `nothing`. If no permutation vector
+is proveded or `q` is `nothing`, UMFPACK's default is used. If the permutation is not zero based, a
+zero based copy is made.
+
 See also [`lu`](@ref)
 
 !!! note
-    `lu!(F::UmfpackLU, A::SparseMatrixCSC)` uses the UMFPACK library that is part of
+    `lu!(F::UmfpackLU, A::AbstractSparseMatrixCSC)` uses the UMFPACK library that is part of
     SuiteSparse. As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, `lu!` converts `A` into a copy that is of type
-    `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
+    `ComplexF64` elements, `lu!` will automatically convert the types to those set by the LU
+    factorization or `SparseMatrixCSC{ComplexF64}` as appropriate.
 
 !!! compat "Julia 1.5"
     `lu!` for `UmfpackLU` requires at least Julia 1.5.
@@ -338,33 +410,43 @@ julia> F \\ ones(2)
  1.0
 ```
 """
-function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool=true)
+function lu!(F::UmfpackLU, S::AbstractSparseMatrixCSC;
+  check::Bool=true, reuse_symbolic::Bool=true, q=nothing)
     zerobased = getcolptr(S)[1] == 0
-    # resize workspace if needed
-    if F.n < size(S, 2)
-        F.workspace = UmfpackWS(S)
-    end
 
     F.m = size(S, 1)
     F.n = size(S, 2)
-    F.colptr = zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S))
-    F.rowval = zerobased ? copy(rowvals(S)) : decrement(rowvals(S))
-    F.nzval = copy(nonzeros(S))
 
-    umfpack_numeric!(F, reuse_numeric = false)
+    # resize workspace if needed
+    resize!(F.workspace, S, has_refinement(F))
+
+    resize!(F.colptr, length(getcolptr(S)))
+    if zerobased
+        F.colptr .= getcolptr(S)
+    else
+        F.colptr .= getcolptr(S) .- one(eltype(S))
+    end
+
+    resize!(F.rowval, length(rowvals(S)))
+    if zerobased
+        F.rowval .= rowvals(S)
+    else
+        F.rowval .= rowvals(S) .- one(eltype(S))
+    end
+
+    resize!(F.nzval, length(nonzeros(S)))
+    F.nzval .= nonzeros(S)
+
+    if !reuse_symbolic && F.symbolic != C_NULL
+        umfpack_free_symbolic(F)
+        F.symbolic = C_NULL
+    end
+
+    umfpack_numeric!(F; reuse_numeric=false, q)
+
     check && (issuccess(F) || throw(LinearAlgebra.SingularException(0)))
     return F
 end
-lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{Float16,Float32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu!(F, convert(SparseMatrixCSC{Float64,Ti}, A); check = check)
-lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu!(F, convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check)
-lu!(F::UmfpackLU, A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   check::Bool = true) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported.")))
-lu!(F::UmfpackLU, A::SparseMatrixCSC; check::Bool = true) = lu!(F, float(A); check = check)
 
 size(F::UmfpackLU) = (F.m, F.n)
 function size(F::UmfpackLU, dim::Integer)
@@ -393,7 +475,7 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::UmfpackLU)
     end
 end
 
-function deserialize(s::AbstractSerializer, t::Type{UmfpackLU{Tv,Ti}}) where {Tv,Ti}
+function deserialize(s::AbstractSerializer, ::Type{UmfpackLU{Tv,Ti}}) where {Tv,Ti}
     symbolic = deserialize(s)
     numeric  = deserialize(s)
     m        = deserialize(s)
@@ -405,11 +487,12 @@ function deserialize(s::AbstractSerializer, t::Type{UmfpackLU{Tv,Ti}}) where {Tv
     workspace= deserialize(s)
     control  = deserialize(s)
     info     = deserialize(s)
+    lock     = deserialize(s)
     obj      = UmfpackLU{Tv,Ti}(symbolic, numeric, m, n,
         colptr, rowval, nzval, status,
-        workspace, control, info, ReentrantLock())
+        workspace, control, info, lock)
 
-    finalizer(umfpack_free_symbolic, obj)
+    finalizer(umfpack_free_symbolic_nl, obj)
 
     return obj
 end
@@ -441,7 +524,9 @@ umf_nm(nm,Tv,Ti) = "umfpack_" * (Tv === :Float64 ? "d" : "z") * (Ti === :Int64 ?
 
 for itype in UmfpackIndexTypes
     sym_r = Symbol(umf_nm("symbolic", :Float64, itype))
+    symq_r = Symbol(umf_nm("qsymbolic", :Float64, itype))
     sym_c = Symbol(umf_nm("symbolic", :ComplexF64, itype))
+    symq_c = Symbol(umf_nm("qsymbolic", :ComplexF64, itype))
     num_r = Symbol(umf_nm("numeric", :Float64, itype))
     num_c = Symbol(umf_nm("numeric", :ComplexF64, itype))
     sol_r = Symbol(umf_nm("solve", :Float64, itype))
@@ -455,135 +540,133 @@ for itype in UmfpackIndexTypes
     get_num_r = Symbol(umf_nm("get_numeric", :Float64, itype))
     get_num_z = Symbol(umf_nm("get_numeric", :ComplexF64, itype))
     @eval begin
-        function umfpack_symbolic!(U::UmfpackLU{Float64,$itype})
-            if U.symbolic != C_NULL return U end
-            lock(U)
-            try
-                tmp = Vector{Ptr{Cvoid}}(undef, 1)
-                @isok $sym_r(U.m, U.n, U.colptr, U.rowval, U.nzval, tmp, U.control, U.info)
-                U.symbolic = tmp[1]
-            finally
-                unlock(U)
+        function umfpack_symbolic!(U::UmfpackLU{Float64,$itype}, q::Union{Nothing, StridedVector{$itype}})
+            U.symbolic != C_NULL && return U
+
+            @lock U begin
+                tmp = Ref{Ptr{Cvoid}}(C_NULL)
+                if q === nothing
+                    @isok $sym_r(U.m, U.n, U.colptr, U.rowval, U.nzval, tmp, U.control, U.info)
+                else
+                    qq = minimum(q) == 1 ? q .- one(eltype(q)) : q
+                    @isok $symq_r(U.m, U.n, U.colptr, U.rowval, U.nzval, qq, tmp, U.control, U.info)
+                end
+                U.symbolic = tmp[]
             end
             return U
         end
-        function umfpack_symbolic!(U::UmfpackLU{ComplexF64,$itype})
-            if U.symbolic != C_NULL return U end
-            lock(U)
-            try
-                tmp = Vector{Ptr{Cvoid}}(undef, 1)
-                @isok $sym_c(U.m, U.n, U.colptr, U.rowval, real(U.nzval), imag(U.nzval), tmp,
-                            U.control, U.info)
-                U.symbolic = tmp[1]
-            finally
-                unlock(U)
+        function umfpack_symbolic!(U::UmfpackLU{ComplexF64,$itype}, q::Union{Nothing, StridedVector{$itype}})
+            U.symbolic != C_NULL && return U
+            @lock U begin
+                tmp = Ref{Ptr{Cvoid}}(C_NULL)
+                if q === nothing
+                    @isok $sym_c(U.m, U.n, U.colptr, U.rowval, real(U.nzval), imag(U.nzval), tmp,
+                                 U.control, U.info)
+                else
+                    qq = minimum(q) == 1 ? q .- one(eltype(q)) : q
+                    @isok $symq_c(U.m, U.n, U.colptr, U.rowval, real(U.nzval), imag(U.nzval), qq, tmp, U.control, U.info)
+                end
+                U.symbolic = tmp[]
             end
             return U
         end
-        function umfpack_numeric!(U::UmfpackLU{Float64,$itype}; reuse_numeric = true)
-            lock(U)
-            try
-                if (reuse_numeric && U.numeric != C_NULL) return U end
-                if U.symbolic == C_NULL umfpack_symbolic!(U) end
+        function umfpack_numeric!(U::UmfpackLU{Float64,$itype}; reuse_numeric=true, q=nothing)
+            @lock U begin
+                if (reuse_numeric && U.numeric != C_NULL)
+                    return U
+                end
+                if U.symbolic == C_NULL
+                    umfpack_symbolic!(U, q)
+                end
 
-                tmp = Vector{Ptr{Cvoid}}(undef, 1)
+
+                tmp = Ref{Ptr{Cvoid}}(C_NULL)
                 status = $num_r(U.colptr, U.rowval, U.nzval, U.symbolic, tmp, U.control, U.info)
                 U.status = status
                 if status != UMFPACK_WARNING_singular_matrix
                     umferror(status)
                 end
                 U.numeric != C_NULL && umfpack_free_numeric(U)
-                U.numeric = tmp[1]
-            finally
-                unlock(U)
+                U.numeric = tmp[]
             end
             return U
         end
-        function umfpack_numeric!(U::UmfpackLU{ComplexF64,$itype}; reuse_numeric = true)
-            lock(U)
-            try
+        function umfpack_numeric!(U::UmfpackLU{ComplexF64,$itype}; reuse_numeric=true, q=nothing)
+            @lock U begin
                 if (reuse_numeric && U.numeric != C_NULL) return U end
-                if U.symbolic == C_NULL umfpack_symbolic!(U) end
+                if U.symbolic == C_NULL umfpack_symbolic!(U, q) end
+
 
-                tmp = Vector{Ptr{Cvoid}}(undef, 1)
+                tmp = Ref{Ptr{Cvoid}}(C_NULL)
                 status = $num_c(U.colptr, U.rowval, real(U.nzval), imag(U.nzval), U.symbolic, tmp,
-                                U.control, U.info)
+                    U.control, U.info)
                 U.status = status
                 if status != UMFPACK_WARNING_singular_matrix
                     umferror(status)
                 end
                 U.numeric != C_NULL && umfpack_free_numeric(U)
-                U.numeric = tmp[1]
-            finally
-                unlock(U)
+                U.numeric = tmp[]
             end
             return U
         end
-        function solve!(
-            x::StridedVector{Float64}, lu::UmfpackLU{Float64,$itype}, b::StridedVector{Float64}, typ::Integer)
+        function solve!(x::StridedVector{Float64},
+            lu::UmfpackLU{Float64,$itype}, b::StridedVector{Float64},
+            typ::Integer; workspace = getworkspace(lu))
             if x === b
                 throw(ArgumentError("output array must not be aliased with input array"))
             end
             if stride(x, 1) != 1 || stride(b, 1) != 1
                 throw(ArgumentError("in and output vectors must have unit strides"))
             end
-            if size(lu, 2) > length(lu.workspace.Wi)
-                throw(ArgumentError("Wi should be at least larger than `size(Af, 2)`"))
+            if size(lu, 2) > length(workspace.Wi)
+                throw(ArgumentError("Wi should be larger than `size(Af, 2)`"))
             end
-            lock(lu)
-            try
+            if workspace_W_size(lu) > length(workspace.W)
+                throw(ArguementError("W should be larger than `workspace_W_size(Af)`"))
+            end
+            @lock lu begin
                 umfpack_numeric!(lu)
-                (size(b,1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
+                (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
 
                 @isok $wsol_r(typ, lu.colptr, lu.rowval, lu.nzval,
-                            x, b, lu.numeric, lu.control,
-                            lu.info, lu.workspace.Wi, lu.workspace.W)
-            finally
-                unlock(lu)
+                    x, b, lu.numeric, lu.control,
+                    lu.info, workspace.Wi, workspace.W)
             end
             return x
         end
-        function solve!(
-            x::StridedVector{ComplexF64}, lu::UmfpackLU{ComplexF64,$itype}, b::StridedVector{ComplexF64}, typ::Integer)
+        function solve!(x::StridedVector{ComplexF64},
+            lu::UmfpackLU{ComplexF64,$itype}, b::StridedVector{ComplexF64},
+            typ::Integer; workspace = getworkspace(lu))
             if x === b
                 throw(ArgumentError("output array must not be aliased with input array"))
             end
             if stride(x, 1) != 1 || stride(b, 1) != 1
                 throw(ArgumentError("in and output vectors must have unit strides"))
             end
-            if size(lu, 2) > length(lu.workspace.Wi)
+            if size(lu, 2) > length(workspace.Wi)
                 throw(ArgumentError("Wi should be at least larger than `size(Af, 2)`"))
             end
-            lock(lu)
-            try
+            if workspace_W_size(lu) > length(workspace.W)
+                throw(ArguementError("W should be larger than `workspace_W_size(Af)`"))
+            end
+            @lock lu begin
                 umfpack_numeric!(lu)
                 (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
                 @isok $wsol_c(typ, lu.colptr, lu.rowval, lu.nzval, C_NULL, x, C_NULL, b,
-                            C_NULL, lu.numeric, lu.control, lu.info, lu.workspace.Wi, lu.workspace.W)
-            finally
-                unlock(lu)
+                    C_NULL, lu.numeric, lu.control, lu.info, workspace.Wi, workspace.W)
             end
             return x
         end
         function det(lu::UmfpackLU{Float64,$itype})
-            mx = Ref{Float64}()
-            lock(lu)
-            try
-                @isok $det_r(mx, C_NULL, lu.numeric, lu.info)
-            finally
-                unlock(lu)
-            end
+            mx = Ref{Float64}(zero(Float64))
+            @lock lu @isok($det_r(mx, C_NULL, lu.numeric, lu.info))
             mx[]
         end
+
         function det(lu::UmfpackLU{ComplexF64,$itype})
-            mx = Ref{Float64}()
-            mz = Ref{Float64}()
-            lock(lu)
-            try
-                @isok $det_z(mx, mz, C_NULL, lu.numeric, lu.info)
-            finally
-                unlock(lu)
-            end
+            mx = Ref{Float64}(zero(Float64))
+            mz = Ref{Float64}(zero(Float64))
+            @lock lu @isok($det_z(mx, mz, C_NULL, lu.numeric, lu.info))
             complex(mx[], mz[])
         end
         function logabsdet(F::UmfpackLU{T, $itype}) where {T<:Union{Float64,ComplexF64}} # return log(abs(det)) and sign(det)
@@ -604,20 +687,20 @@ for itype in UmfpackIndexTypes
             return abs_det, s * P
         end
         function umf_lunz(lu::UmfpackLU{Float64,$itype})
-            lnz = Ref{$itype}()
-            unz = Ref{$itype}()
-            n_row = Ref{$itype}()
-            n_col = Ref{$itype}()
-            nz_diag = Ref{$itype}()
+            lnz = Ref{$itype}(zero($itype))
+            unz = Ref{$itype}(zero($itype))
+            n_row = Ref{$itype}(zero($itype))
+            n_col = Ref{$itype}(zero($itype))
+            nz_diag = Ref{$itype}(zero($itype))
             @isok $lunz_r(lnz, unz, n_row, n_col, nz_diag, lu.numeric)
             (lnz[], unz[], n_row[], n_col[], nz_diag[])
         end
         function umf_lunz(lu::UmfpackLU{ComplexF64,$itype})
-            lnz = Ref{$itype}()
-            unz = Ref{$itype}()
-            n_row = Ref{$itype}()
-            n_col = Ref{$itype}()
-            nz_diag = Ref{$itype}()
+            lnz = Ref{$itype}(zero($itype))
+            unz = Ref{$itype}(zero($itype))
+            n_row = Ref{$itype}(zero($itype))
+            n_col = Ref{$itype}(zero($itype))
+            nz_diag = Ref{$itype}(zero($itype))
             @isok $lunz_z(lnz, unz, n_row, n_col, nz_diag, lu.numeric)
             (lnz[], unz[], n_row[], n_col[], nz_diag[])
         end
@@ -883,54 +966,66 @@ function _AqldivB_kernel!(X::StridedMatrix{Tb}, lu::UmfpackLU{Float64},
 end
 
 for Tv in (:Float64, :ComplexF64), Ti in UmfpackIndexTypes
-    f = Symbol(umf_nm("free_symbolic", Tv, Ti))
-    @eval begin
-        function umfpack_free_symbolic(lu::UmfpackLU{$Tv,$Ti})
-            if lu.symbolic == C_NULL return lu end
-            umfpack_free_numeric(lu)
-            $f([lu.symbolic])
+    # no lock version for the finalizer
+    _free_symbolic = Symbol(umf_nm("free_symbolic", Tv, Ti))
+    @eval function umfpack_free_symbolic_nl(lu::UmfpackLU{$Tv,$Ti})
+        if lu.symbolic != C_NULL
+            umfpack_free_numeric_nl(lu)
+            $_free_symbolic(Ref(lu.symbolic))
             lu.symbolic = C_NULL
-            return lu
         end
+        return lu
     end
-
-    f = Symbol(umf_nm("free_numeric", Tv, Ti))
-    @eval begin
-
-        function umfpack_free_numeric(lu::UmfpackLU{$Tv,$Ti})
-            if lu.numeric == C_NULL return lu end
-            $f([lu.numeric])
+    _free_numeric = Symbol(umf_nm("free_numeric", Tv, Ti))
+    @eval function umfpack_free_numeric_nl(lu::UmfpackLU{$Tv,$Ti})
+        if lu.numeric != C_NULL
+            $_free_numeric(Ref(lu.numeric))
             lu.numeric = C_NULL
-            return lu
         end
+        return lu
     end
-end
 
-function umfpack_report_symbolic(lu::UmfpackLU, level::Real=4.0)
-    lock(lu)
-    try
-        umfpack_symbolic!(lu)
-        old_prl::Float64 = lu.control[UMFPACK_PRL]
-        lu.ctrol[UMFPACK_PRL] = Float64(level)
-        @isok umfpack_dl_report_symbolic(lu.symbolic, lu.control)
-        lu.control[UMFPACK_PRL] = old_prl
-    finally
-        unlock(lu)
+    _report_symbolic = Symbol(umf_nm("report_symbolic", Tv, Ti))
+    @eval umfpack_report_symbolic(lu::UmfpackLU{$Tv,$Ti}, level::Real=4; q=nothing) =
+        @lock lu begin
+            umfpack_symbolic!(lu, q)
+            old_prl = lu.control[JL_UMFPACK_PRL]
+            lu.control[JL_UMFPACK_PRL] = level
+            @isok $_report_symbolic(lu.symbolic, lu.control)
+            lu.control[JL_UMFPACK_PRL] = old_prl
+            lu
+        end
+    _report_numeric = Symbol(umf_nm("report_numeric", Tv, Ti))
+    @eval umfpack_report_numeric(lu::UmfpackLU{$Tv,$Ti}, level::Real=4; q=nothing) =
+        @lock lu begin
+            umfpack_numeric!(lu; q)
+            old_prl = lu.control[JL_UMFPACK_PRL]
+            lu.control[JL_UMFPACK_PRL] = level
+            @isok $_report_numeric(lu.numeric, lu.control)
+            lu.control[JL_UMFPACK_PRL] = old_prl
+            lu
+        end
+    # the control and info arrays
+    _defaults = Symbol(umf_nm("defaults", Tv, Ti))
+    @eval function get_umfpack_control(::Type{$Tv}, ::Type{$Ti})
+        control = Vector{Float64}(undef, UMFPACK_CONTROL)
+        $_defaults(control)
+        # Put julia's config here
+        # disable iterative refinement by default Issue #122
+        control[JL_UMFPACK_IRSTEP] = 0
+
+        return control
     end
 end
 
-function umfpack_report_numeric(lu::UmfpackLU, level::Real=0.4)
-    lock(lu)
-    try
-        old_prl::Float64 = lu.control[UMFPACK_PRL]
-        lu.control[UMFPACK_PRL] = Float64(level)
-        @isok umfpack_dl_report_numeric(num, lu.control)
-        lu.control[UMFPACK_PRL] = old_prl
-
-    finally
-        unlock(lu)
-    end
+umfpack_free_numeric(lu::UmfpackLU) =
+@lock lu begin
+    umfpack_free_numeric_nl(lu)
+    lu
+end
+umfpack_free_symbolic(lu::UmfpackLU) =
+@lock lu begin
+    umfpack_free_symblic_nl(lu)
 end
-
 
 end # UMFPACK module
diff --git a/src/sparsematrix.jl b/src/sparsematrix.jl
index a6e4541d..6a8b7eb2 100644
--- a/src/sparsematrix.jl
+++ b/src/sparsematrix.jl
@@ -260,14 +260,38 @@ function Base.print_array(io::IO, S::AbstractSparseMatrixCSCInclAdjointAndTransp
 end
 
 # always show matrices as `sparse(I, J, K)`
-function Base.show(io::IO, S::AbstractSparseMatrixCSCInclAdjointAndTranspose)
-    _checkbuffers(S)
+function Base.show(io::IO, _S::AbstractSparseMatrixCSCInclAdjointAndTranspose)
+    _checkbuffers(_S)
     # can't use `findnz`, because that expects all values not to be #undef
+    S = _S isa Adjoint || _S isa Transpose ? _S.parent : _S
     I = rowvals(S)
-    J = [col for col = 1 : size(S, 2) for k = getcolptr(S)[col] : (getcolptr(S)[col+1]-1)]
     K = nonzeros(S)
     m, n = size(S)
-    print(io, "sparse(", I, ", ", J, ", ", K, ", ", m, ", ", n, ")")
+    if _S isa Adjoint
+        print(io, "adjoint(")
+    elseif _S isa Transpose
+        print(io, "transpose(")
+    end
+    print(io, "sparse(", I, ", ")
+    if length(I) == 0
+        print(io, eltype(getcolptr(S)), "[]")
+    else
+        print(io, "[")
+        il = nnz(S) - 1
+        for col in 1:size(S, 2),
+            k in getcolptr(S)[col] : (getcolptr(S)[col+1]-1)
+            print(io, col)
+            if il > 0
+                print(io, ", ")
+                il -= 1
+            end
+        end
+        print(io, "]")
+    end
+    print(io, ", ", K, ", ", m, ", ", n, ")")
+    if _S isa Adjoint || _S isa Transpose
+        print(io, ")")
+    end
 end
 
 const brailleBlocks = UInt16['⠁', '⠂', '⠄', '⡀', '⠈', '⠐', '⠠', '⢀']
@@ -673,7 +697,7 @@ function SparseMatrixCSC{Tv,Ti}(D::Diagonal) where {Tv,Ti}
     m = length(D.diag)
     m == 0 && return SparseMatrixCSC{Tv,Ti}(zeros(Tv, 0, 0))
 
-    nz = count(!iszero, D.diag)
+    nz = count(_isnotzero, D.diag)
     nz_counter = 1
 
     rowval = Vector{Ti}(undef, nz)
@@ -684,7 +708,7 @@ function SparseMatrixCSC{Tv,Ti}(D::Diagonal) where {Tv,Ti}
     colptr = Vector{Ti}(undef, m+1)
 
     @inbounds for i=1:m
-        if !iszero(D.diag[i])
+        if _isnotzero(D.diag[i])
             colptr[i] = nz_counter
             rowval[nz_counter] = i
             nzval[nz_counter]  = D.diag[i]
@@ -707,7 +731,7 @@ function SparseMatrixCSC{Tv,Ti}(M::AbstractMatrix) where {Tv,Ti}
     i = 0
     for v in M
         i += 1
-        if !iszero(v)
+        if _isnotzero(v)
             push!(I, i)
             push!(V, v)
         end
@@ -716,7 +740,7 @@ function SparseMatrixCSC{Tv,Ti}(M::AbstractMatrix) where {Tv,Ti}
 end
 
 function SparseMatrixCSC{Tv,Ti}(M::StridedMatrix) where {Tv,Ti}
-    nz = count(!iszero, M)
+    nz = count(_isnotzero, M)
     colptr = zeros(Ti, size(M, 2) + 1)
     nzval = Vector{Tv}(undef, nz)
     rowval = Vector{Ti}(undef, nz)
@@ -725,7 +749,7 @@ function SparseMatrixCSC{Tv,Ti}(M::StridedMatrix) where {Tv,Ti}
     @inbounds for j in 1:size(M, 2)
         for i in 1:size(M, 1)
             v = M[i, j]
-            if !iszero(v)
+            if _isnotzero(v)
                 rowval[cnt] = i
                 nzval[cnt] = v
                 cnt += 1
@@ -748,6 +772,41 @@ SparseMatrixCSC{Tv}(M::Transpose{<:Any,<:AbstractSparseMatrixCSC}) where {Tv} =
 SparseMatrixCSC{Tv,Ti}(M::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(copy(M))
 SparseMatrixCSC{Tv,Ti}(M::Transpose{<:Any,<:AbstractSparseMatrixCSC}) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(copy(M))
 
+# we can only view AbstractQs as columns
+SparseMatrixCSC{Tv,Ti}(Q::LinearAlgebra.AbstractQ) where {Tv,Ti} = sparse_with_lmul(Tv, Ti, Q)
+
+"""
+    sparse_with_lmul(Tv, Ti, Q) -> SparseMatrixCSC
+
+Helper function that creates a `SparseMatrixCSC{Tv,Ti}` representation of `Q`, where `Q` is
+supposed to not have fast `getindex` or not admit an iteration protocol at all, but instead
+a fast `lmul!(Q, v)` for dense vectors `v`. The prime example for such `Q`s is the Q factor
+of a (sparse) QR decomposition.
+"""
+function sparse_with_lmul(Tv, Ti, Q)
+    colptr = zeros(Ti, size(Q, 2) + 1)
+    nzval = Tv[]
+    rowval = Ti[]
+    col = zeros(eltype(Q), size(Q, 1))
+
+    colptr[1] = 1
+    ind = 1
+    for j in axes(Q, 2)
+        fill!(col, false)
+        col[j] = one(Tv)
+        lmul!(Q, col)
+        for (i, v) in enumerate(col)
+            if _isnotzero(v)
+                push!(nzval, v)
+                push!(rowval, i)
+                ind += 1
+            end
+        end
+        colptr[j + 1] = ind
+    end
+    return SparseMatrixCSC{Tv,Ti}(size(Q)..., colptr, rowval, nzval)
+end
+
 # converting from SparseMatrixCSC to other matrix types
 function Matrix(S::AbstractSparseMatrixCSC{Tv}) where Tv
     _checkbuffers(S)
@@ -1137,9 +1196,9 @@ end
 """
     ftranspose!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}, f::Function) where {Tv,Ti}
 
-Transpose `A` and store it in `X` while applying the function `f` to the non-zero elements. 
+Transpose `A` and store it in `X` while applying the function `f` to the non-zero elements.
 Does not remove the zeros created by `f`. `size(X)` must be equal to `size(transpose(A))`.
-No additonal memory is allocated other than resizing the rowval and nzval of `X`, if needed. 
+No additonal memory is allocated other than resizing the rowval and nzval of `X`, if needed.
 
 See `halfperm!`
 """
@@ -1158,9 +1217,9 @@ end
 """
     transpose!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
 
-Transpose the matrix `A` and stores it in the matrix `X`. 
+Transpose the matrix `A` and stores it in the matrix `X`.
 `size(X)` must be equal to `size(transpose(A))`.
-No additonal memory is allocated other than resizing the rowval and nzval of `X`, if needed. 
+No additonal memory is allocated other than resizing the rowval and nzval of `X`, if needed.
 
 See `halfperm!`
 """
@@ -1170,8 +1229,8 @@ transpose!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti})
     adjoint!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
 
 Transpose the matrix `A` and stores the adjoint of the elements in the matrix `X`.
-`size(X)` must be equal to `size(transpose(A))`. 
-No additonal memory is allocated other than resizing the rowval and nzval of `X`, if needed. 
+`size(X)` must be equal to `size(transpose(A))`.
+No additonal memory is allocated other than resizing the rowval and nzval of `X`, if needed.
 
 See `halfperm!`
 """
@@ -1581,7 +1640,7 @@ Removes stored numerical zeros from `A`.
 For an out-of-place version, see [`dropzeros`](@ref). For
 algorithmic information, see `fkeep!`.
 """
-dropzeros!(A::AbstractSparseMatrixCSC) = fkeep!(A, (i, j, x) -> !iszero(x))
+dropzeros!(A::AbstractSparseMatrixCSC) = fkeep!(A, (i, j, x) -> _isnotzero(x))
 
 """
     dropzeros(A::AbstractSparseMatrixCSC;)
@@ -2190,7 +2249,7 @@ function _findz(A::AbstractSparseMatrixCSC{Tv,Ti}, rows=1:size(A, 1), cols=1:siz
             (r1 <= r2 ) && (r2 = searchsortedlast(rowval, rowmax, r1, r2, Forward))
         end
         row = rowmin
-        while (r1 <= r2) && (row == rowval[r1]) && !iszero(nzval[r1])
+        while (r1 <= r2) && (row == rowval[r1]) && _isnotzero(nzval[r1])
             r1 += 1
             row += 1
         end
@@ -2292,9 +2351,9 @@ function rangesearch(haystack::AbstractRange, needle)
     (rem==0 && 1<=i+1<=length(haystack)) ? i+1 : 0
 end
 
-getindex(A::AbstractSparseMatrixCSC, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2])
+@RCI getindex(A::AbstractSparseMatrixCSC, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2])
 
-function getindex(A::AbstractSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T
+@RCI function getindex(A::AbstractSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T
     @boundscheck checkbounds(A, i0, i1)
     r1 = Int(getcolptr(A)[i1])
     r2 = Int(getcolptr(A)[i1+1]-1)
@@ -2740,7 +2799,7 @@ getindex(A::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::AbstractVector{
 ## setindex!
 
 # dispatch helper for #29034
-setindex!(A::AbstractSparseMatrixCSC, _v, _i::Integer, _j::Integer) = _setindex_scalar!(A, _v, _i, _j)
+@RCI setindex!(A::AbstractSparseMatrixCSC, _v, _i::Integer, _j::Integer) = _setindex_scalar!(A, _v, _i, _j)
 
 function _setindex_scalar!(A::AbstractSparseMatrixCSC{Tv,Ti}, _v, _i::Integer, _j::Integer) where {Tv,Ti<:Integer}
     v = convert(Tv, _v)
@@ -2759,7 +2818,7 @@ function _setindex_scalar!(A::AbstractSparseMatrixCSC{Tv,Ti}, _v, _i::Integer, _
     end
     # Column j does not contain entry A[i,j]. If v is nonzero, insert entry A[i,j] = v
     # and return. If to the contrary v is zero, then simply return.
-    if !iszero(v)
+    if _isnotzero(v)
         nz = getcolptr(A)[size(A, 2)+1]
         # throw exception before state is partially modified
         !isbitstype(Ti) || nz < typemax(Ti) ||
@@ -2796,7 +2855,7 @@ function Base.fill!(V::SubArray{Tv, <:Any, <:AbstractSparseMatrixCSC{Tv}, <:Tupl
     if (I[1] < 1 || I[end] > size(A, 1)) || (J[1] < 1 || J[end] > size(A, 2))
         throw(BoundsError(A, (I, J)))
     end
-    if iszero(x)
+    if _iszero(x)
         _spsetz_setindex!(A, I, J)
     else
         _spsetnz_setindex!(A, convert(Tv, x), I, J)
@@ -3644,7 +3703,7 @@ function is_hermsym(A::AbstractSparseMatrixCSC, check::Function)
                 # We therefore "catch up" here while making sure that
                 # the elements are actually zero.
                 while row2 < col
-                    if !iszero(nzval[offset])
+                    if _isnotzero(nzval[offset])
                         return false
                     end
                     offset += 1
@@ -3682,7 +3741,7 @@ function istriu(A::AbstractSparseMatrixCSC)
             if rowval[l1-i] <= col
                 break
             end
-            if !iszero(nzval[l1-i])
+            if _isnotzero(nzval[l1-i])
                 return false
             end
         end
@@ -3701,7 +3760,7 @@ function istril(A::AbstractSparseMatrixCSC)
             if rowval[i] >= col
                 break
             end
-            if !iszero(nzval[i])
+            if _isnotzero(nzval[i])
                 return false
             end
         end
@@ -3894,13 +3953,13 @@ end
 
 ## Uniform matrix arithmetic
 
-(+)(A::AbstractSparseMatrixCSC{Tv, Ti}, J::UniformScaling{T}) where {T<:Number, Tv, Ti} = 
+(+)(A::AbstractSparseMatrixCSC{Tv, Ti}, J::UniformScaling{T}) where {T<:Number, Tv, Ti} =
     A + sparse(T, Ti, J, size(A)...)
-(+)(J::UniformScaling{T}, A::AbstractSparseMatrixCSC{Tv, Ti}) where {T<:Number, Tv, Ti} = 
+(+)(J::UniformScaling{T}, A::AbstractSparseMatrixCSC{Tv, Ti}) where {T<:Number, Tv, Ti} =
     sparse(T, Ti, J, size(A)...) + A
-(-)(A::AbstractSparseMatrixCSC{Tv, Ti}, J::UniformScaling{T}) where {T<:Number, Tv, Ti} = 
+(-)(A::AbstractSparseMatrixCSC{Tv, Ti}, J::UniformScaling{T}) where {T<:Number, Tv, Ti} =
     A - sparse(T, Ti, J, size(A)...)
-(-)(J::UniformScaling{T}, A::AbstractSparseMatrixCSC{Tv, Ti}) where {T<:Number, Tv, Ti} = 
+(-)(J::UniformScaling{T}, A::AbstractSparseMatrixCSC{Tv, Ti}) where {T<:Number, Tv, Ti} =
     sparse(T, Ti, J, size(A)...) - A
 
 
diff --git a/src/sparsevector.jl b/src/sparsevector.jl
index 4c60ced1..d8424ae2 100644
--- a/src/sparsevector.jl
+++ b/src/sparsevector.jl
@@ -14,19 +14,24 @@ using LinearAlgebra: _SpecialArrays, _DenseConcatGroup
     SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
 
 Vector type for storing sparse vectors. Can be created by passing the length of the vector,
-a *sorted* vector of non-zero indices, and a vector of non-zero values. 
+a *sorted* vector of non-zero indices, and a vector of non-zero values.
+
+For instance, the vector `[5, 6, 0, 7]` can be represented as
 
-For instance, the Vector `[5, 6, 0, 7]` can be represented as 
 ```julia
 SparseVector(4, [1, 2, 4], [5, 6, 7])
 ```
-This indicates that the index 1 is 5, the index 2 is 6, the index 3 is `zero(Int)`, and index 4 is 7.
 
-It may be more convenient to create sparse vectors directly from dense vectors using `sparse` as 
+This indicates that the element at index 1 is 5, at index 2 is 6, at index 3 is `zero(Int)`,
+and at index 4 is 7.
+
+It may be more convenient to create sparse vectors directly from dense vectors using `sparse` as
+
 ```julia
 sparse([5, 6, 0, 7])
-``` 
-yeilds the same sparse vector.
+```
+
+yields the same sparse vector.
 """
 struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
     n::Ti              # Length of the sparse vector
@@ -48,11 +53,12 @@ SparseVector(n::Integer, nzind::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti} =
 # union of such a view and a SparseVector so we define an alias for such a union as well
 const SparseColumnView{Tv,Ti}  = SubArray{Tv,1,<:AbstractSparseMatrixCSC{Tv,Ti},Tuple{Base.Slice{Base.OneTo{Int}},Int},false}
 const SparseVectorView{Tv,Ti}  = SubArray{Tv,1,<:AbstractSparseVector{Tv,Ti},Tuple{Base.Slice{Base.OneTo{Int}}},false}
-const SparseVectorUnion{Tv,Ti} = Union{SparseVector{Tv,Ti}, SparseColumnView{Tv,Ti}, SparseVectorView{Tv,Ti}}
+const SparseVectorUnion{Tv,Ti} = Union{AbstractSparseVector{Tv,Ti}, SparseColumnView{Tv,Ti}, SparseVectorView{Tv,Ti}}
 const AdjOrTransSparseVectorUnion{Tv,Ti} = LinearAlgebra.AdjOrTrans{Tv, <:SparseVectorUnion{Tv,Ti}}
 
 ### Basic properties
 
+length(x::SparseVector)   = getfield(x, :n)
 size(x::SparseVector)     = (getfield(x, :n),)
 count(f, x::SparseVector) = count(f, nonzeros(x)) + f(zero(eltype(x)))*(length(x) - nnz(x))
 
@@ -333,7 +339,7 @@ end
 
 ### Element access
 
-function setindex!(x::SparseVector{Tv,Ti}, v::Tv, i::Ti) where {Tv,Ti<:Integer}
+@RCI function setindex!(x::SparseVector{Tv,Ti}, v::Tv, i::Ti) where {Tv,Ti<:Integer}
     checkbounds(x, i)
     nzind = nonzeroinds(x)
     nzval = nonzeros(x)
@@ -343,7 +349,7 @@ function setindex!(x::SparseVector{Tv,Ti}, v::Tv, i::Ti) where {Tv,Ti<:Integer}
     if 1 <= k <= m && nzind[k] == i  # i found
         nzval[k] = v
     else  # i not found
-        if !iszero(v)
+        if _isnotzero(v)
             insert!(nzind, k, i)
             insert!(nzval, k, v)
         end
@@ -351,7 +357,7 @@ function setindex!(x::SparseVector{Tv,Ti}, v::Tv, i::Ti) where {Tv,Ti<:Integer}
     x
 end
 
-setindex!(x::SparseVector{Tv,Ti}, v, i::Integer) where {Tv,Ti<:Integer} =
+@RCI setindex!(x::SparseVector{Tv,Ti}, v, i::Integer) where {Tv,Ti<:Integer} =
     setindex!(x, convert(Tv, v), convert(Ti, i))
 
 
@@ -434,7 +440,7 @@ function _dense2indval!(nzind::Vector{Ti}, nzval::Vector{Tv}, s::AbstractArray{T
     n = length(s)
     c = 0
     @inbounds for (i, v) in enumerate(s)
-        if !iszero(v)
+        if _isnotzero(v)
             if c >= cap
                 cap = (cap == 0) ? 1 : 2*cap
                 resize!(nzind, cap)
@@ -779,7 +785,7 @@ findall(p::Base.Fix2{typeof(in)}, x::SparseVector{<:Any,Ti}) where {Ti} =
     findnz(x::SparseVector)
 
 Return a tuple `(I, V)`  where `I` is the indices of the stored ("structurally non-zero")
-values in sparse vector `x` and `V` is a vector of the values.
+values in sparse vector-like `x` and `V` is a vector of the values.
 
 # Examples
 ```jldoctest
@@ -794,7 +800,7 @@ julia> findnz(x)
 ([1, 4, 6, 8], [1, 2, 4, 3])
 ```
 """
-function findnz(x::SparseVector{Tv,Ti}) where {Tv,Ti}
+function findnz(x::SparseVectorUnion{Tv,Ti}) where {Tv,Ti}
     numnz = nnz(x)
 
     I = Vector{Ti}(undef, numnz)
@@ -858,7 +864,7 @@ function _spgetindex(m::Int, nzind::AbstractVector{Ti}, nzval::AbstractVector{Tv
     (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv)
 end
 
-function getindex(x::AbstractSparseVector, i::Integer)
+@RCI function getindex(x::AbstractSparseVector, i::Integer)
     checkbounds(x, i)
     _spgetindex(nnz(x), nonzeroinds(x), nonzeros(x), i)
 end
@@ -1220,7 +1226,7 @@ macro unarymap_nz2z_z2z(op, TF)
             @inbounds for j = 1:m
                 i = xnzind[j]
                 v = $(op)(xnzval[j])
-                if v != zero(v)
+                if _isnotzero(v)
                     ir += 1
                     ynzind[ir] = i
                     ynzval[ir] = v
@@ -1272,7 +1278,7 @@ function _binarymap(f::Function,
                     y::AbstractSparseVector{Ty},
                     mode::Int) where {Tx,Ty}
     0 <= mode <= 2 || throw(ArgumentError("Incorrect mode $mode."))
-    R = typeof(f(zero(Tx), zero(Ty)))
+    R = Base.Broadcast.combine_eltypes(f, (x, y))
     n = length(x)
     length(y) == n || throw(DimensionMismatch())
 
@@ -1287,9 +1293,6 @@ function _binarymap(f::Function,
     rind = Vector{Int}(undef, cap)
     rval = Vector{R}(undef, cap)
     ir = 0
-    ix = 1
-    iy = 1
-
     ir = (
         mode == 0 ? _binarymap_mode_0!(f, mx, my,
             xnzind, xnzval, ynzind, ynzval, rind, rval) :
@@ -1307,6 +1310,7 @@ end
 function _binarymap_mode_0!(f::Function, mx::Int, my::Int,
                             xnzind, xnzval, ynzind, ynzval, rind, rval)
     # f(nz, nz) -> nz, f(z, nz) -> z, f(nz, z) ->  z
+    require_one_based_indexing(xnzind, ynzind, xnzval, ynzval, rind, rval)
     ir = 0; ix = 1; iy = 1
     @inbounds while ix <= mx && iy <= my
         jx = xnzind[ix]
@@ -1329,13 +1333,14 @@ function _binarymap_mode_1!(f::Function, mx::Int, my::Int,
                             ynzind, ynzval::AbstractVector{Ty},
                             rind, rval) where {Tx,Ty}
     # f(nz, nz) -> z/nz, f(z, nz) -> nz, f(nz, z) -> nz
+    require_one_based_indexing(xnzind, ynzind, xnzval, ynzval, rind, rval)
     ir = 0; ix = 1; iy = 1
     @inbounds while ix <= mx && iy <= my
         jx = xnzind[ix]
         jy = ynzind[iy]
         if jx == jy
             v = f(xnzval[ix], ynzval[iy])
-            if v != zero(v)
+            if _isnotzero(v)
                 ir += 1; rind[ir] = jx; rval[ir] = v
             end
             ix += 1; iy += 1
@@ -1367,25 +1372,26 @@ function _binarymap_mode_2!(f::Function, mx::Int, my::Int,
                             ynzind, ynzval::AbstractVector{Ty},
                             rind, rval) where {Tx,Ty}
     # f(nz, nz) -> z/nz, f(z, nz) -> z/nz, f(nz, z) -> z/nz
+    require_one_based_indexing(xnzind, ynzind, xnzval, ynzval, rind, rval)
     ir = 0; ix = 1; iy = 1
     @inbounds while ix <= mx && iy <= my
         jx = xnzind[ix]
         jy = ynzind[iy]
         if jx == jy
             v = f(xnzval[ix], ynzval[iy])
-            if v != zero(v)
+            if _isnotzero(v)
                 ir += 1; rind[ir] = jx; rval[ir] = v
             end
             ix += 1; iy += 1
         elseif jx < jy
             v = f(xnzval[ix], zero(Ty))
-            if v != zero(v)
+            if _isnotzero(v)
                 ir += 1; rind[ir] = jx; rval[ir] = v
             end
             ix += 1
         else
             v = f(zero(Tx), ynzval[iy])
-            if v != zero(v)
+            if _isnotzero(v)
                 ir += 1; rind[ir] = jy; rval[ir] = v
             end
             iy += 1
@@ -1393,14 +1399,14 @@ function _binarymap_mode_2!(f::Function, mx::Int, my::Int,
     end
     @inbounds while ix <= mx
         v = f(xnzval[ix], zero(Ty))
-        if v != zero(v)
+        if _isnotzero(v)
             ir += 1; rind[ir] = xnzind[ix]; rval[ir] = v
         end
         ix += 1
     end
     @inbounds while iy <= my
         v = f(zero(Tx), ynzval[iy])
-        if v != zero(v)
+        if _isnotzero(v)
             ir += 1; rind[ir] = ynzind[iy]; rval[ir] = v
         end
         iy += 1
@@ -1411,24 +1417,36 @@ end
 # definition of a few known broadcasted/mapped binary functions — all others defer to HigherOrderFunctions
 
 _bcast_binary_map(f, x, y, mode) = length(x) == length(y) ? _binarymap(f, x, y, mode) : HigherOrderFns._diffshape_broadcast(f, x, y)
+_getmode(::typeof(+), ::Type, ::Type) = 1
+_getmode(::typeof(-), ::Type, ::Type) = 1
+_getmode(::typeof(*), ::Type, ::Type) = 0
+_getmode(::typeof(*), ::Type{Union{Missing, T}}, ::Type) where {T} = 2
+_getmode(::typeof(*), ::Type, ::Type{Union{Missing, T}}) where {T} = 2
+_getmode(::typeof(*), ::Type{Union{Missing, T}}, ::Type{Union{Missing, S}}) where {T,S} = 2
+_getmode(::typeof(min), ::Type, ::Type) = 2
+_getmode(::typeof(max), ::Type, ::Type) = 2
 for (fun, mode) in [(:+, 1), (:-, 1), (:*, 0), (:min, 2), (:max, 2)]
     fun in (:+, :-) && @eval begin
         # Addition and subtraction can be defined directly on the arrays (without map/broadcast)
         $(fun)(x::AbstractSparseVector, y::AbstractSparseVector) = _binarymap($(fun), x, y, $mode)
     end
     @eval begin
-        map(::typeof($fun), x::AbstractSparseVector, y::AbstractSparseVector) = _binarymap($fun, x, y, $mode)
-        map(::typeof($fun), x::SparseVector, y::SparseVector) = _binarymap($fun, x, y, $mode)
-        broadcast(::typeof($fun), x::AbstractSparseVector, y::AbstractSparseVector) = _bcast_binary_map($fun, x, y, $mode)
-        broadcast(::typeof($fun), x::SparseVector, y::SparseVector) = _bcast_binary_map($fun, x, y, $mode)
+        map(::typeof($fun), x::AbstractSparseVector{Tx}, y::AbstractSparseVector{Ty}) where {Tx, Ty} =
+            _binarymap($fun, x, y, _getmode($fun, Tx, Ty))
+        map(::typeof($fun), x::SparseVector{Tx}, y::SparseVector{Ty}) where {Tx, Ty} =
+            _binarymap($fun, x, y, _getmode($fun, Tx, Ty))
+        broadcast(::typeof($fun), x::AbstractSparseVector{Tx}, y::AbstractSparseVector{Ty}) where {Tx, Ty} =
+            _bcast_binary_map($fun, x, y, _getmode($fun, Tx, Ty))
+        broadcast(::typeof($fun), x::SparseVector{Tx}, y::SparseVector{Ty}) where {Tx, Ty} =
+            _bcast_binary_map($fun, x, y, _getmode($fun, Tx, Ty))
     end
 end
 
 ### Reduction
-Base.reducedim_initarray(A::AbstractSparseVector, region, v0, ::Type{R}) where {R} =
+Base.reducedim_initarray(A::SparseVectorUnion, region, v0, ::Type{R}) where {R} =
     fill!(Array{R}(undef, Base.to_shape(Base.reduced_indices(A, region))), v0)
 
-function Base._mapreduce(f, op, ::IndexCartesian, A::AbstractSparseVector{T}) where {T}
+function Base._mapreduce(f, op, ::IndexCartesian, A::SparseVectorUnion{T}) where {T}
     isempty(A) && return Base.mapreduce_empty(f, op, T)
     z = nnz(A)
     rest, ini = if z == 0
@@ -1439,7 +1457,7 @@ function Base._mapreduce(f, op, ::IndexCartesian, A::AbstractSparseVector{T}) wh
     _mapreducezeros(f, op, T, rest, ini)
 end
 
-function Base.mapreducedim!(f, op, R::AbstractVector, A::AbstractSparseVector)
+function Base.mapreducedim!(f, op, R::AbstractVector, A::SparseVectorUnion)
     # dim1 reduction could be safely replaced with a mapreduce
     if length(R) == 1
         I = firstindex(R)
@@ -1465,7 +1483,7 @@ for (fun, comp, word) in ((:findmin, :(<), "minimum"), (:findmax, :(>), "maximum
         $comp(val, zeroval) && return val, nzinds[index]
         # we need to find the first zero, which could be stored or implicit
         # we try to avoid findfirst(iszero, x)
-        sindex = findfirst(iszero, nzvals) # first stored zero, if any
+        sindex = findfirst(_iszero, nzvals) # first stored zero, if any
         zindex = findfirst(i -> i < nzinds[i], eachindex(nzinds)) # first non-stored zero
         index = if isnothing(sindex)
             # non-stored zero are contiguous and at the end
@@ -1650,7 +1668,7 @@ function mul!(y::AbstractVector, A::_StridedOrTriangularMatrix, x::AbstractSpars
     xnzval = nonzeros(x)
     @inbounds for i = 1:length(xnzind)
         v = xnzval[i]
-        if v != zero(v)
+        if _isnotzero(v)
             j = xnzind[i]
             αv = v * α
             for r = 1:m
@@ -1784,7 +1802,7 @@ function mul!(y::AbstractVector, A::AbstractSparseMatrixCSC, x::AbstractSparseVe
 
     @inbounds for i = 1:length(xnzind)
         v = xnzval[i]
-        if v != zero(v)
+        if _isnotzero(v)
             αv = v * α
             j = xnzind[i]
             for r = Acolptr[j]:(Acolptr[j+1]-1)
@@ -2067,7 +2085,7 @@ Removes stored numerical zeros from `x`.
 For an out-of-place version, see [`dropzeros`](@ref). For
 algorithmic information, see `fkeep!`.
 """
-dropzeros!(x::SparseVector) = fkeep!(x, (i, x) -> !iszero(x))
+dropzeros!(x::SparseVector) = fkeep!(x, (i, x) -> _isnotzero(x))
 
 """
     dropzeros(x::SparseVector)
diff --git a/test/allowscalar.jl b/test/allowscalar.jl
new file mode 100644
index 00000000..fa35f35c
--- /dev/null
+++ b/test/allowscalar.jl
@@ -0,0 +1,24 @@
+using Test, SparseArrays
+
+@testset "allowscalar" begin
+    A = sprandn(10, 20, 0.9)
+    A[1, 1] = 2
+    @test A[1, 1] == 2
+    SparseArrays.allowscalar(false)
+    @test_throws Any A[1, 1]
+    @test_throws Any A[1, 1] = 2
+    SparseArrays.allowscalar(true)
+    @test A[1, 1] == 2
+    A[1, 1] = 3
+    @test A[1, 1] == 3
+
+    B = sprandn(10, 0.9)
+    B[1] = 2
+    @test B[1] == 2
+    SparseArrays.allowscalar(false)
+    @test_throws Any B[1]
+    SparseArrays.allowscalar(true)
+    @test B[1] == 2
+    B[1] = 3
+    @test B[1] == 3
+end
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index 633557f7..7cea558a 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -1,7 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
 using Test, LinearAlgebra, SparseArrays
-
 @testset "detect_ambiguities" begin
     @test_nowarn detect_ambiguities(SparseArrays; recursive=true, ambiguous_bottom=false)
 end
diff --git a/test/cholmod.jl b/test/cholmod.jl
index 6614bc74..a123d961 100644
--- a/test/cholmod.jl
+++ b/test/cholmod.jl
@@ -4,7 +4,6 @@ module CHOLMODTests
 
 using Test
 using SparseArrays.CHOLMOD
-using DelimitedFiles
 using Random
 using Serialization
 using LinearAlgebra:
@@ -216,20 +215,29 @@ end
 end
 
 @testset "test Sparse constructor and read_sparse" begin
+    # avoid dependenting on delimited files
+    function writedlm(fn, title="", xs...)
+        open(fn, "w") do file
+            println(file, title)
+            for i in xs
+                println(file, i)
+            end
+        end
+    end
     mktempdir() do temp_dir
         testfile = joinpath(temp_dir, "tmp.mtx")
 
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate real symmetric","3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1"])
+        writedlm(testfile, "%%MatrixMarket matrix coordinate real symmetric","3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1")
         @test sparse(CHOLMOD.Sparse(testfile)) == [1 0 0;0 1 0.5;0 0.5 1]
         rm(testfile)
 
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate complex Hermitian",
-                        "3 3 4","1 1 1.0 0.0","2 2 1.0 0.0","3 2 0.5 0.5","3 3 1.0 0.0"])
+        writedlm(testfile, "%%MatrixMarket matrix coordinate complex Hermitian",
+                        "3 3 4","1 1 1.0 0.0","2 2 1.0 0.0","3 2 0.5 0.5","3 3 1.0 0.0")
         @test sparse(CHOLMOD.Sparse(testfile)) == [1 0 0;0 1 0.5-0.5im;0 0.5+0.5im 1]
         rm(testfile)
 
         # this also tests that the error message is correctly retrieved from the library
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate real symmetric","%3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1"])
+        writedlm(testfile, "%%MatrixMarket matrix coordinate real symmetric","%3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1")
         @test_throws CHOLMOD.CHOLMODException("indices out of range") sparse(CHOLMOD.Sparse(testfile))
         rm(testfile)
     end
@@ -237,35 +245,35 @@ end
 
 ## The struct pointer must be constructed by the library constructor and then modified afterwards to checks that the method throws
 @testset "illegal dtype (for now but should be supported at some point)" begin
-    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.COMMONS[Threads.threadid()])
+    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.getcommon())
     puint = convert(Ptr{UInt32}, p)
     unsafe_store!(puint, CHOLMOD_SINGLE, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 4)
     @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
 end
 
 @testset "illegal dtype" begin
-    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.COMMONS[Threads.threadid()])
+    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.getcommon())
     puint = convert(Ptr{UInt32}, p)
     unsafe_store!(puint, 5, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 4)
     @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
 end
 
 @testset "illegal xtype" begin
-    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.COMMONS[Threads.threadid()])
+    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.getcommon())
     puint = convert(Ptr{UInt32}, p)
     unsafe_store!(puint, 3, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 3)
     @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
 end
 
 @testset "illegal itype I" begin
-    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.COMMONS[Threads.threadid()])
+    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.getcommon())
     puint = convert(Ptr{UInt32}, p)
     unsafe_store!(puint, CHOLMOD_INTLONG, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 2)
     @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
 end
 
 @testset "illegal itype II" begin
-    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.COMMONS[Threads.threadid()])
+    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.getcommon())
     puint = convert(Ptr{UInt32}, p)
     unsafe_store!(puint,  5, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 2)
     @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
@@ -314,7 +322,7 @@ end
 
 # Test Sparse and Factor
 @testset "test free!" begin
-    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.COMMONS[Threads.threadid()])
+    p = cholmod_l_allocate_sparse(1, 1, 1, true, true, 0, CHOLMOD_REAL, CHOLMOD.getcommon())
     @test CHOLMOD.free!(p)
 end
 
@@ -904,7 +912,7 @@ end
 
 @testset "Check common is still in default state" begin
     # This test intentially depends on all the above tests!
-    current_common = CHOLMOD.COMMONS[Threads.threadid()]
+    current_common = CHOLMOD.getcommon()
     default_common = Ref(cholmod_common())
     result = cholmod_l_start(default_common)
     @test result == CHOLMOD.TRUE
diff --git a/test/higherorderfns.jl b/test/higherorderfns.jl
index ef49eefa..7e3cac85 100644
--- a/test/higherorderfns.jl
+++ b/test/higherorderfns.jl
@@ -11,21 +11,35 @@ using SparseArrays
 using LinearAlgebra
 using Random
 include("forbidproperties.jl")
-
+function test_map_and_map!(A, alloc_tests)
+    # --> test map entry point
+    fA = Array(A)
+    shapeA = size(A)
+    @test map(sin, A) == sparse(map(sin, fA))
+    @test map(cos, A) == sparse(map(cos, fA))
+    # --> test map! entry point
+    fX = copy(fA); X = similar(A)
+    map!(sin, X, A); X = similar(A) # warmup for @allocated
+    map!(sin, X, A); X = similar(A) # warmup for @allocated
+    if @allocated(map!(sin, X, A)) != 0 && alloc_tests
+        println(stderr, "A in test_map_and_map! is ", A)
+    end
+    X = similar(A)
+    @test @allocated(map!(sin, X, A)) == 0 || !alloc_tests
+    @test map!(sin, X, A) == sparse(map!(sin, fX, fA))
+    @test map!(cos, X, A) == sparse(map!(cos, fX, fA))
+    @test_throws DimensionMismatch map!(sin, X, spzeros((shapeA .- 1)...))
+end
 @testset "map[!] implementation specialized for a single (input) sparse vector/matrix" begin
     N, M = 10, 12
     for shapeA in ((N,), (N, M))
-        A = sprand(shapeA..., 0.4); fA = Array(A)
-        # --> test map entry point
-        @test map(sin, A) == sparse(map(sin, fA))
-        @test map(cos, A) == sparse(map(cos, fA))
-        # --> test map! entry point
-        fX = copy(fA); X = sparse(fX)
-        map!(sin, X, A); X = sparse(fX) # warmup for @allocated
-        @test (@allocated map!(sin, X, A)) == 0
-        @test map!(sin, X, A) == sparse(map!(sin, fX, fA))
-        @test map!(cos, X, A) == sparse(map!(cos, fX, fA))
-        @test_throws DimensionMismatch map!(sin, X, spzeros((shapeA .- 1)...))
+        A = sprand(shapeA..., 0.4)
+        nonzeros(A)[sin.(nonzeros(A)) .== 0] .= .0
+        nonzeros(A)[cos.(nonzeros(A)) .== 0] .= .0
+        A = dropzeros(A)
+        test_map_and_map!(A, false)
+        test_map_and_map!(A, false)
+        test_map_and_map!(A, true)
     end
     # https://github.com/JuliaLang/julia/issues/37819
     Z = spzeros(Float64, Int32, 50000, 50000)
diff --git a/test/issues.jl b/test/issues.jl
index 6ae81092..6a131d41 100644
--- a/test/issues.jl
+++ b/test/issues.jl
@@ -9,6 +9,28 @@ using InteractiveUtils: @which
 include("forbidproperties.jl")
 include("simplesmatrix.jl")
 
+
+@testset "Issue #15" begin
+    s = sparse([1, 2], [1, 2], [10, missing])
+    d = Matrix(s)
+    
+    s2 = sparse(d)
+
+    @test s2[1, 1] == 10
+    @test s2[2, 1] == 0
+    @test s2[1, 2] == 0
+    @test s2[2, 2] === missing
+    @test typeof(s2) == typeof(s)
+
+    x = spzeros(3)
+    y = similar(x, Union{Int, Missing})
+    y[1] = missing
+    y[2] = 10
+    @test y[1] === missing
+    @test y[2] == 10
+    @test y[3] == 0
+end
+
 @testset "Issue #33169" begin
     m21 = sparse([1, 2], [2, 2], SimpleSMatrix{2,1}.([rand(2, 1), rand(2, 1)]), 2, 2)
     m12 = sparse([1, 2], [2, 2], SimpleSMatrix{1,2}.([rand(1, 2), rand(1, 2)]), 2, 2)
@@ -719,6 +741,18 @@ f12063(args...) = 2
 g12063() = f12063(0, 0, 0, 0, 0, 0, 0.0, spzeros(0,0), Int[])
 @test g12063() == 1
 
+@testset "Issue #210" begin
+    io = IOBuffer()
+    show(io, sparse([1 2; 3 4]))
+    @test String(take!(io)) == "sparse([1, 2, 1, 2], [1, 1, 2, 2], [1, 3, 2, 4], 2, 2)"
+    io = IOBuffer()
+    show(io, sparse([1 2; 3 4])')
+    @test String(take!(io)) == "adjoint(sparse([1, 2, 1, 2], [1, 1, 2, 2], [1, 3, 2, 4], 2, 2))"
+    io = IOBuffer()
+    show(io, transpose(sparse([1 2; 3 4])))
+    @test String(take!(io)) == "transpose(sparse([1, 2, 1, 2], [1, 1, 2, 2], [1, 3, 2, 4], 2, 2))"
+end
+
 end
 
 end # module
diff --git a/test/runtests.jl b/test/runtests.jl
index d6c15a72..bc847de5 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,19 +1,26 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
+using Test, LinearAlgebra, SparseArrays
 for file in readlines(joinpath(@__DIR__, "testgroups"))
+    file == "" && continue # skip empty lines
     include(file * ".jl")
 end
 
 if Base.USE_GPL_LIBS
 
+    nt = @static if isdefined(Threads, :maxthreadid)
+        Threads.maxthreadid()
+    else
+        Threads.nthreads()
+    end
+
     # Test multithreaded execution
     @testset "threaded SuiteSparse tests" verbose = true begin
-        @testset "threads = $(Threads.nthreads())" begin
+        @testset "threads = $nt" begin
             include("threads.jl")
         end
         # test both nthreads==1 and nthreads>1. spawn a process to test whichever
         # case we are not running currently.
-        other_nthreads = Threads.nthreads() == 1 ? 4 : 1
+        other_nthreads = nt == 1 ? 4 : 1
         @testset "threads = $other_nthreads" begin
             let p, cmd = `$(Base.julia_cmd()) --depwarn=error --startup-file=no threads.jl`
                 p = run(
diff --git a/test/sparsematrix_ops.jl b/test/sparsematrix_ops.jl
index 6e8344d3..08ba6e90 100644
--- a/test/sparsematrix_ops.jl
+++ b/test/sparsematrix_ops.jl
@@ -4,7 +4,7 @@ module SparseTests
 
 using Test
 using SparseArrays
-using SparseArrays: getcolptr, nonzeroinds, _show_with_braille_patterns
+using SparseArrays: getcolptr, nonzeroinds, _show_with_braille_patterns, _isnotzero
 using LinearAlgebra
 using Printf: @printf # for debug
 using Random
@@ -14,6 +14,16 @@ using Dates
 include("forbidproperties.jl")
 include("simplesmatrix.jl")
 
+
+@testset "_isnotzero" begin
+    @test !_isnotzero(0::Int)
+    @test _isnotzero(1::Int)
+    @test _isnotzero(missing)
+    @test !_isnotzero(0.0)
+    @test _isnotzero(1.0)
+end
+
+
 @testset "issparse" begin
     @test issparse(sparse(fill(1,5,5)))
     @test !issparse(fill(1,5,5))
@@ -90,6 +100,34 @@ do33 = fill(1.,3)
             end
         end
     end
+    @testset "binary operations on sparse matrices with union eltype" begin
+        A = sparse([1,2,1], [1,1,2], Union{Int, Missing}[1, missing, 0])
+        for fun in (+, -, *, min, max)
+            if fun in (+, -)
+                @test collect(skipmissing(Array(fun(A, A)))) == collect(skipmissing(Array(fun(Array(A), Array(A)))))
+            end
+            @test collect(skipmissing(Array(map(fun, A, A)))) == collect(skipmissing(map(fun, Array(A), Array(A))))
+            @test collect(skipmissing(Array(broadcast(fun, A, A)))) == collect(skipmissing(broadcast(fun, Array(A), Array(A))))
+        end
+        b = convert(SparseMatrixCSC{Union{Float64, Missing}}, sprandn(Float64, 20, 10, 0.2)); b[rand(1:200, 3)] .= missing
+        C = convert(SparseMatrixCSC{Union{Float64, Missing}}, sprandn(Float64, 20, 10, 0.9)); C[rand(1:200, 3)] .= missing
+        CA = Array(C)
+        D = convert(SparseMatrixCSC{Union{Float64, Missing}}, spzeros(Float64, 20, 10)); D[rand(1:200, 3)] .= missing
+        E = convert(SparseMatrixCSC{Union{Float64, Missing}}, spzeros(Float64, 20, 10))
+        for B in (b, C, D, E), fun in (+, -, *, min, max)
+            BA = Array(B)
+            # reverse order for opposite nonzeroinds-structure
+            if fun in (+, -)
+                @test collect(skipmissing(Array(fun(B, C)))) == collect(skipmissing(Array(fun(BA, CA))))
+                @test collect(skipmissing(Array(fun(C, B)))) == collect(skipmissing(Array(fun(CA, BA))))
+            end
+            @test collect(skipmissing(Array(map(fun, B, C)))) == collect(skipmissing(map(fun, BA, CA)))
+            @test collect(skipmissing(Array(map(fun, C, B)))) == collect(skipmissing(map(fun, CA, BA)))
+            @test collect(skipmissing(Array(broadcast(fun, B, C)))) == collect(skipmissing(broadcast(fun, BA, CA)))
+            @test collect(skipmissing(Array(broadcast(fun, C, B)))) == collect(skipmissing(broadcast(fun, CA, BA)))
+        end
+    end
+    
 end
 
 let
diff --git a/test/sparsevector.jl b/test/sparsevector.jl
index 6c01c5ea..63f14e71 100644
--- a/test/sparsevector.jl
+++ b/test/sparsevector.jl
@@ -33,9 +33,12 @@ x1_full[SparseArrays.nonzeroinds(spv_x1)] = nonzeros(spv_x1)
     @test SparseArrays.nonzeroinds(x) == [2, 5, 6]
     @test nonzeros(x) == [1.25, -0.75, 3.5]
     @test count(SparseVector(8, [2, 5, 6], [true,false,true])) == 2
-    y = SparseVector(typemax(Int128), Int128[4], [5])
+    y = SparseVector(8, Int128[4], [5])
     @test y isa SparseVector{Int,Int128}
-    @test @inferred size(y) == (@inferred(length(y)),)
+    @test @inferred size(y) == (@inferred(length(y))::Int128,)
+    y = SparseVector(8, Int8[4], [5.0])
+    @test y isa SparseVector{Float64,Int8}
+    @test @inferred size(y) == (@inferred(length(y))::Int8,)
 end
 
 @testset "isstored" begin
@@ -337,6 +340,10 @@ end
         @test findall(!iszero, xc) == findall(!iszero, fc)
         @test findnz(xc) == ([2, 3, 5], [1.25, 0, -0.75])
     end
+    let Xc = spdiagm(spv_x1)
+        @test all(isempty, findnz(@view Xc[:,1]))
+        @test findnz(@view Xc[:,2]) == ([2], [1.25])
+    end
 end
 
 @testset "iteratenz (vector)" begin
@@ -555,6 +562,8 @@ end
             densevec = fill(1., N)
             densemat = fill(1., N, 1)
             diagmat = Diagonal(densevec)
+            # inferrability (https://github.com/JuliaSparse/SparseArrays.jl/pull/92)
+            cat_with_constdims(args...) = cat(args...; dims=(1,2))
             # Test that concatenations of pairwise combinations of sparse vectors with dense
             # vectors/matrices, sparse matrices, or special matrices yield sparse arrays
             for othervecormat in (densevec, densemat, spmat)
@@ -569,8 +578,6 @@ end
                 @test issparse(cat(spvec, othervecormat; dims=(1,2)))
                 @test issparse(cat(othervecormat, spvec; dims=(1,2)))
 
-                # inferrability (https://github.com/JuliaSparse/SparseArrays.jl/pull/92)
-                cat_with_constdims(args...) = cat(args...; dims=(1,2))
                 @test issparse(@inferred cat_with_constdims(spvec, othervecormat))
                 @test issparse(@inferred cat_with_constdims(othervecormat, spvec))
             end
@@ -585,8 +592,6 @@ end
             @test issparse(cat(densemat, diagmat, spmat, densevec, spvec; dims=(1,2)))
             @test issparse(cat(spvec, diagmat, densevec, spmat, densemat; dims=(1,2)))
 
-            # inferrability (https://github.com/JuliaSparse/SparseArrays.jl/pull/92)
-            cat_with_constdims(args...) = cat(args...; dims=(1,2))
             @test issparse(@inferred cat_with_constdims(densemat, diagmat, spmat, densevec, spvec))
             @test issparse(@inferred cat_with_constdims(spvec, diagmat, densevec, spmat, densemat))
         end
@@ -1445,6 +1450,34 @@ end
     @test typeof(spzeros(Float32, Int16, 3)) == SparseVector{Float32,Int16}
 end
 
+@testset "binary operations on sparse vectors with union eltype" begin
+    A = SparseVector(2, [1,2], Union{Int, Missing}[1, missing])
+    for fun in (+, -, *, min, max)
+        if fun in (+, -)
+            @test collect(skipmissing(Array(fun(A, A)))) == collect(skipmissing(Array(fun(Array(A), Array(A)))))
+        end
+        @test collect(skipmissing(Array(map(fun, A, A)))) == collect(skipmissing(map(fun, Array(A), Array(A))))
+        @test collect(skipmissing(Array(broadcast(fun, A, A)))) == collect(skipmissing(broadcast(fun, Array(A), Array(A))))
+    end
+    b = convert(SparseVector{Union{Float64, Missing}}, sprandn(Float64, 10, 0.2)); b[rand(1:10, 3)] .= missing
+    C = convert(SparseVector{Union{Float64, Missing}}, sprandn(Float64, 10, 0.9)); C[rand(1:10, 3)] .= missing
+    CA = Array(C)
+    D = convert(SparseVector{Union{Float64, Missing}}, spzeros(Float64, 10)); D[rand(1:10, 3)] .= missing
+    E = convert(SparseVector{Union{Float64, Missing}}, spzeros(Float64, 10))
+    for B in (b, C, D, E), fun in (+, -, *, min, max)
+        BA = Array(B)
+        # reverse order for opposite nonzeroinds-structure
+        if fun in (+, -)
+            @test collect(skipmissing(Array(fun(B, C)))) == collect(skipmissing(Array(fun(BA, CA))))
+            @test collect(skipmissing(Array(fun(C, B)))) == collect(skipmissing(Array(fun(CA, BA))))
+        end
+        @test collect(skipmissing(Array(map(fun, B, C)))) == collect(skipmissing(map(fun, BA, CA)))
+        @test collect(skipmissing(Array(map(fun, C, B)))) == collect(skipmissing(map(fun, CA, BA)))
+        @test collect(skipmissing(Array(broadcast(fun, B, C)))) == collect(skipmissing(broadcast(fun, BA, CA)))
+        @test collect(skipmissing(Array(broadcast(fun, C, B)))) == collect(skipmissing(broadcast(fun, CA, BA)))
+    end
+end
+
 @testset "corner cases of broadcast arithmetic operations with scalars (#21515)" begin
     # test both scalar literals and variables
     areequal(a, b, c) = isequal(a, b) && isequal(b, c)
diff --git a/test/spqr.jl b/test/spqr.jl
index 95e8a52e..a65b16cb 100644
--- a/test/spqr.jl
+++ b/test/spqr.jl
@@ -90,12 +90,14 @@ end
 @testset "Issue 26367" begin
     A = sparse([0.0 1 0 0; 0 0 0 0])
     @test Matrix(qr(A).Q) == Matrix(qr(Matrix(A)).Q) == Matrix(I, 2, 2)
+    @test sparse(qr(A).Q) == sparse(qr(Matrix(A)).Q) == Matrix(I, 2, 2)
+    @test (sparse(I, 2, 2) * qr(A).Q)::SparseMatrixCSC == sparse(qr(A).Q) == sparse(I, 2, 2)
 end
 
 @testset "Issue 26368" begin
     A = sparse([0.0 1 0 0; 0 0 0 0])
     F = qr(A)
-    @test F.Q*F.R == A[F.prow,F.pcol]
+    @test (F.Q*F.R)::SparseMatrixCSC == A[F.prow,F.pcol]
 end
 
 @testset "select ordering overdetermined" begin
@@ -131,12 +133,25 @@ end
 
 @testset "rank" begin
     S = sprandn(10, 5, 1.0)*sprandn(5, 10, 1.0)
-    @test rank(qr(S)) == 5
-    @test rank(S) == 5
+    @test rank(qr(S; tol=1e-5)) == 5
+    @test rank(S; tol=1e-5) == 5
     @test all(iszero, (rank(qr(spzeros(10, i))) for i in 1:10))
     @test all(iszero, (rank(spzeros(10, i)) for i in 1:10))
 end
 
+
+@testset "sparse" begin
+    A = I + sprandn(100, 100, 0.01)
+    q = qr(A; ordering=SPQR.ORDERING_FIXED)
+    Q = q.Q
+    sQ = sparse(Q)
+    @test sQ == sparse(Matrix(Q))
+    Dq = qr(Matrix(A))
+    perm = inv(Matrix(I, size(A)...)[q.prow, :])
+    f = sum(q.R; dims=2) ./ sum(Dq.R; dims=2)
+    @test perm * (transpose(f) .* sQ) ≈ sparse(Dq.Q)
+end
+
 end
 
 end # Base.USE_GPL_LIBS
diff --git a/test/testgroups b/test/testgroups
index e8e8b8a0..e61a1591 100644
--- a/test/testgroups
+++ b/test/testgroups
@@ -1,3 +1,4 @@
+allowscalar
 ambiguous
 higherorderfns
 sparsematrix_ops
diff --git a/test/umfpack.jl b/test/umfpack.jl
index a0b4c91c..0c343ee8 100644
--- a/test/umfpack.jl
+++ b/test/umfpack.jl
@@ -9,8 +9,12 @@ using Serialization
 using LinearAlgebra:
     I, det, issuccess, ldiv!, lu, lu!, Adjoint, Transpose, SingularException, Diagonal, logabsdet
 using SparseArrays: nnz, sparse, sprand, sprandn, SparseMatrixCSC, UMFPACK, increment!
-
 if Base.USE_GPL_LIBS
+function umfpack_report(l::UMFPACK.UmfpackLU)
+    UMFPACK.umfpack_report_numeric(l, 0)
+    UMFPACK.umfpack_report_symbolic(l, 0)
+    return
+end
 
 for itype in UMFPACK.UmfpackIndexTypes
     sol_r = Symbol(UMFPACK.umf_nm("solve", :Float64, itype))
@@ -26,8 +30,8 @@ for itype in UMFPACK.UmfpackIndexTypes
             UMFPACK.umfpack_numeric!(lu)
             (size(b,1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
             UMFPACK.@isok UMFPACK.$sol_r(typ, lu.colptr, lu.rowval, lu.nzval,
-                        x, b, lu.numeric, UMFPACK.umf_ctrl,
-                        UMFPACK.umf_info)
+                        x, b, lu.numeric, lu.control,
+                        lu.info)
             return x
         end
         function alloc_solve!(x::StridedVector{ComplexF64}, lu::UMFPACK.UmfpackLU{ComplexF64,$itype}, b::StridedVector{ComplexF64}, typ::Integer)
@@ -40,7 +44,7 @@ for itype in UMFPACK.UmfpackIndexTypes
             UMFPACK.umfpack_numeric!(lu)
             (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
             UMFPACK.@isok UMFPACK.$sol_c(typ, lu.colptr, lu.rowval, lu.nzval, C_NULL, x, C_NULL, b,
-                        C_NULL, lu.numeric, UMFPACK.umf_ctrl, UMFPACK.umf_info)
+                        C_NULL, lu.numeric, lu.control, lu.info)
             return x
         end
     end
@@ -54,6 +58,7 @@ end
         for Ti in Base.uniontypes(UMFPACK.UMFITypes)
             A = convert(SparseMatrixCSC{Tv,Ti}, A0)
             Af = lu(A)
+            umfpack_report(Af)
             b = convert(Vector{Tv}, b0)
             x = alloc_solve!(
                 similar(b),
@@ -69,11 +74,13 @@ end
                     UMFPACK.UMFPACK_A)
             end
             @test A \ bn == xn
+            umfpack_report(Af)
         end
     end
-    f = function(Tv, Ti)
+    function f(Tv, Ti)
         A = convert(SparseMatrixCSC{Tv,Ti}, A0)
         Af = lu(A)
+        umfpack_report(Af)
         b = convert(Vector{Tv}, b0)
         x = similar(b)
         ldiv!(x, Af, b)
@@ -82,16 +89,20 @@ end
         xn = similar(bn)
         ldiv!(xn, Af, bn)
         aloc2 = @allocated ldiv!(xn, Af, bn)
+        umfpack_report(Af)
         return aloc1 + aloc2
     end
     @testset "Allocations" begin
         for Tv in Base.uniontypes(UMFPACK.UMFVTypes),
             Ti in Base.uniontypes(UMFPACK.UMFITypes)
+            f(Tv, Ti)
+            f(Tv, Ti)
             @test f(Tv, Ti) == 0
         end
     end
     @testset "Thread safety" begin
         Af = lu(A0)
+        umfpack_report(Af)
         x = similar(b0)
         ldiv!(x, Af, b0)
         n = 30
@@ -102,24 +113,25 @@ end
         for i in acc
             @test i == x
         end
-
-        Af1 = UMFPACK.duplicate(Af)
+        umfpack_report(Af)
+        Af1 = copy(Af)
+        umfpack_report(Af1)
         @test trylock(Af)
         @test trylock(Af1)
-
     end
+
     @testset "test similar" begin
         Af = lu(A0)
+        umfpack_report(Af)
         sim = similar(Af.workspace)
         for f in [typeof, length],
             p in [:Wi, :W]
             @test f(getproperty(sim, p)) == f(getproperty(Af.workspace, p))
             @test getproperty(sim, p) !== getproperty(Af.workspace, p)
         end
+        umfpack_report(Af)
     end
-    @testset "test duplicate" begin
-        Af = lu(A0)
-        Af1 = UMFPACK.duplicate(Af)
+    function test_ws_dup(Af, Af1)
         for i in [:symbolic, :numeric, :colptr, :rowval, :nzval]
             @test getproperty(Af, i) === getproperty(Af1, i)
         end
@@ -130,6 +142,14 @@ end
             @test getproperty(Af, i) !== getproperty(Af1, i)
         end
     end
+    @testset "test copy(UmfpackLU)" begin
+        Af = lu(A0)
+        umfpack_report(Af)
+        test_ws_dup(Af, copy(Af))
+        test_ws_dup(Af, copy(transpose(Af)).parent)
+        test_ws_dup(Af, copy(adjoint(Af)).parent)
+        umfpack_report(Af)
+    end
 end
 
 @testset "UMFPACK wrappers" begin
@@ -148,6 +168,7 @@ end
         for Ti in Base.uniontypes(UMFPACK.UMFITypes)
             A = convert(SparseMatrixCSC{Tv,Ti}, A0)
             lua = lu(A)
+            umfpack_report(lua)
             @test nnz(lua) == 18
             @test_throws ErrorException lua.Z
             L,U,p,q,Rs = lua.:(:)
@@ -215,6 +236,7 @@ end
 
             # Element promotion and type inference
             @inferred lua\fill(1, size(A, 2))
+            umfpack_report(lua)
         end
     end
 
@@ -224,6 +246,7 @@ end
             Ac = convert(SparseMatrixCSC{ComplexF64,Ti}, Ac0)
             x  = fill(1.0 + im, size(Ac,1))
             lua = lu(Ac)
+            umfpack_report(lua)
             L,U,p,q,Rs = lua.:(:)
             @test (Diagonal(Rs) * Ac)[p,q] ≈ L * U
             b  = Ac*x
@@ -232,6 +255,7 @@ end
             @test Ac'\b ≈ x
             b  = transpose(Ac)*x
             @test transpose(Ac)\b ≈ x
+            umfpack_report(lua)
         end
     end
 
@@ -242,8 +266,10 @@ end
         Random.seed!(30072018)
         A = sparse([1:min(m,n); rand(1:m, 10)], [1:min(m,n); rand(1:n, 10)], elty == Float64 ? randn(min(m, n) + 10) : complex.(randn(min(m, n) + 10), randn(min(m, n) + 10)))
         F = lu(A)
+        umfpack_report(F)
         L, U, p, q, Rs = F.:(:)
         @test (Diagonal(Rs) * A)[p,q] ≈ L * U
+        umfpack_report(F)
     end
 
     @testset "Issue #4523 - complex sparse \\" begin
@@ -264,17 +290,19 @@ end
             (Float32, Float64),
             (Float64, Float64),
             (Int, Float64),
+            (ComplexF16, ComplexF64),
+            (Float16, Float64),
         ]
-        # Remove 16-bit eltypes on Windows due to julia #45736 issue
-        !Sys.iswindows() && push!(testtypes, (ComplexF16, ComplexF64), (Float16, Float64))
 
         for (Tin, Tout) in testtypes
             F = lu(sparse(fill(Tin(1), 1, 1)))
+            umfpack_report(F)
             L = sparse(fill(Tout(1), 1, 1))
             @test F.p == F.q == [1]
             @test F.Rs == [1.0]
             @test F.L == F.U == L
             @test F.:(:) == (L, L, [1], [1], [1.0])
+            umfpack_report(F)
         end
     end
 
@@ -285,11 +313,13 @@ end
     @testset "size(::UmfpackLU)" begin
         m = n = 1
         F = lu(sparse(fill(1., m, n)))
+        umfpack_report(F)
         @test size(F) == (m, n)
         @test size(F, 1) == m
         @test size(F, 2) == n
         @test size(F, 3) == 1
         @test_throws ArgumentError size(F,-1)
+        umfpack_report(F)
     end
 
     @testset "Test aliasing" begin
@@ -303,6 +333,7 @@ end
         A = sparse(1.0I, 4, 4)
         A[1:2,1:2] = [-.01 -200; 200 .001]
         F = lu(A)
+        umfpack_report(F)
         @test F.p == [3 ; 4 ; 2 ; 1]
     end
 
@@ -313,9 +344,11 @@ end
         X = zeros(ComplexF64, N, N)
         B = complex.(rand(N, N), rand(N, N))
         luA, lufA = lu(A), lu(Array(A))
+        umfpack_report(luA)
         @test ldiv!(copy(X), luA, B) ≈ ldiv!(copy(X), lufA, B)
         @test ldiv!(copy(X), adjoint(luA), B) ≈ ldiv!(copy(X), adjoint(lufA), B)
         @test ldiv!(copy(X), transpose(luA), B) ≈ ldiv!(copy(X), transpose(lufA), B)
+        umfpack_report(luA)
     end
 
     @testset "singular matrix" begin
@@ -328,6 +361,7 @@ end
     @testset "deserialization" begin
         A  = 10*I + sprandn(10, 10, 0.4)
         F1 = lu(A)
+        umfpack_report(F1)
         b  = IOBuffer()
         serialize(b, F1)
         seekstart(b)
@@ -335,35 +369,56 @@ end
         for nm in (:colptr, :m, :n, :nzval, :rowval, :status)
             @test getfield(F1, nm) == getfield(F2, nm)
         end
+        for nm in (:W, :Wi)
+            @test getfield(F1.workspace, nm) == getfield(F2.workspace, nm)
+        end
+        b1 = IOBuffer()
+        serialize(b1, (a=F1, b=F2))
+        seekstart(b1)
+        x = deserialize(b1)
+        for nm in (:colptr, :m, :n, :nzval, :rowval, :status)
+            @test getfield(F1, nm) == getfield(x.a, nm) == getfield(x.b, nm)
+        end
+        for nm in (:W, :Wi)
+            @test getfield(x.a.workspace, nm) == getfield(F1.workspace, nm)
+            @test getfield(x.b.workspace, nm) == getfield(F2.workspace, nm)
+        end
+
+        umfpack_report(F1)
+        umfpack_report(F2)
+        umfpack_report(x.a)
+        umfpack_report(x.b)
     end
 
     @testset "Reuse symbolic LU factorization" begin
         A1 = sparse(increment!([0,4,1,1,2,2,0,1,2,3,4,4]),
                     increment!([0,4,0,2,1,2,1,4,3,2,1,2]),
                     [2.,1.,3.,4.,-1.,-3.,3.,9.,2.,1.,4.,2.], 5, 5)
-        # Remove 16-bit eltypes on Windows due to julia #45736 issue
-        testtypes = [Float64, ComplexF64, Float32, ComplexF32]
-        !Sys.iswindows() && push!(testtypes, Float16, ComplexF16)
+        testtypes = [Float64, ComplexF64, Float32, ComplexF32, Float16, ComplexF16]
         for Tv in testtypes
             for Ti in Base.uniontypes(UMFPACK.UMFITypes)
                 A = convert(SparseMatrixCSC{Tv,Ti}, A0)
                 B = convert(SparseMatrixCSC{Tv,Ti}, A1)
                 b = Tv[8., 45., -3., 3., 19.]
                 F = lu(A)
+                umfpack_report(F)
                 lu!(F, B)
+                umfpack_report(F)
                 @test F\b ≈ B\b ≈ Matrix(B)\b
 
                 # singular matrix
                 C = copy(B)
                 C[4, 3] = Tv(0)
                 F = lu(A)
+                umfpack_report(F)
                 @test_throws SingularException lu!(F, C)
-
                 # change of nonzero pattern
                 D = copy(B)
                 D[5, 1] = Tv(1.0)
                 F = lu(A)
+                umfpack_report(F)
                 @test_throws ArgumentError lu!(F, D)
+                umfpack_report(F)
             end
         end
     end
@@ -382,8 +437,57 @@ end
     # singular matrix
     B = sparse(zeros(Float64, 2, 2))
     F = lu(B; check=false)
+    umfpack_report(F)
     facstring = sprint((t, s) -> show(t, "text/plain", s), F)
     @test facstring == "Failed factorization of type $(summary(F))"
+    umfpack_report(F)
+end
+
+
+@testset "UMFPACK's lu with custom permutation" begin
+    A = sparse([1.0 0.0 0.9778920565882165 0.0 0.0 0.0 0.0 0.0 0.0 0.0;
+    0.0 1.0 0.0 0.0 0.0 1.847311282254734 0.0 0.0 0.0 0.0;
+    0.0 0.0 1.0 0.0 0.0 0.04863647201402087 0.0 0.0 0.0 -1.1593207405039443;
+    0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.5145863988424498;
+    0.0421803353935357 0.0 -1.2818900361848549 0.0 1.0 0.0 0.1116124255865398 0.0 0.0 0.0;
+    0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.5457237331767308;
+    -0.4983003278517826 -0.9974658316950679 1.0734689365455168 -1.0511956770913033 0.0 -0.37409855916460416 1.999357231970987 0.0 0.0 -0.9620788056415616;
+    -1.5784683379261246 0.0 0.0 0.0 -0.4147349268116999 0.0 0.8539293641597945 1.0 0.0 0.0;
+    0.0 0.0 0.0 0.0 -0.039051958043171624 0.0 0.0 -0.3814599389272203 1.0 0.0;
+    0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0])
+    q1 = [9, 8, 5, 1, 7, 2, 3, 4, 6, 10]
+    q0 = q1 .- 1
+    for i in 1:10
+        b = randn(10)
+        x = lu(A) \ b
+        x0 = lu(A; q=q0) \ b
+        x1 = lu(A; q=q1) \ b
+        @test x ≈ x0
+        @test x ≈ x1
+    end
+end
+
+@testset "changing refinement should resize workspace" begin
+    A = lu(sprandn(100, 100, 0.1) + I)
+    umfpack_report(A)
+    b = randn(100)
+    @test length(A.workspace.Wi) == 100
+    @test length(A.workspace.W) == 100
+    x = A \ b
+    A.control[UMFPACK.JL_UMFPACK_IRSTEP] = 2
+    y = A \ b
+    @test x ≈ y
+    @test length(A.workspace.Wi) == 100
+    @test length(A.workspace.W) == 500
+    umfpack_report(A)
+end
+
+
+for Ti in Base.uniontypes(UMFPACK.UMFITypes)
+    A = I + sprandn(100, 100, 0.01)
+    Af = lu(A)
+    UMFPACK.umfpack_report_numeric(Af, 0)
+    UMFPACK.umfpack_report_symbolic(Af, 0)
 end
 
 end # Base.USE_GPL_LIBS