From b77ca44c7ef4f93633a73c2694297b6f83f6ed20 Mon Sep 17 00:00:00 2001 From: chriselrod Date: Wed, 9 Aug 2023 11:07:15 -0400 Subject: [PATCH 1/5] @turbo findmax, and smaller nsplit --- src/lu.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lu.jl b/src/lu.jl index f75ee0e..117315b 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -144,7 +144,8 @@ end end @inline function nsplit(::Type{T}, n) where {T} - k = max(2, 512 ÷ (isbitstype(T) ? sizeof(T) : 8)) + # k = max(2, 512 ÷ (isbitstype(T) ? sizeof(T) : 8)) + k = max(2, 128 ÷ (isbitstype(T) ? sizeof(T) : 8)) k_2 = k ÷ 2 return n >= k ? ((n + k_2) ÷ k) * k_2 : n ÷ 2 end @@ -277,12 +278,11 @@ function _generic_lufact!(A, ::Val{Pivot}, ipiv, info) where {Pivot} kp = k if Pivot amax = abs(zero(eltype(A))) - for i in k:m + @turbo for i in k:m absi = abs(A[i, k]) - if absi > amax - kp = i - amax = absi - end + isnewmax = absi > amax + kp = isnewmax ? i : kp + amax = isnewmax ? absi : amax end ipiv[k] = kp end From f48ada5da13cc1b5ae167be6531010b4c700854a Mon Sep 17 00:00:00 2001 From: chriselrod Date: Wed, 9 Aug 2023 11:07:55 -0400 Subject: [PATCH 2/5] Bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 06b0306..82b8cd2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "RecursiveFactorization" uuid = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" authors = ["Yingbo Ma "] -version = "0.2.19" +version = "0.2.20" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" From 5ead5daa552e9471fe7286b26a6c11ca2c90e353 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Wed, 9 Aug 2023 15:13:46 -0400 Subject: [PATCH 3/5] `warn_check_args=false ` --- src/lu.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lu.jl b/src/lu.jl index 117315b..7862ba4 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -289,7 +289,7 @@ function _generic_lufact!(A, ::Val{Pivot}, ipiv, info) where {Pivot} if !iszero(A[kp, k]) if k != kp # Interchange - @simd for i in 1:n + @simd warn_check_args=false for i in 1:n tmp = A[k, i] A[k, i] = A[kp, i] A[kp, i] = tmp From fc7e7de1540671d40ccae8ad4eb0decd97525dc2 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Wed, 9 Aug 2023 15:32:31 -0400 Subject: [PATCH 4/5] `warn_check_args=false` at the correct place --- src/lu.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lu.jl b/src/lu.jl index 7862ba4..d630f6b 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -278,7 +278,7 @@ function _generic_lufact!(A, ::Val{Pivot}, ipiv, info) where {Pivot} kp = k if Pivot amax = abs(zero(eltype(A))) - @turbo for i in k:m + @turbo warn_check_args=false for i in k:m absi = abs(A[i, k]) isnewmax = absi > amax kp = isnewmax ? i : kp @@ -289,7 +289,7 @@ function _generic_lufact!(A, ::Val{Pivot}, ipiv, info) where {Pivot} if !iszero(A[kp, k]) if k != kp # Interchange - @simd warn_check_args=false for i in 1:n + @simd for i in 1:n tmp = A[k, i] A[k, i] = A[kp, i] A[kp, i] = tmp From 753c55842d316ce4052e22e45e0eacb90293fa31 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Wed, 9 Aug 2023 17:27:37 -0400 Subject: [PATCH 5/5] Remove commented out additional line --- src/lu.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lu.jl b/src/lu.jl index d630f6b..8fd3442 100644 --- a/src/lu.jl +++ b/src/lu.jl @@ -144,7 +144,6 @@ end end @inline function nsplit(::Type{T}, n) where {T} - # k = max(2, 512 ÷ (isbitstype(T) ? sizeof(T) : 8)) k = max(2, 128 ÷ (isbitstype(T) ? sizeof(T) : 8)) k_2 = k ÷ 2 return n >= k ? ((n + k_2) ÷ k) * k_2 : n ÷ 2