From 66e5d5a9cdee1ac5ed5146d21c0d67c2aefed357 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Fri, 22 Dec 2023 16:00:41 -0500
Subject: [PATCH 01/12] Pseudoinverse fails if not square

---
 btas/generic/linear_algebra.h | 1 +
 1 file changed, 1 insertion(+)
diff --git a/btas/generic/linear_algebra.h b/btas/generic/linear_algebra.h
index 0029c65f..a7c934e3 100644
--- a/btas/generic/linear_algebra.h
+++ b/btas/generic/linear_algebra.h
@@ -260,6 +260,7 @@ Tensor pseudoInverse(Tensor & A, bool & fast_pI) {
     // Compute the matrix A^-1 from the inverted singular values and the U and
     // V^T provided by the SVD
     gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, U, s_inv, 0.0, s_);
+    U = Tensor(Range{Range1{row}, Range1{col}});
     gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, s_, Vt, 0.0, U);
 
     return U;

From 682729d2f3a19176657364774f701587567f67fa Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Fri, 22 Dec 2023 16:01:56 -0500
Subject: [PATCH 02/12] Update Flatten

Old flatten used recursion and was hard to debug. Fix it by mapping order-N tensor to order 3 tensor (before_n, n, after_n) then do transpose in copy.
---
 btas/generic/flatten.h | 96 ++++++++++--------------------------------
 1 file changed, 22 insertions(+), 74 deletions(-)

diff --git a/btas/generic/flatten.h b/btas/generic/flatten.h
index afb52573..36de4519 100644
--- a/btas/generic/flatten.h
+++ b/btas/generic/flatten.h
@@ -9,86 +9,34 @@ namespace btas {
 /// \f[ A(I_1, I_2, I_3, ..., I_{mode}, ..., I_N) -> A(I_{mode}, J)\f]
 /// where \f$J = I_1 * I_2 * ...I_{mode-1} * I_{mode+1} * ... * I_N.\f$
 /// \return Matrix with dimension \f$(I_{mode}, J)\f$
-
-template<typename Tensor>
-Tensor flatten(const Tensor &A, size_t mode) {
-  using ord_t = typename range_traits<typename Tensor::range_type>::ordinal_type;
-
-  if (mode >= A.rank()) BTAS_EXCEPTION("Cannot flatten along mode outside of A.rank()");
-
-  // make X the correct size
-  Tensor X(A.extent(mode), A.range().area() / A.extent(mode));
-
-  ord_t indexi = 0, indexj = 0;
-  size_t ndim = A.rank();
-  // J is the new step size found by removing the mode of interest
-  std::vector<ord_t> J(ndim, 1);
-  for (size_t i = 0; i < ndim; ++i)
-    if (i != mode)
-      for (size_t m = 0; m < i; ++m)
-        if (m != mode)
-          J[i] *= A.extent(m);
-
-  auto tensor_itr = A.begin();
-
-  // Fill X with the correct values
-  fill(A, 0, X, mode, indexi, indexj, J, tensor_itr);
-
-  // return the flattened matrix
-  return X;
-}
-
-/// following the formula for flattening layed out by Kolda and Bader
-/// <a href=http://epubs.siam.org/doi/pdf/10.1137/07070111X> See reference. </a>
-/// Recursive method utilized by flatten.\n **Important** if you want to flatten a tensor
-/// call flatten, not fill.
-
-/// \param[in] A  The reference tensor to be flattened
-/// \param[in] depth The recursion depth. Should not exceed the A.rank()
-/// \param[in, out] X In: An empty matrix to be filled with correct
-/// elements of \c A flattened on the \c mode fiber. Should be size \f$ (I_{mode}, J)\f$
-/// Out: The flattened A matrix along the \c mode fiber \param[in]
-/// mode The mode which A is to be flattened. \param[in] indexi The row index of
-/// matrix X \param[in] indexj The column index of matrix X \param[in] J The
-/// step size for the row dimension of X \param[in] tensor_itr An iterator of \c A.
-/// The value of the iterator is placed in the correct position of X using
-/// recursive calls of fill().
-
-  template<typename Tensor, typename iterator, typename ord_t>
-  void fill(const Tensor &A, size_t depth, Tensor &X, size_t mode,
-            ord_t indexi, ord_t indexj, const std::vector<ord_t> &J, iterator &tensor_itr) {
+  template<typename Tensor>
+  Tensor flatten(Tensor A, size_t mode) {
+    using ord_t = typename range_traits<typename Tensor::range_type>::ordinal_type;
     using ind_t = typename Tensor::range_type::index_type::value_type;
-    size_t ndim = A.rank();
-    if (depth < ndim) {
+    // We are going to first make the order N tensor into a order 3 tensor with
+    // (modes before `mode`, `mode`, modes after `mode`
 
-      // Creates a for loop based on the number of modes A has
-      for (ind_t i = 0; i < A.extent(depth); ++i) {
+    auto dim_mode = A.extent(mode);
+    Tensor flat(dim_mode, A.range().area() / dim_mode);
+    size_t ndim = A.rank();
+    ord_t dim1 = 1, dim3 = 1;
+    for (ind_t i = 0; i < ndim; ++i) {
+      if (i < mode)
+        dim1 *= A.extent(i);
+      else if (i > mode)
+        dim3 *= A.extent(i);
+    }
+    
+    A.resize(Range{Range1{dim1}, Range1{dim_mode}, Range1{dim3}});
 
-        // use the for loop to find the column dimension index
-        if (depth != mode) {
-          indexj += i * J[depth]; // column matrix index
+    for (ord_t i = 0; i < dim1; ++i) {
+      for (ind_t j = 0; j < dim_mode; ++j) {
+        for (ord_t k = 0; k < dim3; ++k) {
+          flat(j, i * dim3 + k) = A(i,j,k);
         }
-
-          // if this depth is the mode being flattened use the for loop to find the
-          // row dimension
-        else {
-          indexi = i; // row matrix index
       }
-
-      fill(A, depth + 1, X, mode, indexi, indexj, J, tensor_itr);
-
-      // remove the indexing from earlier in this loop.
-      if (depth != mode)
-        indexj -= i * J[depth];
     }
-  }
-  
-  // When depth steps out of the number of dimensions, set X to be the correct
-  // value from the iterator then increment the iterator.
-  else {
-    X(indexi, indexj) = *tensor_itr;
-    tensor_itr++;
-  }
+    return flat;
 }
 
 } // namespace btas

From 6fa226e6b72bc53300e9f1c0e5f21dc124980e08 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Fri, 22 Dec 2023 16:03:04 -0500
Subject: [PATCH 03/12] Fix Flatten impl by not calling flatten and dont
 matricize

---
 btas/generic/cp_als.h | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 132956ce..842a903a 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -692,9 +692,36 @@ namespace btas {
       swap_to_first(tensor_ref, n, true);
 
 #else  // BTAS_HAS_CBLAS
+//      // Computes the Khatri-Rao product intermediate
+      auto KhatriRao = this->generate_KRP(n, rank, true);
+
+      // moves mode n of the reference tensor to the front to simplify contraction
+      std::vector<ind_t> tref_indices, KRP_dims, An_indices;
+
+      // resize the Khatri-Rao product to the proper dimensions
+      for (size_t i = 0; i < ndim; i++) {
+        tref_indices.push_back(i);
+        if(i == n)
+          continue;
+        KRP_dims.push_back(tensor_ref.extent(i));
+      }
+      KRP_dims.push_back(rank);
+      KhatriRao.resize(KRP_dims);
+      KRP_dims.clear();
+
+      An_indices.push_back(n);
+      An_indices.push_back(ndim);
+      for (size_t i = 0; i < ndim; i++) {
+        if(i == n)
+          continue;
+        KRP_dims.push_back(i);
+      }
+      KRP_dims.push_back(ndim);
+      contract(this->one, tensor_ref, tref_indices, KhatriRao, KRP_dims, this->zero, temp, An_indices);
+
       // without MKL program cannot perform the swapping algorithm, must compute
       // flattened intermediate
-      gemm(blas::Op::NoTrans, blas::Op::NoTrans, this->one, flatten(tensor_ref, n), this->generate_KRP(n, rank, true), this->zero, temp);
+//      gemm(blas::Op::NoTrans, blas::Op::NoTrans, this->one, new_flatten(tensor_ref, n), this->generate_KRP(n, rank, true), this->zero, temp);
 #endif
 
       if(lambda != 0){

From 51eaab81f2869f9113f6985814fb38ab389e4130 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Fri, 22 Dec 2023 16:05:03 -0500
Subject: [PATCH 04/12] Give target tensor ptr direct to allow flexibility in
 decompositions

---
 btas/generic/cp.h          | 10 ++++++++++
 btas/generic/cp_als.h      | 36 ++++++++++++++++++------------------
 btas/generic/cp_rals.h     |  2 +-
 btas/generic/tuck_cp_als.h |  2 +-
 4 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/btas/generic/cp.h b/btas/generic/cp.h
index 8b557289..384b6e08 100644
--- a/btas/generic/cp.h
+++ b/btas/generic/cp.h
@@ -71,6 +71,16 @@ namespace btas {
       epsilon = t.get_fit(max_iter);
       return;
     }
+
+    template <typename T>
+    void set_norm(T &t, double norm){
+      return;
+    }
+
+    template <typename Tensor>
+    void set_norm(FitCheck<Tensor> &t, double norm){
+      t.set_norm(norm);
+    }
   }  // namespace detail
 
   /** \brief Base class to compute the Canonical Product (CP) decomposition of an order-N
diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 842a903a..7335e723 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -625,7 +625,7 @@ namespace btas {
           if (tmp != i) {
             A[i] = A[tmp];
           } else if (dir) {
-            direct(i, rank, fast_pI, matlab, converge_test);
+            direct(i, rank, fast_pI, matlab, converge_test, tensor_ref);
           } else {
             update_w_KRP(i, rank, fast_pI, matlab, converge_test);
           }
@@ -764,52 +764,52 @@ namespace btas {
     /// return if \c matlab was successful
     /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. test to see if the ALS is converged
 
-    void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0) {
+    void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, Tensor& target, double lambda = 0.0) {
       // Determine if n is the last mode, if it is first contract with first mode
       // and transpose the product
       bool last_dim = n == ndim - 1;
       // product of all dimensions
       ord_t LH_size = size;
       size_t contract_dim = last_dim ? 0 : ndim - 1;
-      ind_t offset_dim = tensor_ref.extent(n);
+      ind_t offset_dim = target.extent(n);
       ind_t pseudo_rank = rank;
 
       // Store the dimensions which are available to hadamard contract
       std::vector<ind_t> dimensions;
       for (size_t i = last_dim ? 1 : 0; i < (last_dim ? ndim : ndim - 1); i++) {
-        dimensions.push_back(tensor_ref.extent(i));
+        dimensions.push_back(target.extent(i));
       }
 
-      // Modifying the dimension of tensor_ref so store the range here to resize
-      Range R = tensor_ref.range();
+      // Modifying the dimension of target so store the range here to resize
+      Range R = target.range();
       //Tensor an(A[n].range());
 
-      // Resize the tensor which will store the product of tensor_ref and the first factor matrix
-      Tensor temp = Tensor(size / tensor_ref.extent(contract_dim), rank);
-      tensor_ref.resize(
-          Range{Range1{last_dim ? tensor_ref.extent(contract_dim) : size / tensor_ref.extent(contract_dim)},
-                Range1{last_dim ? size / tensor_ref.extent(contract_dim) : tensor_ref.extent(contract_dim)}});
+      // Resize the tensor which will store the product of target and the first factor matrix
+      Tensor temp = Tensor(size / target.extent(contract_dim), rank);
+      target.resize(
+          Range{Range1{last_dim ? target.extent(contract_dim) : size / target.extent(contract_dim)},
+                Range1{last_dim ? size / target.extent(contract_dim) : target.extent(contract_dim)}});
 
       // contract tensor ref and the first factor matrix
-      gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? tensor_ref.conj():tensor_ref), A[contract_dim].conj(), this->zero,
+      gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? target.conj():target), A[contract_dim].conj(), this->zero,
            temp);
 
-      // Resize tensor_ref
-      tensor_ref.resize(R);
+      // Resize target
+      target.resize(R);
       // Remove the dimension which was just contracted out
-      LH_size /= tensor_ref.extent(contract_dim);
+      LH_size /= target.extent(contract_dim);
 
       // n tells which dimension not to contract, and contract_dim says which dimension I am trying to contract.
       // If n == contract_dim then that mode is skipped.
       // if n == ndim - 1, my contract_dim = 0. The gemm transposes to make rank = ndim - 1, so I
       // move the pointer that preserves the last dimension to n = ndim -2.
-      // In all cases I want to walk through the orders in tensor_ref backward so contract_dim = ndim - 2
+      // In all cases I want to walk through the orders in target backward so contract_dim = ndim - 2
       n = last_dim ? ndim - 2 : n;
       contract_dim = ndim - 2;
 
       while (contract_dim > 0) {
         // Now temp is three index object where temp has size
-        // (size of tensor_ref/product of dimension contracted, dimension to be
+        // (size of target/product of dimension contracted, dimension to be
         // contracted, rank)
         ord_t idx2 = dimensions[contract_dim],
               idx1 = LH_size / idx2;
@@ -820,7 +820,7 @@ namespace btas {
         //contract_tensor.fill(0.0);
         const auto &a = A[(last_dim ? contract_dim + 1 : contract_dim)];
         // If the middle dimension is the mode not being contracted, I will move
-        // it to the right hand side temp((size of tensor_ref/product of
+        // it to the right hand side temp((size of target/product of
         // dimension contracted, rank * mode n dimension)
         if (n == contract_dim) {
           pseudo_rank *= offset_dim;
diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h
index 55e612b9..937c9061 100644
--- a/btas/generic/cp_rals.h
+++ b/btas/generic/cp_rals.h
@@ -172,7 +172,7 @@ namespace btas {
             A[i] = A[tmp];
             lambda[i] = lambda[tmp];
           } else if (dir) {
-            this->direct(i, rank, fast_pI, matlab, converge_test, lambda[i]);
+            this->direct(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]);
           } else {
             update_w_KRP(i, rank, fast_pI, matlab, converge_test, lambda[i]);
           }
diff --git a/btas/generic/tuck_cp_als.h b/btas/generic/tuck_cp_als.h
index 25d0db81..469723df 100644
--- a/btas/generic/tuck_cp_als.h
+++ b/btas/generic/tuck_cp_als.h
@@ -484,7 +484,7 @@ namespace btas{
         count++;
         this->num_ALS++;
         for (size_t i = 0; i < ndim; i++) {
-          this->direct(i, rank, fast_pI, matlab, converge_test, lambda[i]);
+          this->direct(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]);
           // Compute the value s after normalizing the columns
           auto & ai = A[i];
           this->s = helper(i, ai);

From c956bea321688c8266bde9d73e916208a915040a Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 14:07:17 -0500
Subject: [PATCH 05/12] Try making the converge tighter

---
 unittest/ztensor_cp_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unittest/ztensor_cp_test.cc b/unittest/ztensor_cp_test.cc
index f7c26732..a2d947b4 100644
--- a/unittest/ztensor_cp_test.cc
+++ b/unittest/ztensor_cp_test.cc
@@ -72,7 +72,7 @@ TEST_CASE("ZCP") {
   std::complex<double> norm3 = sqrt(dot(Z3, Z3));
   std::complex<double> norm32 = sqrt(dot(Z33, Z33));
 
-  zconv_class conv(1e-3);
+  zconv_class conv(1e-4);
 
   // ALS tests
   {

From b6de17125f99a81bfa7682189197086c0783a9ec Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 15:29:20 -0500
Subject: [PATCH 06/12] change conv back and try to get tests working

---
 unittest/ztensor_cp_test.cc | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/unittest/ztensor_cp_test.cc b/unittest/ztensor_cp_test.cc
index a2d947b4..e68368b0 100644
--- a/unittest/ztensor_cp_test.cc
+++ b/unittest/ztensor_cp_test.cc
@@ -72,7 +72,7 @@ TEST_CASE("ZCP") {
   std::complex<double> norm3 = sqrt(dot(Z3, Z3));
   std::complex<double> norm32 = sqrt(dot(Z33, Z33));
 
-  zconv_class conv(1e-4);
+  zconv_class conv(1e-3);
 
   // ALS tests
   {
@@ -98,16 +98,19 @@ TEST_CASE("ZCP") {
       conv.verbose(false);
     }
 #endif
+    std::cout.precision(16);
     SECTION("ALS MODE = 4, Finite error") {
       CP_ALS<ztensor, zconv_class> A1(Z4);
       conv.set_norm(norm4.real());
-      double diff = A1.compute_error(conv, 1e-2, 1, 100, true, 57);
+      double diff = A1.compute_error(conv, 1e-5, 1, 100, true, 57);
+      std::cout << diff << std::endl;
+      std::cout << epsilon << std::endl;
       CHECK(std::abs(diff) <= epsilon);
     }
     SECTION("ALS MODE = 4, Finite rank") {
       CP_ALS<ztensor, zconv_class> A1(Z4);
       conv.set_norm(norm4.real());
-      double diff = A1.compute_rank(57, conv, 1, true, 57);
+      double diff = A1.compute_rank(70, conv, 1, true, 70);
       CHECK(std::abs(diff) <= epsilon);
     }
 #if BTAS_ENABLE_TUCKER_CP_UT

From d21ee08da09633b5d83d48c066bea3cfca9166d2 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 15:37:56 -0500
Subject: [PATCH 07/12] Print the rank to see when `compute_error` finishes

---
 unittest/ztensor_cp_test.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/unittest/ztensor_cp_test.cc b/unittest/ztensor_cp_test.cc
index e68368b0..f5760001 100644
--- a/unittest/ztensor_cp_test.cc
+++ b/unittest/ztensor_cp_test.cc
@@ -105,12 +105,13 @@ TEST_CASE("ZCP") {
       double diff = A1.compute_error(conv, 1e-5, 1, 100, true, 57);
       std::cout << diff << std::endl;
       std::cout << epsilon << std::endl;
+      std::cout << A1.get_factor_matrices()[1].extent(1) << std::endl;
       CHECK(std::abs(diff) <= epsilon);
     }
     SECTION("ALS MODE = 4, Finite rank") {
       CP_ALS<ztensor, zconv_class> A1(Z4);
       conv.set_norm(norm4.real());
-      double diff = A1.compute_rank(70, conv, 1, true, 70);
+      double diff = A1.compute_rank(80, conv, 1, true, 80);
       CHECK(std::abs(diff) <= epsilon);
     }
 #if BTAS_ENABLE_TUCKER_CP_UT

From fd015d57951678aa34c5d02d8b01d541f21f2d72 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 15:43:20 -0500
Subject: [PATCH 08/12] I was testing the wrong function on accident

---
 unittest/ztensor_cp_test.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/unittest/ztensor_cp_test.cc b/unittest/ztensor_cp_test.cc
index f5760001..d01d77a5 100644
--- a/unittest/ztensor_cp_test.cc
+++ b/unittest/ztensor_cp_test.cc
@@ -98,20 +98,16 @@ TEST_CASE("ZCP") {
       conv.verbose(false);
     }
 #endif
-    std::cout.precision(16);
     SECTION("ALS MODE = 4, Finite error") {
       CP_ALS<ztensor, zconv_class> A1(Z4);
       conv.set_norm(norm4.real());
-      double diff = A1.compute_error(conv, 1e-5, 1, 100, true, 57);
-      std::cout << diff << std::endl;
-      std::cout << epsilon << std::endl;
-      std::cout << A1.get_factor_matrices()[1].extent(1) << std::endl;
+      double diff = A1.compute_error(conv, 1e-2, 1, 100, true, 57);
       CHECK(std::abs(diff) <= epsilon);
     }
     SECTION("ALS MODE = 4, Finite rank") {
       CP_ALS<ztensor, zconv_class> A1(Z4);
       conv.set_norm(norm4.real());
-      double diff = A1.compute_rank(80, conv, 1, true, 80);
+      double diff = A1.compute_rank(57, conv, 1, true, 57);
       CHECK(std::abs(diff) <= epsilon);
     }
 #if BTAS_ENABLE_TUCKER_CP_UT
@@ -156,7 +152,11 @@ TEST_CASE("ZCP") {
    SECTION("RALS MODE = 4, Finite error"){
      CP_RALS<ztensor, zconv_class> A1(Z4);
      conv.set_norm(norm4.real());
-     double diff = A1.compute_error(conv, 1e-2, 1, 67, true, 65);
+     double diff = A1.compute_error(conv, 1e-5, 1, 100, true, 65);
+     std::cout.precision(16);
+     std::cout << diff << std::endl;
+     std::cout << epsilon << std::endl;
+     std::cout << A1.get_factor_matrices()[1].extent(1) << std::endl;
      CHECK(std::abs(diff) <= epsilon);
    }
 #if BTAS_ENABLE_TUCKER_CP_UT

From 0c66507cef2c721af4045191988627565f43bad3 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 15:49:44 -0500
Subject: [PATCH 09/12] Try to get RALS working

---
 btas/generic/cp_als.h       | 1 +
 unittest/ztensor_cp_test.cc | 9 +++------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 7335e723..52866628 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -659,6 +659,7 @@ namespace btas {
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());
 
+          }
 #ifdef BTAS_HAS_INTEL_MKL
 
       // Computes the Khatri-Rao product intermediate
diff --git a/unittest/ztensor_cp_test.cc b/unittest/ztensor_cp_test.cc
index d01d77a5..66ea12c6 100644
--- a/unittest/ztensor_cp_test.cc
+++ b/unittest/ztensor_cp_test.cc
@@ -146,17 +146,14 @@ TEST_CASE("ZCP") {
     SECTION("RALS MODE = 4, Finite rank") {
       CP_RALS<ztensor, zconv_class> A1(Z4);
       conv.set_norm(norm4.real());
-      double diff = A1.compute_rank(65, conv, 1, true, 65);
+      double diff = A1.compute_rank(67, conv, 1, true, 65);
       CHECK(std::abs(diff) <= epsilon);
     }
    SECTION("RALS MODE = 4, Finite error"){
      CP_RALS<ztensor, zconv_class> A1(Z4);
      conv.set_norm(norm4.real());
-     double diff = A1.compute_error(conv, 1e-5, 1, 100, true, 65);
-     std::cout.precision(16);
-     std::cout << diff << std::endl;
-     std::cout << epsilon << std::endl;
-     std::cout << A1.get_factor_matrices()[1].extent(1) << std::endl;
+     conv.verbose(true);
+     double diff = A1.compute_error(conv, 1e-5, 1, 67, true, 65);
      CHECK(std::abs(diff) <= epsilon);
    }
 #if BTAS_ENABLE_TUCKER_CP_UT

From d6fd0e6e5f15f71b10478c18b5c2e037dc2e4903 Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 15:55:42 -0500
Subject: [PATCH 10/12] remove extraneous curly brace

---
 btas/generic/cp_als.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 52866628..1944d510 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -658,8 +658,7 @@ namespace btas {
                       double lambda = 0) {
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());
-
-          }
+      
 #ifdef BTAS_HAS_INTEL_MKL
 
       // Computes the Khatri-Rao product intermediate

From 41a491c3ed037f272bd743be4e4cdf463ab214bf Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 16:00:49 -0500
Subject: [PATCH 11/12] remove verbose statement [no ci]

---
 unittest/ztensor_cp_test.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/unittest/ztensor_cp_test.cc b/unittest/ztensor_cp_test.cc
index 66ea12c6..d523b0f9 100644
--- a/unittest/ztensor_cp_test.cc
+++ b/unittest/ztensor_cp_test.cc
@@ -152,7 +152,6 @@ TEST_CASE("ZCP") {
    SECTION("RALS MODE = 4, Finite error"){
      CP_RALS<ztensor, zconv_class> A1(Z4);
      conv.set_norm(norm4.real());
-     conv.verbose(true);
      double diff = A1.compute_error(conv, 1e-5, 1, 67, true, 65);
      CHECK(std::abs(diff) <= epsilon);
    }

From bc22242babb6858906f1cc31f16c6c22e2e26c9a Mon Sep 17 00:00:00 2001
From: Karl Pierce <kmp5@vt.edu>
Date: Mon, 22 Jan 2024 17:27:59 -0500
Subject: [PATCH 12/12] Remove unnecessary comments  [no ci]

---
 btas/generic/cp_als.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 1944d510..39698e2d 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -718,10 +718,6 @@ namespace btas {
       }
       KRP_dims.push_back(ndim);
       contract(this->one, tensor_ref, tref_indices, KhatriRao, KRP_dims, this->zero, temp, An_indices);
-
-      // without MKL program cannot perform the swapping algorithm, must compute
-      // flattened intermediate
-//      gemm(blas::Op::NoTrans, blas::Op::NoTrans, this->one, new_flatten(tensor_ref, n), this->generate_KRP(n, rank, true), this->zero, temp);
 #endif
 
       if(lambda != 0){