From 39f80b949b36e7d1484445f0b9bf7de26b2e2524 Mon Sep 17 00:00:00 2001
From: Ben Gui <guiastrennec@gmail.com>
Date: Sun, 2 Jun 2024 22:30:52 +0200
Subject: [PATCH] summarise_nm_model: fixed bug in condition number

---
 NEWS.md                             |  3 +
 R/summarise_nm_model.R              | 58 ++++++++++++++-----
 tests/testthat/test-model-summary.R | 90 ++++++++++++++++++++++++++++-
 3 files changed, 132 insertions(+), 19 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 0b00ab17..a5370208 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+# xpose 0.4.19
+* Fixed bug in condition number when eigen values outputted on multiple records in .lst file (@billdenney & @marianklose, #128)
+
 # xpose 0.4.18
 * Compatibility fix for `roxygen2` 7.3.1
 * Fix bug when reading a control stream and using `$PROB` instead of `$PROBLEM` (@AndreasCalvagone, #222)
diff --git a/R/summarise_nm_model.R b/R/summarise_nm_model.R
index d73877d2..fcc07fbe 100755
--- a/R/summarise_nm_model.R
+++ b/R/summarise_nm_model.R
@@ -428,27 +428,53 @@ sum_nsig <- function(model, software) {
 # Condition number
 sum_condn <- function(model, software, rounding) {
   if (software == 'nonmem') {
-    x <- model %>% 
-      dplyr::filter(.$subroutine == 'lst') %>% 
-      dplyr::slice(which(stringr::str_detect(.$code, stringr::fixed('EIGENVALUES OF COR'))) + 4)
     
-    if (nrow(x) == 0) return(sum_tpl('condn', 'na'))
+    ## Check if any match for eigenvalues throughout the lst
+    if (!any(stringr::str_detect(model$code[model$subroutine == "lst"], stringr::fixed('EIGENVALUES OF COR')))) { 
+      return(sum_tpl('condn', 'na'))
+    }
     
-    x %>% 
+    model %>% 
       dplyr::group_by_at(.vars = 'problem') %>% 
       tidyr::nest() %>% 
       dplyr::ungroup() %>% 
-      dplyr::mutate(subprob = 0,
-                    label = 'condn',
-                    value = purrr::map_chr(.$data, function(x) {
-                      stringr::str_trim(x$code, side = 'both') %>%  
-                        stringr::str_split(pattern = '\\s+') %>% 
-                        purrr::flatten_chr() %>% 
-                        as.numeric() %>% 
-                        {max(.)/min(.)} %>% 
-                        round(digits = rounding) %>% 
-                        as.character()})) %>% 
-      dplyr::select(dplyr::one_of('problem', 'subprob', 'label', 'value'))
+      mutate(value = purrr::map_chr(
+        .x = .$data,
+        .f = ~{
+          ## Find the eigenvalues header
+          eigen_header <- stringr::str_which(.x$code, stringr::fixed('EIGENVALUES OF COR'))
+          
+          if (length(eigen_header) == 0) return(NA_character_)
+          
+          # Find numeric values in the format of eigen values
+          eigen_rows <- eigen_header - 1 + stringr::str_which(.x$code[eigen_header:length(.x$code)], pattern = "\\d\\.\\d{2}E[+-]?\\d+(?=\\s|$)")
+          
+          ## Make sure rows are consecutive to prevent possible false positive match
+          diff_rows <- c(1, diff(eigen_rows))
+          if (any(diff_rows != 1)) {
+            eigen_rows <- eigen_rows[1:(which(diff_rows != 1) - 1)]
+          }
+          
+          ## Parse the eigen values
+          eigen_values <- .x[eigen_rows, ] %>% 
+            dplyr::pull("code") %>% 
+            stringr::str_trim(side = 'both') %>%  
+            paste(collapse = " ") %>% 
+            stringr::str_split(pattern = '\\s+') %>% 
+            purrr::flatten_chr() %>% 
+            as.numeric()
+          
+          ## Compute the condition number
+          eigen_values %>% 
+            {max(.)/min(.)} %>% 
+            round(digits = rounding) %>% 
+            as.character()
+        }
+      )) %>% 
+      dplyr::ungroup() %>% 
+      mutate(subprob = 0, label = 'condn') %>% 
+      dplyr::select(dplyr::one_of('problem', 'subprob', 'label', 'value')) %>% 
+      dplyr::filter(!is.na(.$value))
   }
 }
 
diff --git a/tests/testthat/test-model-summary.R b/tests/testthat/test-model-summary.R
index 86c86a29..46d3d508 100755
--- a/tests/testthat/test-model-summary.R
+++ b/tests/testthat/test-model-summary.R
@@ -2,6 +2,37 @@
 software <- 'nonmem'
 model    <- xpdb_ex_pk$code
 model2   <- model[0, ]
+
+## Special model for testing conditional number
+model3 <- structure(list(
+  problem = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0), 
+  level = c(34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37), 
+  subroutine = c("lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "oth", "oth", "oth"), 
+  code = c("1", " ************************************************************************************************************************", 
+           " ******************** ********************", " ******************** FIRST ORDER CONDITIONAL ESTIMATION WITH INTERACTION ********************", 
+           " ******************** EIGENVALUES OF COR MATRIX OF ESTIMATE ********************", 
+           " ******************** ********************", " ************************************************************************************************************************", 
+           " 1 2 3 4 5 6 7 8 9 10 11 12", " 13 14 15 16 17 18 19 20 21 22 23 24", 
+           " 25", " 8.65E-02 1.33E-01 1.92E-01 2.31E-01 2.72E-01 3.00E-01 3.39E-01 4.20E-01 5.69E-01 5.81E-01 6.53E-01 7.52E-01", 
+           " 7.61E-01 9.33E-01 9.54E-01 9.99E-01 1.12E+00 1.18E+00 1.28E+00 1.48E+00 1.61E+00 1.72E+00 2.19E+00 2.57E+00", 
+           " 3.68E+00", "1", " ************************************************************************************************************************", 
+           " ******************** ********************", " ******************** FIRST ORDER CONDITIONAL ESTIMATION WITH INTERACTION ********************", 
+           " ******************** EIGENVALUES OF COR MATRIX OF ESTIMATE ********************", 
+           " ******************** ********************", " ************************************************************************************************************************", 
+           " 1 2 3 4 5 6 7 8 9 10 11 12", " 13 14 15 16 17 18 19 20 21", 
+           " 7.61E-03 4.44E-02 4.82E-02 6.23E-02 1.12E-01 2.08E-01 2.69E-01 3.57E-01 3.93E-01 5.83E-01 5.96E-01 8.08E-01", 
+           " 9.21E-01 9.59E-01 1.04E+00 1.32E+00 1.50E+00 2.20E+00 2.52E+00 3.08E+00 3.97E+00", 
+           " 1 2 3 4 5 6 7 8 9 10 11 12", " 13 14 15 16 17 18 19 20 21 22 23 24", 
+           " 25", " 8.65E-02 1.33E-01 1.92E-01 2.31E-01 2.72E-01 3.00E-01 3.39E-01 4.20E-01 5.69E-01 5.81E-01 6.53E-01 7.52E-01", 
+           " 7.61E-05 9.33E-01 9.54E-01 9.99E-01 1.12E+00 1.18E+00 1.28E+00 1.48E+00 1.61E+00 1.72E+00 2.19E+00 2.57E+00", 
+           " 3.68E+10", "1", " #CPUT: Total CPU Time in Seconds, 6.040", 
+           "Stop Time:", "Mon Oct 16 13:34:35 CEST 2017"), 
+  comment = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "Add non-consecutive values in eigen value format to test parser", "", "", "", "", "", "", "", "", "")
+), row.names = c(NA, -34L), class = c("nm_model", "tbl_df", "tbl", "data.frame"), file  = "run001.lst", dir = "data", software = "nonmem")
+
+model3 <- dplyr::bind_rows(slice(xpdb_ex_pk$code, 1:(nrow(xpdb_ex_pk$code) - 3)), model3)
+
+
 file     <- 'pk/model/run001.lst'
 rounding <- 2
 
@@ -18,76 +49,128 @@ eta_shk_string <- ifelse(getRversion() >= "4.0.0",
 
 test_that('summary is properly created with the appropriate information', {
   expect_equal(sum_out(sum_software(software)), c('software', 'nonmem'))
+  
   expect_equal(sum_out(sum_version(model, software)), c('version', '7.3.0'))
+  
   expect_equal(sum_out(sum_file(file)), c('file', 'run001.lst'))
+  
   expect_equal(sum_out(sum_run(file)), c('run', 'run001'))
+  
   expect_equal(sum_out(sum_directory(file)), c('dir', 'pk/model'))
+  
   expect_equal(sum_out(sum_reference(model, software)), c('ref', '000'))
+  
   expect_equal(sum_out(sum_probn(model, software), 1), c('probn', '1'))
   expect_equal(sum_out(sum_probn(model, software), 2), c('probn', '2'))
+  
   expect_equal(sum_out(sum_timestart(model, software)), c('timestart', 'Mon Oct 16 13:34:28 CEST 2017'))
+  
   expect_equal(sum_out(sum_timestop(model, software)), c('timestop', 'Mon Oct 16 13:34:35 CEST 2017'))
+  
   expect_equal(sum_out(sum_description(model, software)), c('descr', 'NONMEM PK example for xpose'))
+  
   expect_equal(sum_out(sum_label(model, software), 1), c('label', 'Parameter estimation'))
   expect_equal(sum_out(sum_label(model, software), 2), c('label', 'Model simulations'))
+  
   expect_equal(sum_out(sum_input_data(model, software), 1), c('data', '../../mx19_2.csv'))
   expect_equal(sum_out(sum_input_data(model, software), 2), c('data', '../../mx19_2.csv'))
+  
   expect_equal(sum_out(sum_nobs(model, software), 1), c('nobs', '476'))
   expect_equal(sum_out(sum_nobs(model, software), 2), c('nobs', '476'))
+  
   expect_equal(sum_out(sum_nind(model, software), 1), c('nind', '74'))
   expect_equal(sum_out(sum_nind(model, software), 2), c('nind', '74'))
+  
   expect_equal(sum_out(sum_nsim(model, software)), c('nsim', '20'))
+  
   expect_equal(sum_out(sum_simseed(model, software)), c('simseed', '221287'))
+  
   expect_equal(sum_out(sum_subroutine(model, software)), c('subroutine', '2'))
+  
   expect_equal(sum_out(sum_runtime(model, software)), c('runtime', '00:00:02'))
+  
   expect_equal(sum_out(sum_covtime(model, software)), c('covtime', '00:00:03'))
+  
   expect_equal(sum_out(sum_term(model, software)), c('term', 'MINIMIZATION SUCCESSFUL'))
+  
   expect_equal(sum_out(sum_warnings(model, software), 1), c('warnings', '(WARNING 2) NM-TRAN INFERS THAT THE DATA ARE POPULATION.'))
   expect_equal(sum_out(sum_warnings(model, software), 2), c('warnings', '(WARNING 2) NM-TRAN INFERS THAT THE DATA ARE POPULATION.\n(WARNING 22) WITH $MSFI AND \"SUBPROBS\", \"TRUE=FINAL\" ...'))
+  
   #expect_equal(sum_out(sum_errors(model, software)), c('errors', 'na'))
+  
   expect_equal(sum_out(sum_nsig(model, software)), c('nsig', '3.3'))
-  expect_equal(sum_out(sum_condn(model, software, rounding)), c('condn', '21.5'))
+  
+  expect_equal(sum_out(sum_condn(model3, software, rounding), 1), c('condn', '21.5'))
+  expect_equal(sum_out(sum_condn(model3, software, rounding), 2), c('condn', '42.54'))
+  expect_equal(sum_out(sum_condn(model3, software, rounding), 3), c('condn', '521.68'))
+  
   #expect_equal(sum_out(sum_nesample(model, software)), c('nesample', 'na'))
+  
   #expect_equal(sum_out(sum_esampleseed(model, software)), c('esampleseed', 'na'))
+  
   expect_equal(sum_out(sum_ofv(model, software)), c('ofv', '-1403.905'))
+  
   expect_equal(sum_out(sum_method(model, software), 1), c('method', 'foce-i'))
   expect_equal(sum_out(sum_method(model = dplyr::tibble(problem = 1L, level = 1L, subroutine = 'est', 
                                                         code = 'METH=0', comment = ''), software), 1), c('method', 'fo'))
   expect_equal(sum_out(sum_method(model, software), 2), c('method', 'sim'))
-  expect_equal(sum_out(sum_shk(model, software, 'eps', rounding)), c('epsshk', '14.86 [1]'))
   
-  skip_on_cran() # Let's wait and see how R 4.0.0 rounding behaves once it's released
+  expect_equal(sum_out(sum_shk(model, software, 'eps', rounding)), c('epsshk', '14.86 [1]'))
   expect_equal(sum_out(sum_shk(model, software, 'eta', rounding)), c('etashk', eta_shk_string))
 })
 
 test_that('summary default summary is returned for missing information', {
   expect_equal(sum_out(sum_version(model2, software)), c('version', 'na'))
+  
   expect_equal(sum_out(sum_reference(model2, software)), c('ref', 'na'))
+  
   expect_equal(sum_out(sum_probn(model2, software)), c('probn', 'na'))
+  
   expect_equal(sum_out(sum_timestart(model2, software)), c('timestart', 'na'))
+  
   expect_equal(sum_out(sum_timestop(model2, software)), c('timestop', 'na'))
+  
   expect_equal(sum_out(sum_description(model2, software)), c('descr', 'na'))
+  
   expect_equal(sum_out(sum_label(model2, software)), c('label', 'na'))
+  
   expect_equal(sum_out(sum_input_data(model2, software)), c('data', 'na'))
+  
   expect_equal(sum_out(sum_nobs(model2, software)), c('nobs', 'na'))
+  
   expect_equal(sum_out(sum_nind(model2, software)), c('nind', 'na'))
+  
   expect_equal(sum_out(sum_nsim(model2, software)), c('nsim', 'na'))
+  
   expect_equal(sum_out(sum_simseed(model2, software)), c('simseed', 'na'))
+  
   expect_equal(sum_out(sum_subroutine(model2, software)), c('subroutine', 'na'))
+  
   expect_equal(sum_out(sum_runtime(model2, software)), c('runtime', 'na'))
+  
   expect_equal(sum_out(sum_covtime(model2, software)), c('covtime', 'na'))
   expect_equal(sum_out(sum_covtime(model = dplyr::tibble(problem = 1L, level = 1L, subroutine = 'lst', 
                                                          code = 'Elapsed covariance time in seconds: ********', 
                                                          comment = ''), software), 1), c('covtime', 'na'))
+  
   expect_equal(sum_out(sum_term(model2, software)), c('term', 'na'))
+  
   expect_equal(sum_out(sum_warnings(model2, software)), c('warnings', 'na'))
+  
   expect_equal(sum_out(sum_errors(model2, software)), c('errors', 'na'))
+  
   expect_equal(sum_out(sum_nsig(model2, software)), c('nsig', 'na'))
+  
   expect_equal(sum_out(sum_condn(model2, software, rounding)), c('condn', 'na'))
+  
   expect_equal(sum_out(sum_nesample(model2, software)), c('nesample', 'na'))
+  
   expect_equal(sum_out(sum_esampleseed(model2, software)), c('esampleseed', 'na'))
+  
   expect_equal(sum_out(sum_ofv(model2, software)), c('ofv', 'na'))
+  
   expect_equal(sum_out(sum_method(model2, software)), c('method', 'na'))
+  
   expect_equal(sum_out(sum_shk(model2, software, 'eps', rounding)), c('epsshk', 'na'))
   expect_equal(sum_out(sum_shk(model2, software, 'eta', rounding)), c('etashk', 'na'))
 })
@@ -118,3 +201,4 @@ test_that("Termination messages are parsed when minimization is terminated",{
   model <- tibble::tibble(problem = 1, level = 60, subroutine = 'lst', code = unlist(stringr::str_split(relevant_lst_part, "\\n")), comment = "")
   expect_equal(sum_term(model, "nonmem"), expected_result)
 })
+