From 39f80b949b36e7d1484445f0b9bf7de26b2e2524 Mon Sep 17 00:00:00 2001 From: Ben Gui Date: Sun, 2 Jun 2024 22:30:52 +0200 Subject: [PATCH] summarise_nm_model: fixed bug in condition number --- NEWS.md | 3 + R/summarise_nm_model.R | 58 ++++++++++++++----- tests/testthat/test-model-summary.R | 90 ++++++++++++++++++++++++++++- 3 files changed, 132 insertions(+), 19 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0b00ab17..a5370208 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# xpose 0.4.19 +* Fixed bug in condition number when eigen values outputted on multiple records in .lst file (@billdenney & @marianklose, #128) + # xpose 0.4.18 * Compatibility fix for `roxygen2` 7.3.1 * Fix bug when reading a control stream and using `$PROB` instead of `$PROBLEM` (@AndreasCalvagone, #222) diff --git a/R/summarise_nm_model.R b/R/summarise_nm_model.R index d73877d2..fcc07fbe 100755 --- a/R/summarise_nm_model.R +++ b/R/summarise_nm_model.R @@ -428,27 +428,53 @@ sum_nsig <- function(model, software) { # Condition number sum_condn <- function(model, software, rounding) { if (software == 'nonmem') { - x <- model %>% - dplyr::filter(.$subroutine == 'lst') %>% - dplyr::slice(which(stringr::str_detect(.$code, stringr::fixed('EIGENVALUES OF COR'))) + 4) - if (nrow(x) == 0) return(sum_tpl('condn', 'na')) + ## Check if any match for eigenvalues throughout the lst + if (!any(stringr::str_detect(model$code[model$subroutine == "lst"], stringr::fixed('EIGENVALUES OF COR')))) { + return(sum_tpl('condn', 'na')) + } - x %>% + model %>% dplyr::group_by_at(.vars = 'problem') %>% tidyr::nest() %>% dplyr::ungroup() %>% - dplyr::mutate(subprob = 0, - label = 'condn', - value = purrr::map_chr(.$data, function(x) { - stringr::str_trim(x$code, side = 'both') %>% - stringr::str_split(pattern = '\\s+') %>% - purrr::flatten_chr() %>% - as.numeric() %>% - {max(.)/min(.)} %>% - round(digits = rounding) %>% - as.character()})) %>% - dplyr::select(dplyr::one_of('problem', 'subprob', 'label', 'value')) + mutate(value = purrr::map_chr( + .x = .$data, + .f = ~{ + ## Find the eigenvalues header + eigen_header <- stringr::str_which(.x$code, stringr::fixed('EIGENVALUES OF COR')) + + if (length(eigen_header) == 0) return(NA_character_) + + # Find numeric values in the format of eigen values + eigen_rows <- eigen_header - 1 + stringr::str_which(.x$code[eigen_header:length(.x$code)], pattern = "\\d\\.\\d{2}E[+-]?\\d+(?=\\s|$)") + + ## Make sure rows are consecutive to prevent possible false positive match + diff_rows <- c(1, diff(eigen_rows)) + if (any(diff_rows != 1)) { + eigen_rows <- eigen_rows[1:(which(diff_rows != 1) - 1)] + } + + ## Parse the eigen values + eigen_values <- .x[eigen_rows, ] %>% + dplyr::pull("code") %>% + stringr::str_trim(side = 'both') %>% + paste(collapse = " ") %>% + stringr::str_split(pattern = '\\s+') %>% + purrr::flatten_chr() %>% + as.numeric() + + ## Compute the condition number + eigen_values %>% + {max(.)/min(.)} %>% + round(digits = rounding) %>% + as.character() + } + )) %>% + dplyr::ungroup() %>% + mutate(subprob = 0, label = 'condn') %>% + dplyr::select(dplyr::one_of('problem', 'subprob', 'label', 'value')) %>% + dplyr::filter(!is.na(.$value)) } } diff --git a/tests/testthat/test-model-summary.R b/tests/testthat/test-model-summary.R index 86c86a29..46d3d508 100755 --- a/tests/testthat/test-model-summary.R +++ b/tests/testthat/test-model-summary.R @@ -2,6 +2,37 @@ software <- 'nonmem' model <- xpdb_ex_pk$code model2 <- model[0, ] + +## Special model for testing conditional number +model3 <- structure(list( + problem = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0), + level = c(34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 36, 37, 37), + subroutine = c("lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "lst", "oth", "oth", "oth"), + code = c("1", " ************************************************************************************************************************", + " ******************** ********************", " ******************** FIRST ORDER CONDITIONAL ESTIMATION WITH INTERACTION ********************", + " ******************** EIGENVALUES OF COR MATRIX OF ESTIMATE ********************", + " ******************** ********************", " ************************************************************************************************************************", + " 1 2 3 4 5 6 7 8 9 10 11 12", " 13 14 15 16 17 18 19 20 21 22 23 24", + " 25", " 8.65E-02 1.33E-01 1.92E-01 2.31E-01 2.72E-01 3.00E-01 3.39E-01 4.20E-01 5.69E-01 5.81E-01 6.53E-01 7.52E-01", + " 7.61E-01 9.33E-01 9.54E-01 9.99E-01 1.12E+00 1.18E+00 1.28E+00 1.48E+00 1.61E+00 1.72E+00 2.19E+00 2.57E+00", + " 3.68E+00", "1", " ************************************************************************************************************************", + " ******************** ********************", " ******************** FIRST ORDER CONDITIONAL ESTIMATION WITH INTERACTION ********************", + " ******************** EIGENVALUES OF COR MATRIX OF ESTIMATE ********************", + " ******************** ********************", " ************************************************************************************************************************", + " 1 2 3 4 5 6 7 8 9 10 11 12", " 13 14 15 16 17 18 19 20 21", + " 7.61E-03 4.44E-02 4.82E-02 6.23E-02 1.12E-01 2.08E-01 2.69E-01 3.57E-01 3.93E-01 5.83E-01 5.96E-01 8.08E-01", + " 9.21E-01 9.59E-01 1.04E+00 1.32E+00 1.50E+00 2.20E+00 2.52E+00 3.08E+00 3.97E+00", + " 1 2 3 4 5 6 7 8 9 10 11 12", " 13 14 15 16 17 18 19 20 21 22 23 24", + " 25", " 8.65E-02 1.33E-01 1.92E-01 2.31E-01 2.72E-01 3.00E-01 3.39E-01 4.20E-01 5.69E-01 5.81E-01 6.53E-01 7.52E-01", + " 7.61E-05 9.33E-01 9.54E-01 9.99E-01 1.12E+00 1.18E+00 1.28E+00 1.48E+00 1.61E+00 1.72E+00 2.19E+00 2.57E+00", + " 3.68E+10", "1", " #CPUT: Total CPU Time in Seconds, 6.040", + "Stop Time:", "Mon Oct 16 13:34:35 CEST 2017"), + comment = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "Add non-consecutive values in eigen value format to test parser", "", "", "", "", "", "", "", "", "") +), row.names = c(NA, -34L), class = c("nm_model", "tbl_df", "tbl", "data.frame"), file = "run001.lst", dir = "data", software = "nonmem") + +model3 <- dplyr::bind_rows(slice(xpdb_ex_pk$code, 1:(nrow(xpdb_ex_pk$code) - 3)), model3) + + file <- 'pk/model/run001.lst' rounding <- 2 @@ -18,76 +49,128 @@ eta_shk_string <- ifelse(getRversion() >= "4.0.0", test_that('summary is properly created with the appropriate information', { expect_equal(sum_out(sum_software(software)), c('software', 'nonmem')) + expect_equal(sum_out(sum_version(model, software)), c('version', '7.3.0')) + expect_equal(sum_out(sum_file(file)), c('file', 'run001.lst')) + expect_equal(sum_out(sum_run(file)), c('run', 'run001')) + expect_equal(sum_out(sum_directory(file)), c('dir', 'pk/model')) + expect_equal(sum_out(sum_reference(model, software)), c('ref', '000')) + expect_equal(sum_out(sum_probn(model, software), 1), c('probn', '1')) expect_equal(sum_out(sum_probn(model, software), 2), c('probn', '2')) + expect_equal(sum_out(sum_timestart(model, software)), c('timestart', 'Mon Oct 16 13:34:28 CEST 2017')) + expect_equal(sum_out(sum_timestop(model, software)), c('timestop', 'Mon Oct 16 13:34:35 CEST 2017')) + expect_equal(sum_out(sum_description(model, software)), c('descr', 'NONMEM PK example for xpose')) + expect_equal(sum_out(sum_label(model, software), 1), c('label', 'Parameter estimation')) expect_equal(sum_out(sum_label(model, software), 2), c('label', 'Model simulations')) + expect_equal(sum_out(sum_input_data(model, software), 1), c('data', '../../mx19_2.csv')) expect_equal(sum_out(sum_input_data(model, software), 2), c('data', '../../mx19_2.csv')) + expect_equal(sum_out(sum_nobs(model, software), 1), c('nobs', '476')) expect_equal(sum_out(sum_nobs(model, software), 2), c('nobs', '476')) + expect_equal(sum_out(sum_nind(model, software), 1), c('nind', '74')) expect_equal(sum_out(sum_nind(model, software), 2), c('nind', '74')) + expect_equal(sum_out(sum_nsim(model, software)), c('nsim', '20')) + expect_equal(sum_out(sum_simseed(model, software)), c('simseed', '221287')) + expect_equal(sum_out(sum_subroutine(model, software)), c('subroutine', '2')) + expect_equal(sum_out(sum_runtime(model, software)), c('runtime', '00:00:02')) + expect_equal(sum_out(sum_covtime(model, software)), c('covtime', '00:00:03')) + expect_equal(sum_out(sum_term(model, software)), c('term', 'MINIMIZATION SUCCESSFUL')) + expect_equal(sum_out(sum_warnings(model, software), 1), c('warnings', '(WARNING 2) NM-TRAN INFERS THAT THE DATA ARE POPULATION.')) expect_equal(sum_out(sum_warnings(model, software), 2), c('warnings', '(WARNING 2) NM-TRAN INFERS THAT THE DATA ARE POPULATION.\n(WARNING 22) WITH $MSFI AND \"SUBPROBS\", \"TRUE=FINAL\" ...')) + #expect_equal(sum_out(sum_errors(model, software)), c('errors', 'na')) + expect_equal(sum_out(sum_nsig(model, software)), c('nsig', '3.3')) - expect_equal(sum_out(sum_condn(model, software, rounding)), c('condn', '21.5')) + + expect_equal(sum_out(sum_condn(model3, software, rounding), 1), c('condn', '21.5')) + expect_equal(sum_out(sum_condn(model3, software, rounding), 2), c('condn', '42.54')) + expect_equal(sum_out(sum_condn(model3, software, rounding), 3), c('condn', '521.68')) + #expect_equal(sum_out(sum_nesample(model, software)), c('nesample', 'na')) + #expect_equal(sum_out(sum_esampleseed(model, software)), c('esampleseed', 'na')) + expect_equal(sum_out(sum_ofv(model, software)), c('ofv', '-1403.905')) + expect_equal(sum_out(sum_method(model, software), 1), c('method', 'foce-i')) expect_equal(sum_out(sum_method(model = dplyr::tibble(problem = 1L, level = 1L, subroutine = 'est', code = 'METH=0', comment = ''), software), 1), c('method', 'fo')) expect_equal(sum_out(sum_method(model, software), 2), c('method', 'sim')) - expect_equal(sum_out(sum_shk(model, software, 'eps', rounding)), c('epsshk', '14.86 [1]')) - skip_on_cran() # Let's wait and see how R 4.0.0 rounding behaves once it's released + expect_equal(sum_out(sum_shk(model, software, 'eps', rounding)), c('epsshk', '14.86 [1]')) expect_equal(sum_out(sum_shk(model, software, 'eta', rounding)), c('etashk', eta_shk_string)) }) test_that('summary default summary is returned for missing information', { expect_equal(sum_out(sum_version(model2, software)), c('version', 'na')) + expect_equal(sum_out(sum_reference(model2, software)), c('ref', 'na')) + expect_equal(sum_out(sum_probn(model2, software)), c('probn', 'na')) + expect_equal(sum_out(sum_timestart(model2, software)), c('timestart', 'na')) + expect_equal(sum_out(sum_timestop(model2, software)), c('timestop', 'na')) + expect_equal(sum_out(sum_description(model2, software)), c('descr', 'na')) + expect_equal(sum_out(sum_label(model2, software)), c('label', 'na')) + expect_equal(sum_out(sum_input_data(model2, software)), c('data', 'na')) + expect_equal(sum_out(sum_nobs(model2, software)), c('nobs', 'na')) + expect_equal(sum_out(sum_nind(model2, software)), c('nind', 'na')) + expect_equal(sum_out(sum_nsim(model2, software)), c('nsim', 'na')) + expect_equal(sum_out(sum_simseed(model2, software)), c('simseed', 'na')) + expect_equal(sum_out(sum_subroutine(model2, software)), c('subroutine', 'na')) + expect_equal(sum_out(sum_runtime(model2, software)), c('runtime', 'na')) + expect_equal(sum_out(sum_covtime(model2, software)), c('covtime', 'na')) expect_equal(sum_out(sum_covtime(model = dplyr::tibble(problem = 1L, level = 1L, subroutine = 'lst', code = 'Elapsed covariance time in seconds: ********', comment = ''), software), 1), c('covtime', 'na')) + expect_equal(sum_out(sum_term(model2, software)), c('term', 'na')) + expect_equal(sum_out(sum_warnings(model2, software)), c('warnings', 'na')) + expect_equal(sum_out(sum_errors(model2, software)), c('errors', 'na')) + expect_equal(sum_out(sum_nsig(model2, software)), c('nsig', 'na')) + expect_equal(sum_out(sum_condn(model2, software, rounding)), c('condn', 'na')) + expect_equal(sum_out(sum_nesample(model2, software)), c('nesample', 'na')) + expect_equal(sum_out(sum_esampleseed(model2, software)), c('esampleseed', 'na')) + expect_equal(sum_out(sum_ofv(model2, software)), c('ofv', 'na')) + expect_equal(sum_out(sum_method(model2, software)), c('method', 'na')) + expect_equal(sum_out(sum_shk(model2, software, 'eps', rounding)), c('epsshk', 'na')) expect_equal(sum_out(sum_shk(model2, software, 'eta', rounding)), c('etashk', 'na')) }) @@ -118,3 +201,4 @@ test_that("Termination messages are parsed when minimization is terminated",{ model <- tibble::tibble(problem = 1, level = 60, subroutine = 'lst', code = unlist(stringr::str_split(relevant_lst_part, "\\n")), comment = "") expect_equal(sum_term(model, "nonmem"), expected_result) }) +