diff --git a/_articles/RJ-2023-026/RJ-2023-026.R b/_articles/RJ-2023-026/RJ-2023-026.R index 29905a6a9..e24e1ca6d 100644 --- a/_articles/RJ-2023-026/RJ-2023-026.R +++ b/_articles/RJ-2023-026/RJ-2023-026.R @@ -2,12 +2,12 @@ # Please edit RJ-2023-026.Rmd to modify this file ## ----setup, include=FALSE----------------------------------------------------- -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) +knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, out.width="100%") library(ggplot2) #library(kableExtra) -## ----Fig0, fig.height = 12, fig.width=8, fig.cap = "The visual predictive check plot. The solid red line represents the $50^{th}$ percentile of the observed data, and dashed red lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. The solid blue line represents the $50^{th}$ percentile of the simularted data, and dashed blue lines represent the $10^{th}$ and $90^{th}$ percentiles of the simulated data. Light blue and pink areas represent the 95% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."---- +## ----Fig0, fig.height = 12, fig.width=8, fig.cap = "The visual predictive check plot. The solid red line represents the $50^{th}$ percentile of the observed data, and dashed red lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. The solid blue line represents the $50^{th}$ percentile of the simularted data, and dashed blue lines represent the $10^{th}$ and $90^{th}$ percentiles of the simulated data. Light blue and pink areas represent the 95\\% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."---- library(nlmeVPC) library(ggplot2) library(gridExtra) @@ -23,15 +23,15 @@ C=VPCgraph(origdata,simdata,N_xbin=8,type="CI")+ grid.arrange(A,B,C,nrow=3) -## ----Fig1, fig.height = 4, fig.width=8, fig.cap = "The additive equantile VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line. Light blue and pink areas represent the 95% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."---- +## ----Fig1, fig.height = 4, fig.width=8, fig.cap = "The additive equantile VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line. Light blue and pink areas represent the 95\\% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."---- aqrVPC(origdata,simdata) +labs(caption="") -## ----Fig2, fig.height = 4, fig.width=8, fig.cap = "The bootstrap VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line, and the pink areas represent the 95% confidence areas of the $50^{th}$ percentile line, calculated from the bootstrap samples of the observed data."---- +## ----Fig2, fig.height = 4, fig.width=8, fig.cap = "The bootstrap VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line, and the pink areas represent the 95\\% confidence areas of the $50^{th}$ percentile line, calculated from the bootstrap samples of the observed data."---- bootVPC(origdata,simdata,N_xbin=8) -## ----Fig3, fig.height = 8, fig.width=8,fig.cap = "The average shifted VPC plot. Dots indicate the observed data. The solid line represents the 50th quantiles of the observed data, and dashed lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. Light blue and pink areas represent the 95% confidence areas of the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles."---- +## ----Fig3, fig.height = 8, fig.width=8,fig.cap = "The average shifted VPC plot. Dots indicate the observed data. The solid line represents the 50th quantiles of the observed data, and dashed lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. Light blue and pink areas represent the 95\\% confidence areas of the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles."---- A=asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="bin") +labs(caption="") B=asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="distance")+labs(caption="") grid.arrange(A,B,nrow=2) @@ -41,7 +41,7 @@ grid.arrange(A,B,nrow=2) NumericalCheck(origdata,simdata,pred.level=c(0,0.2,0.4,0.6,0.8,0.9),N_xbin=8)$NPC -## ----Fig4, fig.height = 10, fig.width=6, fig.cap = "The coverage plot and the coverage detailed plot for the 80% prediction interval. In the coverage plot, the X-axis is the level of the prediction interval. The Y-axis is the ratio between the number of observed data and the number of expected data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence area of the ratios. If the solid lines are near the white line, we can conclude that the suggested model is suitable. In the coverage detailed plot, the white dots represent the expected percentages of lower and upper prediction intervals of, 10%, and 90%, respectively. The upper and lower percentages of observation in each time bin are darker gray."---- +## ----Fig4, out.width="90%", fig.height = 10, fig.width=6, fig.cap = "The coverage plot and the coverage detailed plot for the 80\\% prediction interval. In the coverage plot, the X-axis is the level of the prediction interval. The Y-axis is the ratio between the number of observed data and the number of expected data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence area of the ratios. If the solid lines are near the white line, we can conclude that the suggested model is suitable. In the coverage detailed plot, the white dots represent the expected percentages of lower and upper prediction intervals of, 10\\%, and 90\\%, respectively. The upper and lower percentages of observation in each time bin are darker gray."---- A=coverageplot(origdata,simdata,N_xbin=8) +ggtitle("(A) Coverage Plot") B=coverageDetailplot(origdata,simdata,N_xbin=8,predL=0.8) + @@ -256,13 +256,13 @@ B=coverageplot(origdata,simdata.F,conf.level=0.9,N_xbin=8)+labs(title="Model 2") grid.arrange(A,B,ncol=2) -## ----M16,fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=50%."---- +## ----M16,fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=50\\%."---- A=coverageDetailplot(origdata,simdata.T,predL=0.5,N_xbin=8)+labs(title="Model 1") B=coverageDetailplot(origdata,simdata.F,predL=0.5,N_xbin=8)+labs(title="Model 2") grid.arrange(A,B,ncol=2) -## ----M17, fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=80%."---- +## ----M17, fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=80\\%."---- A=coverageDetailplot(origdata,simdata.T,predL=0.8,N_xbin=8)+labs(title="Model 1") B=coverageDetailplot(origdata,simdata.F,predL=0.8,N_xbin=8)+labs(title="Model 2") grid.arrange(A,B,ncol=2) diff --git a/_articles/RJ-2023-026/RJ-2023-026.Rmd b/_articles/RJ-2023-026/RJ-2023-026.Rmd index 4fe0c65f6..adad4b536 100644 --- a/_articles/RJ-2023-026/RJ-2023-026.Rmd +++ b/_articles/RJ-2023-026/RJ-2023-026.Rmd @@ -29,7 +29,7 @@ author: orcid: 0000-0003-0817-5000 type: package output: - rjtools::rjournal_article: + rjtools::rjournal_web_article: self_contained: yes toc: no bibliography: EunKyung_Lee.bib @@ -44,9 +44,8 @@ journal: --- - ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE) +knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, out.width="100%") library(ggplot2) #library(kableExtra) ``` @@ -97,7 +96,7 @@ As the number of bins decreases, the lines become smoother and more regular, how `VPCgraph` provides the automatic binning with `optK` and `makeCOVbin`; here, `optK` finds the optimal number of bins, and `makeCOVbin` finds the optimal cutoffs of bins using Lavielle and Bleakley's method. -```{r Fig0, fig.height = 12, fig.width=8, fig.cap = "The visual predictive check plot. The solid red line represents the $50^{th}$ percentile of the observed data, and dashed red lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. The solid blue line represents the $50^{th}$ percentile of the simularted data, and dashed blue lines represent the $10^{th}$ and $90^{th}$ percentiles of the simulated data. Light blue and pink areas represent the 95% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."} +```{r Fig0, fig.height = 12, fig.width=8, fig.cap = "The visual predictive check plot. The solid red line represents the $50^{th}$ percentile of the observed data, and dashed red lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. The solid blue line represents the $50^{th}$ percentile of the simularted data, and dashed blue lines represent the $10^{th}$ and $90^{th}$ percentiles of the simulated data. Light blue and pink areas represent the 95\\% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."} library(nlmeVPC) library(ggplot2) library(gridExtra) @@ -119,7 +118,7 @@ To overcome the difficulties of making bins as well as determining the number of @jamsen2018regression used additive quantile regression to calculate the quantiles of the observed and simulated data. This regression method makes it possible to estimate quantiles without discrete binning, which is especially useful when the data are insufficient, irregular, or inappropriate to configure the bins. To fit the additive quantile regression, we used the `rqss` function in the \CRANpkg{quantreg} [@quantreg] package and developed the `aqrVPC` function to draw the VPC type plot with additive quantile regression. Figure \@ref(fig:Fig1) shows the additive quantile regression VPC plot. The solid and dashed lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ additive quantile regression lines of the observed data, and the pink and light blue areas represent the confidence areas of the additive quantile regression lines of the simulated data. Lines and areas in the additive quantile regression VPC plot are much smoother than those in the original VPC plot. -```{r Fig1, fig.height = 4, fig.width=8, fig.cap = "The additive equantile VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line. Light blue and pink areas represent the 95% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."} +```{r Fig1, fig.height = 4, fig.width=8, fig.cap = "The additive equantile VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line. Light blue and pink areas represent the 95\\% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines."} aqrVPC(origdata,simdata) +labs(caption="") ``` @@ -131,7 +130,7 @@ This plot reflects the uncertainty of the observed data and allows for more obje Figure \@ref(fig:Fig2) shows the bootstrap VPC plot using `bootVPC`. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line, and the pink areas represent the 95$\%$ confidence areas of the $50^{th}$ percentile line, calculated from the bootstrap samples of the observed data. If the solid blue line and the solid red line are similar, the solid blue line is in the pink area, and the pink area is located between two dashed blue lines, then this is evidence that the fitted model fit the observed data well. -```{r Fig2, fig.height = 4, fig.width=8, fig.cap = "The bootstrap VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line, and the pink areas represent the 95% confidence areas of the $50^{th}$ percentile line, calculated from the bootstrap samples of the observed data."} +```{r Fig2, fig.height = 4, fig.width=8, fig.cap = "The bootstrap VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line, and the pink areas represent the 95\\% confidence areas of the $50^{th}$ percentile line, calculated from the bootstrap samples of the observed data."} bootVPC(origdata,simdata,N_xbin=8) ``` @@ -180,7 +179,7 @@ In the asVPC plot, the observations in each bin are combined using weights. Typi Figure \@ref(fig:Fig3) shows the results from the `asVPC` function using bin-related weights and distance-related weights. The solid and dashed lines represent the average shifted quantile lines of the observed data, and the pink and light blue areas represent the confidence areas of the simulated data. The lines in the asVPC plot are smoother than those in the original VPC plot, and the confidence areas in the asVPC plot are thinner than those in the original VPC plot. -```{r Fig3, fig.height = 8, fig.width=8,fig.cap = "The average shifted VPC plot. Dots indicate the observed data. The solid line represents the 50th quantiles of the observed data, and dashed lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. Light blue and pink areas represent the 95% confidence areas of the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles."} +```{r Fig3, fig.height = 8, fig.width=8,fig.cap = "The average shifted VPC plot. Dots indicate the observed data. The solid line represents the 50th quantiles of the observed data, and dashed lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. Light blue and pink areas represent the 95\\% confidence areas of the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles."} A=asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="bin") +labs(caption="") B=asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="distance")+labs(caption="") grid.arrange(A,B,nrow=2) @@ -228,7 +227,7 @@ Unlike the VPC plot, which represents the data space, the information in the obs Figure \@ref(fig:Fig4)(B) is the result of `coverageDetailplot` when the prediction level is 80$\%$. The white dots represent the expected percentages of the lower and upper the prediction intervals, 10$\%$, and 90$\%$, respectively. The upper and lower percentages of observation in each time bin are shown in darker gray. The left bin(before 0.045 hours) shows all light gray in the coverage detailed plot, and it is quite different patterns from the expected one. However, it is mainly due to the characteristics of this example data. All observations in this bin are 0. It makes the lower and upper bound of the prediction interval all 0, and the lower and upper percentages become 0. -```{r Fig4, fig.height = 10, fig.width=6, fig.cap = "The coverage plot and the coverage detailed plot for the 80% prediction interval. In the coverage plot, the X-axis is the level of the prediction interval. The Y-axis is the ratio between the number of observed data and the number of expected data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence area of the ratios. If the solid lines are near the white line, we can conclude that the suggested model is suitable. In the coverage detailed plot, the white dots represent the expected percentages of lower and upper prediction intervals of, 10%, and 90%, respectively. The upper and lower percentages of observation in each time bin are darker gray."} +```{r Fig4, out.width="90%", fig.height = 10, fig.width=6, fig.cap = "The coverage plot and the coverage detailed plot for the 80\\% prediction interval. In the coverage plot, the X-axis is the level of the prediction interval. The Y-axis is the ratio between the number of observed data and the number of expected data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence area of the ratios. If the solid lines are near the white line, we can conclude that the suggested model is suitable. In the coverage detailed plot, the white dots represent the expected percentages of lower and upper prediction intervals of, 10\\%, and 90\\%, respectively. The upper and lower percentages of observation in each time bin are darker gray."} A=coverageplot(origdata,simdata,N_xbin=8) +ggtitle("(A) Coverage Plot") B=coverageDetailplot(origdata,simdata,N_xbin=8,predL=0.8) + @@ -503,13 +502,13 @@ Figure \@ref(fig:M15) shows the `coverageplot` results for Model 1 and Model 2. The upper and lower percentages in both figures are close to the white points in Model 1. On the other hand, the upper percentages of the most time bins are far from the white points in Model 2, especially the time bin (3.54,5.28] when PI = 50$\%$. When PI = 80$\%$, most upper and lower percentages are far from the white points. -```{r M16,fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=50%."} +```{r M16,fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=50\\%."} A=coverageDetailplot(origdata,simdata.T,predL=0.5,N_xbin=8)+labs(title="Model 1") B=coverageDetailplot(origdata,simdata.F,predL=0.5,N_xbin=8)+labs(title="Model 2") grid.arrange(A,B,ncol=2) ``` -```{r M17, fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=80%."} +```{r M17, fig.height = 3, fig.width=7.5,fig.cap = "The coverage detailed plots for Model 1 and Model 2 when PI=80\\%."} A=coverageDetailplot(origdata,simdata.T,predL=0.8,N_xbin=8)+labs(title="Model 1") B=coverageDetailplot(origdata,simdata.F,predL=0.8,N_xbin=8)+labs(title="Model 2") grid.arrange(A,B,ncol=2) diff --git a/_articles/RJ-2023-026/RJ-2023-026.html b/_articles/RJ-2023-026/RJ-2023-026.html index 03fe18a6d..6fdbd4ba2 100644 --- a/_articles/RJ-2023-026/RJ-2023-026.html +++ b/_articles/RJ-2023-026/RJ-2023-026.html @@ -1,19 +1,19 @@ - +
- - - + + + + /* Hide doc at startup (prevent jankiness while JS renders/transforms) */ + body { + visibility: hidden; + } + @@ -35,149 +35,150 @@ pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; } } pre.numberSource code -{ counter-reset: source-line 0; } + { counter-reset: source-line 0; } pre.numberSource code > span -{ position: relative; left: -4em; counter-increment: source-line; } + { position: relative; left: -4em; counter-increment: source-line; } pre.numberSource code > span > a:first-child::before -{ content: counter(source-line); -position: relative; left: -1em; text-align: right; vertical-align: baseline; -border: none; display: inline-block; --webkit-touch-callout: none; -webkit-user-select: none; --khtml-user-select: none; -moz-user-select: none; --ms-user-select: none; user-select: none; -padding: 0 4px; width: 4em; -color: #aaaaaa; -} -pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } + { content: counter(source-line); + position: relative; left: -1em; text-align: right; vertical-align: baseline; + border: none; display: inline-block; + -webkit-touch-callout: none; -webkit-user-select: none; + -khtml-user-select: none; -moz-user-select: none; + -ms-user-select: none; user-select: none; + padding: 0 4px; width: 4em; + color: #aaaaaa; + } +pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; } div.sourceCode -{ color: #00769e; background-color: #f1f3f5; } + { color: #00769e; background-color: #f1f3f5; } @media screen { pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; } } -code span { color: #00769e; } -code span.al { color: #ad0000; } -code span.an { color: #5e5e5e; } -code span.at { color: #657422; } -code span.bn { color: #ad0000; } -code span.bu { } -code span.cf { color: #00769e; } -code span.ch { color: #20794d; } -code span.cn { color: #8f5902; } -code span.co { color: #5e5e5e; } -code span.cv { color: #5e5e5e; font-style: italic; } -code span.do { color: #5e5e5e; font-style: italic; } -code span.dt { color: #ad0000; } -code span.dv { color: #ad0000; } -code span.er { color: #ad0000; } -code span.ex { } -code span.fl { color: #ad0000; } -code span.fu { color: #4758ab; } -code span.im { } -code span.in { color: #5e5e5e; } -code span.kw { color: #00769e; } -code span.op { color: #5e5e5e; } -code span.ot { color: #00769e; } -code span.pp { color: #ad0000; } -code span.sc { color: #5e5e5e; } -code span.ss { color: #20794d; } -code span.st { color: #20794d; } -code span.va { color: #111111; } -code span.vs { color: #20794d; } -code span.wa { color: #5e5e5e; font-style: italic; } +code span { color: #00769e; } /* Normal */ +code span.al { color: #ad0000; } /* Alert */ +code span.an { color: #5e5e5e; } /* Annotation */ +code span.at { color: #657422; } /* Attribute */ +code span.bn { color: #ad0000; } /* BaseN */ +code span.bu { } /* BuiltIn */ +code span.cf { color: #00769e; } /* ControlFlow */ +code span.ch { color: #20794d; } /* Char */ +code span.cn { color: #8f5902; } /* Constant */ +code span.co { color: #5e5e5e; } /* Comment */ +code span.cv { color: #5e5e5e; font-style: italic; } /* CommentVar */ +code span.do { color: #5e5e5e; font-style: italic; } /* Documentation */ +code span.dt { color: #ad0000; } /* DataType */ +code span.dv { color: #ad0000; } /* DecVal */ +code span.er { color: #ad0000; } /* Error */ +code span.ex { } /* Extension */ +code span.fl { color: #ad0000; } /* Float */ +code span.fu { color: #4758ab; } /* Function */ +code span.im { } /* Import */ +code span.in { color: #5e5e5e; } /* Information */ +code span.kw { color: #00769e; } /* Keyword */ +code span.op { color: #5e5e5e; } /* Operator */ +code span.ot { color: #00769e; } /* Other */ +code span.pp { color: #ad0000; } /* Preprocessor */ +code span.sc { color: #5e5e5e; } /* SpecialChar */ +code span.ss { color: #20794d; } /* SpecialString */ +code span.st { color: #20794d; } /* String */ +code span.va { color: #111111; } /* Variable */ +code span.vs { color: #20794d; } /* VerbatimString */ +code span.wa { color: #5e5e5e; font-style: italic; } /* Warning */A nonlinear mixed effects model is useful when the data are repeatedly measured within the same unit or correlated between units. Such models are widely used in medicine, disease mechanics, pharmacology, ecology, social science, psychology, etc. After fitting the nonlinear mixed effect model, model diagnostics are essential for verifying that the results are reliable. The visual predictive check (VPC) has recently been highlighted as a visual diagnostic tool for pharmacometric models. This method can also be applied to general nonlinear mixed effects models. However, functions for VPCs in existing R packages are specialized for pharmacometric model diagnosis, and are not suitable for general nonlinear mixed effect models. In this paper, we propose nlmeVPC, an R package for the visual diagnosis of various nonlinear mixed effect models. The nlmeVPC package allows for more diverse model diagnostics, including visual diagnostic tools that extend the concept of VPCs along with the capabilities of existing R packages.
@@ -2552,17 +1782,17 @@After fitting a model, diagnosing the fitted model is essential for verifying that the results are reliable (Nguyen et al. 2017). For linear models, the residuals are usually used to determine the goodness of fit of the fitted model. However, due to random effects and nonlinearity, residuals are less useful for diagnosing fit in nonlinear mixed effects models. Therefore, various diagnostic tools for this type of model have been developed. The nonlinear mixed effect model is useful when the data are repeatedly measured within the same unit, or the relationship between the dependent and independent variables is nonlinear. It is widely used in various fields, including medicine, disease mechanics, pharmacology, ecology, pharmacometrics, social science, and psychology (Pinheiro and Bates 2000; Davidian 2017). Recently, among the various diagnostic tools applicable to nonlinear mixed models, simulation-based diagnostic methods have been developed in the field of pharmacology (Karlsson and Savic 2007). The visual predictive check (VPC; Karlsson and Holford (2008)) is a critical diagnostic tool that visually tests for the adequacy of the fitted model. It allows for the diagnosis of fixed and random effects in mixed models (Karlsson and Holford 2008) in the original data space. Currently, it is a widely used method for diagnosing population pharmacometric models: Heus et al. (2022) used the VPC method to evaluate their population pharmacokinetic models of vancomycin, Mudde et al. (2022) checked their final population PK model for each regimen of antituberculosis drug using the VPC method, and Otto et al. (2021) compares the predictive performance of parent-metabolite models of (S)-ketamine with the VPC method. This VPC method can be used for general nonlinear mixed effect models, including hierarchical models.
The psn (Lindbom et al. 2004) and xpose4 (Keizer et al. 2013) packages provide various diagnostic methods for pharmacometric models in R (R Core Team 2023), including the VPC plot. These packages were developed only for the pharmacometricians, and the users needed to use the NONMEM software (Bauer 2011) for generating inputs of functions in these two packages. However, NONMEM is licensed software, and it is mainly designed towards the analysis of population pharmacometric models. Therefore, it is not easy for nonpharmacometrician to use NONMEM and to draw the VPC plot through psn and xpose4. Recently, the vpc (Keizer 2021) package and nlmixr2 (Fidler et al. 2019) package have been developed to draw VPC plots in R without results from NONMEM. However, vpc package provides the function for drawing the original VPC plot only. nlmixr2 was developed initially for fitting general dynamic models. To check the fitted model, nlmixr2 provides a function to use graphical diagnostics in xpose4 with the nlmixr2
model object. Also, nlmixr2 uses the VPC plot in the vpc package with the nlmixr2
model object. Therefore, both packages only provide a function to draw the basic VPC plot, and the other newly developed simulation-based methods, including extensions of VPC, are not provided.
We have developed a new R package, nlmeVPC, to provide a suite for various visual checking methods for the nonlinear mixed models. This package includes various state-of-the-art model diagnostic methods based on the visual comparison between the original and simulated data. Most methods compare the statistics calculated from the observed data to the statistics from the simulated data. Percentiles, for example, the \(10^{th}\), \(50^{th}\), and \(90^{th}\) percentiles, are widely used to summarize relevant statistics from the observed and simulated data. We compare the similarities between the statistics from the observed data and those from the simulated data in two different spaces: the data space and the model space (Wickham et al. 2015). The original data comprise the data space. Usually, the data space is represented by the independent and dependent variables, such as time and blood concentration, in the pharmacokinetic data. On the other hand, the model space is composed of quantities obtained from the fitted model, for example, residuals and summary values from the fitted model. From this viewpoint, we categorize the well-known visual diagnostic tools into two categories. One method compares the observed and simulated data in the original data space, and the other in the model space. In the method with the data space, we developed functions for the original VPC (VPCgraph
), the additive quantile regression VPC (aqrVPC
), and the bootstrap VPC (bootVPC
). In addition, we proposed a new VPC method: the average shifted VPC (asVPC
). In the method with the model space, the coverage plot (coverageplot
) and the quantified VPC (quantVPC
) are included. For a more detailed diagnosis, we developed a coverage detailed plot (coverageDetailplot
). In nlmeVPC, the ggplot2 package (Wickham 2016) is used to create all plots.
The general nonlinear mixed effect model is defined as follow:
\[
y_{ij} = f(x_{ij}, \theta, \eta_i, \epsilon_{ij}) \\
@@ -2570,12 +1800,12 @@ \(y_{ij}\) is the dependent variable of the \(j^{th}\) observation of the \(i^{th}\) individual, \(x_{ij}\) is the independent variable, \(f\) is the nonlinear function of \(x_{ij}\), \(\theta\) is the population parameter, \(\eta_i\) represents the variability of the individual \(i\), and \(\epsilon_{ij}\) represents the random error. From the data, \(\theta\) , \(\Omega\), and \(\Sigma\) are estimated. For the simulated data, the fitted model \(y_{ij} = f(x_{ij}, \hat{\theta}, \eta_i, \epsilon_{ij})\), \(\eta_i \sim N(0,\hat\Omega)\), \(\epsilon_{ij} \sim N(0, \hat\Sigma)\) are used.
The VPC is the most popular model validation method in the pharmacometrics area. It was developed to diagnose population pharmacokinetic/pharmacodynamic models visually. The main idea of the VPC is to compare the distribution of observations and the distribution of predicted values, where the distribution of predicted values is obtained from simulated data drawn from the fitted model. If the fitted model explains the observed data well, these two distributions should be similar. Both distributions can be represented in the original data space that consists X axis as the independent variable and Y axis as the dependent variable. It allows us to compare the observed data with the fitted model in the original data space. In nlmeVPC, we include an original VPC plot, an additive quantile regression VPC (Jamsen et al. 2018), and a bootstrap VPC (Post et al. 2008). We also proposed a new approach to draw the VPC: the average shifted VPC.
-The visual predictive check (VPC; Karlsson and Holford (2008)) is based on the principle that if the fitted model adequately describes the observed data, the distribution of the simulated data from the fitted model should be similar to the distribution of the observed data. There are several ways to compare the similarities between the distributions. In the VPC approach, profiles of quantiles are used. Two profiles are mainly used to compare the distributions of observations and predictions. One profile is from the upper bound of the prediction intervals, and the other is from the lower bound. These prediction intervals are calculated from the simulated data. 90\(\%\) prediction intervals are usually used. For small and sparse samples, 80\(\%\) prediction interval is also used. The lower and upper bounds of 80\(\%\) prediction interval are the \(10^{th}\) and \(90^{th}\) percentiles of the simulated data. Figure 1(A) shows the “scatter” type of the VPC plot. Dots indicate the observed data. Two dashed blue lines represent profiles of the \(10^{th}\) and \(90^{th}\) percentiles of the simulated data, and the solid blue line represents the \(50^{th}\) percentile. If the fitted model represents the observed data well, most observed data should lie between profiles of \(10^{th}\) and \(90^{th}\) percentiles.
Figure 1(B) is the “percentile” type of the VPC plot. In this plot, profiles of percentiles from the observed data are compared to profiles of percentiles from the simulated data. Two dashed red lines represent profiles of the \(10^{th}\) and \(90^{th}\) percentiles of the observed data, and the solid red line represents profiles of the \(50^{th}\) percentile of the observed data. If the fitted model represents the observed data well, two profiles in each percentile - one from the original data and the other from the simulated data - are similar.
Figure 1(C) is the “CI” type of the VPC plot. The solid red line represents the \(50^{th}\) percentile of the observed data, and dashed red lines represent the \(10^{th}\) and \(90^{th}\) percentiles of the observed data. Light blue areas represent the 95\(\%\) confidence areas of the \(10^{th}\) and \(90^{th}\) percentiles, and pink areas represent the 95\(\%\) confidence areas of the \(50^{th}\) percentile. These confidence areas were calculated from the simulated data. After calculating percentiles in each simulated data, we find 95\(\%\) confidence intervals for each percentile and use this to draw the areas. In this plot, it is necessary to verify that the profiles of the original data are in confidence areas of each profile from the simulated data in each percentile. If each percentile line of the observed data is in the corresponding confidence area, this can be evidence @@ -2585,37 +1815,37 @@
VPCgraph
provides the automatic binning with optK
and makeCOVbin
; here, optK
finds the optimal number of bins, and makeCOVbin
finds the optimal cutoffs of bins using Lavielle and Bleakley’s method.
To overcome the difficulties of making bins as well as determining the number of bins,
Jamsen et al. (2018) used additive quantile regression to calculate the quantiles of the observed and simulated data. This regression method makes it possible to estimate quantiles without discrete binning, which is especially useful when the data are insufficient, irregular, or inappropriate to configure the bins. To fit the additive quantile regression, we used the rqss
function in the quantreg (Koenker 2023) package and developed the aqrVPC
function to draw the VPC type plot with additive quantile regression. Figure 2 shows the additive quantile regression VPC plot. The solid and dashed lines represent the \(10^{th}\), \(50^{th}\), and \(90^{th}\) additive quantile regression lines of the observed data, and the pink and light blue areas represent the confidence areas of the additive quantile regression lines of the simulated data. Lines and areas in the additive quantile regression VPC plot are much smoother than those in the original VPC plot.
The bootstrap VPC (Post et al. 2008) compares the distribution of the simulated data to the distribution of the bootstrap samples drawn from the observed data. This plot reflects the uncertainty of the observed data and allows for more objective comparisons with the predicted median.
Figure 3 shows the bootstrap VPC plot using bootVPC
. The solid and dashed blue lines represent the \(10^{th}\), \(50^{th}\), and \(90^{th}\) percentiles of the simulated data. The solid red line represents the \(50^{th}\) percentile line, and the pink areas represent the 95\(\%\) confidence areas of the \(50^{th}\) percentile line, calculated from the bootstrap samples of the observed data. If the solid blue line and the solid red line are similar, the solid blue line is in the pink area, and the pink area is located between two dashed blue lines,
then this is evidence that the fitted model fit the observed data well.
Even though binning mitigates the problem with highly irregular data, the VPC plot still has a precision problem with sparse data. In this paper, we propose a new approach to draw the adapted VPC plot from the average shifted histograms (Scott 1985). A histogram is a widely used method for displaying the density of a single continuous variable. However, histograms can look quite different based on different choice of bin width and anchor. This requires computing an optimal bin width. @@ -2640,7 +1870,7 @@
Collect the medians of the independent variable and the weighted percentiles of the dependent variable from 2, and connect them to the lines.
We can implement (A) and (B) by applying these procedures separately to the observed and simulated data. Additionally, (C) can be implemented using these procedures for each simulated dataset. First, we find the weighted percentiles, combine the results from each simulated dataset, and then calculate the 95\(\%\) confidence intervals of each percentile. Using these three quantities (A), (B), and (C), we can draw the VPC type plot with the ASH approach, producing the asVPC plot.
-In the asVPC plot, the observations in each bin are combined using weights. Typically, the data near the center of the integrated bin have higher weights, and the data far from the center have smaller weights. This idea is used in the ASH algorithm as well as the density estimation literature. We suggest two different ways to apply weights for the asVPC calculation.
Bin-related weight: For each bin in the combined bin, K-1 bins in both directions have weights and the other bins have no weight. Use \(\frac{1}{K}\), \(\cdots\), \(\frac{i-1}{K}\), \(\cdots\), \(\frac{K-1}{K}\), \(1\), \(\frac{K-1}{K}\), \(\cdots\), \(\frac{i-1}{K}\), \(\cdots\), \(\frac{1}{K}\) as weights for 2K-1 bins. This approach is the same as the original ASH approach.
Figure 4 shows the results from the asVPC
function using bin-related weights and distance-related weights. The solid and dashed lines represent the average shifted quantile lines of the observed data, and the pink and light blue areas represent the confidence areas of the simulated data. The lines in the asVPC plot are smoother than those in the original VPC plot, and the confidence areas in the asVPC plot are thinner than those in the original VPC plot.
To validate the fitted model in the model space, we need to choose appropriate statistics to visualize and describe them in the model space. In nlmeVPC, we use the same statistics, that is quantiles, as the VPC plot in the data space, and compare them in two ways - numerically and visually. The numerical predictive check and the coverage plot are two commonly used methods in pharmacometrics. However, there is a limitation to detecting the illness of the fitted model. These methods combine the results of all ranges of the independent variable, and it is helpful to see the overall fitness. However, it is not easy to detect the detailed discrepancy between the fitted model and the observed data. To overcome this limitation, we developed a new plot, the coverage detailed plot, to compensate for the shortness of the coverage plot.
-The VPC plot visually compares the observed data to the simulated percentiles in the data space. On the other hand, the numerical predictive check (NPC; Wang and Zhang (2012); Karlsson and Savic (2007)) numerically compares the observed data to the simulated data. For a given level of prediction (for example, 90\(\%\)), the predicted interval is calculated using the simulated data, and the number of the observed data points outside of the prediction interval is counted, both below the lower bound and above the upper bound. The expected number of points below the lower bound of the predicted interval (for example, 5\(\%\) of observations) is also calculated and compared to the observed number. If these two numbers are similar, the suggested model is suitable (Maharaj et al. 2019).
NumericalCheck
provides information summarizing the results of the NPC for various prediction levels.
PI
is the prediction level \(\times\) 100 and n
is the number of observations.
-"Expected points below PI"
and "Expected points above PI"
are respectively the expected numbers below and above the PI; "points below PI"
and "points above PI"
respectively represent the numbers of points below and above the PI;
-"95%CIBelowFrom(%)"
and "95%CIBelowTo(%)"
represent the 95\(\%\) confidence interval of "points below PI(%)"
; and "95%CIAboveFrom(%)"
and "95%CIAboveTo(%)"
represent the 95\(\%\) confidence interval of "points above PI(%)"
. If "points below PI(%)"
is in the
-95\(\%\) confidence intervals of "points below PI(%)"
and is similar to "Expected points below PI"
, and if
-"points above PI(%)"
is in the
-95\(\%\) confidence intervals of "points above PI(%)"
and is similar to "Expected points above PI"
, this is the evidence that the fitted model explains the data well.
"Expected points below PI"
and "Expected points above PI"
are respectively the expected numbers below and above the PI; "points below PI"
and "points above PI"
respectively represent the numbers of points below and above the PI;
+"95%CIBelowFrom(%)"
and "95%CIBelowTo(%)"
represent the 95\(\%\) confidence interval of "points below PI(%)"
; and "95%CIAboveFrom(%)"
and "95%CIAboveTo(%)"
represent the 95\(\%\) confidence interval of "points above PI(%)"
. If "points below PI(%)"
is in the
+95\(\%\) confidence intervals of "points below PI(%)"
and is similar to "Expected points below PI"
, and if
+"points above PI(%)"
is in the
+95\(\%\) confidence intervals of "points above PI(%)"
and is similar to "Expected points above PI"
, this is the evidence that the fitted model explains the data well.
NumericalCheck(origdata,simdata,pred.level=c(0,0.2,0.4,0.6,0.8,0.9),N_xbin=8)$NPC
+NumericalCheck(origdata,simdata,pred.level=c(0,0.2,0.4,0.6,0.8,0.9),N_xbin=8)$NPC
PI n Expected points below PI points below PI points below PI(%)
1 0 132 66.0 57 43.18182
@@ -2700,7 +1930,7 @@ Numerical predictive chec
5 15.549242
6 8.333333
The result of the NPC is a table with many values, which, while useful, can be difficult to parse visually. The coverage plot (Karlsson and Holford 2008) was developed to help visually check the fitted model with the NPC result. In each level of the predicted interval, the ratios between the expected number of data points (Exp) outside the prediction interval and the observed number of data points (Obs) outside the prediction interval are calculated. These ratios are calculated separately for the upper and lower bound of the prediction interval. For example, when the prediction level is 90, a 90\(\%\) prediction interval is used, and 10\(\%\) of the observations are expected to locate outside this prediction interval. To be more precise, 5\(\%\) of observations are expected to be above the upper limit, and 5\(\%\) of observations are expected to be below the lower limit.
@@ -2708,26 +1938,26 @@The xpose4 package (Keizer et al. 2013) provides a coverage plot. However,
to draw the coverage plot using xpose4, PsN (Lindbom et al. 2004) software is needed to calculate the NPC result. Therefore, we developed coverageplot
to draw the coverage plot using the results from NumericalCheck
.
Figure 5(A) shows the coverage plot. The X-axis shows the level of the prediction interval. The Y-axis show the ratio between the observed number of data and the expected number of data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence areas of the ratios in each prediction level. If the solid lines are near the white line or in the gray area, we can conclude that the suggested model is suitable.
-Unlike the VPC plot, which represents the data space, the information in the observed data space does not come together in the coverage plot, which makes it difficult to determine whether the model is overestimated, underestimated, or adequate in the specific region of the data space. To overcome this limitation of the coverage plot, we propose a new method called the coverage detailed plot. The percentages of observations above the prediction interval are calculated in each bin of the independent variable. Additionally, the percentages of observations below the prediction interval are calculated. The white dots in the plot represent the expected percentages. If the percentages of upper and lower observations are near the white dots, we can conclude that the suggested model is suitable for the specific prediction interval.
Figure 5(B) is the result of coverageDetailplot
when the prediction level is 80\(\%\). The white dots represent the expected percentages of the lower and upper the prediction intervals, 10\(\%\), and 90\(\%\), respectively. The upper and lower percentages of observation in each time bin are shown in darker gray. The left bin(before 0.045 hours) shows all light gray in the coverage detailed plot, and it is quite different patterns from the expected one. However, it is mainly due to the characteristics of this example data. All observations in this bin are 0. It makes the lower and upper bound of the prediction interval all 0, and the lower and upper percentages become 0.
Post et al. (2008) proposed the quantified VPC (QVPC). It expanded the existing VPC, including information about missing data. The QVPC plot visually represents actual and unavailable observations around the predicted medians through the form of percent, regardless of the observed data’s density or shape. Here, “unavailable observations” refer to all kinds of missing values and unobserved data that occur for various reasons, including deviations and unexpected omissions.
If the model is appropriate, observations at each time bin are allocated randomly around the predicted median of the model. In the QVPC plot, white dots represent the model’s predicted median in each bin. If the borderlines above and below are close to the white dots, we can conclude that the fitted model describes the observed data well.
Figure 6 shows the result of quantVPC
. The darker gray areas represent the percentages below the median. The lighter gray areas represent the percentage above. The brightest gray areas represent the percent unavailable in each time bin. The white dots represent the ideal location where the above and the below percentages meet. In this example, there is no missing value.
Table 1 shows the list of functions and Table 2 shows the list of arguments used in the functions of nlmeVPC. The following codes are for Figure 1 to Figure 6.
library(nlmeVPC)
-data(origdata)
-data(simdata)
+library(nlmeVPC)
+data(origdata)
+data(simdata)
-optK(origdata$TIME)$K
+optK(origdata$TIME)$K
[1] 8
# Figure 1
+# Figure 1
-VPCgraph(origdata,simdata,N_xbin=8,type="scatter")+
- labs(title="(A) Visual Predictive Check : scatter",caption="")
-VPCgraph(origdata,simdata,N_xbin=8,type="percentile")+
- labs(title="(B) Visual Predictive Check : percentile",caption="")
-VPCgraph(origdata,simdata,N_xbin=8,type="CI")+
- labs(title="(C) Visual Predictive Check : CI",caption="")
+VPCgraph(origdata,simdata,N_xbin=8,type="scatter")+
+ labs(title="(A) Visual Predictive Check : scatter",caption="")
+VPCgraph(origdata,simdata,N_xbin=8,type="percentile")+
+ labs(title="(B) Visual Predictive Check : percentile",caption="")
+VPCgraph(origdata,simdata,N_xbin=8,type="CI")+
+ labs(title="(C) Visual Predictive Check : CI",caption="")
-# Figure 2
+# Figure 2
-aqrVPC(origdata,simdata) +labs(caption="")
+aqrVPC(origdata,simdata) +labs(caption="")
-# Figure 3
+# Figure 3
-bootVPC(origdata,simdata,N_xbin=8)+labs(caption="")
+bootVPC(origdata,simdata,N_xbin=8)+labs(caption="")
-# Figure 4
+# Figure 4
-asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="bin")+labs(caption="")
-asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="distance")+labs(caption="")
+asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="bin")+labs(caption="")
+asVPC(origdata,simdata,type="CI",N_xbin=8,N_hist=3,weight_method="distance")+labs(caption="")
-# Numerical Predictive Check
+# Numerical Predictive Check
-NumericalCheck(origdata,simdata,N_xbin=8,pred.level=c(0,0.2,0.4,0.6,0.8,0.9))$NPC
+NumericalCheck(origdata,simdata,N_xbin=8,pred.level=c(0,0.2,0.4,0.6,0.8,0.9))$NPC
-# Figure 5
+# Figure 5
-coverageplot(origdata,simdata,N_xbin=8) +ggtitle("(A) Coverage Plot")
-coverageDetailplot(origdata,simdata,N_xbin=8,predL=0.8) +
-ggtitle("(B) Coverage Detailed plot: PI = 80")
+coverageplot(origdata,simdata,N_xbin=8) +ggtitle("(A) Coverage Plot")
+coverageDetailplot(origdata,simdata,N_xbin=8,predL=0.8) +
+ggtitle("(B) Coverage Detailed plot: PI = 80")
-# Figure 6
+# Figure 6
-quantVPC(origdata,simdata)
+quantVPC(origdata,simdata)
We use an example to show how to use functions in nlmeVPC and how they work.
-The origdata
in nlmeVPC is from an experiment on the pharmacokinetics of theophylline. Twelve patients were given oral doses of theophylline, and blood concentrations were measured at 11 time points over the next 25 hours. Each patient had different time points.
We consider the following first-order absorption one-compartment model:
\[
@@ -3082,83 +2312,83 @@ Example
In this example, two different models are fitted and diagnosed using functions in the nlmeVPC package. In Model 1, \(Ka\) and \(Cl\) are considered as random effects. In Model 2, \(Ke\) is considered as random effect, and \(Ka\) and \(Cl\) are considered only as a fixed effect. The nlme (Pinheiro et al. 2022) and nlraa (Miguez 2022) packages are used to fit the nonlinear mixed models and to generate the simulated data from the fitted model.
library(nlme)
-library(nlraa)
-library(nlmeVPC)
-data(origdata)
-origdataT <- groupedData(DV~TIME|ID,origdata)
+library(nlme)
+library(nlraa)
+library(nlmeVPC)
+data(origdata)
+origdataT <- groupedData(DV~TIME|ID,origdata)
-# Model 1 (True)
-T.nlme <- nlme(DV ~ exp(lKe+lKa-lCl)*AMT*
- (exp(-exp(lKe)*TIME) - exp(-exp(lKa)*TIME))/
- (exp(lKa)-exp(lKe)), data=origdataT,
- fixed=lKa+lKe+lCl~1,
- random=lKa+lCl~1,
- start=c(lKe=-2,lKa=1.5,lCl=-3))
-set.seed(123456)
-sim.T <- simulate_nlme(object=T.nlme,nsim=100,psim=3,level=1,value="data.frame",data = NULL)
-simdata.T <- matrix(sim.T$sim.y,ncol=100)
+# Model 1 (True)
+T.nlme <- nlme(DV ~ exp(lKe+lKa-lCl)*AMT*
+ (exp(-exp(lKe)*TIME) - exp(-exp(lKa)*TIME))/
+ (exp(lKa)-exp(lKe)), data=origdataT,
+ fixed=lKa+lKe+lCl~1,
+ random=lKa+lCl~1,
+ start=c(lKe=-2,lKa=1.5,lCl=-3))
+set.seed(123456)
+sim.T <- simulate_nlme(object=T.nlme,nsim=100,psim=3,level=1,value="data.frame",data = NULL)
+simdata.T <- matrix(sim.T$sim.y,ncol=100)
-# Model 2 (Wrong)
-F.nlme <- nlme(DV ~ exp(lKe+lKa-lCl)*AMT*
- (exp(-exp(lKe)*TIME) - exp(-exp(lKa)*TIME))/
- (exp(lKa)-exp(lKe)), data=origdataT,
- fixed=lKa+lKe+lCl~1,
- random=lKe~1,
- start=c(lKe=-2,lKa=1.5,lCl=-3))
+# Model 2 (Wrong)
+F.nlme <- nlme(DV ~ exp(lKe+lKa-lCl)*AMT*
+ (exp(-exp(lKe)*TIME) - exp(-exp(lKa)*TIME))/
+ (exp(lKa)-exp(lKe)), data=origdataT,
+ fixed=lKa+lKe+lCl~1,
+ random=lKe~1,
+ start=c(lKe=-2,lKa=1.5,lCl=-3))
-sim.F <- simulate_nlme(object=F.nlme,nsim=100,psim=3,level=1,value="data.frame",data = NULL)
-simdata.F <- matrix(sim.F$sim.y,ncol=100)
+sim.F <- simulate_nlme(object=F.nlme,nsim=100,psim=3,level=1,value="data.frame",data = NULL)
+simdata.F <- matrix(sim.F$sim.y,ncol=100)
# Figure 7
+# Figure 7
-VPCgraph(origdata,simdata.T,type="CI",N_xbin=8)+labs(title="Model 1",caption="")
-VPCgraph(origdata,simdata.F,type="CI",N_xbin=8)+labs(title="Model 2",caption="")
+VPCgraph(origdata,simdata.T,type="CI",N_xbin=8)+labs(title="Model 1",caption="")
+VPCgraph(origdata,simdata.F,type="CI",N_xbin=8)+labs(title="Model 2",caption="")
-# Figure 8
+# Figure 8
-aqrVPC(origdata,simdata.T)+labs(title="Model 1",caption="")
-aqrVPC(origdata,simdata.F)+labs(title="Model 2",caption="")
+aqrVPC(origdata,simdata.T)+labs(title="Model 1",caption="")
+aqrVPC(origdata,simdata.F)+labs(title="Model 2",caption="")
-# Figure 9
+# Figure 9
-asVPC(origdata,simdata.T,type="CI",weight_method="distance",N_xbin=8)+labs(title="Model 1",caption="")
-asVPC(origdata,simdata.F,type="CI",weight_method="distance",N_xbin=8)+labs(title="Model 2",caption="")
+asVPC(origdata,simdata.T,type="CI",weight_method="distance",N_xbin=8)+labs(title="Model 1",caption="")
+asVPC(origdata,simdata.F,type="CI",weight_method="distance",N_xbin=8)+labs(title="Model 2",caption="")
-# Figure 10
+# Figure 10
-bootVPC(origdata,simdata.T,N_xbin=8)+labs(title="Model 1",caption="")
-bootVPC(origdata,simdata.F,N_xbin=8)+labs(title="Model 2",caption="")
+bootVPC(origdata,simdata.T,N_xbin=8)+labs(title="Model 1",caption="")
+bootVPC(origdata,simdata.F,N_xbin=8)+labs(title="Model 2",caption="")
-# Figure 11
+# Figure 11
-coverageplot(origdata,simdata.T,conf.level=0.9,N_xbin=8)+labs(title="Model 1")
-coverageplot(origdata,simdata.F,conf.level=0.9,N_xbin=8)+labs(title="Model 2")
+coverageplot(origdata,simdata.T,conf.level=0.9,N_xbin=8)+labs(title="Model 1")
+coverageplot(origdata,simdata.F,conf.level=0.9,N_xbin=8)+labs(title="Model 2")
-# Figure 12
+# Figure 12
-coverageDetailplot(origdata,simdata.T,predL=0.5,N_xbin=8)+labs(title="Model 1")
-coverageDetailplot(origdata,simdata.F,predL=0.5,N_xbin=8)+labs(title="Model 2")
+coverageDetailplot(origdata,simdata.T,predL=0.5,N_xbin=8)+labs(title="Model 1")
+coverageDetailplot(origdata,simdata.F,predL=0.5,N_xbin=8)+labs(title="Model 2")
-# Figure 13
+# Figure 13
-coverageDetailplot(origdata,simdata.T,predL=0.8,N_xbin=8)+labs(title="Model 1")
-coverageDetailplot(origdata,simdata.F,predL=0.8,N_xbin=8)+labs(title="Model 2")
+coverageDetailplot(origdata,simdata.T,predL=0.8,N_xbin=8)+labs(title="Model 1")
+coverageDetailplot(origdata,simdata.F,predL=0.8,N_xbin=8)+labs(title="Model 2")
-# Figure 14
+# Figure 14
-quantVPC(origdata,simdata.T,N_xbin=8)+labs(title="Model 1")
-quantVPC(origdata,simdata.F,N_xbin=8)+labs(title="Model 2")
+quantVPC(origdata,simdata.T,N_xbin=8)+labs(title="Model 1")
+quantVPC(origdata,simdata.F,N_xbin=8)+labs(title="Model 2")
Figure 7 shows the VPCgraph
for Model 1 and Model 2.
The solid line represents the 50\(^{th}\) percentile of the observed data, and the dashed lines represent the 10\(^{th}\) and 90\(^{th}\) percentiles of the observed data. In Model 1, all quantile lines (the solid and dashed lines) are in the confidence area. However, the 10\(^{th}\) and 90\(^{th}\) percentiles in Model 2 are mostly outside the confidence area, especially at 0 to 5 hours.
Figure 8 shows the results of aqrVPC
, and Figure 9 shows the results of asVPC
for Model 1 and Model 2. The additive quantile regression VPCs and the average shifted VPCs show similar patterns to the original VPCs. Model 1 shows all quantile lines in the confidence area, and the 10\(^{th}\) and 90\(^{th}\) percentiles in Model 2 are mostly outside the confidence area.
The results from Figure 7 through Figure 14 show that Model 1 explains the origdata
quite well. However, Model 2 shows different patterns than Model 1 in most figures. We can conclude that Model 1 is better than Model 2, and treating \(Ka\) and \(Cl\) as random effects is better.
This paper introduces the nlmeVPC package. The VPC and its extensions are useful for validating the nonlinear mixed effect model. The nlmeVPC package provides various visual diagnostic tools for the nonlinear mixed effect model in two different approaches: validation in data space and model space. Both approaches are valuable. Validation in data space can compare the fitted model with the original data, and validation in model space provides detailed comparisons in various ways. In the nlmeVPC package, we also provide new approaches - asVPC
and coverageDetailplot
. Here, asVPC
provides a more precise VPC plot, and coverageDetailplot
provides the detailed feature of the fitted model that is not captured in the coverage plot. Even though the coverage plot does not show any problem with the fitted model, the coverage detailed plot can reveal the problem in a specific region (Figures 10, 11, and 12).
This work was supported by the National Research Foundation of Korea(NRF) grant funded by the Korea government(MSIT) (2018R1A2B6001251). This paper was prepared by extracting part of Eun-Hwa Kang and Myungji Ko’s master thesis.
nlmeVPC, xpose4, vpc, nlmixr2, ggplot2, quantreg, nlme, nlraa
-Agriculture, ChemPhys, DifferentialEquations, Econometrics, Environmetrics, Finance, HighPerformanceComputing, MixedModels, OfficialStatistics, Optimization, Pharmacokinetics, Phylogenetics, Psychometrics, ReproducibleResearch, Robust, Spatial, SpatioTemporal, Survival, TeachingStatistics
Text and figures are licensed under Creative Commons Attribution CC BY 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
+Text and figures are licensed under Creative Commons Attribution CC BY 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
-Kang, et al., "nlmeVPC: Visual Model Diagnosis for the Nonlinear Mixed Effect Model", The R Journal, 2023+
Kang, et al., "nlmeVPC: Visual Model Diagnosis for the Nonlinear Mixed Effect Model", The R Journal, 2023
BibTeX citation
@article{RJ-2023-026, author = {Kang, Eun-Hwa and Ko, Myungji and Lee, Eun-Kyung}, diff --git a/_articles/RJ-2023-026/RJ-2023-026.pdf b/_articles/RJ-2023-026/RJ-2023-026.pdf index 100ff995e..2d684f942 100644 Binary files a/_articles/RJ-2023-026/RJ-2023-026.pdf and b/_articles/RJ-2023-026/RJ-2023-026.pdf differ diff --git a/_articles/RJ-2023-026/RJ-2023-026.tex b/_articles/RJ-2023-026/RJ-2023-026.tex index 035194441..3ce0bfd92 100644 --- a/_articles/RJ-2023-026/RJ-2023-026.tex +++ b/_articles/RJ-2023-026/RJ-2023-026.tex @@ -56,9 +56,7 @@ \subsection{Visual Predictive Check}\label{visual-predictive-check}} \texttt{VPCgraph} provides the automatic binning with \texttt{optK} and \texttt{makeCOVbin}; here, \texttt{optK} finds the optimal number of bins, and \texttt{makeCOVbin} finds the optimal cutoffs of bins using Lavielle and Bleakley's method. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/Fig0-1.pdf} -\caption{\label{fig:Fig0}The visual predictive check plot. The solid red line represents the \(50^{th}\) percentile of the observed data, and dashed red lines represent the \(10^{th}\) and \(90^{th}\) percentiles of the observed data. The solid blue line represents the \(50^{th}\) percentile of the simularted data, and dashed blue lines represent the \(10^{th}\) and \(90^{th}\) percentiles of the simulated data. Light blue and pink areas represent the 95\% confidence areas of the \(10^{th}\), \(50^{th}\) and \(90^{th}\) percentile lines.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/Fig0-1} \caption{The visual predictive check plot. The solid red line represents the $50^{th}$ percentile of the observed data, and dashed red lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. The solid blue line represents the $50^{th}$ percentile of the simularted data, and dashed blue lines represent the $10^{th}$ and $90^{th}$ percentiles of the simulated data. Light blue and pink areas represent the 95\% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines.}\label{fig:Fig0} \end{figure} \hypertarget{additive-quantile-regression-vpc}{% @@ -68,9 +66,7 @@ \subsection{Additive quantile regression VPC}\label{additive-quantile-regression Jamsen et al. (2018) used additive quantile regression to calculate the quantiles of the observed and simulated data. This regression method makes it possible to estimate quantiles without discrete binning, which is especially useful when the data are insufficient, irregular, or inappropriate to configure the bins. To fit the additive quantile regression, we used the \texttt{rqss} function in the \CRANpkg{quantreg} (Koenker 2023) package and developed the \texttt{aqrVPC} function to draw the VPC type plot with additive quantile regression. Figure \ref{fig:Fig1} shows the additive quantile regression VPC plot. The solid and dashed lines represent the \(10^{th}\), \(50^{th}\), and \(90^{th}\) additive quantile regression lines of the observed data, and the pink and light blue areas represent the confidence areas of the additive quantile regression lines of the simulated data. Lines and areas in the additive quantile regression VPC plot are much smoother than those in the original VPC plot. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/Fig1-1.pdf} -\caption{\label{fig:Fig1}The additive equantile VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the \(10^{th}\), \(50^{th}\), and \(90^{th}\) percentiles of the simulated data. The solid red line represents the \(50^{th}\) percentile line. Light blue and pink areas represent the 95\% confidence areas of the \(10^{th}\), \(50^{th}\) and \(90^{th}\) percentile lines.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/Fig1-1} \caption{The additive equantile VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line. Light blue and pink areas represent the 95\% confidence areas of the $10^{th}$, $50^{th}$ and $90^{th}$ percentile lines.}\label{fig:Fig1} \end{figure} \hypertarget{bootstrap-vpc}{% @@ -83,9 +79,7 @@ \subsection{Bootstrap VPC}\label{bootstrap-vpc}} then this is evidence that the fitted model fit the observed data well. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/Fig2-1.pdf} -\caption{\label{fig:Fig2}The bootstrap VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the \(10^{th}\), \(50^{th}\), and \(90^{th}\) percentiles of the simulated data. The solid red line represents the \(50^{th}\) percentile line, and the pink areas represent the 95\% confidence areas of the \(50^{th}\) percentile line, calculated from the bootstrap samples of the observed data.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/Fig2-1} \caption{The bootstrap VPC plot. Dots indicate the observed data. The solid and dashed blue lines represent the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles of the simulated data. The solid red line represents the $50^{th}$ percentile line, and the pink areas represent the 95\% confidence areas of the $50^{th}$ percentile line, calculated from the bootstrap samples of the observed data.}\label{fig:Fig2} \end{figure} \hypertarget{average-shifted-vpc}{% @@ -150,9 +144,7 @@ \subsubsection{Determining the weights}\label{determining-the-weights}} Figure \ref{fig:Fig3} shows the results from the \texttt{asVPC} function using bin-related weights and distance-related weights. The solid and dashed lines represent the average shifted quantile lines of the observed data, and the pink and light blue areas represent the confidence areas of the simulated data. The lines in the asVPC plot are smoother than those in the original VPC plot, and the confidence areas in the asVPC plot are thinner than those in the original VPC plot. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/Fig3-1.pdf} -\caption{\label{fig:Fig3}The average shifted VPC plot. Dots indicate the observed data. The solid line represents the 50th quantiles of the observed data, and dashed lines represent the \(10^{th}\) and \(90^{th}\) percentiles of the observed data. Light blue and pink areas represent the 95\% confidence areas of the \(10^{th}\), \(50^{th}\), and \(90^{th}\) percentiles.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/Fig3-1} \caption{The average shifted VPC plot. Dots indicate the observed data. The solid line represents the 50th quantiles of the observed data, and dashed lines represent the $10^{th}$ and $90^{th}$ percentiles of the observed data. Light blue and pink areas represent the 95\% confidence areas of the $10^{th}$, $50^{th}$, and $90^{th}$ percentiles.}\label{fig:Fig3} \end{figure} \hypertarget{validation-in-the-model-space}{% @@ -225,9 +217,7 @@ \subsection{Coverage detailed plot}\label{coverage-detailed-plot}} Figure \ref{fig:Fig4}(B) is the result of \texttt{coverageDetailplot} when the prediction level is 80\(\%\). The white dots represent the expected percentages of the lower and upper the prediction intervals, 10\(\%\), and 90\(\%\), respectively. The upper and lower percentages of observation in each time bin are shown in darker gray. The left bin(before 0.045 hours) shows all light gray in the coverage detailed plot, and it is quite different patterns from the expected one. However, it is mainly due to the characteristics of this example data. All observations in this bin are 0. It makes the lower and upper bound of the prediction interval all 0, and the lower and upper percentages become 0. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/Fig4-1.pdf} -\caption{\label{fig:Fig4}The coverage plot and the coverage detailed plot for the 80\% prediction interval. In the coverage plot, the X-axis is the level of the prediction interval. The Y-axis is the ratio between the number of observed data and the number of expected data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence area of the ratios. If the solid lines are near the white line, we can conclude that the suggested model is suitable. In the coverage detailed plot, the white dots represent the expected percentages of lower and upper prediction intervals of, 10\%, and 90\%, respectively. The upper and lower percentages of observation in each time bin are darker gray.} +\includegraphics[width=0.9\linewidth]{RJ-2023-026_files/figure-latex/Fig4-1} \caption{The coverage plot and the coverage detailed plot for the 80\% prediction interval. In the coverage plot, the X-axis is the level of the prediction interval. The Y-axis is the ratio between the number of observed data and the number of expected data of the lower and upper parts in each level of the prediction interval. The white line is the reference line, and the gray area represents the confidence area of the ratios. If the solid lines are near the white line, we can conclude that the suggested model is suitable. In the coverage detailed plot, the white dots represent the expected percentages of lower and upper prediction intervals of, 10\%, and 90\%, respectively. The upper and lower percentages of observation in each time bin are darker gray.}\label{fig:Fig4} \end{figure} \hypertarget{quantified-vpc}{% @@ -241,9 +231,7 @@ \subsection{Quantified VPC}\label{quantified-vpc}} Figure \ref{fig:Fig5} shows the result of \texttt{quantVPC}. The darker gray areas represent the percentages below the median. The lighter gray areas represent the percentage above. The brightest gray areas represent the percent unavailable in each time bin. The white dots represent the ideal location where the above and the below percentages meet. In this example, there is no missing value. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/Fig5-1.pdf} -\caption{\label{fig:Fig5}The quantified VPC plot. The darker gray areas represent the percentages below the median, the lighter gray areas represent the percentage above, and the brightest gray areas represent the percent unavailable in each time bin. The white dots represent the ideal location where the above and the below percentages meet.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/Fig5-1} \caption{The quantified VPC plot. The darker gray areas represent the percentages below the median, the lighter gray areas represent the percentage above, and the brightest gray areas represent the percent unavailable in each time bin. The white dots represent the ideal location where the above and the below percentages meet.}\label{fig:Fig5} \end{figure} \hypertarget{the-nlmevpc-package-structure-and-functionality}{% @@ -393,23 +381,17 @@ \subsection{Example}\label{example}} The solid line represents the 50\(^{th}\) percentile of the observed data, and the dashed lines represent the 10\(^{th}\) and 90\(^{th}\) percentiles of the observed data. In Model 1, all quantile lines (the solid and dashed lines) are in the confidence area. However, the 10\(^{th}\) and 90\(^{th}\) percentiles in Model 2 are mostly outside the confidence area, especially at 0 to 5 hours. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M11-1.pdf} -\caption{\label{fig:M11}The VPC plots of Model 1 and Model 2.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M11-1} \caption{The VPC plots of Model 1 and Model 2.}\label{fig:M11} \end{figure} Figure \ref{fig:M12} shows the results of \texttt{aqrVPC}, and Figure \ref{fig:M13} shows the results of \texttt{asVPC} for Model 1 and Model 2. The additive quantile regression VPCs and the average shifted VPCs show similar patterns to the original VPCs. Model 1 shows all quantile lines in the confidence area, and the 10\(^{th}\) and 90\(^{th}\) percentiles in Model 2 are mostly outside the confidence area. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M12-1.pdf} -\caption{\label{fig:M12}The additive quantile regression VPC plots for Model 1 and Model 2.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M12-1} \caption{The additive quantile regression VPC plots for Model 1 and Model 2.}\label{fig:M12} \end{figure} \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M13-1.pdf} -\caption{\label{fig:M13}The average shifted VPC plots for Model 1 and Model 2.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M13-1} \caption{The average shifted VPC plots for Model 1 and Model 2.}\label{fig:M13} \end{figure} Figure \ref{fig:M14} shows the results of \texttt{bootVPC} for Model 1 and Model 2. The solid and dashed blue lines show the \(10^{th}\), \(50^{th}\), and \(90^{th}\) percentiles of the simulated data. The solid red line represents the \(50^{th}\) percentile line of the observed data, and the pink areas represent the 95\(\%\) confidence areas of the \(50^{th}\) percentile line, calculated from the bootstrap samples of the observed data. @@ -417,30 +399,22 @@ \subsection{Example}\label{example}} The dashed blue lines in Model 1 cover most of the observed data. However, the dashed blue lines in Model 2 do not cover the observed data. Many observed data points lie outside these dashed blue lines in Model 2, especially in 0 to 10 hours. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M14-1.pdf} -\caption{\label{fig:M14}The bootstrap VPC plots for Model 1 and Model 2.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M14-1} \caption{The bootstrap VPC plots for Model 1 and Model 2.}\label{fig:M14} \end{figure} \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M15-1.pdf} -\caption{\label{fig:M15}The coverage plots for Model 1 and Model 2.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M15-1} \caption{The coverage plots for Model 1 and Model 2.}\label{fig:M15} \end{figure} Figure \ref{fig:M15} shows the \texttt{coverageplot} results for Model 1 and Model 2. The lines are in the gray area and close to the white line in Model 1. However, the lines in Model 2 are not in the gray area, especially when the PI value is large. Figures \ref{fig:M16} and \ref{fig:M17} show the results of \texttt{coverageDetailplot} for Model 1 and Model 2 when PIs are 50\(\%\) and 80\(\%\). The upper and lower percentages in both figures are close to the white points in Model 1. On the other hand, the upper percentages of the most time bins are far from the white points in Model 2, especially the time bin (3.54,5.28{]} when PI = 50\(\%\). When PI = 80\(\%\), most upper and lower percentages are far from the white points. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M16-1.pdf} -\caption{\label{fig:M16}The coverage detailed plots for Model 1 and Model 2 when PI=50\%.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M16-1} \caption{The coverage detailed plots for Model 1 and Model 2 when PI=50\%.}\label{fig:M16} \end{figure} \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M17-1.pdf} -\caption{\label{fig:M17}The coverage detailed plots for Model 1 and Model 2 when PI=80\%.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M17-1} \caption{The coverage detailed plots for Model 1 and Model 2 when PI=80\%.}\label{fig:M17} \end{figure} Figure \ref{fig:M18} shows the results of \texttt{quantVPC} for Model 1 and Model 2. In Model 2, the right tail area (after 11 hours) looks quite different from the expected pattern. The above percentages are much larger than the below percentages. @@ -448,9 +422,7 @@ \subsection{Example}\label{example}} The results from Figure \ref{fig:M11} through Figure \ref{fig:M18} show that Model 1 explains the \texttt{origdata} quite well. However, Model 2 shows different patterns than Model 1 in most figures. We can conclude that Model 1 is better than Model 2, and treating \(Ka\) and \(Cl\) as random effects is better. \begin{figure} -\centering -\includegraphics{RJ-2023-026_files/figure-latex/M18-1.pdf} -\caption{\label{fig:M18}The quantified VPC plots for Model 1 and Model 2.} +\includegraphics[width=1\linewidth]{RJ-2023-026_files/figure-latex/M18-1} \caption{The quantified VPC plots for Model 1 and Model 2.}\label{fig:M18} \end{figure} \hypertarget{summary}{% @@ -546,7 +518,6 @@ \section*{References}\label{references}} \end{CSLReferences} -\bibliography{EunKyung\_Lee.bib} \address{% Eun-Hwa Kang\\ diff --git a/_articles/RJ-2023-026/RJwrapper.tex b/_articles/RJ-2023-026/RJwrapper.tex index 55e10d215..ba6161f35 100644 --- a/_articles/RJ-2023-026/RJwrapper.tex +++ b/_articles/RJ-2023-026/RJwrapper.tex @@ -10,6 +10,7 @@ \providecommand{\tightlist}{% \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} +\usepackage{longtable} % Always define CSL refs as bib entries are contained in separate doc % Pandoc citation processing diff --git a/_issues/2023-1/2023-1.pdf b/_issues/2023-1/2023-1.pdf index 075309276..e55c5e56a 100644 Binary files a/_issues/2023-1/2023-1.pdf and b/_issues/2023-1/2023-1.pdf differ