From 3d5e6cec9c6216ed31a663c82e803e29a7d45b40 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Tue, 24 May 2022 16:44:17 -0400
Subject: [PATCH 1/7] Add nlmixr_formula() (testable work in progress)

---
 DESCRIPTION           |   2 +-
 NAMESPACE             |   1 +
 R/nlmixr_formula.R    | 275 ++++++++++++++++++++++++++++++++++++++++++
 man/nlmixr_formula.Rd |  61 ++++++++++
 4 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 R/nlmixr_formula.R
 create mode 100644 man/nlmixr_formula.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index f72177d..b304a6d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -49,4 +49,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 NeedsCompilation: yes
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0.9000
diff --git a/NAMESPACE b/NAMESPACE
index ba6028f..9d0be4f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,7 @@ export(model)
 export(nlmixr)
 export(nlmixr2)
 export(nlmixrWithTiming)
+export(nlmixr_formula)
 export(performNorm)
 export(preconditionFit)
 export(removeCovMultiple)
diff --git a/R/nlmixr_formula.R b/R/nlmixr_formula.R
new file mode 100644
index 0000000..ef2acbe
--- /dev/null
+++ b/R/nlmixr_formula.R
@@ -0,0 +1,275 @@
+#' A simple formula-based interface for nlmixr2
+#' 
+#' @details
+#' The formula is given with different notation than typical formulas.  The
+#' formula notation is inspired by and similar to \code{lme4::nlmer()}.  It is a
+#' 3-part formula: \code{dependent_variable~predictor_equation~random_effects}.
+#' 
+#' The \code{dependent_variable} is any variable in the dataset.  It may not
+#' include any math; for example, \code{log(DV)} is not allowed.
+#' 
+#' The \code{predictor_equation} is any valid math, and it will be used directly
+#' in the nlmixr2 model.
+#' 
+#' The \code{random_effects} are one or more random effect parameters defined by
+#' putting the parameter in parentheses and putting a vertical bar and the
+#' grouping parameter.  Only one grouping parameter is allowed for all random
+#' effects.  An example would be \code{(slope|ID)} to estimate a random effect
+#' parameter named "slope" for each "ID" in the data.
+#' 
+#' @param object The formula defining the model (see details)
+#' @param data The data to fit
+#' @param start A named list of starting estimates.  The names define the
+#'   parameters in the model.  If a single parameter estimate is desired, it can
+#'   be given here.  If a parameter estimate per factor level is desired, either
+#'   a single starting estimate can be given across all factor levels or one
+#'   estimate may be given per factor level.  (Specify the factors with the \code{param} argument.)
+#' @param ... Passed to \code{nlmixr()}
+#' @param residual_model The residual model formula to use.
+#' @return The model fit from \code{nlmixr()}
+#' @examples
+#' nlmixr_formula(
+#'   height ~ (Asym+Asym_re)+(R0-(Asym+Asym_re))*exp(-exp(lrc)*age) ~ (Asym_re|Seed),
+#'   data = Loblolly,
+#'   start = list(Asym = 103, R0 = -8.5, lrc = -3.3, add_err=1),
+#'   est="focei"
+#' )
+#' @export
+nlmixr_formula <- function(object, data, start, param=NULL, ..., residual_model=~add(add_err)) {
+  parsed_formula <- nlmixr_formula_parser(object)
+  # Add "TIME", if needed to the data
+  if (!("TIME" %in% names(data))) {
+    data$TIME <- seq_len(nrow(data))
+  }
+  # Setup DV
+  data <- rename_or_overwrite(data, new_name="DV", old_name=parsed_formula$DV)
+  
+  # Setup the random effects
+  ranef_group <- NULL
+  for (current_ranef in parsed_formula$ranef) {
+    if (is.null(ranef_group)) {
+      ranef_group <- current_ranef$ranef_group
+    } else {
+      # only one random effect grouping variable is allowed
+      stopifnot(current_ranef$ranef_group == ranef_group)
+    }
+  }
+  data <- rename_or_overwrite(data, new_name="ID", old_name=ranef_group)
+  start_all <- nlmixr_formula_expand_start_param(start=start, param=param, data=data)
+  ini_fixed <- nlmixr_formula_setup_ini_fixed(start=start_all)
+  ini_complete <-
+    nlmixr_formula_setup_ini_random(
+      ranef_definition=parsed_formula$ranef,
+      base=ini_fixed
+    )
+  model_complete <-
+    nlmixr_formula_setup_model(
+      start=start_all,
+      predictor=parsed_formula$predictor,
+      residual_model=residual_model,
+      param=param
+    )
+  
+  # Build up the model function  
+  model_out <- function() {
+    ini()
+    model()
+  }
+  body(model_out)[[2]][[2]] <- ini_complete
+  body(model_out)[[3]][[2]] <- model_complete
+  nlmixr2est::nlmixr(object=model_out, data=data, ...)
+}
+
+rename_or_overwrite <- function(data, new_name, old_name) {
+  char_old <- as.character(old_name)
+  stopifnot(char_old %in% names(data))
+  if (char_old != new_name) {
+    if (new_name %in% names(data)) {
+      warning(new_name, " is in the data and will be overwritten with ", old_name)
+    }
+    data[[new_name]] <- data[[char_old]]
+  }
+  stopifnot(new_name %in% names(data))
+  data
+}
+
+nlmixr_formula_parser <- function(object) {
+  stopifnot(rlang::is_formula(object))
+  # Confirm that the formula looks like it should and break it up into its
+  # component parts
+  stopifnot(length(object) == 3)
+  stopifnot(identical(as.name("~"), object[[1]]))
+  ranef_part <- object[[3]]
+  main_formula_part <- object[[2]]
+  stopifnot(length(main_formula_part) == 3)
+  stopifnot(identical(as.name("~"), main_formula_part[[1]]))
+  dv_part <- main_formula_part[[2]]
+  predictor_part <- main_formula_part[[3]]
+  
+  # We cannot yet handle DV that are not a NAME (no manipulations are done
+  # within the data)
+  stopifnot(is.name(dv_part))
+  
+  list(
+    DV=dv_part,
+    predictor=list(predictor_part),
+    ranef=nlmixr_formula_parser_ranef(ranef_part)
+  )
+}
+
+nlmixr_formula_parser_ranef <- function(object) {
+  stopifnot(is.call(object))
+  if (object[[1]] == as.name("+")) {
+    # multiple random effects being parsed, separate them
+    ret <-
+      append(
+        nlmixr_formula_parser_ranef(object[[2]]),
+        nlmixr_formula_parser_ranef(object[[3]])
+      )
+  } else if (object[[1]] == as.name("(")) {
+    stopifnot(length(object) == 2)
+    inner_ranef_spec <- object[[2]]
+    stopifnot(is.call(inner_ranef_spec))
+    stopifnot(length(inner_ranef_spec) == 3)
+    stopifnot(inner_ranef_spec[[1]] == as.name("|"))
+    stopifnot(is.name(inner_ranef_spec[[2]]))
+    stopifnot(is.name(inner_ranef_spec[[3]]))
+    ret <-
+      list(
+        list(
+          ranef_var=inner_ranef_spec[[2]],
+          ranef_group=inner_ranef_spec[[3]],
+          # Give the user control of start in the future
+          start=1
+        )
+      )
+  } else {
+    stop("Invalid random effect") # nocov
+  }
+  ret
+}
+
+param_expand <- function(param) {
+  if (is.null(param)) {
+    character()
+  } else if (is.list(param)) {
+    unlist(lapply(param, param_expand))
+  } else if (rlang::is_formula(param)) {
+    stopifnot(length(param) == 3)
+    stopifnot(is.name(param[[3]]))
+    varnames <- all.vars(param[[2]])
+    setNames(rep(as.character(param[[3]]), length(varnames)), nm=varnames)
+  } else {
+    stop("Cannot expand param") # nocov
+  }
+}
+
+nlmixr_formula_expand_start_param_single <- function(start_name, start_value, param, data) {
+  stopifnot(is.character(start_name))
+  stopifnot(is.numeric(start_value))
+  if (is.na(param)) {
+    stopifnot(length(start_value) == 1)
+    ret <-
+      list(
+        ini=list(str2lang(paste(start_name, "<-", start_value))),
+        model=list(NULL)
+      )
+  } else {
+    stopifnot(is.character(param))
+    stopifnot(length(param) == 1)
+    stopifnot(param %in% names(data))
+    if (any(is.na(data[[param]]))) {
+      stop("NA found in data column:", param)
+    }
+    if (is.factor(data[[param]])) {
+      param_label <- levels(data[[param]])
+      stopifnot(length(start_value) %in% c(1, length(param_label)))
+      ret <- list(ini=list(), model=list())
+      model_string <- character()
+      for (idx in seq_along(param_label)) {
+        current_param_name <- make.names(paste(start_name, param, param_label[idx]))
+        current_start_value <-
+          if (length(start_value) == 1) {
+            start_value
+          } else {
+            start_value[idx]
+          }
+        ret$ini <-
+          append(
+            ret$ini,
+            nlmixr_formula_expand_start_param_single(start_name=current_param_name, start_value=current_start_value, param=NA)$ini
+          )
+        model_string <-
+          c(model_string, sprintf("%s*(%s == '%s')", current_param_name, param, param_label[idx]))
+      }
+      ret$model <-
+        list(str2lang(sprintf(
+          "%s <- %s", start_name, paste(model_string, collapse="+")
+        )))
+    } else {
+      stop("Can only handle factors for fixed effect grouping levels")
+    }
+  }
+  ret
+}
+
+nlmixr_formula_expand_start_param <- function(start, param, data) {
+  param_expanded <- param_expand(param)
+  stopifnot(all(names(param_expanded) %in% names(start)))
+  ret <- list()
+  for (current_start in names(start)) {
+    ret <-
+      append(
+        ret,
+        list(nlmixr_formula_expand_start_param_single(
+          start_name=current_start,
+          start_value=start[[current_start]],
+          param=param_expanded[current_start], # this will be NA if it does not exist
+          data=data
+        ))
+      )
+  }
+  ret
+}
+
+nlmixr_formula_setup_ini_fixed <- function(start, base=str2lang("{}")) {
+  stopifnot(length(start) > 0)
+  stopifnot(is.list(start))
+  for (idx_outer in seq_along(start)) {
+    for (idx_inner in seq_along(start[[idx_outer]]$ini)) {
+      base[[length(base) + 1]] <- start[[idx_outer]]$ini[[idx_inner]]
+    }
+  }
+  base
+}
+
+nlmixr_formula_setup_ini_random <- function(ranef_definition, base=str2lang("{}")) {
+  stopifnot(is.list(ranef_definition))
+  for (current_ranef in ranef_definition) {
+    base[[length(base) + 1]] <-
+      str2lang(paste(current_ranef$ranef_var, "~", current_ranef$start))
+  }
+  base
+}
+
+nlmixr_formula_setup_model <- function(start, predictor, residual_model, predictor_var="value", param, data) {
+  stopifnot(rlang::is_formula(residual_model))
+  # a one-sided formula
+  stopifnot(length(residual_model) == 2)
+  
+  pred_line <- str2lang(paste(predictor_var, " <- ."))
+  pred_line[[3]] <- predictor[[1]]
+  residual_line <- str2lang(paste(predictor_var, "~."))
+  residual_line[[3]] <- residual_model[[2]]
+  
+  base <- str2lang("{}")
+  # Add any lines needed from the 'start'
+  for (current_start in start) {
+    if (!is.null(current_start$model[[1]])) {
+      base[[length(base) + 1]] <- current_start$model[[1]]
+    }
+  }
+  base[[length(base) + 1]] <- pred_line
+  base[[length(base) + 1]] <- residual_line
+  base
+}
diff --git a/man/nlmixr_formula.Rd b/man/nlmixr_formula.Rd
new file mode 100644
index 0000000..3be55e6
--- /dev/null
+++ b/man/nlmixr_formula.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixr_formula.R
+\name{nlmixr_formula}
+\alias{nlmixr_formula}
+\title{A simple formula-based interface for nlmixr2}
+\usage{
+nlmixr_formula(
+  object,
+  data,
+  start,
+  param = NULL,
+  ...,
+  residual_model = ~add(add_err)
+)
+}
+\arguments{
+\item{object}{The formula defining the model (see details)}
+
+\item{data}{The data to fit}
+
+\item{start}{A named list of starting estimates.  The names define the
+parameters in the model.  If a single parameter estimate is desired, it can
+be given here.  If a parameter estimate per factor level is desired, either
+a single starting estimate can be given across all factor levels or one
+estimate may be given per factor level.  (Specify the factors with the \code{param} argument.)}
+
+\item{...}{Passed to \code{nlmixr()}}
+
+\item{residual_model}{The residual model formula to use.}
+}
+\value{
+The model fit from \code{nlmixr()}
+}
+\description{
+A simple formula-based interface for nlmixr2
+}
+\details{
+The formula is given with different notation than typical formulas.  The
+formula notation is inspired by and similar to \code{lme4::nlmer()}.  It is a
+3-part formula: \code{dependent_variable~predictor_equation~random_effects}.
+
+The \code{dependent_variable} is any variable in the dataset.  It may not
+include any math; for example, \code{log(DV)} is not allowed.
+
+The \code{predictor_equation} is any valid math, and it will be used directly
+in the nlmixr2 model.
+
+The \code{random_effects} are one or more random effect parameters defined by
+putting the parameter in parentheses and putting a vertical bar and the
+grouping parameter.  Only one grouping parameter is allowed for all random
+effects.  An example would be \code{(slope|ID)} to estimate a random effect
+parameter named "slope" for each "ID" in the data.
+}
+\examples{
+nlmixr_formula(
+  height ~ (Asym+Asym_re)+(R0-(Asym+Asym_re))*exp(-exp(lrc)*age) ~ (Asym_re|Seed),
+  data = Loblolly,
+  start = list(Asym = 103, R0 = -8.5, lrc = -3.3, add_err=1),
+  est="focei"
+)
+}

From dade7b9d6844daa2d97d3016a75bb3b13984c9b8 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 25 May 2022 16:38:03 -0400
Subject: [PATCH 2/7] Remove rlang dependency

---
 R/nlmixr_formula.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/nlmixr_formula.R b/R/nlmixr_formula.R
index ef2acbe..713b5be 100644
--- a/R/nlmixr_formula.R
+++ b/R/nlmixr_formula.R
@@ -94,7 +94,7 @@ rename_or_overwrite <- function(data, new_name, old_name) {
 }
 
 nlmixr_formula_parser <- function(object) {
-  stopifnot(rlang::is_formula(object))
+  stopifnot(inherits(object, "formula"))
   # Confirm that the formula looks like it should and break it up into its
   # component parts
   stopifnot(length(object) == 3)
@@ -154,7 +154,7 @@ param_expand <- function(param) {
     character()
   } else if (is.list(param)) {
     unlist(lapply(param, param_expand))
-  } else if (rlang::is_formula(param)) {
+  } else if (inherits(param, "formula")) {
     stopifnot(length(param) == 3)
     stopifnot(is.name(param[[3]]))
     varnames <- all.vars(param[[2]])
@@ -253,7 +253,7 @@ nlmixr_formula_setup_ini_random <- function(ranef_definition, base=str2lang("{}"
 }
 
 nlmixr_formula_setup_model <- function(start, predictor, residual_model, predictor_var="value", param, data) {
-  stopifnot(rlang::is_formula(residual_model))
+  stopifnot(inherits(residual_model, "formula"))
   # a one-sided formula
   stopifnot(length(residual_model) == 2)
   

From 6ad58d4642063e452280dce10ef6b4f697c5a946 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Wed, 25 May 2022 17:09:56 -0400
Subject: [PATCH 3/7] Fix devtools::check() issues including documentation
 issues

---
 R/nlmixr_formula.R    | 11 ++++++++---
 man/nlmixr_formula.Rd | 16 +++++++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/R/nlmixr_formula.R b/R/nlmixr_formula.R
index 713b5be..cd35ac7 100644
--- a/R/nlmixr_formula.R
+++ b/R/nlmixr_formula.R
@@ -23,9 +23,14 @@
 #'   parameters in the model.  If a single parameter estimate is desired, it can
 #'   be given here.  If a parameter estimate per factor level is desired, either
 #'   a single starting estimate can be given across all factor levels or one
-#'   estimate may be given per factor level.  (Specify the factors with the \code{param} argument.)
-#' @param ... Passed to \code{nlmixr()}
-#' @param residual_model The residual model formula to use.
+#'   estimate may be given per factor level.  (Specify the factors with the
+#'   \code{param} argument.)
+#' @param param A formula or list of two-sided formulas giving the model used
+#'   for parameters.  If a parameter is a simple fixed effect, only, then it
+#'   should not be included here.  If a parameter should have a separate
+#'   estimate per level of a factor, give that as the two-sided formula here.
+#' @inheritDotParams nlmixr2::nlmixr
+#' @param residual_model The residual model formula to use as a one-sided formula.
 #' @return The model fit from \code{nlmixr()}
 #' @examples
 #' nlmixr_formula(
diff --git a/man/nlmixr_formula.Rd b/man/nlmixr_formula.Rd
index 3be55e6..f892daf 100644
--- a/man/nlmixr_formula.Rd
+++ b/man/nlmixr_formula.Rd
@@ -22,11 +22,21 @@ nlmixr_formula(
 parameters in the model.  If a single parameter estimate is desired, it can
 be given here.  If a parameter estimate per factor level is desired, either
 a single starting estimate can be given across all factor levels or one
-estimate may be given per factor level.  (Specify the factors with the \code{param} argument.)}
+estimate may be given per factor level.  (Specify the factors with the
+\code{param} argument.)}
 
-\item{...}{Passed to \code{nlmixr()}}
+\item{param}{A formula or list of two-sided formulas giving the model used
+for parameters.  If a parameter is a simple fixed effect, only, then it
+should not be included here.  If a parameter should have a separate
+estimate per level of a factor, give that as the two-sided formula here.}
 
-\item{residual_model}{The residual model formula to use.}
+\item{...}{
+  Arguments passed on to \code{\link[nlmixr2:reexports]{nlmixr2::nlmixr}}
+  \describe{
+    \item{\code{}}{}
+  }}
+
+\item{residual_model}{The residual model formula to use as a one-sided formula.}
 }
 \value{
 The model fit from \code{nlmixr()}

From 473a4a195e7c804da374f70a12c130e2ddd3b4bd Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Thu, 1 Sep 2022 21:30:18 -0400
Subject: [PATCH 4/7] Code style updates

---
 DESCRIPTION           |   2 +-
 NAMESPACE             |   3 +-
 R/nlmixrFormula.R     | 280 ++++++++++++++++++++++++++++++++++++++++++
 R/nlmixr_formula.R    | 280 ------------------------------------------
 man/nlmixr_formula.Rd |  71 -----------
 5 files changed, 282 insertions(+), 354 deletions(-)
 create mode 100644 R/nlmixrFormula.R
 delete mode 100644 R/nlmixr_formula.R
 delete mode 100644 man/nlmixr_formula.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 4bdb50e..2f25952 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -52,4 +52,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 NeedsCompilation: yes
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.0
+RoxygenNote: 7.2.1
diff --git a/NAMESPACE b/NAMESPACE
index f1a4b75..97daf60 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -21,9 +21,8 @@ export(lassoSummardf)
 export(model)
 export(nlmixr)
 export(nlmixr2)
+export(nlmixrFormula)
 export(nlmixrWithTiming)
-export(nlmixr_formula)
-export(performNorm)
 export(normalizedData)
 export(optimUnisampling)
 export(preconditionFit)
diff --git a/R/nlmixrFormula.R b/R/nlmixrFormula.R
new file mode 100644
index 0000000..88aec24
--- /dev/null
+++ b/R/nlmixrFormula.R
@@ -0,0 +1,280 @@
+#' A simple formula-based interface for nlmixr2
+#' 
+#' @details
+#' The formula is given with different notation than typical formulas.  The
+#' formula notation is inspired by and similar to \code{lme4::nlmer()}.  It is a
+#' 3-part formula: \code{dependentVariable~predictorEquation~randomEffects}.
+#' 
+#' The \code{dependentVariable} is any variable in the dataset.  It may not
+#' include any math; for example, \code{log(DV)} is not allowed.
+#' 
+#' The \code{predictorEquation} is any valid math, and it will be used directly
+#' in the nlmixr2 model.
+#' 
+#' The \code{randomEffects} are one or more random effect parameters defined by
+#' putting the parameter in parentheses and putting a vertical bar and the
+#' grouping parameter.  Only one grouping parameter is allowed for all random
+#' effects.  An example would be \code{(slope|ID)} to estimate a random effect
+#' parameter named "slope" for each "ID" in the data.
+#' 
+#' @param object The formula defining the model (see details)
+#' @param data The data to fit
+#' @param start A named list of starting estimates.  The names define the
+#'   parameters in the model.  If a single parameter estimate is desired, it can
+#'   be given here.  If a parameter estimate per factor level is desired, either
+#'   a single starting estimate can be given across all factor levels or one
+#'   estimate may be given per factor level.  (Specify the factors with the
+#'   \code{param} argument.)
+#' @param param A formula or list of two-sided formulas giving the model used
+#'   for parameters.  If a parameter is a simple fixed effect, only, then it
+#'   should not be included here.  If a parameter should have a separate
+#'   estimate per level of a factor, give that as the two-sided formula here.
+#' @inheritDotParams nlmixr2::nlmixr
+#' @param residualModel The residual model formula to use as a one-sided formula.
+#' @return The model fit from \code{nlmixr()}
+#' @examples
+#' nlmixrFormula(
+#'   height ~ (Asym+AsymRe)+(R0-(Asym+AsymRe))*exp(-exp(lrc)*age) ~ (AsymRe|Seed),
+#'   data = Loblolly,
+#'   start = list(Asym = 103, R0 = -8.5, lrc = -3.3, addErr=1),
+#'   est="focei"
+#' )
+#' @export
+nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~add(addErr)) {
+  parsedFormula <- .nlmixrFormulaParser(object)
+  # Add "TIME", if needed to the data
+  if (!("TIME" %in% names(data))) {
+    data$TIME <- seq_len(nrow(data))
+  }
+  # Setup DV
+  data <- .renameOrOverwrite(data, newName="DV", oldName=parsedFormula$DV)
+  
+  # Setup the random effects
+  ranefGroup <- NULL
+  for (currentRanef in parsedFormula$ranef) {
+    if (is.null(ranefGroup)) {
+      ranefGroup <- currentRanef$ranefGroup
+    } else {
+      # only one random effect grouping variable is allowed
+      stopifnot(currentRanef$ranefGroup == ranefGroup)
+    }
+  }
+  data <- .renameOrOverwrite(data, newName="ID", oldName=ranefGroup)
+  startAll <- .nlmixrFormulaExpandStartParam(start=start, param=param, data=data)
+  iniFixed <- .nlmixrFormulaSetupIniFixed(start=startAll)
+  iniComplete <-
+    .nlmixrFormulaSetupIniRandom(
+      ranefDefinition=parsedFormula$ranef,
+      base=iniFixed
+    )
+  modelComplete <-
+    .nlmixrFormulaSetupModel(
+      start=startAll,
+      predictor=parsedFormula$predictor,
+      residualModel=residualModel,
+      param=param
+    )
+  
+  # Build up the model function  
+  modelOut <- function() {
+    ini()
+    model()
+  }
+  body(modelOut)[[2]][[2]] <- iniComplete
+  body(modelOut)[[3]][[2]] <- modelComplete
+  nlmixr2est::nlmixr(object=modelOut, data=data, ...)
+}
+
+.renameOrOverwrite <- function(data, newName, oldName) {
+  charOld <- as.character(oldName)
+  stopifnot(charOld %in% names(data))
+  if (charOld != newName) {
+    if (newName %in% names(data)) {
+      warning(newName, " is in the data and will be overwritten with ", oldName)
+    }
+    data[[newName]] <- data[[charOld]]
+  }
+  stopifnot(newName %in% names(data))
+  data
+}
+
+.nlmixrFormulaParser <- function(object) {
+  stopifnot(inherits(object, "formula"))
+  # Confirm that the formula looks like it should and break it up into its
+  # component parts
+  stopifnot(length(object) == 3)
+  stopifnot(identical(as.name("~"), object[[1]]))
+  ranefPart <- object[[3]]
+  mainFormulaPart <- object[[2]]
+  stopifnot(length(mainFormulaPart) == 3)
+  stopifnot(identical(as.name("~"), mainFormulaPart[[1]]))
+  dvPart <- mainFormulaPart[[2]]
+  predictorPart <- mainFormulaPart[[3]]
+  
+  # We cannot yet handle DV that are not a NAME (no manipulations are done
+  # within the data)
+  stopifnot(is.name(dvPart))
+  
+  list(
+    DV=dvPart,
+    predictor=list(predictorPart),
+    ranef=.nlmixrFormulaParserRanef(ranefPart)
+  )
+}
+
+.nlmixrFormulaParserRanef <- function(object) {
+  stopifnot(is.call(object))
+  if (object[[1]] == as.name("+")) {
+    # multiple random effects being parsed, separate them
+    ret <-
+      append(
+        .nlmixrFormulaParserRanef(object[[2]]),
+        .nlmixrFormulaParserRanef(object[[3]])
+      )
+  } else if (object[[1]] == as.name("(")) {
+    stopifnot(length(object) == 2)
+    innerRanefSpec <- object[[2]]
+    stopifnot(is.call(innerRanefSpec))
+    stopifnot(length(innerRanefSpec) == 3)
+    stopifnot(innerRanefSpec[[1]] == as.name("|"))
+    stopifnot(is.name(innerRanefSpec[[2]]))
+    stopifnot(is.name(innerRanefSpec[[3]]))
+    ret <-
+      list(
+        list(
+          ranefVar=innerRanefSpec[[2]],
+          ranefGroup=innerRanefSpec[[3]],
+          # Give the user control of start in the future
+          start=1
+        )
+      )
+  } else {
+    stop("Invalid random effect") # nocov
+  }
+  ret
+}
+
+.paramExpand <- function(param) {
+  if (is.null(param)) {
+    character()
+  } else if (is.list(param)) {
+    unlist(lapply(X = param, FUN = .paramExpand))
+  } else if (inherits(param, "formula")) {
+    stopifnot(length(param) == 3)
+    stopifnot(is.name(param[[3]]))
+    varnames <- all.vars(param[[2]])
+    setNames(rep(as.character(param[[3]]), length(varnames)), nm=varnames)
+  } else {
+    stop("Cannot expand param") # nocov
+  }
+}
+
+.nlmixrFormulaExpandStartParamSingle <- function(startName, startValue, param, data) {
+  stopifnot(is.character(startName))
+  stopifnot(is.numeric(startValue))
+  if (is.na(param)) {
+    stopifnot(length(startValue) == 1)
+    ret <-
+      list(
+        ini=list(str2lang(paste(startName, "<-", startValue))),
+        model=list(NULL)
+      )
+  } else {
+    stopifnot(is.character(param))
+    stopifnot(length(param) == 1)
+    stopifnot(param %in% names(data))
+    if (any(is.na(data[[param]]))) {
+      stop("NA found in data column:", param)
+    }
+    if (is.factor(data[[param]])) {
+      paramLabel <- levels(data[[param]])
+      stopifnot(length(startValue) %in% c(1, length(paramLabel)))
+      ret <- list(ini=list(), model=list())
+      modelString <- character()
+      for (idx in seq_along(paramLabel)) {
+        currentParamName <- make.names(paste(startName, param, paramLabel[idx]))
+        currentStartValue <-
+          if (length(startValue) == 1) {
+            startValue
+          } else {
+            startValue[idx]
+          }
+        ret$ini <-
+          append(
+            ret$ini,
+            .nlmixrFormulaExpandStartParamSingle(startName=currentParamName, startValue=currentStartValue, param=NA)$ini
+          )
+        modelString <-
+          c(modelString, sprintf("%s*(%s == '%s')", currentParamName, param, paramLabel[idx]))
+      }
+      ret$model <-
+        list(str2lang(sprintf(
+          "%s <- %s", startName, paste(modelString, collapse="+")
+        )))
+    } else {
+      stop("Can only handle factors for fixed effect grouping levels")
+    }
+  }
+  ret
+}
+
+.nlmixrFormulaExpandStartParam <- function(start, param, data) {
+  paramExpanded <- .paramExpand(param)
+  stopifnot(all(names(paramExpanded) %in% names(start)))
+  ret <- list()
+  for (currentStart in names(start)) {
+    ret <-
+      append(
+        ret,
+        list(.nlmixrFormulaExpandStartParamSingle(
+          startName=currentStart,
+          startValue=start[[currentStart]],
+          param=paramExpanded[currentStart], # this will be NA if it does not exist
+          data=data
+        ))
+      )
+  }
+  ret
+}
+
+.nlmixrFormulaSetupIniFixed <- function(start, base=str2lang("{}")) {
+  stopifnot(length(start) > 0)
+  stopifnot(is.list(start))
+  for (idxOuter in seq_along(start)) {
+    for (idxInner in seq_along(start[[idxOuter]]$ini)) {
+      base[[length(base) + 1]] <- start[[idxOuter]]$ini[[idxInner]]
+    }
+  }
+  base
+}
+
+.nlmixrFormulaSetupIniRandom <- function(ranefDefinition, base=str2lang("{}")) {
+  stopifnot(is.list(ranefDefinition))
+  for (currentRanef in ranefDefinition) {
+    base[[length(base) + 1]] <-
+      str2lang(paste(currentRanef$ranefVar, "~", currentRanef$start))
+  }
+  base
+}
+
+.nlmixrFormulaSetupModel <- function(start, predictor, residualModel, predictorVar="value", param, data) {
+  stopifnot(inherits(residualModel, "formula"))
+  # a one-sided formula
+  stopifnot(length(residualModel) == 2)
+  
+  predLine <- str2lang(paste(predictorVar, " <- ."))
+  predLine[[3]] <- predictor[[1]]
+  residualLine <- str2lang(paste(predictorVar, "~."))
+  residualLine[[3]] <- residualModel[[2]]
+  
+  base <- str2lang("{}")
+  # Add any lines needed from the 'start'
+  for (currentStart in start) {
+    if (!is.null(currentStart$model[[1]])) {
+      base[[length(base) + 1]] <- currentStart$model[[1]]
+    }
+  }
+  base[[length(base) + 1]] <- predLine
+  base[[length(base) + 1]] <- residualLine
+  base
+}
diff --git a/R/nlmixr_formula.R b/R/nlmixr_formula.R
deleted file mode 100644
index cd35ac7..0000000
--- a/R/nlmixr_formula.R
+++ /dev/null
@@ -1,280 +0,0 @@
-#' A simple formula-based interface for nlmixr2
-#' 
-#' @details
-#' The formula is given with different notation than typical formulas.  The
-#' formula notation is inspired by and similar to \code{lme4::nlmer()}.  It is a
-#' 3-part formula: \code{dependent_variable~predictor_equation~random_effects}.
-#' 
-#' The \code{dependent_variable} is any variable in the dataset.  It may not
-#' include any math; for example, \code{log(DV)} is not allowed.
-#' 
-#' The \code{predictor_equation} is any valid math, and it will be used directly
-#' in the nlmixr2 model.
-#' 
-#' The \code{random_effects} are one or more random effect parameters defined by
-#' putting the parameter in parentheses and putting a vertical bar and the
-#' grouping parameter.  Only one grouping parameter is allowed for all random
-#' effects.  An example would be \code{(slope|ID)} to estimate a random effect
-#' parameter named "slope" for each "ID" in the data.
-#' 
-#' @param object The formula defining the model (see details)
-#' @param data The data to fit
-#' @param start A named list of starting estimates.  The names define the
-#'   parameters in the model.  If a single parameter estimate is desired, it can
-#'   be given here.  If a parameter estimate per factor level is desired, either
-#'   a single starting estimate can be given across all factor levels or one
-#'   estimate may be given per factor level.  (Specify the factors with the
-#'   \code{param} argument.)
-#' @param param A formula or list of two-sided formulas giving the model used
-#'   for parameters.  If a parameter is a simple fixed effect, only, then it
-#'   should not be included here.  If a parameter should have a separate
-#'   estimate per level of a factor, give that as the two-sided formula here.
-#' @inheritDotParams nlmixr2::nlmixr
-#' @param residual_model The residual model formula to use as a one-sided formula.
-#' @return The model fit from \code{nlmixr()}
-#' @examples
-#' nlmixr_formula(
-#'   height ~ (Asym+Asym_re)+(R0-(Asym+Asym_re))*exp(-exp(lrc)*age) ~ (Asym_re|Seed),
-#'   data = Loblolly,
-#'   start = list(Asym = 103, R0 = -8.5, lrc = -3.3, add_err=1),
-#'   est="focei"
-#' )
-#' @export
-nlmixr_formula <- function(object, data, start, param=NULL, ..., residual_model=~add(add_err)) {
-  parsed_formula <- nlmixr_formula_parser(object)
-  # Add "TIME", if needed to the data
-  if (!("TIME" %in% names(data))) {
-    data$TIME <- seq_len(nrow(data))
-  }
-  # Setup DV
-  data <- rename_or_overwrite(data, new_name="DV", old_name=parsed_formula$DV)
-  
-  # Setup the random effects
-  ranef_group <- NULL
-  for (current_ranef in parsed_formula$ranef) {
-    if (is.null(ranef_group)) {
-      ranef_group <- current_ranef$ranef_group
-    } else {
-      # only one random effect grouping variable is allowed
-      stopifnot(current_ranef$ranef_group == ranef_group)
-    }
-  }
-  data <- rename_or_overwrite(data, new_name="ID", old_name=ranef_group)
-  start_all <- nlmixr_formula_expand_start_param(start=start, param=param, data=data)
-  ini_fixed <- nlmixr_formula_setup_ini_fixed(start=start_all)
-  ini_complete <-
-    nlmixr_formula_setup_ini_random(
-      ranef_definition=parsed_formula$ranef,
-      base=ini_fixed
-    )
-  model_complete <-
-    nlmixr_formula_setup_model(
-      start=start_all,
-      predictor=parsed_formula$predictor,
-      residual_model=residual_model,
-      param=param
-    )
-  
-  # Build up the model function  
-  model_out <- function() {
-    ini()
-    model()
-  }
-  body(model_out)[[2]][[2]] <- ini_complete
-  body(model_out)[[3]][[2]] <- model_complete
-  nlmixr2est::nlmixr(object=model_out, data=data, ...)
-}
-
-rename_or_overwrite <- function(data, new_name, old_name) {
-  char_old <- as.character(old_name)
-  stopifnot(char_old %in% names(data))
-  if (char_old != new_name) {
-    if (new_name %in% names(data)) {
-      warning(new_name, " is in the data and will be overwritten with ", old_name)
-    }
-    data[[new_name]] <- data[[char_old]]
-  }
-  stopifnot(new_name %in% names(data))
-  data
-}
-
-nlmixr_formula_parser <- function(object) {
-  stopifnot(inherits(object, "formula"))
-  # Confirm that the formula looks like it should and break it up into its
-  # component parts
-  stopifnot(length(object) == 3)
-  stopifnot(identical(as.name("~"), object[[1]]))
-  ranef_part <- object[[3]]
-  main_formula_part <- object[[2]]
-  stopifnot(length(main_formula_part) == 3)
-  stopifnot(identical(as.name("~"), main_formula_part[[1]]))
-  dv_part <- main_formula_part[[2]]
-  predictor_part <- main_formula_part[[3]]
-  
-  # We cannot yet handle DV that are not a NAME (no manipulations are done
-  # within the data)
-  stopifnot(is.name(dv_part))
-  
-  list(
-    DV=dv_part,
-    predictor=list(predictor_part),
-    ranef=nlmixr_formula_parser_ranef(ranef_part)
-  )
-}
-
-nlmixr_formula_parser_ranef <- function(object) {
-  stopifnot(is.call(object))
-  if (object[[1]] == as.name("+")) {
-    # multiple random effects being parsed, separate them
-    ret <-
-      append(
-        nlmixr_formula_parser_ranef(object[[2]]),
-        nlmixr_formula_parser_ranef(object[[3]])
-      )
-  } else if (object[[1]] == as.name("(")) {
-    stopifnot(length(object) == 2)
-    inner_ranef_spec <- object[[2]]
-    stopifnot(is.call(inner_ranef_spec))
-    stopifnot(length(inner_ranef_spec) == 3)
-    stopifnot(inner_ranef_spec[[1]] == as.name("|"))
-    stopifnot(is.name(inner_ranef_spec[[2]]))
-    stopifnot(is.name(inner_ranef_spec[[3]]))
-    ret <-
-      list(
-        list(
-          ranef_var=inner_ranef_spec[[2]],
-          ranef_group=inner_ranef_spec[[3]],
-          # Give the user control of start in the future
-          start=1
-        )
-      )
-  } else {
-    stop("Invalid random effect") # nocov
-  }
-  ret
-}
-
-param_expand <- function(param) {
-  if (is.null(param)) {
-    character()
-  } else if (is.list(param)) {
-    unlist(lapply(param, param_expand))
-  } else if (inherits(param, "formula")) {
-    stopifnot(length(param) == 3)
-    stopifnot(is.name(param[[3]]))
-    varnames <- all.vars(param[[2]])
-    setNames(rep(as.character(param[[3]]), length(varnames)), nm=varnames)
-  } else {
-    stop("Cannot expand param") # nocov
-  }
-}
-
-nlmixr_formula_expand_start_param_single <- function(start_name, start_value, param, data) {
-  stopifnot(is.character(start_name))
-  stopifnot(is.numeric(start_value))
-  if (is.na(param)) {
-    stopifnot(length(start_value) == 1)
-    ret <-
-      list(
-        ini=list(str2lang(paste(start_name, "<-", start_value))),
-        model=list(NULL)
-      )
-  } else {
-    stopifnot(is.character(param))
-    stopifnot(length(param) == 1)
-    stopifnot(param %in% names(data))
-    if (any(is.na(data[[param]]))) {
-      stop("NA found in data column:", param)
-    }
-    if (is.factor(data[[param]])) {
-      param_label <- levels(data[[param]])
-      stopifnot(length(start_value) %in% c(1, length(param_label)))
-      ret <- list(ini=list(), model=list())
-      model_string <- character()
-      for (idx in seq_along(param_label)) {
-        current_param_name <- make.names(paste(start_name, param, param_label[idx]))
-        current_start_value <-
-          if (length(start_value) == 1) {
-            start_value
-          } else {
-            start_value[idx]
-          }
-        ret$ini <-
-          append(
-            ret$ini,
-            nlmixr_formula_expand_start_param_single(start_name=current_param_name, start_value=current_start_value, param=NA)$ini
-          )
-        model_string <-
-          c(model_string, sprintf("%s*(%s == '%s')", current_param_name, param, param_label[idx]))
-      }
-      ret$model <-
-        list(str2lang(sprintf(
-          "%s <- %s", start_name, paste(model_string, collapse="+")
-        )))
-    } else {
-      stop("Can only handle factors for fixed effect grouping levels")
-    }
-  }
-  ret
-}
-
-nlmixr_formula_expand_start_param <- function(start, param, data) {
-  param_expanded <- param_expand(param)
-  stopifnot(all(names(param_expanded) %in% names(start)))
-  ret <- list()
-  for (current_start in names(start)) {
-    ret <-
-      append(
-        ret,
-        list(nlmixr_formula_expand_start_param_single(
-          start_name=current_start,
-          start_value=start[[current_start]],
-          param=param_expanded[current_start], # this will be NA if it does not exist
-          data=data
-        ))
-      )
-  }
-  ret
-}
-
-nlmixr_formula_setup_ini_fixed <- function(start, base=str2lang("{}")) {
-  stopifnot(length(start) > 0)
-  stopifnot(is.list(start))
-  for (idx_outer in seq_along(start)) {
-    for (idx_inner in seq_along(start[[idx_outer]]$ini)) {
-      base[[length(base) + 1]] <- start[[idx_outer]]$ini[[idx_inner]]
-    }
-  }
-  base
-}
-
-nlmixr_formula_setup_ini_random <- function(ranef_definition, base=str2lang("{}")) {
-  stopifnot(is.list(ranef_definition))
-  for (current_ranef in ranef_definition) {
-    base[[length(base) + 1]] <-
-      str2lang(paste(current_ranef$ranef_var, "~", current_ranef$start))
-  }
-  base
-}
-
-nlmixr_formula_setup_model <- function(start, predictor, residual_model, predictor_var="value", param, data) {
-  stopifnot(inherits(residual_model, "formula"))
-  # a one-sided formula
-  stopifnot(length(residual_model) == 2)
-  
-  pred_line <- str2lang(paste(predictor_var, " <- ."))
-  pred_line[[3]] <- predictor[[1]]
-  residual_line <- str2lang(paste(predictor_var, "~."))
-  residual_line[[3]] <- residual_model[[2]]
-  
-  base <- str2lang("{}")
-  # Add any lines needed from the 'start'
-  for (current_start in start) {
-    if (!is.null(current_start$model[[1]])) {
-      base[[length(base) + 1]] <- current_start$model[[1]]
-    }
-  }
-  base[[length(base) + 1]] <- pred_line
-  base[[length(base) + 1]] <- residual_line
-  base
-}
diff --git a/man/nlmixr_formula.Rd b/man/nlmixr_formula.Rd
deleted file mode 100644
index f892daf..0000000
--- a/man/nlmixr_formula.Rd
+++ /dev/null
@@ -1,71 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/nlmixr_formula.R
-\name{nlmixr_formula}
-\alias{nlmixr_formula}
-\title{A simple formula-based interface for nlmixr2}
-\usage{
-nlmixr_formula(
-  object,
-  data,
-  start,
-  param = NULL,
-  ...,
-  residual_model = ~add(add_err)
-)
-}
-\arguments{
-\item{object}{The formula defining the model (see details)}
-
-\item{data}{The data to fit}
-
-\item{start}{A named list of starting estimates.  The names define the
-parameters in the model.  If a single parameter estimate is desired, it can
-be given here.  If a parameter estimate per factor level is desired, either
-a single starting estimate can be given across all factor levels or one
-estimate may be given per factor level.  (Specify the factors with the
-\code{param} argument.)}
-
-\item{param}{A formula or list of two-sided formulas giving the model used
-for parameters.  If a parameter is a simple fixed effect, only, then it
-should not be included here.  If a parameter should have a separate
-estimate per level of a factor, give that as the two-sided formula here.}
-
-\item{...}{
-  Arguments passed on to \code{\link[nlmixr2:reexports]{nlmixr2::nlmixr}}
-  \describe{
-    \item{\code{}}{}
-  }}
-
-\item{residual_model}{The residual model formula to use as a one-sided formula.}
-}
-\value{
-The model fit from \code{nlmixr()}
-}
-\description{
-A simple formula-based interface for nlmixr2
-}
-\details{
-The formula is given with different notation than typical formulas.  The
-formula notation is inspired by and similar to \code{lme4::nlmer()}.  It is a
-3-part formula: \code{dependent_variable~predictor_equation~random_effects}.
-
-The \code{dependent_variable} is any variable in the dataset.  It may not
-include any math; for example, \code{log(DV)} is not allowed.
-
-The \code{predictor_equation} is any valid math, and it will be used directly
-in the nlmixr2 model.
-
-The \code{random_effects} are one or more random effect parameters defined by
-putting the parameter in parentheses and putting a vertical bar and the
-grouping parameter.  Only one grouping parameter is allowed for all random
-effects.  An example would be \code{(slope|ID)} to estimate a random effect
-parameter named "slope" for each "ID" in the data.
-}
-\examples{
-nlmixr_formula(
-  height ~ (Asym+Asym_re)+(R0-(Asym+Asym_re))*exp(-exp(lrc)*age) ~ (Asym_re|Seed),
-  data = Loblolly,
-  start = list(Asym = 103, R0 = -8.5, lrc = -3.3, add_err=1),
-  est="focei"
-)
-}

From 3f209f49d9c51bb2547180697997aaf0dfd1efa5 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Thu, 1 Sep 2022 21:30:44 -0400
Subject: [PATCH 5/7] Code style updates

---
 man/nlmixrFormula.Rd | 71 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 man/nlmixrFormula.Rd

diff --git a/man/nlmixrFormula.Rd b/man/nlmixrFormula.Rd
new file mode 100644
index 0000000..aea32bb
--- /dev/null
+++ b/man/nlmixrFormula.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{nlmixrFormula}
+\alias{nlmixrFormula}
+\title{A simple formula-based interface for nlmixr2}
+\usage{
+nlmixrFormula(
+  object,
+  data,
+  start,
+  param = NULL,
+  ...,
+  residualModel = ~add(addErr)
+)
+}
+\arguments{
+\item{object}{The formula defining the model (see details)}
+
+\item{data}{The data to fit}
+
+\item{start}{A named list of starting estimates.  The names define the
+parameters in the model.  If a single parameter estimate is desired, it can
+be given here.  If a parameter estimate per factor level is desired, either
+a single starting estimate can be given across all factor levels or one
+estimate may be given per factor level.  (Specify the factors with the
+\code{param} argument.)}
+
+\item{param}{A formula or list of two-sided formulas giving the model used
+for parameters.  If a parameter is a simple fixed effect, only, then it
+should not be included here.  If a parameter should have a separate
+estimate per level of a factor, give that as the two-sided formula here.}
+
+\item{...}{
+  Arguments passed on to \code{\link[nlmixr2:reexports]{nlmixr2::nlmixr}}
+  \describe{
+    \item{\code{}}{}
+  }}
+
+\item{residualModel}{The residual model formula to use as a one-sided formula.}
+}
+\value{
+The model fit from \code{nlmixr()}
+}
+\description{
+A simple formula-based interface for nlmixr2
+}
+\details{
+The formula is given with different notation than typical formulas.  The
+formula notation is inspired by and similar to \code{lme4::nlmer()}.  It is a
+3-part formula: \code{dependentVariable~predictorEquation~randomEffects}.
+
+The \code{dependentVariable} is any variable in the dataset.  It may not
+include any math; for example, \code{log(DV)} is not allowed.
+
+The \code{predictorEquation} is any valid math, and it will be used directly
+in the nlmixr2 model.
+
+The \code{randomEffects} are one or more random effect parameters defined by
+putting the parameter in parentheses and putting a vertical bar and the
+grouping parameter.  Only one grouping parameter is allowed for all random
+effects.  An example would be \code{(slope|ID)} to estimate a random effect
+parameter named "slope" for each "ID" in the data.
+}
+\examples{
+nlmixrFormula(
+  height ~ (Asym+AsymRe)+(R0-(Asym+AsymRe))*exp(-exp(lrc)*age) ~ (AsymRe|Seed),
+  data = Loblolly,
+  start = list(Asym = 103, R0 = -8.5, lrc = -3.3, addErr=1),
+  est="focei"
+)
+}

From 5cb48e35d4e3bc3a316376517ce53a20a77532b6 Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Thu, 1 Sep 2022 21:57:36 -0400
Subject: [PATCH 6/7] Expand documentation

---
 R/nlmixrFormula.R                        | 95 +++++++++++++++++++-----
 man/dot-nlmixrFormulaExpandStartParam.Rd | 26 +++++++
 man/dot-nlmixrFormulaParser.Rd           | 24 ++++++
 man/dot-nlmixrFormulaParserRanef.Rd      | 25 +++++++
 man/dot-nlmixrFormulaSetupIniFixed.Rd    | 20 +++++
 man/dot-nlmixrFormulaSetupModel.Rd       | 34 +++++++++
 man/dot-renameOrOverwrite.Rd             | 25 +++++++
 7 files changed, 230 insertions(+), 19 deletions(-)
 create mode 100644 man/dot-nlmixrFormulaExpandStartParam.Rd
 create mode 100644 man/dot-nlmixrFormulaParser.Rd
 create mode 100644 man/dot-nlmixrFormulaParserRanef.Rd
 create mode 100644 man/dot-nlmixrFormulaSetupIniFixed.Rd
 create mode 100644 man/dot-nlmixrFormulaSetupModel.Rd
 create mode 100644 man/dot-renameOrOverwrite.Rd

diff --git a/R/nlmixrFormula.R b/R/nlmixrFormula.R
index 88aec24..0009fe9 100644
--- a/R/nlmixrFormula.R
+++ b/R/nlmixrFormula.R
@@ -85,6 +85,15 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   nlmixr2est::nlmixr(object=modelOut, data=data, ...)
 }
 
+#' Rename a column in a dataset, optionally overwriting it if the column does
+#' not exist
+#' 
+#' @param data The dataset to modify
+#' @param newName,oldName The new and old column names
+#' @return data with \code{data[[newName]] <- data[[charOld]]}
+#' @keywords Internal
+#' @examples
+#' .renameOrOverwrite(data.frame(A=1), newName="B", oldName="A")
 .renameOrOverwrite <- function(data, newName, oldName) {
   charOld <- as.character(oldName)
   stopifnot(charOld %in% names(data))
@@ -98,6 +107,15 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   data
 }
 
+#' Parse the formula to extract the dependent variable, predictor, and random
+#' effects
+#'
+#' @param object The formula to parse
+#' @return A list with names of "DV", "predictor", and "ranef" which are each
+#'   part of the formula broken down into calls
+#' @keywords Internal
+#' @examples
+#' .nlmixrFormulaParser(a~b+c~(c|id))
 .nlmixrFormulaParser <- function(object) {
   stopifnot(inherits(object, "formula"))
   # Confirm that the formula looks like it should and break it up into its
@@ -122,6 +140,17 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   )
 }
 
+#' Parse the random effects part of a formula
+#'
+#' @param object The formula to parse
+#' @return An unnamed list with one element per random effect.  The elements
+#'   each have names of "ranefVar", "ranefGroup", and "start" indicating the
+#'   variable with the random effect, the grouping variable for the random
+#'   effect, and the starting value for the standard deviation of the random
+#'   effect.
+#' @keywords Internal
+#' @examples
+#' .nlmixrFormulaParserRanef(str2lang("(c|id)+(d|id2)"))
 .nlmixrFormulaParserRanef <- function(object) {
   stopifnot(is.call(object))
   if (object[[1]] == as.name("+")) {
@@ -169,6 +198,35 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   }
 }
 
+#' Expand parameters to include their factor representations, if applicable.
+#' 
+#' @param start the starting values for the model
+#' @param startName The base name for the parameter
+#' @param startValue The initial value for the base parameter
+#' @param param The parameter in the model
+#' @param data The dataset
+#' @return A list with the ini and model parts needed to use the parameter
+#' @keywords Internal
+.nlmixrFormulaExpandStartParam <- function(start, param, data) {
+  paramExpanded <- .paramExpand(param)
+  stopifnot(all(names(paramExpanded) %in% names(start)))
+  ret <- list()
+  for (currentStart in names(start)) {
+    ret <-
+      append(
+        ret,
+        list(.nlmixrFormulaExpandStartParamSingle(
+          startName=currentStart,
+          startValue=start[[currentStart]],
+          param=paramExpanded[currentStart], # this will be NA if it does not exist
+          data=data
+        ))
+      )
+  }
+  ret
+}
+
+#' @describeIn .nlmixrFormulaExpandStartParam Expand a single parameter in a model using dataset factors, if applicable
 .nlmixrFormulaExpandStartParamSingle <- function(startName, startValue, param, data) {
   stopifnot(is.character(startName))
   stopifnot(is.numeric(startValue))
@@ -218,25 +276,12 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   ret
 }
 
-.nlmixrFormulaExpandStartParam <- function(start, param, data) {
-  paramExpanded <- .paramExpand(param)
-  stopifnot(all(names(paramExpanded) %in% names(start)))
-  ret <- list()
-  for (currentStart in names(start)) {
-    ret <-
-      append(
-        ret,
-        list(.nlmixrFormulaExpandStartParamSingle(
-          startName=currentStart,
-          startValue=start[[currentStart]],
-          param=paramExpanded[currentStart], # this will be NA if it does not exist
-          data=data
-        ))
-      )
-  }
-  ret
-}
-
+#' Setup the ini() part of the model for fixed effects
+#' 
+#' @param start The starting estimates for the model
+#' @param base The initial basis for the ini definition
+#' @return The inside of the ini() part of the model
+#' @keywords Internal
 .nlmixrFormulaSetupIniFixed <- function(start, base=str2lang("{}")) {
   stopifnot(length(start) > 0)
   stopifnot(is.list(start))
@@ -248,6 +293,8 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   base
 }
 
+#' @describeIn .nlmixrFormulaSetupIniFixed Setup the ini() part of the model for fixed effects
+#' @param ranefDefinition The random effect definitions
 .nlmixrFormulaSetupIniRandom <- function(ranefDefinition, base=str2lang("{}")) {
   stopifnot(is.list(ranefDefinition))
   for (currentRanef in ranefDefinition) {
@@ -257,6 +304,16 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   base
 }
 
+#' Setup the model() part of the model
+#' 
+#' @param start The starting estimates (used when fixed effects need more
+#'   defition like for factors)
+#' @param predictor The predictor from the formula
+#' @param residualModel The residual model definition
+#' @param predictorVar The variable in the data for the predictor
+#' @param data The data used in the model
+#' @return the interior of the model()
+#' @keywords Internal
 .nlmixrFormulaSetupModel <- function(start, predictor, residualModel, predictorVar="value", param, data) {
   stopifnot(inherits(residualModel, "formula"))
   # a one-sided formula
diff --git a/man/dot-nlmixrFormulaExpandStartParam.Rd b/man/dot-nlmixrFormulaExpandStartParam.Rd
new file mode 100644
index 0000000..d8f741f
--- /dev/null
+++ b/man/dot-nlmixrFormulaExpandStartParam.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{.nlmixrFormulaExpandStartParam}
+\alias{.nlmixrFormulaExpandStartParam}
+\title{Expand parameters to include their factor representations, if applicable.}
+\usage{
+.nlmixrFormulaExpandStartParam(start, param, data)
+}
+\arguments{
+\item{start}{the starting values for the model}
+
+\item{param}{The parameter in the model}
+
+\item{data}{The dataset}
+
+\item{startName}{The base name for the parameter}
+
+\item{startValue}{The initial value for the base parameter}
+}
+\value{
+A list with the ini and model parts needed to use the parameter
+}
+\description{
+Expand parameters to include their factor representations, if applicable.
+}
+\keyword{Internal}
diff --git a/man/dot-nlmixrFormulaParser.Rd b/man/dot-nlmixrFormulaParser.Rd
new file mode 100644
index 0000000..698bbd2
--- /dev/null
+++ b/man/dot-nlmixrFormulaParser.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{.nlmixrFormulaParser}
+\alias{.nlmixrFormulaParser}
+\title{Parse the formula to extract the dependent variable, predictor, and random
+effects}
+\usage{
+.nlmixrFormulaParser(object)
+}
+\arguments{
+\item{object}{The formula to parse}
+}
+\value{
+A list with names of "DV", "predictor", and "ranef" which are each
+part of the formula broken down into calls
+}
+\description{
+Parse the formula to extract the dependent variable, predictor, and random
+effects
+}
+\examples{
+.nlmixrFormulaParser(a~b+c~(c|id))
+}
+\keyword{Internal}
diff --git a/man/dot-nlmixrFormulaParserRanef.Rd b/man/dot-nlmixrFormulaParserRanef.Rd
new file mode 100644
index 0000000..25737c9
--- /dev/null
+++ b/man/dot-nlmixrFormulaParserRanef.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{.nlmixrFormulaParserRanef}
+\alias{.nlmixrFormulaParserRanef}
+\title{Parse the random effects part of a formula}
+\usage{
+.nlmixrFormulaParserRanef(object)
+}
+\arguments{
+\item{object}{The formula to parse}
+}
+\value{
+An unnamed list with one element per random effect.  The elements
+each have names of "ranefVar", "ranefGroup", and "start" indicating the
+variable with the random effect, the grouping variable for the random
+effect, and the starting value for the standard deviation of the random
+effect.
+}
+\description{
+Parse the random effects part of a formula
+}
+\examples{
+.nlmixrFormulaParserRanef(str2lang("(c|id)+(d|id2)"))
+}
+\keyword{Internal}
diff --git a/man/dot-nlmixrFormulaSetupIniFixed.Rd b/man/dot-nlmixrFormulaSetupIniFixed.Rd
new file mode 100644
index 0000000..0b6cbb5
--- /dev/null
+++ b/man/dot-nlmixrFormulaSetupIniFixed.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{.nlmixrFormulaSetupIniFixed}
+\alias{.nlmixrFormulaSetupIniFixed}
+\title{Setup the ini() part of the model for fixed effects}
+\usage{
+.nlmixrFormulaSetupIniFixed(start, base = str2lang("{}"))
+}
+\arguments{
+\item{start}{The starting estimates for the model}
+
+\item{base}{The initial basis for the ini definition}
+}
+\value{
+The inside of the ini() part of the model
+}
+\description{
+Setup the ini() part of the model for fixed effects
+}
+\keyword{Internal}
diff --git a/man/dot-nlmixrFormulaSetupModel.Rd b/man/dot-nlmixrFormulaSetupModel.Rd
new file mode 100644
index 0000000..a1fe0d7
--- /dev/null
+++ b/man/dot-nlmixrFormulaSetupModel.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{.nlmixrFormulaSetupModel}
+\alias{.nlmixrFormulaSetupModel}
+\title{Setup the model() part of the model}
+\usage{
+.nlmixrFormulaSetupModel(
+  start,
+  predictor,
+  residualModel,
+  predictorVar = "value",
+  param,
+  data
+)
+}
+\arguments{
+\item{start}{The starting estimates (used when fixed effects need more
+defition like for factors)}
+
+\item{predictor}{The predictor from the formula}
+
+\item{residualModel}{The residual model definition}
+
+\item{predictorVar}{The variable in the data for the predictor}
+
+\item{data}{The data used in the model}
+}
+\value{
+the interior of the model()
+}
+\description{
+Setup the model() part of the model
+}
+\keyword{Internal}
diff --git a/man/dot-renameOrOverwrite.Rd b/man/dot-renameOrOverwrite.Rd
new file mode 100644
index 0000000..dbc7ce8
--- /dev/null
+++ b/man/dot-renameOrOverwrite.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nlmixrFormula.R
+\name{.renameOrOverwrite}
+\alias{.renameOrOverwrite}
+\title{Rename a column in a dataset, optionally overwriting it if the column does
+not exist}
+\usage{
+.renameOrOverwrite(data, newName, oldName)
+}
+\arguments{
+\item{data}{The dataset to modify}
+
+\item{newName, oldName}{The new and old column names}
+}
+\value{
+data with \code{data[[newName]] <- data[[charOld]]}
+}
+\description{
+Rename a column in a dataset, optionally overwriting it if the column does
+not exist
+}
+\examples{
+.renameOrOverwrite(data.frame(A=1), newName="B", oldName="A")
+}
+\keyword{Internal}

From 2cd602b4a2b0762c113204bc39a4c102a99dcd6a Mon Sep 17 00:00:00 2001
From: Bill Denney <wdenney@humanpredictions.com>
Date: Fri, 2 Sep 2022 17:00:41 -0400
Subject: [PATCH 7/7] Intermediate modifications

---
 R/nlmixrFormula.R                   | 166 +++++++++++++++++---------
 tests/testthat/test-nlmixrFormula.R | 166 ++++++++++++++++++++++++++
 vignettes/nlmixrFormula.Rmd         | 174 ++++++++++++++++++++++++++++
 3 files changed, 453 insertions(+), 53 deletions(-)
 create mode 100644 tests/testthat/test-nlmixrFormula.R
 create mode 100644 vignettes/nlmixrFormula.Rmd

diff --git a/R/nlmixrFormula.R b/R/nlmixrFormula.R
index 0009fe9..d4a3141 100644
--- a/R/nlmixrFormula.R
+++ b/R/nlmixrFormula.R
@@ -42,12 +42,6 @@
 #' @export
 nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~add(addErr)) {
   parsedFormula <- .nlmixrFormulaParser(object)
-  # Add "TIME", if needed to the data
-  if (!("TIME" %in% names(data))) {
-    data$TIME <- seq_len(nrow(data))
-  }
-  # Setup DV
-  data <- .renameOrOverwrite(data, newName="DV", oldName=parsedFormula$DV)
   
   # Setup the random effects
   ranefGroup <- NULL
@@ -59,7 +53,10 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
       stopifnot(currentRanef$ranefGroup == ranefGroup)
     }
   }
-  data <- .renameOrOverwrite(data, newName="ID", oldName=ranefGroup)
+  if (missing(data)) {
+    data <- NULL
+  }
+  data <- .nlmixrFormulaDataPrep(data, dvName=parsedFormula$DV, idName=ranefGroup)
   startAll <- .nlmixrFormulaExpandStartParam(start=start, param=param, data=data)
   iniFixed <- .nlmixrFormulaSetupIniFixed(start=startAll)
   iniComplete <-
@@ -82,7 +79,31 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   }
   body(modelOut)[[2]][[2]] <- iniComplete
   body(modelOut)[[3]][[2]] <- modelComplete
-  nlmixr2est::nlmixr(object=modelOut, data=data, ...)
+  if (is.null(data)) {
+    nlmixr2est::nlmixr(object=modelOut, ...)
+  } else {
+    nlmixr2est::nlmixr(object=modelOut, data=data, ...)
+  }
+}
+
+#' Perform any required data modifications for the nlmixrFormula interface
+#' 
+#' @inheritParams nlmixr2est nlmixr2
+#' @param dvName,idName The name of the DV and ID columns for the dataset,
+#'   respectively
+#' @return A data frame modified, as needed for nlmixrFormula; if data is NULL,
+#'   return NULL
+.nlmixrFormulaDataPrep <- function(data, dvName, idName) {
+  if (is.null(data)) return(NULL)
+  # Add "TIME", if needed to the data
+  if (!("TIME" %in% names(data))) {
+    data$TIME <- seq_len(nrow(data))
+  }
+  # Setup DV
+  data <- .renameOrOverwrite(data, newName="DV", oldName=dvName)
+  # Setup ID
+  data <- .renameOrOverwrite(data, newName="ID", oldName=idName)
+  data
 }
 
 #' Rename a column in a dataset, optionally overwriting it if the column does
@@ -118,25 +139,42 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
 #' .nlmixrFormulaParser(a~b+c~(c|id))
 .nlmixrFormulaParser <- function(object) {
   stopifnot(inherits(object, "formula"))
-  # Confirm that the formula looks like it should and break it up into its
-  # component parts
+  # Confirm that the formula looks like it should
+
+  # Confirm that it is a two-sided formula
+  if (length(object) == 2) {
+    stop("formula must be two-sided")
+  }
+  
+  # break the formula up into its component parts
   stopifnot(length(object) == 3)
   stopifnot(identical(as.name("~"), object[[1]]))
-  ranefPart <- object[[3]]
-  mainFormulaPart <- object[[2]]
-  stopifnot(length(mainFormulaPart) == 3)
-  stopifnot(identical(as.name("~"), mainFormulaPart[[1]]))
-  dvPart <- mainFormulaPart[[2]]
-  predictorPart <- mainFormulaPart[[3]]
-  
+  # Split out the left and right-hand sides of the formula before determining if
+  # there are random effects
+  lhsFirst <- object[[2]]
+  rhsFirst <- object[[3]]
+  if (length(lhsFirst) == 3 && identical(as.name("~"), lhsFirst[[1]])) {
+    # the model has random effects
+    dvPart <- lhsFirst[[2]]
+    predictorPart <- lhsFirst[[3]]
+    ranefPart <- .nlmixrFormulaParserRanef(rhsFirst)
+  } else {
+    # the model does not have random effects
+    dvPart <- lhsFirst
+    predictorPart <- rhsFirst
+    ranefPart <- NULL
+  }
+
   # We cannot yet handle DV that are not a NAME (no manipulations are done
   # within the data)
-  stopifnot(is.name(dvPart))
-  
+  if (!is.name(dvPart)) {
+    stop("formula left-hand-side must be a single variable, not ", deparse(dvPart))
+  }
+
   list(
     DV=dvPart,
     predictor=list(predictorPart),
-    ranef=.nlmixrFormulaParserRanef(ranefPart)
+    ranef=ranefPart
   )
 }
 
@@ -162,23 +200,24 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
       )
   } else if (object[[1]] == as.name("(")) {
     stopifnot(length(object) == 2)
-    innerRanefSpec <- object[[2]]
-    stopifnot(is.call(innerRanefSpec))
-    stopifnot(length(innerRanefSpec) == 3)
-    stopifnot(innerRanefSpec[[1]] == as.name("|"))
-    stopifnot(is.name(innerRanefSpec[[2]]))
-    stopifnot(is.name(innerRanefSpec[[3]]))
+    # work inside the parentheses
+    ret <- .nlmixrFormulaParserRanef(object[[2]])
+  } else if (object[[1]] == as.name("|")) {
+    stopifnot(is.call(object))
+    stopifnot(length(object) == 3)
+    stopifnot(is.name(object[[2]]))
+    stopifnot(is.name(object[[3]]))
     ret <-
       list(
         list(
-          ranefVar=innerRanefSpec[[2]],
-          ranefGroup=innerRanefSpec[[3]],
+          ranefVar=object[[2]],
+          ranefGroup=object[[3]],
           # Give the user control of start in the future
           start=1
         )
       )
   } else {
-    stop("Invalid random effect") # nocov
+    stop("Invalid random effect: ", deparse(object))
   }
   ret
 }
@@ -237,6 +276,8 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
         ini=list(str2lang(paste(startName, "<-", startValue))),
         model=list(NULL)
       )
+  } else if (is.null(data)) {
+    stop("data must be given when parameters are not single fixed effects")
   } else {
     stopifnot(is.character(param))
     stopifnot(length(param) == 1)
@@ -245,30 +286,7 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
       stop("NA found in data column:", param)
     }
     if (is.factor(data[[param]])) {
-      paramLabel <- levels(data[[param]])
-      stopifnot(length(startValue) %in% c(1, length(paramLabel)))
-      ret <- list(ini=list(), model=list())
-      modelString <- character()
-      for (idx in seq_along(paramLabel)) {
-        currentParamName <- make.names(paste(startName, param, paramLabel[idx]))
-        currentStartValue <-
-          if (length(startValue) == 1) {
-            startValue
-          } else {
-            startValue[idx]
-          }
-        ret$ini <-
-          append(
-            ret$ini,
-            .nlmixrFormulaExpandStartParamSingle(startName=currentParamName, startValue=currentStartValue, param=NA)$ini
-          )
-        modelString <-
-          c(modelString, sprintf("%s*(%s == '%s')", currentParamName, param, paramLabel[idx]))
-      }
-      ret$model <-
-        list(str2lang(sprintf(
-          "%s <- %s", startName, paste(modelString, collapse="+")
-        )))
+      .nlmixrFormulaExpandStartParamFactor(startName, startValue, param, data)
     } else {
       stop("Can only handle factors for fixed effect grouping levels")
     }
@@ -276,6 +294,48 @@ nlmixrFormula <- function(object, data, start, param=NULL, ..., residualModel=~a
   ret
 }
 
+# Provide start and model setup for factor parameters
+.nlmixrFormulaExpandStartParamFactor <- function(startName, startValue, param, data) {
+  paramLabel <- levels(data[[param]])
+  stopifnot(length(startValue) %in% c(1, length(paramLabel)))
+  if (length(startValue) == 1 && !is.ordered(data[[param]])) {
+    message("ordering the parameters by factor frequency: ", startName, " with parameter ", param)
+    paramLabel <- names(rev(sort(summary(data[[param]]))))
+  }
+  ret <- list(ini=list(), model=list())
+  modelString <- character()
+  for (idx in seq_along(paramLabel)) {
+    currentParamName <- make.names(paste(startName, param, paramLabel[idx]))
+    currentStartValue <-
+      if (length(startValue) == 1 && idx == 1) {
+        startValue
+      } else if (length(startValue) == 1) {
+        0
+      } else {
+        startValue[idx]
+      }
+    ret$ini <-
+      append(
+        ret$ini,
+        .nlmixrFormulaExpandStartParamSingle(startName=currentParamName, startValue=currentStartValue, param=NA)$ini
+      )
+    # Setup for mu-referencing where the base value is estimated for all levels
+    # and other values are estimated as differences from the base
+    modelStringCurrent <-
+      if (idx == 1) {
+        currentParamName
+      } else {
+        sprintf("%s*(%s == '%s')", currentParamName, param, paramLabel[idx])
+      }
+    modelString <- c(modelString, modelStringCurrent)
+  }
+  ret$model <-
+    list(str2lang(sprintf(
+      "%s <- %s", startName, paste(modelString, collapse="+")
+    )))
+  ret
+}
+
 #' Setup the ini() part of the model for fixed effects
 #' 
 #' @param start The starting estimates for the model
diff --git a/tests/testthat/test-nlmixrFormula.R b/tests/testthat/test-nlmixrFormula.R
new file mode 100644
index 0000000..639cf95
--- /dev/null
+++ b/tests/testthat/test-nlmixrFormula.R
@@ -0,0 +1,166 @@
+test_that(".nlmixrFormulaParser breaks the formula up into the correct bits", {
+  # without random effects
+  expect_equal(
+    .nlmixrFormulaParser(object = y ~ m*x + b),
+    list(
+      DV=as.name("y"),
+      predictor=list(str2lang("m*x + b")),
+      ranef=NULL
+    )
+  )
+  # with random effects
+  expect_equal(
+    .nlmixrFormulaParser(object = y ~ m*x + b ~ (m|c)),
+    list(
+      DV=as.name("y"),
+      predictor=list(str2lang("m*x + b")),
+      ranef=
+        list(
+          list(
+            ranefVar=as.name("m"),
+            ranefGroup=as.name("c"),
+            start=1
+          )
+        )
+    )
+  )
+})
+
+test_that(".nlmixrFormulaParser gives expected errors for invalid formula", {
+  expect_error(
+    .nlmixrFormulaParser(object = ~ m*x + b),
+    regexp = "formula must be two-sided"
+  )
+  
+  # a weird amalgam of one-sided and two-sided formula that looks like a
+  # two-sided formula with simple parsing
+  expect_error(
+    .nlmixrFormulaParser(object = ~ m*x + b ~ c),
+    regexp = "formula left-hand-side must be a single variable"
+  )
+  
+  expect_error(
+    .nlmixrFormulaParser(object = m*x + b ~ c),
+    regexp = "formula left-hand-side must be a single variable, not m * x + b",
+    fixed = TRUE
+  )
+})
+
+test_that(".nlmixrFormulaParserRanef correctly parses random effects", {
+  # single random effect
+  expect_equal(
+    .nlmixrFormulaParserRanef(str2lang("c|id")),
+    list(
+      list(
+        ranefVar=as.name("c"),
+        ranefGroup=as.name("id"),
+        start=1
+      )
+    )
+  )
+  # single random effect (grouped with parentheses)
+  expect_equal(
+    .nlmixrFormulaParserRanef(str2lang("(c|id)")),
+    list(
+      list(
+        ranefVar=as.name("c"),
+        ranefGroup=as.name("id"),
+        start=1
+      )
+    )
+  )
+  expect_equal(
+    .nlmixrFormulaParserRanef(str2lang("(c|id)+(d|id2)")),
+    list(
+      list(
+        ranefVar=as.name("c"),
+        ranefGroup=as.name("id"),
+        start=1
+      ),
+      list(
+        ranefVar=as.name("d"),
+        ranefGroup=as.name("id2"),
+        start=1
+      )
+    )
+  )
+})
+
+test_that(".nlmixrFormulaParserRanef expected errors", {
+  expect_error(.nlmixrFormulaParserRanef("A"))
+  expect_error(
+    .nlmixrFormulaParserRanef(str2lang("a*b")),
+    regexp = "Invalid random effect: a * b",
+    fixed = TRUE
+  )
+})
+
+test_that("nlmixrFormula creates factor parameters correctly", {
+  # Base case with no reordering
+  d_factor <- data.frame(A=factor(c("A", "B")))
+  expect_equal(
+    .nlmixrFormulaExpandStartParamFactor(startName="myest", startValue=1, param="A", data=d_factor),
+    list(
+      ini=
+        list(
+          str2lang('myest.A.A <- 1'),
+          str2lang('myest.A.B <- 0')
+        ),
+      model=list(str2lang('myest <- myest.A.A + myest.A.B * (A == "B")'))
+    )
+  )
+
+  # reorder factors based on prevalence
+  d_factor <- data.frame(A=factor(c("A", "B", "B")))
+  expect_message(
+    v1 <- .nlmixrFormulaExpandStartParamFactor(startName="myest", startValue=1, param="A", data=d_factor),
+    regexp = "ordering the parameters by factor frequency: myest with parameter A"
+  )
+  expect_equal(
+    v1,
+    list(
+      ini=
+        list(
+          str2lang('myest.A.B <- 1'),
+          str2lang('myest.A.A <- 0')
+        ),
+      model=list(str2lang('myest <- myest.A.B + myest.A.A * (A == "A")'))
+    )
+  )
+
+  # do not reorder when start is the same length as the number of factors
+  d_factor <- data.frame(A=factor(c("A", "B", "B")))
+  expect_message(
+    v1 <- .nlmixrFormulaExpandStartParamFactor(startName="myest", startValue=c(1, 2), param="A", data=d_factor),
+    NA
+  )
+  expect_equal(
+    v1,
+    list(
+      ini=
+        list(
+          str2lang('myest.A.A <- 1'),
+          str2lang('myest.A.B <- 2')
+        ),
+      model=list(str2lang('myest <- myest.A.A + myest.A.B * (A == "B")'))
+    )
+  )
+  
+  # do not reorder when the factors are ordered
+  d_factor <- data.frame(A=ordered(c("A", "B", "B")))
+  expect_message(
+    v1 <- .nlmixrFormulaExpandStartParamFactor(startName="myest", startValue=1, param="A", data=d_factor),
+    NA
+  )
+  expect_equal(
+    v1,
+    list(
+      ini=
+        list(
+          str2lang('myest.A.A <- 1'),
+          str2lang('myest.A.B <- 0')
+        ),
+      model=list(str2lang('myest <- myest.A.A + myest.A.B * (A == "B")'))
+    )
+  )
+})
diff --git a/vignettes/nlmixrFormula.Rmd b/vignettes/nlmixrFormula.Rmd
new file mode 100644
index 0000000..fa60dd9
--- /dev/null
+++ b/vignettes/nlmixrFormula.Rmd
@@ -0,0 +1,174 @@
+---
+title: "nlmixr2 Algebraic Solutions with Formula"
+author: "Bill Denney"
+date: "`r Sys.Date()`"
+output:
+  rmarkdown::html_vignette:
+    code_folding: show
+vignette: >
+  %\VignetteIndexEntry{nlmixr2 Algebraic Solutions with Formula}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+# Introduction
+
+Some models are able to be described by simple algebraic solutions.  To simplify
+algebraic models, you can use a formula interface similar to the formula used
+with the `lme4` library.  (The formula interface is described in more detail
+below; knowledge of `lme4` is not required to use the formula interface.)
+
+# Quick start
+
+The simplest, non-trivial model is a linear model, `y = m*x + b`.  We will
+generate the data for this model and then fit it to show the simplest use case
+of the nlmixr2 formula interface.
+
+```{r quickstart-setup, class.source = 'fold-show'}
+library(nlmixr2extra)
+```
+
+```{r quickstart-data-gen}
+library(tidyverse)
+
+# Simulate the equation y = 3*x + 5 with normally-distributed residual error
+# having a standard deviation of 5
+withr::with_seed(
+  5, # standardize the random seed so that the results are reproducible
+  dSim <-
+    data.frame(
+      x = 1:100,
+      y =
+        (1:100)*3 +
+        5 +
+        rnorm(n = 100, mean = 0, sd = 5)
+    )
+)
+
+ggplot(dSim, aes(x=x, y=y)) + geom_point()
+```
+
+To fit this model requires only one line of R code:
+
+```{r quickstart-fit, class.source = 'fold-show'}
+mod <-
+  nlmixrFormula(
+    y~m*x + b,
+    data = dSim,
+    start = c(m = 3, b = 5),
+    est = "focei"
+  )
+```
+
+## Quick start, mixed-effects model
+
+A more typical use case for nlmixr2 is a mixed-effects model.  It adds
+(subject-level) random effects to the model.
+
+```{r quickstart-data-gen-nlme}
+# Setup the dataset for nonlinear mixed-effects fitting
+
+# Simulate the equation y = 3*x + 5 with normally-distributed residual error
+# having a standard deviation of 5
+withr::with_seed(
+  5, # standardize the random seed so that the results are reproducible
+  {
+    dSimSetup <-
+      data.frame(
+        id = rep(1:10, each=10),
+        x = rep(1:10, 10),
+        y = 1
+      )
+    dSimNlmePrep <-
+      nlmixrFormula(
+        y~m*x + b + bRe ~ bRe|id,
+        start = c(m=3, b=5, addErr=5),
+        data = dSimSetup,
+        est = "rxSolve"
+      )
+  }
+)
+
+# Modify the simulated results to be ready for fitting
+dSimNlme <-
+  dSimNlmePrep %>%
+  as.data.frame() %>%
+  select(id, time, x, y=sim)
+
+ggplot(dSimNlme, aes(x=x, y=y, colour=factor(id))) + geom_point()
+```
+
+```{r quickstart-fit-nlme, class.source = 'fold-show'}
+mod <-
+  nlmixrFormula(
+    y~m*x + b + bRe ~ bRe|id,
+    start = c(m=3, b=5, addErr=5),
+    data = dSimNlme,
+    est = "focei"
+  )
+
+mod
+```
+
+In this model, the fixed effects of `m` and `b` were estimated along with the
+random effect of `bRe`.
+
+# Defining the model equation (the formula)
+
+The model formula has 3 parts: the dependent variable, the equation predicting
+the dependent variable, and the optional random effects.  A full equation looks
+like the below:
+
+```
+dv~predictors~randomEffects
+```
+
+Any model part may have any parameter name (for example, the dependent variable
+does not have to be named `dv`).
+
+## Dependent variable
+
+The dependent variable may be any column in the `data`.  It must only be the
+column name and no additional modifiers.  For example, `log(param)` is not
+allowed.
+
+## Predictors
+
+Predictors may be any algebraic equation.  In general, there are no restrictions
+to the way that the equation may be written.
+
+## Random effects
+
+Random effects are not required for a model; fixed-effect-only models are
+possible.  When present, fixed and random effects must have different names.
+
+Random effects are written as a parameter a pipe (`|`) and its grouping value.
+From the quick start example above, the parameter is `bRe` and the grouping
+value is `id`, so it is written `bRe|id`.
+
+For multiple parameters, they must be setup separately.  For example, to have
+`mRe` and `bRe` both grouped by `id`, it would be defined as
+`~(mRe|id)+(bRe|id)`.
+
+# Providing the Starting estimates for the model
+
+## Fixed effects
+
+Fixed effects are defined by the `start` argument.  The `start` argument may
+either be a named vector or a named list.  If fixed effects only have a single
+starting value, then the two methods are equivalent.  `c(m=3, b=5)` is the same
+as `list(m=3, b=5)`.  If one of the fixed effects is defined by a factor
+variable (more on that later), the list may have multiple starting values such
+as `list(m=3, b=c(5, 10))`.
+
+## Random effects
+
+Random effect starting values are currently fixed at 1 without the ability to
+modify it.