update the manual

smithlabcode · Feb 14, 2018 · 1bba2f4 · 1bba2f4
1 parent 4c5e34d
commit 1bba2f4
Show file tree

Hide file tree

Showing 32 changed files with 139 additions and 144 deletions.
diff --git a/data/ShakespeareWordHist.txt → data/Shakespeare.txt b/data/ShakespeareWordHist.txt → data/Shakespeare.txt
diff --git a/man/FisherButterfly.Rd b/man/FisherButterfly.Rd
@@ -25,4 +25,4 @@ library(preseqR)
 data(FisherButterfly)
 }
 
-\keyword{ data }
+\keyword{ datasets }
diff --git a/man/SRR061157_k31.Rd b/man/SRR061157_k31.Rd
@@ -1,10 +1,10 @@
 \name{SRR061157_k31}
 \alias{SRR061157_k31}
 \docType{data}
-\title{k-mer counts of a metagenomic data}
-\description{The k-mer counts are based on a metagenome sequencing data from
+\title{\eqn{k}-mer counts of a metagenomic data}
+\description{The \eqn{k}-mer counts are based on a metagenome sequencing data from
 Human Microbiome Project with the accession number
-SRR061157. Only forward reads are used to generate the k-mer counts.}
+SRR061157. Only forward reads are used to generate the \eqn{k}-mer counts.}
 \references{
   Human Microbiome Project (\url{https://hmpdacc.org/}).
 }

diff --git a/man/SRR1301329_1M_base.Rd b/man/SRR1301329_1M_base.Rd
@@ -1,9 +1,9 @@
 \name{SRR1301329_1M_base}
 \alias{SRR1301329_1M_base}
 \docType{data}
-\title{Coverage histogram of an exome sequencing data}
-\description{The coverage histogram is based on an exome sequencing data from
-Simons Foundation Autism Research Initiative with the accession number
+\title{Coverage histogram of a WES data}
+\description{The coverage histogram is based on a whole-exome sequencing (WES)
+data from Simons Foundation Autism Research Initiative with the accession number
 SRR1301329. One million reads are randomly sampled from the raw data to
 generate this coverage histogram.}
 \references{

diff --git a/man/SRR1301329_1M_read.Rd b/man/SRR1301329_1M_read.Rd
@@ -1,9 +1,9 @@
 \name{SRR1301329_1M_read}
 \alias{SRR1301329_1M_read}
 \docType{data}
-\title{Read counts of an exome sequencing data}
-\description{The read counts are based on an exome sequencing data from
-Simons Foundation Autism Research Initiative with the accession number
+\title{Read counts of a WES data}
+\description{The read counts are based on a whole-exome sequencing (WES) data
+from Simons Foundation Autism Research Initiative with the accession number
 SRR1301329. One million reads are randomly sampled from the raw data to
 generate the read counts.}
 \references{

diff --git a/man/SRR1301329_base.Rd b/man/SRR1301329_base.Rd
@@ -1,10 +1,12 @@
 \name{SRR1301329_base}
 \alias{SRR1301329_base}
 \docType{data}
-\title{Coverage histogram of an exome sequencing data}
-\description{The coverage histogram is based on an exome sequencing data from
-Simons Foundation Autism Research Initiative with the accession number
-SRR1301329. Only forward reads are used to generate the coverage histogram.}
+\title{Coverage histogram of a WES data}
+\description{
+The coverage histogram is based on a whole-exome sequencing (WES) 
+data from Simons Foundation Autism Research Initiative with the accession number
+SRR1301329. Only forward reads are used to generate the coverage histogram.
+}
 \references{
   Simons Foundation Autism Research Initiative (\url{https://www.sfari.org/}).
 }

diff --git a/man/SRR1301329_read.Rd b/man/SRR1301329_read.Rd
@@ -1,8 +1,8 @@
 \name{SRR1301329_read}
 \alias{SRR1301329_read}
 \docType{data}
-\title{Read counts of an exome sequencing data}
-\description{The read counts are based on an exome sequencing data from
+\title{Read counts of a WES data}
+\description{The read counts are based on a whole-exome sequencing (WES) data from
 Simons Foundation Autism Research Initiative with the accession number
 SRR1301329. Only forward reads are used to generate the read counts.}
 \references{

diff --git a/man/SRR611492.Rd b/man/SRR611492.Rd
@@ -1,16 +1,18 @@
 \name{SRR611492}
 \alias{SRR611492}
 \docType{data}
-\title{Coverage histogram of a single-cell whole-genome sequencing data}
-\description{The coverage histogram is based on a single-cell whole-genome
-sequencing data through MALBAK protocol. The accession number of the raw data
-is SRR1301329. Only forward reads are used to generate the coverage histogram.}
+\title{Coverage histogram of a scWGS data}
+\description{
+The coverage histogram is based on a single-cell whole-genome
+sequencing (scWGS) data through the MALBAC protocol. The accession number of the raw
+data is SRR611492. Only forward reads are used to generate the coverage
+histogram.
+}
 \references{
   Zong, C., Lu, S., Chapman, A. R., & Xie, X. S. (2012). Genome-wide detection
   of single-nucleotide and copy-number variations of a single human cell.
   Science, 338(6114), 1622-1626.
 }
-
 \details{
     A two-column matrix.  
     The first column is the frequency \eqn{j = 1,2,\dots}; and the second column

diff --git a/man/SRR611492_5M.Rd b/man/SRR611492_5M.Rd
@@ -1,11 +1,13 @@
 \name{SRR611492_5M}
 \alias{SRR611492_5M}
 \docType{data}
-\title{Coverage histogram of a single-cell whole-genome sequencing data}
-\description{The coverage histogram is based on a single-cell whole-genome
-sequencing data through MALBAK protocol. The accession number of the raw data
-is SRR1301329. Five million reads are randomly sampled from the raw data to
-generate this coverage histogram.}
+\title{Coverage histogram of a scWGS data}
+\description{
+The coverage histogram is based on a single-cell whole-genome
+sequencing (scWGS) data through the MALBAC protocol. The accession number of the
+raw data is SRR611492. Five million reads are randomly sampled from the raw
+data to generate this coverage histogram.
+}
 \references{
   Zong, C., Lu, S., Chapman, A. R., & Xie, X. S. (2012). Genome-wide detection
   of single-nucleotide and copy-number variations of a single human cell.

diff --git a/man/ShakespeareWordHist.Rd b/man/ShakespeareWordHist.Rd
@@ -1,5 +1,5 @@
-\name{ShakespeareWordHist}
-\alias{ShakespeareWordHist}
+\name{Shakespeare}
+\alias{Shakespeare}
 \docType{data}
 \title{Shakespeare's word type frequencies}
 \description{The Shakespeare's word type frequencies data was from 
@@ -21,7 +21,7 @@ How many words did Shakespeare know?. Biometrika, 63(3), 435-447.
 library(preseqR)
 
 ##load data
-data(ShakespeareWordHist)
+data(Shakespeare)
 }
 
-\keyword{ data }
+\keyword{ datasets }
diff --git a/man/Twitter.Rd b/man/Twitter.Rd
@@ -22,4 +22,4 @@ library(preseqR)
 data(Twitter)
 }
 
-\keyword{ data }
+\keyword{ datasets }
diff --git a/man/WillButterfly.Rd b/man/WillButterfly.Rd
@@ -25,4 +25,4 @@ library(preseqR)
 data(WillButterfly)
 }
 
-\keyword{ data }
+\keyword{ datasets }
diff --git a/man/bbc.rSAC.Rd b/man/bbc.rSAC.Rd
@@ -64,4 +64,4 @@ bbc2(c(50, 100))
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, Nonparametric }
+\keyword{ estimator, r-SAC, nonparametric }
diff --git a/man/cs.rSAC.Rd b/man/cs.rSAC.Rd
@@ -66,4 +66,4 @@ chao2(c(50, 100))
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, Nonparametric }
+\keyword{ estimator, r-SAC, nonparametric }
diff --git a/man/ds.rSAC.Rd b/man/ds.rSAC.Rd
@@ -73,4 +73,4 @@ ds2(c(50, 100))
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, RFA, Nonparametric}
+\keyword{ estimator, r-SAC, RFA, nonparametric}
diff --git a/man/ds.rSAC.bootsrap.Rd b/man/ds.rSAC.bootsrap.Rd
@@ -46,7 +46,7 @@ ds.rSAC.bootstrap(n, r=1, mt=20, times=30, conf=0.95)
 }
 \value{
   \item{f}{
-    The estimator for the number of species represented at least r times in 
+    The estimator for the number of species represented at least \eqn{r} times in 
     a random sample. The input of the estimator is a vector of sampling 
     efforts t, i.e. the relative sample sizes compared with the initial sample.
     For example, t = 2 means a random sample that is twice the size of the 
@@ -110,4 +110,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, RFA, Bootstrap, Nonparametric}
+\keyword{ estimator, r-SAC, RFA, bootstrap, nonparametric}
diff --git a/man/fisher.alpha.Rd b/man/fisher.alpha.Rd
@@ -47,4 +47,4 @@ fisher.alpha <- fisher.alpha(WillButterfly)
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Parameter, Parametric }
+\keyword{ parametric }
diff --git a/man/fisher.rSAC.Rd b/man/fisher.rSAC.Rd
@@ -64,4 +64,4 @@ fisher2(c(50, 100))
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, Parametric }
+\keyword{ estimator, r-SAC, parametric }
diff --git a/man/kmer.frac.curve.Rd b/man/kmer.frac.curve.Rd
@@ -74,4 +74,4 @@ kmer.frac.curve(n=SRR061157_k31, k=31, read.len=200, seq.gb=10^(6:12), r=2, mt=2
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{\eqn{k}-mer, Sample coverage, High-throughput, Metagenomics}
+\keyword{k-mer, sample coverage, high-throughput, metagenomics}
diff --git a/man/kmer.frac.curve.bootstrap.Rd b/man/kmer.frac.curve.bootstrap.Rd
@@ -86,4 +86,4 @@ kmer.frac.curve.bootstrap(n=SRR061157_k31, k=31, read.len=200,
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{\eqn{k}-mer, Sample coverage, Bootstrap, High-throughput, Metagenomics}
+\keyword{k-mer, sample coverage, bootstrap, high-throughput, metagenomics}
diff --git a/man/preseqR.interpolate.rSAC.Rd b/man/preseqR.interpolate.rSAC.Rd
@@ -56,12 +56,12 @@ determination of sufficient sample size. Ecology, 1459-1461.
 library(preseqR)
 
 ## import data
-data(ShakespeareWordHist)
+data(Shakespeare)
 
 ## The expected number of distinct words represented twice or more in the
 ## subsample 
-preseqR.interpolate.rSAC(n=ShakespeareWordHist, ss=1e5, r=2)
+preseqR.interpolate.rSAC(n=Shakespeare, ss=1e5, r=2)
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Interpolation, r-SAC}
+\keyword{ interpolation, r-SAC}
diff --git a/man/preseqR.nonreplace.sampling.Rd b/man/preseqR.nonreplace.sampling.Rd
@@ -52,4 +52,4 @@ preseqR.nonreplace.sampling(n=FisherButterfly, size=1000)
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{Sampling}
+\keyword{sampling}
diff --git a/man/preseqR.optimal.sequencing.Rd b/man/preseqR.optimal.sequencing.Rd
@@ -95,4 +95,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{WGS, High-throughput, Cost-benefit}
+\keyword{WGS, high-throughput, cost-benefit}
diff --git a/man/preseqR.rSAC.Rd b/man/preseqR.rSAC.Rd
@@ -5,7 +5,7 @@
   Best practice for \eqn{r}-SAC -- a fast version
 }
 \description{
-\eqn{preseqR.rSAC} predicts the expected number of species represented at least 
+\code{preseqR.rSAC} predicts the expected number of species represented at least 
 \eqn{r} times in a random sample based on the initial sample.
 }
 \usage{
@@ -21,7 +21,7 @@ preseqR.rSAC(n, r=1, mt=20, size=SIZE.INIT, mu=MU.INIT)
     ascending order.
   }
   \item{mt}{
-    An positive integer constraining possible rational function
+    A positive integer constraining possible rational function
     approximations. Default is 20.
   }
   \item{r}{
@@ -85,4 +85,4 @@ estimator2(c(50, 100))
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, RFA, ZTNB, Best practice}
+\keyword{ estimator, r-SAC, RFA, ZTNB, best practice}
diff --git a/man/preseqR.rSAC.bootstrap.Rd b/man/preseqR.rSAC.bootstrap.Rd
@@ -117,4 +117,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, r-SAC, Bootstrap, Best practice}
+\keyword{ estimator, r-SAC, bootstrap, best practice}
diff --git a/man/preseqR.rSAC.sequencing.rmdup.Rd b/man/preseqR.rSAC.sequencing.rmdup.Rd
@@ -122,4 +122,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{r-SAC, Duplicates, WES, WGS, High-throughput}
+\keyword{r-SAC, duplicates, WES, WGS, high-throughput}
diff --git a/man/preseqR.sample.cov.Rd b/man/preseqR.sample.cov.Rd
@@ -2,12 +2,11 @@
 \alias{preseqR.sample.cov}
 %- Also NEED an '\alias' for EACH other topic documented here.
 \title{
-Estimating the probability of observing a species represented at least r times
-in a random sample
+  Predicting generalized sample coverage
 }
 \description{
-The function estimates the probability of observing a species represented at
-least r times in a random sample
+  \code{preseqR.sample.cov} predicts the probability of observing a species
+  represented at least \eqn{r} times in a random sample.
 }
 \usage{
   preseqR.sample.cov(n, r=1, mt=20)
@@ -17,46 +16,47 @@ least r times in a random sample
   \item{n}{
     A two-column matrix.  
     The first column is the frequency \eqn{j = 1,2,\dots}; and the second column
-    is \eqn{n_j}, the number of species with each species represented \eqn{j}
+    is \eqn{N_j}, the number of species with each species represented \eqn{j}
     times in the initial sample. The first column must be sorted in an
     ascending order.
   }
   \item{r}{
     A positive integer. Default is 1.
   }
   \item{mt}{
-    An positive integer constraining possible rational function
+    A positive integer constraining possible rational function
     approximations. Default is 20.
   }
 }
 \details{
   Suppose a sample is given and one more individual is randomly drawn from the
-  population. The function estimates the probability of the species, which 
-  represents the individual, has been observed at least \eqn{r} times in the
+  population. \code{preseqR.sample.cov} estimates the probability that the
+  species represented by this individual has been observed at least
+  \eqn{r} times in the
   sample. When \eqn{r = 1}, the probability is called the sample coverage.
 
   Let \eqn{N_j} be the number of species represented exactly \eqn{j} times in 
   a sample. The probability of observing a species represented at
   least \eqn{r} times in the sample is estimated as 
   \eqn{\sum_{j=r+1}^\infty jN_j / \sum_{j=1}^\infty jN_j}. The theory is
-  described in Mao and Lindsay (2002). For a random sample
+  described by Mao and Lindsay (2002). For a random sample
   where \eqn{N_j} is unknown, a modified rational function approximation is
   first used to predict the value of \eqn{N_j}. Then the estimates are
   substituted to obtain an estimator for the probability of observing a species
   represented at least \eqn{r} times in the sample.
 
   This function is the fast version of \code{\link{preseqR.sample.cov.bootstrap}}.
-  The function does not provide an estimate for the confidence interval.
-  If one needs the confidence interval, please use 
-  \code{\link{preseqR.sample.cov.bootstrap}} instead.
+  The function does not provide the confidence interval. To obtain the
+  confidence interval along with the estimates, one should use the function
+  \code{\link{preseqR.sample.cov.bootstrap}}.
 }
 \value{
   The estimator for the probability of observing a species represented at least
-  r times in a sample as a function of the sample size.
-  The input of the estimator is a vector of sampling 
-  efforts t, i.e. the relative sample sizes comparing with the initial sample.
-  For example, t = 2 means a random sample that is twice the size of the 
-  initial sample.
+  \eqn{r} times in a random sample.
+  The input of the estimator is a vector of sampling efforts \eqn{t}, i.e.,
+  the relative sample sizes compared with the initial sample.
+  For example, \eqn{t = 2} means a random sample that is twice the size of
+  the initial sample.
 }
 \references{
 Good, I. J. (1953). The population frequencies of species and the estimation of
@@ -80,21 +80,19 @@ library(preseqR)
 data(FisherButterfly)
 
 ## construct the estimator for the sample coverage
-## in a random sample
 estimator1 <- preseqR.sample.cov(FisherButterfly, r=1) 
 ## Given a sample that is 10 times or 20 times the size of the initial sample,
 ## suppose one randomly draws one more individual from the population. The
 ## value of the function is the probability that the representing species 
 ## has been observed in the sample
 estimator1(c(10, 20))
 
-## construct the estimator for the probability of observing a species represented
-## at least r times in a random sample for r = 2
+## construct the estimator
 estimator2 <- preseqR.sample.cov(FisherButterfly, r=2)
-## the probability when the sample size is 50 times or 100 times of the initial
-## sample
+## the probability of observing a species represented at least twice when the
+## sample size is 50 times or 100 times that of the initial sample
 estimator2(c(50, 100))
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the
 % R documentation directory.
-\keyword{ Estimator, RFA, Sample coverage}
+\keyword{ estimator, RFA, sample coverage}