From 1bba2f4f36020fc554d1e22db4a0b9183805655d Mon Sep 17 00:00:00 2001 From: chaodengusc Date: Tue, 13 Feb 2018 20:56:32 -0800 Subject: [PATCH] update the manual --- ...hakespeareWordHist.txt => Shakespeare.txt} | 0 man/FisherButterfly.Rd | 2 +- man/SRR061157_k31.Rd | 6 +-- man/SRR1301329_1M_base.Rd | 6 +-- man/SRR1301329_1M_read.Rd | 6 +-- man/SRR1301329_base.Rd | 10 +++-- man/SRR1301329_read.Rd | 4 +- man/SRR611492.Rd | 12 ++--- man/SRR611492_5M.Rd | 12 ++--- man/ShakespeareWordHist.Rd | 8 ++-- man/Twitter.Rd | 2 +- man/WillButterfly.Rd | 2 +- man/bbc.rSAC.Rd | 2 +- man/cs.rSAC.Rd | 2 +- man/ds.rSAC.Rd | 2 +- man/ds.rSAC.bootsrap.Rd | 4 +- man/fisher.alpha.Rd | 2 +- man/fisher.rSAC.Rd | 2 +- man/kmer.frac.curve.Rd | 2 +- man/kmer.frac.curve.bootstrap.Rd | 2 +- man/preseqR.interpolate.rSAC.Rd | 6 +-- man/preseqR.nonreplace.sampling.Rd | 2 +- man/preseqR.optimal.sequencing.Rd | 2 +- man/preseqR.rSAC.Rd | 6 +-- man/preseqR.rSAC.bootstrap.Rd | 2 +- man/preseqR.rSAC.sequencing.rmdup.Rd | 2 +- man/preseqR.sample.cov.Rd | 44 +++++++++---------- man/preseqR.sample.cov.bootstrap.Rd | 34 +++++++------- man/preseqR.simu.hist.Rd | 21 +++++---- man/preseqR.ztnb.em.Rd | 8 ++-- man/ztnb.rSAC.Rd | 34 +++++++------- man/ztp.rSAC.Rd | 34 +++++++------- 32 files changed, 139 insertions(+), 144 deletions(-) rename data/{ShakespeareWordHist.txt => Shakespeare.txt} (100%) diff --git a/data/ShakespeareWordHist.txt b/data/Shakespeare.txt similarity index 100% rename from data/ShakespeareWordHist.txt rename to data/Shakespeare.txt diff --git a/man/FisherButterfly.Rd b/man/FisherButterfly.Rd index ed72edb..1ac1913 100644 --- a/man/FisherButterfly.Rd +++ b/man/FisherButterfly.Rd @@ -25,4 +25,4 @@ library(preseqR) data(FisherButterfly) } -\keyword{ data } +\keyword{ datasets } diff --git a/man/SRR061157_k31.Rd b/man/SRR061157_k31.Rd index 696ebe7..5f04fe6 100644 --- a/man/SRR061157_k31.Rd +++ b/man/SRR061157_k31.Rd @@ -1,10 +1,10 @@ \name{SRR061157_k31} \alias{SRR061157_k31} \docType{data} 
-\title{k-mer counts of a metagenomic data} -\description{The k-mer counts are based on a metagenome sequencing data from +\title{\eqn{k}-mer counts of a metagenomic data} +\description{The \eqn{k}-mer counts are based on a metagenome sequencing data from Human Microbiome Project with the accession number -SRR061157. Only forward reads are used to generate the k-mer counts.} +SRR061157. Only forward reads are used to generate the \eqn{k}-mer counts.} \references{ Human Microbiome Project (\url{https://hmpdacc.org/}). } diff --git a/man/SRR1301329_1M_base.Rd b/man/SRR1301329_1M_base.Rd index 8b8cbae..bb5a4bb 100644 --- a/man/SRR1301329_1M_base.Rd +++ b/man/SRR1301329_1M_base.Rd @@ -1,9 +1,9 @@ \name{SRR1301329_1M_base} \alias{SRR1301329_1M_base} \docType{data} -\title{Coverage histogram of an exome sequencing data} -\description{The coverage histogram is based on an exome sequencing data from -Simons Foundation Autism Research Initiative with the accession number +\title{Coverage histogram of a WES data} +\description{The coverage histogram is based on a whole-exome sequencing (WES) +data from Simons Foundation Autism Research Initiative with the accession number SRR1301329. One million reads are randomly sampled from the raw data to generate this coverage histogram.} \references{ diff --git a/man/SRR1301329_1M_read.Rd b/man/SRR1301329_1M_read.Rd index 3ea82f9..eaf515c 100644 --- a/man/SRR1301329_1M_read.Rd +++ b/man/SRR1301329_1M_read.Rd @@ -1,9 +1,9 @@ \name{SRR1301329_1M_read} \alias{SRR1301329_1M_read} \docType{data} -\title{Read counts of an exome sequencing data} -\description{The read counts are based on an exome sequencing data from -Simons Foundation Autism Research Initiative with the accession number +\title{Read counts of a WES data} +\description{The read counts are based on a whole-exome sequencing (WES) data +from Simons Foundation Autism Research Initiative with the accession number SRR1301329. 
One million reads are randomly sampled from the raw data to generate the read counts.} \references{ diff --git a/man/SRR1301329_base.Rd b/man/SRR1301329_base.Rd index 7eeb63c..292dd6b 100644 --- a/man/SRR1301329_base.Rd +++ b/man/SRR1301329_base.Rd @@ -1,10 +1,12 @@ \name{SRR1301329_base} \alias{SRR1301329_base} \docType{data} -\title{Coverage histogram of an exome sequencing data} -\description{The coverage histogram is based on an exome sequencing data from -Simons Foundation Autism Research Initiative with the accession number -SRR1301329. Only forward reads are used to generate the coverage histogram.} +\title{Coverage histogram of a WES data} +\description{ +The coverage histogram is based on a whole-exome sequencing (WES) +data from Simons Foundation Autism Research Initiative with the accession number +SRR1301329. Only forward reads are used to generate the coverage histogram. +} \references{ Simons Foundation Autism Research Initiative (\url{https://www.sfari.org/}). } diff --git a/man/SRR1301329_read.Rd b/man/SRR1301329_read.Rd index e3d823a..29ae12b 100644 --- a/man/SRR1301329_read.Rd +++ b/man/SRR1301329_read.Rd @@ -1,8 +1,8 @@ \name{SRR1301329_read} \alias{SRR1301329_read} \docType{data} -\title{Read counts of an exome sequencing data} -\description{The read counts are based on an exome sequencing data from +\title{Read counts of a WES data} +\description{The read counts are based on a whole-exome sequencing data from Simons Foundation Autism Research Initiative with the accession number SRR1301329. Only forward reads are used to generate the read counts.} \references{ diff --git a/man/SRR611492.Rd b/man/SRR611492.Rd index 49a7d40..e724672 100644 --- a/man/SRR611492.Rd +++ b/man/SRR611492.Rd @@ -1,16 +1,18 @@ \name{SRR611492} \alias{SRR611492} \docType{data} -\title{Coverage histogram of a single-cell whole-genome sequencing data} -\description{The coverage histogram is based on a single-cell whole-genome -sequencing data through MALBAK protocol. 
The accession number of the raw data -is SRR1301329. Only forward reads are used to generate the coverage histogram.} +\title{Coverage histogram of a scWGS data} +\description{ +The coverage histogram is based on a single-cell whole-genome +sequencing (scWGS) data through the MALBAC protocol. The accession number of the raw +data is SRR1301329. Only forward reads are used to generate the coverage +histogram. +} \references{ Zong, C., Lu, S., Chapman, A. R., & Xie, X. S. (2012). Genome-wide detection of single-nucleotide and copy-number variations of a single human cell. Science, 338(6114), 1622-1626. } - \details{ A two-column matrix. The first column is the frequency \eqn{j = 1,2,\dots}; and the second column diff --git a/man/SRR611492_5M.Rd b/man/SRR611492_5M.Rd index 00e4bf3..49083c8 100644 --- a/man/SRR611492_5M.Rd +++ b/man/SRR611492_5M.Rd @@ -1,11 +1,13 @@ \name{SRR611492_5M} \alias{SRR611492_5M} \docType{data} -\title{Coverage histogram of a single-cell whole-genome sequencing data} -\description{The coverage histogram is based on a single-cell whole-genome -sequencing data through MALBAK protocol. The accession number of the raw data -is SRR1301329. Five million reads are randomly sampled from the raw data to -generate this coverage histogram.} +\title{Coverage histogram of a scWGS data} +\description{ +The coverage histogram is based on a single-cell whole-genome +sequencing (scWGS) data through the MALBAC protocol. The accession number of the +raw data is SRR1301329. Five million reads are randomly sampled from the raw +data to generate this coverage histogram. +} \references{ Zong, C., Lu, S., Chapman, A. R., & Xie, X. S. (2012). Genome-wide detection of single-nucleotide and copy-number variations of a single human cell. 
diff --git a/man/ShakespeareWordHist.Rd b/man/ShakespeareWordHist.Rd index d6e9873..46a2f97 100644 --- a/man/ShakespeareWordHist.Rd +++ b/man/ShakespeareWordHist.Rd @@ -1,5 +1,5 @@ -\name{ShakespeareWordHist} -\alias{ShakespeareWordHist} +\name{Shakespeare} +\alias{Shakespeare} \docType{data} \title{Shakespeare's word type frequencies} \description{The Shakespeare's word type frequencies data was from @@ -21,7 +21,7 @@ How many words did Shakespeare know?. Biometrika, 63(3), 435-447. library(preseqR) ##load data -data(ShakespeareWordHist) +data(Shakespeare) } -\keyword{ data } +\keyword{ datasets } diff --git a/man/Twitter.Rd b/man/Twitter.Rd index eefdf0c..7579a22 100644 --- a/man/Twitter.Rd +++ b/man/Twitter.Rd @@ -22,4 +22,4 @@ library(preseqR) data(Twitter) } -\keyword{ data } +\keyword{ datasets } diff --git a/man/WillButterfly.Rd b/man/WillButterfly.Rd index 4441a81..654d224 100644 --- a/man/WillButterfly.Rd +++ b/man/WillButterfly.Rd @@ -25,4 +25,4 @@ library(preseqR) data(WillButterfly) } -\keyword{ data } +\keyword{ datasets } diff --git a/man/bbc.rSAC.Rd b/man/bbc.rSAC.Rd index 4b80bdc..c68f20d 100644 --- a/man/bbc.rSAC.Rd +++ b/man/bbc.rSAC.Rd @@ -64,4 +64,4 @@ bbc2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, r-SAC, Nonparametric } +\keyword{ estimator, r-SAC, nonparametric } diff --git a/man/cs.rSAC.Rd b/man/cs.rSAC.Rd index af2f448..7d4d20e 100644 --- a/man/cs.rSAC.Rd +++ b/man/cs.rSAC.Rd @@ -66,4 +66,4 @@ chao2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, r-SAC, Nonparametric } +\keyword{ estimator, r-SAC, nonparametric } diff --git a/man/ds.rSAC.Rd b/man/ds.rSAC.Rd index 5bfee93..d414aac 100644 --- a/man/ds.rSAC.Rd +++ b/man/ds.rSAC.Rd @@ -73,4 +73,4 @@ ds2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
-\keyword{ Estimator, r-SAC, RFA, Nonparametric} +\keyword{ estimator, r-SAC, RFA, nonparametric} diff --git a/man/ds.rSAC.bootsrap.Rd b/man/ds.rSAC.bootsrap.Rd index 79b4841..b6f0bff 100644 --- a/man/ds.rSAC.bootsrap.Rd +++ b/man/ds.rSAC.bootsrap.Rd @@ -46,7 +46,7 @@ ds.rSAC.bootstrap(n, r=1, mt=20, times=30, conf=0.95) } \value{ \item{f}{ - The estimator for the number of species represented at least r times in + The estimator for the number of species represented at least \eqn{r} times in a random sample. The input of the estimator is a vector of sampling efforts t, i.e. the relative sample sizes comparing with the initial sample. For example, t = 2 means a random sample that is twice the size of the @@ -110,4 +110,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, r-SAC, RFA, Bootstrap, Nonparametric} +\keyword{ estimator, r-SAC, RFA, bootstrap, nonparametric} diff --git a/man/fisher.alpha.Rd b/man/fisher.alpha.Rd index 7419e7d..ede4970 100644 --- a/man/fisher.alpha.Rd +++ b/man/fisher.alpha.Rd @@ -47,4 +47,4 @@ fisher.alpha <- fisher.alpha(WillButterfly) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Parameter, Parametric } +\keyword{ parametric } diff --git a/man/fisher.rSAC.Rd b/man/fisher.rSAC.Rd index d4c0345..d2d1f61 100644 --- a/man/fisher.rSAC.Rd +++ b/man/fisher.rSAC.Rd @@ -64,4 +64,4 @@ fisher2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
-\keyword{ Estimator, r-SAC, Parametric } +\keyword{ estimator, r-SAC, parametric } diff --git a/man/kmer.frac.curve.Rd b/man/kmer.frac.curve.Rd index 390ab1c..1ff0944 100644 --- a/man/kmer.frac.curve.Rd +++ b/man/kmer.frac.curve.Rd @@ -74,4 +74,4 @@ kmer.frac.curve(n=SRR061157_k31, k=31, read.len=200, seq.gb=10^(6:12), r=2, mt=2 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{\eqn{k}-mer, Sample coverage, High-throughput, Metagenomics} +\keyword{k-mer, sample coverage, high-throughput, metagenomics} diff --git a/man/kmer.frac.curve.bootstrap.Rd b/man/kmer.frac.curve.bootstrap.Rd index 0000b7e..2a82700 100644 --- a/man/kmer.frac.curve.bootstrap.Rd +++ b/man/kmer.frac.curve.bootstrap.Rd @@ -86,4 +86,4 @@ kmer.frac.curve.bootstrap(n=SRR061157_k31, k=31, read.len=200, } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{\eqn{k}-mer, Sample coverage, Bootstrap, High-throughput, Metagenomics} +\keyword{k-mer, sample coverage, bootstrap, high-throughput, metagenomics} diff --git a/man/preseqR.interpolate.rSAC.Rd b/man/preseqR.interpolate.rSAC.Rd index e1833c3..b99ee11 100644 --- a/man/preseqR.interpolate.rSAC.Rd +++ b/man/preseqR.interpolate.rSAC.Rd @@ -56,12 +56,12 @@ determination of sufficient sample size. Ecology, 1459-1461. library(preseqR) ## import data -data(ShakespeareWordHist) +data(Shakespeare) ## The expected number of distinct words represented twice or more in the ## subsample -preseqR.interpolate.rSAC(n=ShakespeareWordHist, ss=1e5, r=2) +preseqR.interpolate.rSAC(n=Shakespeare, ss=1e5, r=2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
-\keyword{ Interpolation, r-SAC} +\keyword{ interpolation, r-SAC} diff --git a/man/preseqR.nonreplace.sampling.Rd b/man/preseqR.nonreplace.sampling.Rd index 4974ed0..0409342 100644 --- a/man/preseqR.nonreplace.sampling.Rd +++ b/man/preseqR.nonreplace.sampling.Rd @@ -52,4 +52,4 @@ preseqR.nonreplace.sampling(n=FisherButterfly, size=1000) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{Sampling} +\keyword{sampling} diff --git a/man/preseqR.optimal.sequencing.Rd b/man/preseqR.optimal.sequencing.Rd index 6c1ed7d..55e0d5b 100644 --- a/man/preseqR.optimal.sequencing.Rd +++ b/man/preseqR.optimal.sequencing.Rd @@ -95,4 +95,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{WGS, High-throughput, Cost-benefit} +\keyword{WGS, high-throughput, cost-benefit} diff --git a/man/preseqR.rSAC.Rd b/man/preseqR.rSAC.Rd index 6b4628c..b36ae4d 100644 --- a/man/preseqR.rSAC.Rd +++ b/man/preseqR.rSAC.Rd @@ -5,7 +5,7 @@ Best practice for \eqn{r}-SAC -- a fast version } \description{ -\eqn{preseqR.rSAC} predicts the expected number of species represented at least +\code{preseqR.rSAC} predicts the expected number of species represented at least \eqn{r} times in a random sample based on the initial sample. } \usage{ @@ -21,7 +21,7 @@ preseqR.rSAC(n, r=1, mt=20, size=SIZE.INIT, mu=MU.INIT) ascending order. } \item{mt}{ - An positive integer constraining possible rational function + A positive integer constraining possible rational function approximations. Default is 20. } \item{r}{ @@ -85,4 +85,4 @@ estimator2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
-\keyword{ Estimator, r-SAC, RFA, ZTNB, Best practice} +\keyword{ estimator, r-SAC, RFA, ZTNB, best practice} diff --git a/man/preseqR.rSAC.bootstrap.Rd b/man/preseqR.rSAC.bootstrap.Rd index dbd2636..fe29d0c 100644 --- a/man/preseqR.rSAC.bootstrap.Rd +++ b/man/preseqR.rSAC.bootstrap.Rd @@ -117,4 +117,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, r-SAC, Bootstrap, Best practice} +\keyword{ estimator, r-SAC, bootstrap, best practice} diff --git a/man/preseqR.rSAC.sequencing.rmdup.Rd b/man/preseqR.rSAC.sequencing.rmdup.Rd index 67469cc..9066e5b 100644 --- a/man/preseqR.rSAC.sequencing.rmdup.Rd +++ b/man/preseqR.rSAC.sequencing.rmdup.Rd @@ -122,4 +122,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{r-SAC, Duplicates, WES, WGS, High-throughput} +\keyword{r-SAC, duplicates, WES, WGS, high-throughput} diff --git a/man/preseqR.sample.cov.Rd b/man/preseqR.sample.cov.Rd index 9a89d31..6cfb267 100644 --- a/man/preseqR.sample.cov.Rd +++ b/man/preseqR.sample.cov.Rd @@ -2,12 +2,11 @@ \alias{preseqR.sample.cov} %- Also NEED an '\alias' for EACH other topic documented here. \title{ -Estimating the probability of observing a species represented at least r times -in a random sample + Predicting generalized sample coverage } \description{ -The function estimates the probability of observing a species represented at -least r times in a random sample + \code{preseqR.sample.cov} predicts the probability of observing a species + represented at least \eqn{r} times in a random sample. } \usage{ preseqR.sample.cov(n, r=1, mt=20) @@ -17,7 +16,7 @@ least r times in a random sample \item{n}{ A two-column matrix. 
The first column is the frequency \eqn{j = 1,2,\dots}; and the second column - is \eqn{n_j}, the number of species with each species represented \eqn{j} + is \eqn{N_j}, the number of species with each species represented \eqn{j} times in the initial sample. The first column must be sorted in an ascending order. } @@ -25,38 +24,39 @@ least r times in a random sample A positive integer. Default is 1. } \item{mt}{ - An positive integer constraining possible rational function + A positive integer constraining possible rational function approximations. Default is 20. } } \details{ Suppose a sample is given and one more individual is randomly drawn from the - population. The function estimates the probability of the species, which - represents the individual, has been observed at least \eqn{r} times in the + population. \code{preseqR.sample.cov} estimates the probability of the + species, which represents the individual, has been observed at least + \eqn{r} times in the sample. When \eqn{r = 1}, the probability is called the sample coverage. Let \eqn{N_j} be the number of species represented exactly \eqn{j} times in a sample. The probability of observing a species represented at least \eqn{r} times in the sample is estimated as \eqn{\sum_{j=r+1}^\infty jN_j / \sum_{j=1}^\infty jN_j}. The theory is - described in Mao and Lindsay (2002). For a random sample + described by Mao and Lindsay (2002). For a random sample where \eqn{N_j} is unknown, a modified rational function approximation is first used to predict the value of \eqn{N_j}. Then the estimates are substituted to obtain an estimator for the probability of observing a species represented at least \eqn{r} times in the sample. This function is the fast version of \code{\link{preseqR.sample.cov.bootstrap}}. - The function does not provide an estimate for the confidence interval. - If one needs the confidence interval, please use - \code{\link{preseqR.sample.cov.bootstrap}} instead. 
+ The function does not provide the confidence interval. To obtain the + confidence interval along with the estimates, one should use the function + \code{\link{preseqR.sample.cov.bootstrap}}. } \value{ The estimator for the probability of observing a species represented at least - r times in a sample as a function of the sample size. - The input of the estimator is a vector of sampling - efforts t, i.e. the relative sample sizes comparing with the initial sample. - For example, t = 2 means a random sample that is twice the size of the - initial sample. + \eqn{r} times in a random sample. + The input of the estimator is a vector of sampling efforts \eqn{t}, i.e., + the relative sample sizes comparing with the initial sample. + For example, \eqn{t = 2} means a random sample that is twice the size of + the initial sample. } \references{ Good, I. J. (1953). The population frequencies of species and the estimation of @@ -80,7 +80,6 @@ library(preseqR) data(FisherButterfly) ## construct the estimator for the sample coverage -## in a random sample estimator1 <- preseqR.sample.cov(FisherButterfly, r=1) ## Given a sample that is 10 times or 20 times the size of an initial samples, ## suppose one randomly draws one more individual from the population. The @@ -88,13 +87,12 @@ estimator1 <- preseqR.sample.cov(FisherButterfly, r=1) ## has been observed in the sample estimator1(c(10, 20)) -## construct the estimator for the probability of observing a species represented -## at least r times in a random sample for r = 2 +## construct the estimator estimator2 <- preseqR.sample.cov(FisherButterfly, r=2) -## the probability when the sample size is 50 times or 100 times of the initial -## sample +## the probability a species represented at least twice when the sample size +## is 50 times or 100 times of the initial sample estimator2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. 
-\keyword{ Estimator, RFA, Sample coverage} +\keyword{ estimator, RFA, sample coverage} diff --git a/man/preseqR.sample.cov.bootstrap.Rd b/man/preseqR.sample.cov.bootstrap.Rd index f65a6b4..6fbecce 100644 --- a/man/preseqR.sample.cov.bootstrap.Rd +++ b/man/preseqR.sample.cov.bootstrap.Rd @@ -2,12 +2,11 @@ \alias{preseqR.sample.cov.bootstrap} %- Also NEED an '\alias' for EACH other topic documented here. \title{ -Estimating the probability of observing a species represented at least r times -in a random sample + Predicting generalized sample coverage with bootstrap } \description{ -The function estimates the probability of observing a species represented at -least r times in a random sample + \code{preseqR.sample.cov.bootstrap} predicts the probability of observing a species + represented at least \eqn{r} times in a random sample. } \usage{ preseqR.sample.cov.bootstrap(n, r=1, mt=20, times=30, conf=0.95) @@ -17,7 +16,7 @@ least r times in a random sample \item{n}{ A two-column matrix. The first column is the frequency \eqn{j = 1,2,\dots}; and the second column - is \eqn{n_j}, the number of species with each species represented \eqn{j} + is \eqn{N_j}, the number of species with each species represented \eqn{j} times in the initial sample. The first column must be sorted in an ascending order. } @@ -25,26 +24,29 @@ least r times in a random sample A positive integer. Default is 1. } \item{mt}{ - An positive integer constraining possible rational function + A positive integer constraining possible rational function approximations. Default is 20. } \item{times}{ - The number of bootstrap samples. + The number of bootstrap samples. Default is 30. } \item{conf}{ The confidence level. Default is 0.95 } } \details{ - This is the bootstrap version of \code{\link{preseqR.sample.cov}}. The function - provides the confidence interval for estiamtes based on - \code{\link{preseqR.sample.cov}}. The confidence interval is based on a - lognormal distribution. 
+ This is the bootstrap version of \code{\link{preseqR.sample.cov}}. The bootstrap + sample is generated by randomly sampling the initial sample with replacement. + For each bootstrap sample, we construct an estimator. The median of + estimates is used as the prediction for the probability of observing a species + represented at least \eqn{r} times in a random sample. + + The confidence interval is constructed based on a lognormal distribution. } \value{ \item{f}{ The estimator for the probability of observing a species represented at least - r times in a sample as a function of the sample size. + \eqn{r} times in a sample as a function of the sample size. The input of the estimator is a vector of sampling efforts t, i.e. the relative sample sizes comparing with the initial sample. For example, t = 2 means a random sample that is twice the size of the @@ -52,7 +54,7 @@ least r times in a random sample } \item{se}{ The standard error for the estimator. The input is a vector of sampling - efforts t. The standard error depends on t. + efforts t. } \item{lb}{ The lower bound of the confidence interval.The input is a vector of sampling @@ -83,7 +85,6 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804 #data(FisherButterfly) ## construct the estimator for the sample coverage -## in a random sample # estimator1 <- preseqR.sample.cov.bootstrap(FisherButterfly, r=1) ## Given a sample that is 10 times or 20 times the size of an initial samples, ## suppose one randomly draws one more individual from the population. The @@ -97,8 +98,7 @@ sufficient representation in a random sample. 
arXiv preprint arXiv:1607.02804 # ub <- estimator1$ub(c(10, 20)) # matrix(c(lb, ub), byrow=FALSE, ncol=2) -## construct the estimator for the probability of observing a species represented -## at least r times in a random sample for r = 2 +## construct the estimator # estimator2 <- preseqR.rSAC.bootstrap(FisherButterfly, r=2) ## the probability when the sample size is 50 times or 100 times of the initial ## sample @@ -112,4 +112,4 @@ sufficient representation in a random sample. arXiv preprint arXiv:1607.02804 } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, RFA, Bootstrap, Sample coverage} +\keyword{ estimator, RFA, bootstrap, sample coverage} diff --git a/man/preseqR.simu.hist.Rd b/man/preseqR.simu.hist.Rd index 6e4bc9d..f3db8a5 100644 --- a/man/preseqR.simu.hist.Rd +++ b/man/preseqR.simu.hist.Rd @@ -2,7 +2,7 @@ \alias{preseqR.simu.hist} %- Also NEED an '\alias' for EACH other topic documented here. \title{ - Simulating a histogram + Simulation } \description{ Generating a histogram based on a Poisson mixture model. @@ -23,21 +23,20 @@ } } \details{ - The function uses a compound Poisson model to generate a sample of size n. - It assumes for each species the number of individuals captured in a sample follows - a Poisson process. The Poisson rates among species are generated by a given - function \code{FUN} per unit of sampling effort. Under this statistical - assumption, for a given sample size N, - the number of individuals in the sample for each species follow a - multinomial distributions. + \code{preseqR.simu.hist} uses a mixture of Poisson distributions to generate + a sample, which size is defined by the variable \eqn{N}. + The statistical assumption is that for each species the number of individuals + captured in a sample follows a Poisson process. + The Poisson rates among species are generated by a given + function \code{FUN} per unit of sampling effort. 
- The function \code{FUN} must take an argument indicating the number of random + \code{FUN} must take an argument indicating the number of random numbers generated and return a vector of generated numbers. } \value{ A two-column matrix. The first column is the frequency \eqn{j = 1,2,\dots}; and the second column - is \eqn{n_j}, the number of species with each species represented \eqn{j} + is \eqn{N_j}, the number of species with each species represented \eqn{j} times in the initial sample. The first column must be sorted in an ascending order. } @@ -58,4 +57,4 @@ preseqR.simu.hist(L=1e5, N=1, f) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Simulation, Sampling, Mixture of Poisson} +\keyword{ simulation, sampling, mixture of Poisson} diff --git a/man/preseqR.ztnb.em.Rd b/man/preseqR.ztnb.em.Rd index aa8ca30..71962ec 100644 --- a/man/preseqR.ztnb.em.Rd +++ b/man/preseqR.ztnb.em.Rd @@ -5,8 +5,8 @@ Fitting a zero-truncated negative binomial distribution } \description{ -This function fits a zero-truncated negative binomial (ZTNB) distribution -to the initial sample. +\code{preseqR.ztnb.em} fits a zero-truncated negative binomial (ZTNB) +distribution to the initial sample. Since the species with zero observations are missed in the sample, an EM algorithm is used to estimate the parameters assuming the number of individuals for each species follows a Negative Binomial distribution @@ -20,7 +20,7 @@ preseqR.ztnb.em(n, size = SIZE.INIT, mu = MU.INIT) \item{n}{ A two-column matrix. The first column is the frequency \eqn{j = 1,2,\dots}; and the second column - is \eqn{n_j}, the number of species with each species represented \eqn{j} + is \eqn{N_j}, the number of species with each species represented \eqn{j} times in the initial sample. The first column must be sorted in an ascending order. 
} @@ -67,4 +67,4 @@ preseqR.ztnb.em(FisherButterfly) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Zero truncated negative binomial, EM} +\keyword{ negative binomial, EM} diff --git a/man/ztnb.rSAC.Rd b/man/ztnb.rSAC.Rd index 9e28167..bd68491 100644 --- a/man/ztnb.rSAC.Rd +++ b/man/ztnb.rSAC.Rd @@ -2,12 +2,11 @@ \alias{ztnb.rSAC} %- Also NEED an '\alias' for EACH other topic documented here. \title{ -Estimating the expected number of species represented r or more times + ZTNB estimator } \description{ -The function estimates the expected number of species represented at least -r times in a random sample based on the initial sample using zero-truncated -negative binomial (ZTNB) model. +\code{ztnb.rSAC} predicts the expected number of species represented at least +\eqn{r} times in a random sample, based on the initial sample. } \usage{ ztnb.rSAC(n, r=1, size=SIZE.INIT, mu=MU.INIT) @@ -17,7 +16,7 @@ ztnb.rSAC(n, r=1, size=SIZE.INIT, mu=MU.INIT) \item{n}{ A two-column matrix. The first column is the frequency \eqn{j = 1,2,\dots}; and the second column - is \eqn{n_j}, the number of species with each species represented \eqn{j} + is \eqn{N_j}, the number of species with each species represented \eqn{j} times in the initial sample. The first column must be sorted in an ascending order. } @@ -46,11 +45,10 @@ ztnb.rSAC(n, r=1, size=SIZE.INIT, mu=MU.INIT) Daley T. and Smith AD. (2013). } \value{ - The constructed estimator for the number of species represneted at least r - times in a sample. The input of the estimator is a vector of sampling - efforts t, i.e. the relative sample sizes comparing with the initial sample. - For example, t = 2 means a random sample that is twice the size of the - initial sample. + The estimator for the \eqn{r}-SAC. The input of the estimator is a vector of + sampling efforts \eqn{t}, i.e., the relative sample sizes comparing with the initial + sample. 
For example, \eqn{t = 2} means a random sample that is twice the size of + the initial sample. } \author{ Chao Deng @@ -73,20 +71,18 @@ library(preseqR) ## import data data(FisherButterfly) -## construct the estimator for the number of species represented at least once -## in a random sample +## construct the estimator for SAC ztnb1 <- ztnb.rSAC(FisherButterfly, r=1) -## The number of species represented at least once, when the sample size is -## 10 or 20 times of the initial sample +## The number of species represented at least once in a sample, +## when the sample size is 10 or 20 times of the initial sample ztnb1(c(10, 20)) -## construct the estimator for the number of species represented at least twice -## in a random sample +## construct the estimator for r-SAC ztnb2 <- ztnb.rSAC(FisherButterfly, r=2) -## The number of species represented at least twice, when the sample size is -## 50 or 100 times of the initial sample +## The number of species represented at least twice in a sample, +## when the sample size is 50 or 100 times of the initial sample ztnb2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, r-SAC, Parametric } +\keyword{ estimator, r-SAC, parametric, negative binomial} diff --git a/man/ztp.rSAC.Rd b/man/ztp.rSAC.Rd index f6310e8..bd87157 100644 --- a/man/ztp.rSAC.Rd +++ b/man/ztp.rSAC.Rd @@ -2,12 +2,11 @@ \alias{ztp.rSAC} %- Also NEED an '\alias' for EACH other topic documented here. \title{ -Estimating the expected number of species represented r or more times + ZTP estimator } \description{ -The function estimates the expected number of species represented at least -r times in a random sample based on the initial sample using the zero truncated -Poisson approach. + \code{ztp.rSAC} predicts the expected number of species represented at least +\eqn{r} times in a random sample, based on the initial sample. 
} \usage{ ztp.rSAC(n, r=1) @@ -17,7 +16,7 @@ ztp.rSAC(n, r=1) \item{n}{ A two-column matrix. The first column is the frequency \eqn{j = 1,2,\dots}; and the second column - is \eqn{n_j}, the number of species with each species represented \eqn{j} + is \eqn{N_j}, the number of species with each species represented \eqn{j} times in the initial sample. The first column must be sorted in an ascending order. } @@ -35,11 +34,10 @@ ztp.rSAC(n, r=1) we calculate the expected number of species in a random sample. } \value{ - The constructed estimator for the number of species represneted at least r - times in a sample. The input of the estimator is a vector of sampling - efforts t, i.e. the relative sample sizes comparing with the initial sample. - For example, t = 2 means a random sample that is twice the size of the - initial sample. + The estimator for the \eqn{r}-SAC. The input of the estimator is a vector of + sampling efforts \eqn{t}, i.e., the relative sample sizes comparing with the initial + sample. For example, \eqn{t = 2} means a random sample that is twice the size of + the initial sample. 
} \author{ Chao Deng @@ -58,20 +56,18 @@ library(preseqR) ## import data data(FisherButterfly) -## construct the estimator for the number of species represented at least once -## in a random sample +## construct the estimator for SAC ztp1 <- ztp.rSAC(FisherButterfly, r=1) -## The number of species represented at least once, when the sample size is -## 10 or 20 times of the initial sample +## The number of species represented at least once in a sample, +## when the sample size is 10 or 20 times of the initial sample ztp1(c(10, 20)) -## construct the estimator for the number of species represented at least twice -## in a random sample +## construct the estimator for r-SAC ztp2 <- ztp.rSAC(FisherButterfly, r=2) -## The number of species represented at least twice, when the sample size is -## 50 or 100 times of the initial sample +## The number of species represented at least twice in a sample, +## when the sample size is 50 or 100 times of the initial sample ztp2(c(50, 100)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. -\keyword{ Estimator, r-SAC, Parametric} +\keyword{ estimator, r-SAC, parametric, Poisson}