-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from EricMarcon/JSPE
JSPE
- Loading branch information
Showing
22 changed files
with
1,539 additions
and
565 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
Mhat <- | ||
function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, | ||
CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, | ||
Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, | ||
verbose = interactive(), CheckArguments = TRUE) | ||
{ | ||
# Eliminate erroneous configurations | ||
if (CheckArguments) { | ||
CheckdbmssArguments() | ||
if (CaseControl & (ReferenceType==NeighborType)) { | ||
warning("Cases and controls are identical.") | ||
return(rep(1,length(r))) | ||
} | ||
if (Quantiles & !Individual) | ||
stop("Quantiles can't be TRUE if Individual is FALSE.") | ||
} | ||
|
||
# Default r values: 64 values up to half the max distance | ||
if (is.null(r)) { | ||
if (inherits(X, "Dtable")) { | ||
# Dtable case | ||
rMax <- max(X$Dmatrix) | ||
} else { | ||
# wmppp case | ||
rMax <- diameter(X$window) | ||
} | ||
r <- rMax*c(0, 1:20, seq(22, 40, 2), seq(45, 100,5), seq(110, 200, 10), seq(220, 400, 20))/800 | ||
} | ||
|
||
# Vectors to recognize point types | ||
IsReferenceType <- X$marks$PointType==ReferenceType | ||
IsNeighborType <- X$marks$PointType==NeighborType | ||
|
||
if (!is.null(ReferencePoint)) { | ||
# Set individual to TRUE if a refernce point is given | ||
Individual <- TRUE | ||
if (IsReferenceType[ReferencePoint]) { | ||
# Remember the name of the reference point in the dataset of reference type | ||
ReferencePoint_name <- row.names(X$marks[ReferencePoint, ]) | ||
} else { | ||
# The reference point must be in the reference type | ||
stop("The reference point must be of the reference point type.") | ||
} | ||
} | ||
|
||
# Global ratio | ||
if (ReferenceType==NeighborType | CaseControl) { | ||
WrMinusReferencePoint <- sum(X$marks$PointWeight[IsReferenceType])-X$marks$PointWeight | ||
Wn <- WrMinusReferencePoint[IsReferenceType] | ||
} else { | ||
Wn <- sum(X$marks$PointWeight[IsNeighborType]) | ||
} | ||
if (CaseControl) { | ||
Wa <- sum(X$marks$PointWeight[IsNeighborType]) | ||
} else { | ||
WaMinusReferencePoint <- sum(X$marks$PointWeight)-X$marks$PointWeight | ||
Wa <- WaMinusReferencePoint[IsReferenceType] | ||
} | ||
GlobalRatio <- Wn/Wa | ||
|
||
Nr <- length(r) | ||
# Neighborhoods (i.e. all neighbors of a point less than a distance apart) | ||
# Store weights of neighbors of interest in first Nr columns, all points from Nr+1 to 2*Nr | ||
|
||
# Call C routine to fill Nbd | ||
if (CaseControl) { | ||
if (inherits(X, "Dtable")) { | ||
# Dtable case | ||
Nbd <- parallelCountNbdDtCC(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType) | ||
} else { | ||
# wmppp case | ||
Nbd <- parallelCountNbdCC(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType) | ||
} | ||
} else { | ||
if (inherits(X, "Dtable")) { | ||
# Dtable case | ||
Nbd <- parallelCountNbdDt(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType) | ||
} else { | ||
# wmppp case | ||
Nbd <- parallelCountNbd(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType) | ||
} | ||
} | ||
|
||
# Cumulate weights up to each distance | ||
NbdInt <- t(apply(Nbd[, seq_len(Nr)], 1, cumsum)) | ||
NbdAll <- t(apply(Nbd[, (Nr + 1):(2 * Nr)], 1, cumsum)) | ||
|
||
# Calculate the ratio of points of interest around each point | ||
LocalRatio <- NbdInt/NbdAll | ||
|
||
if (is.null(ReferencePoint)) { | ||
# Divide it by the global ratio. Ignore points with no neighbor at all. | ||
Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)])) | ||
# Keep individual values | ||
if (Individual) { | ||
Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio)) | ||
} | ||
} else { | ||
# Find the reference point in the set of points of the reference type | ||
ReferencePoint_index <- which(rownames(X[IsReferenceType]$marks) == ReferencePoint_name) | ||
# Only keep the value of the reference point | ||
Mvalues <- LocalRatio[ReferencePoint_index, ]/GlobalRatio[ReferencePoint_index] | ||
} | ||
|
||
# Put the results into an fv object | ||
MEstimate <- data.frame(r, rep(1, length(r)), Mvalues) | ||
ColNames <- c("r", "theo", "M") | ||
Labl <- c("r", "%s[theo](r)", "hat(%s)(r)") | ||
Desc <- c("Distance argument r", "Theoretical independent %s", "Estimated %s") | ||
if (Individual & is.null(ReferencePoint)) { | ||
# ColNumbers will usually be line numbers of the marks df, but may be real names. | ||
ColNumbers <- row.names(X$marks[IsReferenceType, ]) | ||
ColNames <- c(ColNames, paste("M", ColNumbers, sep="_")) | ||
Labl <- c(Labl, paste("hat(%s)[", ColNumbers, "](r)", sep="")) | ||
Desc <- c(Desc, paste("Individual %s around point", ColNumbers)) | ||
} | ||
colnames(MEstimate) <- ColNames | ||
|
||
# Make an fv object | ||
M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M", | ||
fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl, | ||
desc=Desc, unitname=X$window$unit, fname="M") | ||
fvnames(M, ".") <- ColNames[-1] | ||
|
||
|
||
# Calculate the quantiles of the individual values with respect to the null hypothesis | ||
if (Quantiles) { | ||
# Run Monte Carlo simulations of Mhat for each reference point | ||
nReferencePoints <- sum(IsReferenceType) | ||
# Prepare a matrix to save quantiles | ||
MQuantiles <- matrix(0, nrow = length(r), ncol = nReferencePoints) | ||
colnames(MQuantiles) <- paste("M", ColNumbers, sep="_") | ||
rownames(MQuantiles) <- r | ||
if (verbose) ProgressBar <- utils::txtProgressBar(min = 0, max = nReferencePoints) | ||
for (i in seq_len(nReferencePoints)) { | ||
# Null hypothesis | ||
SimulatedPP <- expression( | ||
rRandomLocation( | ||
X, | ||
ReferencePoint = which(X$marks$PointType == ReferenceType)[i], | ||
CheckArguments = FALSE | ||
) | ||
) | ||
# Compute the simulations. The envelope is useless: we need the simulated values. | ||
Envelope <- envelope( | ||
# The value Mhat(X) is not used but the point #1 must be of ReferenceType | ||
# so retain points of ReferenceType only to save time | ||
X[X$marks$PointType == ReferenceType], | ||
fun = Mhat, | ||
nsim = NumberOfSimulations, | ||
# nrank may be any value because the envelope is not used | ||
nrank = 1, | ||
# Arguments for Mhat() | ||
r = r, | ||
ReferenceType = ReferenceType, | ||
NeighborType = NeighborType, | ||
CaseControl = CaseControl, | ||
Individual = TRUE, | ||
# The reference point is always 1 after rRandomLocation() | ||
ReferencePoint = 1, | ||
CheckArguments = FALSE, | ||
# Arguments for envelope() | ||
simulate = SimulatedPP, | ||
# Do not show the progress | ||
verbose = FALSE, | ||
# Save individual simulations into attribute simfuns | ||
savefuns = TRUE | ||
) | ||
# Get the distribution of simulated values (eliminate the "r" column). | ||
Simulations <- as.matrix(attr(Envelope, "simfuns"))[, -1] | ||
# Calculate the quantiles of observed values | ||
for (r_i in seq_along(r)) { | ||
if (any(!is.na(Simulations[r_i, ]))) { | ||
# Check that at least one simulated value is not NaN so that ecdf() works. | ||
MQuantiles[r_i, i] <- stats::ecdf(Simulations[r_i, ])(Mvalues[r_i, i]) | ||
} else { | ||
MQuantiles[r_i, i] <- NaN | ||
} | ||
} | ||
# Progress bar | ||
if (verbose) utils::setTxtProgressBar(ProgressBar, i) | ||
} | ||
if (verbose) close(ProgressBar) | ||
# Save the quantiles as an attribute of the fv | ||
attr(M, "Quantiles") <- MQuantiles | ||
attr(M, "Alpha") <- Alpha | ||
} | ||
|
||
if (Individual & is.null(ReferencePoint)) { | ||
# Save the reference type for future smoothing | ||
attr(M, "ReferenceType") <- ReferenceType | ||
} | ||
return(M) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
\name{Mhat} | ||
\alias{Mhat} | ||
\title{ | ||
Estimation of the M function | ||
} | ||
\description{ | ||
Estimates the \emph{M} function | ||
} | ||
\usage{ | ||
Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, | ||
CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, | ||
Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, | ||
verbose = interactive(), CheckArguments = TRUE) | ||
} | ||
\arguments{ | ||
\item{X}{ | ||
A weighted, marked planar point pattern (\code{\link{wmppp.object}}) or a \code{\link{Dtable}} object. | ||
} | ||
\item{r}{ | ||
A vector of distances. If \code{NULL}, a default value is set: 64 unequally spaced values are used up to half the maximum distance between points \eqn{d_m}. The first value is 0, first steps are small (\eqn{d_m/800}) then increase progressively up to \eqn{d_m/40}. | ||
} | ||
\item{ReferenceType}{ | ||
One of the point types. | ||
} | ||
\item{ReferencePoint}{ | ||
The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. | ||
} | ||
\item{NeighborType}{ | ||
One of the point types. By default, the same as reference type. | ||
} | ||
\item{CaseControl}{ | ||
Logical; if \code{TRUE}, the case-control version of \emph{M} is computed. \emph{ReferenceType} points are cases, \emph{NeighborType} points are controls. | ||
} | ||
\item{Individual}{ | ||
Logical; if \code{TRUE}, values of the function around each individual point are returned. | ||
} | ||
\item{Quantiles}{ | ||
If \code{TRUE}, Monte-Carlo simulations are run to obtain the distribution of each individual \emph{M} value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated). | ||
} | ||
\item{NumberOfSimulations}{ | ||
The number of simulations to run, 100 by default. | ||
} | ||
\item{Alpha}{ | ||
The risk level, 5\% by default. | ||
} | ||
\item{verbose}{ | ||
Logical; if \code{TRUE}, print progress reports during the simulations. | ||
} | ||
\item{CheckArguments}{ | ||
Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. | ||
} | ||
} | ||
\details{ | ||
\emph{M} is a weighted, cumulative, relative measure of a point pattern structure. Its value at any distance is the ratio of neighbors of the \emph{NeighborType} to all points around \emph{ReferenceType} points, normalized by its value over the windows. | ||
|
||
If \emph{CaseControl} is \code{TRUE}, then \emph{ReferenceType} points are cases and \emph{NeighborType} points are controls. The univariate concentration of cases is calculated as if \emph{NeighborType} was equal to \emph{ReferenceType}, but only controls are considered when counting all points around cases (Marcon et al., 2012). This makes sense when the sampling design is such that all points of \emph{ReferenceType} (the cases) but only a sample of the other points (the controls) are recorded. Then, the whole distribution of points is better represented by the controls alone. | ||
|
||
If \emph{Individual} is \code{TRUE}, then the individual values \emph{M} in the neighborhood of each point are returned. If \emph{ReferencePoint} is also specified, then \emph{only} the individual value of M around the reference point is returned. | ||
} | ||
\value{ | ||
An object of class \code{fv}, see \code{\link{fv.object}}, which can be plotted directly using \code{\link{plot.fv}}. | ||
|
||
If \code{Individual} is set to \code{TRUE}, the object also contains the value of the function around each individual \emph{ReferenceType} point taken as the only reference point. The column names of the \code{fv} are "M_" followed by the point names, i.e. the row names of the marks of the point pattern. | ||
} | ||
\references{ | ||
Marcon, E. and Puech, F. (2010). Measures of the Geographic Concentration of Industries: Improving Distance-Based Methods. \emph{Journal of Economic Geography} 10(5): 745-762. | ||
|
||
Marcon, E., F. Puech and S. Traissac (2012). Characterizing the relative spatial structure of point patterns. \emph{International Journal of Ecology} 2012(Article ID 619281): 11. | ||
|
||
Marcon, E., and Puech, F. (2017). A Typology of Distance-Based Measures of Spatial Concentration. \emph{Regional Science and Urban Economics} 62:56-67 | ||
} | ||
\seealso{ | ||
\code{\link{MEnvelope}}, \code{\link{Kdhat}} | ||
} | ||
\examples{ | ||
data(paracou16) | ||
autoplot(paracou16) | ||
|
||
# Calculate M | ||
autoplot(Mhat(paracou16, , "V. Americana", "Q. Rosea")) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# MHat | ||
|
||
If Quantiles is TRUE, Monte-Carlo simulations are run to obtain the distribution of each individual M value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated). | ||
|
||
|
||
# rRandomLocation | ||
|
||
If ReferencePoint is not NULL, other points are shuffled but the reference point is kept untouched to compute the CI of its local M value. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
rRandomLocation <- | ||
function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { | ||
|
||
if (CheckArguments) | ||
CheckdbmssArguments() | ||
|
||
if (inherits(X, "Dtable")) { | ||
# Dtable case | ||
Index <- seq_along(X$marks$PointType) | ||
if (ReferenceType != "") { | ||
# Retain a single point type | ||
ReferencePoints <- X$marks$PointType==ReferenceType | ||
# Randomize the reference points | ||
RandomizedReferences <- sample(Index[ReferencePoints]) | ||
# Replace randomized elements in the index | ||
i <- o <- 1 | ||
while (i <= length(X$marks$PointType)) | ||
{ | ||
if (ReferencePoints[i]) { | ||
Index[i] <- RandomizedReferences[o] | ||
o <- o+1 | ||
} | ||
i <- i+1 | ||
} | ||
} else { | ||
Index <- sample(Index) | ||
} | ||
# Apply the randomization to PointType and PointWeight | ||
X$marks$PointType <- X$marks$PointType[Index] | ||
X$marks$PointWeight <- X$marks$PointWeight[Index] | ||
return(X) | ||
} else { | ||
# wmppp case | ||
if (!is.null(ReferencePoint)) { | ||
# The reference point must be < than the number of points | ||
if (ReferencePoint > X$n) { | ||
stop("The number of the reference point must be smaller than the number of points in the point pattern.") | ||
} | ||
# The reference point must belong to the reference point type | ||
if (ReferenceType != "") { | ||
if (X$marks$PointType[ReferencePoint] != ReferenceType) { | ||
stop("The reference point must be of the reference point type.") | ||
} | ||
} | ||
# Save the reference point | ||
ReferencePoint_ppp <- X[ReferencePoint] | ||
X <- X[-ReferencePoint] | ||
} | ||
if (ReferenceType != "") { | ||
# Retain a single point type | ||
X.reduced <- X[X$marks$PointType == ReferenceType] | ||
RandomizedX <- rlabel(X.reduced) | ||
} else { | ||
RandomizedX <- rlabel(X) | ||
} | ||
if (!is.null(ReferencePoint)) { | ||
# Restore the reference point with index 1 | ||
RandomizedX <- superimpose(ReferencePoint_ppp, RandomizedX) | ||
} | ||
class(RandomizedX) <- c("wmppp", "ppp") | ||
return (RandomizedX) | ||
} | ||
} |
Oops, something went wrong.