From ee28e03ad0ab92417b45310b7bf48f619884eb2f Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Fri, 21 Jul 2023 11:03:02 +0200 Subject: [PATCH 01/29] Smoothed wmppp --- R/swmppp.R | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 R/swmppp.R diff --git a/R/swmppp.R b/R/swmppp.R new file mode 100644 index 0000000..90927f9 --- /dev/null +++ b/R/swmppp.R @@ -0,0 +1,61 @@ +swmppp <- + function (X, fvind, ReferenceType = "", distance = stats::median(fvind$r), + AllowJitter = TRUE, Nbx = 128, Nby = 128, + Adjust = 1, CheckArguments = TRUE) +{ + if (CheckArguments) + CheckdbmssArguments() + + # Reduce the community to the reference type + if (ReferenceType != "") { + is_ReferenceType <- X$marks$PointType == ReferenceType + X <- X[is_ReferenceType] + } + + # Check the consistency between X and fvind + if (X$n != sum(startsWith(colnames(fvind), paste0(attr(fvind, "valu"), "_")))) + stop(paste("The number of reference points in the function value is different from \n", + "that of the reference points of the spatialized community")) + + # Jitter + if (AllowJitter) { + # Find duplicates + Dups <- spatstat.geom::duplicated.ppp(X, rule="unmark") + if (sum(Dups)>0) { + # Extract the duplicates and jitter them + Dupswmppp <- spatstat.geom::rjitter(X[Dups]) + # Put the coordinates back into the original wmppp + X$x[Dups] <- Dupswmppp$x + X$y[Dups] <- Dupswmppp$y + } + } + + # Find the max r value of fvind lower than or equal to argument distance + r_to_plot <- max(fvind$r[fvind$r<=distance]) + + # Format the value to plot + df <- as.data.frame(fvind) + # Pivot longer. Columns are named M_1, etc. for function M + df <- tidyr::pivot_longer( + df, + cols = tidyselect::starts_with(paste0(attr(fvind, "valu"),"_")), + names_to = "point", + values_to = "dbmss" + ) + # Filter the appropriate distance + df <- dplyr::filter( + df, + .data$r == r_to_plot + ) + # Select the function value column only + df <- dplyr::select( + df, + .data$dbmss + ) + # Make it the marks of X + X$marks <- df + + # Smoothed value of the dbmss. + # Bandwidth is r/2 so that 95% of the weight is each point's neighborhood + return(spatstat.explore::Smooth.ppp(X, sigma = r_to_plot/2, adjust = Adjust)) +} From 397481d9045ceab4c299c598e65c7af8a0955803 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Fri, 21 Jul 2023 11:44:39 +0200 Subject: [PATCH 02/29] Individual reference point --- R/rRandomLocation.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/R/rRandomLocation.R b/R/rRandomLocation.R index 0c3c8e9..fa7b6f3 100644 --- a/R/rRandomLocation.R +++ b/R/rRandomLocation.R @@ -1,5 +1,5 @@ rRandomLocation <- -function(X, ReferenceType = "", CheckArguments = TRUE) { +function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { if (CheckArguments) CheckdbmssArguments() @@ -31,6 +31,21 @@ function(X, ReferenceType = "", CheckArguments = TRUE) { return(X) } else { # wmppp case + if (!is.null(ReferencePoint)) { + # The reference point must be < than the number of points + if (ReferencePoint > X$n) { + stop("The number of the reference point must be smaller than the number of points in the point pattern.") + } + # The reference point must belong to the reference point type + if (ReferenceType != "") { + if (X$marks$PointType[ReferencePoint] != ReferenceType) { + stop("The reference point must be of the reference point type.") + } + } + # Save the reference point + ReferencePoint_ppp <- X[ReferencePoint] + X <- X[-ReferencePoint] + } if (ReferenceType != "") { # Retain a single point type X.reduced <- X[X$marks$PointType == ReferenceType] @@ -38,7 +53,10 @@ function(X, ReferenceType = "", CheckArguments = TRUE) { } else { RandomizedX <- rlabel(X) } - + if (!is.null(ReferencePoint)) { + # Restore the reference point + RandomizedX <- superimpose(RandomizedX, ReferencePoint_ppp) + } class(RandomizedX) <- c("wmppp", "ppp") return (RandomizedX) } From 703b03363c29a00f2276a44951746513d106eac3 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Sat, 22 Jul 2023 12:57:49 +0200 Subject: [PATCH 03/29] Documentation --- R/Mhat.R | 37 ++++++++++++++++++++++++++++++------- R/rRandomLocation.R | 4 ++-- man/Mhat.Rd | 8 +++++++- man/rRandomLocation.Rd | 9 +++++++-- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/R/Mhat.R b/R/Mhat.R index cb25de2..e39a700 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -1,6 +1,7 @@ Mhat <- function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, - CaseControl = FALSE, Individual = FALSE, CheckArguments = TRUE) + CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, + CheckArguments = TRUE) { # Eliminate erroneous configurations if (CheckArguments) { @@ -27,6 +28,20 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, IsReferenceType <- X$marks$PointType==ReferenceType IsNeighborType <- X$marks$PointType==NeighborType + if (!is.null(ReferencePoint)) { + if (!Individual) { + warning("The reference point is ignored because Individual is FALSE.") + ReferencePoint <- NULL + } + if (IsReferenceType[ReferencePoint]) { + # Remember the name of the reference point in the dataset of reference type + ReferencePoint_name <- row.names(X$marks[ReferencePoint, ]) + } else { + # The reference point must be in the reference type + stop("The reference point must be of the reference point type.") + } + } + # Global ratio if (ReferenceType==NeighborType | CaseControl) { WrMinusReferencePoint <- sum(X$marks$PointWeight[IsReferenceType])-X$marks$PointWeight @@ -71,11 +86,19 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, # Calculate the ratio of points of interest around each point LocalRatio <- NbdInt/NbdAll - # Divide it by the global ratio. Ignore points with no neighbor at all. - Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)])) - # Keep individual values - if (Individual) { - Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio)) + + if (is.null(ReferencePoint)) { + # Divide it by the global ratio. Ignore points with no neighbor at all. + Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)])) + # Keep individual values + if (Individual) { + Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio)) + } + } else { + # Find the reference point in the set of points of the reference type + ReferencePoint_index <- which(rownames(X[IsReferenceType]$marks) == ReferencePoint_name) + # Only keep the value of the reference point + Mvalues <- LocalRatio[ReferencePoint_index, ]/GlobalRatio[ReferencePoint_index] } # Put the results into an fv object @@ -83,7 +106,7 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, ColNames <- c("r", "theo", "M") Labl <- c("r", "%s[theo](r)", "hat(%s)(r)") Desc <- c("Distance argument r", "Theoretical independent %s", "Estimated %s") - if (Individual) { + if (Individual & is.null(ReferencePoint)) { # ColNumbers will usually be line numbers of the marks df, but may be real names. ColNumbers <- row.names(X$marks[IsReferenceType, ]) ColNames <- c(ColNames, paste("M", ColNumbers, sep="_")) diff --git a/R/rRandomLocation.R b/R/rRandomLocation.R index fa7b6f3..c63fc83 100644 --- a/R/rRandomLocation.R +++ b/R/rRandomLocation.R @@ -54,8 +54,8 @@ function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { RandomizedX <- rlabel(X) } if (!is.null(ReferencePoint)) { - # Restore the reference point - RandomizedX <- superimpose(RandomizedX, ReferencePoint_ppp) + # Restore the reference point with index 1 + RandomizedX <- superimpose(ReferencePoint_ppp, RandomizedX) } class(RandomizedX) <- c("wmppp", "ppp") return (RandomizedX) diff --git a/man/Mhat.Rd b/man/Mhat.Rd index 86d2920..e6b7046 100644 --- a/man/Mhat.Rd +++ b/man/Mhat.Rd @@ -8,7 +8,8 @@ } \usage{ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, - CaseControl = FALSE, Individual = FALSE, CheckArguments = TRUE) + CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, + CheckArguments = TRUE) } \arguments{ \item{X}{ @@ -20,6 +21,9 @@ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, \item{ReferenceType}{ One of the point types. } + \item{ReferencePoint}{ + The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. + } \item{NeighborType}{ One of the point types. By default, the same as reference type. } @@ -37,6 +41,8 @@ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, \emph{M} is a weighted, cumulative, relative measure of a point pattern structure. Its value at any distance is the ratio of neighbors of the \emph{NeighborType} to all points around \emph{ReferenceType} points, normalized by its value over the windows. If \emph{CaseControl} is \code{TRUE}, then \emph{ReferenceType} points are cases and \emph{NeighborType} points are controls. The univariate concentration of cases is calculated as if \emph{NeighborType} was equal to \emph{ReferenceType}, but only controls are considered when counting all points around cases (Marcon et al., 2012). This makes sense when the sampling design is such that all points of \emph{ReferenceType} (the cases) but only a sample of the other points (the controls) are recorded. Then, the whole distribution of points is better represented by the controls alone. + + If \emph{Individual} is \code{TRUE}, then the individual values \emph{M} in the neighborhood of each point are returned. If \emph{ReferencePoint} is also specified, then \emph{only} the individual value of M around the reference point is returned. } \value{ An object of class \code{fv}, see \code{\link{fv.object}}, which can be plotted directly using \code{\link{plot.fv}}. diff --git a/man/rRandomLocation.Rd b/man/rRandomLocation.Rd index c7848c9..e739e11 100644 --- a/man/rRandomLocation.Rd +++ b/man/rRandomLocation.Rd @@ -7,7 +7,7 @@ Simulates of a point pattern according to the null hypothesis of random location. } \usage{ -rRandomLocation(X, ReferenceType = "", CheckArguments = TRUE) +rRandomLocation(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) } \arguments{ \item{X}{ @@ -16,13 +16,18 @@ rRandomLocation(X, ReferenceType = "", CheckArguments = TRUE) \item{ReferenceType}{ One of the point types. } + \item{ReferencePoint}{ + The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. + The reference point kept untouched during simulations. + } \item{CheckArguments}{ Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. } } \details{ Points are redistributed randomly across the locations of the original point pattern. This randomization is equivalent to random labeling, considering the label is both point type and point weight. - If \code{ReferenceType} is specified, then only reference type points are kept in the orginal point pattern before randomization. + If \code{ReferenceType} is specified, then only reference type points are kept in the original point pattern before randomization. + If \code{ReferencePoint} is specified, then this point is not modified in all simulations. } \value{ A new weighted, marked, planar point pattern (an object of class \code{wmppp}, see \code{\link{wmppp.object}}). From dd8c81b959a97628b80fae6fa796e5174c949e28 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Sat, 22 Jul 2023 13:02:57 +0200 Subject: [PATCH 04/29] Check ReferencePoint --- R/CheckdbmssArguments.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/CheckdbmssArguments.R b/R/CheckdbmssArguments.R index 2f1eb6e..aad55fe 100644 --- a/R/CheckdbmssArguments.R +++ b/R/CheckdbmssArguments.R @@ -183,6 +183,17 @@ function() { if (Precision < 0) ErrorMessage("Precision must be positive", Precision) } + + # ReferencePoint + if (!is.na(names(Args["ReferencePoint"]))) { + ReferencePoint <- eval(expression(ReferencePoint), parent.frame()) + if (!is.numeric(ReferencePoint)) + ErrorMessage("ReferencePoint must be a number", ReferencePoint) + if (ReferencePoint <= 0) + ErrorMessage("ReferencePoint must be positive", ReferencePoint) + if (as.integer(ReferencePoint) != ReferencePoint) + ErrorMessage("ReferencePoint must be an integer", ReferencePoint) + } # show.window if (!is.na(names(Args["show.window"]))) { From 0de2c596e10075e575a74bc8246928f0cba87aa7 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 12:12:22 +0200 Subject: [PATCH 05/29] Mhat with quantiles --- R/Mhat.R | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/R/Mhat.R b/R/Mhat.R index e39a700..186b4b2 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -1,7 +1,8 @@ Mhat <- function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, - CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, - CheckArguments = TRUE) + CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, + Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, + verbose = interactive(), CheckArguments = TRUE) { # Eliminate erroneous configurations if (CheckArguments) { @@ -12,6 +13,9 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, } } + if (Quantiles & !Individual) + stop("Quantiles can't be TRUE if Individual is FALSE.") + # Default r values: 64 values up to half the max distance if (is.null(r)) { if (inherits(X, "Dtable")) { @@ -82,7 +86,7 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, # Cumulate weights up to each distance NbdInt <- t(apply(Nbd[, seq_len(Nr)], 1, cumsum)) - NbdAll <- t(apply(Nbd[, (Nr+1):(2*Nr)], 1, cumsum)) + NbdAll <- t(apply(Nbd[, (Nr + 1):(2 * Nr)], 1, cumsum)) # Calculate the ratio of points of interest around each point LocalRatio <- NbdInt/NbdAll @@ -115,10 +119,74 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, } colnames(MEstimate) <- ColNames - # Return the values of M(r) + # Make an fv object M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M", fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl, desc=Desc, unitname=X$window$unit, fname="M") fvnames(M, ".") <- ColNames[-1] + + + # Calculate the quantiles of the individual values with respect to the null hypothesis + if (Quantiles) { + # Run Monte Carlo simulations of Mhat for each reference point + nReferencePoints <- sum(IsReferenceType) + # Prepare a matrix to save quantiles + MQuantiles <- matrix(0, nrow = length(r), ncol = nReferencePoints) + colnames(MQuantiles) <- paste("M", ColNumbers, sep="_") + rownames(MQuantiles) <- r + if (verbose) ProgressBar <- utils::txtProgressBar(min = 0, max = nReferencePoints) + for (i in seq_len(nReferencePoints)) { + # Null hypothesis + SimulatedPP <- expression( + rRandomLocation( + X, + ReferencePoint = which(X$marks$PointType == ReferenceType)[i], + CheckArguments = FALSE + ) + ) + # Compute the simulations. The envelope is useless: we need the simulated values. + Envelope <- envelope( + # The value Mhat(X) is not used but the point #1 must be of ReferenceType + # so retain points of ReferenceType only to save time + X[X$marks$PointType == ReferenceType], + fun = Mhat, + nsim = NumberOfSimulations, + # nrank may be any value because the envelope is not used + nrank = 1, + # Arguments for Mhat() + r = r, + ReferenceType = ReferenceType, + NeighborType = NeighborType, + CaseControl = CaseControl, + Individual = TRUE, + # The reference point is always 1 after rRandomLocation() + ReferencePoint = 1, + CheckArguments = FALSE, + # Arguments for envelope() + simulate = SimulatedPP, + # Do not show the progress + verbose = FALSE, + # Save individual simulations into attribute simfuns + savefuns = TRUE + ) + # Get the distribution of simulated values (eliminate the "r" column). + Simulations <- as.matrix(attr(Envelope, "simfuns"))[, -1] + # Calculate the quantiles of observed values + for (r_i in seq_along(r)) { + if (any(!is.na(Simulations[r_i, ]))) { + # Check that at least one simulated value is not NaN so that ecdf() works. + MQuantiles[r_i, i] <- stats::ecdf(Simulations[r_i, ])(Mvalues[r_i, i]) + } else { + MQuantiles[r_i, i] <- NaN + } + } + # Progress bar + if (verbose) utils::setTxtProgressBar(ProgressBar, i) + } + close(ProgressBar) + # Save the quantiles as an attribute of the fv + attr(M, "Quantiles") <- MQuantiles + } + return (M) } From 61d3338dcd45642a73bb0345dcef05d3468a3683 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 15:26:51 +0200 Subject: [PATCH 06/29] Documentation --- R/Mhat.R | 7 +++---- R/swmppp.R | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/R/Mhat.R b/R/Mhat.R index 186b4b2..f26eeb5 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -11,11 +11,10 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, warning("Cases and controls are identical.") return(rep(1,length(r))) } + if (Quantiles & !Individual) + stop("Quantiles can't be TRUE if Individual is FALSE.") } - if (Quantiles & !Individual) - stop("Quantiles can't be TRUE if Individual is FALSE.") - # Default r values: 64 values up to half the max distance if (is.null(r)) { if (inherits(X, "Dtable")) { @@ -188,5 +187,5 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, attr(M, "Quantiles") <- MQuantiles } - return (M) + return(M) } diff --git a/R/swmppp.R b/R/swmppp.R index 90927f9..b23538e 100644 --- a/R/swmppp.R +++ b/R/swmppp.R @@ -6,7 +6,7 @@ swmppp <- if (CheckArguments) CheckdbmssArguments() - # Reduce the community to the reference type + # Reduce the point pattern to the reference type if (ReferenceType != "") { is_ReferenceType <- X$marks$PointType == ReferenceType X <- X[is_ReferenceType] From a4cbb39cf91373e2ec380a6e74f8535e5311fb63 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 15:27:41 +0200 Subject: [PATCH 07/29] Smooth.wmppp --- NAMESPACE | 1 + R/Smooth.wmppp.R | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 R/Smooth.wmppp.R diff --git a/NAMESPACE b/NAMESPACE index 4c3a35a..f2fe804 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -58,6 +58,7 @@ S3method("envelope", "Dtable") S3method("plot", "kwmppp") S3method("print", "dbmssEnvelope") S3method("sharpen", "wmppp") +S3method("Smooth", "wmppp") S3method("split", "wmppp") S3method("summary", "dbmssEnvelope") S3method("superimpose", "wmppp") diff --git a/R/Smooth.wmppp.R b/R/Smooth.wmppp.R new file mode 100644 index 0000000..e7159a0 --- /dev/null +++ b/R/Smooth.wmppp.R @@ -0,0 +1,67 @@ +Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", + Quantiles = FALSE, Weighted = TRUE, Adjust = 1, + Nbx = 128, Nby = 128, CheckArguments = TRUE) +{ + # Check the arguments + if (CheckArguments) { + CheckdbmssArguments() + } + + # Reduce the point pattern to the reference type + if (ReferenceType != "") { + is_ReferenceType <- X$marks$PointType == ReferenceType + X <- X[is_ReferenceType] + } + + # Smooth the point weights. + if (is.null(fv)) { + # Marks as a numeric vector + X$marks <- X$marks$PointWeight + # Smooth requires the top class of X to be ppp + class(X) <- "ppp" + Image <- Smooth.ppp(X, dimyx = c(Nbx, Nby)) + return(Image) + } + + # Smooth the individual function values + + # Check the consistency between X and fvind + if (X$n != sum(startsWith(colnames(fvind), paste0(attr(fvind, "valu"), "_")))) + stop(paste("The number of reference points in the function value is different from \n", + "that of the reference points of the point pattern")) + + if (is.null(distance)) { + # default distance + distance <- stats::median(fvind$r) + } + # Find the max r value of fvind lower than or equal to argument distance + r_to_plot <- max(fvind$r[fvind$r<=distance]) + # Weights + if (Weighted) { + weights <- X$marks$PointWeight + } else { + weights <- rep(1, x$n) + } + + if (Quantiles) { + # Smooth the quantiles of the dbmss + if (is.null(attr(fvind, "Quantiles"))) + stop("The quantiles of fvind are not available.") + # Make the quantiles the marks of X + X$marks <- attr(fvind, "Quantiles")[ + which(rownames(attr(fvind, "Quantiles")) == r_to_plot), + ] + # Smooth requires the top class of X to be ppp + class(X) <- "ppp" + Image <- Smooth.ppp(X, sigma = r_to_plot/2, weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) + } else { + # Smooth the values of the dbm + fvind.matrix <- as.matrix(fvind) + # Extract the values. Columns 1 to 3 contain the global dbm + X$marks <- fvind.matrix [which(fvind.matrix [, 1] == r_to_plot), -(1:3)] + # Smooth requires the top class of X to be ppp + class(X) <- "ppp" + Image <- Smooth.ppp(X, sigma = r_to_plot/2, weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) + } + return(Image) +} \ No newline at end of file From bccbd2043da63879d28d55beaa913357535a06f3 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 15:48:05 +0200 Subject: [PATCH 08/29] v. 2.8-2.9100 --- DESCRIPTION | 2 +- NEWS.md | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f040084..a8d5193 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dbmss Type: Package Title: Distance-Based Measures of Spatial Structures -Version: 2.8-2.9000 +Version: 2.8-2.9100 Authors@R: c( person("Eric", "Marcon", email="eric.marcon@agroparistech.fr", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5249-321X")), person("Gabriel", "Lang", email="gabriel.lang@agroparistech.fr", role="aut", comment = c(ORCID = "0000-0002-4325-6044")), diff --git a/NEWS.md b/NEWS.md index 463f79f..ee605da 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,13 @@ -# dbmss 2.8-2.9000 +# dbmss 2.8-2.9100 ## Improvements - NAMESPACE cleaned up. +## Significant user-visible changes + +- `Smooth.wmppp()` smooths wmppp's to map individual values of functions such as $M$ in the neighborhood of points. + # dbmss 2.8-2 From bb1d19c68f436f1c9b2d447beeecc8583a315431 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 17:21:30 +0200 Subject: [PATCH 09/29] allowed ReferencePoint to be null --- R/CheckdbmssArguments.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/CheckdbmssArguments.R b/R/CheckdbmssArguments.R index aad55fe..808c5d1 100644 --- a/R/CheckdbmssArguments.R +++ b/R/CheckdbmssArguments.R @@ -187,12 +187,14 @@ function() { # ReferencePoint if (!is.na(names(Args["ReferencePoint"]))) { ReferencePoint <- eval(expression(ReferencePoint), parent.frame()) - if (!is.numeric(ReferencePoint)) - ErrorMessage("ReferencePoint must be a number", ReferencePoint) - if (ReferencePoint <= 0) - ErrorMessage("ReferencePoint must be positive", ReferencePoint) - if (as.integer(ReferencePoint) != ReferencePoint) - ErrorMessage("ReferencePoint must be an integer", ReferencePoint) + if (!is.null(ReferencePoint)) { + if (!is.numeric(ReferencePoint)) + ErrorMessage("ReferencePoint must be a number", ReferencePoint) + if (ReferencePoint <= 0) + ErrorMessage("ReferencePoint must be positive", ReferencePoint) + if (as.integer(ReferencePoint) != ReferencePoint) + ErrorMessage("ReferencePoint must be an integer", ReferencePoint) + } } # show.window From c0d7d33620f01323ae3dd869fbaf7324eb4f1415 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 17:21:53 +0200 Subject: [PATCH 10/29] allowed NA's in local dbm values --- R/Smooth.wmppp.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/Smooth.wmppp.R b/R/Smooth.wmppp.R index e7159a0..35c9cea 100644 --- a/R/Smooth.wmppp.R +++ b/R/Smooth.wmppp.R @@ -53,6 +53,10 @@ Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", ] # Smooth requires the top class of X to be ppp class(X) <- "ppp" + # Eliminate NA's before smoothing + is_na <- is.na(X$marks) + weights <- weights[!is_na] + X<- X[!is_na] Image <- Smooth.ppp(X, sigma = r_to_plot/2, weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) } else { # Smooth the values of the dbm @@ -61,6 +65,10 @@ Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", X$marks <- fvind.matrix [which(fvind.matrix [, 1] == r_to_plot), -(1:3)] # Smooth requires the top class of X to be ppp class(X) <- "ppp" + # Eliminate NA's before smoothing + is_na <- is.na(X$marks) + weights <- weights[!is_na] + X<- X[!is_na] Image <- Smooth.ppp(X, sigma = r_to_plot/2, weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) } return(Image) From 8d326befdd5f7e3dcc396e1b89902d6bc87d1b3c Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 17:22:18 +0200 Subject: [PATCH 11/29] v. 2.8-2.9101 --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a8d5193..fc697ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dbmss Type: Package Title: Distance-Based Measures of Spatial Structures -Version: 2.8-2.9100 +Version: 2.8-2.9101 Authors@R: c( person("Eric", "Marcon", email="eric.marcon@agroparistech.fr", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5249-321X")), person("Gabriel", "Lang", email="gabriel.lang@agroparistech.fr", role="aut", comment = c(ORCID = "0000-0002-4325-6044")), diff --git a/NEWS.md b/NEWS.md index ee605da..97650f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# dbmss 2.8-2.9100 +# dbmss 2.8-2.9101 ## Improvements From cdcd9b90a2ea232069d5bf175f9189f028223b0b Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 24 Jul 2023 18:05:52 +0200 Subject: [PATCH 12/29] Close progress bar only if it exists --- R/Mhat.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Mhat.R b/R/Mhat.R index f26eeb5..ec19f33 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -182,7 +182,7 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, # Progress bar if (verbose) utils::setTxtProgressBar(ProgressBar, i) } - close(ProgressBar) + if (verbose) close(ProgressBar) # Save the quantiles as an attribute of the fv attr(M, "Quantiles") <- MQuantiles } From 2431477caa8fedd38f99979634a24e93199b03e5 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Tue, 25 Jul 2023 09:51:09 +0200 Subject: [PATCH 13/29] Documentation --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/Smooth.wmppp.R | 22 ++++----------- man/Mhat.Rd | 15 ++++++++++- man/Smooth.wmppp.Rd | 65 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 20 deletions(-) create mode 100644 man/Smooth.wmppp.Rd diff --git a/DESCRIPTION b/DESCRIPTION index fc697ed..81dd7ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dbmss Type: Package Title: Distance-Based Measures of Spatial Structures -Version: 2.8-2.9101 +Version: 2.8-2.9102 Authors@R: c( person("Eric", "Marcon", email="eric.marcon@agroparistech.fr", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5249-321X")), person("Gabriel", "Lang", email="gabriel.lang@agroparistech.fr", role="aut", comment = c(ORCID = "0000-0002-4325-6044")), diff --git a/NEWS.md b/NEWS.md index 97650f8..0061e7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# dbmss 2.8-2.9101 +# dbmss 2.8-2.9102 ## Improvements diff --git a/R/Smooth.wmppp.R b/R/Smooth.wmppp.R index 35c9cea..fd0b145 100644 --- a/R/Smooth.wmppp.R +++ b/R/Smooth.wmppp.R @@ -1,6 +1,6 @@ -Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", +Smooth.wmppp <- function(X, fvind, ReferenceType = "", distance = NULL, Quantiles = FALSE, Weighted = TRUE, Adjust = 1, - Nbx = 128, Nby = 128, CheckArguments = TRUE) + Nbx = 128, Nby = 128,..., CheckArguments = TRUE) { # Check the arguments if (CheckArguments) { @@ -11,19 +11,7 @@ Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", if (ReferenceType != "") { is_ReferenceType <- X$marks$PointType == ReferenceType X <- X[is_ReferenceType] - } - - # Smooth the point weights. - if (is.null(fv)) { - # Marks as a numeric vector - X$marks <- X$marks$PointWeight - # Smooth requires the top class of X to be ppp - class(X) <- "ppp" - Image <- Smooth.ppp(X, dimyx = c(Nbx, Nby)) - return(Image) } - - # Smooth the individual function values # Check the consistency between X and fvind if (X$n != sum(startsWith(colnames(fvind), paste0(attr(fvind, "valu"), "_")))) @@ -40,7 +28,7 @@ Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", if (Weighted) { weights <- X$marks$PointWeight } else { - weights <- rep(1, x$n) + weights <- rep(1, X$n) } if (Quantiles) { @@ -57,7 +45,7 @@ Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = r_to_plot/2, weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) + Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) } else { # Smooth the values of the dbm fvind.matrix <- as.matrix(fvind) @@ -69,7 +57,7 @@ Smooth.wmppp <- function(X, fvind = NULL, distance = NULL, ReferenceType = "", is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = r_to_plot/2, weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) + Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) } return(Image) } \ No newline at end of file diff --git a/man/Mhat.Rd b/man/Mhat.Rd index e6b7046..94e1566 100644 --- a/man/Mhat.Rd +++ b/man/Mhat.Rd @@ -9,7 +9,8 @@ \usage{ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, - CheckArguments = TRUE) + Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, + verbose = interactive(), CheckArguments = TRUE) } \arguments{ \item{X}{ @@ -33,6 +34,18 @@ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, \item{Individual}{ Logical; if \code{TRUE}, values of the function around each individual point are returned. } + \item{Quantiles}{ + If \code{TRUE}, Monte-Carlo simulations are run to obtain the distribution of each individual \emph{M} value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated). + } + \item{NumberOfSimulations}{ + The number of simulations to run, 100 by default. + } + \item{Alpha}{ + The risk level, 5\% by default. + } + \item{verbose}{ + Logical; if \code{TRUE}, print progress reports during the simulations. + } \item{CheckArguments}{ Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. } diff --git a/man/Smooth.wmppp.Rd b/man/Smooth.wmppp.Rd new file mode 100644 index 0000000..59aeec2 --- /dev/null +++ b/man/Smooth.wmppp.Rd @@ -0,0 +1,65 @@ +\name{Smooth.wmppp} +\alias{Smooth.wmppp} +\title{Spatial smoothing of individual dbmss's} +\usage{ + \method{Smooth}{wmppp} (X, fvind, ReferenceType = "", distance = NULL, + Quantiles = FALSE, Weighted = TRUE, Adjust = 1, + Nbx = 128, Nby = 128, \dots, CheckArguments = TRUE) +} +\description{ + Performs spatial smoothing of the individual values of distance-based measures computed in the neighborhood of each point. +} +\arguments{ + \item{X}{ + A point pattern (\code{\link{wmppp.object}}). + } + \item{fvind}{ + An object of class \code{fv}, see \code{\link{fv.object}}, obtained a distance-based method, such as \code{\link{Mhat}} with individual values (argument \code{Individual = TRUE}). + } + \item{ReferenceType}{ + The point type used to calculate the function values. + The default value is "", i.e. all point types, but it will generate an error if the actual reference type is different. + } + \item{distance}{ + The distance at which the function value must be considered. + The default value is the median distance used to calculate the function values. + } + \item{Quantiles}{ + If \code{FALSE} (default), the dbmss is smoothed to produce a map of the measure. + If \code{TRUE}, its quantiles (computed by \code{\link{Mhat}} with argument \code{Quantiles = TRUE}) are smoothed to produce a map of the confidence level of the measure. + } + \item{Weighted}{ + If \code{TRUE} (default), the point weights are taken into account for smoothing. + } + \item{Adjust}{ + Force the selected bandwidth (equal to \code{distance}) to be multiplied by \code{Adjust}. Setting it to values greater than one (2 for example) will smooth the estimation. + } + \item{Nbx, Nby}{ + The number of columns and rows (pixels) of the resulting map, 128 by default. + Increase it for quality, paid by increasing computing time. + } + \item{\dots}{ + Extra arguments, passed to \code{\link{Smooth.ppp}}. + } + \item{CheckArguments}{ + If \code{TRUE} (default), the function arguments are verified. + Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. + } +} +\value{ + An image that can be plotted. +} +\examples{ + ReferenceType <- "V. Americana" + NeighborType <- "Q. Rosea" + # Calculate individual intertype M(distance) values + fvind <- Mhat(paracou16, , ReferenceType, NeighborType, Individual=TRUE) + # Plot the point pattern with values of M(30 meters) + p16_map <- Smooth(paracou16, fvind, ReferenceType, distance=30) + plot(p16_map, main = "") + # Add the reference points to the plot + is.ReferenceType <- paracou16$marks$PointType == ReferenceType + points(x=paracou16$x[is.ReferenceType], y=paracou16$y[is.ReferenceType], pch=20) + # Add contour lines + contour(p16_map, nlevels = 5, add = TRUE) +} From cce9928c8f9ff48fb29b13fd566af697e07139e9 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Tue, 25 Jul 2023 11:14:50 +0200 Subject: [PATCH 14/29] Attributes to save ReferencePoint and Alpha --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/Mhat.R | 11 +++++++---- R/Smooth.wmppp.R | 23 +++++++++++++++++++---- man/Smooth.wmppp.Rd | 15 ++++++--------- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81dd7ba..62f9cad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dbmss Type: Package Title: Distance-Based Measures of Spatial Structures -Version: 2.8-2.9102 +Version: 2.8-2.9103 Authors@R: c( person("Eric", "Marcon", email="eric.marcon@agroparistech.fr", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5249-321X")), person("Gabriel", "Lang", email="gabriel.lang@agroparistech.fr", role="aut", comment = c(ORCID = "0000-0002-4325-6044")), diff --git a/NEWS.md b/NEWS.md index 0061e7d..fd946dd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# dbmss 2.8-2.9102 +# dbmss 2.8-2.9103 ## Improvements diff --git a/R/Mhat.R b/R/Mhat.R index ec19f33..e7f2e14 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -32,10 +32,8 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, IsNeighborType <- X$marks$PointType==NeighborType if (!is.null(ReferencePoint)) { - if (!Individual) { - warning("The reference point is ignored because Individual is FALSE.") - ReferencePoint <- NULL - } + # Set individual to TRUE if a refernce point is given + Individual <- TRUE if (IsReferenceType[ReferencePoint]) { # Remember the name of the reference point in the dataset of reference type ReferencePoint_name <- row.names(X$marks[ReferencePoint, ]) @@ -185,7 +183,12 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, if (verbose) close(ProgressBar) # Save the quantiles as an attribute of the fv attr(M, "Quantiles") <- MQuantiles + attr(M, "Alpha") <- Alpha } + if (Individual & is.null(ReferencePoint)) { + # Save the reference type for future smoothing + attr(M, "ReferenceType") <- ReferenceType + } return(M) } diff --git a/R/Smooth.wmppp.R b/R/Smooth.wmppp.R index fd0b145..c214a8b 100644 --- a/R/Smooth.wmppp.R +++ b/R/Smooth.wmppp.R @@ -1,5 +1,5 @@ -Smooth.wmppp <- function(X, fvind, ReferenceType = "", distance = NULL, - Quantiles = FALSE, Weighted = TRUE, Adjust = 1, +Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, + Weighted = TRUE, Adjust = 1, Nbx = 128, Nby = 128,..., CheckArguments = TRUE) { # Check the arguments @@ -7,6 +7,18 @@ Smooth.wmppp <- function(X, fvind, ReferenceType = "", distance = NULL, CheckdbmssArguments() } + if (Quantiles) { + # Read the risk level in fvind + Alpha <- attr(fvind, "Alpha") + if (is.null(Alpha)) + stop("The risk level 'Alpha' could not be read in 'fvind'. Was it computed with argument 'Quantiles = TRUE' ?") + } + + # Read the reference type in fvind + ReferenceType <- attr(fvind, "ReferenceType") + if (is.null(ReferenceType)) + stop("The refence type could not be read in 'fvind'. Was it computed with argument 'Indivivual = TRUE' ?") + # Reduce the point pattern to the reference type if (ReferenceType != "") { is_ReferenceType <- X$marks$PointType == ReferenceType @@ -45,7 +57,10 @@ Smooth.wmppp <- function(X, fvind, ReferenceType = "", distance = NULL, is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) + Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) + # Statistical significance saved in attributes + attr(Image, "High") <- X$marks >= 1 - Alpha / 2 + attr(Image, "Low") <- X$marks <= Alpha / 2 } else { # Smooth the values of the dbm fvind.matrix <- as.matrix(fvind) @@ -57,7 +72,7 @@ Smooth.wmppp <- function(X, fvind, ReferenceType = "", distance = NULL, is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nbx, Nby)) + Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) } return(Image) } \ No newline at end of file diff --git a/man/Smooth.wmppp.Rd b/man/Smooth.wmppp.Rd index 59aeec2..8fa6d92 100644 --- a/man/Smooth.wmppp.Rd +++ b/man/Smooth.wmppp.Rd @@ -2,9 +2,9 @@ \alias{Smooth.wmppp} \title{Spatial smoothing of individual dbmss's} \usage{ - \method{Smooth}{wmppp} (X, fvind, ReferenceType = "", distance = NULL, - Quantiles = FALSE, Weighted = TRUE, Adjust = 1, - Nbx = 128, Nby = 128, \dots, CheckArguments = TRUE) + \method{Smooth}{wmppp} (X, fvind, distance = NULL, Quantiles = FALSE, + Weighted = TRUE, Adjust = 1, + Nbx = 128, Nby = 128, \dots, CheckArguments = TRUE) } \description{ Performs spatial smoothing of the individual values of distance-based measures computed in the neighborhood of each point. @@ -16,10 +16,6 @@ \item{fvind}{ An object of class \code{fv}, see \code{\link{fv.object}}, obtained a distance-based method, such as \code{\link{Mhat}} with individual values (argument \code{Individual = TRUE}). } - \item{ReferenceType}{ - The point type used to calculate the function values. - The default value is "", i.e. all point types, but it will generate an error if the actual reference type is different. - } \item{distance}{ The distance at which the function value must be considered. The default value is the median distance used to calculate the function values. @@ -48,14 +44,15 @@ } \value{ An image that can be plotted. + If quantiles are computed, attributes "High" and "Low" contain logical vectors to indentify significantly high and low quantiles ( ) } \examples{ ReferenceType <- "V. Americana" NeighborType <- "Q. Rosea" # Calculate individual intertype M(distance) values - fvind <- Mhat(paracou16, , ReferenceType, NeighborType, Individual=TRUE) + fvind <- Mhat(paracou16, r=c(0, 30), ReferenceType, NeighborType, Individual=TRUE) # Plot the point pattern with values of M(30 meters) - p16_map <- Smooth(paracou16, fvind, ReferenceType, distance=30) + p16_map <- Smooth(paracou16, fvind, distance=30) plot(p16_map, main = "") # Add the reference points to the plot is.ReferenceType <- paracou16$marks$PointType == ReferenceType From 2254f8c7cae68814737723a532d7788517bc15e7 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Tue, 25 Jul 2023 13:42:27 +0200 Subject: [PATCH 15/29] doubled the bandwidth --- R/Smooth.wmppp.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Smooth.wmppp.R b/R/Smooth.wmppp.R index c214a8b..7c27a88 100644 --- a/R/Smooth.wmppp.R +++ b/R/Smooth.wmppp.R @@ -57,7 +57,7 @@ Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) + Image <- Smooth.ppp(X, sigma = 2*r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) # Statistical significance saved in attributes attr(Image, "High") <- X$marks >= 1 - Alpha / 2 attr(Image, "Low") <- X$marks <= Alpha / 2 @@ -72,7 +72,7 @@ Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) + Image <- Smooth.ppp(X, sigma = 2*r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) } return(Image) } \ No newline at end of file From db9243b7e1e784a424d574be0479543c906e0bc5 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Wed, 26 Jul 2023 17:06:18 +0200 Subject: [PATCH 16/29] Bandwidth selection according to Scott 1992 --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/Smooth.wmppp.R | 37 ++++++++++++++++++++++--------------- man/Smooth.wmppp.Rd | 11 ++++++++--- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 62f9cad..5c14c97 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dbmss Type: Package Title: Distance-Based Measures of Spatial Structures -Version: 2.8-2.9103 +Version: 2.8-2.9104 Authors@R: c( person("Eric", "Marcon", email="eric.marcon@agroparistech.fr", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5249-321X")), person("Gabriel", "Lang", email="gabriel.lang@agroparistech.fr", role="aut", comment = c(ORCID = "0000-0002-4325-6044")), diff --git a/NEWS.md b/NEWS.md index fd946dd..d5f935f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# dbmss 2.8-2.9103 +# dbmss 2.8-2.9104 ## Improvements diff --git a/R/Smooth.wmppp.R b/R/Smooth.wmppp.R index 7c27a88..94768e8 100644 --- a/R/Smooth.wmppp.R +++ b/R/Smooth.wmppp.R @@ -1,5 +1,5 @@ Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, - Weighted = TRUE, Adjust = 1, + sigma = bw.scott(X, isotropic = TRUE), Weighted = TRUE, Adjust = 1, Nbx = 128, Nby = 128,..., CheckArguments = TRUE) { # Check the arguments @@ -9,9 +9,10 @@ Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, if (Quantiles) { # Read the risk level in fvind - Alpha <- attr(fvind, "Alpha") - if (is.null(Alpha)) + if (is.null(attr(fvind, "Alpha"))) stop("The risk level 'Alpha' could not be read in 'fvind'. Was it computed with argument 'Quantiles = TRUE' ?") + if (is.null(attr(fvind, "Quantiles"))) + stop("The quantiles of 'fvind' are not available. Was it computed with argument 'Quantiles = TRUE' ?") } # Read the reference type in fvind @@ -43,24 +44,23 @@ Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, weights <- rep(1, X$n) } + # Read the attributes of the fvind + if (!is.null(attr(fvind, "Alpha"))) { + Alpha <- attr(fvind, "Alpha") + Qvalues <- attr(fvind, "Quantiles")[which(rownames(attr(fvind, "Quantiles")) == r_to_plot), ] + } + if (Quantiles) { - # Smooth the quantiles of the dbmss - if (is.null(attr(fvind, "Quantiles"))) - stop("The quantiles of fvind are not available.") + # Smooth the quantiles of the dbm # Make the quantiles the marks of X - X$marks <- attr(fvind, "Quantiles")[ - which(rownames(attr(fvind, "Quantiles")) == r_to_plot), - ] - # Smooth requires the top class of X to be ppp + X$marks <- Qvalues + # Smooth() requires the top class of X to be ppp class(X) <- "ppp" # Eliminate NA's before smoothing is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = 2*r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) - # Statistical significance saved in attributes - attr(Image, "High") <- X$marks >= 1 - Alpha / 2 - attr(Image, "Low") <- X$marks <= Alpha / 2 + Image <- Smooth.ppp(X, sigma = sigma, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) } else { # Smooth the values of the dbm fvind.matrix <- as.matrix(fvind) @@ -72,7 +72,14 @@ Smooth.wmppp <- function(X, fvind, distance = NULL, Quantiles = FALSE, is_na <- is.na(X$marks) weights <- weights[!is_na] X<- X[!is_na] - Image <- Smooth.ppp(X, sigma = 2*r_to_plot, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) + Image <- Smooth.ppp(X, sigma = sigma, ..., weights = weights, adjust = Adjust, dimyx = c(Nby, Nbx)) + } + # Statistical significance saved in attributes + if (!is.null(attr(fvind, "Alpha"))) { + # Eliminate NAs to obtain FALSE in attributes High and Low + Qvalues[is.na(Qvalues)] <- 0.5 + attr(Image, "High") <- Qvalues >= 1 - Alpha / 2 + attr(Image, "Low") <- Qvalues <= Alpha / 2 } return(Image) } \ No newline at end of file diff --git a/man/Smooth.wmppp.Rd b/man/Smooth.wmppp.Rd index 8fa6d92..81c6d11 100644 --- a/man/Smooth.wmppp.Rd +++ b/man/Smooth.wmppp.Rd @@ -3,7 +3,7 @@ \title{Spatial smoothing of individual dbmss's} \usage{ \method{Smooth}{wmppp} (X, fvind, distance = NULL, Quantiles = FALSE, - Weighted = TRUE, Adjust = 1, + sigma = bw.scott(X, isotropic = TRUE), Weighted = TRUE, Adjust = 1, Nbx = 128, Nby = 128, \dots, CheckArguments = TRUE) } \description{ @@ -27,8 +27,13 @@ \item{Weighted}{ If \code{TRUE} (default), the point weights are taken into account for smoothing. } + \item{sigma}{ + The bandwidth used for smoothing. + A Gaussian kernel is used (see \code{\link{Smooth.ppp}}). + Its bandwidth is chosen by default according to Scott's rule (see \code{\link{bw.scott}}). + } \item{Adjust}{ - Force the selected bandwidth (equal to \code{distance}) to be multiplied by \code{Adjust}. Setting it to values greater than one (2 for example) will smooth the estimation. + Force the selected bandwidth (\code{sigma}) to be multiplied by \code{Adjust}. Setting it to values smaller than one (1/2 for example) will sharpen the estimation. } \item{Nbx, Nby}{ The number of columns and rows (pixels) of the resulting map, 128 by default. @@ -44,7 +49,7 @@ } \value{ An image that can be plotted. - If quantiles are computed, attributes "High" and "Low" contain logical vectors to indentify significantly high and low quantiles ( ) + If quantiles have been computed in \code{fvind}, attributes "High" and "Low" contain logical vectors to indentify significantly high and low quantiles. } \examples{ ReferenceType <- "V. Americana" From bccc33f0c43e386b2423dc3ee3344b18c82604ea Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Fri, 29 Sep 2023 10:55:59 +0200 Subject: [PATCH 17/29] deleted the reference to docsearch --- pkgdown/_pkgdown.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index caa9c62..e8e222f 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -5,10 +5,6 @@ development: template: bootstrap: 5 - params: - docsearch: - api_key: 34d07893d7877757953d7bd46f236a5e - index_name: dbmss reference: - title: Package summary From 9237f616c5f2c3b4316eba667be6d6983dcce0e2 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Fri, 29 Sep 2023 15:53:57 +0200 Subject: [PATCH 18/29] added checked arguments --- R/CheckdbmssArguments.R | 175 +++++++++++++++++++++++++++------------- 1 file changed, 119 insertions(+), 56 deletions(-) diff --git a/R/CheckdbmssArguments.R b/R/CheckdbmssArguments.R index 808c5d1..0fd557c 100644 --- a/R/CheckdbmssArguments.R +++ b/R/CheckdbmssArguments.R @@ -29,6 +29,7 @@ function() { stop("Check the function arguments.", call. = FALSE) } + # Main arguments ---- # Get the point pattern or the Dtable X <- eval(expression(X), parent.frame()) @@ -37,7 +38,6 @@ function() { if (!(inherits(X, "wmppp") | (inherits(X, "Dtable")))) ErrorMessage("X must be of class wmppp or Dtable", X) } - # r if (!is.na(names(Args["r"]))) { r <- eval(expression(r), parent.frame()) @@ -52,7 +52,6 @@ function() { ErrorMessage("successive values of r must be increasing", r) } } - # ReferenceType if (!is.na(names(Args["ReferenceType"]))) { ReferenceType <- eval(expression(ReferenceType), parent.frame()) @@ -80,129 +79,193 @@ function() { } } - # CaseControl - if (!is.na(names(Args["CaseControl"]))) { - CaseControl <- eval(expression(CaseControl), parent.frame()) - if (!is.logical(CaseControl)) - ErrorMessage("CaseControl must be TRUE or FALSE", CaseControl) - } - # Intertype - if (!is.na(names(Args["Intertype"]))) { - Intertype <- eval(expression(Intertype), parent.frame()) - if (!is.logical(Intertype)) - ErrorMessage("Intertype must be TRUE or FALSE", Intertype) - } - # Weighted - if (!is.na(names(Args["Weighted"]))) { - Weighted <- eval(expression(Weighted), parent.frame()) - if (!is.logical(Weighted)) - ErrorMessage("Weighted must be TRUE or FALSE", Weighted) - } - # Original - if (!is.na(names(Args["Original"]))) { - Original <- eval(expression(Original), parent.frame()) - if (!is.logical(Original)) - ErrorMessage("Original must be TRUE or FALSE", Original) - } - - # lambda - if (!is.na(names(Args["lambda"]))) { - lambda <- eval(expression(lambda), parent.frame()) - if (!is.null(lambda)) { - if (!inherits(lambda, "im") & !is.numeric(lambda)) - ErrorMessage("lambda must be an image of class im or a numeric vector", lambda) - } - } - # NumberOfSimulations - if (!is.na(names(Args["NumberOfSimulations"]))) { - NumberOfSimulations <- eval(expression(NumberOfSimulations), parent.frame()) - if (!is.numeric(NumberOfSimulations)) - ErrorMessage("NumberOfSimulations must be a number", NumberOfSimulations) - if (NumberOfSimulations <= 0) - ErrorMessage("NumberOfSimulations must be positive", NumberOfSimulations) - } + # Other arguments ---- # Alpha if (!is.na(names(Args["Alpha"]))) { Alpha <- eval(expression(Alpha), parent.frame()) if (!is.numeric(Alpha)) ErrorMessage("Alpha must be a number", Alpha) + if (length(Alpha) > 1) + ErrorMessage(paste("Alpha must be a single number, not a vector of length", length(Alpha)), Alpha) if (Alpha < 0) ErrorMessage("Alpha must be positive", Alpha) } - # alpha if (!is.na(names(Args["alpha"]))) { alpha <- eval(expression(alpha), parent.frame()) if (!is.numeric(alpha)) ErrorMessage("alpha must be a number", alpha) + if (length(alpha) > 1) + ErrorMessage(paste("alpha must be a single number, not a vector of length", length(alpha)), alpha) if (alpha < 0) ErrorMessage("alpha must be positive", alpha) if (alpha > 1) ErrorMessage("alpha must be less than or equal to 1", alpha) } - # Adjust if (!is.na(names(Args["Adjust"]))) { Adjust <- eval(expression(Adjust), parent.frame()) if (!is.numeric(Adjust)) ErrorMessage("Adjust must be a number", Adjust) + if (length(Adjust) > 1) + ErrorMessage(paste("Adjust must be a single number, not a vector of length", length(Adjust)), Adjust) if (Adjust<=0) ErrorMessage("Adjust must be strictly positive", Adjust) } - # Approximate if (!is.na(names(Args["Approximate"]))) { Approximate <- eval(expression(Approximate), parent.frame()) if (!is.numeric(Approximate)) ErrorMessage("Approximate must be a number", Approximate) + if (length(Approximate) > 1) + ErrorMessage(paste("Approximate must be a single number, not a vector of length", length(Approximate)), Approximate) if (Approximate < 0) ErrorMessage("Approximate must be positive", Approximate) } - - # StartFromMinR - if (!is.na(names(Args["StartFromMinR"]))) { - StartFromMinR <- eval(expression(StartFromMinR), parent.frame()) - if (!is.logical(StartFromMinR)) - ErrorMessage("StartFromMinR must be TRUE or FALSE", StartFromMinR) + # CaseControl + if (!is.na(names(Args["CaseControl"]))) { + CaseControl <- eval(expression(CaseControl), parent.frame()) + if (!is.logical(CaseControl)) + ErrorMessage("CaseControl must be TRUE or FALSE", CaseControl) + } + # distance + if (!is.na(names(Args["distance"]))) { + distance <- eval(expression(distance), parent.frame()) + if (!is.numeric(distance)) + ErrorMessage("distance must be a number", distance) + if (length(distance) > 1) + ErrorMessage(paste("distance must be a single number, not a vector of length", length(distance)), distance) + if (distance < 0) + ErrorMessage("distance must be positive", distance) + } + # fvind + if (!is.na(names(Args["fvind"]))) { + fvind <- eval(expression(fvind), parent.frame()) + if (!(inherits(fvind, "fv"))) + ErrorMessage("fvind must be of class fv", fvind) } - # Individual if (!is.na(names(Args["Individual"]))) { Individual <- eval(expression(Individual), parent.frame()) if (!is.logical(Individual)) ErrorMessage("Individual must be TRUE or FALSE", Individual) } - + # Intertype + if (!is.na(names(Args["Intertype"]))) { + Intertype <- eval(expression(Intertype), parent.frame()) + if (!is.logical(Intertype)) + ErrorMessage("Intertype must be TRUE or FALSE", Intertype) + } + # lambda + if (!is.na(names(Args["lambda"]))) { + lambda <- eval(expression(lambda), parent.frame()) + if (!is.null(lambda)) { + if (!inherits(lambda, "im") & !is.numeric(lambda)) + ErrorMessage("lambda must be an image of class im or a numeric vector", lambda) + } + } + # NumberOfSimulations + if (!is.na(names(Args["NumberOfSimulations"]))) { + NumberOfSimulations <- eval(expression(NumberOfSimulations), parent.frame()) + if (!is.numeric(NumberOfSimulations)) + ErrorMessage("NumberOfSimulations must be a number", NumberOfSimulations) + if (length(NumberOfSimulations) > 1) + ErrorMessage(paste("NumberOfSimulations must be a single number, not a vector of length", length(NumberOfSimulations)), NumberOfSimulations) + if (NumberOfSimulations <= 0) + ErrorMessage("NumberOfSimulations must be positive", NumberOfSimulations) + } + # Nbx + if (!is.na(names(Args["Nbx"]))) { + Nbx <- eval(expression(Nbx), parent.frame()) + if (!is.numeric(Nbx)) + ErrorMessage("Nbx must be a number", Nbx) + if (length(Nbx) > 1) + ErrorMessage(paste("Nbx must be a single number, not a vector of length", length(Nbx)), Nbx) + if (Nbx < 0) + ErrorMessage("Nbx must be positive", Nbx) + } + # Nby + if (!is.na(names(Args["Nby"]))) { + Nby <- eval(expression(Nby), parent.frame()) + if (!is.numeric(Nby)) + ErrorMessage("Nby must be a number", Nby) + if (length(Nby) > 1) + ErrorMessage(paste("Nby must be a single number, not a vector of length", length(Nby)), Nby) + if (Nby < 0) + ErrorMessage("Nby must be positive", Nby) + } + # Original + if (!is.na(names(Args["Original"]))) { + Original <- eval(expression(Original), parent.frame()) + if (!is.logical(Original)) + ErrorMessage("Original must be TRUE or FALSE", Original) + } # Precision if (!is.na(names(Args["Precision"]))) { Precision <- eval(expression(Precision), parent.frame()) if (!is.numeric(Precision)) ErrorMessage("Precision must be a number", Precision) + if (length(Precision) > 1) + ErrorMessage(paste("Precision must be a single number, not a vector of length", length(Precision)), Precision) if (Precision < 0) ErrorMessage("Precision must be positive", Precision) } - + # Quantiles + if (!is.na(names(Args["Quantiles"]))) { + Quantiles <- eval(expression(Quantiles), parent.frame()) + if (!is.logical(Quantiles)) + ErrorMessage("Quantiles must be TRUE or FALSE", Quantiles) + } # ReferencePoint if (!is.na(names(Args["ReferencePoint"]))) { ReferencePoint <- eval(expression(ReferencePoint), parent.frame()) if (!is.null(ReferencePoint)) { if (!is.numeric(ReferencePoint)) ErrorMessage("ReferencePoint must be a number", ReferencePoint) + if (length(ReferencePoint) > 1) + ErrorMessage(paste("ReferencePoint must be a single number, not a vector of length", length(ReferencePoint)), ReferencePoint) if (ReferencePoint <= 0) ErrorMessage("ReferencePoint must be positive", ReferencePoint) if (as.integer(ReferencePoint) != ReferencePoint) ErrorMessage("ReferencePoint must be an integer", ReferencePoint) } } - # show.window if (!is.na(names(Args["show.window"]))) { show.window <- eval(expression(show.window), parent.frame()) if (!is.logical(show.window)) ErrorMessage("show.window must be TRUE or FALSE", show.window) } + # sigma + if (!is.na(names(Args["sigma"]))) { + sigma <- eval(expression(sigma), parent.frame()) + if (!is.numeric(sigma)) + ErrorMessage("sigma must be a number", sigma) + if (length(sigma) > 1) + ErrorMessage(paste("sigma must be a single number, not a vector of length", length(sigma)), sigma) + if (sigma < 0) + ErrorMessage("sigma must be positive", sigma) + } + # StartFromMinR + if (!is.na(names(Args["StartFromMinR"]))) { + StartFromMinR <- eval(expression(StartFromMinR), parent.frame()) + if (!is.logical(StartFromMinR)) + ErrorMessage("StartFromMinR must be TRUE or FALSE", StartFromMinR) + } + # verbose + if (!is.na(names(Args["verbose"]))) { + verbose <- eval(expression(verbose), parent.frame()) + if (!is.logical(verbose)) + ErrorMessage("verbose must be TRUE or FALSE", verbose) + } + # Weighted + if (!is.na(names(Args["Weighted"]))) { + Weighted <- eval(expression(Weighted), parent.frame()) + if (!is.logical(Weighted)) + ErrorMessage("Weighted must be TRUE or FALSE", Weighted) + } return (TRUE) } From 0b9b561f61153c6da7c908df0d572750b59f7145 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Sat, 9 Dec 2023 10:11:33 +0100 Subject: [PATCH 19/29] archived kwmppp() --- .#Extra/kwmppp/ReadMe.txt | 0 {R => .#Extra/kwmppp}/kwmppp.R | 0 {man => .#Extra/kwmppp}/kwmppp.Rd | 0 DESCRIPTION | 4 +--- 4 files changed, 1 insertion(+), 3 deletions(-) create mode 100644 .#Extra/kwmppp/ReadMe.txt rename {R => .#Extra/kwmppp}/kwmppp.R (100%) rename {man => .#Extra/kwmppp}/kwmppp.Rd (100%) diff --git a/.#Extra/kwmppp/ReadMe.txt b/.#Extra/kwmppp/ReadMe.txt new file mode 100644 index 0000000..e69de29 diff --git a/R/kwmppp.R b/.#Extra/kwmppp/kwmppp.R similarity index 100% rename from R/kwmppp.R rename to .#Extra/kwmppp/kwmppp.R diff --git a/man/kwmppp.Rd b/.#Extra/kwmppp/kwmppp.Rd similarity index 100% rename from man/kwmppp.Rd rename to .#Extra/kwmppp/kwmppp.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5c14c97..5f7bb28 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,8 +18,7 @@ Depends: R (>= 3.5.0), Rcpp (>= 0.12.14), spatstat.explore -Imports: - automap, +Imports: cubature, dplyr, ggplot2, @@ -31,7 +30,6 @@ Imports: tibble, tidyr, tidyselect, - sp, spatstat.geom, spatstat.random Suggests: From 689e2dcb552d69eba7f6842fbf9b7c1a797cee37 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Sat, 9 Dec 2023 10:27:33 +0100 Subject: [PATCH 20/29] referenced Marcon and Puech (2023) --- man/Smooth.wmppp.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/Smooth.wmppp.Rd b/man/Smooth.wmppp.Rd index 81c6d11..285a49e 100644 --- a/man/Smooth.wmppp.Rd +++ b/man/Smooth.wmppp.Rd @@ -7,7 +7,7 @@ Nbx = 128, Nby = 128, \dots, CheckArguments = TRUE) } \description{ - Performs spatial smoothing of the individual values of distance-based measures computed in the neighborhood of each point. + Performs spatial smoothing of the individual values of distance-based measures computed in the neighborhood of each point (Marcon and Puech, 2023). } \arguments{ \item{X}{ @@ -51,6 +51,9 @@ An image that can be plotted. If quantiles have been computed in \code{fvind}, attributes "High" and "Low" contain logical vectors to indentify significantly high and low quantiles. } +\references{ + Marcon, E. and Puech, F. (2023). Mapping distributions in non-homogeneous space with distance-based methods. \emph{Journal of Spatial Econometrics} 4(1), 13. +} \examples{ ReferenceType <- "V. Americana" NeighborType <- "Q. Rosea" From 3b5eba2f10373762a0a940916932e5d8f08263a1 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Sat, 9 Dec 2023 10:32:53 +0100 Subject: [PATCH 21/29] delete useless code swmppp() was renamed kwmppp() and archived --- R/swmppp.R | 61 ------------------------------------------------------ 1 file changed, 61 deletions(-) delete mode 100644 R/swmppp.R diff --git a/R/swmppp.R b/R/swmppp.R deleted file mode 100644 index b23538e..0000000 --- a/R/swmppp.R +++ /dev/null @@ -1,61 +0,0 @@ -swmppp <- - function (X, fvind, ReferenceType = "", distance = stats::median(fvind$r), - AllowJitter = TRUE, Nbx = 128, Nby = 128, - Adjust = 1, CheckArguments = TRUE) -{ - if (CheckArguments) - CheckdbmssArguments() - - # Reduce the point pattern to the reference type - if (ReferenceType != "") { - is_ReferenceType <- X$marks$PointType == ReferenceType - X <- X[is_ReferenceType] - } - - # Check the consistency between X and fvind - if (X$n != sum(startsWith(colnames(fvind), paste0(attr(fvind, "valu"), "_")))) - stop(paste("The number of reference points in the function value is different from \n", - "that of the reference points of the spatialized community")) - - # Jitter - if (AllowJitter) { - # Find duplicates - Dups <- spatstat.geom::duplicated.ppp(X, rule="unmark") - if (sum(Dups)>0) { - # Extract the duplicates and jitter them - Dupswmppp <- spatstat.geom::rjitter(X[Dups]) - # Put the coordinates back into the original wmppp - X$x[Dups] <- Dupswmppp$x - X$y[Dups] <- Dupswmppp$y - } - } - - # Find the max r value of fvind lower than or equal to argument distance - r_to_plot <- max(fvind$r[fvind$r<=distance]) - - # Format the value to plot - df <- as.data.frame(fvind) - # Pivot longer. Columns are named M_1, etc. for function M - df <- tidyr::pivot_longer( - df, - cols = tidyselect::starts_with(paste0(attr(fvind, "valu"),"_")), - names_to = "point", - values_to = "dbmss" - ) - # Filter the appropriate distance - df <- dplyr::filter( - df, - .data$r == r_to_plot - ) - # Select the function value column only - df <- dplyr::select( - df, - .data$dbmss - ) - # Make it the marks of X - X$marks <- df - - # Smoothed value of the dbmss. - # Bandwidth is r/2 so that 95% of the weight is each point's neighborhood - return(spatstat.explore::Smooth.ppp(X, sigma = r_to_plot/2, adjust = Adjust)) -} From dec0e777412ae940e93e9759528e475f394522db Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 15:26:35 +0100 Subject: [PATCH 22/29] archived kwmppp() --- .#Extra/kwmppp/ReadMe.txt | 13 +++++++++++++ DESCRIPTION | 3 --- NAMESPACE | 2 -- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.#Extra/kwmppp/ReadMe.txt b/.#Extra/kwmppp/ReadMe.txt index e69de29..5f56672 100644 --- a/.#Extra/kwmppp/ReadMe.txt +++ b/.#Extra/kwmppp/ReadMe.txt @@ -0,0 +1,13 @@ +kmwmppp() is the abandonned alternative to Smooth.ppp(). + +It relies on ordinary kriging instead of smoothing to produce maps of dbmss individual values. + +It requires importing + automap, + dplyr, + sp, + tidyr, + tidyselect, +in DESCRIPTION and exporting kwmppp() and plot.kwmppp(): + export("kwmppp") + S3method("plot", "kwmppp") diff --git a/DESCRIPTION b/DESCRIPTION index 5f7bb28..6e4c486 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,6 @@ Depends: spatstat.explore Imports: cubature, - dplyr, ggplot2, RcppParallel, reshape2, @@ -28,8 +27,6 @@ Imports: spatstat.utils, stats, tibble, - tidyr, - tidyselect, spatstat.geom, spatstat.random Suggests: diff --git a/NAMESPACE b/NAMESPACE index f2fe804..9b1e3a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,7 +30,6 @@ export("Kinhomhat") export("KmmEnvelope") export("Kmmhat") export("Ktest") -export("kwmppp") export("LEnvelope") export("Lhat") export("LmmEnvelope") @@ -55,7 +54,6 @@ S3method("autoplot", "envelope") S3method("autoplot", "fv") S3method("autoplot", "wmppp") S3method("envelope", "Dtable") -S3method("plot", "kwmppp") S3method("print", "dbmssEnvelope") S3method("sharpen", "wmppp") S3method("Smooth", "wmppp") From b1ce337f3eb63af5f42938a467ae754e81fb623e Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 15:38:06 +0100 Subject: [PATCH 23/29] archived local CI --- .#Extra/Local CI/Mhat.R | 194 ++++++++++++++++++++++++++++ .#Extra/Local CI/Mhat.Rd | 81 ++++++++++++ .#Extra/Local CI/ReadMe.txt | 8 ++ .#Extra/Local CI/rRandomLocation.R | 63 +++++++++ .#Extra/Local CI/rRandomLocation.Rd | 57 ++++++++ R/Mhat.R | 113 ++-------------- R/rRandomLocation.R | 22 +--- man/Mhat.Rd | 21 +-- man/rRandomLocation.Rd | 9 +- 9 files changed, 418 insertions(+), 150 deletions(-) create mode 100644 .#Extra/Local CI/Mhat.R create mode 100644 .#Extra/Local CI/Mhat.Rd create mode 100644 .#Extra/Local CI/ReadMe.txt create mode 100644 .#Extra/Local CI/rRandomLocation.R create mode 100644 .#Extra/Local CI/rRandomLocation.Rd diff --git a/.#Extra/Local CI/Mhat.R b/.#Extra/Local CI/Mhat.R new file mode 100644 index 0000000..407a69a --- /dev/null +++ b/.#Extra/Local CI/Mhat.R @@ -0,0 +1,194 @@ +Mhat <- +function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, + CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, + Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, + verbose = interactive(), CheckArguments = TRUE) +{ + # Eliminate erroneous configurations + if (CheckArguments) { + CheckdbmssArguments() + if (CaseControl & (ReferenceType==NeighborType)) { + warning("Cases and controls are identical.") + return(rep(1,length(r))) + } + if (Quantiles & !Individual) + stop("Quantiles can't be TRUE if Individual is FALSE.") + } + + # Default r values: 64 values up to half the max distance + if (is.null(r)) { + if (inherits(X, "Dtable")) { + # Dtable case + rMax <- max(X$Dmatrix) + } else { + # wmppp case + rMax <- diameter(X$window) + } + r <- rMax*c(0, 1:20, seq(22, 40, 2), seq(45, 100,5), seq(110, 200, 10), seq(220, 400, 20))/800 + } + + # Vectors to recognize point types + IsReferenceType <- X$marks$PointType==ReferenceType + IsNeighborType <- X$marks$PointType==NeighborType + + if (!is.null(ReferencePoint)) { + # Set individual to TRUE if a refernce point is given + Individual <- TRUE + if (IsReferenceType[ReferencePoint]) { + # Remember the name of the reference point in the dataset of reference type + ReferencePoint_name <- row.names(X$marks[ReferencePoint, ]) + } else { + # The reference point must be in the reference type + stop("The reference point must be of the reference point type.") + } + } + + # Global ratio + if (ReferenceType==NeighborType | CaseControl) { + WrMinusReferencePoint <- sum(X$marks$PointWeight[IsReferenceType])-X$marks$PointWeight + Wn <- WrMinusReferencePoint[IsReferenceType] + } else { + Wn <- sum(X$marks$PointWeight[IsNeighborType]) + } + if (CaseControl) { + Wa <- sum(X$marks$PointWeight[IsNeighborType]) + } else { + WaMinusReferencePoint <- sum(X$marks$PointWeight)-X$marks$PointWeight + Wa <- WaMinusReferencePoint[IsReferenceType] + } + GlobalRatio <- Wn/Wa + + Nr <- length(r) + # Neighborhoods (i.e. all neighbors of a point less than a distance apart) + # Store weights of neighbors of interest in first Nr columns, all points from Nr+1 to 2*Nr + + # Call C routine to fill Nbd + if (CaseControl) { + if (inherits(X, "Dtable")) { + # Dtable case + Nbd <- parallelCountNbdDtCC(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType) + } else { + # wmppp case + Nbd <- parallelCountNbdCC(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType) + } + } else { + if (inherits(X, "Dtable")) { + # Dtable case + Nbd <- parallelCountNbdDt(r, X$Dmatrix, X$marks$PointWeight, IsReferenceType, IsNeighborType) + } else { + # wmppp case + Nbd <- parallelCountNbd(r, X$x, X$y, X$marks$PointWeight, IsReferenceType, IsNeighborType) + } + } + + # Cumulate weights up to each distance + NbdInt <- t(apply(Nbd[, seq_len(Nr)], 1, cumsum)) + NbdAll <- t(apply(Nbd[, (Nr + 1):(2 * Nr)], 1, cumsum)) + + # Calculate the ratio of points of interest around each point + LocalRatio <- NbdInt/NbdAll + + if (is.null(ReferencePoint)) { + # Divide it by the global ratio. Ignore points with no neighbor at all. + Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)])) + # Keep individual values + if (Individual) { + Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio)) + } + } else { + # Find the reference point in the set of points of the reference type + ReferencePoint_index <- which(rownames(X[IsReferenceType]$marks) == ReferencePoint_name) + # Only keep the value of the reference point + Mvalues <- LocalRatio[ReferencePoint_index, ]/GlobalRatio[ReferencePoint_index] + } + + # Put the results into an fv object + MEstimate <- data.frame(r, rep(1, length(r)), Mvalues) + ColNames <- c("r", "theo", "M") + Labl <- c("r", "%s[theo](r)", "hat(%s)(r)") + Desc <- c("Distance argument r", "Theoretical independent %s", "Estimated %s") + if (Individual & is.null(ReferencePoint)) { + # ColNumbers will usually be line numbers of the marks df, but may be real names. + ColNumbers <- row.names(X$marks[IsReferenceType, ]) + ColNames <- c(ColNames, paste("M", ColNumbers, sep="_")) + Labl <- c(Labl, paste("hat(%s)[", ColNumbers, "](r)", sep="")) + Desc <- c(Desc, paste("Individual %s around point", ColNumbers)) + } + colnames(MEstimate) <- ColNames + + # Make an fv object + M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M", + fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl, + desc=Desc, unitname=X$window$unit, fname="M") + fvnames(M, ".") <- ColNames[-1] + + + # Calculate the quantiles of the individual values with respect to the null hypothesis + if (Quantiles) { + # Run Monte Carlo simulations of Mhat for each reference point + nReferencePoints <- sum(IsReferenceType) + # Prepare a matrix to save quantiles + MQuantiles <- matrix(0, nrow = length(r), ncol = nReferencePoints) + colnames(MQuantiles) <- paste("M", ColNumbers, sep="_") + rownames(MQuantiles) <- r + if (verbose) ProgressBar <- utils::txtProgressBar(min = 0, max = nReferencePoints) + for (i in seq_len(nReferencePoints)) { + # Null hypothesis + SimulatedPP <- expression( + rRandomLocation( + X, + ReferencePoint = which(X$marks$PointType == ReferenceType)[i], + CheckArguments = FALSE + ) + ) + # Compute the simulations. The envelope is useless: we need the simulated values. + Envelope <- envelope( + # The value Mhat(X) is not used but the point #1 must be of ReferenceType + # so retain points of ReferenceType only to save time + X[X$marks$PointType == ReferenceType], + fun = Mhat, + nsim = NumberOfSimulations, + # nrank may be any value because the envelope is not used + nrank = 1, + # Arguments for Mhat() + r = r, + ReferenceType = ReferenceType, + NeighborType = NeighborType, + CaseControl = CaseControl, + Individual = TRUE, + # The reference point is always 1 after rRandomLocation() + ReferencePoint = 1, + CheckArguments = FALSE, + # Arguments for envelope() + simulate = SimulatedPP, + # Do not show the progress + verbose = FALSE, + # Save individual simulations into attribute simfuns + savefuns = TRUE + ) + # Get the distribution of simulated values (eliminate the "r" column). + Simulations <- as.matrix(attr(Envelope, "simfuns"))[, -1] + # Calculate the quantiles of observed values + for (r_i in seq_along(r)) { + if (any(!is.na(Simulations[r_i, ]))) { + # Check that at least one simulated value is not NaN so that ecdf() works. + MQuantiles[r_i, i] <- stats::ecdf(Simulations[r_i, ])(Mvalues[r_i, i]) + } else { + MQuantiles[r_i, i] <- NaN + } + } + # Progress bar + if (verbose) utils::setTxtProgressBar(ProgressBar, i) + } + if (verbose) close(ProgressBar) + # Save the quantiles as an attribute of the fv + attr(M, "Quantiles") <- MQuantiles + attr(M, "Alpha") <- Alpha + } + + if (Individual & is.null(ReferencePoint)) { + # Save the reference type for future smoothing + attr(M, "ReferenceType") <- ReferenceType + } + return(M) +} diff --git a/.#Extra/Local CI/Mhat.Rd b/.#Extra/Local CI/Mhat.Rd new file mode 100644 index 0000000..c1e2a01 --- /dev/null +++ b/.#Extra/Local CI/Mhat.Rd @@ -0,0 +1,81 @@ +\name{Mhat} +\alias{Mhat} +\title{ + Estimation of the M function +} +\description{ + Estimates the \emph{M} function +} +\usage{ +Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, + CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, + Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, + verbose = interactive(), CheckArguments = TRUE) +} +\arguments{ + \item{X}{ + A weighted, marked planar point pattern (\code{\link{wmppp.object}}) or a \code{\link{Dtable}} object. + } + \item{r}{ + A vector of distances. If \code{NULL}, a default value is set: 64 unequally spaced values are used up to half the maximum distance between points \eqn{d_m}. The first value is 0, first steps are small (\eqn{d_m/800}) then increase progressively up to \eqn{d_m/40}. + } + \item{ReferenceType}{ + One of the point types. + } + \item{ReferencePoint}{ + The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. + } + \item{NeighborType}{ + One of the point types. By default, the same as reference type. + } + \item{CaseControl}{ + Logical; if \code{TRUE}, the case-control version of \emph{M} is computed. \emph{ReferenceType} points are cases, \emph{NeighborType} points are controls. + } + \item{Individual}{ + Logical; if \code{TRUE}, values of the function around each individual point are returned. + } + \item{Quantiles}{ + If \code{TRUE}, Monte-Carlo simulations are run to obtain the distribution of each individual \emph{M} value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated). + } + \item{NumberOfSimulations}{ + The number of simulations to run, 100 by default. + } + \item{Alpha}{ + The risk level, 5\% by default. + } + \item{verbose}{ + Logical; if \code{TRUE}, print progress reports during the simulations. + } + \item{CheckArguments}{ + Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. + } +} +\details{ + \emph{M} is a weighted, cumulative, relative measure of a point pattern structure. Its value at any distance is the ratio of neighbors of the \emph{NeighborType} to all points around \emph{ReferenceType} points, normalized by its value over the windows. + + If \emph{CaseControl} is \code{TRUE}, then \emph{ReferenceType} points are cases and \emph{NeighborType} points are controls. The univariate concentration of cases is calculated as if \emph{NeighborType} was equal to \emph{ReferenceType}, but only controls are considered when counting all points around cases (Marcon et al., 2012). This makes sense when the sampling design is such that all points of \emph{ReferenceType} (the cases) but only a sample of the other points (the controls) are recorded. Then, the whole distribution of points is better represented by the controls alone. + + If \emph{Individual} is \code{TRUE}, then the individual values \emph{M} in the neighborhood of each point are returned. If \emph{ReferencePoint} is also specified, then \emph{only} the individual value of M around the reference point is returned. +} +\value{ + An object of class \code{fv}, see \code{\link{fv.object}}, which can be plotted directly using \code{\link{plot.fv}}. + + If \code{Individual} is set to \code{TRUE}, the object also contains the value of the function around each individual \emph{ReferenceType} point taken as the only reference point. The column names of the \code{fv} are "M_" followed by the point names, i.e. the row names of the marks of the point pattern. +} +\references{ + Marcon, E. and Puech, F. (2010). Measures of the Geographic Concentration of Industries: Improving Distance-Based Methods. \emph{Journal of Economic Geography} 10(5): 745-762. + + Marcon, E., F. Puech and S. Traissac (2012). Characterizing the relative spatial structure of point patterns. \emph{International Journal of Ecology} 2012(Article ID 619281): 11. + + Marcon, E., and Puech, F. (2017). A Typology of Distance-Based Measures of Spatial Concentration. \emph{Regional Science and Urban Economics} 62:56-67 +} +\seealso{ + \code{\link{MEnvelope}}, \code{\link{Kdhat}} +} +\examples{ +data(paracou16) +autoplot(paracou16) + +# Calculate M +autoplot(Mhat(paracou16, , "V. Americana", "Q. Rosea")) +} diff --git a/.#Extra/Local CI/ReadMe.txt b/.#Extra/Local CI/ReadMe.txt new file mode 100644 index 0000000..f055c31 --- /dev/null +++ b/.#Extra/Local CI/ReadMe.txt @@ -0,0 +1,8 @@ +# MHat + +If Quantiles is TRUE, Monte-Carlo simulations are run to obtain the distribution of each individual M value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated). + + +# rRandomLocation + +If ReferencePoint is not NULL, other points are shuffled but the reference point is kept untouched to compute the CI of its local M value. diff --git a/.#Extra/Local CI/rRandomLocation.R b/.#Extra/Local CI/rRandomLocation.R new file mode 100644 index 0000000..3f0f085 --- /dev/null +++ b/.#Extra/Local CI/rRandomLocation.R @@ -0,0 +1,63 @@ +rRandomLocation <- +function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { + + if (CheckArguments) + CheckdbmssArguments() + + if (inherits(X, "Dtable")) { + # Dtable case + Index <- seq_along(X$marks$PointType) + if (ReferenceType != "") { + # Retain a single point type + ReferencePoints <- X$marks$PointType==ReferenceType + # Randomize the reference points + RandomizedReferences <- sample(Index[ReferencePoints]) + # Replace randomized elements in the index + i <- o <- 1 + while (i <= length(X$marks$PointType)) + { + if (ReferencePoints[i]) { + Index[i] <- RandomizedReferences[o] + o <- o+1 + } + i <- i+1 + } + } else { + Index <- sample(Index) + } + # Apply the randomization to PointType and PointWeight + X$marks$PointType <- X$marks$PointType[Index] + X$marks$PointWeight <- X$marks$PointWeight[Index] + return(X) + } else { + # wmppp case + if (!is.null(ReferencePoint)) { + # The reference point must be < than the number of points + if (ReferencePoint > X$n) { + stop("The number of the reference point must be smaller than the number of points in the point pattern.") + } + # The reference point must belong to the reference point type + if (ReferenceType != "") { + if (X$marks$PointType[ReferencePoint] != ReferenceType) { + stop("The reference point must be of the reference point type.") + } + } + # Save the reference point + ReferencePoint_ppp <- X[ReferencePoint] + X <- X[-ReferencePoint] + } + if (ReferenceType != "") { + # Retain a single point type + X.reduced <- X[X$marks$PointType == ReferenceType] + RandomizedX <- rlabel(X.reduced) + } else { + RandomizedX <- rlabel(X) + } + if (!is.null(ReferencePoint)) { + # Restore the reference point with index 1 + RandomizedX <- superimpose(ReferencePoint_ppp, RandomizedX) + } + class(RandomizedX) <- c("wmppp", "ppp") + return (RandomizedX) + } +} diff --git a/.#Extra/Local CI/rRandomLocation.Rd b/.#Extra/Local CI/rRandomLocation.Rd new file mode 100644 index 0000000..381c058 --- /dev/null +++ b/.#Extra/Local CI/rRandomLocation.Rd @@ -0,0 +1,57 @@ +\name{rRandomLocation} +\alias{rRandomLocation} +\title{ + Simulations of a point pattern according to the null hypothesis of random location +} +\description{ + Simulates of a point pattern according to the null hypothesis of random location. +} +\usage{ +rRandomLocation(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) +} +\arguments{ + \item{X}{ + A weighted, marked, planar point pattern (\code{\link{wmppp.object}}). + } + \item{ReferenceType}{ + One of the point types. + } + \item{ReferencePoint}{ + The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. + The reference point kept untouched during simulations. + } + \item{CheckArguments}{ + Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. + } +} +\details{ + Points are redistributed randomly across the locations of the original point pattern. This randomization is equivalent to random labeling, considering the label is both point type and point weight. + If \code{ReferenceType} is specified, then only reference type points are kept in the original point pattern before randomization. + If \code{ReferencePoint} is specified, then this point is not modified in all simulations. +} +\value{ + A new weighted, marked, planar point pattern (an object of class \code{wmppp}, see \code{\link{wmppp.object}}). +} +\references{ + Duranton, G. and Overman, H. G. (2005). Testing for Localisation Using Micro-Geographic Data. \emph{Review of Economic Studies} 72(4): 1077-1106. + + Marcon, E. and Puech, F. (2010). Measures of the Geographic Concentration of Industries: Improving Distance-Based Methods. \emph{Journal of Economic Geography} 10(5): 745-762. +} +\seealso{ + \code{\link{rRandomPositionK}} +} +\examples{ +# Simulate a point pattern with five types +X <- rpoispp(50) +PointType <- sample(c("A", "B", "C", "D", "E"), X$n, replace=TRUE) +PointWeight <- runif(X$n, min=1, max=10) +X$marks <- data.frame(PointType, PointWeight) +X <- as.wmppp(X) + +autoplot(X, main="Original pattern") + +# Randomize it +Y <- rRandomLocation(X) +# Points have been redistributed randomly across locations +autoplot(Y, main="Randomized pattern") +} diff --git a/R/Mhat.R b/R/Mhat.R index e7f2e14..cb25de2 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -1,8 +1,6 @@ Mhat <- function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, - CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, - Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, - verbose = interactive(), CheckArguments = TRUE) + CaseControl = FALSE, Individual = FALSE, CheckArguments = TRUE) { # Eliminate erroneous configurations if (CheckArguments) { @@ -11,8 +9,6 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, warning("Cases and controls are identical.") return(rep(1,length(r))) } - if (Quantiles & !Individual) - stop("Quantiles can't be TRUE if Individual is FALSE.") } # Default r values: 64 values up to half the max distance @@ -31,18 +27,6 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, IsReferenceType <- X$marks$PointType==ReferenceType IsNeighborType <- X$marks$PointType==NeighborType - if (!is.null(ReferencePoint)) { - # Set individual to TRUE if a refernce point is given - Individual <- TRUE - if (IsReferenceType[ReferencePoint]) { - # Remember the name of the reference point in the dataset of reference type - ReferencePoint_name <- row.names(X$marks[ReferencePoint, ]) - } else { - # The reference point must be in the reference type - stop("The reference point must be of the reference point type.") - } - } - # Global ratio if (ReferenceType==NeighborType | CaseControl) { WrMinusReferencePoint <- sum(X$marks$PointWeight[IsReferenceType])-X$marks$PointWeight @@ -83,23 +67,15 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, # Cumulate weights up to each distance NbdInt <- t(apply(Nbd[, seq_len(Nr)], 1, cumsum)) - NbdAll <- t(apply(Nbd[, (Nr + 1):(2 * Nr)], 1, cumsum)) + NbdAll <- t(apply(Nbd[, (Nr+1):(2*Nr)], 1, cumsum)) # Calculate the ratio of points of interest around each point LocalRatio <- NbdInt/NbdAll - - if (is.null(ReferencePoint)) { - # Divide it by the global ratio. Ignore points with no neighbor at all. - Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)])) - # Keep individual values - if (Individual) { - Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio)) - } - } else { - # Find the reference point in the set of points of the reference type - ReferencePoint_index <- which(rownames(X[IsReferenceType]$marks) == ReferencePoint_name) - # Only keep the value of the reference point - Mvalues <- LocalRatio[ReferencePoint_index, ]/GlobalRatio[ReferencePoint_index] + # Divide it by the global ratio. Ignore points with no neighbor at all. + Mvalues <- apply(LocalRatio, 2, function(x) sum(x[is.finite(x)])/sum(GlobalRatio[is.finite(x)])) + # Keep individual values + if (Individual) { + Mvalues <- cbind(Mvalues, t(LocalRatio/GlobalRatio)) } # Put the results into an fv object @@ -107,7 +83,7 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, ColNames <- c("r", "theo", "M") Labl <- c("r", "%s[theo](r)", "hat(%s)(r)") Desc <- c("Distance argument r", "Theoretical independent %s", "Estimated %s") - if (Individual & is.null(ReferencePoint)) { + if (Individual) { # ColNumbers will usually be line numbers of the marks df, but may be real names. ColNumbers <- row.names(X$marks[IsReferenceType, ]) ColNames <- c(ColNames, paste("M", ColNumbers, sep="_")) @@ -116,79 +92,10 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, } colnames(MEstimate) <- ColNames - # Make an fv object + # Return the values of M(r) M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M", fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl, desc=Desc, unitname=X$window$unit, fname="M") fvnames(M, ".") <- ColNames[-1] - - - # Calculate the quantiles of the individual values with respect to the null hypothesis - if (Quantiles) { - # Run Monte Carlo simulations of Mhat for each reference point - nReferencePoints <- sum(IsReferenceType) - # Prepare a matrix to save quantiles - MQuantiles <- matrix(0, nrow = length(r), ncol = nReferencePoints) - colnames(MQuantiles) <- paste("M", ColNumbers, sep="_") - rownames(MQuantiles) <- r - if (verbose) ProgressBar <- utils::txtProgressBar(min = 0, max = nReferencePoints) - for (i in seq_len(nReferencePoints)) { - # Null hypothesis - SimulatedPP <- expression( - rRandomLocation( - X, - ReferencePoint = which(X$marks$PointType == ReferenceType)[i], - CheckArguments = FALSE - ) - ) - # Compute the simulations. The envelope is useless: we need the simulated values. - Envelope <- envelope( - # The value Mhat(X) is not used but the point #1 must be of ReferenceType - # so retain points of ReferenceType only to save time - X[X$marks$PointType == ReferenceType], - fun = Mhat, - nsim = NumberOfSimulations, - # nrank may be any value because the envelope is not used - nrank = 1, - # Arguments for Mhat() - r = r, - ReferenceType = ReferenceType, - NeighborType = NeighborType, - CaseControl = CaseControl, - Individual = TRUE, - # The reference point is always 1 after rRandomLocation() - ReferencePoint = 1, - CheckArguments = FALSE, - # Arguments for envelope() - simulate = SimulatedPP, - # Do not show the progress - verbose = FALSE, - # Save individual simulations into attribute simfuns - savefuns = TRUE - ) - # Get the distribution of simulated values (eliminate the "r" column). - Simulations <- as.matrix(attr(Envelope, "simfuns"))[, -1] - # Calculate the quantiles of observed values - for (r_i in seq_along(r)) { - if (any(!is.na(Simulations[r_i, ]))) { - # Check that at least one simulated value is not NaN so that ecdf() works. - MQuantiles[r_i, i] <- stats::ecdf(Simulations[r_i, ])(Mvalues[r_i, i]) - } else { - MQuantiles[r_i, i] <- NaN - } - } - # Progress bar - if (verbose) utils::setTxtProgressBar(ProgressBar, i) - } - if (verbose) close(ProgressBar) - # Save the quantiles as an attribute of the fv - attr(M, "Quantiles") <- MQuantiles - attr(M, "Alpha") <- Alpha - } - - if (Individual & is.null(ReferencePoint)) { - # Save the reference type for future smoothing - attr(M, "ReferenceType") <- ReferenceType - } - return(M) + return (M) } diff --git a/R/rRandomLocation.R b/R/rRandomLocation.R index c63fc83..0c3c8e9 100644 --- a/R/rRandomLocation.R +++ b/R/rRandomLocation.R @@ -1,5 +1,5 @@ rRandomLocation <- -function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { +function(X, ReferenceType = "", CheckArguments = TRUE) { if (CheckArguments) CheckdbmssArguments() @@ -31,21 +31,6 @@ function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { return(X) } else { # wmppp case - if (!is.null(ReferencePoint)) { - # The reference point must be < than the number of points - if (ReferencePoint > X$n) { - stop("The number of the reference point must be smaller than the number of points in the point pattern.") - } - # The reference point must belong to the reference point type - if (ReferenceType != "") { - if (X$marks$PointType[ReferencePoint] != ReferenceType) { - stop("The reference point must be of the reference point type.") - } - } - # Save the reference point - ReferencePoint_ppp <- X[ReferencePoint] - X <- X[-ReferencePoint] - } if (ReferenceType != "") { # Retain a single point type X.reduced <- X[X$marks$PointType == ReferenceType] @@ -53,10 +38,7 @@ function(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) { } else { RandomizedX <- rlabel(X) } - if (!is.null(ReferencePoint)) { - # Restore the reference point with index 1 - RandomizedX <- superimpose(ReferencePoint_ppp, RandomizedX) - } + class(RandomizedX) <- c("wmppp", "ppp") return (RandomizedX) } diff --git a/man/Mhat.Rd b/man/Mhat.Rd index 94e1566..86d2920 100644 --- a/man/Mhat.Rd +++ b/man/Mhat.Rd @@ -8,9 +8,7 @@ } \usage{ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, - CaseControl = FALSE, Individual = FALSE, ReferencePoint = NULL, - Quantiles = FALSE, NumberOfSimulations = 100, Alpha = 0.05, - verbose = interactive(), CheckArguments = TRUE) + CaseControl = FALSE, Individual = FALSE, CheckArguments = TRUE) } \arguments{ \item{X}{ @@ -22,9 +20,6 @@ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, \item{ReferenceType}{ One of the point types. } - \item{ReferencePoint}{ - The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. - } \item{NeighborType}{ One of the point types. By default, the same as reference type. } @@ -34,18 +29,6 @@ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, \item{Individual}{ Logical; if \code{TRUE}, values of the function around each individual point are returned. } - \item{Quantiles}{ - If \code{TRUE}, Monte-Carlo simulations are run to obtain the distribution of each individual \emph{M} value under the null hypothesis of random location of points (i.e. all points except for the reference point are relocated). - } - \item{NumberOfSimulations}{ - The number of simulations to run, 100 by default. - } - \item{Alpha}{ - The risk level, 5\% by default. - } - \item{verbose}{ - Logical; if \code{TRUE}, print progress reports during the simulations. - } \item{CheckArguments}{ Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. } @@ -54,8 +37,6 @@ Mhat(X, r = NULL, ReferenceType, NeighborType = ReferenceType, \emph{M} is a weighted, cumulative, relative measure of a point pattern structure. Its value at any distance is the ratio of neighbors of the \emph{NeighborType} to all points around \emph{ReferenceType} points, normalized by its value over the windows. If \emph{CaseControl} is \code{TRUE}, then \emph{ReferenceType} points are cases and \emph{NeighborType} points are controls. The univariate concentration of cases is calculated as if \emph{NeighborType} was equal to \emph{ReferenceType}, but only controls are considered when counting all points around cases (Marcon et al., 2012). This makes sense when the sampling design is such that all points of \emph{ReferenceType} (the cases) but only a sample of the other points (the controls) are recorded. Then, the whole distribution of points is better represented by the controls alone. - - If \emph{Individual} is \code{TRUE}, then the individual values \emph{M} in the neighborhood of each point are returned. If \emph{ReferencePoint} is also specified, then \emph{only} the individual value of M around the reference point is returned. } \value{ An object of class \code{fv}, see \code{\link{fv.object}}, which can be plotted directly using \code{\link{plot.fv}}. diff --git a/man/rRandomLocation.Rd b/man/rRandomLocation.Rd index e739e11..c7848c9 100644 --- a/man/rRandomLocation.Rd +++ b/man/rRandomLocation.Rd @@ -7,7 +7,7 @@ Simulates of a point pattern according to the null hypothesis of random location. } \usage{ -rRandomLocation(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = TRUE) +rRandomLocation(X, ReferenceType = "", CheckArguments = TRUE) } \arguments{ \item{X}{ @@ -16,18 +16,13 @@ rRandomLocation(X, ReferenceType = "", ReferencePoint = NULL, CheckArguments = T \item{ReferenceType}{ One of the point types. } - \item{ReferencePoint}{ - The index of one of the points, i.e. an integer number between 1 and the number of points of the point pattern. - The reference point kept untouched during simulations. - } \item{CheckArguments}{ Logical; if \code{TRUE}, the function arguments are verified. Should be set to \code{FALSE} to save time in simulations for example, when the arguments have been checked elsewhere. } } \details{ Points are redistributed randomly across the locations of the original point pattern. This randomization is equivalent to random labeling, considering the label is both point type and point weight. - If \code{ReferenceType} is specified, then only reference type points are kept in the original point pattern before randomization. - If \code{ReferencePoint} is specified, then this point is not modified in all simulations. + If \code{ReferenceType} is specified, then only reference type points are kept in the orginal point pattern before randomization. } \value{ A new weighted, marked, planar point pattern (an object of class \code{wmppp}, see \code{\link{wmppp.object}}). From 38e83fd2703f042ab7e59038166a212a040530eb Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 15:38:32 +0100 Subject: [PATCH 24/29] added an article for @marcon2023 --- vignettes/articles/maps.Rmd | 161 ++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 vignettes/articles/maps.Rmd diff --git a/vignettes/articles/maps.Rmd b/vignettes/articles/maps.Rmd new file mode 100644 index 0000000..1a768f1 --- /dev/null +++ b/vignettes/articles/maps.Rmd @@ -0,0 +1,161 @@ +--- +title: "Mapping local values of measures" +bibliography: dbmss.bib +output: + rmarkdown::html_document: + toc: yes + toc_float: yes +vignette: > + %\VignetteIndexEntry{Bandwidth} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r global_options, include=FALSE} +# Installation of packages if necessary +InstallPackages <- function(Packages) { + InstallPackage <- function(Package) { + if (!Package %in% installed.packages()[, 1]) { + install.packages(Package, repos="https://cran.rstudio.com/") + } + } + invisible(sapply(Packages, InstallPackage)) +} +# Add necessary packages here +Packages <- c("dplyr", "dbmss") +# Install them +InstallPackages(Packages) +knitr::opts_chunk$set( + cache = TRUE, + message = FALSE, + warning = FALSE +) +set.seed(97310) +``` + + +Local values of distance-based measures of spatial concentration can be mapped (@Marcon2023). + + +# Dataset simulation + +We build a point pattern made of cases (the points of interest) and controls (the background distribution of points). + +Cases are a Matérn [@Matern1960] point pattern with $\kappa$ (expected) clusters of $\mu$ (expected) points in a circle of radius *scale*. +Controls are a Poisson point pattern whose density $\lambda$ decreases exponentially along the y-axis (we will call "north" the higher y values). + +```{r} +library("dplyr") +library("dbmss") +# Simulation of cases (clusters) +rMatClust(kappa = 10, scale = 0.05, mu = 10) %>% + as.wmppp -> + CASES +CASES$marks$PointType <- "Cases" +# Number of points +CASES$n + +# Simulation of controls (random distribution) +rpoispp(function(x, y) {1000 * exp(-2 * y)}) %>% + as.wmppp -> + CONTROLS +CONTROLS$marks$PointType <-"Controls" +# Number of points +CONTROLS$n + +# Mixed patterns (cases and controls) +ALL <- superimpose(CASES, CONTROLS) +autoplot(ALL) +``` + + +## Calculate and plot *M* Cases + +```{r} +# Fix the number of simulations and the level of risk +NumberOfSimulations <- 1000 +Alpha <- .01 + +# Calculate and plot M Cases +ALL %>% + MEnvelope( + ReferenceType="Cases", + SimulationType = "RandomLocation", + NumberOfSimulations = NumberOfSimulations, + Alpha = Alpha, + Global = TRUE + ) -> + M_env_cases + +autoplot(M_env_cases) +``` + +The plot shows a clear relative concentration of cases. + + +## Map *M* results + +To plot the individual values of *M* around each case, a distance must be chosen. +Then, the function must be computed at this distance with individual values. +Finally, a map is produced by smoothing the individual values and plotted. + +```{r} +# Choose the distance to plot +Distance <- 0.1 + +# Calculate the M values to plot +ALL %>% + Mhat( + r = c(0, Distance), + ReferenceType = "Cases", + NeighborType = "Cases", + # Individual must be TRUE + Individual = TRUE + ) -> + M_TheoEx + +# Map resolution +resolution <- 512 + +# Create a map by smoothing the local values of M +M_TheoEx_map <- Smooth( + # First argument is the point pattern + ALL, + # fvind contains the individual values of M + fvind = M_TheoEx, + # Distance selects the appropriate distance in fvind + distance = Distance, + Nbx = resolution, Nby = resolution +) + +# Plot the point pattern with values of M(Distance) +plot(M_TheoEx_map, main = "") +# Add the cases to the map +points( + ALL[ALL$marks$PointType == "Cases"], + pch = 20, col = "green" +) +# Add contour lines +contour(M_TheoEx_map, add = TRUE) +``` + +We can see that cases are concentrated almost everywhere (local *M* value above 1) because we chose a Matérn point pattern. + +The areas with the higher relative concentration are located in the north of the map because the controls are less dense there. + + +## Compare with the density of cases + +The density of cases is plotted. +High densities are *not* similar to high relative concentrations in this example because the control points are not homogeneously distributed. + +```{r} +plot(density(CASES), main = "") +points( + ALL[ALL$marks$PointType == "Cases"], + pch = 20, col = "green" +) +``` + + +# References From d3d76dd369d802f838ad1f198286d2678ba32774 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 15:39:02 +0100 Subject: [PATCH 25/29] automated references with Zotero --- vignettes/articles/dbmss.bib | 923 ++++++++++++++++++----------------- 1 file changed, 479 insertions(+), 444 deletions(-) diff --git a/vignettes/articles/dbmss.bib b/vignettes/articles/dbmss.bib index b354eaa..d6be52f 100644 --- a/vignettes/articles/dbmss.bib +++ b/vignettes/articles/dbmss.bib @@ -1,451 +1,486 @@ -% This file was created with JabRef 2.10. -% Encoding: UTF8 - - -@Manual{Allaire2015, - Title = {RcppParallel: Parallel Programming Tools for Rcpp}, - Author = {JJ Allaire and Romain Francois and Gregory Vandenbrouck and Marcus Geelnard and {Intel}}, - Note = {R package version 4.3.14}, - Year = {2015}, - - Url = {http://CRAN.R-project.org/package=RcppParallel} -} - -@Article{Baddeley2000, - Title = {Non- and Semi-Parametric Estimation of Interaction in Inhomogeneous Point Patterns}, - Author = {Baddeley, Adrian J. and Møller, Jesper and Waagepetersen, Rasmus Plenge}, - Journal = {Statistica Neerlandica}, - Year = {2000}, - Number = {3}, - Pages = {329-350}, - Volume = {54} -} - -@Article{Baddeley2005, - Title = {Spatstat: an \proglang{R} Package for Analyzing Spatial Point Patterns}, - Author = {Baddeley, Adrian J. and Turner, R}, - Journal = {Journal of Statistical Software}, - Year = {2005}, - Number = {6}, - Pages = {1-42}, - Volume = {12} -} - -@Article{Besag1977, - Title = {Comments on Ripley's Paper}, - Author = {Besag, Julian E.}, - Journal = {Journal of the Royal Statistical Society}, - Year = {1977}, - Number = {2}, - Pages = {193-195}, - Volume = {B 39} -} - -@Article{Bonneu2007, - Title = {Exploring and Modeling Fire Department Emergencies with a Spatio-Temporal Marked Point Process}, - Author = {Bonneu, Florent}, - Journal = {Case Studies in Business, Industry and Government Statistics}, - Year = {2007}, - Number = {2}, - Pages = {139-152}, - Volume = {1} -} - -@Article{Thomas2013, - Title = {Measuring and testing spatial mass concentration of micro-geographic data}, - Author = {Bonneu, Florent and Thomas-Agnan, Christine}, - Journal = {Spatial Economic Analysis}, - Year = {2015}, - Number = {3}, - Pages = {289-316}, - Volume = {10} -} - -@Article{Brulhart2005, - Title = {An Account of Geographic Concentration Patterns in Europe}, - Author = {Brülhart, Marius and Traeger, Rolf}, - Journal = {Regional Science and Urban Economics}, - Year = {2005}, - Number = {6}, - Pages = {597-624}, - Volume = {35} -} - -@Book{Combes2008, - Title = {Economic Geography}, - Author = {Combes, Pierre-Philippe and Mayer, Thierry and Thisse, Jacques-François}, - Publisher = {Princeton University Press}, - Year = {2008}, - - Address = {Princeton, New Jersey}, - - Pages = {416} -} - -@InCollection{Combes2004, - Title = {The spatial distribution of economic activities in the European Union}, - Author = {Combes, Pierre-Philippe and Overman, Henry G}, - Booktitle = {Handbook of Urban and Regional Economics}, - Publisher = {Elsevier. North Holland}, - Year = {2004}, - - Address = {Amsterdam}, - Chapter = {64}, - Editor = {Henderson, J. Vernon and Thisse, Jacques-Fran\c{c}ois}, - Pages = {2845-2909}, - Volume = {4} -} - -@Book{Diggle1983, - Title = {Statistical Analysis of Spatial Point Patterns}, - Author = {Diggle, Peter J.}, - Publisher = {Academic Press}, - Year = {1983}, - - Address = {London}, - - Pages = {148 p.} -} - -@Article{Diggle1991, - Title = {Second-Order Analysis of Spatial Clustering for Inhomogeneous Populations}, - Author = {Diggle, Peter J. and Chetwynd, A. G.}, - Journal = {Biometrics}, - Year = {1991}, - Number = {3}, - Pages = {1155-1163}, - Volume = {47} -} - -@Article{Diggle2007, - Title = {Second-order Analysis of Inhomogeneous Spatial Point Processes Using Case-Control Data}, - Author = {Diggle, P. J. and Gomez-Rubio, V. and Brown, P. E. and Chetwynd, A. G. and Gooding, S.}, - Journal = {Biometrics}, - Year = {2007}, - Number = {2}, - Pages = {550-557}, - Volume = {63} -} - -@Article{Duncan1991, - Title = {Competition and the coexistence of species in a mixed podocarp stand}, - Author = {Duncan, Richard P.}, - Journal = {Journal of Ecology}, - Year = {1991}, - Number = {4}, - Pages = {1073-1084}, - Volume = {79} -} - -@Article{Duranton2008, - Title = {Exploring the Detailed Location Patterns of UK Manufacturing Industries using Microgeographic Data}, - Author = {Duranton, Gilles and Overman, Henry G.}, - Journal = {Journal of Regional Science}, - Year = {2008}, - Number = {1}, - Pages = {213-243}, - Volume = {48} -} - -@Article{Duranton2005, - Title = {Testing for Localisation Using Micro-Geographic Data}, - Author = {Duranton, Gilles and Overman, Henry G.}, - Journal = {Review of Economic Studies}, - Year = {2005}, - Number = {4}, - Pages = {1077-1106}, - Volume = {72} -} - -@Article{Eddelbuettel2011, - Title = {Rcpp: Seamless R and C++ Integration}, - Author = {Eddelbuettel, Dirk and Fran{\c{c}}ois, Romain}, - Journal = {Journal of Statistical Software}, - Year = {2011}, - Number = {8}, - Pages = {1--18}, - Volume = {40}, - - Url = {http://www.jstatsoft.org/v40/i08/} -} - -@Article{Ellison1997, - Title = {Geographic Concentration in U.S. Manufacturing Industries: A Dartboard Approach}, - Author = {Ellison, Glenn and Glaeser, Edward L.}, - Journal = {Journal of Political Economy}, - Year = {1997}, - Number = {5}, - Pages = {889-927}, - Volume = {105} -} - -@Article{Ellison2010, - Title = {What Causes Industry Agglomeration? Evidence from Coagglomeration Patterns}, - Author = {Ellison, Glenn and Glaeser, Edward L. and Kerr, William R.}, - Journal = {The American Economic Review}, - Year = {2010}, - Number = {3}, - Pages = {1195-1213}, - Volume = {100} -} - -@Book{Fortin2005, - Title = {Spatial Analysis. A Guide for Ecologists.}, - Author = {Fortin, Marie-Josée and Dale, Mark R. T.}, - Publisher = {Cambridge University Press}, - Year = {2005}, - - Address = {Cambridge}, - - Pages = {365 p.} -} - -@Book{Gini1912, - Title = {Variabilità e mutabilità}, - Author = {Gini, Corrado}, - Publisher = {Università di Cagliari}, - Year = {1912}, - Series = {Studi Economico-Giuridici dell’Università di Cagliari}, - Volume = {3}, - - Pages = {1-158} -} - -@Article{Goreaud2003, - Title = {Avoiding Misinterpretation of Biotic Interactions with the Intertype K12 Fonction: Population Independence vs Random Labelling Hypotheses}, - Author = {Goreaud, François and Pélissier, Raphaël}, - Journal = {Journal of Vegetation Science}, - Year = {2003}, - Number = {5}, - Pages = {681-692}, - Volume = {14} -} - -@Book{Gourlet2004, - Title = {Ecology \& Management of a Neotropical Rainforest. Lessons Drawn from Paracou, a Long-Term Experimental Research Site in French Guiana}, - Author = {Gourlet-Fleury, Sylvie and Guehl, Jean Marc and Laroussinie, Olivier}, - Publisher = {Elsevier}, - Year = {2004}, - - Address = {Paris, France} -} - -@Article{Haase1997, - Title = {Spatial pattern in Anthyllis cytisoides shrubland on abandoned land in southeastern Spain}, - Author = {Haase, Peter and Pugnaire, Francisco I. and Clark, S. C. and Incoll, L. D.}, - Journal = {Journal of Vegetation Science}, - Year = {1997}, - Number = {5}, - Pages = {627-634}, - Volume = {8} -} - -@Article{Hoover1936, - Title = {The Measurement of Industrial Localization}, - Author = {Hoover, Edgar M.}, - Journal = {The Review of Economics and Statistics}, - Year = {1936}, - Number = {4}, - Pages = {162-171}, - Volume = {18} -} - -@Book{Illian2008, - Title = {Statistical Analysis and Modelling of Spatial Point Patterns}, - Author = {Illian, Janine and Penttinen, Antti and Stoyan, Helga and Stoyan, Dietrich}, - Publisher = {John Wiley \& Sons}, - Year = {2008}, - - Address = {Chichester, England} -} - -@Article{Kenkel1988, - Title = {Pattern of Self-Thinning in Jack Pine: Testing the Random Mortality Hypothesis}, - Author = {Kenkel, N. C.}, - Journal = {Ecology}, - Year = {1988}, - Number = {4}, - Pages = {1017-1024}, - Volume = {69} -} - -@Article{Lang2010, - Title = {Testing randomness of Spatial Point Patterns with the Ripley Statistic}, - Author = {Lang, Gabriel and Marcon, Eric}, - Journal = {ESAIM: Probability and Statistics}, - Year = {2013}, - Pages = {767-788}, - Volume = {17} -} - -@Article{Lang2014, - Title = {Distance-Based Measures of Spatial Concentration: Introducing a Relative Density Function}, - Author = {Lang, Gabriel and Marcon, Eric and Puech, Florence}, - Journal = {HAL}, - Year = {2014}, - Number = {version 2}, - Volume = {01082178} -} - -@Article{Loosmore2006, - Title = {Statistical Inference Using the G or K Point Pattern Spatial Statistics}, - Author = {Loosmore, N. B. and Ford, E. D.}, - Journal = {Ecology}, - Year = {2006}, - Number = {8}, - Pages = {1925-1931}, - Volume = {87} -} - -@Article{Marcon2012c, - Title = {A Typology of Distance-Based Measures of Spatial Concentration}, - Author = {Marcon, Eric and Puech, Florence}, - Journal = {Regional Science and Urban Economics}, - Year = {2017}, - Pages = {56-67}, - Volume = {62} -} - -@Article{Marcon2010, - Title = {Measures of the Geographic Concentration of Industries: Improving Distance-Based Methods}, - Author = {Marcon, Eric and Puech, Florence}, - Journal = {Journal of Economic Geography}, - Year = {2010}, - Number = {5}, - Pages = {745-762}, - Volume = {10} -} - -@Article{Marcon2012, - Title = {Characterizing the Relative Spatial Structure of Point Patterns}, - Author = {Marcon, Eric and Puech, Florence and Traissac, Stéphane}, - Journal = {International Journal of Ecology}, - Year = {2012}, - Number = {Article ID 619281}, - Pages = {11}, - Volume = {2012} -} - -@Article{Marcon2013, - Title = {A Statistical Test for Ripley’s Function Rejection of Poisson Null Hypothesis}, - Author = {Marcon, Eric and Traissac, Stéphane and Lang, Gabriel}, - Journal = {ISRN Ecology}, - Year = {2013}, - Number = {Article ID 753475}, - Pages = {9}, - Volume = {2013} -} - -@InCollection{Openshaw1979, - Title = {A million or so correlation coefficients: three experiments on the modifiable areal unit problem}, - Author = {Openshaw, S. and Taylor, P. J.}, - Booktitle = {Statistical Applications in the Spatial Sciences}, - Publisher = {Pion}, - Year = {1979}, - - Address = {London}, - Editor = {Wrigley, N.}, - Pages = {127-144} -} - -@InBook{Penttinen2006, - Title = {Statistics for Marked Point Patterns}, - Author = {Penttinen, Antti}, - Pages = {70-91}, - Publisher = {The Finnish Statistical Society}, - Year = {2006}, - - Address = {Helsinki}, - - Booktitle = {The Yearbook of the Finnish Statistical Society} -} - -@Article{Penttinen1992, - Title = {Marked Point Processes in Forest Statistics}, - Author = {Penttinen, Antti and Stoyan, Dietrich and Henttonen, Helena M.}, - Journal = {Forest Science}, - Year = {1992}, - Number = {4}, - Pages = {806-824}, - Volume = {38} -} - -@Article{Picone2009, - Title = {Distance decreases with differentiation: Strategic agglomeration by retailers}, - Author = {Picone, G. A. and Ridley, D. B. and Zandbergen, P. A.}, - Journal = {International Journal of Industrial Organization}, - Year = {2009}, - Number = {3}, - Pages = {463-473}, - Volume = {27} -} - -@Manual{R2012, - Title = {\proglang{R}: A Language and Environment for Statistical Computing}, - - Address = {Vienna, Austria}, - Author = {{R Core Team}}, - Note = {ISBN 3-900051-07-0}, - Organization = {R Foundation for Statistical Computing}, - Year = {2015}, - - Url = {http://www.R-project.org} -} - -@Book{Ripley1988, - Title = {Statistical inference for spatial processes}, - Author = {Ripley, Brian D.}, - Publisher = {Cambridge University Press}, - Year = {1988} -} - -@Article{Ripley1977, - Title = {Modelling Spatial Patterns}, - Author = {Ripley, Brian D.}, - Journal = {Journal of the Royal Statistical Society}, - Year = {1977}, - Number = {2}, - Pages = {172-212}, - Volume = {B 39} -} - -@Article{Ripley1976, - Title = {The Second-Order Analysis of Stationary Point Processes}, - Author = {Ripley, Brian D.}, - Journal = {Journal of Applied Probability}, - Year = {1976}, - Number = {2}, - Pages = {255-266}, - Volume = {13} -} - -@Article{Scholl2013, - Title = {Optimizing Distance-Based Methods for Large Data Sets}, - Author = {Scholl, Tobias and Brenner, Thomas}, - Journal = {Journal of Geographical Systems}, - Year = {2015}, - Number = {17}, - Pages = {333-351}, - Volume = {4} +@manual{Allaire2015, + type = {Manual}, + title = {{{RcppParallel}}: {{Parallel}} Programming Tools for '{{Rcpp}}'}, + author = {Allaire, {\relax JJ} and Francois, Romain and Ushey, Kevin and Vandenbrouck, Gregory and Geelnard, Marcus and {Intel}}, + year = {2023} +} + +@article{Baddeley2000, + title = {Non- and Semi-Parametric Estimation of Interaction in Inhomogeneous Point Patterns}, + author = {Baddeley, Adrian J. and M{\o}ller, Jesper and Waagepetersen, Rasmus Plenge}, + year = {2000}, + journal = {Statistica Neerlandica}, + volume = {54}, + number = {3}, + pages = {329--350}, + doi = {10.1111/1467-9574.00144} +} + +@article{Baddeley2005, + title = {Spatstat: An {{R}} Package for Analyzing Spatial Point Patterns}, + author = {Baddeley, Adrian J. and Turner, Rolf}, + year = {2005}, + journal = {Journal of Statistical Software}, + volume = {12}, + number = {6}, + pages = {1--42}, + doi = {10.18637/jss.v012.i06} +} + +@article{Besag1977a, + title = {Comments on {{Ripley}}'s Paper}, + author = {Besag, Julian E.}, + year = {1977}, + journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, + volume = {39}, + number = {2}, + pages = {193--195} +} + +@article{Bonneu2007, + title = {Exploring and Modeling Fire Department Emergencies with a Spatio-Temporal Marked Point Process}, + author = {Bonneu, Florent}, + year = {2007}, + journal = {Case Studies in Business, Industry and Government Statistics}, + volume = {1}, + number = {2}, + pages = {139--152} +} + +@article{Brulhart2005, + title = {An Account of Geographic Concentration Patterns in Europe}, + author = {Br{\"u}lhart, Marius and Traeger, Rolf}, + year = {2005}, + journal = {Regional Science and Urban Economics}, + volume = {35}, + number = {6}, + pages = {597--624}, + doi = {10.1016/j.regsciurbeco.2004.09.002} +} + +@incollection{Combes2004, + title = {The Spatial Distribution of Economic Activities in the {{European Union}}}, + booktitle = {Handbook of Urban and Regional Economics}, + author = {Combes, Pierre-Philippe and Overman, Henry G}, + editor = {Henderson, J Vernon and Thisse, Jacques-Fran{\c c}ois}, + year = {2004}, + volume = {4}, + pages = {2845--2909}, + publisher = {{Elsevier. North Holland}}, + address = {{Amsterdam}}, + chapter = {64} +} + +@incollection{Combes2008, + title = {Economic Geography}, + booktitle = {The Integration of Regions and Nations}, + author = {Combes, Pierre-Philippe and Mayer, Thierry and Thisse, Jacques-Fran{\c c}ois}, + year = {2008}, + pages = {1--416}, + publisher = {{Princeton University Press}}, + address = {{Princeton, New Jersey}}, + isbn = {978-0-691-12459-9} +} + +@article{Diggle1991, + title = {Second-Order Analysis of Spatial Clustering for Inhomogeneous Populations}, + author = {Diggle, Peter J. and Chetwynd, A. G.}, + year = {1991}, + journal = {Biometrics}, + volume = {47}, + number = {3}, + eprint = {2532668}, + eprinttype = {jstor}, + pages = {1155--1163}, + doi = {10.2307/2532668} +} + +@article{Diggle2007, + title = {Second-Order Analysis of Inhomogeneous Spatial Point Processes Using Case-Control Data}, + author = {Diggle, Peter J. and {G{\'o}mez-Rubio}, V. and Brown, P. E. and Chetwynd, A. G. and Gooding, S.}, + year = {2007}, + journal = {Biometrics}, + volume = {63}, + number = {2}, + pages = {550--557}, + doi = {10.1111/j.1541-0420.2006.00683.x} +} + +@book{Diggle2013, + title = {Statistical Analysis of Spatial and Spatio-Temporal Point Patterns}, + author = {Diggle, Peter J.}, + year = {2013}, + edition = {3rd Editio}, + publisher = {{Chapman and Hall/CRC}}, + isbn = {978-1-4665-6024-6} +} + +@article{Duncan1991, + title = {Competition and the Coexistence of Species in a Mixed Podocarp Stand}, + author = {Duncan, Richard P.}, + year = {1991}, + journal = {Journal of Ecology}, + volume = {79}, + number = {4}, + eprint = {2261099}, + eprinttype = {jstor}, + pages = {1073--1084}, + doi = {10.2307/2261099} +} + +@article{Duranton2005, + title = {Testing for Localisation Using Micro-Geographic Data}, + author = {Duranton, Gilles and Overman, Henry G}, + year = {2005}, + journal = {Review of Economic Studies}, + volume = {72}, + number = {4}, + pages = {1077--1106}, + doi = {10.1111/0034-6527.00362} +} + +@article{Duranton2008, + title = {Exploring the Detailed Location Patterns of {{UK}} Manufacturing Industries Using Microgeographic Data}, + author = {Duranton, Gilles and Overman, Henry G}, + year = {2008}, + journal = {Journal of Regional Science}, + volume = {48}, + number = {1}, + pages = {213--243}, + doi = {10.1111/j.1365-2966.2006.0547.x} +} + +@article{Eddelbuettel2011, + title = {Rcpp: {{Seamless R}} and {{C}}++ Integration}, + author = {Eddelbuettel, Dirk and Fran{\c c}ois, Romain}, + year = {2011}, + journal = {Journal of Statistical Software}, + volume = {40}, + number = {8}, + pages = {1--18}, + doi = {10.18637/jss.v040.i08} +} + +@article{Ellison1997, + title = {Geographic Concentration in {{U}}.{{S}}. Manufacturing Industries: {{A}} Dartboard Approach}, + author = {Ellison, Glenn and Glaeser, Edward L}, + year = {1997}, + journal = {Journal of Political Economy}, + volume = {105}, + number = {5}, + pages = {889--927}, + doi = {10.1086/262098} +} + +@article{Ellison2010, + title = {What Causes Industry Agglomeration? {{Evidence}} from Coagglomeration Patterns}, + author = {Ellison, Glenn and Glaeser, Edward L and Kerr, William R}, + year = {2010}, + journal = {The American Economic Review}, + volume = {100}, + number = {3}, + pages = {1195--1213}, + doi = {10.1257/aer.100.3.1195} +} + +@book{Gini1912, + title = {Variabilit{\`a} e Mutabilit{\`a}}, + author = {Gini, Corrado}, + year = {1912}, + publisher = {{C. Cuppini}}, + address = {{Bologna}} +} + +@article{Goreaud2003, + title = {Avoiding Misinterpretation of Biotic Interactions with the Intertype {{K12}} Function: Population Independence vs Random Labelling Hypotheses}, + author = {Goreaud, Fran{\c c}ois and P{\'e}lissier, Rapha{\"e}l}, + year = {2003}, + journal = {Journal of Vegetation Science}, + volume = {14}, + number = {5}, + pages = {681--692}, + doi = {10.1111/j.1654-1103.2003.tb02200.x} +} + +@book{Gourlet-Fleury2004, + title = {Ecology \& Management of a Neotropical Rainforest. {{Lessons}} Drawn from {{Paracou}}, a Long-Term Experimental Research Site in {{French Guiana}}}, + author = {{Gourlet-Fleury}, Sylvie and Guehl, Jean Marc and Laroussinie, Olivier}, + year = {2004}, + publisher = {{Elsevier}}, + address = {{Paris}} +} + +@article{Haase1997, + title = {Spatial Pattern in {{Anthyllis}} Cytisoides Shrubland on Abandoned Land in Southeastern {{Spain}}}, + author = {Haase, Peter and Pugnaire, Francisco I. and Clark, S. C. and Incoll, L. D.}, + year = {1997}, + journal = {Journal of Vegetation Science}, + volume = {8}, + number = {5}, + pages = {627--634} +} + +@article{Hoover1936, + title = {The Measurement of Industrial Localization}, + author = {Hoover, Edgar M.}, + year = {1936}, + journal = {The Review of Economics and Statistics}, + volume = {18}, + number = {4}, + pages = {162--171} +} + +@incollection{Illian2008, + title = {Statistical Analysis and Modelling of Spatial Point Patterns}, + booktitle = {Statistics in Practice}, + author = {Illian, Janine and Penttinen, Antti and Stoyan, Helga and Stoyan, Dietrich}, + year = {2008}, + pages = {1--534}, + publisher = {{Wiley-Interscience}}, + address = {{Chichester}}, + isbn = {0-470-01491-1 978-0-470-01491-2} +} + +@article{Kenkel1988, + title = {Pattern of Self-Thinning in Jack Pine: {{Testing}} the Random Mortality Hypothesis}, + author = {Kenkel, N. C.}, + year = {1988}, + journal = {Ecology}, + volume = {69}, + number = {4}, + pages = {1017--1024}, + doi = {10.2307/1941257} +} + +@article{Lang2013, + title = {Testing Randomness of Spatial Point Patterns with the {{Ripley}} Statistic}, + author = {Lang, Gabriel and Marcon, Eric}, + year = {2013}, + journal = {ESAIM: Probability and Statistics}, + volume = {17}, + eprint = {1006.1567}, + pages = {767--788}, + doi = {10.1051/ps/2012027}, + archiveprefix = {arxiv}, + arxivid = {1006.1567} +} + +@article{Lang2014, + title = {Distance-Based Measures of Spatial Concentration: {{Introducing}} a Relative Density Function}, + author = {Lang, Gabriel and Marcon, Eric and Puech, Florence}, + year = {2020}, + journal = {The Annals of Regional Science}, + volume = {64}, + pages = {243--265}, + doi = {10.1007/s00168-019-00946-7} +} + +@article{Loosmore2006, + title = {Statistical Inference Using the {{G}} or {{K}} Point Pattern Spatial Statistics}, + author = {Loosmore, N. B. and Ford, E. D.}, + year = {2006}, + journal = {Ecology}, + volume = {87}, + number = {8}, + pages = {1925--1931}, + doi = {10.1890/001} +} + +@article{Marcon2010, + title = {Measures of the Geographic Concentration of Industries: {{Improving}} Distance-Based Methods}, + author = {Marcon, Eric and Puech, Florence}, + year = {2010}, + journal = {Journal of Economic Geography}, + volume = {10}, + number = {5}, + pages = {745--762}, + doi = {10.1093/jeg/lbp056} +} + +@article{Marcon2012, + title = {Characterizing the Relative Spatial Structure of Point Patterns}, + author = {Marcon, Eric and Puech, Florence and Traissac, St{\'e}phane}, + year = {2012}, + journal = {International Journal of Ecology}, + volume = {2012}, + number = {Article ID 619281}, + pages = {11}, + doi = {10.1155/2012/619281} +} + +@article{Marcon2012c, + title = {A Typology of Distance-Based Measures of Spatial Concentration}, + author = {Marcon, Eric and Puech, Florence}, + year = {2017}, + journal = {Regional Science and Urban Economics}, + volume = {62}, + pages = {56--67}, + doi = {10.1016/j.regsciurbeco.2016.10.004} +} + +@article{Marcon2013, + title = {A Statistical Test for {{Ripley}}'s Function Rejection of Poisson Null Hypothesis}, + author = {Marcon, Eric and Traissac, St{\'e}phane and Lang, Gabriel}, + year = {2013}, + journal = {ISRN Ecology}, + volume = {2013}, + number = {Article ID 753475}, + pages = {9}, + doi = {10.1155/2013/753475} +} + +@article{Marcon2023, + title = {Mapping Distributions in Non-Homogeneous Space with Distance-Based Methods}, + author = {Marcon, {\'E}ric and Puech, Florence}, + year = {2023}, + month = dec, + journal = {Journal of Spatial Econometrics}, + volume = {4}, + number = {1}, + pages = {13}, + issn = {2662-2998, 2662-298X}, + doi = {10.1007/s43071-023-00042-1}, + urldate = {2023-12-06}, + copyright = {All rights reserved}, + langid = {english} +} + +@article{Matern1960, + title = {Spatial Variation}, + author = {Mat{\'e}rn, Bertil}, + year = {1960}, + journal = {Meddelanden fr{\aa}n Statens Skogsforskningsinstitut}, + volume = {49}, + number = {5}, + pages = {1--144} +} + +@incollection{Openshaw1979, + title = {A Million or so Correlation Coefficients: Three Experiments on the Modifiable Areal Unit Problem}, + booktitle = {Statistical Applications in the Spatial Sciences}, + author = {Openshaw, S. and Taylor, P. J.}, + editor = {Wrigley, N}, + year = {1979}, + pages = {127--144}, + publisher = {{Pion}}, + address = {{London}} +} + +@article{Penttinen1992, + title = {Marked Point Processes in Forest Statistics}, + author = {Penttinen, Antti and Stoyan, Dietrich and Henttonen, Helena M}, + year = {1992}, + journal = {Forest Science}, + volume = {38}, + number = {4}, + pages = {806--824}, + doi = {10.1093/forestscience/38.4.806} +} + +@incollection{Penttinen2006, + title = {Statistics for Marked Point Patterns}, + booktitle = {The Yearbook of the Finnish Statistical Society}, + author = {Penttinen, Antti}, + year = {2006}, + pages = {70--91}, + publisher = {{The Finnish Statistical Society}}, + address = {{Helsinki}} +} + +@article{Picone2009, + title = {Distance Decreases with Differentiation: {{Strategic}} Agglomeration by Retailers}, + author = {Picone, Gabriel A. and Ridley, David B. and Zandbergen, Paul A.}, + year = {2009}, + journal = {International Journal of Industrial Organization}, + volume = {27}, + number = {3}, + pages = {463--473}, + isbn = {01677187 (ISSN)} +} + +@book{R, + title = {R: {{A}} Language and Environment for Statistical Computing}, + author = {{R Core Team}}, + year = {2023}, + publisher = {{R Foundation for Statistical Computing}}, + address = {{Vienna, Austria}} +} + +@article{Ripley1976a, + title = {The Second-Order Analysis of Stationary Point Processes}, + author = {Ripley, Brian D.}, + year = {1976}, + journal = {Journal of Applied Probability}, + volume = {13}, + number = {2}, + eprint = {3212829}, + eprinttype = {jstor}, + pages = {255--266}, + doi = {10.2307/3212829} +} + +@article{Ripley1977, + title = {Modelling Spatial Patterns}, + author = {Ripley, Brian D.}, + year = {1977}, + journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, + volume = {39}, + number = {2}, + eprint = {2984796}, + eprinttype = {jstor}, + pages = {172--212} +} + +@book{Ripley1988, + title = {Statistical Inference for Spatial Processes}, + author = {Ripley, Brian D.}, + year = {1988}, + publisher = {{Cambridge University Press}} +} + +@article{Scholl2013, + title = {Optimizing Distance-Based Methods for Large Data Sets}, + author = {Scholl, Tobias and Brenner, Thomas}, + year = {2015}, + journal = {Journal of Geographical Systems}, + volume = {17}, + number = {4}, + pages = {333--351}, + doi = {10.1007/s10109-015-0219-1} } @article{Sheather1991, - author = {Sheather, S. J. and Jones, M. C.}, - journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, - number = {3}, - pages = {683-690}, - title = {A Reliable Data-Based Bandwidth Selection Method for Kernel Density Estimation}, - volume = {53}, - year = {1991} + title = {A Reliable Data-Based Bandwidth Selection Method for Kernel Density Estimation}, + author = {Sheather, S. J. and Jones, M. C.}, + year = {1991}, + journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, + volume = {53}, + number = {3}, + pages = {683--690}, + doi = {10.2307/2345597} } - @book{Silverman1986, - address = {London}, - author = {Silverman, B. W.}, - pages = {1-175}, - publisher = {Chapman and Hall}, - title = {Density estimation for statistics and data analysis}, - year = {1986} + title = {Density Estimation for Statistics and Data Analysis}, + author = {Silverman, B. W.}, + year = {1986}, + pages = {1--175}, + publisher = {{Chapman and Hall}}, + address = {{London}}, + isbn = {978-0-412-24620-3} +} + +@article{Thomas2013, + title = {Measuring and {{Testing Spatial Mass Concentration}} with {{Micro-geographic Data}}}, + author = {Bonneu, Florent and {Thomas-Agnan}, Christine}, + year = {2015}, + month = jul, + journal = {Spatial Economic Analysis}, + volume = {10}, + number = {3}, + pages = {289--316}, + issn = {1742-1772, 1742-1780}, + doi = {10.1080/17421772.2015.1062124}, + urldate = {2023-12-11}, + langid = {english} } - From e62e3f8590106eaaf35cbec741d92686f988ec9e Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 15:51:56 +0100 Subject: [PATCH 26/29] saved the reference type for Smooth.wmppp() --- R/Mhat.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/Mhat.R b/R/Mhat.R index cb25de2..dd93949 100644 --- a/R/Mhat.R +++ b/R/Mhat.R @@ -92,10 +92,15 @@ function(X, r = NULL, ReferenceType, NeighborType = ReferenceType, } colnames(MEstimate) <- ColNames - # Return the values of M(r) + # Make an fv object M <- fv(MEstimate, argu="r", ylab=quote(M(r)), valu="M", fmla= "cbind(M,theo)~r", alim=c(0, max(r)), labl=Labl, desc=Desc, unitname=X$window$unit, fname="M") fvnames(M, ".") <- ColNames[-1] + if (Individual) { + # Save the reference type for future smoothing + attr(M, "ReferenceType") <- ReferenceType + } + # Return the values of M(r) return (M) } From 5d2cafa7188c9744a35b9feb46c15e09be8abd93 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 16:04:37 +0100 Subject: [PATCH 27/29] polished the vignette format --- vignettes/articles/bandwidth.Rmd | 88 ++++++++++++++++++++++++++++---- vignettes/articles/reference.Rmd | 83 +++++++++++++++++------------- vignettes/dbmss.Rmd | 22 ++++++-- 3 files changed, 142 insertions(+), 51 deletions(-) diff --git a/vignettes/articles/bandwidth.Rmd b/vignettes/articles/bandwidth.Rmd index a1a14ad..45593a3 100644 --- a/vignettes/articles/bandwidth.Rmd +++ b/vignettes/articles/bandwidth.Rmd @@ -12,7 +12,7 @@ vignette: > --- ```{r global_options, include=FALSE} -knitr::opts_chunk$set(cache=TRUE) +knitr::opts_chunk$set(cache = TRUE) set.seed(97310) ``` @@ -47,9 +47,25 @@ The following figure shows this difference on the estimation of the $m$ function ```{r} library("dbmss") # Sheather and Jones bandwidth in black -plot(mhat(paracou16, , ReferenceType="Q. Rosea", Original=FALSE), main="", legend=FALSE) -# Original bandwith in red -plot(mhat(paracou16, , ReferenceType="Q. Rosea"), col="red", main="", add=TRUE) +plot( + mhat( + paracou16, + ReferenceType = "Q. Rosea", + Original = FALSE + ), + main = "", + legend = FALSE +) +# Original bandwidth in red +plot( + mhat( + paracou16, + ReferenceType = "Q. Rosea" + ), + col ="red", + main = "", + add = TRUE +) ``` # Data @@ -63,8 +79,25 @@ The default distance range is used for the black curve. The red curve is the same function estimated up to 30 meters only, resulting into a narrower bandwith: the curve of $K_d$ is less smoothed. ```{r message=FALSE} -plot(Kdhat(paracou16, , ReferenceType="Q. Rosea", Original=FALSE), ylim=c(0, 0.008), main="") -plot(Kdhat(paracou16, r=0:30 , ReferenceType="Q. Rosea", Original=FALSE), main="", col="red", add=TRUE) +plot( + Kdhat( + paracou16, + ReferenceType = "Q. Rosea", + Original = FALSE + ), + ylim = c(0, 0.008), + main = "" +) +plot( + Kdhat( + paracou16, + r = 0:30, + ReferenceType = "Q. Rosea", + Original = FALSE + ), + main = "", + col = "red", + add = TRUE) ``` The approximated algorithm used to calculate $K_d$ and $m$ retains point pairs up to twice the max value of $r$ and rounds their distances in 2048 (this number can be increased by the `Approximate` argument) equally-spaced values. @@ -73,9 +106,26 @@ Simulations show that the approximated algorithm yields bandwidths very similar ```{r} # Exact computation in black -plot(Kdhat(paracou16, , ReferenceType="Q. Rosea", Original=FALSE), main="") +plot( + Kdhat( + paracou16, + ReferenceType = "Q. Rosea", + Original = FALSE + ), + main = "" +) # Approximated computation in green -plot(Kdhat(paracou16, , ReferenceType="Q. Rosea", Original=FALSE, Approximate = 1), main="", col="green", add=TRUE) +plot( + Kdhat( + paracou16, + ReferenceType = "Q. Rosea", + Original = FALSE, + Approximate = 1 + ), + main = "", + col = "green", + add = TRUE +) ``` # Fine tuning @@ -85,9 +135,27 @@ Values over 1 will smooth the density estimation; under 1, they will sharpen it. ```{r} # Default bandwith in black -plot(Kdhat(paracou16, , ReferenceType="Q. Rosea", Original=FALSE), ylim=c(0, 0.008), main="") +plot( + Kdhat( + paracou16, + ReferenceType = "Q. Rosea", + Original = FALSE + ), + ylim = c(0, 0.008), + main = "" +) # Adjusted (half) bandwidth in red -plot(Kdhat(paracou16, , ReferenceType="Q. Rosea", Original=FALSE, Adjust = 1/2), main="", col="red", add=TRUE) +plot( + Kdhat( + paracou16, + ReferenceType = "Q. Rosea", + Original = FALSE, + Adjust = 1/2 + ), + main = "", + col = "red", + add = TRUE +) ``` # Conclusion diff --git a/vignettes/articles/reference.Rmd b/vignettes/articles/reference.Rmd index 8d154f4..d5ba98d 100644 --- a/vignettes/articles/reference.Rmd +++ b/vignettes/articles/reference.Rmd @@ -13,7 +13,7 @@ vignette: > --- ```{r global_options, include=FALSE} -knitr::opts_chunk$set(cache=TRUE) +knitr::opts_chunk$set(cache = TRUE) set.seed(97310) ``` @@ -27,7 +27,7 @@ Amongst many questions, ecologists have been addressing the spatial attraction b Analyzing the spatial distribution of plants, economists may be concerned by the location of new entrants [@Duranton2008] or by the location of shops according to the types of good sold [@Picone2009]. In epidemiology, researchers want to identify the spatial distribution of sick individuals in comparison to the population [@Diggle1991]. In these fields of research, the point process theory undoubtedly helps dealing with these questions. -Exploratory statistics of point patterns widely rely on Ripley's seminal work [@Ripley1976; @Ripley1977], namely the $K$ function. +Exploratory statistics of point patterns widely rely on Ripley's seminal work [@Ripley1976a; @Ripley1977], namely the $K$ function. A review of similar methods has been made by @Marcon2012c who called them distance-based measures of spatial concentration. We will write spatial structures here since both dispersion and concentration can be characterized. They are considered as novel and promising tools in spatial economics [@Combes2008]. @@ -44,12 +44,9 @@ The basic purpose of Ripley's $K$ is to test the observed point pattern against Ripley-like functions, available in the _dbmss_ package, can be classified in three families: - Topographic measures such as $K$ take space as their reference. They have been widely used in ecology [@Fortin2005]. They have been built from the point process theory and have a strong mathematical background. - - Relative measures such as $M$ [@Marcon2010] compare the structure of a point type to that of another type (they can be considered as cases and controls). They have been developed in economics, where comparing the distribution of a sector of activity to that of the whole economic activity is a classical approach [@Combes2008], but introduced only recently in ecology [@Marcon2012]. - - Absolute functions such as $K_d$ [@Duranton2005] have no reference at all but their value can be compared to the appropriate null hypothesis to test it. - Relative and absolute functions have been built from descriptive statistics of point patterns, not related to the underlying point processes, so they are seen as heuristic and ignored by the statistical literature [@Illian2008]. Topographic functions are implemented in the _spatstat_ package [@Baddeley2005} but absolute and relative functions are missing. We fill this gap by proposing the _dbmss_ package. @@ -57,7 +54,7 @@ It makes the computation of the whole set of distance-based methods simple for e Estimated values of the functions must be tested against a null hypothesis. The usual empirical way to characterize a spatial structure consists in computing the appropriate function and comparing it to the quantiles of a large number of simulations of the null hypothesis to reject [@Kenkel1988]. -We propose extended possibilities to evaluate confidence envelopes, including _global envelopes_ [@Duranton2005], a goodness-of-fit test [@Diggle1983] and an analytical test [@Lang2010; @Marcon2013]. +We propose extended possibilities to evaluate confidence envelopes, including _global envelopes_ [@Duranton2005], a goodness-of-fit test [@Diggle2013] and an analytic test [@Lang2013; @Marcon2013]. Definitions of all functions and formulas for their estimation can be found in @Marcon2012c and are not repeated here, but they are summarized in the statistical background section. Their implementation is presented in the package content section. @@ -211,7 +208,7 @@ It draws point coordinates between 0 and 1, and creates a `wmppp` with a default ```{r SimpleWmppp, warning=FALSE} library("dbmss") -Pattern <- wmppp(data.frame(X=runif(100), Y=runif(100))) +Pattern <- wmppp(data.frame(X = runif(100), Y = runif(100))) summary(Pattern) ``` @@ -220,16 +217,11 @@ summary(Pattern) All functions are named `Xhat` where `X` is the name of the function: -- Ripley's $g$ and $K$, and $K$'s normalization, Besag's $L$ [-@Besag1977]; - +- Ripley's $g$ and $K$, and $K$'s normalization, Besag's $L$ [-@Besag1977a]; - Penttinen's $K_{mm}$ and $L_{mm}$; - - Diggle and Chetwynd's $D$; - - Baddeley et al.'s $K_{inhom}$ and its derivative $g_{inhom}$; - - Marcon and Puech's $M$; - - Duranton and Overman's $K_d$ (including its weighted version $K^{emp}$). @@ -238,11 +230,8 @@ The suffix `hat` has been used to avoid confusion with other functions in R: `D` Arguments are: - A weighted, marked planar point pattern (a `wmppp` class object). The window can be a polygon or a binary image, as in _spatstat_. - - A vector of distances. - - Optionally a reference and a neighbor point type to calculate bivariate functions, or equivalently the types of cases and controls for the $D$ function. - - Some optional arguments, specific to some functions. @@ -279,19 +268,26 @@ The first one comes from the economic literature [@Bonneu2007] [^1] ```{r Toulouse, warning=FALSE} load("CSBIGS.Rdata") -Category <- cut(Emergencies$M, quantile(Emergencies$M, c(0, 0.9, 1)), - labels = c("Other", "Biggest"), include.lowest = TRUE) -X <- wmppp(data.frame(X=Emergencies$X, Y=Emergencies$Y, PointType=Category), win=Region) +Category <- cut( + Emergencies$M, + quantile(Emergencies$M, probs = c(0, 0.9, 1)), + labels = c("Other", "Biggest"), + include.lowest = TRUE +) +X <- wmppp( + data.frame(X = Emergencies$X, Y = Emergencies$Y, PointType = Category), + win=Region +) X$window$units <- c("meter","meters") X2 <- split(X) marks(X2$Other) <- rep(1, X2$Other$n) marks(X2$Biggest) <- rep(1, X2$Biggest$n) -par(mfrow=c(1,2), mar=c(0,0,0,0)) -plot(X2$Other, main="", maxsize=1, legend=FALSE) -text(514300, 1826800, "a") -plot(X2$Biggest, main="", maxsize=1, legend=FALSE) -text(514300, 1826800, "b") -par(mfrow=c(1,1)) +par(mfrow = c(1, 2), mar = c(0, 0, 0, 0)) +plot(X2$Other, main="", maxsize = 1, legend = FALSE) +text(514300, 1826800, "a") +plot(X2$Biggest, main = "", maxsize = 1, legend = FALSE) +text(514300, 1826800, "b") +par(mfrow = c(1, 1)) ``` The map of emergencies in the urban area of Toulouse, France, during year 2004 (about 33 km from south to north) shows (a) 20,820 emergencies recorded (many points are confused at the figure scale) and (b) the locations of the 10 percent most serious ones. @@ -307,8 +303,14 @@ The `KdEnvelope` function is run from 0 to 10km by steps of 100m for the most se The figure below shows that the 10% most serious emergencies are more dispersed than the distribution of all emergencies at all distances up to 10km. ```{r KdCode, fig.width=6, fig.height=5} -KdE <- KdEnvelope(X, r=seq(0, 10000, 100), NumberOfSimulations=1000, ReferenceType="Biggest", Global=TRUE) -autoplot(KdE, main="") +KdE <- KdEnvelope( + X, + r = seq(0, 10000, 100), + NumberOfSimulations = 1000, + ReferenceType = "Biggest", + Global = TRUE +) +autoplot(KdE, main = "") ``` The solid, black curve is $K_d$. @@ -319,14 +321,16 @@ distances are in meters. This opens the way to discuss on the optimal location of fire stations. The second example uses the `paracou16` point pattern provided in the package. -It represents the distribution of trees in a 4.1-ha tropical forest plot in the Paracou field station in French Guiana [@Gourlet2004]. +It represents the distribution of trees in a 4.1-ha tropical forest plot in the Paracou field station in French Guiana [@Gourlet-Fleury2004]. It contains 2426 trees, whose species is either _Qualea rosea_, _Vouacapoua americana_ or Other (one of more than 300 species). Weights are basal areas (the area of the stems virtually cut 1.3 meter above ground), measured in square centimeters. ```{r P16Code, eval=FALSE} -autoplot(paracou16, +autoplot( + paracou16, labelSize = expression("Basal area (" ~cm^2~ ")"), - labelColor = "Species") + labelColor = "Species" +) ``` On the map, circles are centered on trees in the forest plot (the containing rectangle is 200m wide by 250m long). @@ -337,10 +341,17 @@ Bivariate $M(r)$ is calculated for $r$ between 0 and 30 meters. 1000 simulations are run to build the global confidence envelope. ```{r MCode} -Envelope <- MEnvelope(paracou16, r = seq(0, 30, 2), NumberOfSimulations - = 1000, Alpha = 0.05, ReferenceType = "V. Americana", NeighborType - = "Q. Rosea", SimulationType = "RandomLabeling", Global = TRUE) -autoplot(Envelope, main="", ylim=c(0, 20)) +Envelope <- MEnvelope( + paracou16, + r = seq(0, 30, 2), + NumberOfSimulations = 1000, + Alpha = 0.05, + ReferenceType = "V. Americana", + NeighborType = "Q. Rosea", + SimulationType = "RandomLabeling", + Global = TRUE +) +autoplot(Envelope, main = "", ylim = c(0, 20)) ``` $M(r)$ values of _Qualea rosea_ around _Vouacapoua Americana_ are plotted. @@ -358,7 +369,7 @@ The complete study, with a larger dataset allowing significant results, can be f ## Goodness-of-fit test -A Goodness-of-fit test for $K$ has been proposed by @Diggle1983, applied to $K$ by @Loosmore2006 and to $M$ by @Marcon2012. +A Goodness-of-fit test for $K$ has been proposed by @Diggle2013, applied to $K$ by @Loosmore2006 and to $M$ by @Marcon2012. It calculates the distance between the actual values of the function and its average value obtained in simulations of the null hypothesis. The same distance is calculated for each simulated point pattern, and the returned $p$-value of the test if the ratio of simulations whose distance is larger than that of the real point pattern. The test is performed by the `GoFtest` function whose argument is the envelope previously calculated (actually, the function uses the simulation values). @@ -372,7 +383,7 @@ GoFtest(Envelope) ## Ktest -The `Ktest` has been developed by Lang and Marcon [@Lang2010; @Marcon2013]. +The `Ktest` has been developed by Lang and Marcon [@Lang2013; @Marcon2013]. It does not rely on simulations and returns the $p$-value to erroneously reject complete spatial randomness (CSR) given the values of $K$. It relies on the exact variance of $K$ calculated with edge-effect corrections. It only works in a rectangular window. diff --git a/vignettes/dbmss.Rmd b/vignettes/dbmss.Rmd index aa7af66..9346a55 100644 --- a/vignettes/dbmss.Rmd +++ b/vignettes/dbmss.Rmd @@ -30,7 +30,7 @@ library("dbmss") X <- runif(10) Y <- runif(10) # Draw the point types. -PointType <- sample(c("A", "B"), 10, replace=TRUE) +PointType <- sample(c("A", "B"), size = 10, replace = TRUE) # Plot the point pattern. Weights are set to 1 ant the window is adjusted autoplot(wmppp(data.frame(X, Y, PointType))) ``` @@ -41,9 +41,11 @@ Point weights are their basal area, in square centimeters. ```{r paracou} # Plot (second column of marks is Point Types) -autoplot(paracou16, +autoplot( + paracou16, labelSize = expression("Basal area (" ~cm^2~ ")"), - labelColor = "Species") + labelColor = "Species" +) ``` # Main functions @@ -59,7 +61,14 @@ These are the $M$ and $m$ and $K_d$ functions. The bivariate $M$ function can be calculated for _Q. Rosea_ trees around _V. Americana_ trees: ```{r m} -autoplot(Mhat(paracou16, , "V. Americana", "Q. Rosea"), main="") +autoplot( + Mhat( + paracou16, + ReferenceType = "V. Americana", + NeighborType = "Q. Rosea" + ), + main = "" +) ``` @@ -69,7 +78,10 @@ Confidence envelopes of various null hypotheses can be calculated. The univariate distribution of _Q. Rosea_ is tested against the null hypothesis of random location. ```{r} -autoplot(KdEnvelope(paracou16, , ReferenceType="Q. Rosea", Global=TRUE), main="") +autoplot( + KdEnvelope(paracou16, ReferenceType = "Q. Rosea", Global = TRUE), + main = "" +) ``` Significant concentration is detected between about 10 and 20 meters. From 8a12e96bb3876102c0a88ef8a8828d59c26435de Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 16:11:06 +0100 Subject: [PATCH 28/29] added a CI test for PR --- .github/workflows/pr.yml | 32 ++++++++++++++++++++++++++++++++ DESCRIPTION | 2 +- NEWS.md | 2 +- 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/pr.yml diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml new file mode 100644 index 0000000..213c22c --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,32 @@ +on: + pull_request: + branches: + - master + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: macOS-latest + env: + GITHUB_PAT: ${{ secrets.GH_PAT }} + steps: + - uses: actions/checkout@master + - uses: r-lib/actions/setup-r@v2 + - name: Install pandoc + uses: r-lib/actions/setup-pandoc@v2 + - name: Install dependencies + run: | + options(pkgType = "binary") + options(install.packages.check.source = "no") + install.packages(c("remotes", "rcmdcheck", "covr", "pkgdown")) + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + - name: Check + run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "warning", check_dir = "check") + shell: Rscript {0} + - name: Install package + run: R CMD INSTALL . + - name: Pkgdown + # Build the package site locally + run: Rscript -e 'pkgdown::build_site()' diff --git a/DESCRIPTION b/DESCRIPTION index 6e4c486..5ecd6d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dbmss Type: Package Title: Distance-Based Measures of Spatial Structures -Version: 2.8-2.9104 +Version: 2.8-2.9105 Authors@R: c( person("Eric", "Marcon", email="eric.marcon@agroparistech.fr", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5249-321X")), person("Gabriel", "Lang", email="gabriel.lang@agroparistech.fr", role="aut", comment = c(ORCID = "0000-0002-4325-6044")), diff --git a/NEWS.md b/NEWS.md index d5f935f..0dd32ed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# dbmss 2.8-2.9104 +# dbmss 2.8-2.9105 ## Improvements From e58026810ee3615c9675c0af35dc1b12bda647c6 Mon Sep 17 00:00:00 2001 From: Eric Marcon Date: Mon, 11 Dec 2023 16:18:33 +0100 Subject: [PATCH 29/29] added reference Fortin2005 --- vignettes/articles/dbmss.bib | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vignettes/articles/dbmss.bib b/vignettes/articles/dbmss.bib index d6be52f..171d187 100644 --- a/vignettes/articles/dbmss.bib +++ b/vignettes/articles/dbmss.bib @@ -183,6 +183,15 @@ @article{Ellison2010 doi = {10.1257/aer.100.3.1195} } +@book{Fortin2005, + title = {Spatial Analysis. {{A}} Guide for Ecologists.}, + author = {Fortin, Marie-Jos{\'e}e and Dale, Mark R T}, + year = {2005}, + publisher = {{Cambridge University Press}}, + address = {{Cambridge}}, + chapter = {365} +} + @book{Gini1912, title = {Variabilit{\`a} e Mutabilit{\`a}}, author = {Gini, Corrado},