diff --git a/Code/DHARMaIssues/348.R b/Code/DHARMaIssues/348.R index a37055ab..65771c06 100644 --- a/Code/DHARMaIssues/348.R +++ b/Code/DHARMaIssues/348.R @@ -1,5 +1,16 @@ + +dat = read.csv("~/Downloads/Use_Avail.csv", stringsAsFactors = T) + +library(lme4) +library(DHARMa) + M2 <- glmer(cbind(Used, NotUsed) ~ Sex + type + (1 | ID), family = binomial, - data=Use_Avail) + data=dat) simOut <- simulateResiduals(M2, plot = T) -plotResiduals(simOut, Use_Avail$type) -plotResiduals(simOut, Use_Avail$Sex) \ No newline at end of file +plotResiduals(simOut, dat$type) +plotResiduals(simOut, dat$Sex) + +sessionInfo() + + +install.packages("DHARMa") diff --git a/Code/DHARMaIssues/402.R b/Code/DHARMaIssues/402.R new file mode 100644 index 00000000..887b349c --- /dev/null +++ b/Code/DHARMaIssues/402.R @@ -0,0 +1,32 @@ + + +TooHot = read.csv('https://raw.githubusercontent.com/HugoMH/Stat_M1_BootGLMM/main/TDs/Data/Suicides%20and%20Ambient%20Temperature.csv') +head(TooHot) + +TooHot$Temperature2 = TooHot$Temperature^2 + +Mpoisson = glm(Suicides ~ Temperature + Temperature2 + Country + ,data = TooHot + ,family = poisson(link = 'log')) # !! <°)))>< !! + +MpoissonRE = lme4::glmer(Suicides ~ Temperature + Temperature2 + (1|Country) + ,data = TooHot, family = poisson(link = 'log')) + +DHARMa::testDispersion(Mpoisson,plot = F) +# dispersion = 11350, p-value < 2.2e-16 + +DHARMa::testDispersion(MpoissonRE,plot = F) +summary(MpoissonRE) + + +# Best to check dispersion with conditional simulations +res2 <- simulateResiduals(MpoissonRE, re.form = NULL) +DHARMa::testDispersion(res2) + +plot(res2) + + +# The analytical test can also be used but it is not generally reliable (biased towards underdispersion) +DHARMa::testDispersion(MpoissonRE, type = "PearsonChisq") + + diff --git a/Code/DHARMaIssues/415.R b/Code/DHARMaIssues/415.R new file mode 100644 index 00000000..632fbb67 --- /dev/null +++ b/Code/DHARMaIssues/415.R @@ -0,0 +1,108 @@ +# fit <- readRDS("~/Downloads/Pinus.strobus_fit_k10_bsts_selectFALSE.rds") +fit <- readRDS("~/Downloads/Basalarea_fit_Pinus.strobus_tekc(25,50).rds") + +summary(fit) + +dat = model.frame(fit) + +res = simulateResiduals(fit, plot = F) +res2 = recalculateResiduals(res, group = dat$blname) +plot(res, quantreg = F) +testDispersion(res) + +# testing if tweedie works in principle, using example of mgcv + +library(mgcv) + +# checking shape +hist(rTweedie(rep(1,1000),p=1.5,phi=1.3)) + + +f2 <- function(x) 0.2 * x^11 * (10 * (1 - x))^6 + 10 * + (10 * x)^3 * (1 - x)^10 +n <- 3000 +x <- runif(n) +mu <- exp(f2(x)/3+.1);x <- x*10 - 4 +y <- rTweedie(mu,p=1.5,phi=1.3) +b <- gam(y~s(x,k=20),family=Tweedie(p=1.3)) + + +res = simulateResiduals(b, plot = F) +plot(res, quantreg = F) + + +testDispersion(res) +testDispersion(res, type = "PearsonChisq") + + +x = residuals(b, type = "response") + +x = residuals(b, type = "scaled.pearson") +sd(x) + +x = residuals(b, type = "pearson") +sd(x) + + + + + +# testing variations of the dispersion test + +fit <- readRDS("~/Downloads/Basalarea_fit_Pinus.strobus_tekc(25,50).rds") + + + +# Scaled Pearson residuals are raw residuals divided by the standard deviation of the data according to the model mean variance relationship and estimated scale parameter. +x = residuals(fit, type = "scaled.pearson") +sd(x) + +# +x = residuals(fit, type = "pearson") +sd(x) + +testDispersion(res, type = "PearsonChisq") + +res = simulateResiduals(fit, n = 1000, plot = F) + +simulatedSD = apply(res$simulatedResponse, 1, sd) + +residuals(fit, type = "response") / simulatedSD + + + sd(res$simulatedResponse)^2 +spread <- function(x) var(x - simulationOutput$fittedPredictedResponse) / expectedVar + +spread <- function(x) var(x - simulationOutput$fittedPredictedResponse) / var(simulationOutput$fittedPredictedResponse) + +out = testGeneric(simulationOutput, summary = spread, alternative = alternative, methodName = "DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated", plot = plot, ...) + + + +out = list() +out$data.name = deparse(substitute(simulationOutput)) + +simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") + +alternative <- match.arg(alternative) + +observed = summary(simulationOutput$observedResponse) + +simulated = apply(simulationOutput$simulatedResponse, 2, summary) + +p = getP(simulated = simulated, observed = observed, alternative = alternative) + +out$statistic = c(ratioObsSim = observed / mean(simulated)) +out$method = methodName +out$alternative = alternative +out$p.value = p + + + + + + + + + +