issue updates

florianhartig · Jun 9, 2024 · 4c0936c · 4c0936c
1 parent e269198
commit 4c0936c
Show file tree

Hide file tree

Showing 3 changed files with 154 additions and 3 deletions.
diff --git a/Code/DHARMaIssues/348.R b/Code/DHARMaIssues/348.R
@@ -1,5 +1,16 @@
+
+dat = read.csv("~/Downloads/Use_Avail.csv",  stringsAsFactors = T)
+
+library(lme4)
+library(DHARMa)
+
 M2 <- glmer(cbind(Used, NotUsed) ~ Sex + type + (1 | ID), family = binomial,
-            data=Use_Avail)
+            data=dat)
 simOut <- simulateResiduals(M2, plot = T)
-plotResiduals(simOut, Use_Avail$type)
-plotResiduals(simOut, Use_Avail$Sex)
+plotResiduals(simOut, dat$type)
+plotResiduals(simOut, dat$Sex)
+
+sessionInfo()
+
+
+install.packages("DHARMa")
diff --git a/Code/DHARMaIssues/402.R b/Code/DHARMaIssues/402.R
@@ -0,0 +1,32 @@
+
+
+TooHot = read.csv('https://raw.githubusercontent.com/HugoMH/Stat_M1_BootGLMM/main/TDs/Data/Suicides%20and%20Ambient%20Temperature.csv')
+head(TooHot)
+
+TooHot$Temperature2 = TooHot$Temperature^2
+
+Mpoisson = glm(Suicides ~ Temperature + Temperature2 + Country
+               ,data = TooHot
+               ,family = poisson(link = 'log')) # !!   <°)))><    !!
+
+MpoissonRE = lme4::glmer(Suicides ~ Temperature + Temperature2 + (1|Country) 
+                         ,data = TooHot, family = poisson(link = 'log'))
+
+DHARMa::testDispersion(Mpoisson,plot = F)
+# dispersion = 11350, p-value < 2.2e-16
+
+DHARMa::testDispersion(MpoissonRE,plot = F) 
+summary(MpoissonRE)
+
+
+# Best to check dispersion with conditional simulations
+res2 <- simulateResiduals(MpoissonRE, re.form = NULL)
+DHARMa::testDispersion(res2)
+
+plot(res2)
+
+
+# The analytical test can also be used but it is not generally reliable (biased towards underdispersion)
+DHARMa::testDispersion(MpoissonRE, type = "PearsonChisq")
+
+
diff --git a/Code/DHARMaIssues/415.R b/Code/DHARMaIssues/415.R
@@ -0,0 +1,108 @@
+# fit <- readRDS("~/Downloads/Pinus.strobus_fit_k10_bsts_selectFALSE.rds")
+fit <- readRDS("~/Downloads/Basalarea_fit_Pinus.strobus_tekc(25,50).rds")
+
+summary(fit)
+
+dat = model.frame(fit)
+
+res = simulateResiduals(fit, plot = F)
+res2 = recalculateResiduals(res, group = dat$blname)
+plot(res, quantreg = F)
+testDispersion(res)
+
+# testing if tweedie works in principle, using example of mgcv
+
+library(mgcv)
+
+# checking shape
+hist(rTweedie(rep(1,1000),p=1.5,phi=1.3))
+
+
+f2 <- function(x) 0.2 * x^11 * (10 * (1 - x))^6 + 10 *
+  (10 * x)^3 * (1 - x)^10
+n <- 3000
+x <- runif(n)
+mu <- exp(f2(x)/3+.1);x <- x*10 - 4
+y <- rTweedie(mu,p=1.5,phi=1.3)
+b <- gam(y~s(x,k=20),family=Tweedie(p=1.3))
+
+
+res = simulateResiduals(b, plot = F)
+plot(res, quantreg = F)
+
+
+testDispersion(res)
+testDispersion(res, type = "PearsonChisq")
+
+
+x = residuals(b, type = "response")
+
+x = residuals(b, type = "scaled.pearson")
+sd(x)
+
+x = residuals(b, type = "pearson")
+sd(x)
+
+
+
+
+
+# testing variations of the dispersion test 
+
+fit <- readRDS("~/Downloads/Basalarea_fit_Pinus.strobus_tekc(25,50).rds")
+
+
+
+# Scaled Pearson residuals are raw residuals divided by the standard deviation of the data according to the model mean variance relationship and estimated scale parameter.
+x = residuals(fit, type = "scaled.pearson")
+sd(x)
+
+# 
+x = residuals(fit, type = "pearson")
+sd(x)
+
+testDispersion(res, type = "PearsonChisq")
+
+res = simulateResiduals(fit, n = 1000, plot = F)
+
+simulatedSD = apply(res$simulatedResponse, 1, sd)
+
+residuals(fit, type = "response") / simulatedSD
+
+
+  sd(res$simulatedResponse)^2
+spread <- function(x) var(x - simulationOutput$fittedPredictedResponse) / expectedVar
+
+spread <- function(x) var(x - simulationOutput$fittedPredictedResponse) / var(simulationOutput$fittedPredictedResponse)
+
+out = testGeneric(simulationOutput, summary = spread, alternative = alternative, methodName = "DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated", plot = plot, ...)
+
+
+
+out = list()
+out$data.name = deparse(substitute(simulationOutput))
+
+simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")
+
+alternative <- match.arg(alternative)
+
+observed = summary(simulationOutput$observedResponse)
+
+simulated = apply(simulationOutput$simulatedResponse, 2, summary)
+
+p = getP(simulated = simulated, observed = observed, alternative = alternative)
+
+out$statistic = c(ratioObsSim = observed / mean(simulated))
+out$method = methodName
+out$alternative = alternative
+out$p.value = p
+
+
+
+
+
+
+
+
+
+