From 3ca32b6eb5f47a23dea14125abaa295e4e254070 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Tue, 30 Jul 2024 09:35:49 +0000
Subject: [PATCH] Strip outputs from .ipynb files [skip ci]

---
 PM2/test_addition.Rmd | 116 ------------------------------------------
 1 file changed, 116 deletions(-)
 delete mode 100644 PM2/test_addition.Rmd

diff --git a/PM2/test_addition.Rmd b/PM2/test_addition.Rmd
deleted file mode 100644
index f35f112..0000000
--- a/PM2/test_addition.Rmd
+++ /dev/null
@@ -1,116 +0,0 @@
---
title: An R Markdown document converted from "PM2/test_addition.irnb"
output: html_document
---

# Simulation on Orthogonal Estimation

We compare the performance of the naive and orthogonal methods in a computational experiment with $p = n = 100$, $\beta_j = 1/j^2$, $(\gamma_{DW})_j = 1/j^2$, and

$$Y = 1 \cdot D + \beta' W + \epsilon_Y,$$

where $W \sim N(0, I)$, $\epsilon_Y \sim N(0, 1)$, and

$$D = \gamma_{DW}' W + \tilde{D},$$

where $\tilde{D} \sim N(0, 1)/4$.

The true treatment effect is 1. The plots produced in this notebook show the estimates minus the ground truth: the naive single-selection estimator is heavily biased, because its estimation strategy is not Neyman orthogonal, while the orthogonal estimator based on partialling out is approximately unbiased and Gaussian.
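To see why partialling out helps, note that the orthogonal estimate computed below is, up to the intercept that `lm` includes, the least-squares slope of the residualized outcome on the residualized treatment:

$$\hat{\theta}^{\text{PO}} \approx \frac{\sum_{i=1}^{n} r^{D}_i \, r^{Y}_i}{\sum_{i=1}^{n} (r^{D}_i)^2}, \qquad r^{Y}_i = Y_i - W_i' \hat{\pi}, \quad r^{D}_i = D_i - W_i' \hat{\gamma},$$

where $\hat{\pi}$ and $\hat{\gamma}$ are the post-lasso coefficients from regressing $Y$ on $W$ and $D$ on $W$ (in the code, the covariate matrix is called `X`, and the vectors `resY` and `resD` hold $r^{Y}$ and $r^{D}$). Estimation errors in $\hat{\pi}$ and $\hat{\gamma}$ enter this expression only through second-order product terms, which is the Neyman orthogonality property. The naive single-selection estimator has no such protection: it suffers a first-order omitted-variable bias whenever the lasso drops controls that matter for both $D$ and $Y$.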
```{r}
install.packages("hdm")
install.packages("ggplot2")
```

```{r}
library(hdm)
library(ggplot2)
```

```{r}
# Initialize constants
B <- 10000 # Number of simulation repetitions
n <- 100   # Sample size
p <- 100   # Number of features

# Initialize arrays to store results
Naive <- rep(0, B)
Orthogonal <- rep(0, B)

lambdaYs <- rep(0, B)
lambdaDs <- rep(0, B)

for (i in 1:B) {
  # Generate parameters
  beta <- 1 / (1:p)^2
  gamma <- 1 / (1:p)^2

  # Generate covariates / random data
  X <- matrix(rnorm(n * p), n, p)
  D <- X %*% gamma + rnorm(n) / 4

  # Generate Y using the DGP
  Y <- D + X %*% beta + rnorm(n)

  # Single-selection method: lasso of Y on D and X, then OLS on the selected controls
  rlasso_result <- hdm::rlasso(Y ~ D + X) # Fit lasso regression
  sx_ids <- which(rlasso_result$coef[-c(1, 2)] != 0) # Indices of selected covariates (columns of X)

  # Refit by OLS, using only D if no covariates were selected
  if (length(sx_ids) == 0) {
    Naive[i] <- lm(Y ~ D)$coef[2]
  } else {
    Naive[i] <- lm(Y ~ D + X[, sx_ids])$coef[2]
  }

  # Partialling out / double lasso
  fitY <- hdm::rlasso(Y ~ X, post = TRUE)
  resY <- fitY$res

  fitD <- hdm::rlasso(D ~ X, post = TRUE)
  resD <- fitD$res

  Orthogonal[i] <- lm(resY ~ resD)$coef[2] # OLS of residualized Y on residualized D
}
```

## Make a Nice Plot

```{r}
# Specify the figure dimensions (width : height = 2 : 1)
img_width <- 15
img_height <- img_width / 2
```

```{r}
# Create a data frame of the centered estimates
df <- data.frame(Method = rep(c("Naive", "Orthogonal"), each = B),
                 Value = c(Naive - 1, Orthogonal - 1))

# Create the histogram using ggplot2
hist_plot <- ggplot(df, aes(x = Value, fill = Method)) +
  geom_histogram(binwidth = 0.1, color = "black", alpha = 0.7) +
  facet_wrap(~Method, scales = "fixed") +
  labs(
    title = "Distribution of Estimates (Centered around Ground Truth)",
    x = "Bias",
    y = "Frequency"
  ) +
  scale_x_continuous(breaks = seq(-2, 1.5, 0.5)) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),  # Center the plot title
    strip.text = element_text(size = 10),    # Increase text size in facet labels
    legend.position = "none",                # Remove the legend
    panel.grid.major = element_blank(),      # Make major grid lines invisible
    # panel.grid.minor = element_blank(),    # Make minor grid lines invisible
    strip.background = element_blank(),      # Make the strip background transparent
    panel.spacing = unit(2, "lines")         # Widen the gap between the two facets
  )

# Set a wider plot size
options(repr.plot.width = img_width, repr.plot.height = img_height)

# Display the histogram
print(hist_plot)
```

As the bias plots above show (estimates minus the ground-truth effect of 1), the double-lasso estimates concentrate around zero, whereas the naive estimates do not.
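## Quick Numeric Check

As a numeric complement to the plots, one can summarize the bias, spread, and root-mean-squared error of the two estimators directly from the simulated draws. The sketch below only relies on the `Naive` and `Orthogonal` vectors produced by the simulation chunk above.

```{r}
# Summarize the simulated estimates relative to the true effect of 1
bias_summary <- data.frame(
  Method   = c("Naive", "Orthogonal"),
  MeanBias = c(mean(Naive) - 1, mean(Orthogonal) - 1),
  SD       = c(sd(Naive), sd(Orthogonal)),
  RMSE     = c(sqrt(mean((Naive - 1)^2)), sqrt(mean((Orthogonal - 1)^2)))
)
bias_summary
```

The orthogonal estimator's mean bias should be close to zero, while the naive estimator's mean bias and RMSE should reflect the shift visible in the histograms.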