From d3034e92c3bf910f3f1021ebc1dd172d93093bb5 Mon Sep 17 00:00:00 2001 From: vsyrgkanis Date: Tue, 30 Jul 2024 02:42:38 -0700 Subject: [PATCH] Delete PM2/test_addition.irnb --- PM2/test_addition.irnb | 214 ----------------------------------------- 1 file changed, 214 deletions(-) delete mode 100644 PM2/test_addition.irnb diff --git a/PM2/test_addition.irnb b/PM2/test_addition.irnb deleted file mode 100644 index 29ad277d..00000000 --- a/PM2/test_addition.irnb +++ /dev/null @@ -1,214 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7HCJkA2ifjEk" - }, - "source": [ - "# Simulation on Orthogonal Estimation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4sldk16nfXw9" - }, - "source": [ - "We compare the performance of the naive and orthogonal methods in a computational experiment where\n", - "$p=n=100$, $\\beta_j = 1/j^2$, $(\\gamma_{DW})_j = 1/j^2$ and $$Y = 1 \\cdot D + \\beta' W + \\epsilon_Y$$\n", - "\n", - "where $W \\sim N(0,I)$, $\\epsilon_Y \\sim N(0,1)$, and $$D = \\gamma'_{DW} W + \\tilde{D}$$ where $\\tilde{D} \\sim N(0,1)/4$.\n", - "\n", - "The true treatment effect here is 1. From the plots produced in this notebook (estimate minus ground truth), we show that the naive single-selection estimator is heavily biased (lack of Neyman orthogonality in its estimation strategy), while the orthogonal estimator based on partialling out, is approximately unbiased and Gaussian." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dSvVz5Z6D14H", - "vscode": { - "languageId": "r" - } - }, - "outputs": [], - "source": [ - "install.packages(\"hdm\")\n", - "install.packages(\"ggplot2\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "r" - } - }, - "outputs": [], - "source": [ - "library(hdm)\n", - "library(ggplot2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "_execution_state": "idle", - "_uuid": "051d70d956493feee0c6d64651c6a088724dca2a", - "id": "fAe2EP5VCFN_", - "vscode": { - "languageId": "r" - } - }, - "outputs": [], - "source": [ - "# Initialize constants\n", - "B <- 10000 # Number of iterations\n", - "n <- 100 # Sample size\n", - "p <- 100 # Number of features\n", - "\n", - "# Initialize arrays to store results\n", - "Naive <- rep(0, B)\n", - "Orthogonal <- rep(0, B)\n", - "\n", - "\n", - "lambdaYs <- rep(0, B)\n", - "lambdaDs <- rep(0, B)\n", - "\n", - "for (i in 1:B) {\n", - " # Generate parameters\n", - " beta <- 1 / (1:p)^2\n", - " gamma <- 1 / (1:p)^2\n", - "\n", - " # Generate covariates / random data\n", - " X <- matrix(rnorm(n * p), n, p)\n", - " D <- X %*% gamma + rnorm(n) / 4\n", - "\n", - " # Generate Y using DGP\n", - " Y <- D + X %*% beta + rnorm(n)\n", - "\n", - " # Single selection method\n", - " rlasso_result <- hdm::rlasso(Y ~ D + X) # Fit lasso regression\n", - " sx_ids <- which(rlasso_result$coef[-c(1, 2)] != 0) # Selected covariates\n", - "\n", - " # Check if any Xs are selected\n", - " if (sum(sx_ids) == 0) {\n", - " Naive[i] <- lm(Y ~ D)$coef[2] # Fit linear regression with only D if no Xs are selected\n", - " } else {\n", - " Naive[i] <- lm(Y ~ D + X[, sx_ids])$coef[2] # Fit linear regression with selected X otherwise\n", - " }\n", - "\n", - " # Partialling out / Double Lasso\n", - "\n", - " fitY <- hdm::rlasso(Y ~ X, post = TRUE)\n", - " resY <- fitY$res\n", - "\n", - " fitD <- hdm::rlasso(D ~ X, post = TRUE)\n", - " resD <- fitD$res\n", - "\n", - " Orthogonal[i] <- lm(resY ~ resD)$coef[2] # Fit linear regression for residuals\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Bj174QuEaPb5" - }, - "source": [ - "## Make a Nice Plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MjB3qbGEaRnl", - "vscode": { - "languageId": "r" - } - }, - "outputs": [], - "source": [ - "# Specify ratio\n", - "img_width <- 15\n", - "img_height <- img_width / 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N7bdztt1CFOE", - "vscode": { - "languageId": "r" - } - }, - "outputs": [], - "source": [ - "# Create a data frame for the estimates\n", - "df <- data.frame(Method = rep(c(\"Naive\", \"Orthogonal\"), each = B),\n", - " Value = c(Naive - 1, Orthogonal - 1))\n", - "\n", - "# Create the histogram using ggplot2\n", - "hist_plot <- ggplot(df, aes(x = Value, fill = Method)) +\n", - " geom_histogram(binwidth = 0.1, color = \"black\", alpha = 0.7) +\n", - " facet_wrap(~Method, scales = \"fixed\") +\n", - " labs(\n", - " title = \"Distribution of Estimates (Centered around Ground Truth)\",\n", - " x = \"Bias\",\n", - " y = \"Frequency\"\n", - " ) +\n", - " scale_x_continuous(breaks = seq(-2, 1.5, 0.5)) +\n", - " theme_minimal() +\n", - " theme(\n", - " plot.title = element_text(hjust = 0.5), # Center the plot title\n", - " strip.text = element_text(size = 10), # Increase text size in facet labels\n", - " legend.position = \"none\", # Remove the legend\n", - " panel.grid.major = element_blank(), # Make major grid lines invisible\n", - " # panel.grid.minor = element_blank(), # Make minor grid lines invisible\n", - " strip.background = element_blank() # Make the strip background transparent\n", - " ) +\n", - " theme(panel.spacing = unit(2, \"lines\")) # Adjust the ratio to separate subplots wider\n", - "\n", - "# Set a wider plot size\n", - "options(repr.plot.width = img_width, repr.plot.height = img_height)\n", - "\n", - "# Display the histogram\n", - "print(hist_plot)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8hrJ3M5mrD8_" - }, - "source": [ - "As we can see from the above bias plots (estimates minus the ground truth effect of 1), the double lasso procedure concentrates around zero whereas the naive estimator does not." - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "R", - "language": "R", - "name": "ir" - }, - "language_info": { - "codemirror_mode": "r", - "file_extension": ".r", - "mimetype": "text/x-r-source", - "name": "R", - "pygments_lexer": "r", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}