From ee558dba6ffc66c877ad598e058a8db8e9513e61 Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Tue, 6 Aug 2024 06:14:17 +0200
Subject: [PATCH] Basic Unit Test Framework (#6)

* add github actions to install and test package

* add tests

* small tweaks

* update tests

* stricter internal equivalence tests

* also test on py 3.12
---
 .github/workflows/ci.yml  | 33 ++++++++++++++++++++
 tests/__init__.py         |  0
 tests/test_fitter.py      | 47 ++++++++++++++++++++++++++++
 tests/test_vs_pyfixest.py | 66 +++++++++++++++++++++++++++++++++++++++
 tests/utils.py            | 24 ++++++++++++++
 5 files changed, 170 insertions(+)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_fitter.py
 create mode 100644 tests/test_vs_pyfixest.py
 create mode 100644 tests/utils.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..895f551
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,33 @@
+name: Testing duckreg
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:  # a step input must be a scalar, so the versions fan out as one job each
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+
+    - name: Install test dependencies
+      run: pip install pytest
+
+    - name: Run tests
+      run: pytest
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_fitter.py b/tests/test_fitter.py
new file mode 100644
index 0000000..3b2a206
--- /dev/null
+++ b/tests/test_fitter.py
@@ -0,0 +1,47 @@
+import pytest
+import numpy as np
+from duckreg.estimators import DuckRegression
+from tests.utils import generate_sample_data, create_duckdb_database
+
+@pytest.fixture(scope="session", autouse=True)
+def database():
+    """Create 'test_dataset.db' from the sample data; autouse so it exists before tests run."""
+    df = generate_sample_data()
+    create_duckdb_database(df, 'test_dataset.db')
+
+@pytest.mark.parametrize("fml", ["Y ~ D", "Y ~ D + f1", "Y ~ D + f1 + f2"])
+@pytest.mark.parametrize("cluster_col", ["f1"])
+def test_fitters(database, fml, cluster_col):
+    """Default and "feols" fitters must agree; `database` ensures the DuckDB file exists."""
+    m_duck = DuckRegression(
+        db_name='test_dataset.db',
+        table_name='data',
+        formula=fml,
+        cluster_col=cluster_col,
+        n_bootstraps=20,
+        seed=42,
+    )
+    m_duck.fit()
+
+    # NOTE(review): presumably fit() returns the fitted model when fitter="feols" - confirm.
+    m_feols = DuckRegression(
+        db_name='test_dataset.db',
+        table_name='data',
+        formula=fml,
+        cluster_col=cluster_col,
+        n_bootstraps=20,
+        seed=42,
+        fitter="feols",
+    ).fit()
+
+    results = m_duck.summary()
+    coefs = results["point_estimate"]
+    se = results["standard_error"]
+    # Compare magnitudes two-sidedly; a signed difference would pass whenever ours is smaller.
+    assert np.all(np.abs(np.abs(coefs) - np.abs(m_feols.coef().values)) < 1e-12), "Coeficients are not equal"
+    assert np.all(np.abs(np.abs(se) - np.abs(m_feols.se().values)) < 1e-12), "Standard errors are not equal"
+
+
+
+
+
diff --git a/tests/test_vs_pyfixest.py b/tests/test_vs_pyfixest.py
new file mode 100644
index 0000000..292b0ac
--- /dev/null
+++ b/tests/test_vs_pyfixest.py
@@ -0,0 +1,66 @@
+import pytest
+import numpy as np
+from duckreg.estimators import DuckRegression
+from tests.utils import generate_sample_data, create_duckdb_database
+import pyfixest as pf
+
+@pytest.fixture(scope="session")
+def get_data():
+    """Return the sample DataFrame produced by generate_sample_data()."""
+    return generate_sample_data()
+
+@pytest.fixture(scope="session", autouse=True)
+def database(get_data):
+    """Load the sample data into 'test_dataset.db'; autouse so it exists before tests run."""
+    create_duckdb_database(get_data, 'test_dataset.db')
+    return get_data
+
+
+@pytest.mark.parametrize("fml", ["Y ~ D", "Y ~ D + f1", "Y ~ D + f1 + f2"])
+@pytest.mark.parametrize("cluster_col", [""])
+def test_vs_pyfixest_deterministic(get_data, database, fml, cluster_col):
+    """Compare DuckRegression with pyfixest.feols; `database` ensures the DuckDB file exists."""
+    m_duck = DuckRegression(
+        db_name='test_dataset.db',
+        table_name='data',
+        formula=fml,
+        cluster_col=cluster_col,
+        n_bootstraps=0,
+        seed=42,
+    )
+    m_duck.fit()
+    m_duck.fit_vcov()
+
+    # An empty cluster_col selects "hetero" standard errors, otherwise CRV1 clustering.
+    m_feols = pf.feols(
+        fml,
+        data=get_data,
+        vcov="hetero" if cluster_col == "" else {"CRV1": cluster_col},
+        ssc=pf.ssc(adj=False, cluster_adj=True),
+    )
+
+    results = m_duck.summary()
+    coefs = results["point_estimate"]
+    se = results["standard_error"]
+    # Compare magnitudes two-sidedly; a signed difference would pass whenever ours is smaller.
+    assert np.all(np.abs(np.abs(coefs) - np.abs(m_feols.coef().values)) < 1e-8), "Coeficients are not equal"
+    assert np.all(np.abs(np.abs(se) - np.abs(m_feols.se().values)) < 1e-4), "Standard errors are not equal"
+
+def test_multiple_estimation_stochastic():
+    """Placeholder: no checks are implemented yet, so this test always passes."""
+    pass
+
+
+def test_vs_pyfixest_stochastic():
+    """Placeholder: no checks are implemented yet, so this test always passes."""
+    pass
+
+
+def test_mundlak():
+    """Placeholder: no checks are implemented yet, so this test always passes."""
+    pass
+
+
+def test_double_demeaning():
+    """Placeholder: no checks are implemented yet, so this test always passes."""
+    pass
\ No newline at end of file
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..bd13836
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,24 @@
+import numpy as np
+import pandas as pd
+import duckdb
+
+# Build a reproducible synthetic regression dataset
+def generate_sample_data(N=10_000, seed=12345):
+    """Return a DataFrame with columns Y, Y2, D (0/1), f1, f2 (drawn from 0..19) and rowid."""
+    rng = np.random.default_rng(seed)
+    treat = rng.choice([0, 1], size=(N, 1))
+    covars = rng.choice(range(20), size=(N, 2), replace=True)
+    signal = covars @ np.array([[1], [2]])  # f1 + 2 * f2, shared by both outcomes
+    y1 = treat + signal + rng.normal(size=(N, 1))
+    y2 = -1 * treat + signal + rng.normal(size=(N, 1))
+    frame = pd.DataFrame(np.hstack([y1, y2, treat, covars]), columns=["Y", "Y2", "D", "f1", "f2"])
+    return frame.assign(rowid=range(N))
+
+
+# Create (or replace) a DuckDB table holding the given DataFrame
+def create_duckdb_database(df, db_name="large_dataset.db", table="data"):
+    """Write `df` to `table` in the DuckDB file `db_name`, dropping any existing table first."""
+    with duckdb.connect(db_name) as conn:  # closed on exit, even if a statement fails
+        conn.execute(f"DROP TABLE IF EXISTS {table}")
+        conn.execute(f"CREATE TABLE {table} AS SELECT * FROM df")  # `df` is the argument
+    print(f"Data loaded into DuckDB database: {db_name}")
\ No newline at end of file