diff --git a/README.md b/README.md index 4236547..82a112f 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# merino: Mastering Econometrics Regressions, Inference, and Numerical Optimization +# `merino`: Mastering Econometrics Regressions, Inference, and Numerical Optimization ![](resources/advanced.jhu.edu.svg 'JHU AAP') ## Introduction -Welcome to merino, a comprehensive collection of Jupyter notebooks designed to help you master econometrics. These notebooks cover various topics in regression analysis, inference, and numerical optimization, making them an excellent resource for students, researchers, and practitioners in the field of econometrics. +Welcome to `merino`, a comprehensive collection of Jupyter notebooks designed to help you master econometrics. These notebooks cover various topics in regression analysis, inference, and numerical optimization, making them an excellent resource for students, researchers, and practitioners in the field of econometrics. ## Source @@ -41,6 +41,11 @@ Each notebook corresponds to a chapter from the source material. Click on the "O Open In Colab +6. **Ch7. MRA - Qualitative Regressors** + + Open In Colab + + ## How to Use 1. Click on the "Open in Colab" badge next to the notebook you want to explore. 
diff --git a/data/CPS1985.csv b/data/CPS1985.csv new file mode 100644 index 0000000..65856a5 --- /dev/null +++ b/data/CPS1985.csv @@ -0,0 +1,535 @@ +wage,education,experience,age,ethnicity,region,gender,occupation,sector,union,married +5.1,8,21,35,hispanic,other,female,worker,manufacturing,no,yes +4.95,9,42,57,cauc,other,female,worker,manufacturing,no,yes +6.67,12,1,19,cauc,other,male,worker,manufacturing,no,no +4,12,4,22,cauc,other,male,worker,other,no,no +7.5,12,17,35,cauc,other,male,worker,other,no,yes +13.07,13,9,28,cauc,other,male,worker,other,yes,no +4.45,10,27,43,cauc,south,male,worker,other,no,no +19.47,12,9,27,cauc,other,male,worker,other,no,no +13.28,16,11,33,cauc,other,male,worker,manufacturing,no,yes +8.75,12,9,27,cauc,other,male,worker,other,no,no +11.35,12,17,35,cauc,other,male,worker,other,yes,yes +11.5,12,19,37,cauc,other,male,worker,manufacturing,yes,no +6.5,8,27,41,cauc,south,male,worker,other,no,yes +6.25,9,30,45,cauc,south,male,worker,other,yes,no +19.98,9,29,44,cauc,south,male,worker,other,no,yes +7.3,12,37,55,cauc,other,male,worker,construction,no,yes +8,7,44,57,cauc,south,male,worker,other,no,yes +22.2,12,26,44,cauc,other,male,worker,manufacturing,yes,yes +3.65,11,16,33,cauc,other,male,worker,other,no,no +20.55,12,33,51,cauc,other,male,worker,other,no,yes +5.71,12,16,34,cauc,other,female,worker,manufacturing,yes,yes +7,7,42,55,other,other,male,worker,manufacturing,yes,yes +3.75,12,9,27,cauc,other,male,worker,other,no,no +4.5,11,14,31,other,south,male,worker,other,no,yes +9.56,12,23,41,cauc,other,male,worker,other,no,yes +5.75,6,45,57,cauc,south,male,worker,manufacturing,no,yes +9.36,12,8,26,cauc,other,male,worker,manufacturing,no,yes +6.5,10,30,46,cauc,other,male,worker,other,no,yes +3.35,12,8,26,cauc,other,female,worker,manufacturing,no,yes +4.75,12,8,26,cauc,other,male,worker,other,no,yes +8.9,14,13,33,cauc,other,male,worker,other,no,no +4,12,46,64,cauc,south,female,worker,other,no,no +4.7,8,19,33,cauc,other,male,worker,other,no,yes 
+5,17,1,24,cauc,south,female,worker,other,no,no +9.25,12,19,37,cauc,other,male,worker,manufacturing,no,no +10.67,12,36,54,other,other,male,worker,other,no,no +7.61,12,20,38,other,south,male,worker,construction,no,yes +10,12,35,53,other,other,male,worker,construction,yes,yes +7.5,12,3,21,cauc,other,male,worker,other,no,no +12.2,14,10,30,cauc,south,male,worker,manufacturing,no,yes +3.35,12,0,18,cauc,other,male,worker,other,no,no +11,14,14,34,cauc,south,male,worker,manufacturing,yes,yes +12,12,14,32,cauc,other,male,worker,manufacturing,no,yes +4.85,9,16,31,cauc,other,female,worker,manufacturing,no,yes +4.3,13,8,27,cauc,south,male,worker,construction,no,no +6,7,15,28,cauc,south,female,worker,manufacturing,no,yes +15,16,12,34,cauc,other,male,worker,manufacturing,no,yes +4.85,10,13,29,cauc,south,male,worker,other,no,no +9,8,33,47,cauc,other,male,worker,other,yes,yes +6.36,12,9,27,cauc,other,male,worker,manufacturing,no,yes +9.15,12,7,25,cauc,other,male,worker,other,no,yes +11,16,13,35,cauc,other,male,worker,manufacturing,yes,yes +4.5,12,7,25,cauc,other,female,worker,manufacturing,no,yes +4.8,12,16,34,cauc,other,female,worker,manufacturing,no,yes +4,13,0,19,cauc,other,male,worker,other,no,no +5.5,12,11,29,cauc,other,female,worker,manufacturing,no,no +8.4,13,17,36,cauc,other,male,worker,manufacturing,no,no +6.75,10,13,29,cauc,other,male,worker,manufacturing,no,yes +10,12,22,40,other,other,male,worker,manufacturing,yes,no +5,12,28,46,cauc,other,female,worker,manufacturing,no,yes +6.5,11,17,34,cauc,other,male,worker,other,no,no +10.75,12,24,42,cauc,other,male,worker,construction,yes,yes +7,3,55,64,hispanic,south,male,worker,manufacturing,no,yes +11.43,12,3,21,cauc,south,male,worker,construction,no,no +4,12,6,24,other,other,male,worker,manufacturing,yes,no +9,10,27,43,cauc,other,male,worker,construction,no,yes +13,12,19,37,other,south,male,worker,manufacturing,yes,yes +12.22,12,19,37,cauc,other,male,worker,construction,yes,yes 
+6.28,12,38,56,cauc,other,female,worker,manufacturing,no,yes +6.75,10,41,57,other,south,male,worker,manufacturing,yes,yes +3.35,11,3,20,other,south,male,worker,manufacturing,no,no +16,14,20,40,cauc,other,male,worker,other,yes,yes +5.25,10,15,31,cauc,other,male,worker,other,no,yes +3.5,8,8,22,hispanic,south,male,worker,manufacturing,no,yes +4.22,8,39,53,cauc,south,female,worker,manufacturing,no,yes +3,6,43,55,hispanic,other,female,worker,manufacturing,yes,yes +4,11,25,42,cauc,south,female,worker,manufacturing,yes,yes +10,12,11,29,cauc,other,male,worker,other,yes,yes +5,12,12,30,other,other,male,worker,other,no,yes +16,12,35,53,cauc,south,male,worker,manufacturing,yes,yes +13.98,14,14,34,cauc,other,male,worker,other,no,no +13.26,12,16,34,cauc,other,male,worker,other,yes,yes +6.1,10,44,60,cauc,other,female,worker,manufacturing,yes,no +3.75,16,13,35,cauc,south,female,worker,other,no,no +9,13,8,27,other,other,male,worker,manufacturing,yes,no +9.45,12,13,31,cauc,other,male,worker,manufacturing,no,no +5.5,11,18,35,cauc,other,male,worker,other,yes,yes +8.93,12,18,36,cauc,other,female,worker,other,no,yes +6.25,12,6,24,cauc,south,female,worker,other,no,no +9.75,11,37,54,cauc,south,male,worker,manufacturing,yes,yes +6.73,12,2,20,cauc,south,male,worker,manufacturing,no,yes +7.78,12,23,41,cauc,other,male,worker,manufacturing,no,yes +2.85,12,1,19,cauc,other,male,worker,other,no,no +3.35,12,10,28,other,south,female,worker,manufacturing,no,yes +19.98,12,23,41,cauc,other,male,worker,manufacturing,no,yes +8.5,12,8,26,other,other,male,worker,other,yes,yes +9.75,15,9,30,cauc,other,female,worker,manufacturing,no,yes +15,12,33,51,cauc,other,male,worker,construction,yes,yes +8,12,19,37,cauc,other,female,worker,manufacturing,no,yes +11.25,13,14,33,cauc,other,male,worker,other,no,yes +14,11,13,30,cauc,other,male,worker,other,yes,yes +10,10,12,28,cauc,other,male,worker,construction,no,yes +6.5,12,8,26,cauc,other,male,worker,other,no,no 
+9.83,12,23,41,cauc,other,male,worker,manufacturing,no,yes +18.5,14,13,33,cauc,other,female,worker,manufacturing,no,no +12.5,12,9,27,cauc,south,male,worker,other,no,yes +26,14,21,41,cauc,other,male,worker,other,yes,yes +14,5,44,55,cauc,south,male,worker,construction,no,yes +10.5,12,4,22,cauc,other,male,worker,other,yes,yes +11,8,42,56,cauc,other,male,worker,manufacturing,no,yes +12.47,13,10,29,cauc,other,male,worker,other,yes,yes +12.5,12,11,29,cauc,other,male,worker,construction,no,no +15,12,40,58,cauc,other,male,worker,construction,yes,yes +6,12,8,26,cauc,other,male,worker,construction,no,no +9.5,11,29,46,cauc,south,male,worker,construction,no,yes +5,16,3,25,cauc,other,male,worker,other,yes,no +3.75,11,11,28,cauc,other,male,worker,construction,no,no +12.57,12,12,30,cauc,other,male,worker,other,yes,yes +6.88,8,22,36,hispanic,other,female,worker,other,no,yes +5.5,12,12,30,cauc,other,male,worker,other,no,yes +7,12,7,25,cauc,other,male,worker,other,yes,yes +4.5,12,15,33,cauc,other,female,worker,manufacturing,no,no +6.5,12,28,46,cauc,other,male,worker,other,no,yes +12,12,20,38,cauc,south,male,worker,manufacturing,yes,yes +5,12,6,24,cauc,south,male,worker,construction,no,no +6.5,12,5,23,cauc,south,male,worker,manufacturing,no,no +6.8,9,30,45,cauc,south,female,worker,manufacturing,no,yes +8.75,13,18,37,cauc,other,male,worker,other,no,yes +3.75,12,6,24,other,south,female,worker,manufacturing,no,yes +4.5,12,16,34,hispanic,south,male,worker,other,no,no +6,12,1,19,hispanic,south,male,worker,other,yes,no +5.5,12,3,21,cauc,other,male,worker,manufacturing,no,no +13,12,8,26,cauc,other,male,worker,other,no,yes +5.65,14,2,22,cauc,other,male,worker,manufacturing,no,no +4.8,9,16,31,other,other,male,worker,manufacturing,no,no +7,10,9,25,cauc,south,male,worker,construction,no,yes +5.25,12,2,20,cauc,other,male,worker,other,no,no +3.35,7,43,56,cauc,south,male,worker,manufacturing,no,yes +8.5,9,38,53,cauc,other,male,worker,manufacturing,no,yes 
+6,12,9,27,cauc,other,male,worker,other,no,yes +6.75,12,12,30,cauc,south,male,worker,other,no,yes +8.89,12,18,36,cauc,other,male,worker,manufacturing,no,yes +14.21,11,15,32,cauc,other,male,worker,manufacturing,yes,no +10.78,11,28,45,other,south,male,worker,construction,yes,yes +8.9,10,27,43,cauc,south,male,worker,construction,yes,yes +7.5,12,38,56,cauc,south,male,worker,other,no,yes +4.5,12,3,21,cauc,other,female,worker,manufacturing,no,no +11.25,12,41,59,cauc,other,male,worker,other,yes,yes +13.45,12,16,34,cauc,south,male,worker,other,yes,yes +6,13,7,26,cauc,south,male,worker,manufacturing,no,yes +4.62,6,33,45,other,south,female,worker,manufacturing,no,no +10.58,14,25,45,cauc,other,male,worker,manufacturing,no,yes +5,12,5,23,cauc,south,male,worker,other,no,yes +8.2,14,17,37,other,south,male,worker,other,no,no +6.25,12,1,19,cauc,south,male,worker,other,no,no +8.5,12,13,31,cauc,other,male,worker,manufacturing,no,yes +24.98,16,18,40,cauc,other,male,management,other,no,yes +16.65,14,21,41,cauc,south,male,management,other,no,yes +6.25,14,2,22,cauc,other,male,management,other,no,no +4.55,12,4,22,hispanic,south,female,management,other,no,no +11.25,12,30,48,hispanic,south,female,management,other,no,yes +21.25,13,32,51,cauc,other,male,management,other,no,no +12.65,17,13,36,cauc,other,female,management,other,no,yes +7.5,12,17,35,cauc,other,male,management,other,no,no +10.25,14,26,46,cauc,other,female,management,other,no,yes +3.35,16,9,31,cauc,other,male,management,other,no,no +13.45,16,8,30,other,other,male,management,other,no,no +4.84,15,1,22,cauc,other,male,management,other,yes,yes +26.29,17,32,55,cauc,south,male,management,other,no,yes +6.58,12,24,42,cauc,other,female,management,other,no,yes +44.5,14,1,21,cauc,other,female,management,other,no,no +15,12,42,60,cauc,other,male,management,manufacturing,no,yes +11.25,16,3,25,other,other,female,management,manufacturing,no,no +7,12,32,50,cauc,other,female,management,other,no,yes 
+10,14,22,42,other,other,male,management,other,no,no +14.53,16,18,40,cauc,other,male,management,other,no,yes +20,18,19,43,cauc,other,female,management,other,no,yes +22.5,15,12,33,cauc,other,male,management,other,no,yes +3.64,12,42,60,cauc,other,female,management,other,no,yes +10.62,12,34,52,cauc,south,male,management,other,no,yes +24.98,18,29,53,cauc,other,male,management,other,no,yes +6,16,8,30,cauc,south,male,management,other,no,no +19,18,13,37,cauc,other,male,management,manufacturing,no,no +13.2,16,10,32,cauc,other,male,management,other,no,no +22.5,16,22,44,cauc,other,male,management,other,no,yes +15,16,10,32,cauc,south,male,management,other,no,yes +6.88,17,15,38,cauc,other,female,management,other,no,yes +11.84,12,26,44,cauc,other,male,management,other,no,yes +16.14,14,16,36,cauc,other,male,management,other,no,no +13.95,18,14,38,cauc,other,female,management,other,no,yes +13.16,12,38,56,cauc,other,female,management,other,no,yes +5.3,12,14,32,other,south,male,management,other,no,yes +4.5,12,7,25,cauc,other,female,management,other,no,yes +10,18,13,37,cauc,south,female,management,other,no,no +10,10,20,36,cauc,other,male,management,other,no,yes +10,16,7,29,hispanic,other,male,management,other,yes,yes +9.37,16,26,48,cauc,other,female,management,other,no,yes +5.8,16,14,36,cauc,other,male,management,other,no,yes +17.86,13,36,55,cauc,other,male,management,other,no,no +1,12,24,42,cauc,other,male,management,other,no,yes +8.8,14,41,61,cauc,south,male,management,other,no,yes +9,16,7,29,other,other,male,management,other,no,yes +18.16,17,14,37,cauc,south,male,management,other,no,no +7.81,12,1,19,cauc,south,female,management,other,no,no +10.62,16,6,28,cauc,other,female,management,manufacturing,no,yes +4.5,12,3,21,cauc,other,female,management,other,no,yes +17.25,15,31,52,cauc,other,male,management,other,no,yes +10.5,13,14,33,cauc,other,female,management,manufacturing,no,yes +9.22,14,13,33,cauc,other,female,management,other,no,yes 
+15,16,26,48,other,other,male,management,manufacturing,yes,yes +22.5,18,14,38,cauc,other,male,management,other,no,yes +4.55,13,33,52,cauc,other,female,sales,other,no,yes +9,12,16,34,cauc,other,male,sales,other,no,yes +13.33,18,10,34,cauc,other,male,sales,other,no,yes +15,14,22,42,cauc,other,male,sales,other,no,no +7.5,14,2,22,cauc,other,male,sales,other,no,no +4.25,12,29,47,cauc,south,female,sales,other,no,yes +12.5,12,43,61,cauc,other,male,sales,manufacturing,no,yes +5.13,12,5,23,cauc,other,female,sales,other,no,yes +3.35,16,14,36,other,south,female,sales,other,no,yes +11.11,12,28,46,cauc,south,male,sales,other,no,yes +3.84,11,25,42,other,south,female,sales,other,no,yes +6.4,12,45,63,cauc,other,female,sales,other,no,yes +5.56,14,5,25,cauc,south,male,sales,other,no,no +10,12,20,38,cauc,south,male,sales,manufacturing,no,yes +5.65,16,6,28,cauc,other,female,sales,other,no,yes +11.5,16,16,38,cauc,other,male,sales,other,no,yes +3.5,11,33,50,cauc,other,female,sales,other,no,yes +3.35,13,2,21,cauc,south,female,sales,other,no,yes +4.75,12,10,28,cauc,south,female,sales,other,no,no +19.98,14,44,64,cauc,south,male,sales,other,no,yes +3.5,14,6,26,cauc,south,female,sales,other,no,yes +4,12,15,33,cauc,other,female,sales,other,no,no +7,12,5,23,cauc,other,male,sales,other,no,yes +6.25,13,4,23,cauc,other,female,sales,manufacturing,no,yes +4.5,14,14,34,cauc,other,male,sales,other,no,yes +14.29,14,32,52,cauc,other,female,sales,other,no,yes +5,12,14,32,cauc,other,female,sales,other,no,yes +13.75,14,21,41,cauc,other,male,sales,other,no,yes +13.71,12,43,61,cauc,other,male,sales,other,yes,yes +7.5,12,27,45,other,south,female,sales,other,no,yes +3.8,12,4,22,cauc,other,female,sales,other,no,no +5,14,0,20,hispanic,other,male,sales,other,no,no +9.42,12,32,50,cauc,south,male,sales,other,no,yes +5.5,12,20,38,cauc,other,male,sales,other,no,yes +3.75,15,4,25,cauc,south,male,sales,other,no,no +3.5,12,34,52,cauc,other,male,sales,other,no,yes +5.8,13,5,24,cauc,other,male,sales,other,no,no 
+12,17,13,36,cauc,other,male,sales,manufacturing,no,yes +5,14,17,37,hispanic,other,female,office,other,no,yes +8.75,13,10,29,cauc,south,female,office,other,no,yes +10,16,7,29,cauc,other,female,office,other,no,yes +8.5,12,25,43,cauc,other,female,office,other,no,no +8.63,12,18,36,other,other,female,office,other,no,yes +9,16,27,49,cauc,other,female,office,manufacturing,no,yes +5.5,16,2,24,cauc,other,female,office,other,no,no +11.11,13,13,32,cauc,other,male,office,other,no,yes +10,14,24,44,cauc,other,female,office,other,no,no +5.2,18,13,37,hispanic,south,female,office,other,no,yes +8,14,15,35,cauc,other,female,office,other,yes,no +3.56,12,12,30,hispanic,south,female,office,other,no,no +5.2,12,24,42,cauc,other,female,office,other,no,yes +11.67,12,43,61,cauc,other,female,office,construction,no,yes +11.32,12,13,31,cauc,other,female,office,manufacturing,no,yes +7.5,12,16,34,cauc,south,female,office,other,no,yes +5.5,11,24,41,cauc,other,female,office,other,no,yes +5,16,4,26,cauc,south,female,office,other,no,yes +7.75,12,24,42,cauc,other,female,office,other,no,yes +5.25,12,45,63,cauc,other,female,office,other,no,yes +9,12,20,38,cauc,other,male,office,other,yes,yes +9.65,12,38,56,cauc,other,female,office,other,no,yes +5.21,18,10,34,cauc,south,male,office,other,no,yes +7,11,16,33,other,other,female,office,other,no,yes +12.16,12,32,50,other,south,female,office,other,no,yes +5.25,16,2,24,cauc,south,female,office,other,no,no +10.32,13,28,47,cauc,south,female,office,other,no,no +3.35,16,3,25,other,other,male,office,other,no,no +7.7,13,8,27,cauc,other,female,office,other,yes,no +9.17,12,44,62,cauc,other,female,office,manufacturing,no,yes +8.43,12,12,30,cauc,south,male,office,other,no,yes +4,12,8,26,other,south,male,office,other,no,yes +4.13,12,4,22,cauc,other,female,office,other,no,yes +3,12,28,46,cauc,south,female,office,other,no,yes +4.25,13,0,19,cauc,south,female,office,other,no,no +7.53,14,1,21,cauc,south,male,office,other,no,no 
+10.53,14,12,32,cauc,other,female,office,manufacturing,no,yes +5,12,39,57,cauc,other,female,office,other,no,yes +15.03,12,24,42,cauc,other,female,office,other,no,yes +11.25,17,32,55,other,other,female,office,other,no,yes +6.25,16,4,26,other,other,male,office,other,no,no +3.5,12,25,43,other,other,female,office,other,no,no +6.85,12,8,26,other,other,male,office,other,no,no +12.5,13,16,35,cauc,other,female,office,other,no,yes +12,12,5,23,cauc,south,male,office,other,no,no +6,13,31,50,cauc,other,male,office,other,no,no +9.5,12,25,43,cauc,other,female,office,other,no,no +4.1,12,15,33,cauc,other,female,office,other,no,yes +10.43,14,15,35,cauc,south,female,office,other,no,yes +5,12,0,18,cauc,other,female,office,other,no,no +7.69,12,19,37,cauc,other,male,office,other,no,yes +5.5,12,21,39,other,other,female,office,other,no,no +6.4,12,6,24,cauc,other,female,office,other,no,no +12.5,12,14,32,cauc,other,female,office,other,yes,yes +6.25,13,30,49,cauc,other,female,office,other,no,yes +8,12,8,26,cauc,other,female,office,other,no,no +9.6,9,33,48,cauc,other,male,office,other,yes,no +9.1,13,16,35,hispanic,other,male,office,other,no,no +7.5,12,20,38,cauc,south,female,office,other,no,no +5,13,6,25,cauc,south,female,office,other,no,yes +7,12,10,28,cauc,other,female,office,other,yes,yes +3.55,13,1,20,cauc,south,female,office,other,no,no +8.5,12,2,20,other,south,male,office,other,no,no +4.5,13,0,19,cauc,south,female,office,other,no,no +7.88,16,17,39,other,other,male,office,other,no,yes +5.25,12,8,26,cauc,other,female,office,other,no,no +5,12,4,22,cauc,south,male,office,other,no,no +9.33,12,15,33,cauc,other,female,office,other,no,no +10.5,12,29,47,cauc,other,female,office,other,no,yes +7.5,12,23,41,other,south,female,office,other,no,yes +9.5,12,39,57,cauc,south,female,office,other,no,yes +9.6,12,14,32,cauc,south,female,office,other,no,yes +5.87,17,6,29,other,south,female,office,other,no,no +11.02,14,12,32,cauc,south,male,office,other,yes,yes 
+5,12,26,44,cauc,south,female,office,other,no,no +5.62,14,32,52,cauc,other,female,office,other,no,yes +12.5,15,6,27,cauc,other,female,office,other,no,yes +10.81,12,40,58,cauc,other,female,office,other,no,yes +5.4,12,18,36,cauc,other,female,office,manufacturing,no,yes +7,11,12,29,cauc,other,female,office,other,no,no +4.59,12,36,54,cauc,south,female,office,construction,no,yes +6,12,19,37,cauc,other,female,office,other,no,yes +11.71,16,42,64,cauc,other,female,office,manufacturing,no,no +5.62,13,2,21,hispanic,other,female,office,other,no,yes +5.5,12,33,51,cauc,other,female,office,other,no,yes +4.85,12,14,32,cauc,south,female,office,other,no,yes +6.75,12,22,40,cauc,other,male,office,other,no,no +4.25,12,20,38,cauc,other,female,office,other,no,yes +5.75,12,15,33,cauc,other,female,office,other,no,yes +3.5,12,35,53,cauc,other,male,office,other,no,yes +3.35,12,7,25,cauc,other,female,office,other,no,yes +10.62,12,45,63,cauc,other,female,office,manufacturing,no,no +8,12,9,27,cauc,other,female,office,other,no,no +4.75,12,2,20,cauc,south,female,office,other,no,yes +8.5,17,3,26,cauc,south,male,office,other,no,no +8.85,14,19,39,other,other,female,office,other,yes,yes +8,12,14,32,cauc,south,female,office,other,no,yes +6,4,54,64,cauc,other,male,services,other,no,yes +7.14,14,17,37,cauc,other,male,services,other,no,yes +3.4,8,29,43,other,other,female,services,other,no,yes +6,15,26,47,cauc,south,female,services,other,no,no +3.75,2,16,24,hispanic,other,male,services,other,no,no +8.89,8,29,43,other,other,female,services,other,no,no +4.35,11,20,37,cauc,other,female,services,other,no,yes +13.1,10,38,54,other,south,female,services,other,no,yes +4.35,8,37,51,other,south,female,services,other,no,yes +3.5,9,48,63,cauc,other,male,services,other,no,no +3.8,12,16,34,cauc,other,female,services,other,no,no +5.26,8,38,52,cauc,other,female,services,other,no,yes +3.35,14,0,20,other,other,male,services,other,no,no +16.26,12,14,32,other,other,male,services,other,yes,no 
+4.25,12,2,20,cauc,other,female,services,other,no,yes +4.5,16,21,43,cauc,other,male,services,other,no,yes +8,13,15,34,cauc,other,female,services,other,no,yes +4,16,20,42,cauc,other,female,services,other,no,no +7.96,14,12,32,cauc,other,female,services,other,no,yes +4,12,7,25,hispanic,south,male,services,other,no,no +4.15,11,4,21,cauc,other,male,services,other,no,yes +5.95,13,9,28,cauc,south,male,services,other,no,yes +3.6,12,43,61,hispanic,south,female,services,other,no,yes +8.75,10,19,35,cauc,south,male,services,other,no,no +3.4,8,49,63,cauc,other,female,services,other,no,no +4.28,12,38,56,cauc,other,female,services,other,no,yes +5.35,12,13,31,cauc,other,female,services,other,no,yes +5,12,14,32,cauc,other,female,services,other,no,yes +7.65,12,20,38,cauc,other,male,services,other,no,no +6.94,12,7,25,cauc,other,female,services,other,no,no +7.5,12,9,27,cauc,other,female,services,manufacturing,yes,yes +3.6,12,6,24,cauc,other,female,services,other,no,no +1.75,12,5,23,cauc,south,female,services,other,no,yes +3.45,13,1,20,other,south,female,services,other,no,no +9.63,14,22,42,cauc,other,male,services,other,yes,yes +8.49,12,24,42,cauc,other,female,services,other,no,yes +8.99,12,15,33,cauc,other,female,services,other,yes,no +3.65,11,8,25,cauc,south,female,services,other,no,yes +3.5,11,17,34,cauc,south,female,services,other,no,yes +3.43,12,2,20,other,south,male,services,other,no,no +5.5,12,20,38,cauc,south,male,services,other,no,yes +6.93,12,26,44,cauc,other,male,services,other,yes,yes +3.51,10,37,53,other,south,female,services,other,no,yes +3.75,12,41,59,cauc,other,female,services,other,no,no +4.17,12,27,45,cauc,other,female,services,other,no,yes +9.57,12,5,23,cauc,other,female,services,other,yes,yes +14.67,14,16,36,other,other,male,services,other,no,yes +12.5,14,19,39,cauc,other,female,services,other,no,yes +5.5,12,10,28,cauc,other,male,services,other,no,yes +5.15,13,1,20,cauc,south,male,services,other,yes,no +8,12,43,61,other,other,female,services,other,yes,yes 
+5.83,13,3,22,other,other,male,services,other,no,no +3.35,12,0,18,cauc,other,female,services,other,no,no +7,12,26,44,cauc,south,female,services,other,no,yes +10,10,25,41,cauc,other,female,services,other,yes,yes +8,12,15,33,cauc,other,female,services,other,no,yes +6.88,14,10,30,cauc,south,female,services,other,no,no +5.55,11,45,62,cauc,other,female,services,other,yes,no +7.5,11,3,20,other,other,male,services,other,no,no +8.93,8,47,61,hispanic,other,male,services,other,yes,yes +9,16,6,28,other,other,female,services,other,no,yes +3.5,10,33,49,cauc,south,female,services,other,no,no +5.77,16,3,25,cauc,other,male,services,manufacturing,no,no +25,14,4,24,hispanic,other,male,services,other,yes,no +6.85,14,34,54,other,other,male,services,other,yes,yes +6.5,11,39,56,cauc,south,male,services,other,no,yes +3.75,12,17,35,cauc,south,female,services,other,no,yes +3.5,9,47,62,cauc,other,male,services,other,yes,yes +4.5,11,2,19,cauc,other,male,services,other,no,no +2.01,13,0,19,cauc,south,male,services,other,no,no +4.17,14,24,44,cauc,other,female,services,other,no,no +13,12,25,43,other,other,male,services,other,yes,yes +3.98,14,6,26,cauc,other,female,services,other,no,no +7.5,12,10,28,cauc,other,female,services,other,no,no +13.12,12,33,51,other,other,female,services,other,no,yes +4,12,12,30,cauc,other,male,services,other,no,no +3.95,12,9,27,cauc,south,female,services,other,no,yes +13,11,18,35,cauc,south,male,services,other,yes,yes +9,12,10,28,cauc,other,male,services,other,no,yes +4.55,8,45,59,cauc,south,female,services,other,no,no +9.5,9,46,61,cauc,other,female,services,other,yes,yes +4.5,7,14,27,hispanic,south,male,services,other,no,yes +8.75,11,36,53,cauc,other,female,services,other,no,no +10,13,34,53,cauc,other,male,technical,construction,yes,yes +18,18,15,39,cauc,other,male,technical,other,no,yes +24.98,17,31,54,cauc,other,male,technical,manufacturing,no,yes +12.05,16,6,28,cauc,other,female,technical,manufacturing,no,no +22,14,15,35,cauc,south,male,technical,other,no,yes 
+8.75,12,30,48,cauc,other,male,technical,other,no,yes +22.2,18,8,32,cauc,other,male,technical,other,no,yes +17.25,18,5,29,cauc,other,male,technical,manufacturing,no,yes +6,17,3,26,cauc,other,female,technical,other,yes,no +8.06,13,17,36,cauc,south,male,technical,other,no,yes +9.24,16,5,27,other,other,male,technical,manufacturing,yes,yes +12,14,10,30,cauc,other,female,technical,other,no,yes +10.61,15,33,54,cauc,other,female,technical,other,no,no +5.71,18,3,27,cauc,other,male,technical,other,no,yes +10,16,0,18,cauc,other,female,technical,other,no,no +17.5,16,13,35,other,south,male,technical,other,no,yes +15,18,12,36,cauc,other,male,technical,other,no,yes +7.78,16,6,28,cauc,other,female,technical,other,no,yes +7.8,17,7,30,cauc,other,male,technical,other,no,yes +10,16,14,36,cauc,south,male,technical,other,yes,yes +24.98,17,5,28,cauc,other,female,technical,other,no,no +10.28,15,10,31,cauc,south,female,technical,other,no,yes +15,18,11,35,cauc,other,female,technical,other,no,yes +12,17,24,47,cauc,other,female,technical,other,no,yes +10.58,16,9,31,cauc,other,male,technical,manufacturing,no,no +5.85,18,12,36,cauc,south,male,technical,other,no,yes +11.22,18,19,43,cauc,other,male,technical,other,no,yes +8.56,14,14,34,cauc,other,female,technical,other,no,yes +13.89,16,17,39,cauc,other,female,technical,manufacturing,no,no +5.71,18,7,31,cauc,south,male,technical,other,no,no +15.79,18,7,31,cauc,other,male,technical,other,no,yes +7.5,16,22,44,cauc,other,female,technical,other,no,yes +11.25,12,28,46,cauc,other,female,technical,other,no,yes +6.15,16,16,38,cauc,other,female,technical,other,no,no +13.45,16,16,38,other,south,male,technical,other,no,no +6.25,16,7,29,cauc,other,female,technical,other,no,yes +6.5,12,11,29,cauc,other,female,technical,other,no,no +12,12,11,29,cauc,other,female,technical,other,no,yes +8.5,12,16,34,cauc,other,female,technical,other,no,no +8,18,33,57,cauc,other,male,technical,other,yes,no +5.75,12,21,39,cauc,south,female,technical,other,no,yes 
+15.73,16,4,26,cauc,other,male,technical,manufacturing,no,yes +9.86,15,13,34,cauc,other,male,technical,other,no,yes +13.51,18,14,38,cauc,other,male,technical,other,yes,yes +5.4,16,10,32,cauc,other,female,technical,other,no,yes +6.25,18,14,38,cauc,south,male,technical,other,no,yes +5.5,16,29,51,cauc,south,male,technical,other,no,yes +5,12,4,22,hispanic,other,male,technical,other,no,no +6.25,18,27,51,other,other,male,technical,other,no,yes +5.75,12,3,21,cauc,other,male,technical,other,no,yes +20.5,16,14,36,cauc,south,male,technical,other,yes,yes +5,14,0,20,cauc,other,male,technical,construction,no,yes +7,18,33,57,cauc,other,male,technical,other,no,yes +18,16,38,60,cauc,south,male,technical,other,no,yes +12,18,18,42,cauc,other,female,technical,other,yes,yes +20.4,17,3,26,cauc,other,male,technical,manufacturing,no,no +22.2,18,40,64,cauc,other,female,technical,other,no,no +16.42,14,19,39,cauc,other,male,technical,manufacturing,no,no +8.63,14,4,24,cauc,other,female,technical,other,no,no +19.38,16,11,33,cauc,other,female,technical,other,no,yes +14,16,16,38,cauc,other,female,technical,other,no,yes +10,14,22,42,cauc,other,male,technical,other,no,yes +15.95,17,13,36,cauc,other,female,technical,other,yes,no +20,16,28,50,cauc,south,female,technical,other,yes,yes +10,16,10,32,cauc,other,female,technical,other,no,yes +24.98,16,5,27,cauc,south,female,technical,other,no,no +11.25,15,5,26,cauc,other,male,technical,other,no,no +22.83,18,37,61,cauc,other,female,technical,manufacturing,no,no +10.2,17,26,49,cauc,other,female,technical,other,yes,yes +10,16,4,26,cauc,south,female,technical,other,no,yes +14,18,31,55,cauc,other,female,technical,other,yes,no +12.5,17,13,36,cauc,other,female,technical,other,yes,yes +5.79,12,42,60,cauc,other,female,technical,other,no,yes +24.98,17,18,41,hispanic,other,male,technical,other,no,yes +4.35,12,3,21,cauc,other,female,technical,other,no,yes +11.25,17,10,33,cauc,other,female,technical,other,no,no +6.67,16,10,32,cauc,other,female,technical,other,yes,no 
+8,16,17,39,hispanic,other,female,technical,other,no,yes +18.16,18,7,31,cauc,other,male,technical,other,no,yes +12,16,14,36,cauc,other,female,technical,other,no,yes +8.89,16,22,44,cauc,other,female,technical,other,yes,yes +9.5,17,14,37,cauc,other,female,technical,other,no,yes +13.65,16,11,33,cauc,other,male,technical,other,no,yes +12,18,23,47,cauc,other,male,technical,other,yes,yes +15,12,39,57,cauc,other,male,technical,other,yes,yes +12.67,16,15,37,cauc,other,male,technical,other,no,yes +7.38,14,15,35,hispanic,other,female,technical,other,no,no +15.56,16,10,32,cauc,other,male,technical,other,no,no +7.45,12,25,43,cauc,south,female,technical,other,no,no +6.25,14,12,32,cauc,other,female,technical,other,no,yes +6.25,16,7,29,hispanic,south,female,technical,other,no,yes +9.37,17,7,30,cauc,other,male,technical,other,yes,yes +22.5,16,17,39,cauc,other,male,technical,manufacturing,no,yes +7.5,16,10,32,cauc,other,male,technical,other,yes,yes +7,17,2,25,cauc,south,male,technical,other,no,yes +5.75,9,34,49,other,south,female,technical,other,yes,yes +7.67,15,11,32,cauc,other,female,technical,other,no,yes +12.5,15,10,31,cauc,other,male,technical,other,no,no +16,12,12,30,cauc,south,male,technical,other,no,yes +11.79,16,6,28,cauc,other,female,technical,other,yes,no +11.36,18,5,29,cauc,other,male,technical,other,no,no +6.1,12,33,51,other,other,female,technical,other,no,yes +23.25,17,25,48,other,other,female,technical,other,yes,yes +19.88,12,13,31,cauc,south,male,technical,other,yes,yes +15.38,16,33,55,cauc,other,male,technical,manufacturing,no,yes diff --git a/markdown/Ch5. MRA - OLS Asymptotics.md b/markdown/Ch5. MRA - OLS Asymptotics.md index 73858b9..70d7f5c 100644 --- a/markdown/Ch5. MRA - OLS Asymptotics.md +++ b/markdown/Ch5. MRA - OLS Asymptotics.md @@ -13,7 +13,7 @@ jupyter: name: python3 --- -# Ch5. Multiple Regression Analysis: OLS Asymptotics +# 5. 
Multiple Regression Analysis: OLS Asymptotics ```python %pip install matplotlib numpy statsmodels wooldridge scipy -q diff --git a/markdown/Ch6. MRA - Further Issues.md b/markdown/Ch6. MRA - Further Issues.md index eabf4f1..783a0df 100644 --- a/markdown/Ch6. MRA - Further Issues.md +++ b/markdown/Ch6. MRA - Further Issues.md @@ -13,7 +13,7 @@ jupyter: name: python3 --- -# Ch6. Multiple Regression Analysis: Further Issues +# 6. Multiple Regression Analysis: Further Issues ```python %pip install matplotlib numpy pandas statsmodels wooldridge -q diff --git a/markdown/Ch7. MRA - Qualitative Regressors.md b/markdown/Ch7. MRA - Qualitative Regressors.md new file mode 100644 index 0000000..274f91e --- /dev/null +++ b/markdown/Ch7. MRA - Qualitative Regressors.md @@ -0,0 +1,312 @@ +--- +jupyter: + jupytext: + formats: notebooks//ipynb,markdown//md,scripts//py + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.4 + kernelspec: + display_name: merino + language: python + name: python3 +--- + +# 7. 
Multiple Regression Analysis with Qualitative Regressors + +```python +%pip install matplotlib numpy pandas statsmodels wooldridge -q +``` + +```python +import numpy as np  # referenced as np.log(...) inside the model formulas below +import pandas as pd +import statsmodels.api as sm +import statsmodels.formula.api as smf +import wooldridge as wool +``` + +## 7.1 Linear Regression with Dummy Variables as Regressors + +### Example 7.1: Hourly Wage Equation + +```python +wage1 = wool.data("wage1") + +reg = smf.ols(formula="wage ~ female + educ + exper + tenure", data=wage1) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +### Example 7.6: Log Hourly Wage Equation + +```python +wage1 = wool.data("wage1") + +reg = smf.ols( + formula="np.log(wage) ~ married*female + educ + exper +" + "I(exper**2) + tenure + I(tenure**2)", + data=wage1, +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +## 7.2 Boolean Variables + +```python +wage1 = wool.data("wage1") + +# regression with boolean variable: +wage1["isfemale"] = wage1["female"] == 1 +reg = smf.ols(formula="wage ~ isfemale + educ + exper + tenure", data=wage1) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +## 7.3 Categorical Variables + +```python +CPS1985 = pd.read_csv("../data/CPS1985.csv") +# rename variable to make outputs more compact: +CPS1985["oc"] = CPS1985["occupation"] + +# table of categories and frequencies for two categorical variables: +freq_gender = 
pd.crosstab(CPS1985["gender"], columns="count") +print(f"freq_gender: \n{freq_gender}\n") + +freq_occupation = pd.crosstab(CPS1985["oc"], columns="count") +print(f"freq_occupation: \n{freq_occupation}\n") +``` + +```python +# directly using categorical variables in regression formula: +reg = smf.ols( + formula="np.log(wage) ~ education +experience + C(gender) + C(oc)", + data=CPS1985, +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +```python +# rerun regression with different reference category: +reg_newref = smf.ols( + formula="np.log(wage) ~ education + experience + " + 'C(gender, Treatment("male")) + ' + 'C(oc, Treatment("technical"))', + data=CPS1985, +) +results_newref = reg_newref.fit() + +# print results: +table_newref = pd.DataFrame( + { + "b": round(results_newref.params, 4), + "se": round(results_newref.bse, 4), + "t": round(results_newref.tvalues, 4), + "pval": round(results_newref.pvalues, 4), + }, +) +print(f"table_newref: \n{table_newref}\n") +``` + +### 7.3.1 ANOVA Tables + +```python +CPS1985 = pd.read_csv("../data/CPS1985.csv") + +# run regression: +reg = smf.ols( + formula="np.log(wage) ~ education + experience + gender + occupation", + data=CPS1985, +) +results = reg.fit() + +# print regression table: +table_reg = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table_reg: \n{table_reg}\n") +``` + +```python +# ANOVA table: +table_anova = sm.stats.anova_lm(results, typ=2) +print(f"table_anova: \n{table_anova}\n") +``` + +## 7.4 Breaking a Numeric Variable Into Categories + +### Example 7.8: Effects of Law School Rankings on Starting Salaries + +```python +lawsch85 = wool.data("lawsch85") + +# define cut points for the 
rank: +cutpts = [0, 10, 25, 40, 60, 100, 175] + +# create categorical variable containing ranges for the rank: +lawsch85["rc"] = pd.cut( + lawsch85["rank"], + bins=cutpts, + labels=["(0,10]", "(10,25]", "(25,40]", "(40,60]", "(60,100]", "(100,175]"], +) + +# display frequencies: +freq = pd.crosstab(lawsch85["rc"], columns="count") +print(f"freq: \n{freq}\n") +``` + +```python +# run regression: +reg = smf.ols( + formula='np.log(salary) ~ C(rc, Treatment("(100,175]")) +' + "LSAT + GPA + np.log(libvol) + np.log(cost)", + data=lawsch85, +) +results = reg.fit() + +# print regression table: +table_reg = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table_reg: \n{table_reg}\n") +``` + +```python +# ANOVA table: +table_anova = sm.stats.anova_lm(results, typ=2) +print(f"table_anova: \n{table_anova}\n") +``` + +## 7.5 Interactions and Differences in Regression Functions Across Groups + +```python +gpa3 = wool.data("gpa3") + +# model with full interactions with female dummy (only for spring data): +reg = smf.ols( + formula="cumgpa ~ female * (sat + hsperc + tothrs)", + data=gpa3, + subset=(gpa3["spring"] == 1), +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +``` + +```python +# F-Test for H0 (the interaction coefficients of 'female' are zero): +hypotheses = ["female = 0", "female:sat = 0", "female:hsperc = 0", "female:tothrs = 0"] +ftest = results.f_test(hypotheses) +fstat = ftest.statistic +fpval = ftest.pvalue + +print(f"fstat: {fstat}\n") +print(f"fpval: {fpval}\n") +``` + +```python +gpa3 = wool.data("gpa3") + +# estimate model for males (& spring data): +reg_m = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs", + data=gpa3, + 
subset=(gpa3["spring"] == 1) & (gpa3["female"] == 0), +) +results_m = reg_m.fit() + +# print regression table: +table_m = pd.DataFrame( + { + "b": round(results_m.params, 4), + "se": round(results_m.bse, 4), + "t": round(results_m.tvalues, 4), + "pval": round(results_m.pvalues, 4), + }, +) +print(f"table_m: \n{table_m}\n") +``` + +```python +# estimate model for females (& spring data): +reg_f = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs", + data=gpa3, + subset=(gpa3["spring"] == 1) & (gpa3["female"] == 1), +) +results_f = reg_f.fit() + +# print regression table: +table_f = pd.DataFrame( + { + "b": round(results_f.params, 4), + "se": round(results_f.bse, 4), + "t": round(results_f.tvalues, 4), + "pval": round(results_f.pvalues, 4), + }, +) +print(f"table_f: \n{table_f}\n") +``` diff --git a/notebooks/Ch5. MRA - OLS Asymptotics.ipynb b/notebooks/Ch5. MRA - OLS Asymptotics.ipynb index a1780ef..c69208b 100644 --- a/notebooks/Ch5. MRA - OLS Asymptotics.ipynb +++ b/notebooks/Ch5. MRA - OLS Asymptotics.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Ch5. Multiple Regression Analysis: OLS Asymptotics" + "# 5. Multiple Regression Analysis: OLS Asymptotics" ] }, { diff --git a/notebooks/Ch6. MRA - Further Issues.ipynb b/notebooks/Ch6. MRA - Further Issues.ipynb index b898950..9a4832c 100644 --- a/notebooks/Ch6. MRA - Further Issues.ipynb +++ b/notebooks/Ch6. MRA - Further Issues.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Ch6. Multiple Regression Analysis: Further Issues" + "# 6. Multiple Regression Analysis: Further Issues" ] }, { diff --git a/notebooks/Ch7. MRA - Qualitative Regressors.ipynb b/notebooks/Ch7. MRA - Qualitative Regressors.ipynb new file mode 100644 index 0000000..64a1fa4 --- /dev/null +++ b/notebooks/Ch7. MRA - Qualitative Regressors.ipynb @@ -0,0 +1,729 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 7. 
Multiple Regression Analysis with Qualitative Regressors" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install matplotlib numpy pandas statsmodels wooldridge -q" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "965e7d33", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np  # noqa: F401 -- np.log is evaluated inside the formula strings\n", + "import pandas as pd\n", + "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf\n", + "import wooldridge as wool" + ] + }, + { + "cell_type": "markdown", + "id": "d1e06cf4", + "metadata": {}, + "source": [ + "## 7.1 Linear Regression with Dummy Variables as Regressors\n", + "\n", + "### Example 7.1: Hourly Wage Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "a86d9c78", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept -1.5679 0.7246 -2.1640 0.0309\n", + "female -1.8109 0.2648 -6.8379 0.0000\n", + "educ 0.5715 0.0493 11.5836 0.0000\n", + "exper 0.0254 0.0116 2.1951 0.0286\n", + "tenure 0.1410 0.0212 6.6632 0.0000\n", + "\n" + ] + } + ], + "source": [ + "wage1 = wool.data(\"wage1\")\n", + "\n", + "reg = smf.ols(formula=\"wage ~ female + educ + exper + tenure\", data=wage1)\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "48c2ac20", + "metadata": {}, + "source": [ + "### Example 7.6: Log Hourly Wage Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "17114ae0", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 0.3214 0.1000 3.2135 0.0014\n", + "married 0.2127 0.0554 3.8419 0.0001\n", + "female -0.1104 0.0557 -1.9797 0.0483\n", + "married:female -0.3006 0.0718 -4.1885 0.0000\n", + "educ 0.0789 0.0067 11.7873 0.0000\n", + "exper 0.0268 0.0052 5.1118 0.0000\n", + "I(exper ** 2) -0.0005 0.0001 -4.8471 0.0000\n", + "tenure 0.0291 0.0068 4.3016 0.0000\n", + "I(tenure ** 2) -0.0005 0.0002 -2.3056 0.0215\n", + "\n" + ] + } + ], + "source": [ + "wage1 = wool.data(\"wage1\")\n", + "\n", + "reg = smf.ols(\n", + " formula=\"np.log(wage) ~ married*female + educ + exper +\"\n", + " \"I(exper**2) + tenure + I(tenure**2)\",\n", + " data=wage1,\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "720c7656", + "metadata": {}, + "source": [ + "## 7.2 Boolean variables" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "dc8f1463", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept -1.5679 0.7246 -2.1640 0.0309\n", + "isfemale[T.True] -1.8109 0.2648 -6.8379 0.0000\n", + "educ 0.5715 0.0493 11.5836 0.0000\n", + "exper 0.0254 0.0116 2.1951 0.0286\n", + "tenure 0.1410 0.0212 6.6632 0.0000\n", + "\n" + ] + } + ], + "source": [ + "wage1 = wool.data(\"wage1\")\n", + "\n", + "# regression with boolean variable:\n", + "wage1[\"isfemale\"] = wage1[\"female\"] == 1\n", + "reg = smf.ols(formula=\"wage ~ isfemale + educ + exper + tenure\", data=wage1)\n", + "results = reg.fit()\n", + "\n", + "# print regression 
table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "91f310f4", + "metadata": {}, + "source": [ + "## 7.3 Categorical Variables" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "d7282dbf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "freq_gender: \n", + "col_0 count\n", + "gender \n", + "female 245\n", + "male 289\n", + "\n", + "freq_occupation: \n", + "col_0 count\n", + "oc \n", + "management 55\n", + "office 97\n", + "sales 38\n", + "services 83\n", + "technical 105\n", + "worker 156\n", + "\n" + ] + } + ], + "source": [ + "CPS1985 = pd.read_csv(\"../data/CPS1985.csv\")\n", + "# rename variable to make outputs more compact:\n", + "CPS1985[\"oc\"] = CPS1985[\"occupation\"]\n", + "\n", + "# table of categories and frequencies for two categorical variables:\n", + "freq_gender = pd.crosstab(CPS1985[\"gender\"], columns=\"count\")\n", + "print(f\"freq_gender: \\n{freq_gender}\\n\")\n", + "\n", + "freq_occupation = pd.crosstab(CPS1985[\"oc\"], columns=\"count\")\n", + "print(f\"freq_occupation: \\n{freq_occupation}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 0.9050 0.1717 5.2718 0.0000\n", + "C(gender)[T.male] 0.2238 0.0423 5.2979 0.0000\n", + "C(oc)[T.office] -0.2073 0.0776 -2.6699 0.0078\n", + "C(oc)[T.sales] -0.3601 0.0936 -3.8455 0.0001\n", + "C(oc)[T.services] -0.3626 0.0818 -4.4305 0.0000\n", + "C(oc)[T.technical] -0.0101 0.0740 -0.1363 0.8916\n", + "C(oc)[T.worker] -0.1525 0.0763 -1.9981 0.0462\n", + "education 0.0759 0.0101 7.5449 0.0000\n", + "experience 
0.0119 0.0017 7.0895 0.0000\n", + "\n" + ] + } + ], + "source": [ + "# directly using categorical variables in regression formula:\n", + "reg = smf.ols(\n", + " formula=\"np.log(wage) ~ education +experience + C(gender) + C(oc)\",\n", + " data=CPS1985,\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_newref: \n", + " b se t pval\n", + "Intercept 1.1187 0.1765 6.3393 0.0000\n", + "C(gender, Treatment(\"male\"))[T.female] -0.2238 0.0423 -5.2979 0.0000\n", + "C(oc, Treatment(\"technical\"))[T.management] 0.0101 0.0740 0.1363 0.8916\n", + "C(oc, Treatment(\"technical\"))[T.office] -0.1972 0.0678 -2.9082 0.0038\n", + "C(oc, Treatment(\"technical\"))[T.sales] -0.3500 0.0863 -4.0541 0.0001\n", + "C(oc, Treatment(\"technical\"))[T.services] -0.3525 0.0750 -4.7030 0.0000\n", + "C(oc, Treatment(\"technical\"))[T.worker] -0.1425 0.0705 -2.0218 0.0437\n", + "education 0.0759 0.0101 7.5449 0.0000\n", + "experience 0.0119 0.0017 7.0895 0.0000\n", + "\n" + ] + } + ], + "source": [ + "# rerun regression with different reference category:\n", + "reg_newref = smf.ols(\n", + " formula=\"np.log(wage) ~ education + experience + \"\n", + " 'C(gender, Treatment(\"male\")) + '\n", + " 'C(oc, Treatment(\"technical\"))',\n", + " data=CPS1985,\n", + ")\n", + "results_newref = reg_newref.fit()\n", + "\n", + "# print results:\n", + "table_newref = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_newref.params, 4),\n", + " \"se\": round(results_newref.bse, 4),\n", + " \"t\": round(results_newref.tvalues, 4),\n", + " \"pval\": 
round(results_newref.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_newref: \\n{table_newref}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "bf984d8f", + "metadata": {}, + "source": [ + "### 7.3.1 ANOVA Tables" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "da908f4a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_reg: \n", + " b se t pval\n", + "Intercept 0.9050 0.1717 5.2718 0.0000\n", + "gender[T.male] 0.2238 0.0423 5.2979 0.0000\n", + "occupation[T.office] -0.2073 0.0776 -2.6699 0.0078\n", + "occupation[T.sales] -0.3601 0.0936 -3.8455 0.0001\n", + "occupation[T.services] -0.3626 0.0818 -4.4305 0.0000\n", + "occupation[T.technical] -0.0101 0.0740 -0.1363 0.8916\n", + "occupation[T.worker] -0.1525 0.0763 -1.9981 0.0462\n", + "education 0.0759 0.0101 7.5449 0.0000\n", + "experience 0.0119 0.0017 7.0895 0.0000\n", + "\n" + ] + } + ], + "source": [ + "CPS1985 = pd.read_csv(\"../data/CPS1985.csv\")\n", + "\n", + "# run regression:\n", + "reg = smf.ols(\n", + " formula=\"np.log(wage) ~ education + experience + gender + occupation\",\n", + " data=CPS1985,\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table_reg = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_reg: \\n{table_reg}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_anova: \n", + " sum_sq df F PR(>F)\n", + "gender 5.414018 1.0 28.067296 1.727015e-07\n", + "occupation 7.152529 5.0 7.416013 9.805485e-07\n", + "education 10.980589 1.0 56.925450 2.010374e-13\n", + "experience 9.695055 1.0 50.261001 4.365391e-12\n", + "Residual 101.269451 525.0 NaN NaN\n", + "\n" + ] + } + ], 
+ "source": [ + "# ANOVA table:\n", + "table_anova = sm.stats.anova_lm(results, typ=2)\n", + "print(f\"table_anova: \\n{table_anova}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "c159ee07", + "metadata": {}, + "source": [ + "## 7.4 Breaking a Numeric Variable Into Categories\n", + "\n", + "### Example 7.8: Effects of Law School Rankings on Starting Salaries" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "d63ca022", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "freq: \n", + "col_0 count\n", + "rc \n", + "(0,10] 10\n", + "(10,25] 16\n", + "(25,40] 13\n", + "(40,60] 18\n", + "(60,100] 37\n", + "(100,175] 62\n", + "\n" + ] + } + ], + "source": [ + "lawsch85 = wool.data(\"lawsch85\")\n", + "\n", + "# define cut points for the rank:\n", + "cutpts = [0, 10, 25, 40, 60, 100, 175]\n", + "\n", + "# create categorical variable containing ranges for the rank:\n", + "lawsch85[\"rc\"] = pd.cut(\n", + " lawsch85[\"rank\"],\n", + " bins=cutpts,\n", + " labels=[\"(0,10]\", \"(10,25]\", \"(25,40]\", \"(40,60]\", \"(60,100]\", \"(100,175]\"],\n", + ")\n", + "\n", + "# display frequencies:\n", + "freq = pd.crosstab(lawsch85[\"rc\"], columns=\"count\")\n", + "print(f\"freq: \\n{freq}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_reg: \n", + " b se t pval\n", + "Intercept 9.1653 0.4114 22.2770 0.0000\n", + "C(rc, Treatment(\"(100,175]\"))[T.(0,10]] 0.6996 0.0535 13.0780 0.0000\n", + "C(rc, Treatment(\"(100,175]\"))[T.(10,25]] 0.5935 0.0394 15.0493 0.0000\n", + "C(rc, Treatment(\"(100,175]\"))[T.(25,40]] 0.3751 0.0341 11.0054 0.0000\n", + "C(rc, Treatment(\"(100,175]\"))[T.(40,60]] 0.2628 0.0280 9.3991 0.0000\n", + "C(rc, Treatment(\"(100,175]\"))[T.(60,100]] 0.1316 0.0210 6.2540 0.0000\n", + "LSAT 0.0057 0.0031 1.8579 0.0655\n", + "GPA 0.0137 0.0742 0.1850 0.8535\n", + 
"np.log(libvol) 0.0364 0.0260 1.3976 0.1647\n", + "np.log(cost) 0.0008 0.0251 0.0335 0.9734\n", + "\n" + ] + } + ], + "source": [ + "# run regression:\n", + "reg = smf.ols(\n", + " formula='np.log(salary) ~ C(rc, Treatment(\"(100,175]\")) +'\n", + " \"LSAT + GPA + np.log(libvol) + np.log(cost)\",\n", + " data=lawsch85,\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table_reg = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_reg: \\n{table_reg}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_anova: \n", + " sum_sq df F PR(>F)\n", + "C(rc, Treatment(\"(100,175]\")) 1.868867 5.0 50.962988 1.174406e-28\n", + "LSAT 0.025317 1.0 3.451900 6.551320e-02\n", + "GPA 0.000251 1.0 0.034225 8.535262e-01\n", + "np.log(libvol) 0.014327 1.0 1.953419 1.646748e-01\n", + "np.log(cost) 0.000008 1.0 0.001120 9.733564e-01\n", + "Residual 0.924111 126.0 NaN NaN\n", + "\n" + ] + } + ], + "source": [ + "# ANOVA table:\n", + "table_anova = sm.stats.anova_lm(results, typ=2)\n", + "print(f\"table_anova: \\n{table_anova}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "66b66fb0", + "metadata": {}, + "source": [ + "## 7.5 Interactions and Differences in Regression Functions Across Groups" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "a9ef0007", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table: \n", + " b se t pval\n", + "Intercept 1.4808 0.2073 7.1422 0.0000\n", + "female -0.3535 0.4105 -0.8610 0.3898\n", + "sat 0.0011 0.0002 5.8073 0.0000\n", + "hsperc -0.0085 0.0014 -6.1674 0.0000\n", + "tothrs 0.0023 0.0009 2.7182 0.0069\n", + "female:sat 0.0008 0.0004 1.9488 
0.0521\n", + "female:hsperc -0.0005 0.0032 -0.1739 0.8621\n", + "female:tothrs -0.0001 0.0016 -0.0712 0.9433\n", + "\n" + ] + } + ], + "source": [ + "gpa3 = wool.data(\"gpa3\")\n", + "\n", + "# model with full interactions with female dummy (only for spring data):\n", + "reg = smf.ols(\n", + " formula=\"cumgpa ~ female * (sat + hsperc + tothrs)\",\n", + " data=gpa3,\n", + " subset=(gpa3[\"spring\"] == 1),\n", + ")\n", + "results = reg.fit()\n", + "\n", + "# print regression table:\n", + "table = pd.DataFrame(\n", + " {\n", + " \"b\": round(results.params, 4),\n", + " \"se\": round(results.bse, 4),\n", + " \"t\": round(results.tvalues, 4),\n", + " \"pval\": round(results.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table: \\n{table}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fstat: 8.179111637044619\n", + "\n", + "fpval: 2.544637191829608e-06\n", + "\n" + ] + } + ], + "source": [ + "# F-Test for H0 (the interaction coefficients of 'female' are zero):\n", + "hypotheses = [\"female = 0\", \"female:sat = 0\", \"female:hsperc = 0\", \"female:tothrs = 0\"]\n", + "ftest = results.f_test(hypotheses)\n", + "fstat = ftest.statistic\n", + "fpval = ftest.pvalue\n", + "\n", + "print(f\"fstat: {fstat}\\n\")\n", + "print(f\"fpval: {fpval}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "a44fa6e8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_m: \n", + " b se t pval\n", + "Intercept 1.4808 0.2060 7.1894 0.0000\n", + "sat 0.0011 0.0002 5.8458 0.0000\n", + "hsperc -0.0085 0.0014 -6.2082 0.0000\n", + "tothrs 0.0023 0.0009 2.7362 0.0066\n", + "\n" + ] + } + ], + "source": [ + "gpa3 = wool.data(\"gpa3\")\n", + "\n", + "# estimate model for males (& spring data):\n", + "reg_m = smf.ols(\n", + " formula=\"cumgpa ~ sat + hsperc + tothrs\",\n", + " data=gpa3,\n", + " 
subset=(gpa3[\"spring\"] == 1) & (gpa3[\"female\"] == 0),\n", + ")\n", + "results_m = reg_m.fit()\n", + "\n", + "# print regression table:\n", + "table_m = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_m.params, 4),\n", + " \"se\": round(results_m.bse, 4),\n", + " \"t\": round(results_m.tvalues, 4),\n", + " \"pval\": round(results_m.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_m: \\n{table_m}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "table_f: \n", + " b se t pval\n", + "Intercept 1.1273 0.3616 3.1176 0.0025\n", + "sat 0.0018 0.0003 5.1950 0.0000\n", + "hsperc -0.0090 0.0029 -3.0956 0.0027\n", + "tothrs 0.0022 0.0014 1.5817 0.1174\n", + "\n" + ] + } + ], + "source": [ + "# estimate model for females (& spring data):\n", + "reg_f = smf.ols(\n", + " formula=\"cumgpa ~ sat + hsperc + tothrs\",\n", + " data=gpa3,\n", + " subset=(gpa3[\"spring\"] == 1) & (gpa3[\"female\"] == 1),\n", + ")\n", + "results_f = reg_f.fit()\n", + "\n", + "# print regression table:\n", + "table_f = pd.DataFrame(\n", + " {\n", + " \"b\": round(results_f.params, 4),\n", + " \"se\": round(results_f.bse, 4),\n", + " \"t\": round(results_f.tvalues, 4),\n", + " \"pval\": round(results_f.pvalues, 4),\n", + " },\n", + ")\n", + "print(f\"table_f: \\n{table_f}\\n\")" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "notebooks//ipynb,markdown//md,scripts//py" + }, + "kernelspec": { + "display_name": "merino", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/Ch5. MRA - OLS Asymptotics.py b/scripts/Ch5. 
MRA - OLS Asymptotics.py index 1e5822d..3b826b6 100644 --- a/scripts/Ch5. MRA - OLS Asymptotics.py +++ b/scripts/Ch5. MRA - OLS Asymptotics.py @@ -13,7 +13,7 @@ # name: python3 # --- -# # Ch5. Multiple Regression Analysis: OLS Asymptotics +# # 5. Multiple Regression Analysis: OLS Asymptotics # %pip install matplotlib numpy statsmodels wooldridge scipy -q diff --git a/scripts/Ch6. MRA - Further Issues.py b/scripts/Ch6. MRA - Further Issues.py index 1766787..bc19c3b 100644 --- a/scripts/Ch6. MRA - Further Issues.py +++ b/scripts/Ch6. MRA - Further Issues.py @@ -13,7 +13,7 @@ # name: python3 # --- -# # Ch6. Multiple Regression Analysis: Further Issues +# # 6. Multiple Regression Analysis: Further Issues # %pip install matplotlib numpy pandas statsmodels wooldridge -q diff --git a/scripts/Ch7. MRA - Qualitative Regressors.py b/scripts/Ch7. MRA - Qualitative Regressors.py new file mode 100644 index 0000000..8153b69 --- /dev/null +++ b/scripts/Ch7. MRA - Qualitative Regressors.py @@ -0,0 +1,297 @@ +# --- +# jupyter: +# jupytext: +# formats: notebooks//ipynb,markdown//md,scripts//py +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.16.4 +# kernelspec: +# display_name: merino +# language: python +# name: python3 +# --- + +# # 7. 
Multiple Regression Analysis with Qualitative Regressors + +# %pip install matplotlib numpy pandas statsmodels wooldridge -q + +import numpy as np  # noqa: F401 -- np.log is evaluated inside the formula strings +import pandas as pd +import statsmodels.api as sm +import statsmodels.formula.api as smf +import wooldridge as wool + +# ## 7.1 Linear Regression with Dummy Variables as Regressors +# +# ### Example 7.1: Hourly Wage Equation + +# + +wage1 = wool.data("wage1") + +reg = smf.ols(formula="wage ~ female + educ + exper + tenure", data=wage1) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +# - + +# ### Example 7.6: Log Hourly Wage Equation + +# + +wage1 = wool.data("wage1") + +reg = smf.ols( + formula="np.log(wage) ~ married*female + educ + exper +" + "I(exper**2) + tenure + I(tenure**2)", + data=wage1, +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +# - + +# ## 7.2 Boolean variables + +# + +wage1 = wool.data("wage1") + +# regression with boolean variable: +wage1["isfemale"] = wage1["female"] == 1 +reg = smf.ols(formula="wage ~ isfemale + educ + exper + tenure", data=wage1) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") +# - + +# ## 7.3 Categorical Variables + +# + +CPS1985 = pd.read_csv("../data/CPS1985.csv") +# rename variable to make outputs more compact: +CPS1985["oc"] = CPS1985["occupation"] + +# table of categories and frequencies for two categorical variables: +freq_gender = pd.crosstab(CPS1985["gender"], columns="count") 
+print(f"freq_gender: \n{freq_gender}\n") + +freq_occupation = pd.crosstab(CPS1985["oc"], columns="count") +print(f"freq_occupation: \n{freq_occupation}\n") + +# + +# directly using categorical variables in regression formula: +reg = smf.ols( + formula="np.log(wage) ~ education +experience + C(gender) + C(oc)", + data=CPS1985, +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") + +# + +# rerun regression with different reference category: +reg_newref = smf.ols( + formula="np.log(wage) ~ education + experience + " + 'C(gender, Treatment("male")) + ' + 'C(oc, Treatment("technical"))', + data=CPS1985, +) +results_newref = reg_newref.fit() + +# print results: +table_newref = pd.DataFrame( + { + "b": round(results_newref.params, 4), + "se": round(results_newref.bse, 4), + "t": round(results_newref.tvalues, 4), + "pval": round(results_newref.pvalues, 4), + }, +) +print(f"table_newref: \n{table_newref}\n") +# - + +# ### 7.3.1 ANOVA Tables + +# + +CPS1985 = pd.read_csv("../data/CPS1985.csv") + +# run regression: +reg = smf.ols( + formula="np.log(wage) ~ education + experience + gender + occupation", + data=CPS1985, +) +results = reg.fit() + +# print regression table: +table_reg = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table_reg: \n{table_reg}\n") +# - + +# ANOVA table: +table_anova = sm.stats.anova_lm(results, typ=2) +print(f"table_anova: \n{table_anova}\n") + +# ## 7.4 Breaking a Numeric Variable Into Categories +# +# ### Example 7.8: Effects of Law School Rankings on Starting Salaries + +# + +lawsch85 = wool.data("lawsch85") + +# define cut points for the rank: +cutpts = [0, 10, 25, 40, 60, 100, 175] + +# create categorical variable containing 
ranges for the rank: +lawsch85["rc"] = pd.cut( + lawsch85["rank"], + bins=cutpts, + labels=["(0,10]", "(10,25]", "(25,40]", "(40,60]", "(60,100]", "(100,175]"], +) + +# display frequencies: +freq = pd.crosstab(lawsch85["rc"], columns="count") +print(f"freq: \n{freq}\n") + +# + +# run regression: +reg = smf.ols( + formula='np.log(salary) ~ C(rc, Treatment("(100,175]")) +' + "LSAT + GPA + np.log(libvol) + np.log(cost)", + data=lawsch85, +) +results = reg.fit() + +# print regression table: +table_reg = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table_reg: \n{table_reg}\n") +# - + +# ANOVA table: +table_anova = sm.stats.anova_lm(results, typ=2) +print(f"table_anova: \n{table_anova}\n") + +# ## 7.5 Interactions and Differences in Regression Functions Across Groups + +# + +gpa3 = wool.data("gpa3") + +# model with full interactions with female dummy (only for spring data): +reg = smf.ols( + formula="cumgpa ~ female * (sat + hsperc + tothrs)", + data=gpa3, + subset=(gpa3["spring"] == 1), +) +results = reg.fit() + +# print regression table: +table = pd.DataFrame( + { + "b": round(results.params, 4), + "se": round(results.bse, 4), + "t": round(results.tvalues, 4), + "pval": round(results.pvalues, 4), + }, +) +print(f"table: \n{table}\n") + +# + +# F-Test for H0 (the interaction coefficients of 'female' are zero): +hypotheses = ["female = 0", "female:sat = 0", "female:hsperc = 0", "female:tothrs = 0"] +ftest = results.f_test(hypotheses) +fstat = ftest.statistic +fpval = ftest.pvalue + +print(f"fstat: {fstat}\n") +print(f"fpval: {fpval}\n") + +# + +gpa3 = wool.data("gpa3") + +# estimate model for males (& spring data): +reg_m = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs", + data=gpa3, + subset=(gpa3["spring"] == 1) & (gpa3["female"] == 0), +) +results_m = reg_m.fit() + +# print regression table: +table_m = pd.DataFrame( + { + "b": 
round(results_m.params, 4), + "se": round(results_m.bse, 4), + "t": round(results_m.tvalues, 4), + "pval": round(results_m.pvalues, 4), + }, +) +print(f"table_m: \n{table_m}\n") + +# + +# estimate model for females (& spring data): +reg_f = smf.ols( + formula="cumgpa ~ sat + hsperc + tothrs", + data=gpa3, + subset=(gpa3["spring"] == 1) & (gpa3["female"] == 1), +) +results_f = reg_f.fit() + +# print regression table: +table_f = pd.DataFrame( + { + "b": round(results_f.params, 4), + "se": round(results_f.bse, 4), + "t": round(results_f.tvalues, 4), + "pval": round(results_f.pvalues, 4), + }, +) +print(f"table_f: \n{table_f}\n")