Merge pull request #35 from dhhagan/dhhagan-feature-regplot

Adds regplot
dhhagan · Apr 22, 2024 · dedd589 · dedd589
2 parents 2cef2f3 + 579c73d
commit dedd589
Show file tree

Hide file tree

Showing 5 changed files with 348 additions and 201 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -5,7 +5,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
     name: Python ${{ matrix.python }} tests
     steps:
       - name: Checkout branch
@@ -29,10 +29,10 @@ jobs:
           poetry run pytest tests/ --cov=./ --cov-report=xml
           
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v1
+        uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           file: ./coverage.xml
+          slug: dhhagan/atmospy
           flags: unittests
-          name: codecov-umbrella
-          fail_ci_if_error: true
+          name: codecov-umbrella
diff --git a/atmospy/relational.py b/atmospy/relational.py
@@ -1,14 +1,109 @@
 """This file will contain regression figures.
 """
+import seaborn as sns
+import numpy as np
+from scipy.stats import (
+    linregress,
+)
 from .utils import (
     remove_na,
-)
-from seaborn import (
-    FacetGrid,
+    check_for_numeric_cols
 )
 
 __all__ = ["regplot", ]
 
-def regplot():
-    """Standard regression plot between two variables.
-    """
+def regplot(data, x, y, fit_reg=True, color=None, marker="o", ylim=None, **kwargs):
+    """Plot data and a linear regression best fit line between the two variables.
+    
+    This function builds on Seaborn's `jointplot` and most kwargs can be passed 
+    straight through to the `jointplot` for customization.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        Input data structure.
+    x : str
+        Variable that corresponds to the x data in `data`.
+    y : str
+        _description_
+    fit_reg : bool, optional
+        _description_, by default True
+    color : _type_, optional
+        _description_, by default None
+    marker : str, optional
+        _description_, by default "o"
+    ylim : _type_, optional
+        _description_, by default None
+    """
+    check_for_numeric_cols(data, [x, y])
+
+    # drop NaNs and keep only needed columns
+    _data = data[[x, y]].dropna(how='any')
+
+    xdata, ydata = _data[x], _data[y]
+
+    # get the range for the plot
+    if ylim is None:
+        ymin = min([xdata.min(), ydata.min()])
+        ymax = max([xdata.max(), ydata.max()])
+    else:
+        ymin = ylim[0]
+        ymax = ylim[1]
+
+        if ymax <= ymin:
+            raise ValueError("`ymax` must be larger than `ymin`")
+
+    # update the kwargs
+    kwargs.update({"color": color, "marker": marker})
+
+    # remove certain kwargs that we don't want to allow to be passed to the jointplot
+    for each in (
+        "hue",
+        "kind",
+        "dropna",
+        "xlim",
+        "ylim",
+        "hue_order",
+        "hue_norm"
+    ):
+        if each in kwargs:
+            kwargs.pop(each)
+
+    # set the color if one wasn't explicitly set
+    if color is None:
+        color = "C0"
+
+    # make the call to jointplot
+    g = sns.jointplot(
+        data=_data, x=x, y=y, kind="scatter",
+        xlim=(ymin, ymax),
+        ylim=(ymin, ymax),
+        **kwargs
+    )
+
+    # add a unity line
+    g.ax_joint.axline(
+        (0, 0), slope=1, ls='--',
+        c='k', alpha=0.5, zorder=0, label="1:1"
+    )
+
+    # if set, add a regression line
+    if fit_reg:
+        _x = np.linspace(ymin, ymax, 10)
+        res = linregress(xdata, ydata)
+
+        # build the label
+        label = f"y = {res.slope:.2f}"
+        if res.intercept < 0:
+            label += f" - {res.intercept:.2f}"
+        else:
+            label += f" + {res.intercept:.2f}"
+
+        # tack on the correlation coef.
+        label += f"\n$r^2$ = {res.rvalue**2:.3f}"
+
+        g.ax_joint.plot(_x, res.intercept + res.slope*_x, label=label, c=color)
+
+    g.ax_joint.legend()
+
+    return g