diff --git a/atmospy/trends.py b/atmospy/trends.py index 128eb9d..64606af 100644 --- a/atmospy/trends.py +++ b/atmospy/trends.py @@ -309,8 +309,8 @@ def calendarplot(data, x, y, freq="day", agg="mean", vmin=None, vmax=None, cmap= return ax -def dielplot(data, x, y, ax=None, ylim=None, xlabel=None, - ylabel=None, title=None, plot_kws=None, **kwargs): +def dielplot(data=None, *, x=None, y=None, ax=None, ylim=None, xlabel=None, + ylabel=None, title=None, color=None, show_iqr=True, plot_kws=None, **kwargs): """Plot the diel (e.g., diurnal) trend for a pollutant. Diel plots can be incredibly useful for understanding daily @@ -334,6 +334,10 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None, The label for the y-axis, by default None title : str, optional The title for the plot, by default None + color : str, optional + Specify the color to use in the figure + show_iqr : bool, optional + If True, plot the interquartile range as a shaded region, default True plot_kws : dict or None, optional Additional keyword arguments are passed directly to the underlying plot call , by default None @@ -349,7 +353,7 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None, Plot a simple heatmap for the entire year. 
>>> df = atmospy.load_dataset("us-bc") - >>> atmospy.dielplot(df, x="Timestamp GMT", y="Sample Measurement") + >>> atmospy.dielplot(data=df, x="Timestamp GMT", y="Sample Measurement") """ default_plot_kws = { @@ -362,6 +366,8 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None, # plot_kws = {} if plot_kws is None else dict(default_plot_kws, **plot_kws) + if color is not None: + plot_kws.update(dict(c=color)) # copy over only the needed data _data = data[[x, y]].copy(deep=True) @@ -375,23 +381,27 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None, # compute the diel statistics stats = _data.groupby([_data.index.hour, _data.index.minute], as_index=False).describe() + # append the first record so the first and last records are identical + stats.loc[len(stats.index)] = stats.loc[0] + # build an index we can use to make the figure index = stats.index.values - freq = int(60 / (index.size / 24)) + freq = int(60 / ((index.size - 1) / 24)) figure_index = pd.date_range(start='2020-01-01', periods=index.size, freq=f"{freq}min") # plot the diel average ax.plot(figure_index, stats[y]['mean'], **plot_kws) # add the IQR as a shaded region - ax.fill_between( - figure_index, - y1=stats[y]['25%'], - y2=stats[y]['75%'], - alpha=0.25, - lw=2, - color=plt.gca().lines[-1].get_color() - ) + if show_iqr: + ax.fill_between( + figure_index, + y1=stats[y]['25%'], + y2=stats[y]['75%'], + alpha=0.25, + lw=2, + color=plt.gca().lines[-1].get_color() + ) # adjust plot parameters xticks = ax.get_xticks() diff --git a/atmospy/utils.py b/atmospy/utils.py index e04798f..5afcb78 100644 --- a/atmospy/utils.py +++ b/atmospy/utils.py @@ -126,6 +126,9 @@ def load_dataset(name, cache=True, data_home=None, **kwargs): if name == "air-sensors-met": df["timestamp_local"] = pd.to_datetime(df["timestamp_local"]) + + # only keep data after april + df = df[df["timestamp_local"] >= "2023-05-01"].copy() return df diff --git a/docs/_tutorial/Introduction.ipynb b/docs/_tutorial/Introduction.ipynb 
index e2b49c8..09839a7 100644 --- a/docs/_tutorial/Introduction.ipynb +++ b/docs/_tutorial/Introduction.ipynb @@ -11,11 +11,7 @@ "An introduction to atmospy\n", "==========================\n", "\n", - "Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib `_ and integrates closely with `pandas `_ data structures.\n", - "\n", - "Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them.\n", - "\n", - "Here's an example of what seaborn can do:" + "Coming soon!" ] }, { diff --git a/docs/_tutorial/aesthetics.ipynb b/docs/_tutorial/aesthetics.ipynb index bea6281..6712d60 100644 --- a/docs/_tutorial/aesthetics.ipynb +++ b/docs/_tutorial/aesthetics.ipynb @@ -11,11 +11,7 @@ "Figure Aesthetics\n", "==================\n", "\n", - "Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib `_ and integrates closely with `pandas `_ data structures.\n", - "\n", - "Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them.\n", - "\n", - "Here's an example of what seaborn can do:" + "Coming Soon!" 
] } ], diff --git a/docs/_tutorial/plots.ipynb b/docs/_tutorial/plots.ipynb index 6ba5317..e7250b3 100644 --- a/docs/_tutorial/plots.ipynb +++ b/docs/_tutorial/plots.ipynb @@ -246,7 +246,6 @@ " y=\"Sample Measurement\", \n", " freq=\"day\",\n", " cbar=False,\n", - " # xlabel=\"Day of Month\",\n", " height=2.5,\n", " linewidths=.1\n", ");" @@ -401,12 +400,269 @@ "source": [ "# Facets and other Fun Things\n", "\n", - "Now that you've seen how to use the individual plots above, we're going to go over some of the advanced features that are available by leveraging seaborn's great grid functionality." + "Now that you've seen how to use the individual plots above, we're going to go over some of the advanced features that are available by leveraging seaborn's great grid functionality.\n", + "\n", + "Often, it can be useful to draw multiple versions of the same figure with slight differences in what's being plotted. For example, you may want to plot a pollution rose, but plot each month of data separately. Or, you may want to plot a diel profile, but show the results for weekdays vs weekends on separate plots so that you can easily see the difference caused by traffic patterns.\n", + "\n", + "To do so, we can use seaborn's `FacetGrid`. Seaborn has a great tutorial on how to use `FacetGrid`'s [here](https://seaborn.pydata.org/tutorial/axis_grids.html) that we advise you read. Next, we'll go over several examples that showcase some of the cool things you can do with `atmospy` and `seaborn` together." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `FacetGrid` and the `pollutionroseplot`\n", + "\n", + "Using the example dataset (`air-sensors-met`), let's plot the pollution rose separately for each month (the dataset contains data for May through November). To do so, we will use the `FacetGrid` function from `seaborn`. First, we will add a column that will serve as the dimension of the figure. 
In this case, we want to extract the month:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the example dataset\n", + "met = atmospy.load_dataset(\"air-sensors-met\")\n", + "\n", + "# add a column that extracts the month from the timestamp_local column\n", + "met.loc[:, \"Month\"] = met[\"timestamp_local\"].dt.month_name()\n", + "\n", + "# print the first 5 records\n", + "met.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see above, we now have a column with the month name. In order to use the `FacetGrid` properly, we need to convert our wide-form dataframe into a long-form dataframe. For a much better explanation of the difference than I can provide, please read through the seaborn explanation [here](https://seaborn.pydata.org/tutorial/data_structure.html#long-form-vs-wide-form-data).\n", + "\n", + "We can easily convert our dataframe to long-form by using the Pandas `melt` function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "met_long_form = met.melt(id_vars=[\"timestamp_local\", \"Month\", \"ws\", \"wd\"], value_vars=[\"pm25\"])\n", + "\n", + "# print the first 5 records\n", + "met_long_form.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we will set up our `FacetGrid` and tell it to use the `Month` column as the dimension and to wrap every 3 so that it all fits into one nice figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set up the FacetGrid\n", + "g = sns.FacetGrid(\n", + " data=met_long_form, \n", + " col=\"Month\", \n", + " col_wrap=3,\n", + " subplot_kws={\"projection\": \"polar\"},\n", + " despine=False\n", + ")\n", + "\n", + "# map the dataframe using the pollutionroseplot function\n", + "g.map_dataframe(\n", + " atmospy.pollutionroseplot, \n", + " 
ws=\"ws\", wd=\"wd\", pollutant=\"value\", \n", + " faceted=True, \n", + " segments=20, \n", + " suffix=\"$µgm^{-3}$\"\n", + ")\n", + "\n", + "# add the legend and place it where it looks nice\n", + "g.add_legend(\n", + " title=\"$PM_{2.5}$\", \n", + " bbox_to_anchor=(.535, 0.2), \n", + " handlelength=1, \n", + " handleheight=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Not bad for 3 lines of code (yes, they are split across more than 3 lines for readability, but still - 3 lines!)!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `FacetGrid` and the `dielplot`\n", + "\n", + "Next, we're going to go ahead and explore what we can do with the `dielplot` on a `FacetGrid`. Like above, we can simply plot a slightly different subset of the data in each column - let's go ahead and walk through an example. Let's plot the diel profile for black carbon on weekdays versus weekends.\n", + "\n", + "First, we need to load our example dataset and modify it a bit to provide the information we want to facet by:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the data\n", + "bc = atmospy.load_dataset(\"us-bc\")\n", + "\n", + "# select just one random location for now\n", + "bc_single_site = bc[bc[\"Local Site Name\"] == bc[\"Local Site Name\"].unique()[0]]\n", + "\n", + "# create a column that sets a bool if the date is a weekend\n", + "bc_single_site.loc[:, \"Is Weekend\"] = bc_single_site[\"Timestamp Local\"].dt.day_name().isin([\"Saturday\", \"Sunday\"])\n", + "\n", + "# convert to long-form for faceting\n", + "bc_long_form = bc_single_site.melt(\n", + " id_vars=[\"Timestamp Local\", \"Is Weekend\"], \n", + " value_vars=[\"Sample Measurement\"]\n", + ")\n", + "\n", + "# print the first 5 records\n", + "bc_long_form.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have our data prepared, we 
can set up a `FacetGrid` like above and define the column to facet by as the new column we just created:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(\n", + " data=bc_long_form,\n", + " col=\"Is Weekend\",\n", + " \n", + " # let's adjust the aspect ratio for funsies\n", + " aspect=1.25\n", + ")\n", + "g.map_dataframe(atmospy.dielplot, x=\"Timestamp Local\", y=\"value\")" ] }, { "cell_type": "markdown", "metadata": {}, + "source": [ + "While this isn't the greatest example, we can see there is a difference between weekdays and weekends in the early morning, though the IQR band is quite wide. At some point, we will add some better example datasets so that these figures are more impressive. For now, they work!\n", + "\n", + "Now, what if we had two locations that we wanted to compare? Let's go ahead and pull data for two sites and show the difference by site rather than by weekday/weekend:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the data\n", + "bc = atmospy.load_dataset(\"us-bc\")\n", + "\n", + "# select the first two locations so we can compare them\n", + "bc_multi_site = bc[bc[\"Local Site Name\"].isin(bc[\"Local Site Name\"].unique()[0:2])]\n", + "\n", + "# create a column that sets a bool if the date is a weekend\n", + "bc_multi_site.loc[:, \"Is Weekend\"] = bc_multi_site[\"Timestamp Local\"].dt.day_name().isin([\"Saturday\", \"Sunday\"])\n", + "\n", + "# convert to long-form for faceting\n", + "bc_long_form = bc_multi_site.melt(\n", + " id_vars=[\"Timestamp Local\", \"Is Weekend\", \"Local Site Name\"], \n", + " value_vars=[\"Sample Measurement\"]\n", + ")\n", + "\n", + "# print the first 5 records\n", + "bc_long_form.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's set up the `FacetGrid` and plot the diel trend by location:" + ] + }, + { + "cell_type": 
"code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(\n", + " bc_long_form,\n", + " row=\"Local Site Name\",\n", + " hue=\"Local Site Name\",\n", + " aspect=1.25,\n", + ")\n", + "\n", + "g.map_dataframe(atmospy.dielplot, x=\"Timestamp Local\", y=\"value\")\n", + "\n", + "# update the y-axis limit to force to zero\n", + "g.set(ylim=(0, None))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rather than plotting on the column, we plotted different locations on the row, which sets up the next figure nicely - let's go ahead and use the same data set, but plot by weekday/weekend AND two different locations together:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g = sns.FacetGrid(\n", + " bc_long_form,\n", + " row=\"Local Site Name\",\n", + " col=\"Is Weekend\",\n", + " hue=\"Local Site Name\",\n", + " aspect=1.25,\n", + ")\n", + "\n", + "g.map_dataframe(atmospy.dielplot, x=\"Timestamp Local\", y=\"value\")\n", + "\n", + "# update the y-axis limit to force to zero\n", + "g.set(ylim=(0, None), ylabel='Black Carbon')\n", + "\n", + "# update the titles to take up less space\n", + "g.set_titles(\"{row_name} | Weekend = {col_name}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `FacetGrid` and the `calendarplot`\n", + "\n", + "Coming soon!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [] } ], diff --git a/docs/example_thumbs/diel_by_weekend_weekday_thumb.png b/docs/example_thumbs/diel_by_weekend_weekday_thumb.png new file mode 100644 index 0000000..4435168 Binary files /dev/null and b/docs/example_thumbs/diel_by_weekend_weekday_thumb.png differ diff --git a/docs/example_thumbs/dielplot_thumb.png b/docs/example_thumbs/dielplot_thumb.png index 4d9b24f..54f5cdc 100644 Binary files a/docs/example_thumbs/dielplot_thumb.png and b/docs/example_thumbs/dielplot_thumb.png differ diff --git a/docs/example_thumbs/pollution_rose_thumb.png b/docs/example_thumbs/pollution_rose_thumb.png index 4c5b1d8..327e95d 100644 Binary files a/docs/example_thumbs/pollution_rose_thumb.png and b/docs/example_thumbs/pollution_rose_thumb.png differ diff --git a/docs/example_thumbs/rose_by_month_thumb.png b/docs/example_thumbs/rose_by_month_thumb.png new file mode 100644 index 0000000..c4662be Binary files /dev/null and b/docs/example_thumbs/rose_by_month_thumb.png differ diff --git a/docs/examples/_images/diel_by_weekend_weekday.png b/docs/examples/_images/diel_by_weekend_weekday.png new file mode 100644 index 0000000..ecc31ca Binary files /dev/null and b/docs/examples/_images/diel_by_weekend_weekday.png differ diff --git a/docs/examples/_images/dielplot.png b/docs/examples/_images/dielplot.png index 28d9596..9a2f38f 100644 Binary files a/docs/examples/_images/dielplot.png and b/docs/examples/_images/dielplot.png differ diff --git a/docs/examples/_images/pollution_rose.png b/docs/examples/_images/pollution_rose.png index 6d050d1..0b184ef 100644 Binary files a/docs/examples/_images/pollution_rose.png and b/docs/examples/_images/pollution_rose.png differ diff --git a/docs/examples/_images/rose_by_month.png b/docs/examples/_images/rose_by_month.png new file mode 100644 index 0000000..0224156 Binary files /dev/null and b/docs/examples/_images/rose_by_month.png differ diff 
--git a/docs/examples/calendar_by_day.py b/docs/examples/calendar_by_day.py index f1361cb..cd77b16 100644 --- a/docs/examples/calendar_by_day.py +++ b/docs/examples/calendar_by_day.py @@ -1,6 +1,6 @@ """ -Ozone by Year -============= +Daily Average Ozone +=================== _thumb: .8, .8 """ diff --git a/docs/examples/calendar_by_day.rst b/docs/examples/calendar_by_day.rst index c1ea67b..ce3f8d1 100644 --- a/docs/examples/calendar_by_day.rst +++ b/docs/examples/calendar_by_day.rst @@ -5,8 +5,8 @@ .. _calendar_by_day: -Ozone by Year -============= +Daily Average Ozone +=================== diff --git a/docs/examples/calendar_by_hour.py b/docs/examples/calendar_by_hour.py index 327790d..649fbf1 100644 --- a/docs/examples/calendar_by_hour.py +++ b/docs/examples/calendar_by_hour.py @@ -1,6 +1,6 @@ """ -Ozone by Year -============= +Hourly Averaged Ozone +===================== _thumb: .8, .8 """ diff --git a/docs/examples/calendar_by_hour.rst b/docs/examples/calendar_by_hour.rst index bb7be0d..a195d22 100644 --- a/docs/examples/calendar_by_hour.rst +++ b/docs/examples/calendar_by_hour.rst @@ -5,8 +5,8 @@ .. 
_calendar_by_hour: -Ozone by Year -============= +Hourly Averaged Ozone +===================== diff --git a/docs/examples/diel_by_weekend_weekday.py b/docs/examples/diel_by_weekend_weekday.py new file mode 100644 index 0000000..5e49e63 --- /dev/null +++ b/docs/examples/diel_by_weekend_weekday.py @@ -0,0 +1,40 @@ +""" +Diel Trends by Weekend vs Weekday +================================= + +_thumb: .4, .4 +""" +import atmospy +import seaborn as sns +atmospy.set_theme() + +# load the data +bc = atmospy.load_dataset("us-bc") + +# select the first two locations so we can compare them +bc_multi_site = bc[bc["Local Site Name"].isin(bc["Local Site Name"].unique()[0:2])] + +# create a column that sets a bool if the date is a weekend +bc_multi_site.loc[:, "Is Weekend"] = bc_multi_site["Timestamp Local"].dt.day_name().isin(["Saturday", "Sunday"]) + +# convert to long-form for faceting +bc_long_form = bc_multi_site.melt( + id_vars=["Timestamp Local", "Is Weekend", "Local Site Name"], + value_vars=["Sample Measurement"] +) + +g = sns.FacetGrid( + bc_long_form, + row="Local Site Name", + col="Is Weekend", + hue="Local Site Name", + aspect=1.25, +) + +g.map_dataframe(atmospy.dielplot, x="Timestamp Local", y="value") + +# update the y-axis limit to force to zero +g.set(ylim=(0, None), ylabel='Black Carbon') + +# update the titles to take up less space +g.set_titles("{row_name} | Weekend = {col_name}") \ No newline at end of file diff --git a/docs/examples/diel_by_weekend_weekday.rst b/docs/examples/diel_by_weekend_weekday.rst new file mode 100644 index 0000000..8cfc95d --- /dev/null +++ b/docs/examples/diel_by_weekend_weekday.rst @@ -0,0 +1,19 @@ + + +.. currentmodule:: atmospy + +.. _diel_by_weekend_weekday: + + +Diel Trends by Weekend vs Weekday +================================= + + + +.. image:: _images/diel_by_weekend_weekday.png + +**atmospy components used:** :func:`set_theme`, :func:`load_dataset` + +.. 
literalinclude:: diel_by_weekend_weekday.py + :lines: 7- + diff --git a/docs/examples/dielplot.py b/docs/examples/dielplot.py index 1f3f226..c699721 100644 --- a/docs/examples/dielplot.py +++ b/docs/examples/dielplot.py @@ -1,6 +1,6 @@ """ -Diurnal Ozone -============= +Diel Ozone Trends +================= _thumb: .8, .8 """ diff --git a/docs/examples/dielplot.rst b/docs/examples/dielplot.rst index d1e6c61..2443b56 100644 --- a/docs/examples/dielplot.rst +++ b/docs/examples/dielplot.rst @@ -5,8 +5,8 @@ .. _dielplot: -Diurnal Ozone -============= +Diel Ozone Trends +================= diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 5a2ddae..ac715e8 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -80,12 +80,16 @@ Example gallery ./calendar_by_hour + ./diel_by_weekend_weekday + ./dielplot ./pollution_rose ./regression + ./rose_by_month + @@ -116,6 +120,19 @@ Example gallery +.. raw:: html + + + + + .. raw:: html
@@ -155,6 +172,19 @@ Example gallery +.. raw:: html + + + + + .. raw:: html diff --git a/docs/examples/pollution_rose.py b/docs/examples/pollution_rose.py index 7cc6cc5..56aacdf 100644 --- a/docs/examples/pollution_rose.py +++ b/docs/examples/pollution_rose.py @@ -1,6 +1,6 @@ """ -Pollution Rose -============== +PM2.5 by Direction +================== _thumb: .8, .8 """ diff --git a/docs/examples/pollution_rose.rst b/docs/examples/pollution_rose.rst index 8b02b45..5f5dfc6 100644 --- a/docs/examples/pollution_rose.rst +++ b/docs/examples/pollution_rose.rst @@ -5,8 +5,8 @@ .. _pollution_rose: -Pollution Rose -============== +PM2.5 by Direction +================== diff --git a/docs/examples/regression.py b/docs/examples/regression.py index 008c10d..b6f07e0 100644 --- a/docs/examples/regression.py +++ b/docs/examples/regression.py @@ -1,6 +1,6 @@ """ -Regression Plot -=============== +Compare Two Sensors +=================== _thumb: .4, .4 """ diff --git a/docs/examples/regression.rst b/docs/examples/regression.rst index 8423c30..9f55d72 100644 --- a/docs/examples/regression.rst +++ b/docs/examples/regression.rst @@ -5,8 +5,8 @@ .. 
_regression: -Regression Plot -=============== +Compare Two Sensors +=================== diff --git a/docs/examples/rose_by_month.py b/docs/examples/rose_by_month.py new file mode 100644 index 0000000..a404666 --- /dev/null +++ b/docs/examples/rose_by_month.py @@ -0,0 +1,44 @@ +""" +PM2.5 Pollution Rose by Month +============================== + +_thumb: .4, .4 +""" +import atmospy +import seaborn as sns +atmospy.set_theme() + +# Load the example dataset +met = atmospy.load_dataset("air-sensors-met") + +# add a column that extracts the month from the timestamp_local column +met.loc[:, "Month"] = met["timestamp_local"].dt.month_name() + +# convert to long-form data +met_long_form = met.melt(id_vars=["timestamp_local", "Month", "ws", "wd"], value_vars=["pm25"]) + +# set up the FacetGrid +g = sns.FacetGrid( + data=met_long_form, + col="Month", + col_wrap=3, + subplot_kws={"projection": "polar"}, + despine=False +) + +# map the dataframe using the pollutionroseplot function +g.map_dataframe( + atmospy.pollutionroseplot, + ws="ws", wd="wd", pollutant="value", + faceted=True, + segments=20, + suffix="$µgm^{-3}$" +) + +# add the legend and place it where it looks nice +g.add_legend( + title="$PM_{2.5}$", + bbox_to_anchor=(.535, 0.2), + handlelength=1, + handleheight=1 +) \ No newline at end of file diff --git a/docs/examples/rose_by_month.rst b/docs/examples/rose_by_month.rst new file mode 100644 index 0000000..f4e660f --- /dev/null +++ b/docs/examples/rose_by_month.rst @@ -0,0 +1,19 @@ + + +.. currentmodule:: atmospy + +.. _rose_by_month: + + +PM2.5 Pollution Rose by Month +============================== + + + +.. image:: _images/rose_by_month.png + +**atmospy components used:** :func:`set_theme`, :func:`load_dataset` + +.. literalinclude:: rose_by_month.py + :lines: 7- + diff --git a/docs/index.rst b/docs/index.rst index 620b842..c97647c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,8 +28,13 @@ atmospy: air quality data visualization .. grid-item:: - .. 
image:: example_thumbs/calendar_by_day_thumb.png - :target: ./examples/calendar_by_day.html + .. image:: example_thumbs/diel_by_weekend_weekday_thumb.png + :target: ./examples/diel_by_weekend_weekday.html + + .. grid-item:: + + .. image:: example_thumbs/rose_by_month_thumb.png + :target: ./examples/rose_by_month.html diff --git a/docs/tutorial/Introduction.rst b/docs/tutorial/Introduction.rst index 1416114..a7f6250 100644 --- a/docs/tutorial/Introduction.rst +++ b/docs/tutorial/Introduction.rst @@ -5,11 +5,7 @@ An introduction to atmospy ========================== -Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib `_ and integrates closely with `pandas `_ data structures. - -Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them. - -Here's an example of what seaborn can do: +Coming soon! .. code:: ipython3 diff --git a/docs/tutorial/aesthetics.rst b/docs/tutorial/aesthetics.rst index 18e611e..52cdffd 100644 --- a/docs/tutorial/aesthetics.rst +++ b/docs/tutorial/aesthetics.rst @@ -5,8 +5,4 @@ Figure Aesthetics ================== -Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib `_ and integrates closely with `pandas `_ data structures. - -Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them. 
- -Here's an example of what seaborn can do: +Coming Soon! diff --git a/docs/tutorial/plots.rst b/docs/tutorial/plots.rst index f4c8f46..0c558f0 100644 --- a/docs/tutorial/plots.rst +++ b/docs/tutorial/plots.rst @@ -213,7 +213,6 @@ default, we plot the daily average for the pollutant of choice: y="Sample Measurement", freq="day", cbar=False, - # xlabel="Day of Month", height=2.5, linewidths=.1 ); @@ -389,4 +388,561 @@ Now that you’ve seen how to use the individual plots above, we’re going to go over some of the advanced features that are available by leveraging seaborn’s great grid functionality. +Often, it can be useful to draw multiple versions of the same figure +with slight differences in what’s being plotted. For example, you may +want to plot a pollution rose, but plot each month of data separately. +Or, you may want to plot a diel profile, but show the results for +weekdays vs weekends on separate plots so that you can easily see the +difference caused by traffic patterns. + +To do so, we can use seaborn’s ``FacetGrid``. Seaborn has a great +tutorial on how to use ``FacetGrid``\ ’s +`here <https://seaborn.pydata.org/tutorial/axis_grids.html>`__ that we +advise you read. Next, we’ll go over several examples that showcase some +of the cool things you can do with ``atmospy`` and ``seaborn`` together. + +``FacetGrid`` and the ``pollutionroseplot`` +------------------------------------------- + +Using the example dataset (``air-sensors-met``), let’s plot the +pollution rose separately for each month (the dataset contains data for +May through November). To do so, we will use the ``FacetGrid`` +function from ``seaborn``. First, we will add a column that will serve +as the dimension of the figure. In this case, we want to extract the +month: + +.. 
code:: ipython3 + + # load the example dataset + met = atmospy.load_dataset("air-sensors-met") + + # add a column that extracts the month from the timestamp_local column + met.loc[:, "Month"] = met["timestamp_local"].dt.month_name() + + # print the first 5 records + met.head() + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
timestamp_localwdwspm1pm25pm10temprhMonth
42023-05-01 00:00:00229.2966670.8623332.1899332.3700225.51701014.60333353.111667May
52023-05-01 01:00:00233.0016670.9115002.3601522.5165384.62334013.71500054.445000May
62023-05-01 02:00:00230.7316670.9306672.4995502.6825074.81637213.31000058.595000May
72023-05-01 03:00:00218.7566670.8921672.6292822.8069704.85106013.12500059.406667May
82023-05-01 04:00:00206.0433330.7913332.9747153.1842875.85943712.21000062.248333May
+
+ + + +As we can see above, we now have a column with the month name. In order +to use the ``FacetGrid`` properly, we need to convert our wide-form +dataframe into a long-form dataframe. For a much better explanation of +the difference than I can provide, please read through the seaborn +explanation +`here <https://seaborn.pydata.org/tutorial/data_structure.html#long-form-vs-wide-form-data>`__. + +We can easily convert our dataframe to long-form by using the Pandas +``melt`` function: + +.. code:: ipython3 + + met_long_form = met.melt(id_vars=["timestamp_local", "Month", "ws", "wd"], value_vars=["pm25"]) + + # print the first 5 records + met_long_form.head() + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
timestamp_localMonthwswdvariablevalue
02023-05-01 00:00:00May0.862333229.296667pm252.370022
12023-05-01 01:00:00May0.911500233.001667pm252.516538
22023-05-01 02:00:00May0.930667230.731667pm252.682507
32023-05-01 03:00:00May0.892167218.756667pm252.806970
42023-05-01 04:00:00May0.791333206.043333pm253.184287
+
+ + + +Next, we will set up our ``FacetGrid`` and tell it to use the ``Month`` +column as the dimension and to wrap every 3 so that it all fits into one +nice figure: + +.. code:: ipython3 + + # set up the FacetGrid + g = sns.FacetGrid( + data=met_long_form, + col="Month", + col_wrap=3, + subplot_kws={"projection": "polar"}, + despine=False + ) + + # map the dataframe using the pollutionroseplot function + g.map_dataframe( + atmospy.pollutionroseplot, + ws="ws", wd="wd", pollutant="value", + faceted=True, + segments=20, + suffix="$µgm^{-3}$" + ) + + # add the legend and place it where it looks nice + g.add_legend( + title="$PM_{2.5}$", + bbox_to_anchor=(.535, 0.2), + handlelength=1, + handleheight=1 + ) + + + +.. image:: plots_files/plots_40_0.png + + +Not bad for 3 lines of code (yes, they are split across more than 3 +lines for readability, but still - 3 lines!)! + +``FacetGrid`` and the ``dielplot`` +---------------------------------- + +Next, we’re going to go ahead and explore what we can do with the +``dielplot`` on a ``FacetGrid``. Like above, we can simply plot a +slightly different subset of the data in each column - let’s go ahead +and walk through an example. Let’s plot the diel profile for black +carbon on weekdays versus weekends. + +First, we need to load our example dataset and modify it a bit to +provide the information we want to facet by: + +.. code:: ipython3 + + # load the data + bc = atmospy.load_dataset("us-bc") + + # select just one random location for now + bc_single_site = bc[bc["Local Site Name"] == bc["Local Site Name"].unique()[0]] + + # create a column that sets a bool if the date is a weekend + bc_single_site.loc[:, "Is Weekend"] = bc_single_site["Timestamp Local"].dt.day_name().isin(["Saturday", "Sunday"]) + + # convert to long-form for faceting + bc_long_form = bc_single_site.melt( + id_vars=["Timestamp Local", "Is Weekend"], + value_vars=["Sample Measurement"] + ) + + # print the first 5 records + bc_long_form.head() + + + + +.. 
raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Timestamp LocalIs Weekendvariablevalue
02023-01-01 00:00:00TrueSample Measurement2.76
12023-01-01 01:00:00TrueSample Measurement2.55
22023-01-01 02:00:00TrueSample Measurement3.18
32023-01-01 03:00:00TrueSample Measurement1.64
42023-01-01 04:00:00TrueSample Measurement1.79
+
+ + + +Now that we have our data prepared, we can set up a ``FacetGrid`` like +above and define the column to facet by as the new column we just +created: + +.. code:: ipython3 + + g = sns.FacetGrid( + data=bc_long_form, + col="Is Weekend", + + # let's adjust the aspect ratio for funsies + aspect=1.25 + ) + g.map_dataframe(atmospy.dielplot, x="Timestamp Local", y="value") + + + +.. image:: plots_files/plots_45_0.png + + +While this isn’t the greatest example, we can see there is a difference +between weekdays and weekends in the early morning, though the IQR +band is quite wide. At some point, we will add some better example +datasets so that these figures are more impressive. For now, they work! + +Now, what if we had two locations that we wanted to compare? Let’s go +ahead and pull data for two sites and show the difference by site rather +than by weekday/weekend: + +.. code:: ipython3 + + # load the data + bc = atmospy.load_dataset("us-bc") + + # select the first two locations so we can compare them + bc_multi_site = bc[bc["Local Site Name"].isin(bc["Local Site Name"].unique()[0:2])] + + # create a column that sets a bool if the date is a weekend + bc_multi_site.loc[:, "Is Weekend"] = bc_multi_site["Timestamp Local"].dt.day_name().isin(["Saturday", "Sunday"]) + + # convert to long-form for faceting + bc_long_form = bc_multi_site.melt( + id_vars=["Timestamp Local", "Is Weekend", "Local Site Name"], + value_vars=["Sample Measurement"] + ) + + # print the first 5 records + bc_long_form.head() + + + + +.. raw:: html + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Timestamp LocalIs WeekendLocal Site Namevariablevalue
02023-01-01 00:00:00TrueI-25Sample Measurement2.76
12023-01-01 01:00:00TrueI-25Sample Measurement2.55
22023-01-01 02:00:00TrueI-25Sample Measurement3.18
32023-01-01 03:00:00TrueI-25Sample Measurement1.64
42023-01-01 04:00:00TrueI-25Sample Measurement1.79
+
+ + + +Let’s set up the ``FacetGrid`` and plot the diel trend by location: + +.. code:: ipython3 + + g = sns.FacetGrid( + bc_long_form, + row="Local Site Name", + hue="Local Site Name", + aspect=1.25, + ) + + g.map_dataframe(atmospy.dielplot, x="Timestamp Local", y="value") + + # update the y-axis limit to force to zero + g.set(ylim=(0, None)) + + + +.. image:: plots_files/plots_49_0.png + + +Rather than plotting on the column, we plotted different locations on +the row, which sets up the next figure nicely - let’s go ahead and use +the same data set, but plot by weekday/weekend AND two different +locations together: + +.. code:: ipython3 + + g = sns.FacetGrid( + bc_long_form, + row="Local Site Name", + col="Is Weekend", + hue="Local Site Name", + aspect=1.25, + ) + + g.map_dataframe(atmospy.dielplot, x="Timestamp Local", y="value") + + # update the y-axis limit to force to zero + g.set(ylim=(0, None), ylabel='Black Carbon') + + # update the titles to take up less space + g.set_titles("{row_name} | Weekend = {col_name}") + + + +.. image:: plots_files/plots_51_0.png + + +``FacetGrid`` and the ``calendarplot`` +-------------------------------------- + +Coming soon! 
diff --git a/docs/tutorial/plots_files/plots_26_0.png b/docs/tutorial/plots_files/plots_26_0.png index d327893..ae76455 100644 Binary files a/docs/tutorial/plots_files/plots_26_0.png and b/docs/tutorial/plots_files/plots_26_0.png differ diff --git a/docs/tutorial/plots_files/plots_28_0.png b/docs/tutorial/plots_files/plots_28_0.png index de60e21..ad1884d 100644 Binary files a/docs/tutorial/plots_files/plots_28_0.png and b/docs/tutorial/plots_files/plots_28_0.png differ diff --git a/docs/tutorial/plots_files/plots_30_0.png b/docs/tutorial/plots_files/plots_30_0.png index a44de9a..768f665 100644 Binary files a/docs/tutorial/plots_files/plots_30_0.png and b/docs/tutorial/plots_files/plots_30_0.png differ diff --git a/docs/tutorial/plots_files/plots_32_0.png b/docs/tutorial/plots_files/plots_32_0.png index 36b17d0..379f887 100644 Binary files a/docs/tutorial/plots_files/plots_32_0.png and b/docs/tutorial/plots_files/plots_32_0.png differ diff --git a/docs/tutorial/plots_files/plots_40_0.png b/docs/tutorial/plots_files/plots_40_0.png new file mode 100644 index 0000000..95ef848 Binary files /dev/null and b/docs/tutorial/plots_files/plots_40_0.png differ diff --git a/docs/tutorial/plots_files/plots_45_0.png b/docs/tutorial/plots_files/plots_45_0.png new file mode 100644 index 0000000..b4b95ca Binary files /dev/null and b/docs/tutorial/plots_files/plots_45_0.png differ diff --git a/docs/tutorial/plots_files/plots_49_0.png b/docs/tutorial/plots_files/plots_49_0.png new file mode 100644 index 0000000..4f44591 Binary files /dev/null and b/docs/tutorial/plots_files/plots_49_0.png differ diff --git a/docs/tutorial/plots_files/plots_51_0.png b/docs/tutorial/plots_files/plots_51_0.png new file mode 100644 index 0000000..d12e7af Binary files /dev/null and b/docs/tutorial/plots_files/plots_51_0.png differ diff --git a/examples/calendar_by_day.py b/examples/calendar_by_day.py index f1361cb..cd77b16 100644 --- a/examples/calendar_by_day.py +++ b/examples/calendar_by_day.py @@ -1,6 
+1,6 @@ """ -Ozone by Year -============= +Daily Average Ozone +=================== _thumb: .8, .8 """ diff --git a/examples/calendar_by_hour.py b/examples/calendar_by_hour.py index 327790d..649fbf1 100644 --- a/examples/calendar_by_hour.py +++ b/examples/calendar_by_hour.py @@ -1,6 +1,6 @@ """ -Ozone by Year -============= +Hourly Averaged Ozone +===================== _thumb: .8, .8 """ diff --git a/examples/diel_by_weekend_weekday.py b/examples/diel_by_weekend_weekday.py new file mode 100644 index 0000000..5e49e63 --- /dev/null +++ b/examples/diel_by_weekend_weekday.py @@ -0,0 +1,40 @@ +""" +Diel Trends by Weekend vs Weekday +================================= + +_thumb: .4, .4 +""" +import atmospy +import seaborn as sns +atmospy.set_theme() + +# load the data +bc = atmospy.load_dataset("us-bc") + +# select the first two locations +bc_multi_site = bc[bc["Local Site Name"].isin(bc["Local Site Name"].unique()[0:2])] + +# create a column that sets a bool if the date is a weekend +bc_multi_site.loc[:, "Is Weekend"] = bc_multi_site["Timestamp Local"].dt.day_name().isin(["Saturday", "Sunday"]) + +# convert to long-form for faceting +bc_long_form = bc_multi_site.melt( + id_vars=["Timestamp Local", "Is Weekend", "Local Site Name"], + value_vars=["Sample Measurement"] +) + +g = sns.FacetGrid( + bc_long_form, + row="Local Site Name", + col="Is Weekend", + hue="Local Site Name", + aspect=1.25, +) + +g.map_dataframe(atmospy.dielplot, x="Timestamp Local", y="value") + +# update the y-axis limit to force to zero +g.set(ylim=(0, None), ylabel='Black Carbon') + +# update the titles to take up less space +g.set_titles("{row_name} | Weekend = {col_name}") \ No newline at end of file diff --git a/examples/dielplot.py b/examples/dielplot.py index 1f3f226..c699721 100644 --- a/examples/dielplot.py +++ b/examples/dielplot.py @@ -1,6 +1,6 @@ """ -Diurnal Ozone -============= +Diel Ozone Trends +================= _thumb: .8, .8 """ diff --git a/examples/pollution_rose.py 
b/examples/pollution_rose.py index 7cc6cc5..56aacdf 100644 --- a/examples/pollution_rose.py +++ b/examples/pollution_rose.py @@ -1,6 +1,6 @@ """ -Pollution Rose -============== +PM2.5 by Direction +================== _thumb: .8, .8 """ diff --git a/examples/regression.py b/examples/regression.py index 008c10d..b6f07e0 100644 --- a/examples/regression.py +++ b/examples/regression.py @@ -1,6 +1,6 @@ """ -Regression Plot -=============== +Compare Two Sensors +=================== _thumb: .4, .4 """ diff --git a/examples/rose_by_month.py b/examples/rose_by_month.py new file mode 100644 index 0000000..a404666 --- /dev/null +++ b/examples/rose_by_month.py @@ -0,0 +1,44 @@ +""" +PM2.5 Pollution Rose by Month +============================== + +_thumb: .4, .4 +""" +import atmospy +import seaborn as sns +atmospy.set_theme() + +# Load the example dataset +met = atmospy.load_dataset("air-sensors-met") + +# add a column that extracts the month from the timestamp_local column +met.loc[:, "Month"] = met["timestamp_local"].dt.month_name() + +# convert to long-form data +met_long_form = met.melt(id_vars=["timestamp_local", "Month", "ws", "wd"], value_vars=["pm25"]) + +# set up the FacetGrid +g = sns.FacetGrid( + data=met_long_form, + col="Month", + col_wrap=3, + subplot_kws={"projection": "polar"}, + despine=False +) + +# map the dataframe using the pollutionroseplot function +g.map_dataframe( + atmospy.pollutionroseplot, + ws="ws", wd="wd", pollutant="value", + faceted=True, + segments=20, + suffix="$µgm^{-3}$" +) + +# add the legend and place it where it looks nice +g.add_legend( + title="$PM_{2.5}$", + bbox_to_anchor=(.535, 0.2), + handlelength=1, + handleheight=1 +) \ No newline at end of file