Skip to content

Commit

Permalink
Updates key params to enable faceting
Browse files Browse the repository at this point in the history
  • Loading branch information
dhhagan committed Apr 26, 2024
1 parent f1dab75 commit ab5985e
Show file tree
Hide file tree
Showing 47 changed files with 1,117 additions and 67 deletions.
34 changes: 22 additions & 12 deletions atmospy/trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ def calendarplot(data, x, y, freq="day", agg="mean", vmin=None, vmax=None, cmap=
return ax


def dielplot(data, x, y, ax=None, ylim=None, xlabel=None,
ylabel=None, title=None, plot_kws=None, **kwargs):
def dielplot(data=None, *, x=None, y=None, ax=None, ylim=None, xlabel=None,
ylabel=None, title=None, color=None, show_iqr=True, plot_kws=None, **kwargs):
"""Plot the diel (e.g., diurnal) trend for a pollutant.
Diel plots can be incredibly useful for understanding daily
Expand All @@ -334,6 +334,10 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None,
The label for the y-axis, by default None
title : str, optional
The title for the plot, by default None
color : str, optional
Specify the color to use in the figure
show_iqr : bool, optional
If True, plot the interquartile range as a shaded region, default True
plot_kws : dict or None, optional
Additional keyword arguments are passed directly to the underlying plot call
, by default None
Expand All @@ -349,7 +353,7 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None,
Plot a simple diel profile for the entire dataset.
>>> df = atmospy.load_dataset("us-bc")
>>> atmospy.dielplot(df, x="Timestamp GMT", y="Sample Measurement")
>>> atmospy.dielplot(data=df, x="Timestamp GMT", y="Sample Measurement")
"""
default_plot_kws = {
Expand All @@ -362,6 +366,8 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None,

#
plot_kws = {} if plot_kws is None else dict(default_plot_kws, **plot_kws)
if color is not None:
plot_kws.update(dict(c=color))

# copy over only the needed data
_data = data[[x, y]].copy(deep=True)
Expand All @@ -375,23 +381,27 @@ def dielplot(data, x, y, ax=None, ylim=None, xlabel=None,
# compute the diel statistics
stats = _data.groupby([_data.index.hour, _data.index.minute], as_index=False).describe()

# append the first record so the first and last records are identical
stats.loc[len(stats.index)] = stats.loc[0]

# build an index we can use to make the figure
index = stats.index.values
freq = int(60 / (index.size / 24))
freq = int(60 / ((index.size - 1) / 24))
figure_index = pd.date_range(start='2020-01-01', periods=index.size, freq=f"{freq}min")

# plot the diel average
ax.plot(figure_index, stats[y]['mean'], **plot_kws)

# add the IQR as a shaded region
ax.fill_between(
figure_index,
y1=stats[y]['25%'],
y2=stats[y]['75%'],
alpha=0.25,
lw=2,
color=plt.gca().lines[-1].get_color()
)
if show_iqr:
ax.fill_between(
figure_index,
y1=stats[y]['25%'],
y2=stats[y]['75%'],
alpha=0.25,
lw=2,
color=plt.gca().lines[-1].get_color()
)

# adjust plot parameters
xticks = ax.get_xticks()
Expand Down
3 changes: 3 additions & 0 deletions atmospy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ def load_dataset(name, cache=True, data_home=None, **kwargs):

if name == "air-sensors-met":
df["timestamp_local"] = pd.to_datetime(df["timestamp_local"])

# only keep data from May 2023 onward (drops April and earlier)
df = df[df["timestamp_local"] >= "2023-05-01"].copy()

return df

Expand Down
6 changes: 1 addition & 5 deletions docs/_tutorial/Introduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@
"An introduction to atmospy\n",
"==========================\n",
"\n",
"Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib <https://matplotlib.org/>`_ and integrates closely with `pandas <https://pandas.pydata.org/>`_ data structures.\n",
"\n",
"Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them.\n",
"\n",
"Here's an example of what seaborn can do:"
"Coming soon!"
]
},
{
Expand Down
6 changes: 1 addition & 5 deletions docs/_tutorial/aesthetics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@
"Figure Aesthetics\n",
"==================\n",
"\n",
"Seaborn is a library for making statistical graphics in Python. It builds on top of `matplotlib <https://matplotlib.org/>`_ and integrates closely with `pandas <https://pandas.pydata.org/>`_ data structures.\n",
"\n",
"Seaborn helps you explore and understand your data. Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and statistical aggregation to produce informative plots. Its dataset-oriented, declarative API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them.\n",
"\n",
"Here's an example of what seaborn can do:"
"Coming Soon!"
]
}
],
Expand Down
260 changes: 258 additions & 2 deletions docs/_tutorial/plots.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@
" y=\"Sample Measurement\", \n",
" freq=\"day\",\n",
" cbar=False,\n",
" # xlabel=\"Day of Month\",\n",
" height=2.5,\n",
" linewidths=.1\n",
");"
Expand Down Expand Up @@ -401,12 +400,269 @@
"source": [
"# Facets and other Fun Things\n",
"\n",
"Now that you've seen how to use the individual plots above, we're going to go over some of the advanced features that are available by leveraging seaborn's great grid functionality."
"Now that you've seen how to use the individual plots above, we're going to go over some of the advanced features that are available by leveraging seaborn's great grid functionality.\n",
"\n",
"Often, it can be useful to draw multiple versions of the same figure with slight differences in what's being plotted. For example, you may want to plot a pollution rose, but plot each month of data separately. Or, you may want to plot a diel profile, but show the results for weekdays vs weekends on separate plots so that you can easily see the difference caused by traffic patterns.\n",
"\n",
"To do so, we can use seaborn's `FacetGrid`. Seaborn has a great tutorial on how to use a `FacetGrid` [here](https://seaborn.pydata.org/tutorial/axis_grids.html) that we advise you to read. Next, we'll go over several examples that showcase some of the cool things you can do with `atmospy` and `seaborn` together."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## `FacetGrid` and the `pollutionroseplot`\n",
"\n",
"Using the example dataset (`air-sensors-met`), let's plot the pollution rose separately for each month (the dataset contains data for May through November). To do so, we will use the `FacetGrid` function from `seaborn`. First, we will add a column that will serve as the dimension of the figure. In this case, we want to extract the month:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load the example dataset\n",
"met = atmospy.load_dataset(\"air-sensors-met\")\n",
"\n",
"# add a column that extracts the month from the timestamp_local column\n",
"met.loc[:, \"Month\"] = met[\"timestamp_local\"].dt.month_name()\n",
"\n",
"# print the first 5 records\n",
"met.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As we can see above, we now have a column with the month name. In order to use the `FacetGrid` properly, we need to convert our wide-form dataframe into a long-form dataframe. For a much better explanation of the difference than I can provide, please read through the seaborn explanation [here](https://seaborn.pydata.org/tutorial/data_structure.html#long-form-vs-wide-form-data).\n",
"\n",
"We can easily convert our dataframe to long-form by using the Pandas `melt` function:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"met_long_form = met.melt(id_vars=[\"timestamp_local\", \"Month\", \"ws\", \"wd\"], value_vars=[\"pm25\"])\n",
"\n",
"# print the first 5 records\n",
"met_long_form.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we will set up our `FacetGrid` and tell it to use the `Month` column as the dimension and to wrap every 3 so that it all fits into one nice figure:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# set up the FacetGrid\n",
"g = sns.FacetGrid(\n",
" data=met_long_form, \n",
" col=\"Month\", \n",
" col_wrap=3,\n",
" subplot_kws={\"projection\": \"polar\"},\n",
" despine=False\n",
")\n",
"\n",
"# map the dataframe using the pollutionroseplot function\n",
"g.map_dataframe(\n",
" atmospy.pollutionroseplot, \n",
" ws=\"ws\", wd=\"wd\", pollutant=\"value\", \n",
" faceted=True, \n",
" segments=20, \n",
" suffix=\"$µgm^{-3}$\"\n",
")\n",
"\n",
"# add the legend and place it where it looks nice\n",
"g.add_legend(\n",
" title=\"$PM_{2.5}$\", \n",
" bbox_to_anchor=(.535, 0.2), \n",
" handlelength=1, \n",
" handleheight=1\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Not bad for 3 lines of code (yes, they are split across more than 3 lines for readability, but still - 3 lines!)!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## `FacetGrid` and the `dielplot`\n",
"\n",
"Next, we're going to go ahead and explore what we can do with the `dielplot` on a `FacetGrid`. Like above, we can simply plot a slightly different subset of the data in each column - let's go ahead and walk through an example. Let's plot the diel profile for black carbon on weekdays versus weekends.\n",
"\n",
"First, we need to load our example dataset and modify it a bit to provide the information we want to facet by:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load the data\n",
"bc = atmospy.load_dataset(\"us-bc\")\n",
"\n",
"# select just one random location for now\n",
"bc_single_site = bc[bc[\"Local Site Name\"] == bc[\"Local Site Name\"].unique()[0]]\n",
"\n",
"# create a column that sets a bool if the date is a weekend\n",
"bc_single_site.loc[:, \"Is Weekend\"] = bc_single_site[\"Timestamp Local\"].dt.day_name().isin([\"Saturday\", \"Sunday\"])\n",
"\n",
"# convert to long-form for faceting\n",
"bc_long_form = bc_single_site.melt(\n",
" id_vars=[\"Timestamp Local\", \"Is Weekend\"], \n",
" value_vars=[\"Sample Measurement\"]\n",
")\n",
"\n",
"# print the first 5 records\n",
"bc_long_form.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that we have our data prepared, we can set up a `FacetGrid` like above and define the column to facet by as the new column we just created:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"g = sns.FacetGrid(\n",
" data=bc_long_form,\n",
" col=\"Is Weekend\",\n",
" \n",
" # let's adjust the aspect ratio for funsies\n",
" aspect=1.25\n",
")\n",
"g.map_dataframe(atmospy.dielplot, x=\"Timestamp Local\", y=\"value\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"While this isn't the greatest example, we can see there is a difference between weekdays and weekends in the early morning, though the IQR band is quite wide. At some point, we will add some better example datasets so that these figures are more impressive. For now, they work!\n",
"\n",
"Now, what if we had two locations that we wanted to compare? Let's go ahead and pull data for two sites and show the difference by site rather than by weekday/weekend:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load the data\n",
"bc = atmospy.load_dataset(\"us-bc\")\n",
"\n",
"# select the first two locations so we can compare them\n",
"bc_multi_site = bc[bc[\"Local Site Name\"].isin(bc[\"Local Site Name\"].unique()[0:2])]\n",
"\n",
"# create a column that sets a bool if the date is a weekend\n",
"bc_multi_site.loc[:, \"Is Weekend\"] = bc_multi_site[\"Timestamp Local\"].dt.day_name().isin([\"Saturday\", \"Sunday\"])\n",
"\n",
"# convert to long-form for faceting\n",
"bc_long_form = bc_multi_site.melt(\n",
" id_vars=[\"Timestamp Local\", \"Is Weekend\", \"Local Site Name\"], \n",
" value_vars=[\"Sample Measurement\"]\n",
")\n",
"\n",
"# print the first 5 records\n",
"bc_long_form.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's set up the `FacetGrid` and plot the diel trend by location:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"g = sns.FacetGrid(\n",
" bc_long_form,\n",
" row=\"Local Site Name\",\n",
" hue=\"Local Site Name\",\n",
" aspect=1.25,\n",
")\n",
"\n",
"g.map_dataframe(atmospy.dielplot, x=\"Timestamp Local\", y=\"value\")\n",
"\n",
"# update the y-axis limit to force to zero\n",
"g.set(ylim=(0, None))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rather than plotting on the column, we plotted different locations on the row, which sets up the next figure nicely - let's go ahead and use the same data set, but plot by weekday/weekend AND two different locations together:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"g = sns.FacetGrid(\n",
" bc_long_form,\n",
" row=\"Local Site Name\",\n",
" col=\"Is Weekend\",\n",
" hue=\"Local Site Name\",\n",
" aspect=1.25,\n",
")\n",
"\n",
"g.map_dataframe(atmospy.dielplot, x=\"Timestamp Local\", y=\"value\")\n",
"\n",
"# update the y-axis limit to force to zero\n",
"g.set(ylim=(0, None), ylabel='Black Carbon')\n",
"\n",
"# update the titles to take up less space\n",
"g.set_titles(\"{row_name} | Weekend = {col_name}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## `FacetGrid` and the `calendarplot`\n",
"\n",
"Coming soon!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/example_thumbs/dielplot_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/example_thumbs/pollution_rose_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/example_thumbs/rose_by_month_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/examples/_images/diel_by_weekend_weekday.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/examples/_images/dielplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/examples/_images/pollution_rose.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/examples/_images/rose_by_month.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit ab5985e

Please sign in to comment.