NCAS-CMS · sadielbartholomew · Nov 8, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/docs/source/recipes/plot_18_recipe.py b/docs/source/recipes/plot_18_recipe.py
@@ -0,0 +1,140 @@
+"""
+Calculating the Pearson correlation coefficient between datasets
+================================================================
+
+In this recipe, we will take two datasets, one for an independent variable
+(in this example elevation) and one for a dependent variable (snow
+cover over a particuar day), regrid them to the same resolution then
+calculate the correlation coefficient, to get a measure of the relationship
+between them.
+
+"""
+
+# %%
+# 1. Import cf-python, cf-plot and other required packages:
+import cfplot as cfp
+import cf
+
+import matplotlib.pyplot as plt
+import scipy.stats.mstats as mstats
+
+# %%
+# 2. Read the data in and unpack the Fields from FieldLists using indexing.
+# In our example We are investigating the influence of the land height on
+# the snow cover extent, so snow cover is the dependent variable. The snow
+# cover data is the
+# 'Snow Cover Extent 2017-present (raster 500 m), Europe, daily – version 1'
+# sourced from the Copernicus Land Monitoring Service which is described at:
+# https://land.copernicus.eu/en/products/snow/snow-cover-extent-europe-v1-0-500m
+# and the elevation data is the 'NOAA NGDC GLOBE topo: elevation data' dataset
+# which can be sourced from the IRI Data Library, or details found, at:
+# http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NGDC/.GLOBE/.topo/index.html.
+orog = cf.read("~/recipes/1km_elevation.nc")[0]
+snow = cf.read("~/recipes/snowcover")[0]
+
+# %%
+# 3. Choose the day of pre-aggregated snow cover to investigate. We will
+# take the first datetime element corresponding to the first day from the
+# datasets, 1st January 2024, but by changing the indexing you can explore
+# other days by changing the index. We also get the string corresponding to
+# the date, to reference later:
+snow_day = snow[0]
+snow_day_dt = snow_day.coordinate("time")[0].data
+snow_day_daystring = f"{snow_day_dt.datetime_as_string[0].split(' ')[0]}"
+
+# %%
+# 4. Choose the region to consider to compare the relationship across,
+# which must be defined across both datasets, though not necessarily on the
+# same grid since we regrid to the same grid next and subspace to the same
+# area for both datasets ready for comparison in the next steps. By changing
+# the latitude and longitude points in the tuple below, you can change the
+# area that is used:
+region_in_mid_uk = ((-3.0, -1.0), (52.0, 55.0))
+sub_orog = orog.subspace(
+    longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1])
+)
+sub_snow = snow_day.subspace(
+    longitude=cf.wi(*region_in_mid_uk[0]), latitude=cf.wi(*region_in_mid_uk[1])
+)
+
+# %%
+# 5. Ensure data quality, since the standard name here corresponds to a
+# unitless fraction, but the values are in the tens, so we need to
+# normalise these to all lie between 0 and 1 and change the units
+# appropriately:
+sub_snow = ((sub_snow - sub_snow.minimum()) / (sub_snow.range()))
+sub_snow.override_units("1", inplace=True)
+
+# %%
+# 6. Regrid the data so that they lie on the same grid and therefore each
+# array structure has values with corresponding geospatial points that
+# can be statistically compared. Here the elevation field is regridded to the
+# snow field since the snow is higher-resolution, but the other way round is
+# possible by switching the field order:
+regridded_orog = sub_orog.regrids(sub_snow, method="linear")
+
+# %%
+# 7. Squeeze the snow data to remove the size 1 axes so we have arrays of
+# the same dimensions for each of the two fields to compare:
+sub_snow = sub_snow.squeeze()
+
+# %%
+# 8. Finally, perform the statistical calculation by using the SciPy method
+# to find the Pearson correlation coefficient for the two arrays now they are
+# in comparable form. Note we need to use 'scipy.stats.mstats' and not
+# 'scipy.stats' for the 'pearsonr' method, to account for masked
+# data in the array(s) properly:
+coefficient = mstats.pearsonr(regridded_orog.array, sub_snow.array)
+print(f"The Pearson correlation coefficient is: {coefficient}")
+
+# %%
+# 9. Make a final plot showing the two arrays side-by-side and quoting the
+# determined Pearson correlation coefficient to illustrate the relatoinship
+# and its strength visually. We use 'gpos' to position the plots in two
+# columns and apply some specific axes ticks and labels for clarity.
+cfp.gopen(
+    rows=1, columns=2, top=0.85,
+    file="snow_and_orog_on_same_grid.png",
+    user_position=True,
+)
+
+# Joint configuration of the plots, including adding an overall title
+plt.suptitle(
+    (
+        "Snow cover compared to elevation for the same area of the UK "
+        f"aggregated across\n day {snow_day_daystring} with correlation "
+        "coefficient (on the same grid) of "
+        f"{coefficient.statistic:.4g} (4 s.f.)"
+    ),
+    fontsize=17,
+)
+cfp.mapset(resolution="10m")
+cfp.setvars(ocean_color="white", lake_color="white")
+label_info = {
+    "xticklabels": ("3W", "2W", "1W"),
+    "yticklabels": ("52N", "53N", "54N", "55N"),
+    "xticks": (-3, -2, -1),
+    "yticks": (52, 53, 54, 55),
+}
+
+# Plot the two contour plots as columns
+cfp.gpos(1)
+cfp.cscale("wiki_2_0_reduced")
+cfp.con(
+    regridded_orog,
+    lines=False,
+    title="Elevation (from 1km-resolution orography)",
+    colorbar_drawedges=False,
+    **label_info,
+)
+cfp.gpos(2)
+# Don't add extentions on the colourbar since it can only be 0 to 1 inclusive
+cfp.levs(min=0, max=1, step=0.1, extend="neither")
+cfp.cscale("precip_11lev", ncols=11, reverse=1)
+cfp.con(sub_snow, lines=False,
+        title="Snow cover extent (from satellite imagery)",
+        colorbar_drawedges=False,
+        **label_info
+)
+
+cfp.gclose()
diff --git a/docs/source/recipes/recipe_list.txt b/docs/source/recipes/recipe_list.txt
@@ -13,7 +13,7 @@ plot_06_recipe.html#sphx-glr-recipes-plot-06-recipe-py
 plot_07_recipe.html#sphx-glr-recipes-plot-07-recipe-py
 <div class="sphx-glr-thumbcontainer aggregate lineplot subspace" tooltip="Aggregate, Lineplot, Subspace">
 plot_08_recipe.html#sphx-glr-recipes-plot-08-recipe-py
-<div class="sphx-glr-thumbcontainer collapse contourmap" tooltip="Collapse, Contourmap, Subplot">
+<div class="sphx-glr-thumbcontainer collapse contourmap stats subplot" tooltip="Collapse, Contourmap, Statistical Operations, Subplot">
 plot_09_recipe.html#sphx-glr-recipes-plot-09-recipe-py
 <div class="sphx-glr-thumbcontainer histogram" tooltip="Histogram">
 plot_10_recipe.html#sphx-glr-recipes-plot-10-recipe-py
@@ -23,12 +23,14 @@ plot_11_recipe.html#sphx-glr-recipes-plot-11-recipe-py
 plot_12_recipe.html#sphx-glr-recipes-plot-12-recipe-py
 <div class="sphx-glr-thumbcontainer contourmap mask" tooltip=" Contourmap, Mask">
 plot_13_recipe.html#sphx-glr-recipes-plot-13-recipe-py
-<div class="sphx-glr-thumbcontainer subspace collapse contourmap lineplot" tooltip="Subspace, Collapse, Contourmap, Lineplot">
+<div class="sphx-glr-thumbcontainer subspace collapse contourmap lineplot stats" tooltip="Subspace, Collapse, Contourmap, Lineplot, Statistical Operations">
 plot_14_recipe.html#sphx-glr-recipes-plot-14-recipe-py
-<div class="sphx-glr-thumbcontainer subspace collapse contourmap" tooltip="Subspace, Collapse, Contourmap">
+<div class="sphx-glr-thumbcontainer subspace collapse contourmap stats" tooltip="Subspace, Collapse, Contourmap, Statistical Operations">
 plot_15_recipe.html#sphx-glr-recipes-plot-15-recipe-py
-<div class="sphx-glr-thumbcontainer histogram subspace" tooltip="Histogram, Subspace, Subplot">
+<div class="sphx-glr-thumbcontainer histogram subspace subplot" tooltip="Histogram, Subspace, Subplot">
 plot_16_recipe.html#sphx-glr-recipes-plot-16-recipe-py
 <div class="sphx-glr-thumbcontainer histogram subspace" tooltip="Histogram, Subspace">
 plot_17_recipe.html#sphx-glr-recipes-plot-17-recipe-py
 <div class="sphx-glr-thumbcontainer contourmap subspace subplot" tooltip="Contourmap, Subspace, Subplot">
+plot_18_recipe.html#sphx-glr-recipes-plot-18-recipe-py
+<div class="sphx-glr-thumbcontainer regrid stats" tooltip="Regrid, Statistical Operations">