owid · veronikasamborska1994 · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025
diff --git a/dag/climate.yml b/dag/climate.yml
@@ -200,6 +200,9 @@ steps:
   #
   data://garden/climate/2025-01-21/sea_surface_temperature:
     - data://meadow/climate/2025-01-21/sea_surface_temperature
+  data://garden/climate/2025-01-21/sea_surface_temperature_annual:
+    - data://garden/climate/2025-01-21/sea_surface_temperature
+
   #
   # Various sources - Long-run greenhouse gas concentration.
   #
@@ -222,6 +225,7 @@ steps:
     - data://garden/climate/2025-01-21/sea_surface_temperature
     - data://garden/climate/2025-01-21/surface_temperature_analysis
     - data://garden/epa/2024-04-17/mass_balance_us_glaciers
+    - data://garden/climate/2025-01-21/sea_surface_temperature_annual
   #
   # Various sources - Climate change impacts (monthly).
   #

diff --git a/etl/steps/data/garden/climate/2025-01-21/climate_change_impacts.py b/etl/steps/data/garden/climate/2025-01-21/climate_change_impacts.py
@@ -67,52 +67,55 @@ def run(dest_dir: str) -> None:
     #
     # Load GISS dataset surface temperature analysis, and read monthly data.
     ds_giss = paths.load_dataset("surface_temperature_analysis")
-    tb_giss = ds_giss["surface_temperature_analysis"].reset_index()
+    tb_giss = ds_giss.read("surface_temperature_analysis")
 
     # Load NSIDC dataset of sea ice index.
     ds_nsidc = paths.load_dataset("sea_ice_index")
-    tb_nsidc = ds_nsidc["sea_ice_index"].reset_index()
+    tb_nsidc = ds_nsidc.read("sea_ice_index")
 
     # Load Met Office dataset on sea surface temperature.
     ds_met_office = paths.load_dataset("sea_surface_temperature")
-    tb_met_office = ds_met_office["sea_surface_temperature"].reset_index()
+    tb_met_office = ds_met_office.read("sea_surface_temperature")
+
+    ds_met_office_annual = paths.load_dataset("sea_surface_temperature_annual")
+    tb_met_office_annual = ds_met_office_annual.read("sea_surface_temperature")
 
     # Load NOAA/NCIE dataset on ocean heat content.
     ds_ocean_heat = paths.load_dataset("ocean_heat_content", namespace="climate")
-    tb_ocean_heat_monthly = ds_ocean_heat["ocean_heat_content_monthly"].reset_index()
-    tb_ocean_heat_annual = ds_ocean_heat["ocean_heat_content_annual"].reset_index()
+    tb_ocean_heat_monthly = ds_ocean_heat.read("ocean_heat_content_monthly")
+    tb_ocean_heat_annual = ds_ocean_heat.read("ocean_heat_content_annual")
 
     # Load EPA's compilation of data on ocean heat content.
     ds_epa = paths.load_dataset("ocean_heat_content", namespace="epa")
-    tb_ocean_heat_annual_epa = ds_epa["ocean_heat_content"].reset_index()
+    tb_ocean_heat_annual_epa = ds_epa.read("ocean_heat_content")
 
     # Load ocean pH data from the School of Ocean and Earth Science and Technology.
     ds_ocean_ph = paths.load_dataset("ocean_ph_levels")
-    tb_ocean_ph = ds_ocean_ph["ocean_ph_levels"].reset_index()
+    tb_ocean_ph = ds_ocean_ph.read("ocean_ph_levels")
 
     # Load snow cover extent from Rutgers University Global Snow Lab.
     ds_snow = paths.load_dataset("snow_cover_extent")
-    tb_snow = ds_snow["snow_cover_extent"].reset_index()
+    tb_snow = ds_snow.read("snow_cover_extent")
 
     # Load ice sheet mass balance data from EPA.
     ds_ice_sheet = paths.load_dataset("ice_sheet_mass_balance")
-    tb_ice_sheet = ds_ice_sheet["ice_sheet_mass_balance"].reset_index()
+    tb_ice_sheet = ds_ice_sheet.read("ice_sheet_mass_balance")
 
     # Load annual data on mass balance of US glaciers from EPA.
     ds_us_glaciers = paths.load_dataset("mass_balance_us_glaciers")
-    tb_us_glaciers = ds_us_glaciers["mass_balance_us_glaciers"].reset_index()
+    tb_us_glaciers = ds_us_glaciers.read("mass_balance_us_glaciers")
 
     # Load monthly greenhouse gas concentration data from NOAA/GML.
     ds_gml = paths.load_dataset("ghg_concentration")
-    tb_gml = ds_gml["ghg_concentration"].reset_index()
+    tb_gml = ds_gml.read("ghg_concentration")
 
     # Load long-run yearly greenhouse gas concentration data.
     ds_ghg = paths.load_dataset("long_run_ghg_concentration")
-    tb_ghg = ds_ghg["long_run_ghg_concentration"].reset_index()
+    tb_ghg = ds_ghg.read("long_run_ghg_concentration")
 
     # Load global sea level.
     ds_sea_level = paths.load_dataset("global_sea_level")
-    tb_sea_level = ds_sea_level["global_sea_level"].reset_index()
+    tb_sea_level = ds_sea_level.read("global_sea_level")
 
     #
     # Process data.
@@ -150,7 +153,13 @@ def run(dest_dir: str) -> None:
 
     # Gather annual data from different tables.
     tb_annual = tb_ocean_heat_annual.copy()
-    for table in [arctic_sea_ice_extent, antarctic_sea_ice_extent, tb_ghg, tb_us_glaciers.astype({"year": int})]:
+    for table in [
+        tb_met_office_annual,
+        arctic_sea_ice_extent,
+        antarctic_sea_ice_extent,
+        tb_ghg,
+        tb_us_glaciers.astype({"year": int}),
+    ]:
         tb_annual = tb_annual.merge(
             table,
             how="outer",

diff --git a/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature.meta.yml b/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature.meta.yml
@@ -1,9 +1,19 @@
 definitions:
   common:
-    presentation:
-      topic_tags:
-      - Climate Change
-    processing_level: minor
+    description_from_producer: |-
+       The 1961-90 period is most often used as a baseline because it is the period recommended by the World Meteorological Organisation. In some cases other periods are used. For global average temperatures, an 1861-1890 period is sometimes used to show the warming since the "pre-industrial" period.
+    description_processing: |-
+      We switch from using 1961-1990 to using 1861-1890 as our baseline to better show how temperatures have changed since pre-industrial times. For each region, we calculate the mean temperature anomalies for 1961–1990 and for 1861–1890. The difference between these two means serves as the adjustment factor. This factor is applied uniformly to both the temperature anomalies and the confidence intervals to ensure that both the central values and the associated uncertainty bounds are correctly shifted relative to the new 1861–1890 baseline.
+    processing_level: major
+    unit: °C
+    short_unit: °C
+    display:
+      numDecimalPlaces: 1
+  presentation:
+    topic_tags:
+    - Climate Change
+    grapher_config:
+      note: The period 1861–1890 is used as the baseline to measure temperature changes relative to pre-industrial times, [as recommended by climate research institutions](https://www.metoffice.gov.uk/hadobs/indicators/index.html#:~:text=The%201961%2D90%20period%20is,other%20parts%20of%20the%20world.).
 
 dataset:
   title: Sea surface temperature
@@ -13,17 +23,20 @@ tables:
   sea_surface_temperature:
     variables:
       sea_temperature_anomaly:
-        title: "Monthly sea surface temperature anomaly"
-        description_short: Measured in degrees Celsius.
-        unit: °C
-        short_unit: °C
+        title: Monthly sea surface temperature anomalies
+        description_short: The deviation of the average sea surface temperature measured at a nominal depth of 20cm from the 1861-1890 mean, in degrees Celsius.
+        description_key:
+        - Temperature anomalies show how many degrees Celsius temperatures have changed compared to the 1861-1890 period. This baseline period is commonly used to highlight the changes in temperature since pre-industrial times, prior to major human impacts.
+        - The data includes separate measurements for the Northern and Southern Hemispheres, which helps researchers analyze regional differences.
+        - This data is based on the HadISST method. This method averages temperature measurements onto a fixed grid. If no data is available for a grid cell, it remains empty and adds extra uncertainty when calculating averages like the global mean.
+        - Despite different approaches, HadISST and other methods show similar global temperature trends.
       sea_temperature_anomaly_low:
-        title: "Monthly sea surface temperature anomaly (lower bound)"
-        description_short: Measured in degrees Celsius.
-        unit: °C
-        short_unit: °C
+        title: Monthly sea surface temperature anomalies (lower bound)
+        description_short: The lower bound of the 95% confidence interval for the monthly sea surface temperature anomaly. The anomaly is the deviation of the average sea surface temperature, measured at a nominal depth of 20 cm, from the 1861–1890 mean, in degrees Celsius.
+        description_key:
+          - The lower bound marks the bottom of the 95% confidence interval for the monthly sea surface temperature anomalies. The interval between the lower and upper bounds is the range within which the true value is expected to fall with 95% certainty.
       sea_temperature_anomaly_high:
-        title: "Monthly sea surface temperature anomaly (upper bound)"
-        description_short: Measured in degrees Celsius.
-        unit: °C
-        short_unit: °C
+        title: Monthly sea surface temperature anomalies (upper bound)
+        description_short: The upper bound of the 95% confidence interval for the monthly sea surface temperature anomaly. The anomaly is the deviation of the average sea surface temperature, measured at a nominal depth of 20 cm, from the 1861–1890 mean, in degrees Celsius.
+        description_key:
+          - The upper bound marks the top of the 95% confidence interval for the monthly sea surface temperature anomalies. The interval between the lower and upper bounds is the range within which the true value is expected to fall with 95% certainty.
diff --git a/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature.py b/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature.py
@@ -23,17 +23,36 @@ def run(dest_dir: str) -> None:
     #
     # Load meadow dataset and read its main table.
     ds_meadow = paths.load_dataset("sea_surface_temperature")
-    tb = ds_meadow["sea_surface_temperature"].reset_index()
+    tb = ds_meadow.read("sea_surface_temperature")
 
     #
     # Process data.
     #
     # Select and rename columns.
     tb = tb[list(COLUMNS)].rename(columns=COLUMNS, errors="raise")
 
+    # Switch from using 1961-1990 to using 1861-1890 as our baseline to better show how temperatures have changed since pre-industrial times.
+    # Calculate the adjustment factors based only on the central estimate (sea_temperature_anomaly), not on its bounds.
+    adjustment_factors = (
+        tb[tb["year"].between(1961, 1990)].groupby("location")["sea_temperature_anomaly"].mean()
+        - tb[tb["year"].between(1861, 1890)].groupby("location")["sea_temperature_anomaly"].mean()
+    )
+    # Columns to shift to the new baseline: the central estimate and both bounds of its confidence interval.
+    # All three receive the same per-location offset, so the width of the interval is unchanged.
+    columns_to_adjust = [
+        "sea_temperature_anomaly",
+        "sea_temperature_anomaly_low",
+        "sea_temperature_anomaly_high",
+    ]
+
+    # Apply the temperature_anomaly adjustment factor
+    # The adjustment factor is applied uniformly to the temperature anomalies and their confidence intervals to ensure that both the central values and the associated uncertainty bounds are correctly shifted relative to the new 1861–1890 baseline.
+    for region in adjustment_factors.index:
+        for column in columns_to_adjust:
+            tb.loc[tb["location"] == region, column] += adjustment_factors[region]
+
     # Create a date column (assume the middle of the month for each monthly data point).
     tb["date"] = tb["year"].astype(str) + "-" + tb["month"].astype(str).str.zfill(2) + "-15"
-
     # Remove unnecessary columns.
     tb = tb.drop(columns=["year", "month"], errors="raise")
 

diff --git a/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature_annual.meta.yml b/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature_annual.meta.yml
@@ -0,0 +1,44 @@
+definitions:
+  common:
+    description_from_producer: |-
+       The 1961-90 period is most often used as a baseline because it is the period recommended by the World Meteorological Organisation. In some cases other periods are used. For global average temperatures, an 1861-1890 period is sometimes used to show the warming since the "pre-industrial" period.
+    description_processing: |-
+      Annual sea surface anomalies were calculated by averaging the monthly anomalies in a given year.
+
+      We switch from using 1961-1990 to using 1861-1890 as our baseline to better show how temperatures have changed since pre-industrial times. For each region, we calculate the mean temperature anomalies for 1961–1990 and for 1861–1890. The difference between these two means serves as the adjustment factor. This factor is applied uniformly to both the temperature anomalies and the confidence intervals to ensure that both the central values and the associated uncertainty bounds are correctly shifted relative to the new 1861–1890 baseline.
+    processing_level: major
+    unit: °C
+    short_unit: °C
+    display:
+      numDecimalPlaces: 1
+  presentation:
+    topic_tags:
+    - Climate Change
+    grapher_config:
+      note: The period 1861–1890 is used as the baseline to measure temperature changes relative to pre-industrial times, [as recommended by climate research institutions](https://www.metoffice.gov.uk/hadobs/indicators/index.html#:~:text=The%201961%2D90%20period%20is,other%20parts%20of%20the%20world.).
+
+dataset:
+  title: Sea surface temperature - annual
+  update_period_days: 60
+
+tables:
+  sea_surface_temperature:
+    variables:
+      sea_temperature_anomaly:
+        title: Annual sea surface temperature anomalies
+        description_short: The deviation of the average sea surface temperature measured at a nominal depth of 20cm from the 1861-1890 mean, in degrees Celsius.
+        description_key:
+        - Temperature anomalies show how many degrees Celsius temperatures have changed compared to the 1861-1890 period. This baseline period is commonly used to highlight the changes in temperature since pre-industrial times, prior to major human impacts.
+        - The data includes separate measurements for the Northern and Southern Hemispheres, which helps researchers analyze regional differences.
+        - This data is based on the HadISST method. This method averages temperature measurements onto a fixed grid. If no data is available for a grid cell, it remains empty and adds extra uncertainty when calculating averages like the global mean.
+        - Despite different approaches, HadISST and other methods show similar global temperature trends.
+      sea_temperature_anomaly_low:
+        title: Annual sea surface temperature anomalies (lower bound)
+        description_short: The lower bound of the 95% confidence interval for the annual sea surface temperature anomaly. The anomaly is the deviation of the average sea surface temperature, measured at a nominal depth of 20 cm, from the 1861–1890 mean, in degrees Celsius.
+        description_key:
+          - The lower bound marks the bottom of the 95% confidence interval for the annual sea surface temperature anomalies. The interval between the lower and upper bounds is the range within which the true value is expected to fall with 95% certainty.
+      sea_temperature_anomaly_high:
+        title: Annual sea surface temperature anomalies (upper bound)
+        description_short: The upper bound of the 95% confidence interval for the annual sea surface temperature anomaly. The anomaly is the deviation of the average sea surface temperature, measured at a nominal depth of 20 cm, from the 1861–1890 mean, in degrees Celsius.
+        description_key:
+          - The upper bound marks the top of the 95% confidence interval for the annual sea surface temperature anomalies. The interval between the lower and upper bounds is the range within which the true value is expected to fall with 95% certainty.
diff --git a/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature_annual.py b/etl/steps/data/garden/climate/2025-01-21/sea_surface_temperature_annual.py
@@ -0,0 +1,42 @@
+"""Load the garden dataset of sea surface temperature and create a garden dataset of annual averages."""
+
+import pandas as pd
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load the garden sea surface temperature dataset (the input this step averages by year) and read its main table.
+    ds_garden = paths.load_dataset("sea_surface_temperature")
+    tb = ds_garden.read("sea_surface_temperature")
+
+    #
+    # Process data.
+    #
+    # Extract the calendar year from the date column, so that rows can be grouped by year.
+    tb["year"] = pd.to_datetime(tb["date"]).dt.year
+
+    # Average the anomaly and its bounds per year and location (each year is averaged over whichever rows it has).
+    tb = tb.groupby(["year", "location"], as_index=False).agg(
+        {
+            "sea_temperature_anomaly": "mean",
+            "sea_temperature_anomaly_low": "mean",
+            "sea_temperature_anomaly_high": "mean",
+        }
+    )
+
+    # Set an appropriate index and sort conveniently.
+    tb = tb.format(["location", "year"])
+
+    #
+    # Save outputs.
+    #
+    # Create a new garden dataset with the table of annual averages.
+    ds_garden = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True)
+    ds_garden.save()
diff --git a/etl/steps/data/garden/climate/2025-01-21/surface_temperature_analysis.meta.yml b/etl/steps/data/garden/climate/2025-01-21/surface_temperature_analysis.meta.yml
@@ -1,8 +1,26 @@
 definitions:
   common:
-    presentation:
-      topic_tags:
-      - Climate Change
+    description_short: |-
+        The deviation of the average land-sea surface temperature from the 1880-1900 mean, in degrees Celsius.
+    description_key:
+      - Temperature anomalies show how many degrees Celsius temperatures have changed compared to the 1880-1900 period.
+      - While the 1861–1890 baseline period is commonly used to highlight changes in temperature since pre-industrial times, here we use 1880–1900 instead, as monthly data on temperature anomalies in this dataset is only available from 1880.
+      - Temperature averages and anomalies are calculated over all land and ocean surfaces.
+      - The data includes separate measurements for the Northern and Southern Hemispheres, which helps researchers analyze regional differences.
+      - This data is based on the GISS method which estimates temperatures in areas without data by using information from nearby locations within 1200 kilometers. This is especially useful in the Arctic and Antarctic, where measurements are sparse.
+      - Despite different approaches, GISS and other methods show similar global temperature trends.
+    description_processing: |-
+      We switch from using 1951-1980 to using 1880-1900 as our baseline to better show how temperatures have changed since pre-industrial times. For each region, we calculate the mean temperature anomalies for 1951–1980 and for 1880-1900. The difference between these two means serves as the adjustment factor. This factor is applied uniformly to both the temperature anomalies and the confidence intervals to ensure that both the central values and the associated uncertainty bounds are correctly shifted relative to the new 1880-1900 baseline.
+    processing_level: major
+    unit: °C
+    short_unit: °C
+    display:
+      numDecimalPlaces: 1
+  presentation:
+    topic_tags:
+    - Climate Change
+    grapher_config:
+      note: Because observations start in 1880, the 1880–1900 period is used as the baseline for measuring temperature changes relative to pre-industrial times instead of the standard 1861–1890 period [recommended by climate research institutions](https://www.metoffice.gov.uk/hadobs/indicators/index.html#:~:text=The%201961%2D90%20period%20is,other%20parts%20of%20the%20world.).
 
 dataset:
   title: GISS surface temperature analysis
@@ -12,9 +30,5 @@ tables:
   surface_temperature_analysis:
     variables:
       temperature_anomaly:
-        title: "Global warming: monthly temperature anomaly"
-        description_short: |-
-          Combined land-surface air and sea-surface water temperature anomaly, given as the deviation from the 1951-1980 mean, in degrees Celsius.
-        unit: °C
-        short_unit: °C
-        processing_level: minor
+        title: "Global warming: monthly temperature anomalies"
+