Skip to content

Commit

Permalink
Merge pull request #538 from opendsm/fix/ghi-sufficiency
Browse files Browse the repository at this point in the history
Add GHI sufficiency check requiring 90% coverage for each month
  • Loading branch information
jason-recurve authored Feb 6, 2025
2 parents df821df + 83bb50a commit 1daea97
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Changelog
Development
-----------

* Placeholder
* Add GHI sufficiency check requiring 90% coverage for each month

4.1.0
-----
Expand Down
21 changes: 21 additions & 0 deletions eemeter/eemeter/common/sufficiency_criteria.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,25 @@ def _check_hourly_consecutive_temperature_data(self):
)
)

def _check_monthly_ghi_percentage(self):
    """Disqualify the data when any calendar month lacks enough GHI coverage.

    The check is skipped when ``self.data`` has no ``"ghi"`` column.
    Otherwise the fraction of non-null GHI readings is computed per calendar
    month (grouped by ``self.data.index.month``) and compared against
    ``self.min_fraction_daily_coverage``. If any month falls below that
    threshold, a ``missing_monthly_ghi_data`` warning carrying the lowest
    monthly coverage is appended to ``self.disqualification``.

    Returns:
        None. The result is reported through ``self.disqualification``.
    """
    if "ghi" not in self.data.columns:
        return

    min_coverage = self.min_fraction_daily_coverage
    # The mean of the boolean not-null mask is the fraction of rows that
    # carry a GHI value.
    # NOTE(review): grouping by month number pools the same calendar month
    # from different years into one group -- confirm this is intended.
    ghi_coverage_per_month = (
        self.data["ghi"].notna().groupby(self.data.index.month).mean()
    )
    if not (ghi_coverage_per_month < min_coverage).any():
        return

    self.disqualification.append(
        EEMeterWarning(
            qualified_name="eemeter.sufficiency_criteria.missing_monthly_ghi_data",
            # Derive the percentage from the threshold actually applied so the
            # message stays accurate for any configured value (a 0.9 threshold
            # still renders as "10%").
            description=(
                f"At least one month is missing over {1 - min_coverage:.0%} of GHI data."
            ),
            data={
                "lowest_monthly_coverage": ghi_coverage_per_month.min(),
            },
        )
    )

def check_sufficiency_baseline(self):
# TODO : add caltrack check number on top of each method
self._check_no_data()
Expand All @@ -480,6 +499,7 @@ def check_sufficiency_baseline(self):
self._check_monthly_temperature_values_percentage()
self._check_monthly_meter_readings_percentage()
self._check_extreme_values()
self._check_monthly_ghi_percentage()
# TODO these will only apply to legacy, and currently do not work
# self._check_high_frequency_meter_values()
# self._check_high_frequency_temperature_values()
Expand All @@ -490,6 +510,7 @@ def check_sufficiency_reporting(self):
self._check_valid_days_percentage()
self._check_valid_temperature_values_percentage()
self._check_monthly_temperature_values_percentage()
self._check_monthly_ghi_percentage()
# self._check_high_frequency_temperature_values()


Expand Down
2 changes: 2 additions & 0 deletions eemeter/eemeter/models/hourly/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ def _create_sufficiency_df(df: pd.DataFrame):
"""Creates dataframe equivalent to legacy hourly input"""
df.loc[df["interpolated_observed"] == 1, "observed"] = np.nan
df.loc[df["interpolated_temperature"] == 1, "temperature"] = np.nan
if "ghi" in df.columns:
df.loc[df["interpolated_ghi"] == 1, "ghi"] = np.nan
# set temperature_not_null to 1.0 if temperature is not null
df["temperature_not_null"] = df["temperature"].notnull().astype(float)
df["temperature_null"] = df["temperature"].isnull().astype(float)
Expand Down
24 changes: 24 additions & 0 deletions tests/test_hourly_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def test_good_data(baseline, reporting):
reporting_data = HourlyReportingData(reporting, is_electricity_data=True)
hm = HourlyModel().fit(baseline_data)
p1 = hm.predict(reporting_data)
assert np.isclose(
p1["predicted"].sum(), 1135000, rtol=1e-2
) # quick check that model fit isn't changing drastically
serialized = hm.to_json()
hm2 = HourlyModel.from_json(serialized)
p2 = hm2.predict(reporting_data)
Expand Down Expand Up @@ -259,6 +262,27 @@ def test_monthly_percentage(baseline):
HourlyModel().fit(baseline_data)


def test_monthly_ghi_percentage(baseline):
    """Baseline data with too little GHI in a month must be disqualified."""
    # Null out GHI on days 1-4 of every month. That removes 4 of the 28-31
    # days in each month (roughly 13-14%, assuming evenly spaced readings),
    # so every month ends up missing more than 10% of its GHI data.
    invalid_ghi = baseline.copy()
    invalid_ghi.loc[invalid_ghi.index.day < 5, "ghi"] = np.nan

    baseline_data = HourlyBaselineData(invalid_ghi, is_electricity_data=True)
    assert_dq(
        baseline_data,
        [
            "eemeter.sufficiency_criteria.missing_monthly_ghi_data",
        ],
    )
    # Fitting on the disqualified data must be refused.
    with pytest.raises(DataSufficiencyError):
        HourlyModel().fit(baseline_data)


def test_hourly_fit_daily_threshold(baseline):
"""confirm that days with >50% interpolated data are excluded from fit step"""

Expand Down

0 comments on commit 1daea97

Please sign in to comment.