From b904455cf0bf964609d17dfbff4489fcd85ef65a Mon Sep 17 00:00:00 2001 From: Sharon Fitzpatrick Date: Fri, 26 Apr 2024 10:50:42 -0700 Subject: [PATCH] add new function to SDS_tools get_nearest_datapoint to handle tide predictions for the exact dates update jupyter notebook to be able to read tides from coastseg tide model --- example.py | 6 ++++- example_jupyter.ipynb | 13 ++++++--- src/coastsat/SDS_tools.py | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/example.py b/example.py index 52c88e3..1f6d835 100644 --- a/example.py +++ b/example.py @@ -305,7 +305,11 @@ def DecodeDateTime(readDict): # get tide levels corresponding to the time of image acquisition dates_sat = output['dates'] -tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts) +try: + tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts) +except Exception as e: + print("\nThe default method of getting the closest datapoint failed. 
Trying the second method") + tides_sat = SDS_tools.get_nearest_datapoint(dates_sat, dates_ts, tides_ts) # plot the subsampled tide data fig, ax = plt.subplots(1,1,figsize=(15,4), tight_layout=True) diff --git a/example_jupyter.ipynb b/example_jupyter.ipynb index 660f562..22d4193 100644 --- a/example_jupyter.ipynb +++ b/example_jupyter.ipynb @@ -600,15 +600,22 @@ "outputs": [], "source": [ "# load the measured tide data\n", - "filepath = os.path.join(os.getcwd(),'examples','NARRA_tides.csv')\n", + "# filepath = os.path.join(os.getcwd(),'examples','NARRA_tides.csv')\n", + "filepath = r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\scripts\\tidal_predictions.csv'\n", + "# filepath = r\"C:\\development\\doodleverse\\coastseg\\CoastSeg\\scripts\\tidal_predictions_2_points.csv\"\n", "tide_data = pd.read_csv(filepath)\n", "dates_ts = [pd.to_datetime(_).to_pydatetime() for _ in tide_data['dates']]\n", "tides_ts = np.array(tide_data['tide'])\n", "\n", "# get tide levels corresponding to the time of image acquisition\n", "dates_sat = output['dates']\n", - "tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts)\n", - "\n", + "try:\n", + " tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts)\n", + "except Exception as e:\n", + " print(\"\\nThe default method of getting the closest datapoint failed. 
def get_nearest_datapoint(dates, dates_ts, values_ts):
    """
    Returns, for each target date, the value of the time-series whose date is
    nearest to it. Exact date matches are returned directly; otherwise the
    closer of the two neighbouring time-series dates is used (on a tie the
    earlier one is chosen). The lookup is a binary search (bisect).

    Target dates that fall before the first or after the last entry of the
    time-series are NOT rejected: they are assigned the first/last value
    respectively, and a single warning reporting how many dates were affected
    is emitted so that this extrapolation does not go unnoticed.

    `dates_ts` must be sorted in ascending order, otherwise the binary search
    returns meaningless results. `dates` and `dates_ts` must be either both
    timezone-aware or both timezone-naive (Python raises a TypeError when the
    two kinds are compared).

    Arguments:
    -----------
    dates : list of datetime.datetime
        Target dates for which the nearest data points in the time-series are desired.
    dates_ts : list of datetime.datetime
        Dates in the time-series, sorted in ascending order.
    values_ts : np.array
        Values corresponding to each date in `dates_ts`.

    Returns:
    -----------
    values : np.array
        An array of values from `values_ts` that are closest to each date in `dates`.
        Empty if `dates` is empty.

    Raises:
    -----------
    TypeError
        If timezone-aware and timezone-naive datetimes are mixed.
    IndexError
        If `dates` is not empty but `values_ts` is empty.

    """
    # local import so this function does not depend on the module import block
    import warnings

    n_ts = len(dates_ts)
    nearest = []
    n_outside = 0
    for date in dates:
        # index of the first time-series date that is >= the target date
        idx = bisect.bisect_left(dates_ts, date)
        if idx < n_ts and dates_ts[idx] == date:
            # exact match found
            nearest.append(values_ts[idx])
        elif idx == 0:
            # target is earlier than the whole time-series: clamp to first value
            nearest.append(values_ts[0])
            n_outside += 1
        elif idx == n_ts:
            # target is later than the whole time-series: clamp to last value
            nearest.append(values_ts[-1])
            n_outside += 1
        else:
            # target lies strictly between two entries: keep the nearer one,
            # preferring the earlier entry when both are equally distant
            prev_date = dates_ts[idx - 1]
            next_date = dates_ts[idx]
            if (date - prev_date) <= (next_date - date):
                nearest.append(values_ts[idx - 1])
            else:
                nearest.append(values_ts[idx])

    if n_outside:
        warnings.warn(
            f"{n_outside} target date(s) fall outside the range of the "
            "time-series and were assigned its first/last value.",
            stacklevel=2,
        )

    return np.array(nearest)