Skip to content

Commit

Permalink
add new function to SDS_tools get_nearest_datapoint to handle tide pr…
Browse files Browse the repository at this point in the history
…edictions for the exact dates; update the Jupyter notebook to be able to read tides from the CoastSeg tide model
  • Loading branch information
2320sharon committed Apr 26, 2024
1 parent 3279909 commit b904455
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
6 changes: 5 additions & 1 deletion example.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,11 @@ def DecodeDateTime(readDict):

# get tide levels corresponding to the time of image acquisition
dates_sat = output['dates']
try:
    # Default lookup. It must only be called inside the try: an unguarded call
    # placed before it would raise first and the fallback below would never run.
    tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts)
except Exception as e:
    # Deliberately broad best-effort fallback (NOTE(review): the exception type
    # raised by get_closest_datapoint is not visible here - narrow this if it
    # turns out to be specific). Report the cause so real errors are not hidden.
    print("\nThe default method of getting the closest datapoint failed. Trying the second method")
    print(f"Reason: {e}")
    tides_sat = SDS_tools.get_nearest_datapoint(dates_sat, dates_ts, tides_ts)

# plot the subsampled tide data
fig, ax = plt.subplots(1,1,figsize=(15,4), tight_layout=True)
Expand Down
13 changes: 10 additions & 3 deletions example_jupyter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -600,15 +600,22 @@
"outputs": [],
"source": [
"# load the measured tide data\n",
"filepath = os.path.join(os.getcwd(),'examples','NARRA_tides.csv')\n",
"# filepath = os.path.join(os.getcwd(),'examples','NARRA_tides.csv')\n",
"filepath = r'C:\\development\\doodleverse\\coastseg\\CoastSeg\\scripts\\tidal_predictions.csv'\n",
"# filepath = r\"C:\\development\\doodleverse\\coastseg\\CoastSeg\\scripts\\tidal_predictions_2_points.csv\"\n",
"tide_data = pd.read_csv(filepath)\n",
"dates_ts = [pd.to_datetime(_).to_pydatetime() for _ in tide_data['dates']]\n",
"tides_ts = np.array(tide_data['tide'])\n",
"\n",
"# get tide levels corresponding to the time of image acquisition\n",
"dates_sat = output['dates']\n",
"tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts)\n",
"\n",
"try:\n",
" tides_sat = SDS_tools.get_closest_datapoint(dates_sat, dates_ts, tides_ts)\n",
"except Exception as e:\n",
" print(\"\\nThe default method of getting the closest datapoint failed. Trying the second method\")\n",
" tides_sat = SDS_tools.get_nearest_datapoint(dates_sat, dates_ts, tides_ts)\n",
" \n",
" \n",
"# plot the subsampled tide data\n",
"fig, ax = plt.subplots(1,1,figsize=(15,4), tight_layout=True)\n",
"ax.grid(which='major', linestyle=':', color='0.5')\n",
Expand Down
55 changes: 55 additions & 0 deletions src/coastsat/SDS_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from datetime import datetime, timedelta
from typing import List, Dict, Union, Optional

import numpy as np
import bisect

# Third-party imports
import geopandas as gpd
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -693,6 +696,58 @@ def remove_inaccurate_georef(output, accuracy):
return output_filtered


def get_nearest_datapoint(dates, dates_ts, values_ts):
    """
    Returns, for each target date, the value of the nearest sample in a time-series.

    Each target date is located in `dates_ts` with a binary search (`bisect`), so
    `dates_ts` must be sorted in ascending order. An exact match returns the value
    of the matching sample. Otherwise the closer of the two neighbouring samples
    is used; when a target date is exactly halfway between them, the earlier
    sample wins.

    No range check is performed: a target date earlier than the first entry of
    `dates_ts` gets the first value, and a target date later than the last entry
    gets the last value.

    `dates` and `dates_ts` must be mutually comparable, i.e. datetime objects that
    are either all timezone-aware or all timezone-naive.

    Arguments:
    -----------
    dates : list of datetime.datetime
        Target dates for which the nearest data points in the time-series are desired.
    dates_ts : list of datetime.datetime
        Dates of the time-series, sorted in ascending order.
    values_ts : np.array
        Values corresponding to each date in `dates_ts` (expected to have the same length).

    Returns:
    -----------
    values : np.array
        One value from `values_ts` per entry in `dates`, in the same order.

    Raises:
    -----------
    IndexError
        If `dates_ts` is empty while `dates` is not (there is nothing to look up).
    """
    n_ts = len(dates_ts)
    if n_ts == 0 and len(dates) > 0:
        # Fail early with an explicit message instead of an opaque indexing error.
        raise IndexError(
            "Cannot find nearest data points: the time-series `dates_ts` is empty"
        )

    nearest_values = []
    for date in dates:
        # Index of the first time-series date that is >= the target date.
        idx = bisect.bisect_left(dates_ts, date)
        if idx == n_ts:
            # Target is after the last sample: clamp to the last one.
            idx = n_ts - 1
        elif idx > 0 and dates_ts[idx] != date:
            # Target lies strictly between two samples: keep the closer one,
            # preferring the earlier sample on a tie.
            if (date - dates_ts[idx - 1]) <= (dates_ts[idx] - date):
                idx -= 1
        # Remaining cases (exact match, or target before the first sample)
        # already point at the right index.
        nearest_values.append(values_ts[idx])

    return np.array(nearest_values)


def get_closest_datapoint(dates, dates_ts, values_ts):
"""
Extremely efficient script to get closest data point to a set of dates from a very
Expand Down

0 comments on commit b904455

Please sign in to comment.