Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unet Prototype & Pipeline #54

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
2c79753
unet: Add sub pixi env for tensorflow linux env and copy of notebook …
JuLieAlgebra Mar 24, 2024
ca100c0
metrics: Committing numba jitted displacement metrics, edge processin…
JuLieAlgebra Mar 27, 2024
28911fe
Unet: Added to environment, continuted prototyping work with sobel ba…
JuLieAlgebra Mar 27, 2024
913e4d3
training pipeline: Added data loader and utility functions from colab…
JuLieAlgebra Mar 27, 2024
d3e9b67
Organization, documentation: Added additional functions from colab no…
JuLieAlgebra Mar 28, 2024
f618d26
Bugfix training: Fixed major bug in data loader with extracting date …
JuLieAlgebra Mar 28, 2024
e559667
bugfix, modeling: Fixed bug with sobel edge detection layer, misc org…
JuLieAlgebra Mar 28, 2024
4504640
edge demo: Demonstrate ice edge evaluation pipeline for cross model c…
JuLieAlgebra Mar 29, 2024
e0e1421
dataloaders: Fixes to binary target dataloader
JuLieAlgebra Mar 29, 2024
5115ade
metrics: Jitted combination of rms and average displacement metric ca…
JuLieAlgebra Mar 30, 2024
d59bfa5
sobel model: Sobel model trains and loads from file with custom layer…
JuLieAlgebra Mar 30, 2024
eb2d43d
environment updates: Added project dependency as explicit for the set…
JuLieAlgebra Mar 30, 2024
fc3d7e9
dependencies(tensorflow): recomputed entire dependency graph to get t…
JuLieAlgebra Mar 31, 2024
fd6f5b5
Unspecified changes to process edges and dataloaders
JuLieAlgebra Oct 9, 2024
72e7a92
config, org: Start unet model module, lake mask configuration
JuLieAlgebra Oct 9, 2024
0c93a94
model checkpoints: Add model parameter json file
JuLieAlgebra Oct 9, 2024
23d8d8e
Copy of unet modeling from april 26, plotting notebook for edge metri…
JuLieAlgebra Oct 9, 2024
4405201
Edge metrics pipeline from April 11
JuLieAlgebra Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions config/lake_mask.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[ApplyLakeMaskNetCDF]
# Full relative directory is data/input_dir, data/output_dir
input_dir = "ims_netcdf_1km_cropped_2_000km_window_74lat_-170lon"

# Change this output_dir name to reflect the window size and center coordinates selected
# Example: _50000x_15000y
output_dir = "masked_ims_netcdf_1km_cropped_2_000km_window_74lat_-170lon"

# Final shape will be (window_size, window_size)
window_size = 2000 #km

mask_filepath = "lake_mask_74lat_-170lon.nc"
4 changes: 2 additions & 2 deletions config/preprocess_netcdf.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ input_dir = "ims_1km"

# Change this output_dir name to reflect the window size and center coordinates selected
# Example: _50000x_15000y
output_dir = "ims_netcdf_1km_cropped_2_000km_window"
output_dir = "ims_netcdf_1km_cropped_2_000km_window_74lat_-170lon"

# Final shape will be (window_size, window_size)
window_size = 2000 #km

# Longitude, latitude coordinates IN DEGREES for center of cropped window.
# Defaults to center of current grid system if either is "None"
center_latitude = 74.0
center_longitude = -138.0
center_longitude = -170.0
Empty file added icedyno/metrics/__init__.py
Empty file.
130 changes: 130 additions & 0 deletions icedyno/metrics/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import numba
import numpy as np

## Metrics ##


@numba.jit(nopython=True)
def average_and_rms_ice_edge_displacement(observed_edges, model_edges):
    """
    Calculate both the average (D_AVG_IE) and RMS ice edge displacement (D_RMS_IE) between observed and model ice edges.
    Credit: Validation metrics for ice edge position forecasts, Melsom et al., 2019.

    Parameters:
    - observed_edges: numpy array of shape (N, 2), where N is the number of observed ice edge points,
                      and each point is represented by its (x, y) coordinates.
    - model_edges: numpy array of shape (M, 2), where M is the number of model ice edge points,
                   and each point is represented by its (x, y) coordinates.

    Returns:
    - D_AVG_IE: The average displacement between the observed and model ice edges.
    - D_RMS_IE: The root mean square displacement between the observed and model ice edges.

    Note: both input arrays must be non-empty.
    """
    n_observed = observed_edges.shape[0]
    n_model = model_edges.shape[0]

    # Preallocate typed arrays for the nearest-neighbour distances instead of
    # appending to Python lists: reflected lists are deprecated in numba
    # nopython mode and noticeably slower than contiguous float arrays.
    observed_to_model_distances = np.empty(n_observed)
    model_to_observed_distances = np.empty(n_model)

    # Distance from each observed point to the nearest model point.
    for i in range(n_observed):
        distances = np.sqrt(np.sum((model_edges - observed_edges[i]) ** 2, axis=1))
        observed_to_model_distances[i] = np.min(distances)

    # Distance from each model point to the nearest observed point.
    for j in range(n_model):
        distances = np.sqrt(np.sum((observed_edges - model_edges[j]) ** 2, axis=1))
        model_to_observed_distances[j] = np.min(distances)

    # Average displacement: mean of the two directed average displacements.
    avg_displacement = (
        np.mean(observed_to_model_distances) + np.mean(model_to_observed_distances)
    ) / 2

    # RMS displacement over the union of both directed distance sets.
    rms_displacement = np.sqrt(
        (
            np.sum(observed_to_model_distances**2)
            + np.sum(model_to_observed_distances**2)
        )
        / (n_observed + n_model)
    )

    return avg_displacement, rms_displacement


@numba.jit(nopython=True)
def average_ice_edge_displacement(observed_edges, model_edges):
    """
    Calculate the average ice edge displacement (D_AVG_IE) between observed and model ice edges.
    Credit: Validation metrics for ice edge position forecasts, Melsom et al., 2019.

    Parameters:
    - observed_edges: numpy array of shape (N, 2), where N is the number of observed ice edge points,
                      and each point is represented by its (x, y) coordinates.
    - model_edges: numpy array of shape (M, 2), where M is the number of model ice edge points,
                   and each point is represented by its (x, y) coordinates.

    Returns:
    - D_AVG_IE: The average displacement between the observed and model ice edges.

    Note: both input arrays must be non-empty.
    """
    n_observed = observed_edges.shape[0]
    n_model = model_edges.shape[0]

    # Preallocate typed arrays instead of appending to Python lists; reflected
    # lists are deprecated in numba nopython mode and slower.
    observed_to_model_distances = np.empty(n_observed)
    model_to_observed_distances = np.empty(n_model)

    # Distance from each observed point to the nearest model point.
    for i in range(n_observed):
        distances = np.sqrt(np.sum((model_edges - observed_edges[i]) ** 2, axis=1))
        observed_to_model_distances[i] = np.min(distances)

    # Distance from each model point to the nearest observed point.
    for j in range(n_model):
        distances = np.sqrt(np.sum((observed_edges - model_edges[j]) ** 2, axis=1))
        model_to_observed_distances[j] = np.min(distances)

    # Mean of the two directed average displacements.
    return (
        np.mean(observed_to_model_distances) + np.mean(model_to_observed_distances)
    ) / 2


@numba.jit(nopython=True)
def root_mean_square_ice_edge_displacement(observed_edges, model_edges):
    """
    Calculate the root mean square ice edge displacement (D_RMS_IE) between observed and model ice edges.

    Parameters:
    - observed_edges: numpy array of shape (N, 2), where N is the number of observed ice edge points,
                      and each point is represented by its (x, y) coordinates.
    - model_edges: numpy array of shape (M, 2), where M is the number of model ice edge points,
                   and each point is represented by its (x, y) coordinates.

    Returns:
    - D_RMS_IE: The root mean square displacement between the observed and model ice edges.

    Note: both input arrays must be non-empty.
    """
    n_observed = observed_edges.shape[0]
    n_model = model_edges.shape[0]

    # Preallocate typed arrays of squared nearest-neighbour distances instead
    # of Python lists; reflected lists are deprecated in numba nopython mode.
    observed_to_model_sq = np.empty(n_observed)
    model_to_observed_sq = np.empty(n_model)

    # Squared distance from each observed point to the nearest model point.
    for i in range(n_observed):
        distances = np.sqrt(np.sum((model_edges - observed_edges[i]) ** 2, axis=1))
        observed_to_model_sq[i] = np.min(distances) ** 2

    # Squared distance from each model point to the nearest observed point.
    for j in range(n_model):
        distances = np.sqrt(np.sum((observed_edges - model_edges[j]) ** 2, axis=1))
        model_to_observed_sq[j] = np.min(distances) ** 2

    # RMS over the union of both directed distance sets.
    return np.sqrt(
        (np.sum(observed_to_model_sq) + np.sum(model_to_observed_sq))
        / (n_observed + n_model)
    )
165 changes: 165 additions & 0 deletions icedyno/metrics/process_edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import glob
import os
import pathlib

import luigi
import numpy as np
import tensorflow as tf
import xarray as xr


def image_batch_to_binary_edge_image(image_batch: np.ndarray) -> np.ndarray:
    """
    Perform Sobel edge detection on an (optionally batched) ice/no-ice image.

    Parameters:
        image_batch: array of shape (batch_size, window_size, window_size)
            whose values round to 0 (no ice) or 1 (ice).

    Returns:
        float32 numpy array of shape (batch_size, window_size, window_size, 1)
        where nonzero entries mark edge pixels (1.0 when one Sobel direction
        fires, 2.0 when both do).
    """
    binary_ice_no_ice = np.round(image_batch)
    # BUGFIX: the previous check (np.unique(...) == [0, 1]) required BOTH
    # classes to be present and so rejected valid all-water or all-ice frames.
    # Only require that every rounded value is 0 or 1.
    assert np.all((binary_ice_no_ice == 0) | (binary_ice_no_ice == 1))

    # Sobel expects input to have dimension (batch, window, window, 1)
    batch_tensor = tf.expand_dims(tf.convert_to_tensor(binary_ice_no_ice), axis=-1)

    sobel_edges = tf.image.sobel_edges(batch_tensor)
    sobel_x = sobel_edges[..., 0]
    sobel_y = sobel_edges[..., 1]

    # Threshold the squared responses and sum the x and y contributions.
    edges_x = tf.cast(tf.square(sobel_x) >= 1, tf.float32)
    edges_y = tf.cast(tf.square(sobel_y) >= 1, tf.float32)
    edge_magnitudes = edges_x + edges_y
    return edge_magnitudes.numpy()


def process_binary_edge_image_into_coordinates(activated: np.ndarray) -> np.ndarray:
    """
    Convert an edge-activation image into an array of edge pixel indices.

    Takes a numpy array of edge detected pixels (not SIE).
    Cannot take in a tensorflow tensor.
    Input array is the result of running edge-detection on Ice/No Ice array.

    Parameters:
        activated: numpy array; pixels whose value rounds to >= 1 are treated
            as sea-ice edge. (Note np.round rounds halves to even, so exactly
            0.5 rounds to 0 and is NOT an edge.)

    Returns:
        Integer index array of shape (N Edges, 2); each row is the (row, col)
        position of one edge pixel, in row-major scan order.
    """
    # np.argwhere returns the stacked coordinate pairs directly — equivalent
    # to np.where followed by np.stack(..., axis=-1), in a single call.
    return np.argwhere(np.round(activated) >= 1)


class ApplyLakeMaskNetCDF(luigi.Task):
    """
    Luigi task that applies a land/lake mask to one year of cropped IMS NetCDF files.

    Each input file's surface values are "trinarized" by apply_lake_mask_netcdf:
      0 - open water (every IMS value other than 2, 3, 4)
      1 - sea and lake ice (IMS value 3)
      2 - land and snow-covered land (IMS values 2 and 4), plus all lake-mask pixels

    Reads data/<input_dir>/<year>/*.nc and writes one masked NetCDF per input
    file into data/<output_dir>/<year>/.
    See config/lake_mask.toml for configuration settings.
    """

    input_dir = luigi.Parameter()  # subdirectory of data/ holding cropped input NetCDFs
    output_dir = luigi.Parameter()  # subdirectory of data/ to write masked NetCDFs into

    window_size = luigi.IntParameter(default=4000)  # km
    mask_filepath = luigi.Parameter()  # lake-mask NetCDF path, relative to data/

    year = luigi.IntParameter()  # Determined at runtime

    def output(self) -> luigi.LocalTarget:
        """Per-year success marker; Luigi treats the task as done once it exists."""
        return luigi.LocalTarget(
            os.path.join("data", self.output_dir, f"_SUCCESS_{self.year}")
        )

    def run(self) -> None:
        """Mask every input NetCDF for self.year, then write the success marker."""
        year_output_dir = os.path.join("data", self.output_dir, str(self.year))
        os.makedirs(year_output_dir, exist_ok=True)

        input_cdf_files = glob.glob(
            os.path.join("data", self.input_dir, str(self.year), "*.nc")
        )

        for cdf_filepath in input_cdf_files:
            # Set output file name and check if the output file already exists on disk.
            output_filename = (
                os.path.join(year_output_dir, pathlib.Path(cdf_filepath).stem) + ".nc"
            )

            # Don't recompute file if the expected filename in the output folder already exists.
            if os.path.exists(output_filename):
                print(cdf_filepath, "already on disk, skipping...")
                continue

            with xr.open_dataset(cdf_filepath, engine="h5netcdf") as ds:
                ds = apply_lake_mask_netcdf(
                    ds, os.path.join("data", self.mask_filepath)
                )

            # Write the cropped data to a new NetCDF file
            ds.to_netcdf(output_filename, engine="h5netcdf")

        # BUGFIX: the _SUCCESS marker declared by output() was never written, so
        # Luigi considered this task incomplete and re-ran it on every invocation.
        with self.output().open("w") as marker:
            marker.write("")


def apply_lake_mask_netcdf(ds: xr.Dataset, mask_filepath: str) -> xr.Dataset:
    """Also sends the data to 0 (open water), 1 (sea ice) and 2 (land).

    Trinarizes the first time slice of ds.IMS_Surface_Values:
      0 - open water (any value other than 3, before land remapping)
      1 - sea and lake ice (IMS value 3)
      2 - land (IMS value 2), snow-covered land (IMS value 4), and every
          pixel flagged by the lake mask.

    Parameters:
        ds: dataset with an IMS_Surface_Values variable whose first axis is
            time; only slice [0] is read and rewritten.
        mask_filepath: NetCDF file whose IMS_Surface_Values[0] is truthy over
            the pixels to force to class 2.
            # NOTE(review): assumed mask semantics (nonzero/non-NaN == lake) —
            # confirm against how lake_mask_*.nc is generated.

    Returns:
        The same dataset object, mutated in place, with asserts guaranteeing
        the result contains only {0, 1, 2} and no NaNs.
    """
    with xr.open_dataset(mask_filepath, engine="h5netcdf") as mask_ds:
        # Copy so mutations below don't touch the (possibly lazy) mask dataset.
        land_lake_mask = mask_ds.IMS_Surface_Values[0].values.copy()

    unmasked_sie = ds.IMS_Surface_Values[0].values.copy()

    masked_sie = unmasked_sie.copy()
    # Everything that is not sea/lake ice starts as open water (0).
    masked_sie[unmasked_sie != 3] = 0

    # Sea and Lake Ice is treated as 1
    masked_sie[unmasked_sie == 3] = 1

    # Land and Snow-Covered Land is sent to 2.
    masked_sie[unmasked_sie == 2] = 2
    masked_sie[unmasked_sie == 4] = 2

    # Apply lake mask
    # NaNs in the mask are treated as "not masked" before the boolean cast.
    land_lake_mask[np.isnan(land_lake_mask)] = 0
    masked_sie[land_lake_mask.astype(bool)] = 2

    # Restore the leading time axis so the array matches (time, y, x).
    masked_sie = masked_sie.reshape((1, masked_sie.shape[0], masked_sie.shape[1]))
    data_xr = xr.DataArray(
        masked_sie,
        coords={"y": ds.y, "x": ds.x, "time": ds.time},
        dims=["time", "y", "x"],
    )
    assert np.sum(np.isnan(data_xr)) == 0

    # Write the trinarized values back into the dataset variable in place.
    ds["IMS_Surface_Values"].loc[:, :] = data_xr
    # Sanity checks: only classes {0, 1, 2} remain and no NaNs were introduced.
    # NOTE(review): the first assert fails for frames missing a class (e.g. no
    # ice anywhere), since np.unique then has fewer than 3 values — confirm
    # every input window is guaranteed to contain water, ice, and land.
    assert np.allclose(np.unique(ds.IMS_Surface_Values.values), np.array([0, 1, 2]))
    assert np.allclose(
        np.where(np.isnan(ds.IMS_Surface_Values.values[0])),
        (np.array([], dtype=np.int64), np.array([], dtype=np.int64)),
    )

    return ds


if __name__ == "__main__":
    # Tell Luigi to parse its configuration as TOML, then point it at the
    # lake-mask settings file.
    os.environ["LUIGI_CONFIG_PARSER"] = "toml"
    toml_config_path = os.path.join("config", "lake_mask.toml")

    luigi_config = luigi.configuration.get_config(parser="toml")
    luigi_config.read(toml_config_path)
    luigi.configuration.add_config_path(toml_config_path)

    # Change according to your number of cores
    n_workers = 10

    # One masking task per year of IMS data (2015 through 2024 inclusive).
    yearly_tasks = [ApplyLakeMaskNetCDF(year=y) for y in range(2015, 2025)]
    luigi.build(yearly_tasks, workers=n_workers, local_scheduler=True)
2 changes: 1 addition & 1 deletion icedyno/preprocess/crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def rotate_netcdf(ds: xr.Dataset) -> xr.Dataset:
luigi.configuration.add_config_path(config_path)

## Change according to your number of cores
n_workers = 6
n_workers = 2
years = range(2015, 2025)

tasks = [CropRotateNetCDF(year=year) for year in years]
Expand Down
Empty file added icedyno/training/__init__.py
Empty file.
Loading
Loading