diff --git a/workspaces/brendon/HDF5_EDA.ipynb b/workspaces/brendon/HDF5_EDA.ipynb index 08378ff..9b9ec1c 100644 --- a/workspaces/brendon/HDF5_EDA.ipynb +++ b/workspaces/brendon/HDF5_EDA.ipynb @@ -61,28 +61,32 @@ "outputs": [], "source": [ "def convertHDF2NPY(root_dir, save_dir, sie_index_name):\n", - " pos_min = -0.25e+07\n", - " pos_max = 0.25e+07\n", + " pos_min = -0.25e07\n", + " pos_max = 0.25e07\n", " output_list = []\n", " for yr in years:\n", - " print(f'Processing {yr}')\n", + " print(f\"Processing {yr}\")\n", " hdf_dir = os.path.join(root_dir, str(yr))\n", " npy_dir = os.path.join(save_dir, str(yr))\n", " os.makedirs(npy_dir, exist_ok=True)\n", - " \n", - " for hdf5_file in glob.glob(os.path.join(hdf_dir, \"*4km*.nc\")): \n", + "\n", + " for hdf5_file in glob.glob(os.path.join(hdf_dir, \"*4km*.nc\")):\n", " # Open dataset\n", " ds = xr.open_dataset(hdf5_file)\n", "\n", " # Get date\n", " dt64 = ds[\"time\"][0].values\n", - " formatted_date = np.datetime_as_string(dt64, unit='D')\n", + " formatted_date = np.datetime_as_string(dt64, unit=\"D\")\n", "\n", " # Get values\n", " if sie_index_name == \"sea_ice_extent\":\n", " sie = np.array(ds.sel(x=slice(pos_min, pos_max)).sea_ice_extent)\n", " else:\n", - " sie = np.array(ds.sel(x=slice(pos_min, pos_max), y=slice(pos_min, pos_max)).IMS_Surface_Values)\n", + " sie = np.array(\n", + " ds.sel(\n", + " x=slice(pos_min, pos_max), y=slice(pos_min, pos_max)\n", + " ).IMS_Surface_Values\n", + " )\n", "\n", " # Close dataset\n", " ds.close()\n", @@ -99,15 +103,17 @@ " output_list.append([formatted_date, sea_ice_counts, non_sea_ice_counts])\n", "\n", " npy_file_name = os.path.join(npy_dir, formatted_date)\n", - "# np.save(npy_file_name, sie)\n", + " # np.save(npy_file_name, sie)\n", "\n", - " df = pd.DataFrame(data=output_list, columns=[\"Date\", \"Sea-ice Count\", \"Non-sea-ice Count\"])\n", - " df['Date'] = pd.to_datetime(df['Date'])\n", - " df['Year']=df['Date'].dt.year\n", + " df = pd.DataFrame(\n", + " data=output_list, columns=[\"Date\", \"Sea-ice Count\", \"Non-sea-ice Count\"]\n", + " )\n", + " df[\"Date\"] = pd.to_datetime(df[\"Date\"])\n", + " df[\"Year\"] = df[\"Date\"].dt.year\n", " display(df)\n", "\n", - " df.to_csv(f'{root_dir}/sea_ice_counts.csv', index=False)\n", - " \n", + " df.to_csv(f\"{root_dir}/sea_ice_counts.csv\", index=False)\n", + "\n", " return df" ] }, @@ -288,23 +294,28 @@ "outputs": [], "source": [ "def plot_yearly_sie(df):\n", - " fig, axs = plt.subplots(round(len(years)/2), 2, figsize=(16, 24))\n", + " fig, axs = plt.subplots(round(len(years) / 2), 2, figsize=(16, 24))\n", " axs = axs.flatten()\n", "\n", - " for i, (year, group) in enumerate(df.groupby('Year')):\n", - " axs[i].plot(group['Date'], np.log(group['Sea-ice Count']), label=f'Sea-ice Count {year}', marker='o')\n", - " axs[i].set_title(f'Sea-ice Counts for {year}', fontsize=18)\n", - " axs[i].set_xlabel('Date', fontsize=16)\n", - " axs[i].set_ylabel('Sea-ice Count (Log)', fontsize=16)\n", - " axs[i].grid(axis='y')\n", + " for i, (year, group) in enumerate(df.groupby(\"Year\")):\n", + " axs[i].plot(\n", + " group[\"Date\"],\n", + " np.log(group[\"Sea-ice Count\"]),\n", + " label=f\"Sea-ice Count {year}\",\n", + " marker=\"o\",\n", + " )\n", + " axs[i].set_title(f\"Sea-ice Counts for {year}\", fontsize=18)\n", + " axs[i].set_xlabel(\"Date\", fontsize=16)\n", + " axs[i].set_ylabel(\"Sea-ice Count (Log)\", fontsize=16)\n", + " axs[i].grid(axis=\"y\")\n", "\n", " axs[i].xaxis.set_major_formatter(DateFormatter(\"%m-%d\"))\n", - " axs[i].tick_params(axis='x', rotation=45, labelsize=12)\n", - " axs[i].tick_params(axis='y', labelsize=12)\n", + " axs[i].tick_params(axis=\"x\", rotation=45, labelsize=12)\n", + " axs[i].tick_params(axis=\"y\", labelsize=12)\n", "\n", " axs[-1].remove()\n", " plt.tight_layout()\n", - " plt.suptitle('Yearly Sea-Ice Coverage 2014-Current', fontsize=22, y=1.025)\n", + " plt.suptitle(\"Yearly Sea-Ice Coverage 2014-Current\", fontsize=22, y=1.025)\n", " plt.show()" ] }, @@ -343,13 +354,13 @@ " year_counts = {}\n", " for yr in years:\n", " if yr not in [2014, 2024]:\n", - " year_counts[yr] = np.log(np.sum(df[df['Year']==yr]['Sea-ice Count']))\n", + " year_counts[yr] = np.log(np.sum(df[df[\"Year\"] == yr][\"Sea-ice Count\"]))\n", "\n", " plt.figure(figsize=(10, 6))\n", - " plt.plot(list(year_counts.keys()), list(year_counts.values()), marker='o')\n", - " plt.xlabel('Year')\n", - " plt.ylabel('Yearly Sea-ice Count Log Sum')\n", - " plt.title('Yearly Log Sum of Sea-ice Count')\n", + " plt.plot(list(year_counts.keys()), list(year_counts.values()), marker=\"o\")\n", + " plt.xlabel(\"Year\")\n", + " plt.ylabel(\"Yearly Sea-ice Count Log Sum\")\n", + " plt.title(\"Yearly Log Sum of Sea-ice Count\")\n", " plt.grid(True)\n", " plt.show()" ] @@ -547,7 +558,9 @@ ], "source": [ "%%time\n", - "df = convertHDF2NPY(\"D:/IceDyno/IMS_images\", \"D:/IceDyno/IMS_images_npy\", \"IMS_Surface_Values\")" + "df = convertHDF2NPY(\n", + " \"D:/IceDyno/IMS_images\", \"D:/IceDyno/IMS_images_npy\", \"IMS_Surface_Values\"\n", + ")" ] }, { diff --git a/workspaces/brendon/HDF5_Xarray.ipynb b/workspaces/brendon/HDF5_Xarray.ipynb index 509232e..889657a 100644 --- a/workspaces/brendon/HDF5_Xarray.ipynb +++ b/workspaces/brendon/HDF5_Xarray.ipynb @@ -444,7 +444,7 @@ } ], "source": [ - "file_path = 'D:/IceDyno/IMS_images/2017/ims2017001_4km_v1.3.nc'\n", + "file_path = \"D:/IceDyno/IMS_images/2017/ims2017001_4km_v1.3.nc\"\n", "ds = xr.open_dataset(file_path)\n", "da = ds[\"IMS_Surface_Values\"]\n", "da" @@ -1165,7 +1165,7 @@ } ], "source": [ - "ds.sel(x=slice(-.25e+07, 0.25e+07), y=slice(-.25e+07, 0.25e+07)).IMS_Surface_Values.plot()" + "ds.sel(x=slice(-0.25e07, 0.25e07), y=slice(-0.25e07, 0.25e07)).IMS_Surface_Values.plot()" ] }, { @@ -1216,7 +1216,9 @@ } ], "source": [ - "np_array = np.array(ds.sel(x=slice(-.25e+07, 0.25e+07), y=slice(-.25e+07, 0.25e+07)).IMS_Surface_Values)\n", + "np_array = np.array(\n", + " ds.sel(x=slice(-0.25e07, 0.25e07), y=slice(-0.25e07, 0.25e07)).IMS_Surface_Values\n", + ")\n", "np_array" ] }, @@ -1268,7 +1270,7 @@ } ], "source": [ - "file_path = 'D:/IceDyno/netcdf/2017/masie_all_r00_v01_2017001_4km.nc'\n", + "file_path = \"D:/IceDyno/netcdf/2017/masie_all_r00_v01_2017001_4km.nc\"\n", "ds = xr.open_dataset(file_path)\n", "ds.info" ] @@ -1304,7 +1306,6 @@ "outputs": [], "source": [ "def plot_greenland_array(sie_array, product):\n", - " \n", " sie_array = np.rot90(sie_array, 2)\n", " array_2D = sie_array[0, :, :]\n", "\n", @@ -1325,16 +1326,16 @@ "\n", " # Select the subset of the data array based on both longitude and latitude bounds\n", " subset_data_array = array_2D[lat_indices][:, lon_indices]\n", - " \n", + "\n", " # Print the shape of the subset\n", - "# print(subset_data_array.shape)\n", + " # print(subset_data_array.shape)\n", "\n", " # Set all all non-3 values to 0\n", " subset_data_array[subset_data_array != 3] = 0\n", "\n", " # Set all 3 values to 1\n", " subset_data_array[subset_data_array == 3] = 1\n", - " \n", + "\n", " colors = [\"#000000\", \"#FFFFFF\"]\n", " cmap = ListedColormap(colors, name=\"custom_colormap\", N=len(colors))\n", " plt.imshow(subset_data_array, cmap=cmap, vmin=0, vmax=len(colors) - 1)\n", @@ -1388,9 +1389,9 @@ } ], "source": [ - "file_path = 'D:/IceDyno/netcdf/2017/masie_all_r00_v01_2017001_4km.nc'\n", + "file_path = \"D:/IceDyno/netcdf/2017/masie_all_r00_v01_2017001_4km.nc\"\n", "ds = xr.open_dataset(file_path)\n", - "plot_greenland_array(ds.sea_ice_extent, 'MASIE')" + "plot_greenland_array(ds.sea_ice_extent, \"MASIE\")" ] }, { @@ -1419,9 +1420,9 @@ } ], "source": [ - "file_path = 'D:/IceDyno/IMS_images/2017/ims2017001_4km_v1.3.nc'\n", + "file_path = \"D:/IceDyno/IMS_images/2017/ims2017001_4km_v1.3.nc\"\n", "ds = xr.open_dataset(file_path)\n", - "plot_greenland_array(ds.IMS_Surface_Values, 'IMS')" + "plot_greenland_array(ds.IMS_Surface_Values, \"IMS\")" ] } ], diff --git a/workspaces/brendon/preprocess_IMS.ipynb b/workspaces/brendon/preprocess_IMS.ipynb index eada9ae..e770b7b 100644 --- a/workspaces/brendon/preprocess_IMS.ipynb +++ b/workspaces/brendon/preprocess_IMS.ipynb @@ -603,7 +603,6 @@ " hdf_dir = f\"D:/IceDyno/IMS Images/{yr}\"\n", " for i, hdf_file in enumerate(glob.glob(f\"{hdf_dir}/*.nc\")):\n", " with h5py.File(hdf_file, \"r\") as file:\n", - "\n", " # Time stamp\n", " dataset = file[\"time\"]\n", " timestamp = dataset[0]\n",