
Commit

reformatting files
vishal committed Nov 19, 2024
1 parent d94aed2 commit e6a6ee9
Showing 6 changed files with 44 additions and 24 deletions.
14 changes: 8 additions & 6 deletions data_transformation_plugins/ecco_darwin_transformation.py
@@ -1,6 +1,8 @@
import xarray
import re

import xarray


def ecco_darwin_transformation(file_obj, name, nodata):
"""Tranformation function for the ecco darwin dataset
@@ -15,9 +17,9 @@ def ecco_darwin_transformation(file_obj, name, nodata):
var_data_netcdf = {}
xds = xarray.open_dataset(file_obj)
xds = xds.rename({"y": "latitude", "x": "longitude"})
xds = xds.assign_coords(longitude=((xds.longitude / 1440) * 360) - 180).sortby(
"longitude"
)
xds = xds.assign_coords(
longitude=((xds.longitude / 1440) * 360) - 180
).sortby("longitude")
xds = xds.assign_coords(latitude=((xds.latitude / 721) * 180) - 90).sortby(
"latitude"
)
@@ -43,5 +45,5 @@ def ecco_darwin_transformation(file_obj, name, nodata):
# # add extension
cog_filename = f"{cog_filename}.tif"
var_data_netcdf[cog_filename] = data
return var_data_netcdf

return var_data_netcdf
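
For reference, the reformatted assign_coords calls above rescale integer grid indices to geographic degrees before sorting. A minimal, self-contained sketch of that rescaling on a dummy 1440 x 721 grid follows; the grid size comes from the hunk, while the variable name and dummy values are assumptions for illustration.

import numpy as np
import xarray

# Dummy dataset with integer row/column indices standing in for the NetCDF grid
xds = xarray.Dataset(
    {"co2_flux": (("latitude", "longitude"), np.zeros((721, 1440)))},
    coords={"latitude": np.arange(721), "longitude": np.arange(1440)},
)
# Rescale indices to degrees and sort, mirroring the plugin code above
xds = xds.assign_coords(
    longitude=((xds.longitude / 1440) * 360) - 180
).sortby("longitude")
xds = xds.assign_coords(latitude=((xds.latitude / 721) * 180) - 90).sortby(
    "latitude"
)
print(float(xds.longitude.min()), float(xds.longitude.max()))  # -180.0 179.75
print(float(xds.latitude.min()), float(xds.latitude.max()))    # -90.0 ~89.75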
6 changes: 4 additions & 2 deletions data_transformation_plugins/geos_oco2_transformation.py
@@ -1,6 +1,8 @@
import xarray
import re

import xarray


def geos_oco2_transformation(file_obj, name, nodata):
"""Tranformation function for the oco2 geos dataset
@@ -35,4 +37,4 @@ def geos_oco2_transformation(file_obj, name, nodata):
cog_filename = f"{cog_filename}.tif"
var_data_netcdf[cog_filename] = data

return var_data_netcdf
return var_data_netcdf
8 changes: 5 additions & 3 deletions data_transformation_plugins/gosat_ch4_transformation.py
@@ -1,6 +1,8 @@
import xarray
import re

import xarray


def gosat_ch4_transformation(file_obj, name, nodata):
"""Tranformation function for the ecco darwin dataset
@@ -33,5 +35,5 @@ def gosat_ch4_transformation(file_obj, name, nodata):
data.rio.set_spatial_dims("lon", "lat")
data.rio.write_crs("epsg:4326", inplace=True)
var_data_netcdf[cog_filename] = data
return var_data_netcdf

return var_data_netcdf
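
The set_spatial_dims and write_crs calls shown above are what tag each array with spatial metadata before COG conversion. A minimal sketch of those two calls on a dummy one-degree grid follows; the variable name and grid are assumptions, and rioxarray must be installed for the .rio accessor to exist.

import numpy as np
import rioxarray  # noqa: F401  (registers the .rio accessor on xarray objects)
import xarray

data = xarray.DataArray(
    np.zeros((180, 360)),
    dims=("lat", "lon"),
    coords={"lat": np.arange(-89.5, 90, 1.0), "lon": np.arange(-179.5, 180, 1.0)},
    name="ch4",
)
data.rio.set_spatial_dims("lon", "lat", inplace=True)
data.rio.write_crs("epsg:4326", inplace=True)
print(data.rio.crs)  # EPSG:4326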
8 changes: 5 additions & 3 deletions data_transformation_plugins/gpw_transformation.py
@@ -1,6 +1,8 @@
import xarray
import re

import xarray


def gpw_transformation(file_obj, name, nodata):
"""Tranformation function for the gridded population dataset
@@ -30,5 +32,5 @@ def gpw_transformation(file_obj, name, nodata):
# # add extension
cog_filename = f"{cog_filename}.tif"
var_data_netcdf[cog_filename] = xds
return var_data_netcdf

return var_data_netcdf
24 changes: 17 additions & 7 deletions data_transformation_plugins/push_to_s3.py
@@ -1,8 +1,7 @@
import boto3
import os

import boto3
import os


def upload_files_to_s3(folder_path, bucket_name, s3_folder, exclude_files):
"""
@@ -16,23 +15,23 @@ def upload_files_to_s3(folder_path, bucket_name, s3_folder, exclude_files):
- exclude_files (list): List of files to exclude from uploading.
"""
# Initialize S3 client
s3 = boto3.client('s3')
s3 = boto3.client("s3")

# Loop through files in the local folder
for file_name in os.listdir(folder_path):
file_path = os.path.join(folder_path, file_name)

# Check if it's a file and not in the exclude list
if os.path.isfile(file_path) and file_name not in exclude_files:
s3_key = os.path.join(s3_folder, file_name)

try:
# Check if the file already exists in S3
s3.head_object(Bucket=bucket_name, Key=s3_key)
print(f"Skipped {file_name} (already exists in S3)")
except s3.exceptions.ClientError as e:
# If the file does not exist, upload it
if e.response['Error']['Code'] == '404':
if e.response["Error"]["Code"] == "404":
try:
s3.upload_file(file_path, bucket_name, s3_key)
print(f"Uploaded {file_name} to {s3_key}")
@@ -41,7 +40,18 @@ def upload_files_to_s3(folder_path, bucket_name, s3_folder, exclude_files):
else:
print(f"Error checking existence of {file_name}: {e}")


# Example usage:
# upload_files_to_s3("path/to/local/folder", "my-s3-bucket", "my/s3/folder", ["exclude1.ext", "exclude2.ext"])
if __name__ == "__main__":
upload_files_to_s3("data_transformation_plugins", "ghgc-data-store-develop", "data_transformation_plugins", ["__init__.py", "push_to_s3.py", "README.md", "sample_transformation.ipynb"])
upload_files_to_s3(
"data_transformation_plugins",
"ghgc-data-store-develop",
"data_transformation_plugins",
[
"__init__.py",
"push_to_s3.py",
"README.md",
"sample_transformation.ipynb",
],
)
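
The skip-if-exists behaviour above relies on head_object raising a ClientError whose error code is "404" when the key is absent. A minimal sketch of that check in isolation follows; the bucket and key in the commented call are placeholders, and AWS credentials are required to actually run it.

import boto3
from botocore.exceptions import ClientError

s3 = boto3.client("s3")


def object_exists(bucket: str, key: str) -> bool:
    """Return True if the object exists, False on a 404, and re-raise anything else."""
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError as e:
        if e.response["Error"]["Code"] == "404":
            return False
        raise


# Placeholder call; substitute a bucket and key you can read
# print(object_exists("ghgc-data-store-develop", "data_transformation_plugins/push_to_s3.py"))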
8 changes: 5 additions & 3 deletions data_transformation_plugins/tm5_4dvar_update_noaa_transformation.py
@@ -1,6 +1,8 @@
import xarray
from datetime import datetime
import re
from datetime import datetime

import xarray


def tm5_4dvar_update_noaa_transformation(file_obj, name, nodata):
"""Tranformation function for the tm5 ch4 influx dataset
@@ -43,4 +45,4 @@ def tm5_4dvar_update_noaa_transformation(file_obj, name, nodata):
cog_filename = f"{cog_filename}.tif"
var_data_netcdf[cog_filename] = data

return var_data_netcdf
return var_data_netcdf
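
The tm5 plugin imports re and datetime, presumably to derive a timestamp for the COG filename from the source file name; the actual pattern sits outside this hunk, so the regex and filename below are only an assumed illustration of that kind of parsing.

import re
from datetime import datetime

# Hypothetical filename; the real tm5 naming convention is not shown in this diff
filename = "methane_emissions_2016x1x1.nc"
match = re.search(r"(\d{4})x(\d{1,2})x(\d{1,2})", filename)
if match:
    year, month, day = (int(g) for g in match.groups())
    timestamp = datetime(year, month, day)
    print(timestamp.strftime("%Y-%m-%d"))  # 2016-01-01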
