diff --git a/data_transformation_plugins/ecco_darwin_transformation.py b/data_transformation_plugins/ecco_darwin_transformation.py index b31c2c13..93be21cd 100644 --- a/data_transformation_plugins/ecco_darwin_transformation.py +++ b/data_transformation_plugins/ecco_darwin_transformation.py @@ -1,6 +1,8 @@ -import xarray import re +import xarray + + def ecco_darwin_transformation(file_obj, name, nodata): """Tranformation function for the ecco darwin dataset @@ -15,9 +17,9 @@ def ecco_darwin_transformation(file_obj, name, nodata): var_data_netcdf = {} xds = xarray.open_dataset(file_obj) xds = xds.rename({"y": "latitude", "x": "longitude"}) - xds = xds.assign_coords(longitude=((xds.longitude / 1440) * 360) - 180).sortby( - "longitude" - ) + xds = xds.assign_coords( + longitude=((xds.longitude / 1440) * 360) - 180 + ).sortby("longitude") xds = xds.assign_coords(latitude=((xds.latitude / 721) * 180) - 90).sortby( "latitude" ) @@ -43,5 +45,5 @@ def ecco_darwin_transformation(file_obj, name, nodata): # # add extension cog_filename = f"{cog_filename}.tif" var_data_netcdf[cog_filename] = data - - return var_data_netcdf \ No newline at end of file + + return var_data_netcdf diff --git a/data_transformation_plugins/geos_oco2_transformation.py b/data_transformation_plugins/geos_oco2_transformation.py index 4165c91e..61b9702a 100644 --- a/data_transformation_plugins/geos_oco2_transformation.py +++ b/data_transformation_plugins/geos_oco2_transformation.py @@ -1,6 +1,8 @@ -import xarray import re +import xarray + + def geos_oco2_transformation(file_obj, name, nodata): """Tranformation function for the oco2 geos dataset @@ -35,4 +37,4 @@ def geos_oco2_transformation(file_obj, name, nodata): cog_filename = f"{cog_filename}.tif" var_data_netcdf[cog_filename] = data - return var_data_netcdf \ No newline at end of file + return var_data_netcdf diff --git a/data_transformation_plugins/gosat_ch4_transformation.py b/data_transformation_plugins/gosat_ch4_transformation.py index 7a88b85f..ea5552ec 100644 --- a/data_transformation_plugins/gosat_ch4_transformation.py +++ b/data_transformation_plugins/gosat_ch4_transformation.py @@ -1,6 +1,8 @@ -import xarray import re +import xarray + + def gosat_ch4_transformation(file_obj, name, nodata): """Tranformation function for the ecco darwin dataset @@ -33,5 +35,5 @@ def gosat_ch4_transformation(file_obj, name, nodata): data.rio.set_spatial_dims("lon", "lat") data.rio.write_crs("epsg:4326", inplace=True) var_data_netcdf[cog_filename] = data - - return var_data_netcdf \ No newline at end of file + + return var_data_netcdf diff --git a/data_transformation_plugins/gpw_transformation.py b/data_transformation_plugins/gpw_transformation.py index fde5d1f7..fb1e99d8 100644 --- a/data_transformation_plugins/gpw_transformation.py +++ b/data_transformation_plugins/gpw_transformation.py @@ -1,6 +1,8 @@ -import xarray import re +import xarray + + def gpw_transformation(file_obj, name, nodata): """Tranformation function for the gridded population dataset @@ -30,5 +32,5 @@ def gpw_transformation(file_obj, name, nodata): # # add extension cog_filename = f"{cog_filename}.tif" var_data_netcdf[cog_filename] = xds - - return var_data_netcdf \ No newline at end of file + + return var_data_netcdf diff --git a/data_transformation_plugins/push_to_s3.py b/data_transformation_plugins/push_to_s3.py index 7f941bc9..03cdfaa6 100644 --- a/data_transformation_plugins/push_to_s3.py +++ b/data_transformation_plugins/push_to_s3.py @@ -1,8 +1,7 @@ -import boto3 import os import boto3 -import os + def upload_files_to_s3(folder_path, bucket_name, s3_folder, exclude_files): """ @@ -16,23 +15,23 @@ def upload_files_to_s3(folder_path, bucket_name, s3_folder, exclude_files): - exclude_files (list): List of files to exclude from uploading. """ # Initialize S3 client - s3 = boto3.client('s3') + s3 = boto3.client("s3") # Loop through files in the local folder for file_name in os.listdir(folder_path): file_path = os.path.join(folder_path, file_name) - + # Check if it's a file and not in the exclude list if os.path.isfile(file_path) and file_name not in exclude_files: s3_key = os.path.join(s3_folder, file_name) - + try: # Check if the file already exists in S3 s3.head_object(Bucket=bucket_name, Key=s3_key) print(f"Skipped {file_name} (already exists in S3)") except s3.exceptions.ClientError as e: # If the file does not exist, upload it - if e.response['Error']['Code'] == '404': + if e.response["Error"]["Code"] == "404": try: s3.upload_file(file_path, bucket_name, s3_key) print(f"Uploaded {file_name} to {s3_key}") @@ -41,7 +40,18 @@ def upload_files_to_s3(folder_path, bucket_name, s3_folder, exclude_files): else: print(f"Error checking existence of {file_name}: {e}") + # Example usage: # upload_folder_to_s3("path/to/local/folder", "my-s3-bucket", "my/s3/folder", ["exclude1.ext", "exclude2.ext"]) if __name__ == "__main__": - upload_files_to_s3("data_transformation_plugins", "ghgc-data-store-develop", "data_transformation_plugins", ["__init__.py", "push_to_s3.py", "README.md", "sample_transformation.ipynb"]) \ No newline at end of file + upload_files_to_s3( + "data_transformation_plugins", + "ghgc-data-store-develop", + "data_transformation_plugins", + [ + "__init__.py", + "push_to_s3.py", + "README.md", + "sample_transformation.ipynb", + ], + ) diff --git a/data_transformation_plugins/tm5_4dvar_update_noaa_transformation.py b/data_transformation_plugins/tm5_4dvar_update_noaa_transformation.py index 1fd491b3..a7f3a8ac 100644 --- a/data_transformation_plugins/tm5_4dvar_update_noaa_transformation.py +++ b/data_transformation_plugins/tm5_4dvar_update_noaa_transformation.py @@ -1,6 +1,8 @@ -import xarray -from datetime import datetime import re +from datetime import datetime + +import xarray + def tm5_4dvar_update_noaa_transformation(file_obj, name, nodata): """Tranformation function for the tm5 ch4 influx dataset @@ -43,4 +45,4 @@ def tm5_4dvar_update_noaa_transformation(file_obj, name, nodata): cog_filename = f"{cog_filename}.tif" var_data_netcdf[cog_filename] = data - return var_data_netcdf \ No newline at end of file + return var_data_netcdf