From cee55748b3b97369c497d6eebfaaf1510d13ff9c Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 4 Apr 2022 10:27:54 -0700 Subject: [PATCH 1/8] Stitching geocoded burst for stack processing --- src/compass/utils/stitching/stitch_burst.py | 399 ++++++++++++++++++++ 1 file changed, 399 insertions(+) create mode 100644 src/compass/utils/stitching/stitch_burst.py diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py new file mode 100644 index 00000000..8bf0585f --- /dev/null +++ b/src/compass/utils/stitching/stitch_burst.py @@ -0,0 +1,399 @@ +import argparse +import glob +import json +import os +import time + +import isce3 +import journal +import pandas as pd +import shapely.wkt +from compass.utils import helpers +from osgeo import gdal, ogr +from shapely.geometry import Polygon + + +def command_line_parser(): + """ + Command line parser + """ + + parser = argparse.ArgumentParser(description=""" + Stitch S1-A/B bursts for stack processing""", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('-d', '--indir', type=str, action='store', dest='indir', + help='Directory with S1-A/B bursts organized by dates') + parser.add_argument('-b', '--burst-list', type=str, nargs='+', + default=None, dest='burst_list', + help='List of burst IDs to stitch. If None, common bursts' + 'among all dates will be stitched (default: None') + parser.add_argument('-m', '--margin', type=float, + default=100, dest='margin', + help='Margin to apply during stitching. Same units as bursts coordinate system.' + '(default: 100 m, UTM)') + parser.add_argument('-s', '--scratchdir', type=str, default='scratch', + dest='scratch', + help='Directory where to store temporary results (default: scratch)') + parser.add_argument('-o', '--outdir', type=str, default='outdir', + dest='outdir', + help='Directory path where to store stitched bursts (default: outdir)') + return parser.parse_args() + + +def main(indir, outdir, scratchdir, margin, burst_list): + ''' + Stitch S1-A/B bursts for stack processing + + Parameters: + ---------- + indir: str + File path to directory containing S1-A/B bursts + organized by date + outdir: str + File path to directory where to store stitched bursts + scratchdir: str + File path to directory where to store intermediate + results (e.g., shapefiles of burst boundary) + margin: float + Margin to apply to burst boundary while stitching. + Same units as bursts coordinate system + burst_list: list [str] + List of burst IDs to stitch. If not provided, common + bursts among all dates are identified and considered + for stitching + ''' + info_channel = journal.info('stitch_burst.main') + error_channel = journal.error('stitch_burst.main') + + # Start tracking time + info_channel.log('Start burst stitching') + t_start = time.time() + + # Check that input directory is valid + if not os.path.isdir(indir): + err_str = f'{indir} is not a valid input directory' + error_channel.log(err_str) + raise ValueError(err_str) + + # Create output and scratch dirs if not existing + if not os.path.exists(outdir): + os.makedirs(outdir, exist_ok=True) + helpers.check_write_dir(outdir) + if not os.path.exists(scratchdir): + os.makedirs(scratchdir, exist_ok=True) + helpers.check_write_dir(scratchdir) + + # Collect info for stitching in all dirs in 'indir' + # and return a panda dataframe with info + data_dict = get_stitching_dict(indir) + + # If stitching some bursts, prune dataframe to + # contains only the burst IDs to stitch + if burst_list is not None: + data_dict = prune_dataframe(data_dict, + 'burst_id', burst_list) + + # Identify common burst IDs among different dates + ids2stitch = get_common_burst_ids(data_dict) + + # Prune dataframe to contain only the IDs to stitch + data_dict = prune_dataframe(data_dict, + 'burst_id', ids2stitch) + + # Track cut bursts by adding new column to dataframe + data_dict["cut_granule_id"] = "" + + # For each burst ID, get common bursts boundary and store it + # as a shapefile to be used by gdalwarp (later for cutting) + for burst_id in list(set(data_dict['burst_id'])): + # Get info on polygons, epsg, granule + polys = data_dict.polygon[data_dict.burst_id == burst_id].tolist() + epsgs = data_dict.epsg[data_dict.burst_id == burst_id].tolist() + granules = data_dict.granule_id[data_dict.burst_id == burst_id].tolist() + + # Get common burst boundary and save it as shapefile + common_poly, epsg = intersect_polygons(polys, epsgs, margin) + shp_filename = f'{scratchdir}/shp_{burst_id}.shp' + save_as_shapefile(common_poly, epsg, shp_filename) + + # Cut all the same ID burts with shapefile + for granule in granules: + # Get raster resolution + xres, yres, epsg = get_raster_info(granule) + + filename = os.path.splitext(os.path.basename(granule))[0] + cut_filename = f'{scratchdir}/cut_{filename}' + opts = gdal.WarpOptions(format='ENVI', xRes=xres, + yRes=yres, dstNodata='nan', + cutlineDSName=shp_filename, + multithread=True, warpMemoryLimit=3000, + srcSRS=f'EPSG:{epsg}', + dstSRS=f'EPSG:{epsg}', + targetAlignedPixels=True) + gdal.Warp(cut_filename, granule, options=opts) + # Save location of cut burst IDs (later for stitching) + data_dict.loc[data_dict['granule_id'] == granule, + 'cut_granule_id'] = cut_filename + + # Start stitching by date + unique_dates = list(set(data_dict['date'])) + for date in unique_dates: + cut_rasters = data_dict.cut_granule_id[data_dict.date == date].tolist() + xres, yres, epsg = get_raster_info(cut_rasters[0]) + outfilename = f'{outdir}/stitched_{date}' + opts = gdal.WarpOptions(format='ENVI', xRes=xres, + yRes=yres, dstNodata='nan', + multithread=True, warpMemoryLimit=3000, + srcSRS=f'EPSG:{epsg}', + dstSRS=f'EPSG:{epsg}', + targetAlignedPixels=True) + gdal.Warp(outfilename, cut_rasters, options=opts) + + # Save data dictionary to keep trace of what has been merged + data_dict.to_csv(f'{outdir}/merged_report.csv') + + # Log elapsed time for stitching + dt = time.time() - t_start + info_channel.log(f'Successfully run stitching in {dt:.3f} seconds') + + +def get_raster_info(filename): + ''' + Get raster X and Y resolution and epsg + + Parameters: + ----------- + filename: str + Filepath where raster is stored + + Returns: + ------- + xres, yres, epsg: (tuple, float) + Raster resolution along X and Y directions and epsg + ''' + raster = isce3.io.Raster(filename) + return raster.dx, raster.dy, raster.get_epsg() + + +def save_as_shapefile(polygon, epsg, outfile): + ''' + Save polygon as an ESRI shapefile + + Parameters: + ---------- + polygon: shapely.geometry.Polygon + Shapely polygon identify burst boundary on the ground + epsg: int + EPSG code associate to 'polygon' coordinate system + outfile: str + File path to store create ESRI shapefile + + ''' + dest_srs = ogr.osr.SpatialReference() + dest_srs.ImportFromEPSG(int(epsg)) + driver = ogr.GetDriverByName('ESRI Shapefile') + ds = driver.CreateDataSource(outfile) + layer = ds.CreateLayer('', None, ogr.wkbPolygon) + + # Add attribute and create new feature + layer.CreateField(ogr.FieldDefn('id', ogr.OFTInteger)) + defn = layer.GetLayerDefn() + feat = ogr.Feature(defn) + feat.SetField('id', 123) + + # Make a geometry, from Shapely object + geom = ogr.CreateGeometryFromWkb(polygon.wkb) + geom.AssignSpatialReference(dest_srs) + feat.SetGeometry(geom) + layer.CreateFeature(feat) + + # Clean up + feat = geom = None + ds = layer = feat = geom = None + + +def intersect_polygons(polygons, epsgs, margin): + ''' + Get the intersection of polygons stored in 'polygons' + + Parameters: + ----------- + polygons: list + List of shapely polygons to intersect + epsgs: list + List of EPSGs associate to 'polygons' + + Returns: + ------- + poly_int: shapely.Polygon + Result of polygon intersection + epsg_int: int + EPSG code associated to poly + ''' + # Assert validity of inputs + assert (len(polygons) == len(epsgs)) + + # Initialize polygon and epsg of intersection + poly_int = polygons[0] + epsg_int = epsgs[0] + + # Initialize coordinate transformation in case + # there are polygons with different epsgs + llh = ogr.osr.SpatialReference() + llh.ImportFromEPSG(epsg_int) + tgt = ogr.osr.SpatialReference() + + for poly, epsg in zip(polygons, epsgs): + if epsg != epsg_int: + # Transform polygons in same coord system as epsg_int + tgt_x, tgt_y = [], [] + x, y = poly.exterior.coords.xy + tgt.ImportFromEPSG(epsg) + trans = ogr.osr.CoordinateTransformation(llh, tgt) + for lx, ly in zip(x, y): + dummy_x, dummy_y, dummy_z = trans.Transform(lx, ly, 0) + tgt_x.append(dummy_x) + tgt_y.append(dummy_y) + poly = Polygon(list(zip(tgt_x, tgt_y))) + poly_int = poly.intersection(poly_int) + + # To be conservative, apply some margin to the polygon (TO DO: + # check eps) + poly_int = poly_int.buffer(-margin) + return poly_int, epsg_int + + +def get_stitching_dict(indir): + ''' + Collect info on bursts to stitch and store them + in a panda dataframe + + Parameters: + ---------- + indir: str + Directory where bursts are stored (organized by date) + + Returns: + ------- + cfg: pandas.DataFrame + Dataframe collecting info on bursts to stitch + ''' + # Create dictionary where to store results + cfg = {'burst_id': [], 'granule_id': [], 'polygon': [], + 'date': [], 'epsg': []} + # Get list of directory under dir_list + dir_list = os.listdir(indir) + for dir in dir_list: + # List metadata files in the directory + meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) + for path in meta_list: + # Read metadata file + metadata = read_metadata(path) + # Read info and store in dictionary + cfg['burst_id'].append(get_metadata(metadata, 'burst_id')) + filename = get_metadata(metadata, 'granule_id') + cfg['granule_id'].append(f'{indir}/{dir}/{filename}') + poly = get_metadata(metadata, 'polygon') + cfg['polygon'].append(shapely.wkt.loads(poly)) + cfg['date'].append(get_metadata(metadata, 'date')) + cfg['epsg'].append(get_metadata(metadata, 'epsg')) + + return pd.DataFrame(data=cfg) + + +def read_metadata(meta_file): + '''Read metadata file in a dictionary + + Parameters: + ----------- + meta_file: str + Filepath where metadata is located + + Returns: + ------- + cfg: dict + Dictionary containing metadata + ''' + with open(meta_file) as json_file: + metadata = json.load(json_file) + return metadata + + +def get_metadata(metadata, field): + ''' + Get 'field" value from metadata dictionary + + Parameters: + ----------- + metadata: dict + Dictionary containing metadata for a burst + field: str + Field in the metadata to extract value for + + Returns: + ------- + value: float, int, shapely.Polygon + Value stored in the metadata field (type + depends on type of metadata extracted) + ''' + value = metadata[field] + return value + + +def prune_dataframe(data, id_col, id_list): + ''' + Prune dataframe based on column ID and list of value + + Parameters: + ---------- + data: pandas.DataFrame + dataframe that needs to be pruned + id_col: str + column identification for 'data' (e.g. 'burst_id') + id_list: list + List of elements to keep after pruning. Elements not + in id_list but contained in 'data' will be pruned + + Returns: + ------- + data: pandas.DataFrame + Pruned dataframe with rows in 'id_list' + ''' + pattern = '|'.join(id_list) + dataf = data.loc[data[id_col].str.contains(pattern, + case=False)] + return dataf + + +def get_common_burst_ids(data): + ''' + Get list of burst IDs common among all processed dates + + Parameters: + ---------- + data: pandas.DataFrame + Dataframe containing info for stitching (e.g. burst IDs) + + Returns: + ------- + common_id: list + List containing common burst IDs among all the dates + ''' + # Identify all the dates for the bursts to stitch + unique_dates = list(set(data['date'])) + + # Initialize list of unique burst IDs + common_id = data.burst_id[data.date == unique_dates[0]] + + for date in unique_dates: + ids = data.burst_id[data.date == date] + common_id = sorted(list(set(ids.tolist()) & set(common_id))) + return common_id + + +if __name__ == '__main__': + # Get command line arguments + opts = command_line_parser() + # Give these arguments to the main code + main(opts.indir, opts.outdir, + opts.scratch, opts.margin, opts.burst_list) From 6d5f80247a0baf1b146673471543cb34dc7ac6e2 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 16 May 2022 10:51:09 -0700 Subject: [PATCH 2/8] Address compatibility with current version of main --- src/compass/utils/stitching/stitch_burst.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 8bf0585f..57144cf3 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -8,6 +8,7 @@ import journal import pandas as pd import shapely.wkt +from datetime import datetime from compass.utils import helpers from osgeo import gdal, ogr from shapely.geometry import Polygon @@ -291,12 +292,16 @@ def get_stitching_dict(indir): metadata = read_metadata(path) # Read info and store in dictionary cfg['burst_id'].append(get_metadata(metadata, 'burst_id')) - filename = get_metadata(metadata, 'granule_id') + datestr = get_metadata(metadata, 'sensing_start') + date = datetime.fromisoformat(datestr).strftime("%Y%m%d") + filename = f"{get_metadata(metadata, 'burst_id')}_{date}_VV.slc" cfg['granule_id'].append(f'{indir}/{dir}/{filename}') - poly = get_metadata(metadata, 'polygon') + poly = get_metadata(metadata, 'border') cfg['polygon'].append(shapely.wkt.loads(poly)) - cfg['date'].append(get_metadata(metadata, 'date')) - cfg['epsg'].append(get_metadata(metadata, 'epsg')) + cfg['date'].append(date) + geogrid = get_metadata(metadata, 'geogrid') + cfg['epsg'].append(geogrid['epsg']) + return pd.DataFrame(data=cfg) From 4dba162b2eed622061f170a12767be02feeb9689 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 08:07:11 -0700 Subject: [PATCH 3/8] Organize imports with pep8 convention --- src/compass/utils/stitching/stitch_burst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 57144cf3..0bbb1ab7 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -3,12 +3,12 @@ import json import os import time +from datetime import datetime import isce3 import journal import pandas as pd import shapely.wkt -from datetime import datetime from compass.utils import helpers from osgeo import gdal, ogr from shapely.geometry import Polygon From 0ee452c210eb2606811d2dc97f3034ec41501493 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 08:47:08 -0700 Subject: [PATCH 4/8] Remove metadata read and extraction functions --- src/compass/utils/stitching/stitch_burst.py | 59 ++++----------------- 1 file changed, 10 insertions(+), 49 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 0bbb1ab7..4db4fc22 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -286,65 +286,26 @@ def get_stitching_dict(indir): dir_list = os.listdir(indir) for dir in dir_list: # List metadata files in the directory - meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) - for path in meta_list: - # Read metadata file - metadata = read_metadata(path) + json_meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) + for json_path in json_meta_list: + with open(json_path) as json_file: + metadata_dict = json.load(json_file) + # Read info and store in dictionary - cfg['burst_id'].append(get_metadata(metadata, 'burst_id')) - datestr = get_metadata(metadata, 'sensing_start') + cfg['burst_id'].append(metadata_dict['burst_id']) + datestr = metadata_dict['sensing_start'] date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = f"{get_metadata(metadata, 'burst_id')}_{date}_VV.slc" + filename = f"{metadata_dict['burst_id']}_{date}_VV.slc" cfg['granule_id'].append(f'{indir}/{dir}/{filename}') - poly = get_metadata(metadata, 'border') + poly = metadata_dict['border'] cfg['polygon'].append(shapely.wkt.loads(poly)) cfg['date'].append(date) - geogrid = get_metadata(metadata, 'geogrid') + geogrid = metadata_dict['geogrid'] cfg['epsg'].append(geogrid['epsg']) - return pd.DataFrame(data=cfg) -def read_metadata(meta_file): - '''Read metadata file in a dictionary - - Parameters: - ----------- - meta_file: str - Filepath where metadata is located - - Returns: - ------- - cfg: dict - Dictionary containing metadata - ''' - with open(meta_file) as json_file: - metadata = json.load(json_file) - return metadata - - -def get_metadata(metadata, field): - ''' - Get 'field" value from metadata dictionary - - Parameters: - ----------- - metadata: dict - Dictionary containing metadata for a burst - field: str - Field in the metadata to extract value for - - Returns: - ------- - value: float, int, shapely.Polygon - Value stored in the metadata field (type - depends on type of metadata extracted) - ''' - value = metadata[field] - return value - - def prune_dataframe(data, id_col, id_list): ''' Prune dataframe based on column ID and list of value From 163e54fdfa975c2b5acf02b894e1f03eed60cb39 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 11:15:16 -0700 Subject: [PATCH 5/8] Rename variables for clarity --- src/compass/utils/stitching/stitch_burst.py | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 4db4fc22..6d208b2a 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -22,10 +22,10 @@ def command_line_parser(): parser = argparse.ArgumentParser(description=""" Stitch S1-A/B bursts for stack processing""", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-d', '--indir', type=str, action='store', dest='indir', + parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir', help='Directory with S1-A/B bursts organized by dates') - parser.add_argument('-b', '--burst-list', type=str, nargs='+', - default=None, dest='burst_list', + parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', + default=None, dest='burst_id_list', help='List of burst IDs to stitch. If None, common bursts' 'among all dates will be stitched (default: None') parser.add_argument('-m', '--margin', type=float, @@ -41,13 +41,13 @@ def command_line_parser(): return parser.parse_args() -def main(indir, outdir, scratchdir, margin, burst_list): +def main(burst_dir, outdir, scratchdir, margin, burst_id_list): ''' Stitch S1-A/B bursts for stack processing Parameters: ---------- - indir: str + burst_dir: str File path to directory containing S1-A/B bursts organized by date outdir: str @@ -58,7 +58,7 @@ def main(indir, outdir, scratchdir, margin, burst_list): margin: float Margin to apply to burst boundary while stitching. Same units as bursts coordinate system - burst_list: list [str] + burst_id_list: list [str] List of burst IDs to stitch. If not provided, common bursts among all dates are identified and considered for stitching @@ -71,8 +71,8 @@ def main(indir, outdir, scratchdir, margin, burst_list): t_start = time.time() # Check that input directory is valid - if not os.path.isdir(indir): - err_str = f'{indir} is not a valid input directory' + if not os.path.isdir(burst_dir): + err_str = f'{burst_dir} is not a valid input directory' error_channel.log(err_str) raise ValueError(err_str) @@ -84,15 +84,15 @@ def main(indir, outdir, scratchdir, margin, burst_list): os.makedirs(scratchdir, exist_ok=True) helpers.check_write_dir(scratchdir) - # Collect info for stitching in all dirs in 'indir' + # Collect info for stitching in all dirs in 'burst_dir' # and return a panda dataframe with info - data_dict = get_stitching_dict(indir) + data_dict = get_stitching_dict(burst_dir) # If stitching some bursts, prune dataframe to # contains only the burst IDs to stitch - if burst_list is not None: + if burst_id_list is not None: data_dict = prune_dataframe(data_dict, - 'burst_id', burst_list) + 'burst_id', burst_id_list) # Identify common burst IDs among different dates ids2stitch = get_common_burst_ids(data_dict) @@ -264,14 +264,14 @@ def intersect_polygons(polygons, epsgs, margin): return poly_int, epsg_int -def get_stitching_dict(indir): +def get_stitching_dict(burst_dir): ''' Collect info on bursts to stitch and store them in a panda dataframe Parameters: ---------- - indir: str + burst_dir: str Directory where bursts are stored (organized by date) Returns: @@ -283,10 +283,10 @@ def get_stitching_dict(indir): cfg = {'burst_id': [], 'granule_id': [], 'polygon': [], 'date': [], 'epsg': []} # Get list of directory under dir_list - dir_list = os.listdir(indir) + dir_list = os.listdir(burst_dir) for dir in dir_list: # List metadata files in the directory - json_meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) + json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*json')) for json_path in json_meta_list: with open(json_path) as json_file: metadata_dict = json.load(json_file) @@ -296,7 +296,7 @@ def get_stitching_dict(indir): datestr = metadata_dict['sensing_start'] date = datetime.fromisoformat(datestr).strftime("%Y%m%d") filename = f"{metadata_dict['burst_id']}_{date}_VV.slc" - cfg['granule_id'].append(f'{indir}/{dir}/{filename}') + cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}') poly = metadata_dict['border'] cfg['polygon'].append(shapely.wkt.loads(poly)) cfg['date'].append(date) @@ -361,5 +361,5 @@ def get_common_burst_ids(data): # Get command line arguments opts = command_line_parser() # Give these arguments to the main code - main(opts.indir, opts.outdir, - opts.scratch, opts.margin, opts.burst_list) + main(opts.burst_dir, opts.outdir, + opts.scratch, opts.margin, opts.burst_id_list) From 87cee4d24ef3ac4bcc67af7ea07f0f2c608f603e Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 11:18:53 -0700 Subject: [PATCH 6/8] Rename data_dict to metadata_dataframe --- src/compass/utils/stitching/stitch_burst.py | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 6d208b2a..8eb8829d 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -86,31 +86,31 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): # Collect info for stitching in all dirs in 'burst_dir' # and return a panda dataframe with info - data_dict = get_stitching_dict(burst_dir) + metadata_dataframe = get_stitching_dict(burst_dir) # If stitching some bursts, prune dataframe to # contains only the burst IDs to stitch if burst_id_list is not None: - data_dict = prune_dataframe(data_dict, + metadata_dataframe = prune_dataframe(metadata_dataframe, 'burst_id', burst_id_list) # Identify common burst IDs among different dates - ids2stitch = get_common_burst_ids(data_dict) + ids2stitch = get_common_burst_ids(metadata_dataframe) # Prune dataframe to contain only the IDs to stitch - data_dict = prune_dataframe(data_dict, + metadata_dataframe = prune_dataframe(metadata_dataframe, 'burst_id', ids2stitch) # Track cut bursts by adding new column to dataframe - data_dict["cut_granule_id"] = "" + metadata_dataframe["cut_granule_id"] = "" # For each burst ID, get common bursts boundary and store it # as a shapefile to be used by gdalwarp (later for cutting) - for burst_id in list(set(data_dict['burst_id'])): + for burst_id in list(set(metadata_dataframe['burst_id'])): # Get info on polygons, epsg, granule - polys = data_dict.polygon[data_dict.burst_id == burst_id].tolist() - epsgs = data_dict.epsg[data_dict.burst_id == burst_id].tolist() - granules = data_dict.granule_id[data_dict.burst_id == burst_id].tolist() + polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist() + epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist() + granules = metadata_dataframe.granule_id[metadata_dataframe.burst_id == burst_id].tolist() # Get common burst boundary and save it as shapefile common_poly, epsg = intersect_polygons(polys, epsgs, margin) @@ -133,13 +133,13 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): targetAlignedPixels=True) gdal.Warp(cut_filename, granule, options=opts) # Save location of cut burst IDs (later for stitching) - data_dict.loc[data_dict['granule_id'] == granule, + metadata_dataframe.loc[metadata_dataframe['granule_id'] == granule, 'cut_granule_id'] = cut_filename # Start stitching by date - unique_dates = list(set(data_dict['date'])) + unique_dates = list(set(metadata_dataframe['date'])) for date in unique_dates: - cut_rasters = data_dict.cut_granule_id[data_dict.date == date].tolist() + cut_rasters = metadata_dataframe.cut_granule_id[metadata_dataframe.date == date].tolist() xres, yres, epsg = get_raster_info(cut_rasters[0]) outfilename = f'{outdir}/stitched_{date}' opts = gdal.WarpOptions(format='ENVI', xRes=xres, @@ -151,7 +151,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): gdal.Warp(outfilename, cut_rasters, options=opts) # Save data dictionary to keep trace of what has been merged - data_dict.to_csv(f'{outdir}/merged_report.csv') + metadata_dataframe.to_csv(f'{outdir}/merged_report.csv') # Log elapsed time for stitching dt = time.time() - t_start From 9cf73bdc88c8c332cc178ed641346456ef5c1ef3 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 11:36:56 -0700 Subject: [PATCH 7/8] stitch_burst.py --- src/compass/utils/stitching/stitch_burst.py | 44 +++++++++++++-------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 8eb8829d..9ba0add1 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -28,6 +28,9 @@ def command_line_parser(): default=None, dest='burst_id_list', help='List of burst IDs to stitch. If None, common bursts' 'among all dates will be stitched (default: None') + parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', dest='pol', + help='Polarization to process one or many between HH, HV, VH, VV' + '(default: VV)') parser.add_argument('-m', '--margin', type=float, default=100, dest='margin', help='Margin to apply during stitching. Same units as bursts coordinate system.' @@ -41,27 +44,29 @@ def command_line_parser(): return parser.parse_args() -def main(burst_dir, outdir, scratchdir, margin, burst_id_list): +def main(burst_dir, outdir, scratchdir, margin, burst_id_list, pol): ''' Stitch S1-A/B bursts for stack processing Parameters: ---------- burst_dir: str - File path to directory containing S1-A/B bursts - organized by date + File path to directory containing S1-A/B bursts + organized by date outdir: str - File path to directory where to store stitched bursts + File path to directory where to store stitched bursts scratchdir: str - File path to directory where to store intermediate - results (e.g., shapefiles of burst boundary) + File path to directory where to store intermediate + results (e.g., shapefiles of burst boundary) margin: float - Margin to apply to burst boundary while stitching. - Same units as bursts coordinate system + Margin to apply to burst boundary while stitching. + Same units as bursts coordinate system burst_id_list: list [str] - List of burst IDs to stitch. If not provided, common - bursts among all dates are identified and considered - for stitching + List of burst IDs to stitch. If not provided, common + bursts among all dates are identified and considered + for stitching + pol: list [str] + Polarization to process. One or many among HH, HV, VH, VV. ''' info_channel = journal.info('stitch_burst.main') error_channel = journal.error('stitch_burst.main') @@ -92,7 +97,12 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): # contains only the burst IDs to stitch if burst_id_list is not None: metadata_dataframe = prune_dataframe(metadata_dataframe, - 'burst_id', burst_id_list) + 'burst_id', burst_id_list) + + # Prune dataframe for user-selected polarizations + if pol is not None: + metadata_dataframe = prune_dataframe(metadata_dataframe, + 'polarization', pol) # Identify common burst IDs among different dates ids2stitch = get_common_burst_ids(metadata_dataframe) @@ -106,7 +116,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): # For each burst ID, get common bursts boundary and store it # as a shapefile to be used by gdalwarp (later for cutting) - for burst_id in list(set(metadata_dataframe['burst_id'])): + for burst_id in set(metadata_dataframe['burst_id']): # Get info on polygons, epsg, granule polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist() epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist() @@ -281,7 +291,7 @@ def get_stitching_dict(burst_dir): ''' # Create dictionary where to store results cfg = {'burst_id': [], 'granule_id': [], 'polygon': [], - 'date': [], 'epsg': []} + 'date': [], 'epsg': [], 'polarization': []} # Get list of directory under dir_list dir_list = os.listdir(burst_dir) for dir in dir_list: @@ -293,9 +303,10 @@ def get_stitching_dict(burst_dir): # Read info and store in dictionary cfg['burst_id'].append(metadata_dict['burst_id']) + cfg['polarization'].append(metadata_dict['polarization']) datestr = metadata_dict['sensing_start'] date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = f"{metadata_dict['burst_id']}_{date}_VV.slc" + filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}') poly = metadata_dict['border'] cfg['polygon'].append(shapely.wkt.loads(poly)) @@ -362,4 +373,5 @@ def get_common_burst_ids(data): opts = command_line_parser() # Give these arguments to the main code main(opts.burst_dir, opts.outdir, - opts.scratch, opts.margin, opts.burst_id_list) + opts.scratch, opts.margin, opts.burst_id_list, + opts.pol) From 31d4426b9c08281fa90e020a9b96da1dc01effc2 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Tue, 27 Sep 2022 10:02:38 -0700 Subject: [PATCH 8/8] Change the directory structure of stitching code --- src/compass/utils/stitching/stitch_burst.py | 30 ++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 9ba0add1..5c41a062 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -23,7 +23,7 @@ def command_line_parser(): Stitch S1-A/B bursts for stack processing""", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir', - help='Directory with S1-A/B bursts organized by dates') + help='Directory with S1-A/B bursts organized by burst ID') parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', default=None, dest='burst_id_list', help='List of burst IDs to stitch. If None, common bursts' @@ -296,23 +296,23 @@ def get_stitching_dict(burst_dir): dir_list = os.listdir(burst_dir) for dir in dir_list: # List metadata files in the directory - json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*json')) + json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*/*json')) for json_path in json_meta_list: - with open(json_path) as json_file: + with open(json_path) as json_file: metadata_dict = json.load(json_file) - # Read info and store in dictionary - cfg['burst_id'].append(metadata_dict['burst_id']) - cfg['polarization'].append(metadata_dict['polarization']) - datestr = metadata_dict['sensing_start'] - date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" - cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}') - poly = metadata_dict['border'] - cfg['polygon'].append(shapely.wkt.loads(poly)) - cfg['date'].append(date) - geogrid = metadata_dict['geogrid'] - cfg['epsg'].append(geogrid['epsg']) + # Read info and store in dictionary + cfg['burst_id'].append(metadata_dict['burst_id']) + cfg['polarization'].append(metadata_dict['polarization']) + datestr = metadata_dict['sensing_start'] + date = datetime.fromisoformat(datestr).strftime("%Y%m%d") + filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" + cfg['granule_id'].append(f'{burst_dir}/{dir}/{date}/{filename}') + poly = metadata_dict['border'] + cfg['polygon'].append(shapely.wkt.loads(poly)) + cfg['date'].append(date) + geogrid = metadata_dict['geogrid'] + cfg['epsg'].append(geogrid['epsg']) return pd.DataFrame(data=cfg)