From cee55748b3b97369c497d6eebfaaf1510d13ff9c Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 4 Apr 2022 10:27:54 -0700
Subject: [PATCH 1/8] Stitching geocoded burst for stack processing

---
 src/compass/utils/stitching/stitch_burst.py | 399 ++++++++++++++++++++
 1 file changed, 399 insertions(+)
 create mode 100644 src/compass/utils/stitching/stitch_burst.py

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
new file mode 100644
index 00000000..8bf0585f
--- /dev/null
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -0,0 +1,399 @@
+import argparse
+import glob
+import json
+import os
+import time
+
+import isce3
+import journal
+import pandas as pd
+import shapely.wkt
+from compass.utils import helpers
+from osgeo import gdal, ogr
+from shapely.geometry import Polygon
+
+
+def command_line_parser():
+    """
+    Command line parser
+    """
+
+    parser = argparse.ArgumentParser(description="""
+                                     Stitch S1-A/B bursts for stack processing""",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('-d', '--indir', type=str, action='store', dest='indir',
+                        help='Directory with S1-A/B bursts organized by dates')
+    parser.add_argument('-b', '--burst-list', type=str, nargs='+',
+                        default=None, dest='burst_list',
+                        help='List of burst IDs to stitch. If None, common bursts'
+                             'among all dates will be stitched (default: None')
+    parser.add_argument('-m', '--margin', type=float,
+                        default=100, dest='margin',
+                        help='Margin to apply during stitching. Same units as bursts coordinate system.'
+                             '(default: 100 m, UTM)')
+    parser.add_argument('-s', '--scratchdir', type=str, default='scratch',
+                        dest='scratch',
+                        help='Directory where to store temporary results (default: scratch)')
+    parser.add_argument('-o', '--outdir', type=str, default='outdir',
+                        dest='outdir',
+                        help='Directory path where to store stitched bursts (default: outdir)')
+    return parser.parse_args()
+
+
+def main(indir, outdir, scratchdir, margin, burst_list):
+    '''
+    Stitch S1-A/B bursts for stack processing
+
+    Parameters:
+    ----------
+    indir: str
+       File path to directory containing S1-A/B bursts
+       organized by date
+    outdir: str
+       File path to directory where to store stitched bursts
+    scratchdir: str
+       File path to directory where to store intermediate
+       results (e.g., shapefiles of burst boundary)
+    margin: float
+       Margin to apply to burst boundary while stitching.
+       Same units as bursts coordinate system
+    burst_list: list [str]
+       List of burst IDs to stitch. If not provided, common
+       bursts among all dates are identified and considered
+       for stitching
+    '''
+    info_channel = journal.info('stitch_burst.main')
+    error_channel = journal.error('stitch_burst.main')
+
+    # Start tracking time
+    info_channel.log('Start burst stitching')
+    t_start = time.time()
+
+    # Check that input directory is valid
+    if not os.path.isdir(indir):
+        err_str = f'{indir} is not a valid input directory'
+        error_channel.log(err_str)
+        raise ValueError(err_str)
+
+    # Create output and scratch dirs if not existing
+    if not os.path.exists(outdir):
+        os.makedirs(outdir, exist_ok=True)
+        helpers.check_write_dir(outdir)
+    if not os.path.exists(scratchdir):
+        os.makedirs(scratchdir, exist_ok=True)
+        helpers.check_write_dir(scratchdir)
+
+    # Collect info for stitching in all dirs in 'indir'
+    # and return a panda dataframe with info
+    data_dict = get_stitching_dict(indir)
+
+    # If stitching some bursts, prune dataframe to
+    # contains only the burst IDs to stitch
+    if burst_list is not None:
+        data_dict = prune_dataframe(data_dict,
+                                    'burst_id', burst_list)
+
+    # Identify common burst IDs among different dates
+    ids2stitch = get_common_burst_ids(data_dict)
+
+    # Prune dataframe to contain only the IDs to stitch
+    data_dict = prune_dataframe(data_dict,
+                                'burst_id', ids2stitch)
+
+    # Track cut bursts by adding new column to dataframe
+    data_dict["cut_granule_id"] = ""
+
+    # For each burst ID, get common bursts boundary and store it
+    # as a shapefile to be used by gdalwarp (later for cutting)
+    for burst_id in list(set(data_dict['burst_id'])):
+        # Get info on polygons, epsg, granule
+        polys = data_dict.polygon[data_dict.burst_id == burst_id].tolist()
+        epsgs = data_dict.epsg[data_dict.burst_id == burst_id].tolist()
+        granules = data_dict.granule_id[data_dict.burst_id == burst_id].tolist()
+
+        # Get common burst boundary and save it as shapefile
+        common_poly, epsg = intersect_polygons(polys, epsgs, margin)
+        shp_filename = f'{scratchdir}/shp_{burst_id}.shp'
+        save_as_shapefile(common_poly, epsg, shp_filename)
+
+        # Cut all the same ID burts with shapefile
+        for granule in granules:
+            # Get raster resolution
+            xres, yres, epsg = get_raster_info(granule)
+
+            filename = os.path.splitext(os.path.basename(granule))[0]
+            cut_filename = f'{scratchdir}/cut_{filename}'
+            opts = gdal.WarpOptions(format='ENVI', xRes=xres,
+                                    yRes=yres, dstNodata='nan',
+                                    cutlineDSName=shp_filename,
+                                    multithread=True, warpMemoryLimit=3000,
+                                    srcSRS=f'EPSG:{epsg}',
+                                    dstSRS=f'EPSG:{epsg}',
+                                    targetAlignedPixels=True)
+            gdal.Warp(cut_filename, granule, options=opts)
+            # Save location of cut burst IDs (later for stitching)
+            data_dict.loc[data_dict['granule_id'] == granule,
+                          'cut_granule_id'] = cut_filename
+
+    # Start stitching by date
+    unique_dates = list(set(data_dict['date']))
+    for date in unique_dates:
+        cut_rasters = data_dict.cut_granule_id[data_dict.date == date].tolist()
+        xres, yres, epsg = get_raster_info(cut_rasters[0])
+        outfilename = f'{outdir}/stitched_{date}'
+        opts = gdal.WarpOptions(format='ENVI', xRes=xres,
+                                yRes=yres, dstNodata='nan',
+                                multithread=True, warpMemoryLimit=3000,
+                                srcSRS=f'EPSG:{epsg}',
+                                dstSRS=f'EPSG:{epsg}',
+                                targetAlignedPixels=True)
+        gdal.Warp(outfilename, cut_rasters, options=opts)
+
+    # Save data dictionary to keep trace of what has been merged
+    data_dict.to_csv(f'{outdir}/merged_report.csv')
+
+    # Log elapsed time for stitching
+    dt = time.time() - t_start
+    info_channel.log(f'Successfully run stitching in {dt:.3f} seconds')
+
+
+def get_raster_info(filename):
+    '''
+    Get raster X and Y resolution and epsg
+
+    Parameters:
+    -----------
+    filename: str
+       Filepath where raster is stored
+
+    Returns:
+    -------
+    xres, yres, epsg: (tuple, float)
+       Raster resolution along X and Y directions and epsg
+    '''
+    raster = isce3.io.Raster(filename)
+    return raster.dx, raster.dy, raster.get_epsg()
+
+
+def save_as_shapefile(polygon, epsg, outfile):
+    '''
+    Save polygon as an ESRI shapefile
+
+    Parameters:
+    ----------
+    polygon: shapely.geometry.Polygon
+       Shapely polygon identify burst boundary on the ground
+    epsg: int
+       EPSG code associate to 'polygon' coordinate system
+    outfile: str
+       File path to store create ESRI shapefile
+
+    '''
+    dest_srs = ogr.osr.SpatialReference()
+    dest_srs.ImportFromEPSG(int(epsg))
+    driver = ogr.GetDriverByName('ESRI Shapefile')
+    ds = driver.CreateDataSource(outfile)
+    layer = ds.CreateLayer('', None, ogr.wkbPolygon)
+
+    # Add attribute and create new feature
+    layer.CreateField(ogr.FieldDefn('id', ogr.OFTInteger))
+    defn = layer.GetLayerDefn()
+    feat = ogr.Feature(defn)
+    feat.SetField('id', 123)
+
+    # Make a geometry, from Shapely object
+    geom = ogr.CreateGeometryFromWkb(polygon.wkb)
+    geom.AssignSpatialReference(dest_srs)
+    feat.SetGeometry(geom)
+    layer.CreateFeature(feat)
+
+    # Clean up
+    feat = geom = None
+    ds = layer = feat = geom = None
+
+
+def intersect_polygons(polygons, epsgs, margin):
+    '''
+    Get the intersection of polygons stored in 'polygons'
+
+    Parameters:
+    -----------
+    polygons: list
+       List of shapely polygons to intersect
+    epsgs: list
+       List of EPSGs associate to 'polygons'
+
+    Returns:
+    -------
+    poly_int: shapely.Polygon
+       Result of polygon intersection
+    epsg_int: int
+       EPSG code associated to poly
+    '''
+    # Assert validity of inputs
+    assert (len(polygons) == len(epsgs))
+
+    # Initialize polygon and epsg of intersection
+    poly_int = polygons[0]
+    epsg_int = epsgs[0]
+
+    # Initialize coordinate transformation in case
+    # there are polygons with different epsgs
+    llh = ogr.osr.SpatialReference()
+    llh.ImportFromEPSG(epsg_int)
+    tgt = ogr.osr.SpatialReference()
+
+    for poly, epsg in zip(polygons, epsgs):
+        if epsg != epsg_int:
+            # Transform polygons in same coord system as epsg_int
+            tgt_x, tgt_y = [], []
+            x, y = poly.exterior.coords.xy
+            tgt.ImportFromEPSG(epsg)
+            trans = ogr.osr.CoordinateTransformation(llh, tgt)
+            for lx, ly in zip(x, y):
+                dummy_x, dummy_y, dummy_z = trans.Transform(lx, ly, 0)
+                tgt_x.append(dummy_x)
+                tgt_y.append(dummy_y)
+            poly = Polygon(list(zip(tgt_x, tgt_y)))
+        poly_int = poly.intersection(poly_int)
+
+    # To be conservative, apply some margin to the polygon (TO DO:
+    # check eps)
+    poly_int = poly_int.buffer(-margin)
+    return poly_int, epsg_int
+
+
+def get_stitching_dict(indir):
+    '''
+    Collect info on bursts to stitch and store them
+    in a panda dataframe
+
+    Parameters:
+    ----------
+    indir: str
+       Directory where bursts are stored (organized by date)
+
+    Returns:
+    -------
+    cfg: pandas.DataFrame
+       Dataframe collecting info on bursts to stitch
+    '''
+    # Create dictionary where to store results
+    cfg = {'burst_id': [], 'granule_id': [], 'polygon': [],
+           'date': [], 'epsg': []}
+    # Get list of directory under dir_list
+    dir_list = os.listdir(indir)
+    for dir in dir_list:
+        # List metadata files in the directory
+        meta_list = sorted(glob.glob(f'{indir}/{dir}/*json'))
+        for path in meta_list:
+            # Read metadata file
+            metadata = read_metadata(path)
+            # Read info and store in dictionary
+            cfg['burst_id'].append(get_metadata(metadata, 'burst_id'))
+            filename = get_metadata(metadata, 'granule_id')
+            cfg['granule_id'].append(f'{indir}/{dir}/{filename}')
+            poly = get_metadata(metadata, 'polygon')
+            cfg['polygon'].append(shapely.wkt.loads(poly))
+            cfg['date'].append(get_metadata(metadata, 'date'))
+            cfg['epsg'].append(get_metadata(metadata, 'epsg'))
+
+    return pd.DataFrame(data=cfg)
+
+
+def read_metadata(meta_file):
+    '''Read metadata file in a dictionary
+
+    Parameters:
+    -----------
+    meta_file: str
+       Filepath where metadata is located
+
+    Returns:
+    -------
+    cfg: dict
+       Dictionary containing metadata
+    '''
+    with open(meta_file) as json_file:
+        metadata = json.load(json_file)
+    return metadata
+
+
+def get_metadata(metadata, field):
+    '''
+    Get 'field" value from metadata dictionary
+
+    Parameters:
+    -----------
+    metadata: dict
+       Dictionary containing metadata for a burst
+    field: str
+       Field in the metadata to extract value for
+
+    Returns:
+    -------
+    value: float, int, shapely.Polygon
+       Value stored in the metadata field (type
+       depends on type of metadata extracted)
+    '''
+    value = metadata[field]
+    return value
+
+
+def prune_dataframe(data, id_col, id_list):
+    '''
+    Prune dataframe based on column ID and list of value
+
+    Parameters:
+    ----------
+    data: pandas.DataFrame
+       dataframe that needs to be pruned
+    id_col: str
+       column identification for 'data' (e.g. 'burst_id')
+    id_list: list
+       List of elements to keep after pruning. Elements not
+       in id_list but contained in 'data' will be pruned
+
+    Returns:
+    -------
+    data: pandas.DataFrame
+       Pruned dataframe with rows in 'id_list'
+    '''
+    pattern = '|'.join(id_list)
+    dataf = data.loc[data[id_col].str.contains(pattern,
+                                               case=False)]
+    return dataf
+
+
+def get_common_burst_ids(data):
+    '''
+    Get list of burst IDs common among all processed dates
+
+    Parameters:
+    ----------
+    data: pandas.DataFrame
+      Dataframe containing info for stitching (e.g. burst IDs)
+
+    Returns:
+    -------
+    common_id: list
+      List containing common burst IDs among all the dates
+    '''
+    # Identify all the dates for the bursts to stitch
+    unique_dates = list(set(data['date']))
+
+    # Initialize list of unique burst IDs
+    common_id = data.burst_id[data.date == unique_dates[0]]
+
+    for date in unique_dates:
+        ids = data.burst_id[data.date == date]
+        common_id = sorted(list(set(ids.tolist()) & set(common_id)))
+    return common_id
+
+
+if __name__ == '__main__':
+    # Get command line arguments
+    opts = command_line_parser()
+    # Give these arguments to the main code
+    main(opts.indir, opts.outdir,
+         opts.scratch, opts.margin, opts.burst_list)

From 6d5f80247a0baf1b146673471543cb34dc7ac6e2 Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 16 May 2022 10:51:09 -0700
Subject: [PATCH 2/8] Address compatibility with current version of main

---
 src/compass/utils/stitching/stitch_burst.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 8bf0585f..57144cf3 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -8,6 +8,7 @@
 import journal
 import pandas as pd
 import shapely.wkt
+from datetime import datetime
 from compass.utils import helpers
 from osgeo import gdal, ogr
 from shapely.geometry import Polygon
@@ -291,12 +292,16 @@ def get_stitching_dict(indir):
             metadata = read_metadata(path)
             # Read info and store in dictionary
             cfg['burst_id'].append(get_metadata(metadata, 'burst_id'))
-            filename = get_metadata(metadata, 'granule_id')
+            datestr = get_metadata(metadata, 'sensing_start')
+            date = datetime.fromisoformat(datestr).strftime("%Y%m%d")
+            filename = f"{get_metadata(metadata, 'burst_id')}_{date}_VV.slc"
             cfg['granule_id'].append(f'{indir}/{dir}/{filename}')
-            poly = get_metadata(metadata, 'polygon')
+            poly = get_metadata(metadata, 'border')
             cfg['polygon'].append(shapely.wkt.loads(poly))
-            cfg['date'].append(get_metadata(metadata, 'date'))
-            cfg['epsg'].append(get_metadata(metadata, 'epsg'))
+            cfg['date'].append(date)
+            geogrid = get_metadata(metadata, 'geogrid')
+            cfg['epsg'].append(geogrid['epsg'])
+
 
     return pd.DataFrame(data=cfg)
 

From 4dba162b2eed622061f170a12767be02feeb9689 Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 11 Jul 2022 08:07:11 -0700
Subject: [PATCH 3/8] Organize imports with pep8 convention

---
 src/compass/utils/stitching/stitch_burst.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 57144cf3..0bbb1ab7 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -3,12 +3,12 @@
 import json
 import os
 import time
+from datetime import datetime
 
 import isce3
 import journal
 import pandas as pd
 import shapely.wkt
-from datetime import datetime
 from compass.utils import helpers
 from osgeo import gdal, ogr
 from shapely.geometry import Polygon

From 0ee452c210eb2606811d2dc97f3034ec41501493 Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 11 Jul 2022 08:47:08 -0700
Subject: [PATCH 4/8] Remove metadata read and extraction functions

---
 src/compass/utils/stitching/stitch_burst.py | 59 ++++-----------------
 1 file changed, 10 insertions(+), 49 deletions(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 0bbb1ab7..4db4fc22 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -286,65 +286,26 @@ def get_stitching_dict(indir):
     dir_list = os.listdir(indir)
     for dir in dir_list:
         # List metadata files in the directory
-        meta_list = sorted(glob.glob(f'{indir}/{dir}/*json'))
-        for path in meta_list:
-            # Read metadata file
-            metadata = read_metadata(path)
+        json_meta_list = sorted(glob.glob(f'{indir}/{dir}/*json'))
+        for json_path in json_meta_list:
+            with open(json_path) as json_file:
+                metadata_dict = json.load(json_file)
+
             # Read info and store in dictionary
-            cfg['burst_id'].append(get_metadata(metadata, 'burst_id'))
-            datestr = get_metadata(metadata, 'sensing_start')
+            cfg['burst_id'].append(metadata_dict['burst_id'])
+            datestr = metadata_dict['sensing_start']
             date = datetime.fromisoformat(datestr).strftime("%Y%m%d")
-            filename = f"{get_metadata(metadata, 'burst_id')}_{date}_VV.slc"
+            filename = f"{metadata_dict['burst_id']}_{date}_VV.slc"
             cfg['granule_id'].append(f'{indir}/{dir}/{filename}')
-            poly = get_metadata(metadata, 'border')
+            poly = metadata_dict['border']
             cfg['polygon'].append(shapely.wkt.loads(poly))
             cfg['date'].append(date)
-            geogrid = get_metadata(metadata, 'geogrid')
+            geogrid = metadata_dict['geogrid']
             cfg['epsg'].append(geogrid['epsg'])
 
-
     return pd.DataFrame(data=cfg)
 
 
-def read_metadata(meta_file):
-    '''Read metadata file in a dictionary
-
-    Parameters:
-    -----------
-    meta_file: str
-       Filepath where metadata is located
-
-    Returns:
-    -------
-    cfg: dict
-       Dictionary containing metadata
-    '''
-    with open(meta_file) as json_file:
-        metadata = json.load(json_file)
-    return metadata
-
-
-def get_metadata(metadata, field):
-    '''
-    Get 'field" value from metadata dictionary
-
-    Parameters:
-    -----------
-    metadata: dict
-       Dictionary containing metadata for a burst
-    field: str
-       Field in the metadata to extract value for
-
-    Returns:
-    -------
-    value: float, int, shapely.Polygon
-       Value stored in the metadata field (type
-       depends on type of metadata extracted)
-    '''
-    value = metadata[field]
-    return value
-
-
 def prune_dataframe(data, id_col, id_list):
     '''
     Prune dataframe based on column ID and list of value

From 163e54fdfa975c2b5acf02b894e1f03eed60cb39 Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 11 Jul 2022 11:15:16 -0700
Subject: [PATCH 5/8] Rename variables for clarity

---
 src/compass/utils/stitching/stitch_burst.py | 38 ++++++++++-----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 4db4fc22..6d208b2a 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -22,10 +22,10 @@ def command_line_parser():
     parser = argparse.ArgumentParser(description="""
                                      Stitch S1-A/B bursts for stack processing""",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument('-d', '--indir', type=str, action='store', dest='indir',
+    parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir',
                         help='Directory with S1-A/B bursts organized by dates')
-    parser.add_argument('-b', '--burst-list', type=str, nargs='+',
-                        default=None, dest='burst_list',
+    parser.add_argument('-b', '--burst-id-list', type=str, nargs='+',
+                        default=None, dest='burst_id_list',
                         help='List of burst IDs to stitch. If None, common bursts'
                              'among all dates will be stitched (default: None')
     parser.add_argument('-m', '--margin', type=float,
@@ -41,13 +41,13 @@ def command_line_parser():
     return parser.parse_args()
 
 
-def main(indir, outdir, scratchdir, margin, burst_list):
+def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
     '''
     Stitch S1-A/B bursts for stack processing
 
     Parameters:
     ----------
-    indir: str
+    burst_dir: str
        File path to directory containing S1-A/B bursts
        organized by date
     outdir: str
@@ -58,7 +58,7 @@ def main(indir, outdir, scratchdir, margin, burst_list):
     margin: float
        Margin to apply to burst boundary while stitching.
        Same units as bursts coordinate system
-    burst_list: list [str]
+    burst_id_list: list [str]
        List of burst IDs to stitch. If not provided, common
        bursts among all dates are identified and considered
        for stitching
@@ -71,8 +71,8 @@ def main(indir, outdir, scratchdir, margin, burst_list):
     t_start = time.time()
 
     # Check that input directory is valid
-    if not os.path.isdir(indir):
-        err_str = f'{indir} is not a valid input directory'
+    if not os.path.isdir(burst_dir):
+        err_str = f'{burst_dir} is not a valid input directory'
         error_channel.log(err_str)
         raise ValueError(err_str)
 
@@ -84,15 +84,15 @@ def main(indir, outdir, scratchdir, margin, burst_list):
         os.makedirs(scratchdir, exist_ok=True)
         helpers.check_write_dir(scratchdir)
 
-    # Collect info for stitching in all dirs in 'indir'
+    # Collect info for stitching in all dirs in 'burst_dir'
     # and return a panda dataframe with info
-    data_dict = get_stitching_dict(indir)
+    data_dict = get_stitching_dict(burst_dir)
 
     # If stitching some bursts, prune dataframe to
     # contains only the burst IDs to stitch
-    if burst_list is not None:
+    if burst_id_list is not None:
         data_dict = prune_dataframe(data_dict,
-                                    'burst_id', burst_list)
+                                    'burst_id', burst_id_list)
 
     # Identify common burst IDs among different dates
     ids2stitch = get_common_burst_ids(data_dict)
@@ -264,14 +264,14 @@ def intersect_polygons(polygons, epsgs, margin):
     return poly_int, epsg_int
 
 
-def get_stitching_dict(indir):
+def get_stitching_dict(burst_dir):
     '''
     Collect info on bursts to stitch and store them
     in a panda dataframe
 
     Parameters:
     ----------
-    indir: str
+    burst_dir: str
        Directory where bursts are stored (organized by date)
 
     Returns:
@@ -283,10 +283,10 @@ def get_stitching_dict(indir):
     cfg = {'burst_id': [], 'granule_id': [], 'polygon': [],
            'date': [], 'epsg': []}
     # Get list of directory under dir_list
-    dir_list = os.listdir(indir)
+    dir_list = os.listdir(burst_dir)
     for dir in dir_list:
         # List metadata files in the directory
-        json_meta_list = sorted(glob.glob(f'{indir}/{dir}/*json'))
+        json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*json'))
         for json_path in json_meta_list:
             with open(json_path) as json_file:
                 metadata_dict = json.load(json_file)
@@ -296,7 +296,7 @@ def get_stitching_dict(indir):
             datestr = metadata_dict['sensing_start']
             date = datetime.fromisoformat(datestr).strftime("%Y%m%d")
             filename = f"{metadata_dict['burst_id']}_{date}_VV.slc"
-            cfg['granule_id'].append(f'{indir}/{dir}/{filename}')
+            cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}')
             poly = metadata_dict['border']
             cfg['polygon'].append(shapely.wkt.loads(poly))
             cfg['date'].append(date)
@@ -361,5 +361,5 @@ def get_common_burst_ids(data):
     # Get command line arguments
     opts = command_line_parser()
     # Give these arguments to the main code
-    main(opts.indir, opts.outdir,
-         opts.scratch, opts.margin, opts.burst_list)
+    main(opts.burst_dir, opts.outdir,
+         opts.scratch, opts.margin, opts.burst_id_list)

From 87cee4d24ef3ac4bcc67af7ea07f0f2c608f603e Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 11 Jul 2022 11:18:53 -0700
Subject: [PATCH 6/8] Rename data_dict to metadata_dataframe

---
 src/compass/utils/stitching/stitch_burst.py | 26 ++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 6d208b2a..8eb8829d 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -86,31 +86,31 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
 
     # Collect info for stitching in all dirs in 'burst_dir'
     # and return a panda dataframe with info
-    data_dict = get_stitching_dict(burst_dir)
+    metadata_dataframe = get_stitching_dict(burst_dir)
 
     # If stitching some bursts, prune dataframe to
     # contains only the burst IDs to stitch
     if burst_id_list is not None:
-        data_dict = prune_dataframe(data_dict,
+        metadata_dataframe = prune_dataframe(metadata_dataframe,
                                     'burst_id', burst_id_list)
 
     # Identify common burst IDs among different dates
-    ids2stitch = get_common_burst_ids(data_dict)
+    ids2stitch = get_common_burst_ids(metadata_dataframe)
 
     # Prune dataframe to contain only the IDs to stitch
-    data_dict = prune_dataframe(data_dict,
+    metadata_dataframe = prune_dataframe(metadata_dataframe,
                                 'burst_id', ids2stitch)
 
     # Track cut bursts by adding new column to dataframe
-    data_dict["cut_granule_id"] = ""
+    metadata_dataframe["cut_granule_id"] = ""
 
     # For each burst ID, get common bursts boundary and store it
     # as a shapefile to be used by gdalwarp (later for cutting)
-    for burst_id in list(set(data_dict['burst_id'])):
+    for burst_id in list(set(metadata_dataframe['burst_id'])):
         # Get info on polygons, epsg, granule
-        polys = data_dict.polygon[data_dict.burst_id == burst_id].tolist()
-        epsgs = data_dict.epsg[data_dict.burst_id == burst_id].tolist()
-        granules = data_dict.granule_id[data_dict.burst_id == burst_id].tolist()
+        polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist()
+        epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist()
+        granules = metadata_dataframe.granule_id[metadata_dataframe.burst_id == burst_id].tolist()
 
         # Get common burst boundary and save it as shapefile
         common_poly, epsg = intersect_polygons(polys, epsgs, margin)
@@ -133,13 +133,13 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
                                     targetAlignedPixels=True)
             gdal.Warp(cut_filename, granule, options=opts)
             # Save location of cut burst IDs (later for stitching)
-            data_dict.loc[data_dict['granule_id'] == granule,
+            metadata_dataframe.loc[metadata_dataframe['granule_id'] == granule,
                           'cut_granule_id'] = cut_filename
 
     # Start stitching by date
-    unique_dates = list(set(data_dict['date']))
+    unique_dates = list(set(metadata_dataframe['date']))
     for date in unique_dates:
-        cut_rasters = data_dict.cut_granule_id[data_dict.date == date].tolist()
+        cut_rasters = metadata_dataframe.cut_granule_id[metadata_dataframe.date == date].tolist()
         xres, yres, epsg = get_raster_info(cut_rasters[0])
         outfilename = f'{outdir}/stitched_{date}'
         opts = gdal.WarpOptions(format='ENVI', xRes=xres,
@@ -151,7 +151,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
         gdal.Warp(outfilename, cut_rasters, options=opts)
 
     # Save data dictionary to keep trace of what has been merged
-    data_dict.to_csv(f'{outdir}/merged_report.csv')
+    metadata_dataframe.to_csv(f'{outdir}/merged_report.csv')
 
     # Log elapsed time for stitching
     dt = time.time() - t_start

From 9cf73bdc88c8c332cc178ed641346456ef5c1ef3 Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Mon, 11 Jul 2022 11:36:56 -0700
Subject: [PATCH 7/8] stitch_burst.py

---
 src/compass/utils/stitching/stitch_burst.py | 44 +++++++++++++--------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 8eb8829d..9ba0add1 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -28,6 +28,9 @@ def command_line_parser():
                         default=None, dest='burst_id_list',
                         help='List of burst IDs to stitch. If None, common bursts'
                              'among all dates will be stitched (default: None')
+    parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', dest='pol',
+                        help='Polarization to process one or many between HH, HV, VH, VV'
+                             '(default: VV)')
     parser.add_argument('-m', '--margin', type=float,
                         default=100, dest='margin',
                         help='Margin to apply during stitching. Same units as bursts coordinate system.'
@@ -41,27 +44,29 @@ def command_line_parser():
     return parser.parse_args()
 
 
-def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
+def main(burst_dir, outdir, scratchdir, margin, burst_id_list, pol):
     '''
     Stitch S1-A/B bursts for stack processing
 
     Parameters:
     ----------
     burst_dir: str
-       File path to directory containing S1-A/B bursts
-       organized by date
+        File path to directory containing S1-A/B bursts
+        organized by date
     outdir: str
-       File path to directory where to store stitched bursts
+        File path to directory where to store stitched bursts
     scratchdir: str
-       File path to directory where to store intermediate
-       results (e.g., shapefiles of burst boundary)
+        File path to directory where to store intermediate
+        results (e.g., shapefiles of burst boundary)
     margin: float
-       Margin to apply to burst boundary while stitching.
-       Same units as bursts coordinate system
+        Margin to apply to burst boundary while stitching.
+        Same units as bursts coordinate system
     burst_id_list: list [str]
-       List of burst IDs to stitch. If not provided, common
-       bursts among all dates are identified and considered
-       for stitching
+        List of burst IDs to stitch. If not provided, common
+        bursts among all dates are identified and considered
+        for stitching
+    pol: list [str]
+        Polarization to process. One or many among HH, HV, VH, VV.
     '''
     info_channel = journal.info('stitch_burst.main')
     error_channel = journal.error('stitch_burst.main')
@@ -92,7 +97,12 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
     # contains only the burst IDs to stitch
     if burst_id_list is not None:
         metadata_dataframe = prune_dataframe(metadata_dataframe,
-                                    'burst_id', burst_id_list)
+                                             'burst_id', burst_id_list)
+
+    # Prune dataframe for user-selected polarizations
+    if pol is not None:
+        metadata_dataframe = prune_dataframe(metadata_dataframe,
+                                             'polarization', pol)
 
     # Identify common burst IDs among different dates
     ids2stitch = get_common_burst_ids(metadata_dataframe)
@@ -106,7 +116,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list):
 
     # For each burst ID, get common bursts boundary and store it
     # as a shapefile to be used by gdalwarp (later for cutting)
-    for burst_id in list(set(metadata_dataframe['burst_id'])):
+    for burst_id in set(metadata_dataframe['burst_id']):
         # Get info on polygons, epsg, granule
         polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist()
         epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist()
@@ -281,7 +291,7 @@ def get_stitching_dict(burst_dir):
     '''
     # Create dictionary where to store results
     cfg = {'burst_id': [], 'granule_id': [], 'polygon': [],
-           'date': [], 'epsg': []}
+           'date': [], 'epsg': [], 'polarization': []}
     # Get list of directory under dir_list
     dir_list = os.listdir(burst_dir)
     for dir in dir_list:
@@ -293,9 +303,10 @@ def get_stitching_dict(burst_dir):
 
             # Read info and store in dictionary
             cfg['burst_id'].append(metadata_dict['burst_id'])
+            cfg['polarization'].append(metadata_dict['polarization'])
             datestr = metadata_dict['sensing_start']
             date = datetime.fromisoformat(datestr).strftime("%Y%m%d")
-            filename = f"{metadata_dict['burst_id']}_{date}_VV.slc"
+            filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc"
             cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}')
             poly = metadata_dict['border']
             cfg['polygon'].append(shapely.wkt.loads(poly))
@@ -362,4 +373,5 @@ def get_common_burst_ids(data):
     opts = command_line_parser()
     # Give these arguments to the main code
     main(opts.burst_dir, opts.outdir,
-         opts.scratch, opts.margin, opts.burst_id_list)
+         opts.scratch, opts.margin, opts.burst_id_list,
+         opts.pol)

From 31d4426b9c08281fa90e020a9b96da1dc01effc2 Mon Sep 17 00:00:00 2001
From: vbrancat <virginia.brancato@jpl.nasa.gov>
Date: Tue, 27 Sep 2022 10:02:38 -0700
Subject: [PATCH 8/8] Change the directory structure of stitching code

---
 src/compass/utils/stitching/stitch_burst.py | 30 ++++++++++-----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
index 9ba0add1..5c41a062 100644
--- a/src/compass/utils/stitching/stitch_burst.py
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -23,7 +23,7 @@ def command_line_parser():
                                      Stitch S1-A/B bursts for stack processing""",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir',
-                        help='Directory with S1-A/B bursts organized by dates')
+                        help='Directory with S1-A/B bursts organized by burst ID')
     parser.add_argument('-b', '--burst-id-list', type=str, nargs='+',
                         default=None, dest='burst_id_list',
                         help='List of burst IDs to stitch. If None, common bursts'
@@ -296,23 +296,23 @@ def get_stitching_dict(burst_dir):
     dir_list = os.listdir(burst_dir)
     for dir in dir_list:
         # List metadata files in the directory
-        json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*json'))
+        json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*/*json'))
         for json_path in json_meta_list:
-            with open(json_path) as json_file:
+             with open(json_path) as json_file:
                 metadata_dict = json.load(json_file)
 
-            # Read info and store in dictionary
-            cfg['burst_id'].append(metadata_dict['burst_id'])
-            cfg['polarization'].append(metadata_dict['polarization'])
-            datestr = metadata_dict['sensing_start']
-            date = datetime.fromisoformat(datestr).strftime("%Y%m%d")
-            filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc"
-            cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}')
-            poly = metadata_dict['border']
-            cfg['polygon'].append(shapely.wkt.loads(poly))
-            cfg['date'].append(date)
-            geogrid = metadata_dict['geogrid']
-            cfg['epsg'].append(geogrid['epsg'])
+              # Read info and store in dictionary
+             cfg['burst_id'].append(metadata_dict['burst_id'])
+             cfg['polarization'].append(metadata_dict['polarization'])
+             datestr = metadata_dict['sensing_start']
+             date = datetime.fromisoformat(datestr).strftime("%Y%m%d")
+             filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc"
+             cfg['granule_id'].append(f'{burst_dir}/{dir}/{date}/{filename}')
+             poly = metadata_dict['border']
+             cfg['polygon'].append(shapely.wkt.loads(poly))
+             cfg['date'].append(date)
+             geogrid = metadata_dict['geogrid']
+             cfg['epsg'].append(geogrid['epsg'])
 
     return pd.DataFrame(data=cfg)