From cee55748b3b97369c497d6eebfaaf1510d13ff9c Mon Sep 17 00:00:00 2001
From: vbrancat
Date: Mon, 4 Apr 2022 10:27:54 -0700
Subject: [PATCH 01/42] Stitching geocoded bursts for stack processing

---
 src/compass/utils/stitching/stitch_burst.py | 399 ++++++++++++++++++++
 1 file changed, 399 insertions(+)
 create mode 100644 src/compass/utils/stitching/stitch_burst.py

diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py
new file mode 100644
index 00000000..8bf0585f
--- /dev/null
+++ b/src/compass/utils/stitching/stitch_burst.py
@@ -0,0 +1,399 @@
+import argparse
+import glob
+import json
+import os
+import time
+
+import isce3
+import journal
+import pandas as pd
+import shapely.wkt
+from compass.utils import helpers
+from osgeo import gdal, ogr
+from shapely.geometry import Polygon
+
+
+def command_line_parser():
+    """
+    Command line parser
+    """
+
+    parser = argparse.ArgumentParser(description="""
+                                     Stitch S1-A/B bursts for stack processing""",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('-d', '--indir', type=str, action='store', dest='indir',
+                        help='Directory with S1-A/B bursts organized by dates')
+    parser.add_argument('-b', '--burst-list', type=str, nargs='+',
+                        default=None, dest='burst_list',
+                        help='List of burst IDs to stitch. If None, common bursts '
+                             'among all dates will be stitched (default: None)')
+    parser.add_argument('-m', '--margin', type=float,
+                        default=100, dest='margin',
+                        help='Margin to apply during stitching. Same units as bursts coordinate '
+                             'system (default: 100 m, UTM)')
+    parser.add_argument('-s', '--scratchdir', type=str, default='scratch',
+                        dest='scratch',
+                        help='Directory where to store temporary results (default: scratch)')
+    parser.add_argument('-o', '--outdir', type=str, default='outdir',
+                        dest='outdir',
+                        help='Directory path where to store stitched bursts (default: outdir)')
+    return parser.parse_args()
+
+
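For quick reference, a typical invocation of the parser above might look like
the following (the paths and burst ID are hypothetical, for illustration only):

    python stitch_burst.py --indir bursts_by_date --burst-list t071_151200_iw1 \
        --margin 100 --scratchdir scratch --outdir stitched_bursts

+def main(indir, outdir, scratchdir, margin, burst_list):
+    '''
+    Stitch S1-A/B bursts for stack processing
+
+    Parameters:
+    ----------
+    indir: str
+        File path to directory containing S1-A/B bursts
+        organized by date
+    outdir: str
+        File path to directory where to store stitched bursts
+    scratchdir: str
+        File path to directory where to store intermediate
+        results (e.g., shapefiles of burst boundary)
+    margin: float
+        Margin to apply to burst boundary while stitching.
+        Same units as bursts coordinate system
+    burst_list: list [str]
+        List of burst IDs to stitch. 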
If not provided, common + bursts among all dates are identified and considered + for stitching + ''' + info_channel = journal.info('stitch_burst.main') + error_channel = journal.error('stitch_burst.main') + + # Start tracking time + info_channel.log('Start burst stitching') + t_start = time.time() + + # Check that input directory is valid + if not os.path.isdir(indir): + err_str = f'{indir} is not a valid input directory' + error_channel.log(err_str) + raise ValueError(err_str) + + # Create output and scratch dirs if not existing + if not os.path.exists(outdir): + os.makedirs(outdir, exist_ok=True) + helpers.check_write_dir(outdir) + if not os.path.exists(scratchdir): + os.makedirs(scratchdir, exist_ok=True) + helpers.check_write_dir(scratchdir) + + # Collect info for stitching in all dirs in 'indir' + # and return a panda dataframe with info + data_dict = get_stitching_dict(indir) + + # If stitching some bursts, prune dataframe to + # contains only the burst IDs to stitch + if burst_list is not None: + data_dict = prune_dataframe(data_dict, + 'burst_id', burst_list) + + # Identify common burst IDs among different dates + ids2stitch = get_common_burst_ids(data_dict) + + # Prune dataframe to contain only the IDs to stitch + data_dict = prune_dataframe(data_dict, + 'burst_id', ids2stitch) + + # Track cut bursts by adding new column to dataframe + data_dict["cut_granule_id"] = "" + + # For each burst ID, get common bursts boundary and store it + # as a shapefile to be used by gdalwarp (later for cutting) + for burst_id in list(set(data_dict['burst_id'])): + # Get info on polygons, epsg, granule + polys = data_dict.polygon[data_dict.burst_id == burst_id].tolist() + epsgs = data_dict.epsg[data_dict.burst_id == burst_id].tolist() + granules = data_dict.granule_id[data_dict.burst_id == burst_id].tolist() + + # Get common burst boundary and save it as shapefile + common_poly, epsg = intersect_polygons(polys, epsgs, margin) + shp_filename = f'{scratchdir}/shp_{burst_id}.shp' + save_as_shapefile(common_poly, epsg, shp_filename) + + # Cut all the same ID burts with shapefile + for granule in granules: + # Get raster resolution + xres, yres, epsg = get_raster_info(granule) + + filename = os.path.splitext(os.path.basename(granule))[0] + cut_filename = f'{scratchdir}/cut_{filename}' + opts = gdal.WarpOptions(format='ENVI', xRes=xres, + yRes=yres, dstNodata='nan', + cutlineDSName=shp_filename, + multithread=True, warpMemoryLimit=3000, + srcSRS=f'EPSG:{epsg}', + dstSRS=f'EPSG:{epsg}', + targetAlignedPixels=True) + gdal.Warp(cut_filename, granule, options=opts) + # Save location of cut burst IDs (later for stitching) + data_dict.loc[data_dict['granule_id'] == granule, + 'cut_granule_id'] = cut_filename + + # Start stitching by date + unique_dates = list(set(data_dict['date'])) + for date in unique_dates: + cut_rasters = data_dict.cut_granule_id[data_dict.date == date].tolist() + xres, yres, epsg = get_raster_info(cut_rasters[0]) + outfilename = f'{outdir}/stitched_{date}' + opts = gdal.WarpOptions(format='ENVI', xRes=xres, + yRes=yres, dstNodata='nan', + multithread=True, warpMemoryLimit=3000, + srcSRS=f'EPSG:{epsg}', + dstSRS=f'EPSG:{epsg}', + targetAlignedPixels=True) + gdal.Warp(outfilename, cut_rasters, options=opts) + + # Save data dictionary to keep trace of what has been merged + data_dict.to_csv(f'{outdir}/merged_report.csv') + + # Log elapsed time for stitching + dt = time.time() - t_start + info_channel.log(f'Successfully run stitching in {dt:.3f} seconds') + + +def get_raster_info(filename): + 
'''
+    Get raster X and Y resolution and EPSG code
+
+    Parameters:
+    -----------
+    filename: str
+        File path where raster is stored
+
+    Returns:
+    -------
+    xres, yres, epsg: tuple (float, float, int)
+        Raster resolution along the X and Y directions and EPSG code
+    '''
+    raster = isce3.io.Raster(filename)
+    return raster.dx, raster.dy, raster.get_epsg()
+
+
+def save_as_shapefile(polygon, epsg, outfile):
+    '''
+    Save polygon as an ESRI shapefile
+
+    Parameters:
+    ----------
+    polygon: shapely.geometry.Polygon
+        Shapely polygon identifying the burst boundary on the ground
+    epsg: int
+        EPSG code associated with the 'polygon' coordinate system
+    outfile: str
+        File path of the ESRI shapefile to create
+
+    '''
+    dest_srs = ogr.osr.SpatialReference()
+    dest_srs.ImportFromEPSG(int(epsg))
+    driver = ogr.GetDriverByName('ESRI Shapefile')
+    ds = driver.CreateDataSource(outfile)
+    layer = ds.CreateLayer('', None, ogr.wkbPolygon)
+
+    # Add attribute and create new feature
+    layer.CreateField(ogr.FieldDefn('id', ogr.OFTInteger))
+    defn = layer.GetLayerDefn()
+    feat = ogr.Feature(defn)
+    feat.SetField('id', 123)
+
+    # Make a geometry from the Shapely object
+    geom = ogr.CreateGeometryFromWkb(polygon.wkb)
+    geom.AssignSpatialReference(dest_srs)
+    feat.SetGeometry(geom)
+    layer.CreateFeature(feat)
+
+    # Clean up (dereference to flush and close the dataset)
+    ds = layer = feat = geom = None
+
+
+def intersect_polygons(polygons, epsgs, margin):
+    '''
+    Get the intersection of polygons stored in 'polygons'
+
+    Parameters:
+    -----------
+    polygons: list
+        List of shapely polygons to intersect
+    epsgs: list
+        List of EPSGs associated with 'polygons'
+    margin: float
+        Margin (in 'polygons' units) by which to shrink the intersection
+
+    Returns:
+    -------
+    poly_int: shapely.Polygon
+        Result of polygon intersection
+    epsg_int: int
+        EPSG code associated with poly_int
+    '''
+    # Assert validity of inputs
+    assert (len(polygons) == len(epsgs))
+
+    # Initialize polygon and epsg of intersection
+    poly_int = polygons[0]
+    epsg_int = epsgs[0]
+
+    # Initialize coordinate transformation in case
+    # there are polygons with different epsgs
+    dst_srs = ogr.osr.SpatialReference()
+    dst_srs.ImportFromEPSG(epsg_int)
+    src_srs = ogr.osr.SpatialReference()
+
+    for poly, epsg in zip(polygons, epsgs):
+        if epsg != epsg_int:
+            # Transform polygon coordinates from their own EPSG
+            # into the common 'epsg_int' coordinate system
+            tgt_x, tgt_y = [], []
+            x, y = poly.exterior.coords.xy
+            src_srs.ImportFromEPSG(epsg)
+            trans = ogr.osr.CoordinateTransformation(src_srs, dst_srs)
+            for lx, ly in zip(x, y):
+                dummy_x, dummy_y, dummy_z = trans.Transform(lx, ly, 0)
+                tgt_x.append(dummy_x)
+                tgt_y.append(dummy_y)
+            poly = Polygon(list(zip(tgt_x, tgt_y)))
+        poly_int = poly.intersection(poly_int)
+
+    # To be conservative, shrink the intersection by 'margin' (TO DO:
+    # check eps)
+    poly_int = poly_int.buffer(-margin)
+    return poly_int, epsg_int
+
+
+def get_stitching_dict(indir):
+    '''
+    Collect info on bursts to stitch and store them
+    in a pandas dataframe
+
+    Parameters:
+    ----------
+    indir: str
+        Directory where bursts are stored (organized by date)
+
+    Returns:
+    -------
+    cfg: pandas.DataFrame
+        Dataframe collecting info on bursts to stitch
+    '''
+    # Create dictionary where to store results
+    cfg = {'burst_id': [], 'granule_id': [], 'polygon': [],
+           'date': [], 'epsg': []}
+    # Get list of date directories under 'indir'
+    dir_list = os.listdir(indir)
+    for dir in dir_list:
+        # List metadata files in the directory
+        meta_list = sorted(glob.glob(f'{indir}/{dir}/*json'))
+        for path in meta_list:
+            # Read metadata file
+            metadata = read_metadata(path)
+            # Read info and store in dictionary
+            cfg['burst_id'].append(get_metadata(metadata, 'burst_id'))
+            filename = get_metadata(metadata, 'granule_id')
+            cfg['granule_id'].append(f'{indir}/{dir}/{filename}')
+            poly = get_metadata(metadata, 'polygon')
+            cfg['polygon'].append(shapely.wkt.loads(poly))
+            cfg['date'].append(get_metadata(metadata, 'date'))
+            cfg['epsg'].append(get_metadata(metadata, 'epsg'))
+
+    return pd.DataFrame(data=cfg)
+
+
+def read_metadata(meta_file):
+    '''Read metadata file into a dictionary
+
+    Parameters:
+    -----------
+    meta_file: str
+        File path where metadata is located
+
+    Returns:
+    -------
+    metadata: dict
+        Dictionary containing metadata
+    '''
+    with open(meta_file) as json_file:
+        metadata = json.load(json_file)
+    return metadata
+
+
+def get_metadata(metadata, field):
+    '''
+    Get 'field' value from metadata dictionary
+
+    Parameters:
+    -----------
+    metadata: dict
+        Dictionary containing metadata for a burst
+    field: str
+        Field in the metadata to extract value for
+
+    Returns:
+    -------
+    value: float, int, shapely.Polygon
+        Value stored in the metadata field (type
+        depends on type of metadata extracted)
+    '''
+    value = metadata[field]
+    return value
+
+
+def prune_dataframe(data, id_col, id_list):
+    '''
+    Prune dataframe based on column ID and list of values
+
+    Parameters:
+    ----------
+    data: pandas.DataFrame
+        Dataframe that needs to be pruned
+    id_col: str
+        Column identification for 'data' (e.g. 'burst_id')
+    id_list: list
+        List of elements to keep after pruning. Elements not
+        in id_list but contained in 'data' will be pruned
+
+    Returns:
+    -------
+    data: pandas.DataFrame
+        Pruned dataframe with rows in 'id_list'
+    '''
+    pattern = '|'.join(id_list)
+    dataf = data.loc[data[id_col].str.contains(pattern,
+                                               case=False)]
+    return dataf
+
+
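Note that prune_dataframe above matches with pandas str.contains, so each
entry in id_list acts as a (case-insensitive) substring pattern rather than
an exact ID. A minimal sketch of that behavior, using toy burst IDs rather
than real data, runnable once the function above is defined:

    import pandas as pd

    toy = pd.DataFrame({'burst_id': ['t071_151200_iw1',
                                     't071_151200_iw2',
                                     't071_151201_iw1']})
    # '151200' keeps two rows, not one, because it matches as a substring
    kept = prune_dataframe(toy, 'burst_id', ['151200'])
    print(kept['burst_id'].tolist())
    # ['t071_151200_iw1', 't071_151200_iw2']

+def get_common_burst_ids(data):
+    '''
+    Get list of burst IDs common among all processed dates
+
+    Parameters:
+    ----------
+    data: pandas.DataFrame
+        Dataframe containing info for stitching (e.g. 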
burst IDs) + + Returns: + ------- + common_id: list + List containing common burst IDs among all the dates + ''' + # Identify all the dates for the bursts to stitch + unique_dates = list(set(data['date'])) + + # Initialize list of unique burst IDs + common_id = data.burst_id[data.date == unique_dates[0]] + + for date in unique_dates: + ids = data.burst_id[data.date == date] + common_id = sorted(list(set(ids.tolist()) & set(common_id))) + return common_id + + +if __name__ == '__main__': + # Get command line arguments + opts = command_line_parser() + # Give these arguments to the main code + main(opts.indir, opts.outdir, + opts.scratch, opts.margin, opts.burst_list) From 9630de3ee0f54ac2020a493e613f4867b8f6d2cd Mon Sep 17 00:00:00 2001 From: vbrancat Date: Tue, 10 May 2022 13:17:13 -0700 Subject: [PATCH 02/42] First draft of geocoded bursts stack processor --- src/compass/utils/defaults/geo_cslc_s1.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compass/utils/defaults/geo_cslc_s1.yaml b/src/compass/utils/defaults/geo_cslc_s1.yaml index 985d46ce..f4d40114 100644 --- a/src/compass/utils/defaults/geo_cslc_s1.yaml +++ b/src/compass/utils/defaults/geo_cslc_s1.yaml @@ -35,7 +35,7 @@ runconfig: output_format: ENVI flatten: True # Dem margin (in units of input DEM) - dem_margin: 0.1 + dem_margin: 10 lines_per_block: 1000 output_epsg: x_posting: From 63782c0dc08dc961c33a0f62ab88c47927101729 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Wed, 11 May 2022 10:58:17 -0700 Subject: [PATCH 03/42] Stack processor code --- src/compass/s1_geo_stack.py | 511 ++++++++++++++++++++++++++++++++++++ 1 file changed, 511 insertions(+) create mode 100644 src/compass/s1_geo_stack.py diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py new file mode 100644 index 00000000..f670f157 --- /dev/null +++ b/src/compass/s1_geo_stack.py @@ -0,0 +1,511 @@ +import argparse +import os, sys, glob +import journal +import cgi +import requests +import datetime +import pandas as pd +import yaml +import isce3 +import numpy as np + +from xml.etree import ElementTree +from osgeo import osr +from s1reader.s1_reader import load_bursts +from s1reader.s1_orbit import get_orbit_file_from_list +from compass.geo_cslc import run +from compass.utils import helpers +from compass.utils.runconfig import load_validate_yaml + +from compass.utils.geogrid import get_point_epsg +from compass.utils.geo_runconfig import GeoRunConfig + +# Required for orbit download +scihub_url = 'https://scihub.copernicus.eu/gnss/odata/v1/Products' +# Namespaces of the XML file returned by the S1 query. Will they change it? +w3_url = '{http://www.w3.org/2005/Atom}' +m_url = '{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}' +d_url = '{http://schemas.microsoft.com/ado/2007/08/dataservices}' +# Scihub guest credential +scihub_user = 'gnssguest' +scihub_password = 'gnssguest' + + +def create_parser(): + parser = argparse.ArgumentParser( + description='S1-A/B geocoded CSLC stack processor.') + parser.add_argument('-s', '--slc-dir', dest='slc_dir', type=str, + required=True, + help='Directory containing the S1-A/B SLCs (zip files)') + parser.add_argument('-d', '--dem-file', dest='dem_file', type=str, + required=True, + help='File path to DEM to use for processing.') + parser.add_argument('-o', '--orbit-dir', dest='orbit_dir', type=str, + default=None, + help='Directory with orbit files. 
If None, downloads orbit files') + parser.add_argument('-w', '--working-dir', dest='work_dir', type=str, + default='stack', + help='Directory to store intermediate and final results') + parser.add_argument('-r', '--ref-date', dest='ref_date', type=int, + default=None, + help='Date of reference acquisition (yyyymmdd). If None, first acquisition of ' + 'the stack is selected as reference.') + parser.add_argument('-b', '--burst-id', dest='burst_id', nargs='+', + default=None, + help='List of burst IDs to process. If None, all the burst IDs in the' + 'reference date are processed. (default: None)') + parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', + help='Polarization to process: dual-pol, co-pol, cross-pol (default: co-pol).') + parser.add_argument('-x', '--x-spac', dest='x_spac', type=float, default=5, + help='Spacing in meters of geocoded CSLC along X-direction.') + parser.add_argument('-y', '--y-spac', dest='y_spac', type=float, default=10, + help='Spacing in meters of geocoded CSLC along Y-direction.') + parser.add_argument('-e', '--epsg', dest='epsg', type=int, default=None, + help='EPSG projection code for output geocoded bursts') + parser.add_argument('-f', '--flatten', dest='flatten', type=bool, + default=True, + help='If True, enables flattening (default: True)') + parser.add_argument('-ss', '--range-split-spectrum', + dest='is_split_spectrum', type=bool, default=False, + help='If True, enables split-spectrum (default: False)') + parser.add_argument('-lb', '--low-band', dest='low_band', type=float, + default=0.0, + help='Low sub-band bandwidth in Hz (default: 0.0)') + parser.add_argument('-hb', '--high-band', dest='high_band', type=float, + default=0.0, + help='High sub-band bandwidth in Hz (default: 0.0') + return parser.parse_args() + + +def check_internet_connection(): + ''' + Check connection availability + ''' + url = "http://google.com" + try: + request = requests.get(url, timeout=10) + except (requests.ConnectionError, requests.Timeout) as exception: + raise sys.exit(exception) + + +def parse_safe_filename(safe_filename): + ''' + Extract info from S1-A/B SAFE filename + SAFE filename structure: S1A_IW_SLC__1SDV_20150224T114043_20150224T114111_004764_005E86_AD02.SAFE + + Parameters: + ----------- + safe_filename: string + Path to S1-A/B SAFE file + + Returns: + ------- + List of [sensor_id, mode_id, start_datetime, + end_datetime, abs_orbit_num] + sensor_id: sensor identifier (S1A or S1B) + mode_id: mode/beam (e.g. 
IW)
+    start_datetime: acquisition start datetime
+    end_datetime: acquisition end datetime
+    abs_orbit_num: absolute orbit number
+    '''
+
+    safe_name = os.path.basename(safe_filename)
+    sensor_id = safe_name[2]
+    sensor_mode = safe_name[4:10]
+    start_datetime = datetime.datetime.strptime(safe_name[17:32],
+                                                '%Y%m%dT%H%M%S')
+    end_datetime = datetime.datetime.strptime(safe_name[33:48],
+                                              '%Y%m%dT%H%M%S')
+    abs_orb_num = int(safe_name[49:55])
+
+    return [sensor_id, sensor_mode, start_datetime, end_datetime, abs_orb_num]
+
+
+def get_orbit_dict(sensor_id, start_time, end_time, orbit_type):
+    '''
+    Query Copernicus GNSS API to find latest orbit file
+
+    Parameters:
+    ----------
+    sensor_id: str
+        Sentinel satellite identifier ('A' or 'B')
+    start_time: datetime object
+        Sentinel start acquisition time
+    end_time: datetime object
+        Sentinel end acquisition time
+    orbit_type: str
+        Type of orbit to download (AUX_POEORB: precise, AUX_RESORB: restituted)
+
+    Returns:
+    -------
+    orbit_dict: dict
+        Python dictionary with keys 'orbit_name', 'orbit_type', 'orbit_url'
+    '''
+    # Check if correct orbit_type
+    if orbit_type not in ['AUX_POEORB', 'AUX_RESORB']:
+        err_msg = f'{orbit_type} not a valid orbit type'
+        raise ValueError(err_msg)
+
+    # Add a 30 min margin to start_time and end_time
+    pad_start_time = start_time - datetime.timedelta(hours=0.5)
+    pad_end_time = end_time + datetime.timedelta(hours=0.5)
+    new_start_time = pad_start_time.strftime('%Y-%m-%dT%H:%M:%S')
+    new_end_time = pad_end_time.strftime('%Y-%m-%dT%H:%M:%S')
+    query_string = f"startswith(Name,'S1{sensor_id}') and substringof('{orbit_type}',Name) " \
+                   f"and ContentDate/Start lt datetime'{new_start_time}' and ContentDate/End gt datetime'{new_end_time}'"
+    query_params = {'$top': 1, '$orderby': 'ContentDate/Start asc',
+                    '$filter': query_string}
+    query_response = requests.get(url=scihub_url, params=query_params,
+                                  auth=(scihub_user, scihub_password))
+    # Parse XML tree from query response
+    xml_tree = ElementTree.fromstring(query_response.content)
+    # Extract the w3.org namespace prefix from the response root tag
+    w3_url = xml_tree.tag.split('feed')[0]
+
+    # Extract orbit's name, id, url
+    orbit_id = xml_tree.findtext(
+        f'.//{w3_url}entry/{m_url}properties/{d_url}Id')
+    orbit_url = f"{scihub_url}('{orbit_id}')/$value"
+    orbit_name = xml_tree.findtext(f'./{w3_url}entry/{w3_url}title')
+
+    if orbit_id is not None:
+        orbit_dict = {'orbit_name': orbit_name, 'orbit_type': orbit_type,
+                      'orbit_url': orbit_url}
+    else:
+        orbit_dict = None
+    return orbit_dict
+
+
+def download_orbit(output_folder, orbit_url):
+    '''
+    Download S1-A/B orbits
+
+    Parameters:
+    ----------
+    output_folder: str
+        Path to directory where to store orbits
+    orbit_url: str
+        Remote url of orbit file to download
+    '''
+
+    response = requests.get(url=orbit_url, auth=(scihub_user, scihub_password))
+    # Get header and find filename
+    header = response.headers['content-disposition']
+    header_value, header_params = cgi.parse_header(header)
+    # construct orbit filename
+    orbit_filename = os.path.join(output_folder, header_params['filename'])
+    # Save orbits
+    open(orbit_filename, 'wb').write(response.content)
+
+
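A short usage sketch of the two helpers above, reusing the SAFE name quoted
in the parse_safe_filename docstring (assumes network access to scihub and an
existing 'orbits' directory; for illustration only):

    safe = 'S1A_IW_SLC__1SDV_20150224T114043_20150224T114111_004764_005E86_AD02.SAFE'
    sensor_id, _, t_start, t_end, _ = parse_safe_filename(safe)
    orbit_dict = get_orbit_dict(sensor_id, t_start, t_end, 'AUX_POEORB')
    if orbit_dict is not None:
        download_orbit('orbits', orbit_dict['orbit_url'])

generate_burst_map below snaps each geogrid corner outward to an integer
multiple of the posting, so that bursts sharing a boundary land on a common
grid. A toy illustration of the same floor/ceil rule, with made-up numbers:

    import numpy as np

    x_spac, start_x, width = 5.0, 499998.7, 120
    x_top_left = x_spac * np.floor(start_x / x_spac)  # -> 499995.0
    x_bottom_right = x_spac * np.ceil((start_x + x_spac * width) / x_spac)
    # -> 500600.0; corners only ever move outward, never crop the burst

+def generate_burst_map(ref_file, orbit_dir, x_spac, y_spac, epsg=4326):
+    '''
+    Generates dataframe containing geogrid info for each burst ID in
+    the ref_file
+
+    Parameters
+    ----------
+    ref_file: str
+        File path to the stack reference file
+    orbit_dir: str
+        Directory containing sensor orbit ephemerides
+    x_spac: float
+        Spacing of geocoded burst along X-direction
+    y_spac: float
+        Spacing of geocoded burst along 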
Y-direction + epsg: int + EPSG code identifying output product projection system + + Returns + ------- + burst_map: pandas.Dataframe + Pandas dataframe containing geogrid info (e.g. top-left, bottom-right + x and y coordinates) for each burst to process + ''' + # Initialize dictionary that contains all the info for geocoding + burst_map = {'burst_id': [], 'x_top_left': [], 'y_top_left': [], + 'x_bottom_right': [], 'y_bottom_right': [], 'epsg': []} + + # Get all the bursts from safe file + i_subswath = [1, 2, 3] + orbit_path = get_orbit_file_from_list(ref_file, + glob.glob(f'{orbit_dir}/S1*')) + + for subswath in i_subswath: + ref_bursts = load_bursts(ref_file, orbit_path, subswath) + for burst in ref_bursts: + burst_map['burst_id'].append(burst.burst_id) + + # TO DO: Correct when integrated to compass + if epsg is None: + epsg = get_point_epsg(burst.center.y, + burst.center.x) + + # Initialize geogrid with the info checked at this stage + geogrid = isce3.product.bbox_to_geogrid(burst.as_isce3_radargrid(), + burst.orbit, + isce3.core.LUT2d(), + x_spac, -y_spac, + epsg) + + # Snap coordinates so that adjacent burst coordinates are integer + # multiples of the spacing in X-/Y-directions + burst_map['x_top_left'].append( + x_spac * np.floor(geogrid.start_x / x_spac)) + burst_map['y_top_left'].append( + y_spac * np.ceil(geogrid.start_y / y_spac)) + burst_map['x_bottom_right'].append( + x_spac * np.ceil( + (geogrid.start_x + x_spac * geogrid.width) / x_spac)) + burst_map['y_bottom_right'].append( + y_spac * np.floor( + (geogrid.start_y + y_spac * geogrid.length) / y_spac)) + burst_map['epsg'].append(epsg) + + map = pd.DataFrame(data=burst_map) + return map + + +def prune_dataframe(data, id_col, id_list): + ''' + Prune dataframe based on column ID and list of value + Parameters: + ---------- + data: pandas.DataFrame + dataframe that needs to be pruned + id_col: str + column identification for 'data' (e.g. 'burst_id') + id_list: list + List of elements to keep after pruning. 
Elements not + in id_list but contained in 'data' will be pruned + Returns: + ------- + data: pandas.DataFrame + Pruned dataframe with rows in 'id_list' + ''' + pattern = '|'.join(id_list) + dataf = data.loc[data[id_col].str.contains(pattern, + case=False)] + return dataf + + +def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, + burst_map, flatten, enable_rss, low_band, high_band, pol, + x_spac, y_spac): + ''' + Create runconfig to process geocoded bursts + + Parameters + --------- + burst: Sentinel1BurstSlc + Object containing info on burst to process + safe: str + Path to SAFE file containing burst to process + orbit_path: str + Path to orbit file related to burst to process + dem_file: str + Path to DEM to use for processing + work_dir: str + Path to working directory for temp and final results + burst_map: pandas.Dataframe + Pandas dataframe containing burst top-left, bottom-right coordinates + flatten: bool + Flag to enable/disable flattening + enable_rss: bool + Flag to enable range split-spectrum + low-band: float + Low sub-image bandwidth (in Hz) for split-spectrum + high-band: float + High sub-image bandwidth (in Hz) for split-spectrum + pol: str + Polarizations to process: co-pol, dual-pol, cross-pol + x_spac: float + Spacing of geocoded burst along X-direction + y_spac: float + Spacing of geocoded burst along Y-direction + ''' + # Load default runconfig and fill it with user-defined options + yaml_path = f'{helpers.WORKFLOW_SCRIPTS_DIR}/defaults/geo_cslc_s1.yaml' + with open(yaml_path, 'r') as stream: + yaml_cfg = yaml.safe_load(stream) + + groups = yaml_cfg['runconfig']['groups'] + inputs = groups['input_file_group'] + product = groups['product_path_group'] + process = groups['processing'] + geocode = process['geocoding'] + rss = process['range_split_spectrum'] + + # Allocate Inputs + inputs['safe_file_path'] = [safe] + inputs['orbit_file_path'] = [orbit_path] + inputs['burst_id'] = burst.burst_id + groups['dynamic_ancillary_file_group']['dem_file'] = dem_file + + # Product path + product['product_path'] = work_dir + product['scratch_path'] = f'{work_dir}/scratch' + product['sas_output_file'] = work_dir + + # Geocoding + process['polarization'] = pol + geocode['flatten'] = flatten + geocode['x_posting'] = x_spac + geocode['y_posting'] = y_spac + geocode['top_left']['x'] = \ + burst_map.x_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] + geocode['top_left']['y'] = \ + burst_map.y_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] + geocode['bottom_right']['x'] = \ + burst_map.x_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[0] + geocode['bottom_right']['y'] = \ + burst_map.y_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[0] + # geocode['x_snap'] = None + # geocode['y_snap'] = None + geocode['output_epsg'] = \ + burst_map.epsg[burst_map.burst_id == burst.burst_id].tolist()[0] + + # Range split spectrum + rss['enabled'] = enable_rss + rss['low_band_bandwidth'] = low_band + rss['high_band_bandwidth'] = high_band + + date_str = burst.sensing_start.strftime("%Y%m%d") + os.makedirs(f'{work_dir}/runconfigs', exist_ok=True) + runconfig_path = f'{work_dir}/runconfigs/geo_runconfig_{date_str}_{burst.burst_id}.yaml' + with open(runconfig_path, 'w') as yaml_file: + yaml.dump(yaml_cfg, yaml_file, default_flow_style=False) + return runconfig_path + + +def main(slc_dir, dem_file, burst_id, ref_date=None, orbit_dir=None, + work_dir='stack', + pol='dual-pol', x_spac=5, y_spac=10, epsg=None, + flatten= True, + is_split_spectrum=False, + 
low_band=0.0,
+         high_band=0.0):
+    '''
+    Create runconfigs and runfiles generating geocoded bursts for
+    a static stack of Sentinel-1 A/B SAFE files.
+
+    Parameters
+    ----------
+    slc_dir: str
+        Directory containing S1-A/B SAFE files
+    dem_file: str
+        File path to DEM to use for processing
+    burst_id: list
+        List of burst IDs to process (default: None)
+    ref_date: str
+        Date of reference acquisition of the stack (format: YYYYMMDD)
+    orbit_dir: str
+        Directory containing orbit files
+    work_dir: str
+        Working directory to store temp and final files
+    pol: str
+        Polarizations to process: co-pol, dual-pol, cross-pol
+    x_spac: float
+        Spacing of geocoded burst along X-direction
+    y_spac: float
+        Spacing of geocoded burst along Y-direction
+    epsg: int
+        EPSG code identifying projection system to use for processing
+    flatten: bool
+        Enable/disable flattening of geocoded burst
+    is_split_spectrum: bool
+        Enable/disable range split spectrum
+    low_band: float
+        Low sub-band bandwidth for split-spectrum in Hz
+    high_band: float
+        High sub-band bandwidth for split-spectrum in Hz
+    '''
+    error = journal.error('s1_geo_stack_processor.main')
+    info = journal.info('s1_geo_stack_processor.main')
+
+    # Check if SLC dir and DEM exist
+    if not os.path.isdir(slc_dir):
+        err_str = f'{slc_dir} SLC directory does not exist'
+        error.log(err_str)
+        raise FileNotFoundError(err_str)
+
+    if not os.path.isfile(dem_file):
+        err_str = f'{dem_file} DEM file does not exist'
+        error.log(err_str)
+        raise FileNotFoundError(err_str)
+
+    # Create directory for runfiles
+    run_dir = f'{work_dir}/run_files'
+    os.makedirs(run_dir, exist_ok=True)
+
+    # Check if orbits are provided; if not, download them
+    if orbit_dir is None:
+        info.log('Orbit directory not assigned. Download orbit ephemerides')
+
+        # Create orbit dir and check internet connection
+        orbit_dir = f'{work_dir}/orbits'
+        os.makedirs(orbit_dir, exist_ok=True)
+        check_internet_connection()
+
+        # List all zip files and extract info
+        zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip'))
+
+        for zip_file in zip_file_list:
+            sensor_id, _, start_datetime, \
+                end_datetime, _ = parse_safe_filename(zip_file)
+
+            # Find precise orbits first
+            orbit_dict = get_orbit_dict(sensor_id, start_datetime,
+                                        end_datetime, 'AUX_POEORB')
+            # If orbit_dict is empty, precise orbits have not been found. 
Find restituted orbits instead + if orbit_dict == None: + orbit_dict = get_orbit_dict(sensor_id, start_datetime, + end_datetime, 'AUX_RESORB') + # Download the orbit file + download_orbit(orbit_dir, orbit_dict['orbit_url']) + + # Find reference date and construct dict for geocoding + if ref_date is None: + ref_file = sorted(glob.glob(f'{slc_dir}/S1*zip'))[0] + else: + ref_file = glob.glob(f'{slc_dir}/S1*{str(ref_date)}*zip')[0] + + # Generate burst map and prune it if a list of burst ID is provided + burst_map = generate_burst_map(ref_file, orbit_dir, x_spac, y_spac, epsg) + + if burst_id is not None: + burst_map = prune_dataframe(burst_map, 'burst_id', burst_id) + + # Start to geocode bursts + zip_file = sorted(glob.glob(f'{slc_dir}/S1*zip')) + for safe in zip_file: + i_subswath = [1, 2, 3] + orbit_path = get_orbit_file_from_list(safe, + glob.glob(f'{orbit_dir}/S1*')) + + for subswath in i_subswath: + bursts = load_bursts(safe, orbit_path, subswath) + for burst in bursts: + if burst.burst_id in list(set(burst_map['burst_id'])): + runconfig_path = create_runconfig(burst, safe, orbit_path, + dem_file, + work_dir, burst_map, flatten, + is_split_spectrum, + low_band, high_band, pol, + x_spac, y_spac) + date_str = burst.sensing_start.strftime("%Y%m%d") + runfile_name = f'{run_dir}/run_{date_str}_{burst.burst_id}.sh' + with open(runfile_name, 'w') as rsh: + path = os.path.dirname(os.path.realpath(__file__)) + rsh.write( + f'python {path}/geo_cslc.py {runconfig_path}\n') + else: + info.log(f'{burst.burst_id} not part of the stack') + + +if __name__ == '__main__': + # Run main script + args = create_parser() + + main(args.slc_dir, args.dem_file, args.burst_id, args.ref_date, + args.orbit_dir, + args.work_dir, args.pol, args.x_spac, args.y_spac, args.epsg, + args.flatten, args.is_split_spectrum, + args.low_band, args.high_band) From 6d5f80247a0baf1b146673471543cb34dc7ac6e2 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 16 May 2022 10:51:09 -0700 Subject: [PATCH 04/42] Address compatibility with current version of main --- src/compass/utils/stitching/stitch_burst.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 8bf0585f..57144cf3 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -8,6 +8,7 @@ import journal import pandas as pd import shapely.wkt +from datetime import datetime from compass.utils import helpers from osgeo import gdal, ogr from shapely.geometry import Polygon @@ -291,12 +292,16 @@ def get_stitching_dict(indir): metadata = read_metadata(path) # Read info and store in dictionary cfg['burst_id'].append(get_metadata(metadata, 'burst_id')) - filename = get_metadata(metadata, 'granule_id') + datestr = get_metadata(metadata, 'sensing_start') + date = datetime.fromisoformat(datestr).strftime("%Y%m%d") + filename = f"{get_metadata(metadata, 'burst_id')}_{date}_VV.slc" cfg['granule_id'].append(f'{indir}/{dir}/{filename}') - poly = get_metadata(metadata, 'polygon') + poly = get_metadata(metadata, 'border') cfg['polygon'].append(shapely.wkt.loads(poly)) - cfg['date'].append(get_metadata(metadata, 'date')) - cfg['epsg'].append(get_metadata(metadata, 'epsg')) + cfg['date'].append(date) + geogrid = get_metadata(metadata, 'geogrid') + cfg['epsg'].append(geogrid['epsg']) + return pd.DataFrame(data=cfg) From 58b9f33b6064fd8bf395b55cf00029b04bbfbf33 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 16 May 2022 
16:32:55 -0700 Subject: [PATCH 05/42] Fix compatibility with latest reorganization of repository --- src/compass/s1_geo_stack.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index f670f157..0144adf7 100644 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -13,11 +13,10 @@ from osgeo import osr from s1reader.s1_reader import load_bursts from s1reader.s1_orbit import get_orbit_file_from_list -from compass.geo_cslc import run from compass.utils import helpers from compass.utils.runconfig import load_validate_yaml -from compass.utils.geogrid import get_point_epsg +from compass.utils.geo_grid import get_point_epsg from compass.utils.geo_runconfig import GeoRunConfig # Required for orbit download @@ -324,7 +323,7 @@ def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, Spacing of geocoded burst along Y-direction ''' # Load default runconfig and fill it with user-defined options - yaml_path = f'{helpers.WORKFLOW_SCRIPTS_DIR}/defaults/geo_cslc_s1.yaml' + yaml_path = f'{helpers.WORKFLOW_SCRIPTS_DIR}/defaults/s1_cslc_geo.yaml' with open(yaml_path, 'r') as stream: yaml_cfg = yaml.safe_load(stream) From 288e34c4146db3908f6472d27c835a46f711cb51 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Tue, 17 May 2022 08:50:53 -0700 Subject: [PATCH 06/42] Fill toy burst_path and burst metadata --- src/compass/defaults/s1_cslc_geo.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compass/defaults/s1_cslc_geo.yaml b/src/compass/defaults/s1_cslc_geo.yaml index 03f3d931..786c0bdb 100644 --- a/src/compass/defaults/s1_cslc_geo.yaml +++ b/src/compass/defaults/s1_cslc_geo.yaml @@ -11,9 +11,9 @@ runconfig: # Required. List of orbit (EOF) files (min=1) orbit_file_path: # Required. Path to the burst data - burst_file_path: + burst_file_path: burst.vrt # Required. Path to light metadata file - light_metadata_path: + light_metadata_path: burst_metadata.json # Required. 
The unique burst ID to process burst_id: From 441b3bbae692194e67f8622dc22adec59c04aed0 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Wed, 25 May 2022 08:56:22 -0700 Subject: [PATCH 07/42] Change name of script to run geocoded bursts --- src/compass/s1_geo_stack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index 0144adf7..2601e24c 100644 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -494,7 +494,7 @@ def main(slc_dir, dem_file, burst_id, ref_date=None, orbit_dir=None, with open(runfile_name, 'w') as rsh: path = os.path.dirname(os.path.realpath(__file__)) rsh.write( - f'python {path}/geo_cslc.py {runconfig_path}\n') + f'python {path}/s1_cslc.py {runconfig_path}\n') else: info.log(f'{burst.burst_id} not part of the stack') From a345abf09619dc4cda9eccc77a683903decbd68a Mon Sep 17 00:00:00 2001 From: vbrancat Date: Tue, 14 Jun 2022 11:40:36 -0700 Subject: [PATCH 08/42] Identify common burst of stack, filter stack by start/end date, add exclude dates --- src/compass/s1_geo_stack.py | 170 +++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 60 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index 2601e24c..f0a75acc 100644 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -7,6 +7,7 @@ import pandas as pd import yaml import isce3 +import time import numpy as np from xml.etree import ElementTree @@ -45,14 +46,17 @@ def create_parser(): parser.add_argument('-w', '--working-dir', dest='work_dir', type=str, default='stack', help='Directory to store intermediate and final results') - parser.add_argument('-r', '--ref-date', dest='ref_date', type=int, + parser.add_argument('-sd', '--start-date', dest='start_date', type=int, default=None, - help='Date of reference acquisition (yyyymmdd). If None, first acquisition of ' - 'the stack is selected as reference.') + help='Start date of the stack to process') + parser.add_argument('-ed', '--end-date', dest='end_date', type=int, + help='End date of the stack to process') parser.add_argument('-b', '--burst-id', dest='burst_id', nargs='+', default=None, help='List of burst IDs to process. If None, all the burst IDs in the' 'reference date are processed. 
(default: None)') + parser.add_argument('-exd', '--exclude-dates', dest='exclude_dates', nargs='+', type=int, + help='Date to be excluded from stack processing (format: YYYYMMDD)') parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', help='Polarization to process: dual-pol, co-pol, cross-pol (default: co-pol).') parser.add_argument('-x', '--x-spac', dest='x_spac', type=float, default=5, @@ -82,7 +86,7 @@ def check_internet_connection(): ''' url = "http://google.com" try: - request = requests.get(url, timeout=10) + requests.get(url, timeout=10) except (requests.ConnectionError, requests.Timeout) as exception: raise sys.exit(exception) @@ -196,15 +200,15 @@ def download_orbit(output_folder, orbit_url): open(orbit_filename, 'wb').write(response.content) -def generate_burst_map(ref_file, orbit_dir, x_spac, y_spac, epsg=4326): +def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): ''' Generates dataframe containing geogrid info for each burst ID in the ref_file Parameters ---------- - ref_file: str - File path to the stack reference file + zip_files: str + List of S1-A/B SAFE (zip) files orbit_dir: str Directory containing sensor orbit ephemerides x_spac: float @@ -222,43 +226,46 @@ def generate_burst_map(ref_file, orbit_dir, x_spac, y_spac, epsg=4326): ''' # Initialize dictionary that contains all the info for geocoding burst_map = {'burst_id': [], 'x_top_left': [], 'y_top_left': [], - 'x_bottom_right': [], 'y_bottom_right': [], 'epsg': []} + 'x_bottom_right': [], 'y_bottom_right': [], 'epsg': [], + 'date': []} # Get all the bursts from safe file i_subswath = [1, 2, 3] - orbit_path = get_orbit_file_from_list(ref_file, - glob.glob(f'{orbit_dir}/S1*')) - - for subswath in i_subswath: - ref_bursts = load_bursts(ref_file, orbit_path, subswath) - for burst in ref_bursts: - burst_map['burst_id'].append(burst.burst_id) - - # TO DO: Correct when integrated to compass - if epsg is None: - epsg = get_point_epsg(burst.center.y, - burst.center.x) - - # Initialize geogrid with the info checked at this stage - geogrid = isce3.product.bbox_to_geogrid(burst.as_isce3_radargrid(), - burst.orbit, - isce3.core.LUT2d(), - x_spac, -y_spac, - epsg) - - # Snap coordinates so that adjacent burst coordinates are integer - # multiples of the spacing in X-/Y-directions - burst_map['x_top_left'].append( - x_spac * np.floor(geogrid.start_x / x_spac)) - burst_map['y_top_left'].append( - y_spac * np.ceil(geogrid.start_y / y_spac)) - burst_map['x_bottom_right'].append( - x_spac * np.ceil( + + for zip_file in zip_files: + orbit_path = get_orbit_file_from_list(zip_file, + glob.glob(f'{orbit_dir}/S1*')) + + for subswath in i_subswath: + ref_bursts = load_bursts(zip_file, orbit_path, subswath) + for burst in ref_bursts: + burst_map['burst_id'].append(burst.burst_id) + + if epsg is None: + epsg = get_point_epsg(burst.center.y, + burst.center.x) + + # Initialize geogrid with the info checked at this stage + geogrid = isce3.product.bbox_to_geogrid(burst.as_isce3_radargrid(), + burst.orbit, + isce3.core.LUT2d(), + x_spac, -y_spac, + epsg) + + # Snap coordinates so that adjacent burst coordinates are integer + # multiples of the spacing in X-/Y-directions + burst_map['x_top_left'].append( + x_spac * np.floor(geogrid.start_x / x_spac)) + burst_map['y_top_left'].append( + y_spac * np.ceil(geogrid.start_y / y_spac)) + burst_map['x_bottom_right'].append( + x_spac * np.ceil( (geogrid.start_x + x_spac * geogrid.width) / x_spac)) - burst_map['y_bottom_right'].append( - y_spac * np.floor( + 
burst_map['y_bottom_right'].append( + y_spac * np.floor( (geogrid.start_y + y_spac * geogrid.length) / y_spac)) - burst_map['epsg'].append(epsg) + burst_map['epsg'].append(epsg) + burst_map['date'].append(int(burst.sensing_start.strftime("%Y%m%d"))) map = pd.DataFrame(data=burst_map) return map @@ -287,6 +294,31 @@ def prune_dataframe(data, id_col, id_list): return dataf +def get_common_burst_ids(data): + ''' + Get list of burst IDs common among all processed dates + Parameters: + ---------- + data: pandas.DataFrame + Dataframe containing info for stitching (e.g. burst IDs) + Returns: + ------- + common_id: list + List containing common burst IDs among all the dates + ''' + # Identify all the dates for the bursts to stitch + unique_dates = list(set(data['date'])) + + # Initialize list of unique burst IDs + common_id = data.burst_id[data.date == unique_dates[0]] + + for date in unique_dates: + ids = data.burst_id[data.date == date] + common_id = sorted(list(set(ids.tolist()) & set(common_id))) + return common_id + + + def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, burst_map, flatten, enable_rss, low_band, high_band, pol, x_spac, y_spac): @@ -376,7 +408,8 @@ def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, return runconfig_path -def main(slc_dir, dem_file, burst_id, ref_date=None, orbit_dir=None, +def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, + exclude_dates= None, orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, epsg=None, flatten= True, @@ -395,8 +428,10 @@ def main(slc_dir, dem_file, burst_id, ref_date=None, orbit_dir=None, File path to DEM to use for processing burst_id: list List of burst IDs to process (default: None) - ref_date: str - Date of reference acquisition of the stack (format: YYYYMMDD) + start_date: int + Date of the start acquisition of the stack (format: YYYYMMDD) + end_date: int + Date of the end acquistion of the stack (format: YYYYMMDD) orbit_dir: str Directory containing orbit files work_dir: str @@ -416,6 +451,7 @@ def main(slc_dir, dem_file, burst_id, ref_date=None, orbit_dir=None, high_band: float High sub-band bandwidth for split-spectrum in Hz ''' + start_time = time.time() error = journal.error('s1_geo_stack_processor.main') info = journal.info('s1_geo_stack_processor.main') @@ -460,51 +496,65 @@ def main(slc_dir, dem_file, burst_id, ref_date=None, orbit_dir=None, # Download the orbit file download_orbit(orbit_dir, orbit_dict['orbit_url']) - # Find reference date and construct dict for geocoding - if ref_date is None: - ref_file = sorted(glob.glob(f'{slc_dir}/S1*zip'))[0] - else: - ref_file = glob.glob(f'{slc_dir}/S1*{str(ref_date)}*zip')[0] - # Generate burst map and prune it if a list of burst ID is provided - burst_map = generate_burst_map(ref_file, orbit_dir, x_spac, y_spac, epsg) + zip_file = sorted(glob.glob(f'{slc_dir}/S1*zip')) + burst_map = generate_burst_map(zip_file, orbit_dir, x_spac, y_spac, epsg) + + # Identify burst IDs common across the stack and remove from the dataframe + # burst IDs that are not in common + common_ids = get_common_burst_ids(burst_map) + burst_map = prune_dataframe(burst_map, 'burst_id', common_ids) + # If user selects burst IDs to process, prune unnecessary bursts if burst_id is not None: burst_map = prune_dataframe(burst_map, 'burst_id', burst_id) - # Start to geocode bursts - zip_file = sorted(glob.glob(f'{slc_dir}/S1*zip')) + # Select only dates between start and end + if start_date is not None: + burst_map = burst_map[burst_map['date'] >= 
start_date] + if end_date is not None: + burst_map = burst_map[burst_map['date'] <= end_date] + + # Exclude some dates if the user requires it + if exclude_dates is not None: + burst_map = prune_dataframe(burst_map, 'date', exclude_dates) + + # Ready to geocode bursts for safe in zip_file: i_subswath = [1, 2, 3] orbit_path = get_orbit_file_from_list(safe, glob.glob(f'{orbit_dir}/S1*')) - for subswath in i_subswath: bursts = load_bursts(safe, orbit_path, subswath) for burst in bursts: - if burst.burst_id in list(set(burst_map['burst_id'])): + date = int(burst.sensing_start.strftime("%Y%m%d")) + if (burst.burst_id in list(set(burst_map['burst_id']))) and \ + (date in list(set(burst_map['date']))): runconfig_path = create_runconfig(burst, safe, orbit_path, dem_file, work_dir, burst_map, flatten, is_split_spectrum, low_band, high_band, pol, x_spac, y_spac) - date_str = burst.sensing_start.strftime("%Y%m%d") + date_str = str(date) runfile_name = f'{run_dir}/run_{date_str}_{burst.burst_id}.sh' with open(runfile_name, 'w') as rsh: - path = os.path.dirname(os.path.realpath(__file__)) - rsh.write( - f'python {path}/s1_cslc.py {runconfig_path}\n') + path = os.path.dirname(os.path.realpath(__file__)) + rsh.write( + f'python {path}/s1_cslc.py {runconfig_path}\n') else: - info.log(f'{burst.burst_id} not part of the stack') + info.log(f'Burst ID {burst.burst_id} or SAFE file date {date_str} ' + f'not part of the stack to process') + end_time = time.time() + print('Elapsed time (min):', (end_time - start_time) / 60.0) if __name__ == '__main__': # Run main script args = create_parser() - main(args.slc_dir, args.dem_file, args.burst_id, args.ref_date, - args.orbit_dir, + main(args.slc_dir, args.dem_file, args.burst_id, args.start_date, + args.end_date, args.exclude_dates, args.orbit_dir, args.work_dir, args.pol, args.x_spac, args.y_spac, args.epsg, args.flatten, args.is_split_spectrum, args.low_band, args.high_band) From 34d575bced6f912fbd25430bb89088fecfb62639 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Tue, 14 Jun 2022 11:47:35 -0700 Subject: [PATCH 09/42] Isort imports, pep8 convention, codacy --- src/compass/s1_geo_stack.py | 88 ++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index f0a75acc..80b9502d 100644 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -1,24 +1,24 @@ import argparse -import os, sys, glob -import journal import cgi -import requests import datetime -import pandas as pd -import yaml -import isce3 +import glob +import os +import sys import time -import numpy as np - from xml.etree import ElementTree -from osgeo import osr -from s1reader.s1_reader import load_bursts + +import isce3 +import journal +import numpy as np +import pandas as pd +import requests +import yaml from s1reader.s1_orbit import get_orbit_file_from_list -from compass.utils import helpers -from compass.utils.runconfig import load_validate_yaml +from s1reader.s1_reader import load_bursts +from compass.utils import helpers from compass.utils.geo_grid import get_point_epsg -from compass.utils.geo_runconfig import GeoRunConfig + # Required for orbit download scihub_url = 'https://scihub.copernicus.eu/gnss/odata/v1/Products' @@ -55,7 +55,8 @@ def create_parser(): default=None, help='List of burst IDs to process. If None, all the burst IDs in the' 'reference date are processed. 
(default: None)') - parser.add_argument('-exd', '--exclude-dates', dest='exclude_dates', nargs='+', type=int, + parser.add_argument('-exd', '--exclude-dates', dest='exclude_dates', + nargs='+', type=int, help='Date to be excluded from stack processing (format: YYYYMMDD)') parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', help='Polarization to process: dual-pol, co-pol, cross-pol (default: co-pol).') @@ -193,7 +194,7 @@ def download_orbit(output_folder, orbit_url): response = requests.get(url=orbit_url, auth=(scihub_user, scihub_password)) # Get header and find filename header = response.headers['content-disposition'] - header_value, header_params = cgi.parse_header(header) + _, header_params = cgi.parse_header(header) # construct orbit filename orbit_filename = os.path.join(output_folder, header_params['filename']) # Save orbits @@ -246,29 +247,31 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): burst.center.x) # Initialize geogrid with the info checked at this stage - geogrid = isce3.product.bbox_to_geogrid(burst.as_isce3_radargrid(), - burst.orbit, - isce3.core.LUT2d(), - x_spac, -y_spac, - epsg) + geogrid = isce3.product.bbox_to_geogrid( + burst.as_isce3_radargrid(), + burst.orbit, + isce3.core.LUT2d(), + x_spac, -y_spac, + epsg) # Snap coordinates so that adjacent burst coordinates are integer # multiples of the spacing in X-/Y-directions burst_map['x_top_left'].append( x_spac * np.floor(geogrid.start_x / x_spac)) burst_map['y_top_left'].append( - y_spac * np.ceil(geogrid.start_y / y_spac)) + y_spac * np.ceil(geogrid.start_y / y_spac)) burst_map['x_bottom_right'].append( x_spac * np.ceil( - (geogrid.start_x + x_spac * geogrid.width) / x_spac)) + (geogrid.start_x + x_spac * geogrid.width) / x_spac)) burst_map['y_bottom_right'].append( y_spac * np.floor( - (geogrid.start_y + y_spac * geogrid.length) / y_spac)) + (geogrid.start_y + y_spac * geogrid.length) / y_spac)) burst_map['epsg'].append(epsg) - burst_map['date'].append(int(burst.sensing_start.strftime("%Y%m%d"))) + burst_map['date'].append( + int(burst.sensing_start.strftime("%Y%m%d"))) - map = pd.DataFrame(data=burst_map) - return map + burst_map = pd.DataFrame(data=burst_map) + return burst_map def prune_dataframe(data, id_col, id_list): @@ -318,7 +321,6 @@ def get_common_burst_ids(data): return common_id - def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, burst_map, flatten, enable_rss, low_band, high_band, pol, x_spac, y_spac): @@ -383,17 +385,19 @@ def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, geocode['x_posting'] = x_spac geocode['y_posting'] = y_spac geocode['top_left']['x'] = \ - burst_map.x_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] + burst_map.x_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] geocode['top_left']['y'] = \ - burst_map.y_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] + burst_map.y_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] geocode['bottom_right']['x'] = \ - burst_map.x_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[0] + burst_map.x_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[ + 0] geocode['bottom_right']['y'] = \ - burst_map.y_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[0] + burst_map.y_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[ + 0] # geocode['x_snap'] = None # geocode['y_snap'] = None geocode['output_epsg'] = \ - burst_map.epsg[burst_map.burst_id == burst.burst_id].tolist()[0] + 
burst_map.epsg[burst_map.burst_id == burst.burst_id].tolist()[0] # Range split spectrum rss['enabled'] = enable_rss @@ -409,10 +413,10 @@ def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, - exclude_dates= None, orbit_dir=None, + exclude_dates=None, orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, epsg=None, - flatten= True, + flatten=True, is_split_spectrum=False, low_band=0.0, high_band=0.0): @@ -529,22 +533,24 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, for burst in bursts: date = int(burst.sensing_start.strftime("%Y%m%d")) if (burst.burst_id in list(set(burst_map['burst_id']))) and \ - (date in list(set(burst_map['date']))): + (date in list(set(burst_map['date']))): runconfig_path = create_runconfig(burst, safe, orbit_path, dem_file, - work_dir, burst_map, flatten, + work_dir, burst_map, + flatten, is_split_spectrum, low_band, high_band, pol, x_spac, y_spac) date_str = str(date) runfile_name = f'{run_dir}/run_{date_str}_{burst.burst_id}.sh' with open(runfile_name, 'w') as rsh: - path = os.path.dirname(os.path.realpath(__file__)) - rsh.write( - f'python {path}/s1_cslc.py {runconfig_path}\n') + path = os.path.dirname(os.path.realpath(__file__)) + rsh.write( + f'python {path}/s1_cslc.py {runconfig_path}\n') else: - info.log(f'Burst ID {burst.burst_id} or SAFE file date {date_str} ' - f'not part of the stack to process') + info.log( + f'Burst ID {burst.burst_id} or SAFE file date {date_str} ' + f'not part of the stack to process') end_time = time.time() print('Elapsed time (min):', (end_time - start_time) / 60.0) From 4dba162b2eed622061f170a12767be02feeb9689 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 08:07:11 -0700 Subject: [PATCH 10/42] Organize imports with pep8 convention --- src/compass/utils/stitching/stitch_burst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 57144cf3..0bbb1ab7 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -3,12 +3,12 @@ import json import os import time +from datetime import datetime import isce3 import journal import pandas as pd import shapely.wkt -from datetime import datetime from compass.utils import helpers from osgeo import gdal, ogr from shapely.geometry import Polygon From 0ee452c210eb2606811d2dc97f3034ec41501493 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 08:47:08 -0700 Subject: [PATCH 11/42] Remove metadata read and extraction functions --- src/compass/utils/stitching/stitch_burst.py | 59 ++++----------------- 1 file changed, 10 insertions(+), 49 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 0bbb1ab7..4db4fc22 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -286,65 +286,26 @@ def get_stitching_dict(indir): dir_list = os.listdir(indir) for dir in dir_list: # List metadata files in the directory - meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) - for path in meta_list: - # Read metadata file - metadata = read_metadata(path) + json_meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) + for json_path in json_meta_list: + with open(json_path) as json_file: + metadata_dict = json.load(json_file) + # Read info and store in dictionary - 
cfg['burst_id'].append(get_metadata(metadata, 'burst_id')) - datestr = get_metadata(metadata, 'sensing_start') + cfg['burst_id'].append(metadata_dict['burst_id']) + datestr = metadata_dict['sensing_start'] date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = f"{get_metadata(metadata, 'burst_id')}_{date}_VV.slc" + filename = f"{metadata_dict['burst_id']}_{date}_VV.slc" cfg['granule_id'].append(f'{indir}/{dir}/{filename}') - poly = get_metadata(metadata, 'border') + poly = metadata_dict['border'] cfg['polygon'].append(shapely.wkt.loads(poly)) cfg['date'].append(date) - geogrid = get_metadata(metadata, 'geogrid') + geogrid = metadata_dict['geogrid'] cfg['epsg'].append(geogrid['epsg']) - return pd.DataFrame(data=cfg) -def read_metadata(meta_file): - '''Read metadata file in a dictionary - - Parameters: - ----------- - meta_file: str - Filepath where metadata is located - - Returns: - ------- - cfg: dict - Dictionary containing metadata - ''' - with open(meta_file) as json_file: - metadata = json.load(json_file) - return metadata - - -def get_metadata(metadata, field): - ''' - Get 'field" value from metadata dictionary - - Parameters: - ----------- - metadata: dict - Dictionary containing metadata for a burst - field: str - Field in the metadata to extract value for - - Returns: - ------- - value: float, int, shapely.Polygon - Value stored in the metadata field (type - depends on type of metadata extracted) - ''' - value = metadata[field] - return value - - def prune_dataframe(data, id_col, id_list): ''' Prune dataframe based on column ID and list of value From 163e54fdfa975c2b5acf02b894e1f03eed60cb39 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 11:15:16 -0700 Subject: [PATCH 12/42] Rename variables for clarity --- src/compass/utils/stitching/stitch_burst.py | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 4db4fc22..6d208b2a 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -22,10 +22,10 @@ def command_line_parser(): parser = argparse.ArgumentParser(description=""" Stitch S1-A/B bursts for stack processing""", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-d', '--indir', type=str, action='store', dest='indir', + parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir', help='Directory with S1-A/B bursts organized by dates') - parser.add_argument('-b', '--burst-list', type=str, nargs='+', - default=None, dest='burst_list', + parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', + default=None, dest='burst_id_list', help='List of burst IDs to stitch. If None, common bursts' 'among all dates will be stitched (default: None') parser.add_argument('-m', '--margin', type=float, @@ -41,13 +41,13 @@ def command_line_parser(): return parser.parse_args() -def main(indir, outdir, scratchdir, margin, burst_list): +def main(burst_dir, outdir, scratchdir, margin, burst_id_list): ''' Stitch S1-A/B bursts for stack processing Parameters: ---------- - indir: str + burst_dir: str File path to directory containing S1-A/B bursts organized by date outdir: str @@ -58,7 +58,7 @@ def main(indir, outdir, scratchdir, margin, burst_list): margin: float Margin to apply to burst boundary while stitching. Same units as bursts coordinate system - burst_list: list [str] + burst_id_list: list [str] List of burst IDs to stitch. 
If not provided, common bursts among all dates are identified and considered for stitching @@ -71,8 +71,8 @@ def main(indir, outdir, scratchdir, margin, burst_list): t_start = time.time() # Check that input directory is valid - if not os.path.isdir(indir): - err_str = f'{indir} is not a valid input directory' + if not os.path.isdir(burst_dir): + err_str = f'{burst_dir} is not a valid input directory' error_channel.log(err_str) raise ValueError(err_str) @@ -84,15 +84,15 @@ def main(indir, outdir, scratchdir, margin, burst_list): os.makedirs(scratchdir, exist_ok=True) helpers.check_write_dir(scratchdir) - # Collect info for stitching in all dirs in 'indir' + # Collect info for stitching in all dirs in 'burst_dir' # and return a panda dataframe with info - data_dict = get_stitching_dict(indir) + data_dict = get_stitching_dict(burst_dir) # If stitching some bursts, prune dataframe to # contains only the burst IDs to stitch - if burst_list is not None: + if burst_id_list is not None: data_dict = prune_dataframe(data_dict, - 'burst_id', burst_list) + 'burst_id', burst_id_list) # Identify common burst IDs among different dates ids2stitch = get_common_burst_ids(data_dict) @@ -264,14 +264,14 @@ def intersect_polygons(polygons, epsgs, margin): return poly_int, epsg_int -def get_stitching_dict(indir): +def get_stitching_dict(burst_dir): ''' Collect info on bursts to stitch and store them in a panda dataframe Parameters: ---------- - indir: str + burst_dir: str Directory where bursts are stored (organized by date) Returns: @@ -283,10 +283,10 @@ def get_stitching_dict(indir): cfg = {'burst_id': [], 'granule_id': [], 'polygon': [], 'date': [], 'epsg': []} # Get list of directory under dir_list - dir_list = os.listdir(indir) + dir_list = os.listdir(burst_dir) for dir in dir_list: # List metadata files in the directory - json_meta_list = sorted(glob.glob(f'{indir}/{dir}/*json')) + json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*json')) for json_path in json_meta_list: with open(json_path) as json_file: metadata_dict = json.load(json_file) @@ -296,7 +296,7 @@ def get_stitching_dict(indir): datestr = metadata_dict['sensing_start'] date = datetime.fromisoformat(datestr).strftime("%Y%m%d") filename = f"{metadata_dict['burst_id']}_{date}_VV.slc" - cfg['granule_id'].append(f'{indir}/{dir}/{filename}') + cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}') poly = metadata_dict['border'] cfg['polygon'].append(shapely.wkt.loads(poly)) cfg['date'].append(date) @@ -361,5 +361,5 @@ def get_common_burst_ids(data): # Get command line arguments opts = command_line_parser() # Give these arguments to the main code - main(opts.indir, opts.outdir, - opts.scratch, opts.margin, opts.burst_list) + main(opts.burst_dir, opts.outdir, + opts.scratch, opts.margin, opts.burst_id_list) From 87cee4d24ef3ac4bcc67af7ea07f0f2c608f603e Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 11:18:53 -0700 Subject: [PATCH 13/42] Rename data_dict to metadata_dataframe --- src/compass/utils/stitching/stitch_burst.py | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 6d208b2a..8eb8829d 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -86,31 +86,31 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): # Collect info for stitching in all dirs in 'burst_dir' # and return a panda dataframe with info - data_dict = 
get_stitching_dict(burst_dir) + metadata_dataframe = get_stitching_dict(burst_dir) # If stitching some bursts, prune dataframe to # contains only the burst IDs to stitch if burst_id_list is not None: - data_dict = prune_dataframe(data_dict, + metadata_dataframe = prune_dataframe(metadata_dataframe, 'burst_id', burst_id_list) # Identify common burst IDs among different dates - ids2stitch = get_common_burst_ids(data_dict) + ids2stitch = get_common_burst_ids(metadata_dataframe) # Prune dataframe to contain only the IDs to stitch - data_dict = prune_dataframe(data_dict, + metadata_dataframe = prune_dataframe(metadata_dataframe, 'burst_id', ids2stitch) # Track cut bursts by adding new column to dataframe - data_dict["cut_granule_id"] = "" + metadata_dataframe["cut_granule_id"] = "" # For each burst ID, get common bursts boundary and store it # as a shapefile to be used by gdalwarp (later for cutting) - for burst_id in list(set(data_dict['burst_id'])): + for burst_id in list(set(metadata_dataframe['burst_id'])): # Get info on polygons, epsg, granule - polys = data_dict.polygon[data_dict.burst_id == burst_id].tolist() - epsgs = data_dict.epsg[data_dict.burst_id == burst_id].tolist() - granules = data_dict.granule_id[data_dict.burst_id == burst_id].tolist() + polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist() + epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist() + granules = metadata_dataframe.granule_id[metadata_dataframe.burst_id == burst_id].tolist() # Get common burst boundary and save it as shapefile common_poly, epsg = intersect_polygons(polys, epsgs, margin) @@ -133,13 +133,13 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): targetAlignedPixels=True) gdal.Warp(cut_filename, granule, options=opts) # Save location of cut burst IDs (later for stitching) - data_dict.loc[data_dict['granule_id'] == granule, + metadata_dataframe.loc[metadata_dataframe['granule_id'] == granule, 'cut_granule_id'] = cut_filename # Start stitching by date - unique_dates = list(set(data_dict['date'])) + unique_dates = list(set(metadata_dataframe['date'])) for date in unique_dates: - cut_rasters = data_dict.cut_granule_id[data_dict.date == date].tolist() + cut_rasters = metadata_dataframe.cut_granule_id[metadata_dataframe.date == date].tolist() xres, yres, epsg = get_raster_info(cut_rasters[0]) outfilename = f'{outdir}/stitched_{date}' opts = gdal.WarpOptions(format='ENVI', xRes=xres, @@ -151,7 +151,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): gdal.Warp(outfilename, cut_rasters, options=opts) # Save data dictionary to keep trace of what has been merged - data_dict.to_csv(f'{outdir}/merged_report.csv') + metadata_dataframe.to_csv(f'{outdir}/merged_report.csv') # Log elapsed time for stitching dt = time.time() - t_start From 9cf73bdc88c8c332cc178ed641346456ef5c1ef3 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Mon, 11 Jul 2022 11:36:56 -0700 Subject: [PATCH 14/42] stitch_burst.py --- src/compass/utils/stitching/stitch_burst.py | 44 +++++++++++++-------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 8eb8829d..9ba0add1 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -28,6 +28,9 @@ def command_line_parser(): default=None, dest='burst_id_list', help='List of burst IDs to stitch. 
If None, common bursts' 'among all dates will be stitched (default: None') + parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', dest='pol', + help='Polarization to process one or many between HH, HV, VH, VV' + '(default: VV)') parser.add_argument('-m', '--margin', type=float, default=100, dest='margin', help='Margin to apply during stitching. Same units as bursts coordinate system.' @@ -41,27 +44,29 @@ def command_line_parser(): return parser.parse_args() -def main(burst_dir, outdir, scratchdir, margin, burst_id_list): +def main(burst_dir, outdir, scratchdir, margin, burst_id_list, pol): ''' Stitch S1-A/B bursts for stack processing Parameters: ---------- burst_dir: str - File path to directory containing S1-A/B bursts - organized by date + File path to directory containing S1-A/B bursts + organized by date outdir: str - File path to directory where to store stitched bursts + File path to directory where to store stitched bursts scratchdir: str - File path to directory where to store intermediate - results (e.g., shapefiles of burst boundary) + File path to directory where to store intermediate + results (e.g., shapefiles of burst boundary) margin: float - Margin to apply to burst boundary while stitching. - Same units as bursts coordinate system + Margin to apply to burst boundary while stitching. + Same units as bursts coordinate system burst_id_list: list [str] - List of burst IDs to stitch. If not provided, common - bursts among all dates are identified and considered - for stitching + List of burst IDs to stitch. If not provided, common + bursts among all dates are identified and considered + for stitching + pol: list [str] + Polarization to process. One or many among HH, HV, VH, VV. ''' info_channel = journal.info('stitch_burst.main') error_channel = journal.error('stitch_burst.main') @@ -92,7 +97,12 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): # contains only the burst IDs to stitch if burst_id_list is not None: metadata_dataframe = prune_dataframe(metadata_dataframe, - 'burst_id', burst_id_list) + 'burst_id', burst_id_list) + + # Prune dataframe for user-selected polarizations + if pol is not None: + metadata_dataframe = prune_dataframe(metadata_dataframe, + 'polarization', pol) # Identify common burst IDs among different dates ids2stitch = get_common_burst_ids(metadata_dataframe) @@ -106,7 +116,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list): # For each burst ID, get common bursts boundary and store it # as a shapefile to be used by gdalwarp (later for cutting) - for burst_id in list(set(metadata_dataframe['burst_id'])): + for burst_id in set(metadata_dataframe['burst_id']): # Get info on polygons, epsg, granule polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist() epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist() @@ -281,7 +291,7 @@ def get_stitching_dict(burst_dir): ''' # Create dictionary where to store results cfg = {'burst_id': [], 'granule_id': [], 'polygon': [], - 'date': [], 'epsg': []} + 'date': [], 'epsg': [], 'polarization': []} # Get list of directory under dir_list dir_list = os.listdir(burst_dir) for dir in dir_list: @@ -293,9 +303,10 @@ def get_stitching_dict(burst_dir): # Read info and store in dictionary cfg['burst_id'].append(metadata_dict['burst_id']) + cfg['polarization'].append(metadata_dict['polarization']) datestr = metadata_dict['sensing_start'] date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = 
f"{metadata_dict['burst_id']}_{date}_VV.slc" + filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}') poly = metadata_dict['border'] cfg['polygon'].append(shapely.wkt.loads(poly)) @@ -362,4 +373,5 @@ def get_common_burst_ids(data): opts = command_line_parser() # Give these arguments to the main code main(opts.burst_dir, opts.outdir, - opts.scratch, opts.margin, opts.burst_id_list) + opts.scratch, opts.margin, opts.burst_id_list, + opts.pol) From 6bce434441656ffd8ab7eb6f1c4630f93e7051bb Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 2 Aug 2022 08:54:17 -0700 Subject: [PATCH 15/42] clean up argparser for stitch_burst --- src/compass/utils/stitching/stitch_burst.py | 26 ++++++++------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 9ba0add1..c75f3b31 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -19,28 +19,22 @@ def command_line_parser(): Command line parser """ - parser = argparse.ArgumentParser(description=""" - Stitch S1-A/B bursts for stack processing""", + parser = argparse.ArgumentParser(description="Stitch S1-A/B bursts for stack processing", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir', + parser.add_argument('-d', '--burst-dir', type=str, action='store', help='Directory with S1-A/B bursts organized by dates') parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', default=None, dest='burst_id_list', - help='List of burst IDs to stitch. If None, common bursts' - 'among all dates will be stitched (default: None') + help='List of burst IDs to stitch. If None, common bursts ' + 'among all dates will be stitched.') parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', dest='pol', - help='Polarization to process one or many between HH, HV, VH, VV' - '(default: VV)') - parser.add_argument('-m', '--margin', type=float, - default=100, dest='margin', - help='Margin to apply during stitching. Same units as bursts coordinate system.' - '(default: 100 m, UTM)') - parser.add_argument('-s', '--scratchdir', type=str, default='scratch', - dest='scratch', - help='Directory where to store temporary results (default: scratch)') + help='Polarization to process one or many between HH, HV, VH, VV') + parser.add_argument('-m', '--margin', type=float, default=100, dest='margin', + help='Margin to apply during stitching. 
Same units as bursts coordinate system.') + parser.add_argument('-s', '--scratchdir', type=str, default='scratch', dest='scratch', + help='Directory where to store temporary results.') parser.add_argument('-o', '--outdir', type=str, default='outdir', - dest='outdir', - help='Directory path where to store stitched bursts (default: outdir)') + help='Directory path where to store stitched bursts.') return parser.parse_args() From 0d757f0c322b160209881e760576438637a8923f Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 2 Aug 2022 09:08:41 -0700 Subject: [PATCH 16/42] remove scratch directory after finishing stitch cut_ files take up more space than merged Conflicts: src/compass/utils/stitching/stitch_burst.py --- src/compass/utils/stitching/stitch_burst.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index c75f3b31..566aab30 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -2,6 +2,7 @@ import glob import json import os +import shutil import time from datetime import datetime @@ -9,10 +10,11 @@ import journal import pandas as pd import shapely.wkt -from compass.utils import helpers from osgeo import gdal, ogr from shapely.geometry import Polygon +from compass.utils import helpers + def command_line_parser(): """ @@ -21,15 +23,14 @@ def command_line_parser(): parser = argparse.ArgumentParser(description="Stitch S1-A/B bursts for stack processing", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-d', '--burst-dir', type=str, action='store', + parser.add_argument('-d', '--burst-dir', type=str, help='Directory with S1-A/B bursts organized by dates') - parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', - default=None, dest='burst_id_list', + parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', default=None, help='List of burst IDs to stitch. If None, common bursts ' 'among all dates will be stitched.') - parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', dest='pol', + parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', help='Polarization to process one or many between HH, HV, VH, VV') - parser.add_argument('-m', '--margin', type=float, default=100, dest='margin', + parser.add_argument('-m', '--margin', type=float, default=100, help='Margin to apply during stitching. 
Same units as bursts coordinate system.') parser.add_argument('-s', '--scratchdir', type=str, default='scratch', dest='scratch', help='Directory where to store temporary results.') @@ -157,6 +158,8 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list, pol): # Save data dictionary to keep trace of what has been merged metadata_dataframe.to_csv(f'{outdir}/merged_report.csv') + # Get rid of scratch directory + shutil.rmtree(scratchdir) # Log elapsed time for stitching dt = time.time() - t_start info_channel.log(f'Successfully run stitching in {dt:.3f} seconds') From 44fa8c92a64b22188e3788b31ae9fc2c2d019277 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 2 Aug 2022 09:09:03 -0700 Subject: [PATCH 17/42] name stitched files with .slc extension --- src/compass/utils/stitching/stitch_burst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 566aab30..8dc81039 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -146,7 +146,7 @@ def main(burst_dir, outdir, scratchdir, margin, burst_id_list, pol): for date in unique_dates: cut_rasters = metadata_dataframe.cut_granule_id[metadata_dataframe.date == date].tolist() xres, yres, epsg = get_raster_info(cut_rasters[0]) - outfilename = f'{outdir}/stitched_{date}' + outfilename = f'{outdir}/stitched_{date}.slc' opts = gdal.WarpOptions(format='ENVI', xRes=xres, yRes=yres, dstNodata='nan', multithread=True, warpMemoryLimit=3000, From 31d4426b9c08281fa90e020a9b96da1dc01effc2 Mon Sep 17 00:00:00 2001 From: vbrancat Date: Tue, 27 Sep 2022 10:02:38 -0700 Subject: [PATCH 18/42] Change the directory structure of stitching code --- src/compass/utils/stitching/stitch_burst.py | 30 ++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py index 9ba0add1..5c41a062 100644 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -23,7 +23,7 @@ def command_line_parser(): Stitch S1-A/B bursts for stack processing""", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir', - help='Directory with S1-A/B bursts organized by dates') + help='Directory with S1-A/B bursts organized by burst ID') parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', default=None, dest='burst_id_list', help='List of burst IDs to stitch. 
If None, common bursts' @@ -296,23 +296,23 @@ def get_stitching_dict(burst_dir): dir_list = os.listdir(burst_dir) for dir in dir_list: # List metadata files in the directory - json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*json')) + json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*/*json')) for json_path in json_meta_list: - with open(json_path) as json_file: + with open(json_path) as json_file: metadata_dict = json.load(json_file) - # Read info and store in dictionary - cfg['burst_id'].append(metadata_dict['burst_id']) - cfg['polarization'].append(metadata_dict['polarization']) - datestr = metadata_dict['sensing_start'] - date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" - cfg['granule_id'].append(f'{burst_dir}/{dir}/{filename}') - poly = metadata_dict['border'] - cfg['polygon'].append(shapely.wkt.loads(poly)) - cfg['date'].append(date) - geogrid = metadata_dict['geogrid'] - cfg['epsg'].append(geogrid['epsg']) + # Read info and store in dictionary + cfg['burst_id'].append(metadata_dict['burst_id']) + cfg['polarization'].append(metadata_dict['polarization']) + datestr = metadata_dict['sensing_start'] + date = datetime.fromisoformat(datestr).strftime("%Y%m%d") + filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" + cfg['granule_id'].append(f'{burst_dir}/{dir}/{date}/{filename}') + poly = metadata_dict['border'] + cfg['polygon'].append(shapely.wkt.loads(poly)) + cfg['date'].append(date) + geogrid = metadata_dict['geogrid'] + cfg['epsg'].append(geogrid['epsg']) return pd.DataFrame(data=cfg) From 8b42f13ac2b1a7dea456544b9cc5f03863547de4 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 27 Sep 2022 13:55:48 -0700 Subject: [PATCH 19/42] adjustments for new autodownload s1reader --- src/compass/s1_geo_stack.py | 16 ++++++++-------- src/compass/utils/runconfig.py | 14 +++++++++----- 2 files changed, 17 insertions(+), 13 deletions(-) mode change 100644 => 100755 src/compass/s1_geo_stack.py diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py old mode 100644 new mode 100755 index 80b9502d..0e7578b6 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import argparse import cgi import datetime @@ -13,7 +14,7 @@ import pandas as pd import requests import yaml -from s1reader.s1_orbit import get_orbit_file_from_list +from s1reader.s1_orbit import get_orbit_file_from_dir from s1reader.s1_reader import load_bursts from compass.utils import helpers @@ -234,8 +235,7 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): i_subswath = [1, 2, 3] for zip_file in zip_files: - orbit_path = get_orbit_file_from_list(zip_file, - glob.glob(f'{orbit_dir}/S1*')) + orbit_path = get_orbit_file_from_dir(zip_file, orbit_dir, auto_download=True) for subswath in i_subswath: ref_bursts = load_bursts(zip_file, orbit_path, subswath) @@ -501,8 +501,8 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, download_orbit(orbit_dir, orbit_dict['orbit_url']) # Generate burst map and prune it if a list of burst ID is provided - zip_file = sorted(glob.glob(f'{slc_dir}/S1*zip')) - burst_map = generate_burst_map(zip_file, orbit_dir, x_spac, y_spac, epsg) + zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip')) + burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg) # Identify burst IDs common across the stack and remove from the dataframe # burst IDs that 
are not in common @@ -524,10 +524,10 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, burst_map = prune_dataframe(burst_map, 'date', exclude_dates) # Ready to geocode bursts - for safe in zip_file: + for safe in zip_file_list: + orbit_path = get_orbit_file_from_dir(safe, orbit_dir, auto_download=True) + i_subswath = [1, 2, 3] - orbit_path = get_orbit_file_from_list(safe, - glob.glob(f'{orbit_dir}/S1*')) for subswath in i_subswath: bursts = load_bursts(safe, orbit_path, subswath) for burst in bursts: diff --git a/src/compass/utils/runconfig.py b/src/compass/utils/runconfig.py index 85189d5a..0affff32 100644 --- a/src/compass/utils/runconfig.py +++ b/src/compass/utils/runconfig.py @@ -13,7 +13,7 @@ from compass.utils.radar_grid import file_to_rdr_grid from compass.utils.wrap_namespace import wrap_namespace, unwrap_to_dict from s1reader.s1_burst_slc import Sentinel1BurstSlc -from s1reader.s1_orbit import get_orbit_file_from_list +from s1reader.s1_orbit import get_orbit_file_from_dir from s1reader.s1_reader import load_bursts @@ -140,7 +140,7 @@ def validate_group_dict(group_cfg: dict, workflow_name) -> None: helpers.check_write_dir(product_path_group['sas_output_file']) -def runconfig_to_bursts(cfg: SimpleNamespace) -> list[Sentinel1BurstSlc]: +def runconfig_to_bursts(cfg: SimpleNamespace, auto_download: bool = False) -> list[Sentinel1BurstSlc]: '''Return bursts based on parameters in given runconfig Parameters @@ -161,9 +161,13 @@ def runconfig_to_bursts(cfg: SimpleNamespace) -> list[Sentinel1BurstSlc]: # extract given SAFE zips to find bursts identified in cfg.burst_id for safe_file in cfg.input_file_group.safe_file_path: # get orbit file - orbit_path = get_orbit_file_from_list( - safe_file, - cfg.input_file_group.orbit_file_path) + # orbit_path = get_orbit_file_from_dir( + # safe_file, + # cfg.input_file_group.orbit_file_path, + # auto_download=auto_download + # ) + # TODO: why do we need a list for "orbit_file_path"? 
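
The TODO above is left open, and the line below simply takes the first list element. One defensive way to handle it, sketched here as a hypothetical helper (not part of the repo), assuming `orbit_file_path` may arrive as either a single string or a list:

    def first_orbit_path(orbit_file_path):
        """Return one orbit path from either a str or a non-empty list (sketch)."""
        if isinstance(orbit_file_path, (list, tuple)):
            if not orbit_file_path:
                raise ValueError('orbit_file_path is an empty list')
            return orbit_file_path[0]
        return orbit_file_path
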
+ orbit_path = cfg.input_file_group.orbit_file_path[0] if not orbit_path: err_str = f"No orbit file correlates to safe file: {os.path.basename(safe_file)}" From 3abd41c32a463267f1d3c7f2872156689e6e9c09 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 27 Sep 2022 17:11:39 -0700 Subject: [PATCH 20/42] make stitch_burst executable --- src/compass/utils/stitching/stitch_burst.py | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 src/compass/utils/stitching/stitch_burst.py diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py old mode 100644 new mode 100755 index 1ed3f922..d3f350f1 --- a/src/compass/utils/stitching/stitch_burst.py +++ b/src/compass/utils/stitching/stitch_burst.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import argparse import glob import json From e186efd7c09cf91e7ba8089eabef04a7da8eeea1 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 10 Oct 2022 18:49:48 -0700 Subject: [PATCH 21/42] fix exclude_dates and date_str undefined bug --- src/compass/s1_geo_stack.py | 42 ++++++++++++++++++++-------------- src/compass/s1_geocode_slc.py | 5 ++-- src/compass/utils/geo_grid.py | 2 +- src/compass/utils/runconfig.py | 2 +- 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index 0e7578b6..96034164 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -34,7 +34,8 @@ def create_parser(): parser = argparse.ArgumentParser( - description='S1-A/B geocoded CSLC stack processor.') + description='S1-A/B geocoded CSLC stack processor.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-s', '--slc-dir', dest='slc_dir', type=str, required=True, help='Directory containing the S1-A/B SLCs (zip files)') @@ -56,8 +57,7 @@ def create_parser(): default=None, help='List of burst IDs to process. If None, all the burst IDs in the' 'reference date are processed. (default: None)') - parser.add_argument('-exd', '--exclude-dates', dest='exclude_dates', - nargs='+', type=int, + parser.add_argument('-exd', '--exclude-dates', dest='exclude_dates', nargs='+', help='Date to be excluded from stack processing (format: YYYYMMDD)') parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', help='Polarization to process: dual-pol, co-pol, cross-pol (default: co-pol).') @@ -246,7 +246,7 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): epsg = get_point_epsg(burst.center.y, burst.center.x) - # Initialize geogrid with the info checked at this stage + # Initialize geogrid with the info checked at this stage geogrid = isce3.product.bbox_to_geogrid( burst.as_isce3_radargrid(), burst.orbit, @@ -267,34 +267,40 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): y_spac * np.floor( (geogrid.start_y + y_spac * geogrid.length) / y_spac)) burst_map['epsg'].append(epsg) - burst_map['date'].append( - int(burst.sensing_start.strftime("%Y%m%d"))) + burst_map['date'].append(burst.sensing_start.strftime("%Y%m%d")) burst_map = pd.DataFrame(data=burst_map) return burst_map -def prune_dataframe(data, id_col, id_list): +def prune_dataframe(data, id_col, id_list, exclude_items=False): ''' Prune dataframe based on column ID and list of value Parameters: ---------- data: pandas.DataFrame - dataframe that needs to be pruned + dataframe that needs to be pruned id_col: str - column identification for 'data' (e.g. 'burst_id') + column identification for 'data' (e.g. 
'burst_id')
     id_list: list
-        List of elements to keep after pruning. Elements not
-        in id_list but contained in 'data' will be pruned
+        List of elements to consider when pruning.
+        If exclude_items is False (default), only the rows whose
+        `id_col` value matches an element of `id_list` are kept.
+        If exclude_items is True, the rows matching `id_list` are removed from `data`.
+    exclude_items: bool
+        If True, the rows matching `id_list` are removed from `data`.
+        If False (default), only the rows matching `id_list` are kept.
 
     Returns:
     -------
     data: pandas.DataFrame
        Pruned dataframe with rows in 'id_list'
     '''
     pattern = '|'.join(id_list)
-    dataf = data.loc[data[id_col].str.contains(pattern,
-                                               case=False)]
-    return dataf
+    if not exclude_items:
+        df = data.loc[data[id_col].str.contains(pattern, case=False)]
+    else:
+        df = data.loc[~data[id_col].str.contains(pattern, case=False)]
+    return df
 
 
 def get_common_burst_ids(data):
@@ -436,6 +442,8 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None,
         Date of the start acquisition of the stack (format: YYYYMMDD)
     end_date: int
         Date of the end acquistion of the stack (format: YYYYMMDD)
+    exclude_dates: list
+        List of dates to exclude from the stack (format: YYYYMMDD)
     orbit_dir: str
         Directory containing orbit files
     work_dir: str
@@ -521,7 +529,7 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None,
 
     # Exclude some dates if the user requires it
     if exclude_dates is not None:
-        burst_map = prune_dataframe(burst_map, 'date', exclude_dates)
+        burst_map = prune_dataframe(burst_map, 'date', exclude_dates, exclude_items=True)
 
     # Ready to geocode bursts
     for safe in zip_file_list:
@@ -531,7 +539,8 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None,
         for subswath in i_subswath:
             bursts = load_bursts(safe, orbit_path, subswath)
             for burst in bursts:
-                date = int(burst.sensing_start.strftime("%Y%m%d"))
+                date = burst.sensing_start.strftime("%Y%m%d")
+                date_str = str(date)
                 if (burst.burst_id in list(set(burst_map['burst_id']))) and \
                     (date in list(set(burst_map['date']))):
                     runconfig_path = create_runconfig(burst, safe, orbit_path,
diff --git a/src/compass/s1_geocode_slc.py b/src/compass/s1_geocode_slc.py
index 4acb47b9..0a19e3c3 100755
--- a/src/compass/s1_geocode_slc.py
+++ b/src/compass/s1_geocode_slc.py
@@ -49,6 +49,7 @@ def run(cfg):
 
     # process one burst only
     burst = cfg.bursts[0]
+    info_channel.log(f"Running on {burst}")
    date_str = burst.sensing_start.strftime("%Y%m%d")
    burst_id = burst.burst_id
    pol = burst.polarization
@@ -77,8 +78,9 @@ def run(cfg):
     rdr_burst_raster = isce3.io.Raster(temp_slc_path)
 
     # Generate output geocoded burst raster
+    output_name = f'{cfg.output_dir}/{burst_id}_{date_str}_{pol}.slc'
     geo_burst_raster = isce3.io.Raster(
-        f'{cfg.output_dir}/{burst_id}_{date_str}_{pol}.slc',
+        output_name,
         geo_grid.width, geo_grid.length,
         rdr_burst_raster.num_bands, gdal.GDT_CFloat32,
         cfg.geocoding_params.output_format)
@@ -89,7 +91,6 @@ def run(cfg):
 
     # Create sliced radar grid representing valid region of the burst
     sliced_radar_grid = burst.as_isce3_radargrid()[b_bounds]
-
     # Geocode
     isce3.geocode.geocode_slc(geo_burst_raster, rdr_burst_raster,
                               dem_raster,
diff --git 
a/src/compass/utils/geo_grid.py b/src/compass/utils/geo_grid.py index ba0fb99e..7723aa5b 100644 --- a/src/compass/utils/geo_grid.py +++ b/src/compass/utils/geo_grid.py @@ -284,7 +284,7 @@ def get_point_epsg(lat, lon): def generate_geogrids(bursts, geo_dict, dem): dem_raster = isce3.io.Raster(dem) - # Unpack values from geocoding disctionary + # Unpack values from geocoding dictionary epsg_dict = geo_dict['output_epsg'] x_start_dict = geo_dict['top_left']['x'] y_start_dict = geo_dict['top_left']['y'] diff --git a/src/compass/utils/runconfig.py b/src/compass/utils/runconfig.py index 0affff32..8b7abfc3 100644 --- a/src/compass/utils/runconfig.py +++ b/src/compass/utils/runconfig.py @@ -50,7 +50,7 @@ def load_validate_yaml(yaml_path: str, workflow_name: str) -> dict: error_channel.log(err_str) raise yamale.YamaleError(err_str) from yamale_err else: - raise FileNotFoundError + raise FileNotFoundError(f'Yaml file {yaml_path} not found.') # validate yaml file taken from command line try: From 232a87c7e9527b43c4e5734585b8df92f96d67f1 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 10 Oct 2022 18:52:12 -0700 Subject: [PATCH 22/42] remove `dest`s which are same as default dest --- src/compass/s1_geo_stack.py | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index 96034164..f4360312 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -36,48 +36,40 @@ def create_parser(): parser = argparse.ArgumentParser( description='S1-A/B geocoded CSLC stack processor.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-s', '--slc-dir', dest='slc_dir', type=str, - required=True, + parser.add_argument('-s', '--slc-dir', type=str, required=True, help='Directory containing the S1-A/B SLCs (zip files)') - parser.add_argument('-d', '--dem-file', dest='dem_file', type=str, - required=True, + parser.add_argument('-d', '--dem-file', type=str, required=True, help='File path to DEM to use for processing.') - parser.add_argument('-o', '--orbit-dir', dest='orbit_dir', type=str, - default=None, + parser.add_argument('-o', '--orbit-dir', type=str, default=None, help='Directory with orbit files. If None, downloads orbit files') parser.add_argument('-w', '--working-dir', dest='work_dir', type=str, default='stack', help='Directory to store intermediate and final results') - parser.add_argument('-sd', '--start-date', dest='start_date', type=int, - default=None, + parser.add_argument('-sd', '--start-date', type=int, default=None, help='Start date of the stack to process') - parser.add_argument('-ed', '--end-date', dest='end_date', type=int, + parser.add_argument('-ed', '--end-date', type=int, help='End date of the stack to process') - parser.add_argument('-b', '--burst-id', dest='burst_id', nargs='+', - default=None, + parser.add_argument('-b', '--burst-id', nargs='+', default=None, help='List of burst IDs to process. If None, all the burst IDs in the' 'reference date are processed. 
(default: None)') - parser.add_argument('-exd', '--exclude-dates', dest='exclude_dates', nargs='+', + parser.add_argument('-exd', '--exclude-dates', nargs='+', help='Date to be excluded from stack processing (format: YYYYMMDD)') parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', help='Polarization to process: dual-pol, co-pol, cross-pol (default: co-pol).') - parser.add_argument('-x', '--x-spac', dest='x_spac', type=float, default=5, + parser.add_argument('-x', '--x-spac', type=float, default=5, help='Spacing in meters of geocoded CSLC along X-direction.') - parser.add_argument('-y', '--y-spac', dest='y_spac', type=float, default=10, + parser.add_argument('-y', '--y-spac', type=float, default=10, help='Spacing in meters of geocoded CSLC along Y-direction.') - parser.add_argument('-e', '--epsg', dest='epsg', type=int, default=None, + parser.add_argument('-e', '--epsg', type=int, default=None, help='EPSG projection code for output geocoded bursts') - parser.add_argument('-f', '--flatten', dest='flatten', type=bool, - default=True, + parser.add_argument('-f', '--flatten', type=bool, default=True, help='If True, enables flattening (default: True)') parser.add_argument('-ss', '--range-split-spectrum', dest='is_split_spectrum', type=bool, default=False, help='If True, enables split-spectrum (default: False)') - parser.add_argument('-lb', '--low-band', dest='low_band', type=float, - default=0.0, + parser.add_argument('-lb', '--low-band', type=float, default=0.0, help='Low sub-band bandwidth in Hz (default: 0.0)') - parser.add_argument('-hb', '--high-band', dest='high_band', type=float, - default=0.0, + parser.add_argument('-hb', '--high-band', type=float, default=0.0, help='High sub-band bandwidth in Hz (default: 0.0') return parser.parse_args() From 698acbe29446272f05c3bed2d0648c00d21cd063 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 10 Oct 2022 18:56:38 -0700 Subject: [PATCH 23/42] remove stitch from this branch --- src/compass/utils/stitching/stitch_burst.py | 380 -------------------- 1 file changed, 380 deletions(-) delete mode 100755 src/compass/utils/stitching/stitch_burst.py diff --git a/src/compass/utils/stitching/stitch_burst.py b/src/compass/utils/stitching/stitch_burst.py deleted file mode 100755 index d3f350f1..00000000 --- a/src/compass/utils/stitching/stitch_burst.py +++ /dev/null @@ -1,380 +0,0 @@ -#!/usr/bin/env python -import argparse -import glob -import json -import os -import shutil -import time -from datetime import datetime - -import isce3 -import journal -import pandas as pd -import shapely.wkt -from osgeo import gdal, ogr -from shapely.geometry import Polygon - -from compass.utils import helpers - - -def command_line_parser(): - """ - Command line parser - """ - - parser = argparse.ArgumentParser(description="Stitch S1-A/B bursts for stack processing", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-d', '--burst-dir', type=str, action='store', dest='burst_dir', - help='Directory with S1-A/B bursts organized by burst ID') - parser.add_argument('-b', '--burst-id-list', type=str, nargs='+', - default=None, dest='burst_id_list', - help='List of burst IDs to stitch. 
If None, common bursts' - 'among all dates will be stitched (default: None') - parser.add_argument('-p', '--pol', type=str, nargs='+', default='VV', dest='pol', - help='Polarization to process one or many between HH, HV, VH, VV' - '(default: VV)') - parser.add_argument('-m', '--margin', type=float, - default=100, dest='margin', - help='Margin to apply during stitching. Same units as bursts coordinate system.' - '(default: 100 m, UTM)') - parser.add_argument('-s', '--scratchdir', type=str, default='scratch', - dest='scratch', - help='Directory where to store temporary results (default: scratch)') - parser.add_argument('-o', '--outdir', type=str, default='outdir', - help='Directory path where to store stitched bursts.') - return parser.parse_args() - - -def main(burst_dir, outdir, scratchdir, margin, burst_id_list, pol): - ''' - Stitch S1-A/B bursts for stack processing - - Parameters: - ---------- - burst_dir: str - File path to directory containing S1-A/B bursts - organized by date - outdir: str - File path to directory where to store stitched bursts - scratchdir: str - File path to directory where to store intermediate - results (e.g., shapefiles of burst boundary) - margin: float - Margin to apply to burst boundary while stitching. - Same units as bursts coordinate system - burst_id_list: list [str] - List of burst IDs to stitch. If not provided, common - bursts among all dates are identified and considered - for stitching - pol: list [str] - Polarization to process. One or many among HH, HV, VH, VV. - ''' - info_channel = journal.info('stitch_burst.main') - error_channel = journal.error('stitch_burst.main') - - # Start tracking time - info_channel.log('Start burst stitching') - t_start = time.time() - - # Check that input directory is valid - if not os.path.isdir(burst_dir): - err_str = f'{burst_dir} is not a valid input directory' - error_channel.log(err_str) - raise ValueError(err_str) - - # Create output and scratch dirs if not existing - if not os.path.exists(outdir): - os.makedirs(outdir, exist_ok=True) - helpers.check_write_dir(outdir) - if not os.path.exists(scratchdir): - os.makedirs(scratchdir, exist_ok=True) - helpers.check_write_dir(scratchdir) - - # Collect info for stitching in all dirs in 'burst_dir' - # and return a panda dataframe with info - metadata_dataframe = get_stitching_dict(burst_dir) - - # If stitching some bursts, prune dataframe to - # contains only the burst IDs to stitch - if burst_id_list is not None: - metadata_dataframe = prune_dataframe(metadata_dataframe, - 'burst_id', burst_id_list) - - # Prune dataframe for user-selected polarizations - if pol is not None: - metadata_dataframe = prune_dataframe(metadata_dataframe, - 'polarization', pol) - - # Identify common burst IDs among different dates - ids2stitch = get_common_burst_ids(metadata_dataframe) - - # Prune dataframe to contain only the IDs to stitch - metadata_dataframe = prune_dataframe(metadata_dataframe, - 'burst_id', ids2stitch) - - # Track cut bursts by adding new column to dataframe - metadata_dataframe["cut_granule_id"] = "" - - # For each burst ID, get common bursts boundary and store it - # as a shapefile to be used by gdalwarp (later for cutting) - for burst_id in set(metadata_dataframe['burst_id']): - # Get info on polygons, epsg, granule - polys = metadata_dataframe.polygon[metadata_dataframe.burst_id == burst_id].tolist() - epsgs = metadata_dataframe.epsg[metadata_dataframe.burst_id == burst_id].tolist() - granules = metadata_dataframe.granule_id[metadata_dataframe.burst_id == 
burst_id].tolist() - - # Get common burst boundary and save it as shapefile - common_poly, epsg = intersect_polygons(polys, epsgs, margin) - shp_filename = f'{scratchdir}/shp_{burst_id}.shp' - save_as_shapefile(common_poly, epsg, shp_filename) - - # Cut all the same ID burts with shapefile - for granule in granules: - # Get raster resolution - xres, yres, epsg = get_raster_info(granule) - - filename = os.path.splitext(os.path.basename(granule))[0] - cut_filename = f'{scratchdir}/cut_{filename}' - opts = gdal.WarpOptions(format='ENVI', xRes=xres, - yRes=yres, dstNodata='nan', - cutlineDSName=shp_filename, - multithread=True, warpMemoryLimit=3000, - srcSRS=f'EPSG:{epsg}', - dstSRS=f'EPSG:{epsg}', - targetAlignedPixels=True) - gdal.Warp(cut_filename, granule, options=opts) - # Save location of cut burst IDs (later for stitching) - metadata_dataframe.loc[metadata_dataframe['granule_id'] == granule, - 'cut_granule_id'] = cut_filename - - # Start stitching by date - unique_dates = list(set(metadata_dataframe['date'])) - for date in unique_dates: - cut_rasters = metadata_dataframe.cut_granule_id[metadata_dataframe.date == date].tolist() - xres, yres, epsg = get_raster_info(cut_rasters[0]) - outfilename = f'{outdir}/stitched_{date}.slc' - opts = gdal.WarpOptions(format='ENVI', xRes=xres, - yRes=yres, dstNodata='nan', - multithread=True, warpMemoryLimit=3000, - srcSRS=f'EPSG:{epsg}', - dstSRS=f'EPSG:{epsg}', - targetAlignedPixels=True) - gdal.Warp(outfilename, cut_rasters, options=opts) - - # Save data dictionary to keep trace of what has been merged - metadata_dataframe.to_csv(f'{outdir}/merged_report.csv') - - # Get rid of scratch directory - shutil.rmtree(scratchdir) - # Log elapsed time for stitching - dt = time.time() - t_start - info_channel.log(f'Successfully run stitching in {dt:.3f} seconds') - - -def get_raster_info(filename): - ''' - Get raster X and Y resolution and epsg - - Parameters: - ----------- - filename: str - Filepath where raster is stored - - Returns: - ------- - xres, yres, epsg: (tuple, float) - Raster resolution along X and Y directions and epsg - ''' - raster = isce3.io.Raster(filename) - return raster.dx, raster.dy, raster.get_epsg() - - -def save_as_shapefile(polygon, epsg, outfile): - ''' - Save polygon as an ESRI shapefile - - Parameters: - ---------- - polygon: shapely.geometry.Polygon - Shapely polygon identify burst boundary on the ground - epsg: int - EPSG code associate to 'polygon' coordinate system - outfile: str - File path to store create ESRI shapefile - - ''' - dest_srs = ogr.osr.SpatialReference() - dest_srs.ImportFromEPSG(int(epsg)) - driver = ogr.GetDriverByName('ESRI Shapefile') - ds = driver.CreateDataSource(outfile) - layer = ds.CreateLayer('', None, ogr.wkbPolygon) - - # Add attribute and create new feature - layer.CreateField(ogr.FieldDefn('id', ogr.OFTInteger)) - defn = layer.GetLayerDefn() - feat = ogr.Feature(defn) - feat.SetField('id', 123) - - # Make a geometry, from Shapely object - geom = ogr.CreateGeometryFromWkb(polygon.wkb) - geom.AssignSpatialReference(dest_srs) - feat.SetGeometry(geom) - layer.CreateFeature(feat) - - # Clean up - feat = geom = None - ds = layer = feat = geom = None - - -def intersect_polygons(polygons, epsgs, margin): - ''' - Get the intersection of polygons stored in 'polygons' - - Parameters: - ----------- - polygons: list - List of shapely polygons to intersect - epsgs: list - List of EPSGs associate to 'polygons' - - Returns: - ------- - poly_int: shapely.Polygon - Result of polygon intersection - epsg_int: int - 
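
Although this module is removed below, the geometric core of `intersect_polygons` is worth recording: intersect all footprints, then shrink the result inward by the margin via a negative buffer. A self-contained sketch with invented coordinates in a single projected CRS:

    from shapely.geometry import Polygon

    # Two overlapping burst footprints (invented, same projected CRS)
    poly_a = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
    poly_b = Polygon([(4, 4), (14, 4), (14, 14), (4, 14)])

    margin = 1.0
    common = poly_a.intersection(poly_b).buffer(-margin)  # negative buffer shrinks inward
    print(common.bounds)  # (5.0, 5.0, 9.0, 9.0)
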
EPSG code associated to poly - ''' - # Assert validity of inputs - assert (len(polygons) == len(epsgs)) - - # Initialize polygon and epsg of intersection - poly_int = polygons[0] - epsg_int = epsgs[0] - - # Initialize coordinate transformation in case - # there are polygons with different epsgs - llh = ogr.osr.SpatialReference() - llh.ImportFromEPSG(epsg_int) - tgt = ogr.osr.SpatialReference() - - for poly, epsg in zip(polygons, epsgs): - if epsg != epsg_int: - # Transform polygons in same coord system as epsg_int - tgt_x, tgt_y = [], [] - x, y = poly.exterior.coords.xy - tgt.ImportFromEPSG(epsg) - trans = ogr.osr.CoordinateTransformation(llh, tgt) - for lx, ly in zip(x, y): - dummy_x, dummy_y, dummy_z = trans.Transform(lx, ly, 0) - tgt_x.append(dummy_x) - tgt_y.append(dummy_y) - poly = Polygon(list(zip(tgt_x, tgt_y))) - poly_int = poly.intersection(poly_int) - - # To be conservative, apply some margin to the polygon (TO DO: - # check eps) - poly_int = poly_int.buffer(-margin) - return poly_int, epsg_int - - -def get_stitching_dict(burst_dir): - ''' - Collect info on bursts to stitch and store them - in a panda dataframe - - Parameters: - ---------- - burst_dir: str - Directory where bursts are stored (organized by date) - - Returns: - ------- - cfg: pandas.DataFrame - Dataframe collecting info on bursts to stitch - ''' - # Create dictionary where to store results - cfg = {'burst_id': [], 'granule_id': [], 'polygon': [], - 'date': [], 'epsg': [], 'polarization': []} - # Get list of directory under dir_list - dir_list = os.listdir(burst_dir) - for dir in dir_list: - # List metadata files in the directory - json_meta_list = sorted(glob.glob(f'{burst_dir}/{dir}/*/*json')) - for json_path in json_meta_list: - with open(json_path) as json_file: - metadata_dict = json.load(json_file) - - # Read info and store in dictionary - cfg['burst_id'].append(metadata_dict['burst_id']) - cfg['polarization'].append(metadata_dict['polarization']) - datestr = metadata_dict['sensing_start'] - date = datetime.fromisoformat(datestr).strftime("%Y%m%d") - filename = f"{metadata_dict['burst_id']}_{date}_{metadata_dict['polarization']}.slc" - cfg['granule_id'].append(f'{burst_dir}/{dir}/{date}/{filename}') - poly = metadata_dict['border'] - cfg['polygon'].append(shapely.wkt.loads(poly)) - cfg['date'].append(date) - geogrid = metadata_dict['geogrid'] - cfg['epsg'].append(geogrid['epsg']) - - return pd.DataFrame(data=cfg) - - -def prune_dataframe(data, id_col, id_list): - ''' - Prune dataframe based on column ID and list of value - - Parameters: - ---------- - data: pandas.DataFrame - dataframe that needs to be pruned - id_col: str - column identification for 'data' (e.g. 'burst_id') - id_list: list - List of elements to keep after pruning. Elements not - in id_list but contained in 'data' will be pruned - - Returns: - ------- - data: pandas.DataFrame - Pruned dataframe with rows in 'id_list' - ''' - pattern = '|'.join(id_list) - dataf = data.loc[data[id_col].str.contains(pattern, - case=False)] - return dataf - - -def get_common_burst_ids(data): - ''' - Get list of burst IDs common among all processed dates - - Parameters: - ---------- - data: pandas.DataFrame - Dataframe containing info for stitching (e.g. 
burst IDs) - - Returns: - ------- - common_id: list - List containing common burst IDs among all the dates - ''' - # Identify all the dates for the bursts to stitch - unique_dates = list(set(data['date'])) - - # Initialize list of unique burst IDs - common_id = data.burst_id[data.date == unique_dates[0]] - - for date in unique_dates: - ids = data.burst_id[data.date == date] - common_id = sorted(list(set(ids.tolist()) & set(common_id))) - return common_id - - -if __name__ == '__main__': - # Get command line arguments - opts = command_line_parser() - # Give these arguments to the main code - main(opts.burst_dir, opts.outdir, - opts.scratch, opts.margin, opts.burst_id_list, - opts.pol) From 8be146497fb95074f9d258ba7eb42154a4242d52 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 17 Oct 2022 11:54:24 -0700 Subject: [PATCH 24/42] remove orbit download functions in favor of s1reader The functions deleted are all contained in `get_orbit_file_from_dir`, which includes the `auto_download` option now. --- src/compass/s1_geo_stack.py | 173 ++---------------------------------- 1 file changed, 9 insertions(+), 164 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index f4360312..0df6526e 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -1,18 +1,13 @@ #!/usr/bin/env python import argparse -import cgi -import datetime import glob import os -import sys import time -from xml.etree import ElementTree import isce3 import journal import numpy as np import pandas as pd -import requests import yaml from s1reader.s1_orbit import get_orbit_file_from_dir from s1reader.s1_reader import load_bursts @@ -21,17 +16,6 @@ from compass.utils.geo_grid import get_point_epsg -# Required for orbit download -scihub_url = 'https://scihub.copernicus.eu/gnss/odata/v1/Products' -# Namespaces of the XML file returned by the S1 query. Will they change it? -w3_url = '{http://www.w3.org/2005/Atom}' -m_url = '{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}' -d_url = '{http://schemas.microsoft.com/ado/2007/08/dataservices}' -# Scihub guest credential -scihub_user = 'gnssguest' -scihub_password = 'gnssguest' - - def create_parser(): parser = argparse.ArgumentParser( description='S1-A/B geocoded CSLC stack processor.', @@ -50,12 +34,13 @@ def create_parser(): parser.add_argument('-ed', '--end-date', type=int, help='End date of the stack to process') parser.add_argument('-b', '--burst-id', nargs='+', default=None, - help='List of burst IDs to process. If None, all the burst IDs in the' - 'reference date are processed. (default: None)') - parser.add_argument('-exd', '--exclude-dates', nargs='+', + help='List of burst IDs to process. If None, all the burst IDs ' + 'in the reference date are processed. 
(default: None)') + parser.add_argument('-exd', '--exclude-dates', nargs='+', help='Date to be excluded from stack processing (format: YYYYMMDD)') parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', - help='Polarization to process: dual-pol, co-pol, cross-pol (default: co-pol).') + help='Polarization to process: dual-pol, co-pol, cross-pol ' + ' (default: co-pol).') parser.add_argument('-x', '--x-spac', type=float, default=5, help='Spacing in meters of geocoded CSLC along X-direction.') parser.add_argument('-y', '--y-spac', type=float, default=10, @@ -74,130 +59,9 @@ def create_parser(): return parser.parse_args() -def check_internet_connection(): - ''' - Check connection availability - ''' - url = "http://google.com" - try: - requests.get(url, timeout=10) - except (requests.ConnectionError, requests.Timeout) as exception: - raise sys.exit(exception) - - -def parse_safe_filename(safe_filename): - ''' - Extract info from S1-A/B SAFE filename - SAFE filename structure: S1A_IW_SLC__1SDV_20150224T114043_20150224T114111_004764_005E86_AD02.SAFE - - Parameters: - ----------- - safe_filename: string - Path to S1-A/B SAFE file - - Returns: - ------- - List of [sensor_id, mode_id, start_datetime, - end_datetime, abs_orbit_num] - sensor_id: sensor identifier (S1A or S1B) - mode_id: mode/beam (e.g. IW) - start_datetime: acquisition start datetime - stop_datetime: acquisition stop datetime - abs_orbit_num: absolute orbit number - ''' - - safe_name = os.path.basename(safe_filename) - sensor_id = safe_name[2] - sensor_mode = safe_name[4:10] - start_datetime = datetime.datetime.strptime(safe_name[17:32], - '%Y%m%dT%H%M%S') - end_datetime = datetime.datetime.strptime(safe_name[33:48], - '%Y%m%dT%H%M%S') - abs_orb_num = int(safe_name[49:55]) - - return [sensor_id, sensor_mode, start_datetime, end_datetime, abs_orb_num] - - -def get_orbit_dict(sensor_id, start_time, end_time, orbit_type): - ''' - Query Copernicus GNSS API to find latest orbit file - - Parameters: - ---------- - sensor_id: str - Sentinel satellite identifier ('A' or 'B') - start_time: datetime object - Sentinel start acquisition time - end_time: datetime object - Sentinel end acquisition time - orbit_type: str - Type of orbit to download (AUX_POEORB: precise, AUX_RESORB: restituted) - - Returns: - orbit_dict: dict - Python dictionary with [orbit_name, orbit_type, download_url] - ''' - # Check if correct orbit_type - if orbit_type not in ['AUX_POEORB', 'AUX_RESORB']: - err_msg = f'{orbit_type} not a valid orbit type' - raise ValueError(err_msg) - - # Add a 30 min margin to start_time and end_time - pad_start_time = start_time - datetime.timedelta(hours=0.5) - pad_end_time = end_time + datetime.timedelta(hours=0.5) - new_start_time = pad_start_time.strftime('%Y-%m-%dT%H:%M:%S') - new_end_time = pad_end_time.strftime('%Y-%m-%dT%H:%M:%S') - query_string = f"startswith(Name,'S1{sensor_id}') and substringof('{orbit_type}',Name) " \ - f"and ContentDate/Start lt datetime'{new_start_time}' and ContentDate/End gt datetime'{new_end_time}'" - query_params = {'$top': 1, '$orderby': 'ContentDate/Start asc', - '$filter': query_string} - query_response = requests.get(url=scihub_url, params=query_params, - auth=(scihub_user, scihub_password)) - # Parse XML tree from query response - xml_tree = ElementTree.fromstring(query_response.content) - # Extract w3.org URL - w3_url = xml_tree.tag.split('feed')[0] - - # Extract orbit's name, id, url - orbit_id = xml_tree.findtext( - f'.//{w3_url}entry/{m_url}properties/{d_url}Id') - orbit_url = 
f"{scihub_url}('{orbit_id}')/$value" - orbit_name = xml_tree.findtext(f'./{w3_url}entry/{w3_url}title') - - if orbit_id is not None: - orbit_dict = {'orbit_name': orbit_name, 'orbit_type': orbit_type, - 'orbit_url': orbit_url} - else: - orbit_dict = None - return orbit_dict - - -def download_orbit(output_folder, orbit_url): - ''' - Download S1-A/B orbits - - Parameters: - ---------- - output_folder: str - Path to directory where to store orbits - orbit_url: str - Remote url of orbit file to download - ''' - - response = requests.get(url=orbit_url, auth=(scihub_user, scihub_password)) - # Get header and find filename - header = response.headers['content-disposition'] - _, header_params = cgi.parse_header(header) - # construct orbit filename - orbit_filename = os.path.join(output_folder, header_params['filename']) - # Save orbits - open(orbit_filename, 'wb').write(response.content) - - def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): ''' - Generates dataframe containing geogrid info for each burst ID in - the ref_file + Generates a dataframe of geogrid infos for each burst ID in `zip_files`. Parameters ---------- @@ -433,7 +297,7 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, start_date: int Date of the start acquisition of the stack (format: YYYYMMDD) end_date: int - Date of the end acquistion of the stack (format: YYYYMMDD) + Date of the end acquisition of the stack (format: YYYYMMDD) exclude_dates: list List of dates to exclude from the stack (format: YYYYMMDD) orbit_dir: str @@ -476,29 +340,10 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, # Check if orbit are provided, if Not download if orbit_dir is None: - info.log('Orbit directory not assigned. Download orbit ephemerides') - - # Create orbit dir and check internet connection orbit_dir = f'{work_dir}/orbits' + info.log(f'Orbit directory not assigned. Using {orbit_dir} to download orbits') os.makedirs(orbit_dir, exist_ok=True) - check_internet_connection() - - # List all zip file and extract info - zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip')) - - for zip_file in zip_file_list: - sensor_id, _, start_datetime, \ - end_datetime, _ = parse_safe_filename(zip_file) - - # Find precise orbits first - orbit_dict = get_orbit_dict(sensor_id, start_datetime, - end_datetime, 'AUX_POEORB') - # If orbit_dict is empty, precise orbits have not been found. 
Find restituted orbits instead - if orbit_dict == None: - orbit_dict = get_orbit_dict(sensor_id, start_datetime, - end_datetime, 'AUX_RESORB') - # Download the orbit file - download_orbit(orbit_dir, orbit_dict['orbit_url']) + # Note: Specific files will be downloaded as needed during `generate_burst_map` # Generate burst map and prune it if a list of burst ID is provided zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip')) From 74b3e7b8a73cd49f3fac7689eda762d69e89351e Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 17 Oct 2022 12:25:40 -0700 Subject: [PATCH 25/42] remove str(date), strftime already creates string --- src/compass/s1_geo_stack.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index 0df6526e..a2fa8c41 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -376,10 +376,9 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, for subswath in i_subswath: bursts = load_bursts(safe, orbit_path, subswath) for burst in bursts: - date = burst.sensing_start.strftime("%Y%m%d") - date_str = str(date) + date_str = burst.sensing_start.strftime("%Y%m%d") if (burst.burst_id in list(set(burst_map['burst_id']))) and \ - (date in list(set(burst_map['date']))): + (date_str in list(set(burst_map['date']))): runconfig_path = create_runconfig(burst, safe, orbit_path, dem_file, work_dir, burst_map, From 8da7a2606c41dd32c5602fd0a6e68f37872139e0 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 17 Oct 2022 16:12:30 -0700 Subject: [PATCH 26/42] save burst and files paths to simplify dataframe iteration also removes the need to run `load_bursts` twice --- src/compass/s1_geo_stack.py | 107 +++++++++++++++++------------------- 1 file changed, 49 insertions(+), 58 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index a2fa8c41..c605e9b0 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -3,6 +3,7 @@ import glob import os import time +from collections import defaultdict import isce3 import journal @@ -83,9 +84,7 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): x and y coordinates) for each burst to process ''' # Initialize dictionary that contains all the info for geocoding - burst_map = {'burst_id': [], 'x_top_left': [], 'y_top_left': [], - 'x_bottom_right': [], 'y_bottom_right': [], 'epsg': [], - 'date': []} + burst_map = defaultdict(list) # Get all the bursts from safe file i_subswath = [1, 2, 3] @@ -97,6 +96,8 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): ref_bursts = load_bursts(zip_file, orbit_path, subswath) for burst in ref_bursts: burst_map['burst_id'].append(burst.burst_id) + # keep the burst object so we don't have to re-parse + burst_map['burst'].append(burst) if epsg is None: epsg = get_point_epsg(burst.center.y, @@ -113,17 +114,26 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): # Snap coordinates so that adjacent burst coordinates are integer # multiples of the spacing in X-/Y-directions burst_map['x_top_left'].append( - x_spac * np.floor(geogrid.start_x / x_spac)) + x_spac * np.floor(geogrid.start_x / x_spac) + ) burst_map['y_top_left'].append( - y_spac * np.ceil(geogrid.start_y / y_spac)) + y_spac * np.ceil(geogrid.start_y / y_spac) + ) burst_map['x_bottom_right'].append( x_spac * np.ceil( - (geogrid.start_x + x_spac * geogrid.width) / x_spac)) + (geogrid.start_x + x_spac * geogrid.width) / x_spac + ) + 
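
The floor/ceil pattern in this hunk snaps each burst's bounding box outward to whole multiples of the posting, so adjacent bursts land on a common grid. A worked toy example (coordinates invented):

    import numpy as np

    x_spac, y_spac = 5.0, 10.0
    start_x, start_y = 437213.7, 3882991.4   # invented top-left corner, metres

    # Top-left snaps left in x (floor) and up in y (ceil), i.e. outward
    print(x_spac * np.floor(start_x / x_spac))   # 437210.0
    print(y_spac * np.ceil(start_y / y_spac))    # 3883000.0
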
) burst_map['y_bottom_right'].append( y_spac * np.floor( - (geogrid.start_y + y_spac * geogrid.length) / y_spac)) + (geogrid.start_y + y_spac * geogrid.length) / y_spac + ) + ) burst_map['epsg'].append(epsg) burst_map['date'].append(burst.sensing_start.strftime("%Y%m%d")) + # Save the file paths for creating the runconfig + burst_map['orbit_path'].append(orbit_path) + burst_map['zip_file'].append(zip_file) burst_map = pd.DataFrame(data=burst_map) return burst_map @@ -183,26 +193,19 @@ def get_common_burst_ids(data): return common_id -def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, - burst_map, flatten, enable_rss, low_band, high_band, pol, - x_spac, y_spac): +def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, + low_band, high_band, pol, x_spac, y_spac): ''' Create runconfig to process geocoded bursts Parameters --------- - burst: Sentinel1BurstSlc - Object containing info on burst to process - safe: str - Path to SAFE file containing burst to process - orbit_path: str - Path to orbit file related to burst to process + burst_map_row: namedtuple + one row from the dataframe method `burst_map.itertuples()` dem_file: str Path to DEM to use for processing work_dir: str Path to working directory for temp and final results - burst_map: pandas.Dataframe - Pandas dataframe containing burst top-left, bottom-right coordinates flatten: bool Flag to enable/disable flattening enable_rss: bool @@ -231,9 +234,9 @@ def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, rss = process['range_split_spectrum'] # Allocate Inputs - inputs['safe_file_path'] = [safe] - inputs['orbit_file_path'] = [orbit_path] - inputs['burst_id'] = burst.burst_id + inputs['safe_file_path'] = [burst_map_row.zip_file] + inputs['orbit_file_path'] = [burst_map_row.orbit_path] + inputs['burst_id'] = burst_map_row.burst.burst_id groups['dynamic_ancillary_file_group']['dem_file'] = dem_file # Product path @@ -246,20 +249,14 @@ def create_runconfig(burst, safe, orbit_path, dem_file, work_dir, geocode['flatten'] = flatten geocode['x_posting'] = x_spac geocode['y_posting'] = y_spac - geocode['top_left']['x'] = \ - burst_map.x_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] - geocode['top_left']['y'] = \ - burst_map.y_top_left[burst_map.burst_id == burst.burst_id].tolist()[0] - geocode['bottom_right']['x'] = \ - burst_map.x_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[ - 0] - geocode['bottom_right']['y'] = \ - burst_map.y_bottom_right[burst_map.burst_id == burst.burst_id].tolist()[ - 0] + + geocode['top_left']['x'] = burst_map_row.x_top_left + geocode['top_left']['y'] = burst_map_row.y_top_left + geocode['bottom_right']['x'] = burst_map_row.x_bottom_right + geocode['bottom_right']['y'] = burst_map_row.y_bottom_right # geocode['x_snap'] = None # geocode['y_snap'] = None - geocode['output_epsg'] = \ - burst_map.epsg[burst_map.burst_id == burst.burst_id].tolist()[0] + geocode['output_epsg'] = burst_map_row.epsg # Range split spectrum rss['enabled'] = enable_rss @@ -369,32 +366,26 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, burst_map = prune_dataframe(burst_map, 'date', exclude_dates, exclude_items=True) # Ready to geocode bursts - for safe in zip_file_list: - orbit_path = get_orbit_file_from_dir(safe, orbit_dir, auto_download=True) + for row in burst_map.itertuples(): + runconfig_path = create_runconfig( + row, + dem_file, + work_dir, + flatten, + is_split_spectrum, + low_band, + high_band, + pol, + x_spac, + y_spac + ) + date_str = 
row.burst.sensing_start.strftime("%Y%m%d") + runfile_name = f'{run_dir}/run_{date_str}_{row.burst.burst_id}.sh' + with open(runfile_name, 'w') as rsh: + path = os.path.dirname(os.path.realpath(__file__)) + rsh.write( + f'python {path}/s1_cslc.py {runconfig_path}\n') - i_subswath = [1, 2, 3] - for subswath in i_subswath: - bursts = load_bursts(safe, orbit_path, subswath) - for burst in bursts: - date_str = burst.sensing_start.strftime("%Y%m%d") - if (burst.burst_id in list(set(burst_map['burst_id']))) and \ - (date_str in list(set(burst_map['date']))): - runconfig_path = create_runconfig(burst, safe, orbit_path, - dem_file, - work_dir, burst_map, - flatten, - is_split_spectrum, - low_band, high_band, pol, - x_spac, y_spac) - runfile_name = f'{run_dir}/run_{date_str}_{burst.burst_id}.sh' - with open(runfile_name, 'w') as rsh: - path = os.path.dirname(os.path.realpath(__file__)) - rsh.write( - f'python {path}/s1_cslc.py {runconfig_path}\n') - else: - info.log( - f'Burst ID {burst.burst_id} or SAFE file date {date_str} ' - f'not part of the stack to process') end_time = time.time() print('Elapsed time (min):', (end_time - start_time) / 60.0) From cb82254d398c0f9cd34d2abf731c540c0952a568 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 17 Oct 2022 18:11:13 -0700 Subject: [PATCH 27/42] filter zip files by date before loading --- src/compass/s1_geo_stack.py | 89 ++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 27 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index c605e9b0..e6b77145 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import argparse +import datetime import glob import os import time @@ -10,7 +11,7 @@ import numpy as np import pandas as pd import yaml -from s1reader.s1_orbit import get_orbit_file_from_dir +from s1reader.s1_orbit import get_orbit_file_from_dir, parse_safe_filename from s1reader.s1_reader import load_bursts from compass.utils import helpers @@ -21,19 +22,16 @@ def create_parser(): parser = argparse.ArgumentParser( description='S1-A/B geocoded CSLC stack processor.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-s', '--slc-dir', type=str, required=True, + parser.add_argument('-s', '--slc-dir', required=True, help='Directory containing the S1-A/B SLCs (zip files)') - parser.add_argument('-d', '--dem-file', type=str, required=True, + parser.add_argument('-d', '--dem-file', required=True, help='File path to DEM to use for processing.') - parser.add_argument('-o', '--orbit-dir', type=str, default=None, + parser.add_argument('-o', '--orbit-dir', default=None, help='Directory with orbit files. If None, downloads orbit files') - parser.add_argument('-w', '--working-dir', dest='work_dir', type=str, - default='stack', + parser.add_argument('-w', '--working-dir', dest='work_dir', default='stack', help='Directory to store intermediate and final results') - parser.add_argument('-sd', '--start-date', type=int, default=None, - help='Start date of the stack to process') - parser.add_argument('-ed', '--end-date', type=int, - help='End date of the stack to process') + parser.add_argument('-sd', '--start-date', help='Start date of the stack to process') + parser.add_argument('-ed', '--end-date', help='End date of the stack to process') parser.add_argument('-b', '--burst-id', nargs='+', default=None, help='List of burst IDs to process. If None, all the burst IDs ' 'in the reference date are processed. 
(default: None)') @@ -90,7 +88,7 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): i_subswath = [1, 2, 3] for zip_file in zip_files: - orbit_path = get_orbit_file_from_dir(zip_file, orbit_dir, auto_download=True) + orbit_path = get_orbit_file_from_dir(zip_file, orbit_dir) for subswath in i_subswath: ref_bursts = load_bursts(zip_file, orbit_path, subswath) @@ -139,7 +137,7 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): return burst_map -def prune_dataframe(data, id_col, id_list, exclude_items=False): +def prune_dataframe(data, id_col, id_list): ''' Prune dataframe based on column ID and list of value Parameters: @@ -162,10 +160,7 @@ def prune_dataframe(data, id_col, id_list, exclude_items=False): Pruned dataframe with rows in 'id_list' ''' pattern = '|'.join(id_list) - if not exclude_items: - df = data.loc[data[id_col].str.contains(pattern, case=False)] - else: - df = data.loc[~data[id_col].str.contains(pattern, case=False)] + df = data.loc[~data[id_col].str.contains(pattern, case=False)] return df @@ -234,9 +229,10 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, rss = process['range_split_spectrum'] # Allocate Inputs + burst = burst_map_row.burst inputs['safe_file_path'] = [burst_map_row.zip_file] inputs['orbit_file_path'] = [burst_map_row.orbit_path] - inputs['burst_id'] = burst_map_row.burst.burst_id + inputs['burst_id'] = burst.burst_id groups['dynamic_ancillary_file_group']['dem_file'] = dem_file # Product path @@ -271,6 +267,51 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, return runconfig_path +def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): + ''' + Filter list of zip files based on date + Parameters: + ---------- + zip_file_list: list + List of zip files to filter + start_date: str + Start date in YYYYMMDD format + end_date: str + End date in YYYYMMDD format + exclude_dates: list + List of dates to exclude + Returns: + ------- + zip_file_list: list + Filtered list of zip files + ''' + safe_datetimes = [parse_safe_filename(zip_file)[2] for zip_file in zip_file_list] + if start_date: + start_datetime = datetime.datetime.strptime(start_date, '%Y%m%d') + else: + start_datetime = min(safe_datetimes) + if end_date: + end_datetime = datetime.datetime.strptime(end_date, '%Y%m%d') + else: + end_datetime = max(safe_datetimes) + + if exclude_dates is not None: + exclude_datetimes = [ + datetime.datetime.strptime(d, '%Y%m%d').date + for d in exclude_dates + ] + else: + exclude_datetimes = [] + + # Filter within date range, and prune excluded dates + zip_file_list = [ + f + for (f, dt) in zip(zip_file_list, safe_datetimes) + if start_datetime <= dt <= end_datetime and dt.date not in exclude_datetimes + ] + return zip_file_list + + def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, orbit_dir=None, work_dir='stack', @@ -344,6 +385,10 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, # Generate burst map and prune it if a list of burst ID is provided zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip')) + # Remove zip files that are not in the date range before generating burst map + zip_file_list = _filter_by_date(zip_file_list, start_date, end_date, exclude_dates) + + info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg) # Identify burst IDs common across the stack and remove from the dataframe @@ 
-355,16 +400,6 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, if burst_id is not None: burst_map = prune_dataframe(burst_map, 'burst_id', burst_id) - # Select only dates between start and end - if start_date is not None: - burst_map = burst_map[burst_map['date'] >= start_date] - if end_date is not None: - burst_map = burst_map[burst_map['date'] <= end_date] - - # Exclude some dates if the user requires it - if exclude_dates is not None: - burst_map = prune_dataframe(burst_map, 'date', exclude_dates, exclude_items=True) - # Ready to geocode bursts for row in burst_map.itertuples(): runconfig_path = create_runconfig( From 96a537c05ae8284e0ccd151bca4ca1e853cad5f9 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 18 Oct 2022 14:29:11 -0700 Subject: [PATCH 28/42] add ability to geocode a subset of the stack using --bbox --- src/compass/s1_geo_stack.py | 116 ++++++++++++++++++++++------------- src/compass/utils/helpers.py | 55 +++++++++++++++++ 2 files changed, 129 insertions(+), 42 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index e6b77145..acf491be 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -13,6 +13,7 @@ import yaml from s1reader.s1_orbit import get_orbit_file_from_dir, parse_safe_filename from s1reader.s1_reader import load_bursts +from shapely import geometry from compass.utils import helpers from compass.utils.geo_grid import get_point_epsg @@ -44,6 +45,10 @@ def create_parser(): help='Spacing in meters of geocoded CSLC along X-direction.') parser.add_argument('-y', '--y-spac', type=float, default=10, help='Spacing in meters of geocoded CSLC along Y-direction.') + parser.add_argument('--bbox', nargs=4, type=float, default=None, + metavar=('lonmin', 'latmin', 'lonmax', 'latmax'), + help='(Optional) Bounding box of the geocoded stack' + ' in lon/lat degrees. ') parser.add_argument('-e', '--epsg', type=int, default=None, help='EPSG projection code for output geocoded bursts') parser.add_argument('-f', '--flatten', type=bool, default=True, @@ -58,7 +63,7 @@ def create_parser(): return parser.parse_args() -def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): +def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326, bbox=None): ''' Generates a dataframe of geogrid infos for each burst ID in `zip_files`. 
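
Editor's note: once the requested box and a burst footprint are expressed in the same projection, the --bbox pruning this patch introduces reduces to a plain polygon intersection test, and bursts that fail it are skipped before any geogrid is built. Below is a minimal sketch of that test, using shapely as the patch itself does; the helper name and coordinates are invented for illustration, and the lon/lat-to-UTM conversion that the patch performs through its isce3-based helpers is assumed to have already happened.

from shapely import geometry

def burst_intersects_bbox(burst_border_utm, bbox_utm):
    # bbox_utm is (left, bottom, right, top) in the same projected CRS
    # (e.g. a UTM zone) as the burst footprint polygon
    return geometry.box(*bbox_utm).intersects(burst_border_utm)

# Invented footprint and subset boxes, all in one UTM zone
border = geometry.Polygon([(500000, 3800000), (600000, 3800000),
                           (600000, 3820000), (500000, 3820000)])
print(burst_intersects_bbox(border, (550000, 3790000, 650000, 3810000)))  # True
print(burst_intersects_bbox(border, (700000, 3790000, 750000, 3810000)))  # False
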
@@ -74,6 +79,9 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): Spacing of geocoded burst along Y-direction epsg: int EPSG code identifying output product projection system + bbox: Optional[tuple[float]] + Bounding box of the geocoded bursts (left, bottom, right, top) + If not provided, the bounding box is computed for each burst Returns ------- @@ -93,40 +101,37 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): for subswath in i_subswath: ref_bursts = load_bursts(zip_file, orbit_path, subswath) for burst in ref_bursts: + if epsg is None: + epsg = get_point_epsg(burst.center.y, + burst.center.x) + + + if bbox is not None: + bbox_utm = helpers.convert_bbox_to_utm(bbox, epsg) + burst_border_utm = helpers.convert_polygon_to_utm(burst.border[0], epsg) + # Skip this burst if it doesn't intersect the specified bbox + if not geometry.box(*bbox_utm).intersects(burst_border_utm): + continue + else: + # Initialize geogrid with the info checked at this stage + geogrid = isce3.product.bbox_to_geogrid( + burst.as_isce3_radargrid(), + burst.orbit, + isce3.core.LUT2d(), + x_spac, -y_spac, + epsg) + bbox_utm = snap_geogrid(geogrid, x_spac, y_spac) + burst_map['burst_id'].append(burst.burst_id) # keep the burst object so we don't have to re-parse burst_map['burst'].append(burst) - if epsg is None: - epsg = get_point_epsg(burst.center.y, - burst.center.x) + left, bottom, right, top = bbox_utm + burst_map['x_top_left'].append(left) + burst_map['y_top_left'].append(top) + burst_map['x_bottom_right'].append(right) + burst_map['y_bottom_right'].append(bottom) - # Initialize geogrid with the info checked at this stage - geogrid = isce3.product.bbox_to_geogrid( - burst.as_isce3_radargrid(), - burst.orbit, - isce3.core.LUT2d(), - x_spac, -y_spac, - epsg) - - # Snap coordinates so that adjacent burst coordinates are integer - # multiples of the spacing in X-/Y-directions - burst_map['x_top_left'].append( - x_spac * np.floor(geogrid.start_x / x_spac) - ) - burst_map['y_top_left'].append( - y_spac * np.ceil(geogrid.start_y / y_spac) - ) - burst_map['x_bottom_right'].append( - x_spac * np.ceil( - (geogrid.start_x + x_spac * geogrid.width) / x_spac - ) - ) - burst_map['y_bottom_right'].append( - y_spac * np.floor( - (geogrid.start_y + y_spac * geogrid.length) / y_spac - ) - ) burst_map['epsg'].append(epsg) burst_map['date'].append(burst.sensing_start.strftime("%Y%m%d")) # Save the file paths for creating the runconfig @@ -137,6 +142,35 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326): return burst_map +def snap_geogrid(geogrid, x_spac, y_spac): + """Snap the bounding box of the geogrid to integer multiples of the + spacing in X-/Y-directions. 
+ + Parameters + ---------- + geogrid : isce3.core.Geogrid + Geogrid object + x_spac : float + Spacing of geocoded burst along X-direction + y_spac : float + Spacing of geocoded burst along Y-direction + + Returns + ------- + tuple + Tuple containing the snapped bounding box coordinates + (left, bottom, right, top) + """ + x_left = x_spac * np.floor(geogrid.start_x / x_spac) + x_right = x_spac * np.ceil((geogrid.start_x + x_spac * geogrid.width) / x_spac) + y_bottom = y_spac * np.floor((geogrid.start_y + y_spac * geogrid.length) / y_spac) + y_top = y_spac * np.ceil(geogrid.start_y / y_spac) + return (x_left, y_bottom, x_right, y_top) + + + + + def prune_dataframe(data, id_col, id_list): ''' Prune dataframe based on column ID and list of value @@ -160,7 +194,7 @@ def prune_dataframe(data, id_col, id_list): Pruned dataframe with rows in 'id_list' ''' pattern = '|'.join(id_list) - df = data.loc[~data[id_col].str.contains(pattern, case=False)] + df = data.loc[data[id_col].str.contains(pattern, case=False)] return df @@ -312,14 +346,9 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): return zip_file_list -def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, - exclude_dates=None, orbit_dir=None, - work_dir='stack', - pol='dual-pol', x_spac=5, y_spac=10, epsg=None, - flatten=True, - is_split_spectrum=False, - low_band=0.0, - high_band=0.0): +def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, + orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, + epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, high_band=0.0): ''' Create runconfigs and runfiles generating geocoded bursts for a static stack of Sentinel-1 A/B SAFE files. @@ -346,6 +375,9 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, Spacing of geocoded burst along X-direction y_spac: float Spacing of geocoded burst along Y-direction + bbox: tuple[float], optional + Bounding box of the area to geocode: (lonmin, latmin, lonmax, latmax) in degrees. + If not provided, will use the bounding box of the stack. 
epsg: int EPSG code identifying projection system to use for processing flatten: bool @@ -389,7 +421,7 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, zip_file_list = _filter_by_date(zip_file_list, start_date, end_date, exclude_dates) info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') - burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg) + burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg, bbox) # Identify burst IDs common across the stack and remove from the dataframe # burst IDs that are not in common @@ -431,6 +463,6 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, main(args.slc_dir, args.dem_file, args.burst_id, args.start_date, args.end_date, args.exclude_dates, args.orbit_dir, - args.work_dir, args.pol, args.x_spac, args.y_spac, args.epsg, - args.flatten, args.is_split_spectrum, + args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, + args.epsg, args.flatten, args.is_split_spectrum, args.low_band, args.high_band) diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index 83192491..e425ca63 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -4,7 +4,9 @@ import isce3 import journal +import numpy as np from osgeo import gdal +from shapely import geometry import compass @@ -154,3 +156,56 @@ def check_dem(dem_path: str): err_str = f'DEM epsg of {epsg} out of bounds' error_channel.log(err_str) raise ValueError(err_str) + + +def convert_bbox_to_utm(bbox, epsg): + """Convert bounding box coordinates to UTM. + + Parameters + ---------- + bbox : tuple + Tuple containing the lon/lat bounding box coordinates + (left, bottom, right, top) in degrees + epsg : int + EPSG code identifying output product projection system + + Returns + ------- + tuple + Tuple containing the bounding box coordinates in UTM (meters) + (left, bottom, right, top) + """ + lonmin, latmin, lonmax, latmax = bbox + xys = _convert_to_utm([(lonmin, latmin), (lonmax, latmax)], epsg) + return (*xys[0], *xys[1]) + + +def convert_polygon_to_utm(poly, epsg): + """Convert bounding box coordinates to UTM. 
+ + Parameters + ---------- + poly: shapely.geometry.Polygon + Polygon object + epsg : int + EPSG code identifying output product projection system + + Returns + ------- + tuple + Tuple containing the bounding box coordinates in UTM (meters) + (left, bottom, right, top) + """ + coords = np.array(poly.exterior.coords) + xys = _convert_to_utm(coords, epsg) + return geometry.Polygon(xys) + + +def _convert_to_utm(points_lonlat, epsg): + """Convert a list of points from lon/lat to UTM coordinates.""" + proj = isce3.core.UTM(epsg) + # proj.forward expects [Longitude (in radians), latitude (in radians), height (m)] + out = [] + for lon, lat in points_lonlat: + out.append(proj.forward(np.deg2rad([lon, lat, 0]))[:2]) + return out From 0b1ebaae7d3d99b7a55ee1ac7a1669486ad5163f Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Thu, 20 Oct 2022 13:31:43 -0700 Subject: [PATCH 29/42] address formatting comments of PR --- setup.cfg | 1 + src/compass/s1_geo_stack.py | 151 +++++++++++++++++++---------------- src/compass/utils/helpers.py | 8 +- 3 files changed, 87 insertions(+), 73 deletions(-) diff --git a/setup.cfg b/setup.cfg index d784e29d..f3d080b2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,3 +28,4 @@ where = src [options.entry_points] console_scripts = s1_cslc.py = compass.s1_cslc:main + s1_geo_stack.py = compass.s1_geo_stack:main diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index acf491be..bb028569 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -23,49 +23,55 @@ def create_parser(): parser = argparse.ArgumentParser( description='S1-A/B geocoded CSLC stack processor.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-s', '--slc-dir', required=True, - help='Directory containing the S1-A/B SLCs (zip files)') - parser.add_argument('-d', '--dem-file', required=True, - help='File path to DEM to use for processing.') - parser.add_argument('-o', '--orbit-dir', default=None, - help='Directory with orbit files. If None, downloads orbit files') - parser.add_argument('-w', '--working-dir', dest='work_dir', default='stack', - help='Directory to store intermediate and final results') - parser.add_argument('-sd', '--start-date', help='Start date of the stack to process') - parser.add_argument('-ed', '--end-date', help='End date of the stack to process') - parser.add_argument('-b', '--burst-id', nargs='+', default=None, - help='List of burst IDs to process. If None, all the burst IDs ' - 'in the reference date are processed. (default: None)') - parser.add_argument('-exd', '--exclude-dates', nargs='+', - help='Date to be excluded from stack processing (format: YYYYMMDD)') - parser.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', - help='Polarization to process: dual-pol, co-pol, cross-pol ' - ' (default: co-pol).') - parser.add_argument('-x', '--x-spac', type=float, default=5, - help='Spacing in meters of geocoded CSLC along X-direction.') - parser.add_argument('-y', '--y-spac', type=float, default=10, - help='Spacing in meters of geocoded CSLC along Y-direction.') - parser.add_argument('--bbox', nargs=4, type=float, default=None, - metavar=('lonmin', 'latmin', 'lonmax', 'latmax'), - help='(Optional) Bounding box of the geocoded stack' - ' in lon/lat degrees. 
') - parser.add_argument('-e', '--epsg', type=int, default=None, - help='EPSG projection code for output geocoded bursts') - parser.add_argument('-f', '--flatten', type=bool, default=True, - help='If True, enables flattening (default: True)') - parser.add_argument('-ss', '--range-split-spectrum', - dest='is_split_spectrum', type=bool, default=False, - help='If True, enables split-spectrum (default: False)') - parser.add_argument('-lb', '--low-band', type=float, default=0.0, - help='Low sub-band bandwidth in Hz (default: 0.0)') - parser.add_argument('-hb', '--high-band', type=float, default=0.0, - help='High sub-band bandwidth in Hz (default: 0.0') + # Separate the required options from the optional ones + # https://stackoverflow.com/a/41747010/ + parser._action_groups.pop() + required = parser.add_argument_group('required arguments') + optional = parser.add_argument_group('optional arguments') + required.add_argument('-s', '--slc-dir', required=True, + help='Directory containing the S1-A/B SLCs (zip files)') + required.add_argument('-d', '--dem-file', required=True, + help='File path to a GDAL-readable DEM to use for processing.') + optional.add_argument('-o', '--orbit-dir', default=None, + help='Directory with orbit files. If None, downloads orbit files') + optional.add_argument('-w', '--working-dir', dest='work_dir', default='stack', + help='Directory to store intermediate and final results') + optional.add_argument('-sd', '--start-date', help='Start date of the stack to process') + optional.add_argument('-ed', '--end-date', help='End date of the stack to process') + optional.add_argument('-b', '--burst-id', nargs='+', default=None, + help='List of burst IDs to process. If None, burst IDs ' + 'common to all dates are processed.') + optional.add_argument('-exd', '--exclude-dates', nargs='+', + help='Date to be excluded from stack processing (format: YYYYMMDD)') + optional.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', + help='Polarization to process: dual-pol, co-pol, cross-pol ' + ' (default: co-pol).') + optional.add_argument('-dx', '--x-spac', type=float, default=5, + help='Spacing in meters of geocoded CSLC along X-direction.') + optional.add_argument('-dy', '--y-spac', type=float, default=10, + help='Spacing in meters of geocoded CSLC along Y-direction.') + optional.add_argument('--bbox', nargs=4, type=float, default=None, + metavar=('lonmin', 'latmin', 'lonmax', 'latmax'), + help='Bounding box of the geocoded stack.') + optional.add_argument('--bbox-epsg', type=int, default=4326, + help='EPSG code of the bounding box.' + 'If 4326, the bounding box is in lon/lat degrees.') + optional.add_argument('-e', '--epsg', type=int, default=None, + help='EPSG projection code for output geocoded bursts') + optional.add_argument('-f', '--flatten', type=bool, default=True, + help='If True, enables topographic phase flattening.') + optional.add_argument('-ss', '--range-split-spectrum', + dest='is_split_spectrum', type=bool, default=False, + help='If True, enables split-spectrum') + optional.add_argument('-lb', '--low-band', type=float, default=0.0, + help='Low sub-band bandwidth in Hz (default: 0.0)') + optional.add_argument('-hb', '--high-band', type=float, default=0.0, + help='High sub-band bandwidth in Hz (default: 0.0') return parser.parse_args() def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326, bbox=None): - ''' - Generates a dataframe of geogrid infos for each burst ID in `zip_files`. 
+ """Generates a dataframe of geogrid infos for each burst ID in `zip_files`. Parameters ---------- @@ -88,7 +94,7 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326, bbox=Non burst_map: pandas.Dataframe Pandas dataframe containing geogrid info (e.g. top-left, bottom-right x and y coordinates) for each burst to process - ''' + """ # Initialize dictionary that contains all the info for geocoding burst_map = defaultdict(list) @@ -168,12 +174,9 @@ def snap_geogrid(geogrid, x_spac, y_spac): return (x_left, y_bottom, x_right, y_top) - - - def prune_dataframe(data, id_col, id_list): - ''' - Prune dataframe based on column ID and list of value + """Prune dataframe based on column ID and list of value + Parameters: ---------- data: pandas.DataFrame @@ -185,31 +188,30 @@ def prune_dataframe(data, id_col, id_list): If exclude_items is False (default), then all elements in `data` will be kept *except for* those in `id_list`. If exclude_items is True, the items in `id_list` will be removed from `data`. - exclude_items: bool - If True, the items in `id_list` will be removed from `data`. - If False, all elements in `data` will be kept *except for* those in `id_list`. + Returns: ------- data: pandas.DataFrame - Pruned dataframe with rows in 'id_list' - ''' + Pruned dataframe with rows in 'id_list' + """ pattern = '|'.join(id_list) df = data.loc[data[id_col].str.contains(pattern, case=False)] return df def get_common_burst_ids(data): - ''' - Get list of burst IDs common among all processed dates - Parameters: + """Get list of burst IDs common among all processed dates + + Parameters ---------- data: pandas.DataFrame - Dataframe containing info for stitching (e.g. burst IDs) - Returns: + Dataframe containing info for stitching (e.g. burst IDs) + + Returns ------- common_id: list - List containing common burst IDs among all the dates - ''' + List containing common burst IDs among all the dates + """ # Identify all the dates for the bursts to stitch unique_dates = list(set(data['date'])) @@ -224,11 +226,11 @@ def get_common_burst_ids(data): def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, low_band, high_band, pol, x_spac, y_spac): - ''' + """ Create runconfig to process geocoded bursts Parameters - --------- + ---------- burst_map_row: namedtuple one row from the dataframe method `burst_map.itertuples()` dem_file: str @@ -249,7 +251,12 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, Spacing of geocoded burst along X-direction y_spac: float Spacing of geocoded burst along Y-direction - ''' + + Returns + ------- + runconfig: str + Path to runconfig file + """ # Load default runconfig and fill it with user-defined options yaml_path = f'{helpers.WORKFLOW_SCRIPTS_DIR}/defaults/s1_cslc_geo.yaml' with open(yaml_path, 'r') as stream: @@ -302,9 +309,10 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): - ''' + """ Filter list of zip files based on date - Parameters: + + Parameters ---------- zip_file_list: list List of zip files to filter @@ -314,11 +322,12 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): End date in YYYYMMDD format exclude_dates: list List of dates to exclude - Returns: + + Returns ------- zip_file_list: list Filtered list of zip files - ''' + """ safe_datetimes = [parse_safe_filename(zip_file)[2] for zip_file in zip_file_list] if start_date: start_datetime = 
datetime.datetime.strptime(start_date, '%Y%m%d') @@ -346,11 +355,10 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): return zip_file_list -def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, +def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, high_band=0.0): - ''' - Create runconfigs and runfiles generating geocoded bursts for + """Create runconfigs and runfiles generating geocoded bursts for a static stack of Sentinel-1 A/B SAFE files. Parameters @@ -388,7 +396,7 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_da Low sub-band bandwidth for split-spectrum in Hz high_band: float High sub-band bandwidth for split-spectrum in Hz - ''' + """ start_time = time.time() error = journal.error('s1_geo_stack_processor.main') info = journal.info('s1_geo_stack_processor.main') @@ -457,7 +465,8 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_da print('Elapsed time (min):', (end_time - start_time) / 60.0) -if __name__ == '__main__': +def main(): + """Create the command line interface and run the script.""" # Run main script args = create_parser() @@ -466,3 +475,7 @@ def main(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_da args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, args.epsg, args.flatten, args.is_split_spectrum, args.low_band, args.high_band) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index e425ca63..6910336f 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -167,7 +167,7 @@ def convert_bbox_to_utm(bbox, epsg): Tuple containing the lon/lat bounding box coordinates (left, bottom, right, top) in degrees epsg : int - EPSG code identifying output product projection system + EPSG code identifying output projection system Returns ------- @@ -181,14 +181,14 @@ def convert_bbox_to_utm(bbox, epsg): def convert_polygon_to_utm(poly, epsg): - """Convert bounding box coordinates to UTM. + """Convert a shapely.Polygon's coordinates to UTM. 
Parameters ---------- poly: shapely.geometry.Polygon Polygon object epsg : int - EPSG code identifying output product projection system + EPSG code identifying output projection system Returns ------- @@ -202,7 +202,7 @@ def convert_polygon_to_utm(poly, epsg): def _convert_to_utm(points_lonlat, epsg): - """Convert a list of points from lon/lat to UTM coordinates.""" + """Convert a list of points from lon/lat (in degrees) to UTM coordinates.""" proj = isce3.core.UTM(epsg) # proj.forward expects [Longitude (in radians), latitude (in radians), height (m)] out = [] From b769be75d8a2c7bbb31fe4fb91ca025b71895d34 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Thu, 20 Oct 2022 14:02:13 -0700 Subject: [PATCH 30/42] allow arbitrary bbox coordinate system through --epsg-bbox --- src/compass/s1_geo_stack.py | 41 ++++++++++++++++++++++-------------- src/compass/utils/helpers.py | 34 +++++++++++++++++++++--------- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py index bb028569..cc4e663f 100755 --- a/src/compass/s1_geo_stack.py +++ b/src/compass/s1_geo_stack.py @@ -51,13 +51,14 @@ def create_parser(): optional.add_argument('-dy', '--y-spac', type=float, default=10, help='Spacing in meters of geocoded CSLC along Y-direction.') optional.add_argument('--bbox', nargs=4, type=float, default=None, - metavar=('lonmin', 'latmin', 'lonmax', 'latmax'), + metavar=('xmin', 'ymin', 'xmax', 'ymax'), help='Bounding box of the geocoded stack.') - optional.add_argument('--bbox-epsg', type=int, default=4326, - help='EPSG code of the bounding box.' + optional.add_argument('--epsg-bbox', type=int, default=4326, + help='EPSG code of the bounding box. ' 'If 4326, the bounding box is in lon/lat degrees.') optional.add_argument('-e', '--epsg', type=int, default=None, - help='EPSG projection code for output geocoded bursts') + help='Output EPSG projection code for geocoded bursts. ' + 'If None, projection the UTM zone of the first burst.') optional.add_argument('-f', '--flatten', type=bool, default=True, help='If True, enables topographic phase flattening.') optional.add_argument('-ss', '--range-split-spectrum', @@ -70,7 +71,8 @@ def create_parser(): return parser.parse_args() -def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326, bbox=None): +def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=None, + bbox=None, epsg_bbox=4326): """Generates a dataframe of geogrid infos for each burst ID in `zip_files`. Parameters @@ -86,8 +88,11 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326, bbox=Non epsg: int EPSG code identifying output product projection system bbox: Optional[tuple[float]] - Bounding box of the geocoded bursts (left, bottom, right, top) - If not provided, the bounding box is computed for each burst + Desired bounding box of the geocoded bursts as (left, bottom, right, top). + If not provided, the bounding box is computed for each burst. + epsg_bbox: int + EPSG code of the bounding box. If 4326, the bounding box is assumed + to be lon/lat degrees (default: 4326). 
Returns ------- @@ -111,10 +116,13 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=4326, bbox=Non epsg = get_point_epsg(burst.center.y, burst.center.x) - if bbox is not None: - bbox_utm = helpers.convert_bbox_to_utm(bbox, epsg) - burst_border_utm = helpers.convert_polygon_to_utm(burst.border[0], epsg) + bbox_utm = helpers.bbox_to_utm( + bbox, epsg_bbox=epsg_bbox, epsg_out=epsg + ) + burst_border_utm = helpers.polygon_to_utm( + burst.border[0], epsg + ) # Skip this burst if it doesn't intersect the specified bbox if not geometry.box(*bbox_utm).intersects(burst_border_utm): continue @@ -356,10 +364,11 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, - orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, - epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, high_band=0.0): - """Create runconfigs and runfiles generating geocoded bursts for - a static stack of Sentinel-1 A/B SAFE files. + orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, + epsg_bbox=4326, epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, + high_band=0.0): + """Create runconfigs and runfiles generating geocoded bursts for a static + stack of Sentinel-1 A/B SAFE files. Parameters ---------- @@ -384,7 +393,7 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat y_spac: float Spacing of geocoded burst along Y-direction bbox: tuple[float], optional - Bounding box of the area to geocode: (lonmin, latmin, lonmax, latmax) in degrees. + Bounding box of the area to geocode: (xmin, ymin, xmax, ymax) in degrees. If not provided, will use the bounding box of the stack. epsg: int EPSG code identifying projection system to use for processing @@ -473,7 +482,7 @@ def main(): main(args.slc_dir, args.dem_file, args.burst_id, args.start_date, args.end_date, args.exclude_dates, args.orbit_dir, args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, - args.epsg, args.flatten, args.is_split_spectrum, + args.epsg_bbox, args.epsg, args.flatten, args.is_split_spectrum, args.low_band, args.high_band) diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index 6910336f..efe8a3df 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -158,7 +158,7 @@ def check_dem(dem_path: str): raise ValueError(err_str) -def convert_bbox_to_utm(bbox, epsg): +def bbox_to_utm(bbox, epsg_bbox=4326, epsg_out=None): """Convert bounding box coordinates to UTM. Parameters @@ -175,12 +175,12 @@ def convert_bbox_to_utm(bbox, epsg): Tuple containing the bounding box coordinates in UTM (meters) (left, bottom, right, top) """ - lonmin, latmin, lonmax, latmax = bbox - xys = _convert_to_utm([(lonmin, latmin), (lonmax, latmax)], epsg) + xmin, ymin, xmax, ymax = bbox + xys = _convert_to_utm([(xmin, ymin), (xmax, ymax)], epsg_bbox, epsg_out) return (*xys[0], *xys[1]) -def convert_polygon_to_utm(poly, epsg): +def polygon_to_utm(poly, epsg_poly=4326, epsg_out=None): """Convert a shapely.Polygon's coordinates to UTM. 
Parameters @@ -197,15 +197,29 @@ def convert_polygon_to_utm(poly, epsg): (left, bottom, right, top) """ coords = np.array(poly.exterior.coords) - xys = _convert_to_utm(coords, epsg) + xys = _convert_to_utm(coords, epsg_poly, epsg_out) return geometry.Polygon(xys) -def _convert_to_utm(points_lonlat, epsg): +def _convert_to_utm(points_xy, epsg_in, epsg_out): """Convert a list of points from lon/lat (in degrees) to UTM coordinates.""" - proj = isce3.core.UTM(epsg) - # proj.forward expects [Longitude (in radians), latitude (in radians), height (m)] + if epsg_out == epsg_in: + return points_xy + + if epsg_in == 4326: + # proj.forward expects Longitude/latitude in radians + points_ll = np.deg2rad(points_xy) + else: + # convert points to lon/lat if given a different UTM projection + points_ll = [] + proj_ll = isce3.core.UTM(epsg_in) + for x, y in points_xy: + points_ll.append(proj_ll.inverse([x, y, 0])[:2]) + print(points_ll, np.rad2deg(points_ll)) + + proj = isce3.core.UTM(epsg_out) out = [] - for lon, lat in points_lonlat: - out.append(proj.forward(np.deg2rad([lon, lat, 0]))[:2]) + for lon, lat in points_ll: + # proj.forward expects llh, [Longitude, latitude, height (m)] + out.append(proj.forward([lon, lat, 0])[:2]) return out From cb991d3095af83f32860b2a3b06e113a43efb319 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Thu, 20 Oct 2022 14:05:21 -0700 Subject: [PATCH 31/42] fix comment and stray print --- src/compass/utils/helpers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index efe8a3df..192cb95f 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -202,7 +202,10 @@ def polygon_to_utm(poly, epsg_poly=4326, epsg_out=None): def _convert_to_utm(points_xy, epsg_in, epsg_out): - """Convert a list of points from lon/lat (in degrees) to UTM coordinates.""" + """Convert a list of points to a specified UTM coordinate system. + + If epsg_in is 4326 (lat/lon), assumes points_xy are in degrees. 
+ """ if epsg_out == epsg_in: return points_xy @@ -215,7 +218,6 @@ def _convert_to_utm(points_xy, epsg_in, epsg_out): proj_ll = isce3.core.UTM(epsg_in) for x, y in points_xy: points_ll.append(proj_ll.inverse([x, y, 0])[:2]) - print(points_ll, np.rad2deg(points_ll)) proj = isce3.core.UTM(epsg_out) out = [] From 730ccbab76b629b866f91cd35cee28e4eab709e6 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 07:26:52 -0700 Subject: [PATCH 32/42] rename to geocode instead of geo --- src/compass/s1_geocode_stack.py | 490 ++++++++++++++++++++++++++++++++ 1 file changed, 490 insertions(+) create mode 100755 src/compass/s1_geocode_stack.py diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py new file mode 100755 index 00000000..5d6285b3 --- /dev/null +++ b/src/compass/s1_geocode_stack.py @@ -0,0 +1,490 @@ +#!/usr/bin/env python +import argparse +import datetime +import glob +import os +import time +from collections import defaultdict + +import isce3 +import journal +import numpy as np +import pandas as pd +import yaml +from s1reader.s1_orbit import get_orbit_file_from_dir, parse_safe_filename +from s1reader.s1_reader import load_bursts +from shapely import geometry + +from compass.utils import helpers +from compass.utils.geo_grid import get_point_epsg + + +def create_parser(): + parser = argparse.ArgumentParser( + description='S1-A/B geocoded CSLC stack processor.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + # Separate the required options from the optional ones + # https://stackoverflow.com/a/41747010/ + parser._action_groups.pop() + required = parser.add_argument_group('required arguments') + optional = parser.add_argument_group('optional arguments') + required.add_argument('-s', '--slc-dir', required=True, + help='Directory containing the S1-A/B SLCs (zip files)') + required.add_argument('-d', '--dem-file', required=True, + help='File path to a GDAL-readable DEM to use for processing.') + optional.add_argument('-o', '--orbit-dir', default=None, + help='Directory with orbit files. If None, downloads orbit files') + optional.add_argument('-w', '--working-dir', dest='work_dir', default='stack', + help='Directory to store intermediate and final results') + optional.add_argument('-sd', '--start-date', help='Start date of the stack to process') + optional.add_argument('-ed', '--end-date', help='End date of the stack to process') + optional.add_argument('-b', '--burst-id', nargs='+', default=None, + help='List of burst IDs to process. If None, burst IDs ' + 'common to all dates are processed.') + optional.add_argument('-exd', '--exclude-dates', nargs='+', + help='Date to be excluded from stack processing (format: YYYYMMDD)') + optional.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', + help='Polarization to process: dual-pol, co-pol, cross-pol ' + ' (default: co-pol).') + optional.add_argument('-dx', '--x-spac', type=float, default=5, + help='Spacing in meters of geocoded CSLC along X-direction.') + optional.add_argument('-dy', '--y-spac', type=float, default=10, + help='Spacing in meters of geocoded CSLC along Y-direction.') + optional.add_argument('--bbox', nargs=4, type=float, default=None, + metavar=('xmin', 'ymin', 'xmax', 'ymax'), + help='Bounding box of the geocoded stack.') + optional.add_argument('--epsg-bbox', type=int, default=4326, + help='EPSG code of the bounding box. 
' + 'If 4326, the bounding box is in lon/lat degrees.') + optional.add_argument('-e', '--epsg', type=int, default=None, + help='Output EPSG projection code for geocoded bursts. ' + 'If None, projection the UTM zone of the first burst.') + optional.add_argument('-f', '--flatten', type=bool, default=True, + help='If True, enables topographic phase flattening.') + optional.add_argument('-ss', '--range-split-spectrum', + dest='is_split_spectrum', type=bool, default=False, + help='If True, enables split-spectrum') + optional.add_argument('-lb', '--low-band', type=float, default=0.0, + help='Low sub-band bandwidth in Hz (default: 0.0)') + optional.add_argument('-hb', '--high-band', type=float, default=0.0, + help='High sub-band bandwidth in Hz (default: 0.0') + return parser.parse_args() + + +def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=None, + bbox=None, epsg_bbox=4326): + """Generates a dataframe of geogrid infos for each burst ID in `zip_files`. + + Parameters + ---------- + zip_files: str + List of S1-A/B SAFE (zip) files + orbit_dir: str + Directory containing sensor orbit ephemerides + x_spac: float + Spacing of geocoded burst along X-direction + y_spac: float + Spacing of geocoded burst along Y-direction + epsg: int + EPSG code identifying output product projection system + bbox: Optional[tuple[float]] + Desired bounding box of the geocoded bursts as (left, bottom, right, top). + If not provided, the bounding box is computed for each burst. + epsg_bbox: int + EPSG code of the bounding box. If 4326, the bounding box is assumed + to be lon/lat degrees (default: 4326). + + Returns + ------- + burst_map: pandas.Dataframe + Pandas dataframe containing geogrid info (e.g. top-left, bottom-right + x and y coordinates) for each burst to process + """ + # Initialize dictionary that contains all the info for geocoding + burst_map = defaultdict(list) + + # Get all the bursts from safe file + i_subswath = [1, 2, 3] + + for zip_file in zip_files: + orbit_path = get_orbit_file_from_dir(zip_file, orbit_dir) + + for subswath in i_subswath: + ref_bursts = load_bursts(zip_file, orbit_path, subswath) + for burst in ref_bursts: + if epsg is None: + epsg = get_point_epsg(burst.center.y, + burst.center.x) + + if bbox is not None: + bbox_utm = helpers.bbox_to_utm( + bbox, epsg_bbox=epsg_bbox, epsg_out=epsg + ) + burst_border_utm = helpers.polygon_to_utm( + burst.border[0], epsg + ) + # Skip this burst if it doesn't intersect the specified bbox + if not geometry.box(*bbox_utm).intersects(burst_border_utm): + continue + else: + # Initialize geogrid with the info checked at this stage + geogrid = isce3.product.bbox_to_geogrid( + burst.as_isce3_radargrid(), + burst.orbit, + isce3.core.LUT2d(), + x_spac, -y_spac, + epsg) + bbox_utm = snap_geogrid(geogrid, x_spac, y_spac) + + burst_map['burst_id'].append(burst.burst_id) + # keep the burst object so we don't have to re-parse + burst_map['burst'].append(burst) + + left, bottom, right, top = bbox_utm + burst_map['x_top_left'].append(left) + burst_map['y_top_left'].append(top) + burst_map['x_bottom_right'].append(right) + burst_map['y_bottom_right'].append(bottom) + + burst_map['epsg'].append(epsg) + burst_map['date'].append(burst.sensing_start.strftime("%Y%m%d")) + # Save the file paths for creating the runconfig + burst_map['orbit_path'].append(orbit_path) + burst_map['zip_file'].append(zip_file) + + burst_map = pd.DataFrame(data=burst_map) + return burst_map + + +def snap_geogrid(geogrid, x_spac, y_spac): + """Snap the bounding box of the geogrid 
to integer multiples of the + spacing in X-/Y-directions. + + Parameters + ---------- + geogrid : isce3.core.Geogrid + Geogrid object + x_spac : float + Spacing of geocoded burst along X-direction + y_spac : float + Spacing of geocoded burst along Y-direction + + Returns + ------- + tuple + Tuple containing the snapped bounding box coordinates + (left, bottom, right, top) + """ + x_left = x_spac * np.floor(geogrid.start_x / x_spac) + x_right = x_spac * np.ceil((geogrid.start_x + x_spac * geogrid.width) / x_spac) + y_bottom = y_spac * np.floor((geogrid.start_y + y_spac * geogrid.length) / y_spac) + y_top = y_spac * np.ceil(geogrid.start_y / y_spac) + return (x_left, y_bottom, x_right, y_top) + + +def prune_dataframe(data, id_col, id_list): + """Prune dataframe based on column ID and list of value + + Parameters: + ---------- + data: pandas.DataFrame + dataframe that needs to be pruned + id_col: str + column identification for 'data' (e.g. 'burst_id') + id_list: list + List of elements to consider when pruning. + If exclude_items is False (default), then all elements in `data` + will be kept *except for* those in `id_list`. + If exclude_items is True, the items in `id_list` will be removed from `data`. + + Returns: + ------- + data: pandas.DataFrame + Pruned dataframe with rows in 'id_list' + """ + pattern = '|'.join(id_list) + df = data.loc[data[id_col].str.contains(pattern, case=False)] + return df + + +def get_common_burst_ids(data): + """Get list of burst IDs common among all processed dates + + Parameters + ---------- + data: pandas.DataFrame + Dataframe containing info for stitching (e.g. burst IDs) + + Returns + ------- + common_id: list + List containing common burst IDs among all the dates + """ + # Identify all the dates for the bursts to stitch + unique_dates = list(set(data['date'])) + + # Initialize list of unique burst IDs + common_id = data.burst_id[data.date == unique_dates[0]] + + for date in unique_dates: + ids = data.burst_id[data.date == date] + common_id = sorted(list(set(ids.tolist()) & set(common_id))) + return common_id + + +def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, + low_band, high_band, pol, x_spac, y_spac): + """ + Create runconfig to process geocoded bursts + + Parameters + ---------- + burst_map_row: namedtuple + one row from the dataframe method `burst_map.itertuples()` + dem_file: str + Path to DEM to use for processing + work_dir: str + Path to working directory for temp and final results + flatten: bool + Flag to enable/disable flattening + enable_rss: bool + Flag to enable range split-spectrum + low-band: float + Low sub-image bandwidth (in Hz) for split-spectrum + high-band: float + High sub-image bandwidth (in Hz) for split-spectrum + pol: str + Polarizations to process: co-pol, dual-pol, cross-pol + x_spac: float + Spacing of geocoded burst along X-direction + y_spac: float + Spacing of geocoded burst along Y-direction + + Returns + ------- + runconfig: str + Path to runconfig file + """ + # Load default runconfig and fill it with user-defined options + yaml_path = f'{helpers.WORKFLOW_SCRIPTS_DIR}/defaults/s1_cslc_geo.yaml' + with open(yaml_path, 'r') as stream: + yaml_cfg = yaml.safe_load(stream) + + groups = yaml_cfg['runconfig']['groups'] + inputs = groups['input_file_group'] + product = groups['product_path_group'] + process = groups['processing'] + geocode = process['geocoding'] + rss = process['range_split_spectrum'] + + # Allocate Inputs + burst = burst_map_row.burst + inputs['safe_file_path'] = 
[burst_map_row.zip_file] + inputs['orbit_file_path'] = [burst_map_row.orbit_path] + inputs['burst_id'] = burst.burst_id + groups['dynamic_ancillary_file_group']['dem_file'] = dem_file + + # Product path + product['product_path'] = work_dir + product['scratch_path'] = f'{work_dir}/scratch' + product['sas_output_file'] = work_dir + + # Geocoding + process['polarization'] = pol + geocode['flatten'] = flatten + geocode['x_posting'] = x_spac + geocode['y_posting'] = y_spac + + geocode['top_left']['x'] = burst_map_row.x_top_left + geocode['top_left']['y'] = burst_map_row.y_top_left + geocode['bottom_right']['x'] = burst_map_row.x_bottom_right + geocode['bottom_right']['y'] = burst_map_row.y_bottom_right + # geocode['x_snap'] = None + # geocode['y_snap'] = None + geocode['output_epsg'] = burst_map_row.epsg + + # Range split spectrum + rss['enabled'] = enable_rss + rss['low_band_bandwidth'] = low_band + rss['high_band_bandwidth'] = high_band + + date_str = burst.sensing_start.strftime("%Y%m%d") + os.makedirs(f'{work_dir}/runconfigs', exist_ok=True) + runconfig_path = f'{work_dir}/runconfigs/geo_runconfig_{date_str}_{burst.burst_id}.yaml' + with open(runconfig_path, 'w') as yaml_file: + yaml.dump(yaml_cfg, yaml_file, default_flow_style=False) + return runconfig_path + + +def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): + """ + Filter list of zip files based on date + + Parameters + ---------- + zip_file_list: list + List of zip files to filter + start_date: str + Start date in YYYYMMDD format + end_date: str + End date in YYYYMMDD format + exclude_dates: list + List of dates to exclude + + Returns + ------- + zip_file_list: list + Filtered list of zip files + """ + safe_datetimes = [parse_safe_filename(zip_file)[2] for zip_file in zip_file_list] + if start_date: + start_datetime = datetime.datetime.strptime(start_date, '%Y%m%d') + else: + start_datetime = min(safe_datetimes) + if end_date: + end_datetime = datetime.datetime.strptime(end_date, '%Y%m%d') + else: + end_datetime = max(safe_datetimes) + + if exclude_dates is not None: + exclude_datetimes = [ + datetime.datetime.strptime(d, '%Y%m%d').date + for d in exclude_dates + ] + else: + exclude_datetimes = [] + + # Filter within date range, and prune excluded dates + zip_file_list = [ + f + for (f, dt) in zip(zip_file_list, safe_datetimes) + if start_datetime <= dt <= end_datetime and dt.date not in exclude_datetimes + ] + return zip_file_list + + +def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, + orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, + epsg_bbox=4326, epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, + high_band=0.0): + """Create runconfigs and runfiles generating geocoded bursts for a static + stack of Sentinel-1 A/B SAFE files. 
+ + Parameters + ---------- + slc_dir: str + Directory containing S1-A/B SAFE files + dem_file: str + File path to DEM to use for processing + burst_id: list + List of burst IDs to process (default: None) + start_date: int + Date of the start acquisition of the stack (format: YYYYMMDD) + end_date: int + Date of the end acquisition of the stack (format: YYYYMMDD) + exclude_dates: list + List of dates to exclude from the stack (format: YYYYMMDD) + orbit_dir: str + Directory containing orbit files + work_dir: str + Working directory to store temp and final files + x_spac: float + Spacing of geocoded burst along X-direction + y_spac: float + Spacing of geocoded burst along Y-direction + bbox: tuple[float], optional + Bounding box of the area to geocode: (xmin, ymin, xmax, ymax) in degrees. + If not provided, will use the bounding box of the stack. + epsg: int + EPSG code identifying projection system to use for processing + flatten: bool + Enable/disable flattening of geocoded burst + is_split_spectrum: bool + Enable/disable range split spectrum + low_band: float + Low sub-band bandwidth for split-spectrum in Hz + high_band: float + High sub-band bandwidth for split-spectrum in Hz + """ + start_time = time.time() + error = journal.error('s1_geo_stack_processor.main') + info = journal.info('s1_geo_stack_processor.main') + + # Check if SLC dir and DEM exists + if not os.path.isdir(slc_dir): + err_str = f'{slc_dir} SLC directory does not exist' + error.log(err_str) + raise FileNotFoundError(err_str) + + if not os.path.isfile(dem_file): + err_str = f'{dem_file} DEM file does not exists' + error.log(err_str) + raise FileNotFoundError(err_str) + + # Create directory for runfiles + run_dir = f'{work_dir}/run_files' + os.makedirs(run_dir, exist_ok=True) + + # Check if orbit are provided, if Not download + if orbit_dir is None: + orbit_dir = f'{work_dir}/orbits' + info.log(f'Orbit directory not assigned. 
Using {orbit_dir} to download orbits') + os.makedirs(orbit_dir, exist_ok=True) + # Note: Specific files will be downloaded as needed during `generate_burst_map` + + # Generate burst map and prune it if a list of burst ID is provided + zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip')) + # Remove zip files that are not in the date range before generating burst map + zip_file_list = _filter_by_date(zip_file_list, start_date, end_date, exclude_dates) + + info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') + burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg, bbox) + + # Identify burst IDs common across the stack and remove from the dataframe + # burst IDs that are not in common + common_ids = get_common_burst_ids(burst_map) + burst_map = prune_dataframe(burst_map, 'burst_id', common_ids) + + # If user selects burst IDs to process, prune unnecessary bursts + if burst_id is not None: + burst_map = prune_dataframe(burst_map, 'burst_id', burst_id) + + # Ready to geocode bursts + for row in burst_map.itertuples(): + runconfig_path = create_runconfig( + row, + dem_file, + work_dir, + flatten, + is_split_spectrum, + low_band, + high_band, + pol, + x_spac, + y_spac + ) + date_str = row.burst.sensing_start.strftime("%Y%m%d") + runfile_name = f'{run_dir}/run_{date_str}_{row.burst.burst_id}.sh' + with open(runfile_name, 'w') as rsh: + path = os.path.dirname(os.path.realpath(__file__)) + rsh.write( + f'python {path}/s1_cslc.py {runconfig_path}\n') + + end_time = time.time() + print('Elapsed time (min):', (end_time - start_time) / 60.0) + + +def main(): + """Create the command line interface and run the script.""" + # Run main script + args = create_parser() + + run(args.slc_dir, args.dem_file, args.burst_id, args.start_date, + args.end_date, args.exclude_dates, args.orbit_dir, + args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, + args.epsg_bbox, args.epsg, args.flatten, args.is_split_spectrum, + args.low_band, args.high_band) + + +if __name__ == '__main__': + main() \ No newline at end of file From dc6cafa27b8a2b417501fa462cbf823fe7466037 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 07:28:00 -0700 Subject: [PATCH 33/42] rename in other files too, fix `run` --- setup.cfg | 2 +- src/compass/s1_geo_stack.py | 490 ------------------------------------ 2 files changed, 1 insertion(+), 491 deletions(-) delete mode 100755 src/compass/s1_geo_stack.py diff --git a/setup.cfg b/setup.cfg index f3d080b2..fc2f1aa4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,4 +28,4 @@ where = src [options.entry_points] console_scripts = s1_cslc.py = compass.s1_cslc:main - s1_geo_stack.py = compass.s1_geo_stack:main + s1_geo_stack.py = compass.s1_geocoded_stack:main diff --git a/src/compass/s1_geo_stack.py b/src/compass/s1_geo_stack.py deleted file mode 100755 index cc4e663f..00000000 --- a/src/compass/s1_geo_stack.py +++ /dev/null @@ -1,490 +0,0 @@ -#!/usr/bin/env python -import argparse -import datetime -import glob -import os -import time -from collections import defaultdict - -import isce3 -import journal -import numpy as np -import pandas as pd -import yaml -from s1reader.s1_orbit import get_orbit_file_from_dir, parse_safe_filename -from s1reader.s1_reader import load_bursts -from shapely import geometry - -from compass.utils import helpers -from compass.utils.geo_grid import get_point_epsg - - -def create_parser(): - parser = argparse.ArgumentParser( - description='S1-A/B geocoded CSLC stack processor.', - 
formatter_class=argparse.ArgumentDefaultsHelpFormatter) - # Separate the required options from the optional ones - # https://stackoverflow.com/a/41747010/ - parser._action_groups.pop() - required = parser.add_argument_group('required arguments') - optional = parser.add_argument_group('optional arguments') - required.add_argument('-s', '--slc-dir', required=True, - help='Directory containing the S1-A/B SLCs (zip files)') - required.add_argument('-d', '--dem-file', required=True, - help='File path to a GDAL-readable DEM to use for processing.') - optional.add_argument('-o', '--orbit-dir', default=None, - help='Directory with orbit files. If None, downloads orbit files') - optional.add_argument('-w', '--working-dir', dest='work_dir', default='stack', - help='Directory to store intermediate and final results') - optional.add_argument('-sd', '--start-date', help='Start date of the stack to process') - optional.add_argument('-ed', '--end-date', help='End date of the stack to process') - optional.add_argument('-b', '--burst-id', nargs='+', default=None, - help='List of burst IDs to process. If None, burst IDs ' - 'common to all dates are processed.') - optional.add_argument('-exd', '--exclude-dates', nargs='+', - help='Date to be excluded from stack processing (format: YYYYMMDD)') - optional.add_argument('-p', '--pol', dest='pol', nargs='+', default='co-pol', - help='Polarization to process: dual-pol, co-pol, cross-pol ' - ' (default: co-pol).') - optional.add_argument('-dx', '--x-spac', type=float, default=5, - help='Spacing in meters of geocoded CSLC along X-direction.') - optional.add_argument('-dy', '--y-spac', type=float, default=10, - help='Spacing in meters of geocoded CSLC along Y-direction.') - optional.add_argument('--bbox', nargs=4, type=float, default=None, - metavar=('xmin', 'ymin', 'xmax', 'ymax'), - help='Bounding box of the geocoded stack.') - optional.add_argument('--epsg-bbox', type=int, default=4326, - help='EPSG code of the bounding box. ' - 'If 4326, the bounding box is in lon/lat degrees.') - optional.add_argument('-e', '--epsg', type=int, default=None, - help='Output EPSG projection code for geocoded bursts. ' - 'If None, projection the UTM zone of the first burst.') - optional.add_argument('-f', '--flatten', type=bool, default=True, - help='If True, enables topographic phase flattening.') - optional.add_argument('-ss', '--range-split-spectrum', - dest='is_split_spectrum', type=bool, default=False, - help='If True, enables split-spectrum') - optional.add_argument('-lb', '--low-band', type=float, default=0.0, - help='Low sub-band bandwidth in Hz (default: 0.0)') - optional.add_argument('-hb', '--high-band', type=float, default=0.0, - help='High sub-band bandwidth in Hz (default: 0.0') - return parser.parse_args() - - -def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=None, - bbox=None, epsg_bbox=4326): - """Generates a dataframe of geogrid infos for each burst ID in `zip_files`. - - Parameters - ---------- - zip_files: str - List of S1-A/B SAFE (zip) files - orbit_dir: str - Directory containing sensor orbit ephemerides - x_spac: float - Spacing of geocoded burst along X-direction - y_spac: float - Spacing of geocoded burst along Y-direction - epsg: int - EPSG code identifying output product projection system - bbox: Optional[tuple[float]] - Desired bounding box of the geocoded bursts as (left, bottom, right, top). - If not provided, the bounding box is computed for each burst. - epsg_bbox: int - EPSG code of the bounding box. 
If 4326, the bounding box is assumed - to be lon/lat degrees (default: 4326). - - Returns - ------- - burst_map: pandas.Dataframe - Pandas dataframe containing geogrid info (e.g. top-left, bottom-right - x and y coordinates) for each burst to process - """ - # Initialize dictionary that contains all the info for geocoding - burst_map = defaultdict(list) - - # Get all the bursts from safe file - i_subswath = [1, 2, 3] - - for zip_file in zip_files: - orbit_path = get_orbit_file_from_dir(zip_file, orbit_dir) - - for subswath in i_subswath: - ref_bursts = load_bursts(zip_file, orbit_path, subswath) - for burst in ref_bursts: - if epsg is None: - epsg = get_point_epsg(burst.center.y, - burst.center.x) - - if bbox is not None: - bbox_utm = helpers.bbox_to_utm( - bbox, epsg_bbox=epsg_bbox, epsg_out=epsg - ) - burst_border_utm = helpers.polygon_to_utm( - burst.border[0], epsg - ) - # Skip this burst if it doesn't intersect the specified bbox - if not geometry.box(*bbox_utm).intersects(burst_border_utm): - continue - else: - # Initialize geogrid with the info checked at this stage - geogrid = isce3.product.bbox_to_geogrid( - burst.as_isce3_radargrid(), - burst.orbit, - isce3.core.LUT2d(), - x_spac, -y_spac, - epsg) - bbox_utm = snap_geogrid(geogrid, x_spac, y_spac) - - burst_map['burst_id'].append(burst.burst_id) - # keep the burst object so we don't have to re-parse - burst_map['burst'].append(burst) - - left, bottom, right, top = bbox_utm - burst_map['x_top_left'].append(left) - burst_map['y_top_left'].append(top) - burst_map['x_bottom_right'].append(right) - burst_map['y_bottom_right'].append(bottom) - - burst_map['epsg'].append(epsg) - burst_map['date'].append(burst.sensing_start.strftime("%Y%m%d")) - # Save the file paths for creating the runconfig - burst_map['orbit_path'].append(orbit_path) - burst_map['zip_file'].append(zip_file) - - burst_map = pd.DataFrame(data=burst_map) - return burst_map - - -def snap_geogrid(geogrid, x_spac, y_spac): - """Snap the bounding box of the geogrid to integer multiples of the - spacing in X-/Y-directions. - - Parameters - ---------- - geogrid : isce3.core.Geogrid - Geogrid object - x_spac : float - Spacing of geocoded burst along X-direction - y_spac : float - Spacing of geocoded burst along Y-direction - - Returns - ------- - tuple - Tuple containing the snapped bounding box coordinates - (left, bottom, right, top) - """ - x_left = x_spac * np.floor(geogrid.start_x / x_spac) - x_right = x_spac * np.ceil((geogrid.start_x + x_spac * geogrid.width) / x_spac) - y_bottom = y_spac * np.floor((geogrid.start_y + y_spac * geogrid.length) / y_spac) - y_top = y_spac * np.ceil(geogrid.start_y / y_spac) - return (x_left, y_bottom, x_right, y_top) - - -def prune_dataframe(data, id_col, id_list): - """Prune dataframe based on column ID and list of value - - Parameters: - ---------- - data: pandas.DataFrame - dataframe that needs to be pruned - id_col: str - column identification for 'data' (e.g. 'burst_id') - id_list: list - List of elements to consider when pruning. - If exclude_items is False (default), then all elements in `data` - will be kept *except for* those in `id_list`. - If exclude_items is True, the items in `id_list` will be removed from `data`. 
- - Returns: - ------- - data: pandas.DataFrame - Pruned dataframe with rows in 'id_list' - """ - pattern = '|'.join(id_list) - df = data.loc[data[id_col].str.contains(pattern, case=False)] - return df - - -def get_common_burst_ids(data): - """Get list of burst IDs common among all processed dates - - Parameters - ---------- - data: pandas.DataFrame - Dataframe containing info for stitching (e.g. burst IDs) - - Returns - ------- - common_id: list - List containing common burst IDs among all the dates - """ - # Identify all the dates for the bursts to stitch - unique_dates = list(set(data['date'])) - - # Initialize list of unique burst IDs - common_id = data.burst_id[data.date == unique_dates[0]] - - for date in unique_dates: - ids = data.burst_id[data.date == date] - common_id = sorted(list(set(ids.tolist()) & set(common_id))) - return common_id - - -def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, - low_band, high_band, pol, x_spac, y_spac): - """ - Create runconfig to process geocoded bursts - - Parameters - ---------- - burst_map_row: namedtuple - one row from the dataframe method `burst_map.itertuples()` - dem_file: str - Path to DEM to use for processing - work_dir: str - Path to working directory for temp and final results - flatten: bool - Flag to enable/disable flattening - enable_rss: bool - Flag to enable range split-spectrum - low-band: float - Low sub-image bandwidth (in Hz) for split-spectrum - high-band: float - High sub-image bandwidth (in Hz) for split-spectrum - pol: str - Polarizations to process: co-pol, dual-pol, cross-pol - x_spac: float - Spacing of geocoded burst along X-direction - y_spac: float - Spacing of geocoded burst along Y-direction - - Returns - ------- - runconfig: str - Path to runconfig file - """ - # Load default runconfig and fill it with user-defined options - yaml_path = f'{helpers.WORKFLOW_SCRIPTS_DIR}/defaults/s1_cslc_geo.yaml' - with open(yaml_path, 'r') as stream: - yaml_cfg = yaml.safe_load(stream) - - groups = yaml_cfg['runconfig']['groups'] - inputs = groups['input_file_group'] - product = groups['product_path_group'] - process = groups['processing'] - geocode = process['geocoding'] - rss = process['range_split_spectrum'] - - # Allocate Inputs - burst = burst_map_row.burst - inputs['safe_file_path'] = [burst_map_row.zip_file] - inputs['orbit_file_path'] = [burst_map_row.orbit_path] - inputs['burst_id'] = burst.burst_id - groups['dynamic_ancillary_file_group']['dem_file'] = dem_file - - # Product path - product['product_path'] = work_dir - product['scratch_path'] = f'{work_dir}/scratch' - product['sas_output_file'] = work_dir - - # Geocoding - process['polarization'] = pol - geocode['flatten'] = flatten - geocode['x_posting'] = x_spac - geocode['y_posting'] = y_spac - - geocode['top_left']['x'] = burst_map_row.x_top_left - geocode['top_left']['y'] = burst_map_row.y_top_left - geocode['bottom_right']['x'] = burst_map_row.x_bottom_right - geocode['bottom_right']['y'] = burst_map_row.y_bottom_right - # geocode['x_snap'] = None - # geocode['y_snap'] = None - geocode['output_epsg'] = burst_map_row.epsg - - # Range split spectrum - rss['enabled'] = enable_rss - rss['low_band_bandwidth'] = low_band - rss['high_band_bandwidth'] = high_band - - date_str = burst.sensing_start.strftime("%Y%m%d") - os.makedirs(f'{work_dir}/runconfigs', exist_ok=True) - runconfig_path = f'{work_dir}/runconfigs/geo_runconfig_{date_str}_{burst.burst_id}.yaml' - with open(runconfig_path, 'w') as yaml_file: - yaml.dump(yaml_cfg, yaml_file, 
default_flow_style=False) - return runconfig_path - - -def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): - """ - Filter list of zip files based on date - - Parameters - ---------- - zip_file_list: list - List of zip files to filter - start_date: str - Start date in YYYYMMDD format - end_date: str - End date in YYYYMMDD format - exclude_dates: list - List of dates to exclude - - Returns - ------- - zip_file_list: list - Filtered list of zip files - """ - safe_datetimes = [parse_safe_filename(zip_file)[2] for zip_file in zip_file_list] - if start_date: - start_datetime = datetime.datetime.strptime(start_date, '%Y%m%d') - else: - start_datetime = min(safe_datetimes) - if end_date: - end_datetime = datetime.datetime.strptime(end_date, '%Y%m%d') - else: - end_datetime = max(safe_datetimes) - - if exclude_dates is not None: - exclude_datetimes = [ - datetime.datetime.strptime(d, '%Y%m%d').date - for d in exclude_dates - ] - else: - exclude_datetimes = [] - - # Filter within date range, and prune excluded dates - zip_file_list = [ - f - for (f, dt) in zip(zip_file_list, safe_datetimes) - if start_datetime <= dt <= end_datetime and dt.date not in exclude_datetimes - ] - return zip_file_list - - -def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, - orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, - epsg_bbox=4326, epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, - high_band=0.0): - """Create runconfigs and runfiles generating geocoded bursts for a static - stack of Sentinel-1 A/B SAFE files. - - Parameters - ---------- - slc_dir: str - Directory containing S1-A/B SAFE files - dem_file: str - File path to DEM to use for processing - burst_id: list - List of burst IDs to process (default: None) - start_date: int - Date of the start acquisition of the stack (format: YYYYMMDD) - end_date: int - Date of the end acquisition of the stack (format: YYYYMMDD) - exclude_dates: list - List of dates to exclude from the stack (format: YYYYMMDD) - orbit_dir: str - Directory containing orbit files - work_dir: str - Working directory to store temp and final files - x_spac: float - Spacing of geocoded burst along X-direction - y_spac: float - Spacing of geocoded burst along Y-direction - bbox: tuple[float], optional - Bounding box of the area to geocode: (xmin, ymin, xmax, ymax) in degrees. - If not provided, will use the bounding box of the stack. - epsg: int - EPSG code identifying projection system to use for processing - flatten: bool - Enable/disable flattening of geocoded burst - is_split_spectrum: bool - Enable/disable range split spectrum - low_band: float - Low sub-band bandwidth for split-spectrum in Hz - high_band: float - High sub-band bandwidth for split-spectrum in Hz - """ - start_time = time.time() - error = journal.error('s1_geo_stack_processor.main') - info = journal.info('s1_geo_stack_processor.main') - - # Check if SLC dir and DEM exists - if not os.path.isdir(slc_dir): - err_str = f'{slc_dir} SLC directory does not exist' - error.log(err_str) - raise FileNotFoundError(err_str) - - if not os.path.isfile(dem_file): - err_str = f'{dem_file} DEM file does not exists' - error.log(err_str) - raise FileNotFoundError(err_str) - - # Create directory for runfiles - run_dir = f'{work_dir}/run_files' - os.makedirs(run_dir, exist_ok=True) - - # Check if orbit are provided, if Not download - if orbit_dir is None: - orbit_dir = f'{work_dir}/orbits' - info.log(f'Orbit directory not assigned. 
Using {orbit_dir} to download orbits') - os.makedirs(orbit_dir, exist_ok=True) - # Note: Specific files will be downloaded as needed during `generate_burst_map` - - # Generate burst map and prune it if a list of burst ID is provided - zip_file_list = sorted(glob.glob(f'{slc_dir}/S1*zip')) - # Remove zip files that are not in the date range before generating burst map - zip_file_list = _filter_by_date(zip_file_list, start_date, end_date, exclude_dates) - - info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') - burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg, bbox) - - # Identify burst IDs common across the stack and remove from the dataframe - # burst IDs that are not in common - common_ids = get_common_burst_ids(burst_map) - burst_map = prune_dataframe(burst_map, 'burst_id', common_ids) - - # If user selects burst IDs to process, prune unnecessary bursts - if burst_id is not None: - burst_map = prune_dataframe(burst_map, 'burst_id', burst_id) - - # Ready to geocode bursts - for row in burst_map.itertuples(): - runconfig_path = create_runconfig( - row, - dem_file, - work_dir, - flatten, - is_split_spectrum, - low_band, - high_band, - pol, - x_spac, - y_spac - ) - date_str = row.burst.sensing_start.strftime("%Y%m%d") - runfile_name = f'{run_dir}/run_{date_str}_{row.burst.burst_id}.sh' - with open(runfile_name, 'w') as rsh: - path = os.path.dirname(os.path.realpath(__file__)) - rsh.write( - f'python {path}/s1_cslc.py {runconfig_path}\n') - - end_time = time.time() - print('Elapsed time (min):', (end_time - start_time) / 60.0) - - -def main(): - """Create the command line interface and run the script.""" - # Run main script - args = create_parser() - - main(args.slc_dir, args.dem_file, args.burst_id, args.start_date, - args.end_date, args.exclude_dates, args.orbit_dir, - args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, - args.epsg_bbox, args.epsg, args.flatten, args.is_split_spectrum, - args.low_band, args.high_band) - - -if __name__ == '__main__': - main() \ No newline at end of file From b7914d19e287c26afa76742cb47509a5f08a4790 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 07:28:06 -0700 Subject: [PATCH 34/42] add a function to get database bbox --- src/compass/utils/helpers.py | 46 +++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index 192cb95f..415ab79f 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -1,6 +1,7 @@ '''collection of useful functions used across workflows''' import os +import sqlite3 import isce3 import journal @@ -211,7 +212,7 @@ def _convert_to_utm(points_xy, epsg_in, epsg_out): if epsg_in == 4326: # proj.forward expects Longitude/latitude in radians - points_ll = np.deg2rad(points_xy) + points_ll = np.deg2rad(points_xy)[:, :2] else: # convert points to lon/lat if given a different UTM projection points_ll = [] @@ -225,3 +226,46 @@ def _convert_to_utm(points_xy, epsg_in, epsg_out): # proj.forward expects llh, [Longitude, latitude, height (m)] out.append(proj.forward([lon, lat, 0])[:2]) return out + + +def get_burst_bbox(burst_id, burst_db_file=None, burst_db_conn=None): + """Find the bounding box of a burst in the burst database. 
+
+    Parameters
+    ----------
+    burst_id : str
+        JPL burst ID
+    burst_db_file : str
+        Location of burst database sqlite file, by default None
+    burst_db_conn : sqlite3.Connection
+        Connection object to burst database (if already connected)
+        Alternative to providing burst_db_file, will be faster
+        for multiple queries.
+
+    Returns
+    -------
+    epsg : int
+        EPSG code of burst bounding box
+    bbox : tuple[float]
+        Bounding box of burst in EPSG coordinates
+
+    Raises
+    ------
+    ValueError
+        If burst_id is not found in burst database
+    """
+    # example burst db:
+    # /home/staniewi/dev/burst_map_IW_000001_375887.OPERA-JPL.20221020_143224.sqlite3
+    if burst_db_conn is None:
+        burst_db_conn = sqlite3.connect(burst_db_file)
+    burst_db_conn.row_factory = sqlite3.Row  # return rows as dicts
+
+    query = "SELECT epsg, xmin, ymin, xmax, ymax FROM burst_id_map WHERE burst_id_jpl = ?"
+    cur = burst_db_conn.execute(query, (burst_id,))
+
+    result = cur.fetchone()
+    if result is None:
+        raise ValueError(f"Failed to find {burst_id} in {burst_db_file}")
+    epsg = result["EPSG"]
+    bbox = (result["xmin"], result["ymin"], result["xmax"], result["ymax"])
+    return epsg, bbox

From 63cf12e890d1690f6e94057a35f2ddb34d7a8ab8 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Mon, 24 Oct 2022 09:00:40 -0700
Subject: [PATCH 35/42] use the burst database if available to bbox information

---
 src/compass/s1_geocode_stack.py | 116 ++++++++++++++++----------------
 src/compass/utils/helpers.py    |  30 +++++----
 2 files changed, 75 insertions(+), 71 deletions(-)

diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py
index 5d6285b3..fd15b9d7 100755
--- a/src/compass/s1_geocode_stack.py
+++ b/src/compass/s1_geocode_stack.py
@@ -19,6 +19,9 @@
 from compass.utils.geo_grid import get_point_epsg
 
 
+DEFAULT_BURST_DB_FILE = os.path.abspath("/u/aurora-r0/staniewi/dev/burst_map_IW_000001_375887.OPERA-JPL.sqlite3") # noqa
+
+
 def create_parser():
     parser = argparse.ArgumentParser(
         description='S1-A/B geocoded CSLC stack processor.',
@@ -71,8 +74,8 @@
     return parser.parse_args()
 
 
-def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=None,
-                       bbox=None, epsg_bbox=4326):
+def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None,
+                       epsg_bbox=4326, burst_db_file=DEFAULT_BURST_DB_FILE):
     """Generates a dataframe of geogrid infos for each burst ID in `zip_files`.
 
     Parameters
@@ -81,10 +84,6 @@
     zip_files: str
         List of S1-A/B SAFE (zip) files
     orbit_dir: str
         Directory containing sensor orbit ephemerides
-    x_spac: float
-        Spacing of geocoded burst along X-direction
-    y_spac: float
-        Spacing of geocoded burst along Y-direction
     epsg: int
         EPSG code identifying output product projection system
@@ -93,6 +92,8 @@
     bbox: Optional[tuple[float]]
         Desired bounding box of the geocoded bursts as (left, bottom, right, top).
         If not provided, the bounding box is computed for each burst.
     epsg_bbox: int
         EPSG code of the bounding box. If 4326, the bounding box is assumed
         to be lon/lat degrees (default: 4326).
+    burst_db_file: str
+        Path to the burst database file to load bounding boxes. 
Returns ------- @@ -112,29 +113,11 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=None, for subswath in i_subswath: ref_bursts = load_bursts(zip_file, orbit_path, subswath) for burst in ref_bursts: - if epsg is None: - epsg = get_point_epsg(burst.center.y, - burst.center.x) - - if bbox is not None: - bbox_utm = helpers.bbox_to_utm( - bbox, epsg_bbox=epsg_bbox, epsg_out=epsg - ) - burst_border_utm = helpers.polygon_to_utm( - burst.border[0], epsg - ) - # Skip this burst if it doesn't intersect the specified bbox - if not geometry.box(*bbox_utm).intersects(burst_border_utm): - continue - else: - # Initialize geogrid with the info checked at this stage - geogrid = isce3.product.bbox_to_geogrid( - burst.as_isce3_radargrid(), - burst.orbit, - isce3.core.LUT2d(), - x_spac, -y_spac, - epsg) - bbox_utm = snap_geogrid(geogrid, x_spac, y_spac) + epsg, bbox_utm = _get_burst_epsg_and_bbox( + burst, epsg, bbox, epsg_bbox, burst_db_file + ) + if epsg is None: # Flag for skipping burst + continue burst_map['burst_id'].append(burst.burst_id) # keep the burst object so we don't have to re-parse @@ -156,30 +139,41 @@ def generate_burst_map(zip_files, orbit_dir, x_spac, y_spac, epsg=None, return burst_map -def snap_geogrid(geogrid, x_spac, y_spac): - """Snap the bounding box of the geogrid to integer multiples of the - spacing in X-/Y-directions. - - Parameters - ---------- - geogrid : isce3.core.Geogrid - Geogrid object - x_spac : float - Spacing of geocoded burst along X-direction - y_spac : float - Spacing of geocoded burst along Y-direction - - Returns - ------- - tuple - Tuple containing the snapped bounding box coordinates - (left, bottom, right, top) +def _get_burst_epsg_and_bbox(burst, epsg, bbox, epsg_bbox, burst_db_file): + """Returns the EPSG code and bounding box for a burst. + + Uses specified `bbox` if provided; otherwise, uses burst database (if available). 
""" - x_left = x_spac * np.floor(geogrid.start_x / x_spac) - x_right = x_spac * np.ceil((geogrid.start_x + x_spac * geogrid.width) / x_spac) - y_bottom = y_spac * np.floor((geogrid.start_y + y_spac * geogrid.length) / y_spac) - y_top = y_spac * np.ceil(geogrid.start_y / y_spac) - return (x_left, y_bottom, x_right, y_top) + if epsg is None: + # Get the UTM zone of the first burst from the database + if os.path.exists(burst_db_file): + epsg, _ = helpers.get_burst_bbox( + burst.burst_id, burst_db_file + ) + else: + # Fallback: ust the burst center UTM zone + epsg = get_point_epsg(burst.center.y, + burst.center.x) + + if bbox is not None: + bbox_utm = helpers.bbox_to_utm( + bbox, epsg_src=epsg_bbox, epsg_dst=epsg + ) + burst_border_utm = helpers.polygon_to_utm( + burst.border[0], epsg_src=4326, epsg_dst=epsg + ) + # Skip this burst if it doesn't intersect the specified bbox + if not geometry.box(*bbox_utm).intersects(burst_border_utm): + return None, None + else: + epsg_db, bbox_utm = helpers.get_burst_bbox( + burst.burst_id, burst_db_file + ) + if epsg_db != epsg: + bbox_utm = helpers.transform_bbox( + bbox_utm, epsg_src=epsg_db, epsg_dst=epsg + ) + return epsg, bbox_utm def prune_dataframe(data, id_col, id_list): @@ -365,8 +359,8 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, - epsg_bbox=4326, epsg=None, flatten=True, is_split_spectrum=False, low_band=0.0, - high_band=0.0): + epsg_bbox=4326, epsg=None, burst_db_file=DEFAULT_BURST_DB_FILE, flatten=True, + is_split_spectrum=False, low_band=0.0, high_band=0.0): """Create runconfigs and runfiles generating geocoded bursts for a static stack of Sentinel-1 A/B SAFE files. @@ -395,8 +389,15 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat bbox: tuple[float], optional Bounding box of the area to geocode: (xmin, ymin, xmax, ymax) in degrees. If not provided, will use the bounding box of the stack. + epsg_bbox: int + EPSG code of the bounding box coordinates (default: 4326) + If using EPSG:4326, the bounding box coordinates are in degrees. epsg: int - EPSG code identifying projection system to use for processing + EPSG code identifying projection system to use for output. + If not specified, will search for burst center's EPSG from + the burst database. + burst_db_file : str + File path to burst database containing EPSG/extent information. 
flatten: bool Enable/disable flattening of geocoded burst is_split_spectrum: bool @@ -438,7 +439,8 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat zip_file_list = _filter_by_date(zip_file_list, start_date, end_date, exclude_dates) info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') - burst_map = generate_burst_map(zip_file_list, orbit_dir, x_spac, y_spac, epsg, bbox) + burst_map = generate_burst_map(zip_file_list, orbit_dir, epsg, bbox, epsg_bbox, + burst_db_file) # Identify burst IDs common across the stack and remove from the dataframe # burst IDs that are not in common @@ -487,4 +489,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index 415ab79f..0ecb3497 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -159,7 +159,7 @@ def check_dem(dem_path: str): raise ValueError(err_str) -def bbox_to_utm(bbox, epsg_bbox=4326, epsg_out=None): +def bbox_to_utm(bbox, *, epsg_src, epsg_out): """Convert bounding box coordinates to UTM. Parameters @@ -167,8 +167,10 @@ def bbox_to_utm(bbox, epsg_bbox=4326, epsg_out=None): bbox : tuple Tuple containing the lon/lat bounding box coordinates (left, bottom, right, top) in degrees - epsg : int - EPSG code identifying output projection system + epsg_src : int + EPSG code identifying input bbox coordinate system + epsg_out : int + EPSG code identifying output coordinate system Returns ------- @@ -177,11 +179,11 @@ def bbox_to_utm(bbox, epsg_bbox=4326, epsg_out=None): (left, bottom, right, top) """ xmin, ymin, xmax, ymax = bbox - xys = _convert_to_utm([(xmin, ymin), (xmax, ymax)], epsg_bbox, epsg_out) + xys = _convert_to_utm([(xmin, ymin), (xmax, ymax)], epsg_src, epsg_out) return (*xys[0], *xys[1]) -def polygon_to_utm(poly, epsg_poly=4326, epsg_out=None): +def polygon_to_utm(poly, *, epsg_src, epsg_dst): """Convert a shapely.Polygon's coordinates to UTM. Parameters @@ -198,29 +200,29 @@ def polygon_to_utm(poly, epsg_poly=4326, epsg_out=None): (left, bottom, right, top) """ coords = np.array(poly.exterior.coords) - xys = _convert_to_utm(coords, epsg_poly, epsg_out) + xys = _convert_to_utm(coords, epsg_src, epsg_dst) return geometry.Polygon(xys) -def _convert_to_utm(points_xy, epsg_in, epsg_out): +def _convert_to_utm(points_xy, epsg_src, epsg_dst): """Convert a list of points to a specified UTM coordinate system. - If epsg_in is 4326 (lat/lon), assumes points_xy are in degrees. + If epsg_src is 4326 (lat/lon), assumes points_xy are in degrees. 
""" - if epsg_out == epsg_in: + if epsg_dst == epsg_src: return points_xy - if epsg_in == 4326: + if epsg_src == 4326: # proj.forward expects Longitude/latitude in radians points_ll = np.deg2rad(points_xy)[:, :2] else: # convert points to lon/lat if given a different UTM projection points_ll = [] - proj_ll = isce3.core.UTM(epsg_in) + proj_ll = isce3.core.UTM(epsg_src) for x, y in points_xy: points_ll.append(proj_ll.inverse([x, y, 0])[:2]) - proj = isce3.core.UTM(epsg_out) + proj = isce3.core.UTM(epsg_dst) out = [] for lon, lat in points_ll: # proj.forward expects llh, [Longitude, latitude, height (m)] @@ -255,7 +257,7 @@ def get_burst_bbox(burst_id, burst_db_file=None, burst_db_conn=None): If burst_id is not found in burst database """ # example burst db: - # /home/staniewi/dev/burst_map_IW_000001_375887.OPERA-JPL.20221020_143224.sqlite3 + # /home/staniewi/dev/burst_map_IW_000001_375887.OPERA-JPL.sqlite3 if burst_db_conn is None: burst_db_conn = sqlite3.connect(burst_db_file) burst_db_conn.row_factory = sqlite3.Row # return rows as dicts @@ -266,6 +268,6 @@ def get_burst_bbox(burst_id, burst_db_file=None, burst_db_conn=None): result = cur.fetchone() if result is None: raise ValueError(f"Failed to find {burst_id} in {burst_db_file}") - epsg = result["EPSG"] + epsg = result["epsg"] bbox = (result["xmin"], result["ymin"], result["xmax"], result["ymax"]) return epsg, bbox From cf84e481f7f7979301a0cb3ad28af579c6fe1ba2 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 09:13:06 -0700 Subject: [PATCH 36/42] add metadata argument, wip --- setup.cfg | 2 +- src/compass/s1_geocode_stack.py | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/setup.cfg b/setup.cfg index fc2f1aa4..a48a4f34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,4 +28,4 @@ where = src [options.entry_points] console_scripts = s1_cslc.py = compass.s1_cslc:main - s1_geo_stack.py = compass.s1_geocoded_stack:main + s1_geocode_stack.py = compass.s1_geocode_stack:main diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py index fd15b9d7..910252d0 100755 --- a/src/compass/s1_geocode_stack.py +++ b/src/compass/s1_geocode_stack.py @@ -62,6 +62,7 @@ def create_parser(): optional.add_argument('-e', '--epsg', type=int, default=None, help='Output EPSG projection code for geocoded bursts. ' 'If None, projection the UTM zone of the first burst.') + # TODO: fix flattin/bool args. No way to make flatten false. 
optional.add_argument('-f', '--flatten', type=bool, default=True, help='If True, enables topographic phase flattening.') optional.add_argument('-ss', '--range-split-spectrum', @@ -71,6 +72,9 @@ def create_parser(): help='Low sub-band bandwidth in Hz (default: 0.0)') optional.add_argument('-hb', '--high-band', type=float, default=0.0, help='High sub-band bandwidth in Hz (default: 0.0') + optional.add_argument('-m', '--metadata', type=bool, default=False, + help='If True, generates radar metadata layers for each ' + 'burst stack (see rdr2geo processing options)') return parser.parse_args() @@ -227,7 +231,7 @@ def get_common_burst_ids(data): def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, - low_band, high_band, pol, x_spac, y_spac): + low_band, high_band, pol, x_spac, y_spac, enable_metadata): """ Create runconfig to process geocoded bursts @@ -253,6 +257,8 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, Spacing of geocoded burst along X-direction y_spac: float Spacing of geocoded burst along Y-direction + enable_metadata: bool + Flag to enable/disable metadata generation for each burst stack. Returns ------- @@ -302,6 +308,9 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, rss['low_band_bandwidth'] = low_band rss['high_band_bandwidth'] = high_band + # Metadata generation + process['rdr2geo']['enabled'] = enable_metadata + date_str = burst.sensing_start.strftime("%Y%m%d") os.makedirs(f'{work_dir}/runconfigs', exist_ok=True) runconfig_path = f'{work_dir}/runconfigs/geo_runconfig_{date_str}_{burst.burst_id}.yaml' @@ -359,8 +368,8 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, - epsg_bbox=4326, epsg=None, burst_db_file=DEFAULT_BURST_DB_FILE, flatten=True, - is_split_spectrum=False, low_band=0.0, high_band=0.0): + epsg_bbox=4326, epsg=None, burst_db_file=DEFAULT_BURST_DB_FILE, flatten=True, + is_split_spectrum=False, low_band=0.0, high_band=0.0, enable_metadata=False): """Create runconfigs and runfiles generating geocoded bursts for a static stack of Sentinel-1 A/B SAFE files. @@ -406,6 +415,8 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat Low sub-band bandwidth for split-spectrum in Hz high_band: float High sub-band bandwidth for split-spectrum in Hz + enable_metadata: bool + Enable/disable generation of metadata files for each burst stack. 
""" start_time = time.time() error = journal.error('s1_geo_stack_processor.main') @@ -463,7 +474,8 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat high_band, pol, x_spac, - y_spac + y_spac, + enable_metadata, ) date_str = row.burst.sensing_start.strftime("%Y%m%d") runfile_name = f'{run_dir}/run_{date_str}_{row.burst.burst_id}.sh' @@ -485,7 +497,7 @@ def main(): args.end_date, args.exclude_dates, args.orbit_dir, args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, args.epsg_bbox, args.epsg, args.flatten, args.is_split_spectrum, - args.low_band, args.high_band) + args.low_band, args.high_band, args.metadata) if __name__ == '__main__': From 5f07e3445116852b2f353df5ac206931c77563fd Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 11:18:30 -0700 Subject: [PATCH 37/42] allow burst db selection to take one or many ids --- src/compass/s1_geocode_stack.py | 8 +++--- src/compass/utils/helpers.py | 47 ++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py index 910252d0..2da2f504 100755 --- a/src/compass/s1_geocode_stack.py +++ b/src/compass/s1_geocode_stack.py @@ -19,7 +19,7 @@ from compass.utils.geo_grid import get_point_epsg -DEFAULT_BURST_DB_FILE = os.path.abspath("/u/aurora-r0/staniewi/dev/burst_map_IW_000001_375887.OPERA-JPL.sqlite3") # noqa +DEFAULT_BURST_DB_FILE = os.path.abspath("/u/aurora-r0/staniewi/dev/burst_map_bbox_only.sqlite3") # noqa def create_parser(): @@ -309,6 +309,7 @@ def create_runconfig(burst_map_row, dem_file, work_dir, flatten, enable_rss, rss['high_band_bandwidth'] = high_band # Metadata generation + # TODO: Need to somehow do this only once per stack process['rdr2geo']['enabled'] = enable_metadata date_str = burst.sensing_start.strftime("%Y%m%d") @@ -450,8 +451,9 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat zip_file_list = _filter_by_date(zip_file_list, start_date, end_date, exclude_dates) info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') - burst_map = generate_burst_map(zip_file_list, orbit_dir, epsg, bbox, epsg_bbox, - burst_db_file) + burst_map = generate_burst_map( + zip_file_list, orbit_dir, epsg, bbox, epsg_bbox, burst_db_file + ) # Identify burst IDs common across the stack and remove from the dataframe # burst IDs that are not in common diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index 0ecb3497..65b2f283 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -231,12 +231,14 @@ def _convert_to_utm(points_xy, epsg_src, epsg_dst): def get_burst_bbox(burst_id, burst_db_file=None, burst_db_conn=None): - """Find the bounding box of a burst in the burst database. + """Find the bounding box of a burst (or bursts) in the database. + + Can either pass one string burst_id or a list of burst_ids. Parameters ---------- - burst_id : str - JPL burst ID + burst_id : str or list[str] + JPL burst ID, or a list of burst IDs. 
     burst_db_file : str
         Location of burst database sqlite file, by default None
     burst_db_conn : sqlite3.Connection
         Connection object to burst database (if already connected)
         Alternative to providing burst_db_file, will be faster
         for multiple queries.
 
     Returns
     -------
-    epsg : int
-        EPSG code of burst bounding box
-    bbox : tuple[float]
-        Bounding box of burst in EPSG coordinates
+    epsg : int, or list[int]
+        EPSG code (or codes) of burst bounding box(es)
+    bbox : tuple[float] or list[tuple[float]]
+        Bounding box of burst in EPSG coordinates, or list of bounding boxes.
 
     Raises
     ------
     ValueError
-        If burst_id is not found in burst database
+        If no burst_id is found in burst database
     """
     # example burst db:
     # /home/staniewi/dev/burst_map_IW_000001_375887.OPERA-JPL.sqlite3
@@ -262,12 +264,25 @@
     if burst_db_conn is None:
         burst_db_conn = sqlite3.connect(burst_db_file)
     burst_db_conn.row_factory = sqlite3.Row  # return rows as dicts
 
-    query = "SELECT epsg, xmin, ymin, xmax, ymax FROM burst_id_map WHERE burst_id_jpl = ?"
-    cur = burst_db_conn.execute(query, (burst_id,))
+    burst_ids = [burst_id] if isinstance(burst_id, str) else burst_id
+
+    results = []
+    query = f"SELECT epsg, xmin, ymin, xmax, ymax FROM burst_id_map WHERE burst_id_jpl = ?"
+    for bid in burst_ids:
+        cur = burst_db_conn.execute(query, (bid,))
+        results.append(cur.fetchone())
+
+    if not results:
+        raise ValueError(f"Failed to find {burst_ids} in {burst_db_file}")
+
+    # If they only requested one, just return the single epsg/bbox
+    if len(results) == 1:
+        result = results[0]
+        epsg = result["epsg"]
+        bbox = (result["xmin"], result["ymin"], result["xmax"], result["ymax"])
+        return epsg, bbox
 
-    result = cur.fetchone()
-    if result is None:
-        raise ValueError(f"Failed to find {burst_id} in {burst_db_file}")
-    epsg = result["epsg"]
-    bbox = (result["xmin"], result["ymin"], result["xmax"], result["ymax"])
-    return epsg, bbox
+    # Otherwise, return a list of epsg/bbox tuples
+    epsgs = [r["epsg"] for r in results]
+    bboxes = [(r["xmin"], r["ymin"], r["xmax"], r["ymax"]) for r in results]
+    return epsgs, bboxes

From 29a5df2e25ccc4535d95bbb99bc96d96f8ae2d32 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Mon, 24 Oct 2022 11:23:00 -0700
Subject: [PATCH 38/42] fix bool cli args to be flags instead of bool

---
 src/compass/s1_geocode_stack.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py
index 2da2f504..922fc23f 100755
--- a/src/compass/s1_geocode_stack.py
+++ b/src/compass/s1_geocode_stack.py
@@ -62,17 +62,16 @@ def create_parser():
     optional.add_argument('-e', '--epsg', type=int, default=None,
                           help='Output EPSG projection code for geocoded bursts. '
                                'If None, projection the UTM zone of the first burst.')
-    # TODO: fix flattin/bool args. No way to make flatten false.
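For reference, the one-or-many lookup that get_burst_bbox now performs can be exercised against a throwaway in-memory database; the schema mirrors the burst_id_map table queried above, while the burst ID and coordinates are invented for illustration:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row  # rows indexable by column name, as above
    conn.execute('CREATE TABLE burst_id_map '
                 '(burst_id_jpl TEXT, epsg INT, xmin REAL, ymin REAL, xmax REAL, ymax REAL)')
    conn.execute("INSERT INTO burst_id_map VALUES "
                 "('t064_135518_iw1', 32611, 400000.0, 3740000.0, 500000.0, 3840000.0)")
    query = 'SELECT epsg, xmin, ymin, xmax, ymax FROM burst_id_map WHERE burst_id_jpl = ?'
    for bid in ['t064_135518_iw1']:
        row = conn.execute(query, (bid,)).fetchone()
        print(row['epsg'], (row['xmin'], row['ymin'], row['xmax'], row['ymax']))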
-    optional.add_argument('-f', '--flatten', type=bool, default=True,
-                          help='If True, enables topographic phase flattening.')
+    optional.add_argument('-nf', '--no-flatten', action='store_true',
+                          help='If True, disables topographic phase flattening.')
     optional.add_argument('-ss', '--range-split-spectrum',
-                          dest='is_split_spectrum', type=bool, default=False,
-                          help='If True, enables split-spectrum')
+                          dest='is_split_spectrum', action='store_true',
+                          help='If flag is set, enables split-spectrum processing.')
     optional.add_argument('-lb', '--low-band', type=float, default=0.0,
-                          help='Low sub-band bandwidth in Hz (default: 0.0)')
+                          help='For -ss, low sub-band bandwidth in Hz (default: 0.0)')
     optional.add_argument('-hb', '--high-band', type=float, default=0.0,
-                          help='High sub-band bandwidth in Hz (default: 0.0')
-    optional.add_argument('-m', '--metadata', type=bool, default=False,
+                          help='For -ss, high sub-band bandwidth in Hz (default: 0.0)')
+    optional.add_argument('-m', '--metadata', action='store_true',
                           help='If True, generates radar metadata layers for each '
                                'burst stack (see rdr2geo processing options)')
     return parser.parse_args()
@@ -498,7 +497,7 @@ def main():
     run(args.slc_dir, args.dem_file, args.burst_id, args.start_date,
         args.end_date, args.exclude_dates, args.orbit_dir,
         args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox,
-        args.epsg_bbox, args.epsg, args.flatten, args.is_split_spectrum,
-        args.low_band, args.high_band, args.metadata)
+        args.epsg_bbox, args.epsg, not args.no_flatten, args.is_split_spectrum,
+        args.low_band, args.high_band, args.metadata)

From c070842bbb0d4db6e46576778f3aaee1f7adad58 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Mon, 24 Oct 2022 11:42:37 -0700
Subject: [PATCH 39/42] use pyproj as projection

simplifies code, and is recommended in isce3
https://github.com/isce-framework/isce3/blob/develop/doc/sphinx/tutorial/projections.rst
---
 src/compass/utils/helpers.py | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py
index 65b2f283..4cee67ba 100644
--- a/src/compass/utils/helpers.py
+++ b/src/compass/utils/helpers.py
@@ -6,6 +6,7 @@
 import isce3
 import journal
 import numpy as np
+from pyproj.transformer import Transformer
 from osgeo import gdal
 from shapely import geometry
 
@@ -212,22 +213,10 @@
     if epsg_dst == epsg_src:
         return points_xy
 
-    if epsg_src == 4326:
-        # proj.forward expects Longitude/latitude in radians
-        points_ll = np.deg2rad(points_xy)[:, :2]
-    else:
-        # convert points to lon/lat if given a different UTM projection
-        points_ll = []
-        proj_ll = isce3.core.UTM(epsg_src)
-        for x, y in points_xy:
-            points_ll.append(proj_ll.inverse([x, y, 0])[:2])
-
-    proj = isce3.core.UTM(epsg_dst)
-    out = []
-    for lon, lat in points_ll:
-        # proj.forward expects llh, [Longitude, latitude, height (m)]
-        out.append(proj.forward([lon, lat, 0])[:2])
-    return out
+    t = Transformer.from_crs(epsg_src, epsg_dst, always_xy=True)
+    xs, ys = np.array(points_xy).T
+    xt, yt = t.transform(xs, ys)
+    return list(zip(xt, yt))

From 21e661bd9cc9fddf019d1fbeba598f91432116a1 Mon Sep 17 00:00:00 2001
From: Scott Staniewicz
Date: Mon, 24 Oct 2022 13:02:15 -0700
Subject: [PATCH 40/42] add burst file arg, only generate metadata on first burst instance

---
 src/compass/s1_geocode_stack.py   | 45 ++++++++++++++++++------------
 src/compass/utils/geo_metadata.py |  2 +-
 2 files changed, 27 insertions(+), 20 deletions(-)
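The 'first instance per burst' behavior described in the subject line rests on a small pandas idiom that is worth seeing in isolation; the burst map below is a toy stand-in, not data from the repository:

    import pandas as pd

    burst_map = pd.DataFrame({
        'burst_id': ['a', 'a', 'b', 'b', 'b'],
        'date': ['20220101', '20220113', '20220101', '20220113', '20220125'],
    })
    # idxmax on a boolean Series returns the label of the first True value,
    # i.e. the first row in which each burst ID appears
    first_rows = [(burst_map.burst_id == b).idxmax()
                  for b in burst_map.burst_id.unique()]
    print(first_rows)  # [0, 2]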
diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py
index 922fc23f..7f3f761a 100755
--- a/src/compass/s1_geocode_stack.py
+++ b/src/compass/s1_geocode_stack.py
@@ -61,9 +61,11 @@ def create_parser():
     optional.add_argument('-e', '--epsg', type=int, default=None,
                           help='Output EPSG projection code for geocoded bursts. '
-                               'If None, projection the UTM zone of the first burst.')
+                               'If None, looks up the UTM zone for each burst.')
+    optional.add_argument('--burst-db-file', type=str, default=DEFAULT_BURST_DB_FILE,
+                          help='Sqlite3 database file with burst bounding boxes.')
     optional.add_argument('-nf', '--no-flatten', action='store_true',
-                          help='If True, disables topographic phase flattening.')
+                          help='If flag is set, disables topographic phase flattening.')
     optional.add_argument('-ss', '--range-split-spectrum',
                           dest='is_split_spectrum', action='store_true',
                           help='If flag is set, enables split-spectrum processing.')
@@ -72,12 +74,12 @@
     optional.add_argument('-lb', '--low-band', type=float, default=0.0,
                           help='For -ss, low sub-band bandwidth in Hz (default: 0.0)')
     optional.add_argument('-hb', '--high-band', type=float, default=0.0,
                           help='For -ss, high sub-band bandwidth in Hz (default: 0.0)')
     optional.add_argument('-m', '--metadata', action='store_true',
-                          help='If True, generates radar metadata layers for each '
-                               'burst stack (see rdr2geo processing options)')
+                          help='If flag is set, generates radar metadata layers for each'
+                               ' burst stack (see rdr2geo processing options)')
     return parser.parse_args()
 
 
-def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None,
+def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None, 
                        epsg_bbox=4326, burst_db_file=DEFAULT_BURST_DB_FILE):
@@ -144,19 +146,19 @@
 def _get_burst_epsg_and_bbox(burst, epsg, bbox, epsg_bbox, burst_db_file):
     """Returns the EPSG code and bounding box for a burst.
-    
+
     Uses specified `bbox` if provided; otherwise, uses burst database (if available). 
""" - if epsg is None: - # Get the UTM zone of the first burst from the database - if os.path.exists(burst_db_file): - epsg, _ = helpers.get_burst_bbox( - burst.burst_id, burst_db_file - ) - else: - # Fallback: ust the burst center UTM zone - epsg = get_point_epsg(burst.center.y, - burst.center.x) + # # Get the UTM zone of the first burst from the database + # if epsg is None: + if os.path.exists(burst_db_file): + epsg, _ = helpers.get_burst_bbox( + burst.burst_id, burst_db_file + ) + else: + # Fallback: ust the burst center UTM zone + epsg = get_point_epsg(burst.center.y, + burst.center.x) if bbox is not None: bbox_utm = helpers.bbox_to_utm( @@ -463,8 +465,13 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat if burst_id is not None: burst_map = prune_dataframe(burst_map, 'burst_id', burst_id) + # Find the rows which are the first ones to process each burst + # Only these rows will be used to generate metadata (if turned on) + first_rows = [(burst_map.burst_id == b).idxmax() for b in burst_map.burst_id.unique()] + # Ready to geocode bursts for row in burst_map.itertuples(): + do_metadata = enable_metadata and (row.Index in first_rows) runconfig_path = create_runconfig( row, dem_file, @@ -476,7 +483,7 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat pol, x_spac, y_spac, - enable_metadata, + do_metadata, ) date_str = row.burst.sensing_start.strftime("%Y%m%d") runfile_name = f'{run_dir}/run_{date_str}_{row.burst.burst_id}.sh' @@ -497,8 +504,8 @@ def main(): run(args.slc_dir, args.dem_file, args.burst_id, args.start_date, args.end_date, args.exclude_dates, args.orbit_dir, args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, - args.epsg_bbox, args.epsg, not args.no_flatten, args.is_split_spectrum, - args.low_band, args.high_band, args.metadata) + args.epsg_bbox, args.epsg, args.burst_db_file, not args.no_flatten, + args.is_split_spectrum, args.low_band, args.high_band, args.metadata) if __name__ == '__main__': diff --git a/src/compass/utils/geo_metadata.py b/src/compass/utils/geo_metadata.py index 557b20fb..8d000d27 100644 --- a/src/compass/utils/geo_metadata.py +++ b/src/compass/utils/geo_metadata.py @@ -61,7 +61,7 @@ class GeoCslcMetadata(): doppler: Poly1d range_bandwidth: float polarization: str # {VV, VH, HH, HV} - burst_id: str # t{track_number}_iw{1,2,3}_b{burst_index} + burst_id: str # t{track_number}_{burst_number}_iw{1,2,3} platform_id: str # S1{A,B} center: Point # {center lon, center lat} in degrees border: Polygon # list of lon, lat coordinate tuples (in degrees) representing burst border From 68e0399dc2ae2beae0fa4e9091f66421eff9b00d Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 14:01:51 -0700 Subject: [PATCH 41/42] rename to specify output epsg, use burst db by default --- src/compass/s1_geocode_stack.py | 50 +++++++++++++++++---------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py index 7f3f761a..fae5fed7 100755 --- a/src/compass/s1_geocode_stack.py +++ b/src/compass/s1_geocode_stack.py @@ -56,10 +56,10 @@ def create_parser(): optional.add_argument('--bbox', nargs=4, type=float, default=None, metavar=('xmin', 'ymin', 'xmax', 'ymax'), help='Bounding box of the geocoded stack.') - optional.add_argument('--epsg-bbox', type=int, default=4326, + optional.add_argument('--bbox-epsg', type=int, default=4326, help='EPSG code of the bounding box. 
' 'If 4326, the bounding box is in lon/lat degrees.') - optional.add_argument('-e', '--epsg', type=int, default=None, + optional.add_argument('-e', '--output-epsg', type=int, default=None, help='Output EPSG projection code for geocoded bursts. ' 'If None, looks up the UTM zone for each burst.') optional.add_argument('--burst-db-file', type=str, default=DEFAULT_BURST_DB_FILE, @@ -79,8 +79,8 @@ def create_parser(): return parser.parse_args() -def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None, - epsg_bbox=4326, burst_db_file=DEFAULT_BURST_DB_FILE): +def generate_burst_map(zip_files, orbit_dir, output_epsg=None, bbox=None, + bbox_epsg=4326, burst_db_file=DEFAULT_BURST_DB_FILE): """Generates a dataframe of geogrid infos for each burst ID in `zip_files`. Parameters @@ -89,12 +89,12 @@ def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None, List of S1-A/B SAFE (zip) files orbit_dir: str Directory containing sensor orbit ephemerides - epsg: int + output_epsg: int EPSG code identifying output product projection system bbox: Optional[tuple[float]] Desired bounding box of the geocoded bursts as (left, bottom, right, top). If not provided, the bounding box is computed for each burst. - epsg_bbox: int + bbox_epsg: int EPSG code of the bounding box. If 4326, the bounding box is assumed to be lon/lat degrees (default: 4326). burst_db_file: str @@ -119,7 +119,7 @@ def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None, ref_bursts = load_bursts(zip_file, orbit_path, subswath) for burst in ref_bursts: epsg, bbox_utm = _get_burst_epsg_and_bbox( - burst, epsg, bbox, epsg_bbox, burst_db_file + burst, output_epsg, bbox, bbox_epsg, burst_db_file ) if epsg is None: # Flag for skipping burst continue @@ -144,25 +144,27 @@ def generate_burst_map(zip_files, orbit_dir, epsg=None, bbox=None, return burst_map -def _get_burst_epsg_and_bbox(burst, epsg, bbox, epsg_bbox, burst_db_file): +def _get_burst_epsg_and_bbox(burst, output_epsg, bbox, bbox_epsg, burst_db_file): """Returns the EPSG code and bounding box for a burst. - + Uses specified `bbox` if provided; otherwise, uses burst database (if available). 
""" # # Get the UTM zone of the first burst from the database - # if epsg is None: - if os.path.exists(burst_db_file): - epsg, _ = helpers.get_burst_bbox( - burst.burst_id, burst_db_file - ) + if output_epsg is None: + if os.path.exists(burst_db_file): + epsg, _ = helpers.get_burst_bbox( + burst.burst_id, burst_db_file + ) + else: + # Fallback: ust the burst center UTM zone + epsg = get_point_epsg(burst.center.y, + burst.center.x) else: - # Fallback: ust the burst center UTM zone - epsg = get_point_epsg(burst.center.y, - burst.center.x) + epsg = output_epsg if bbox is not None: bbox_utm = helpers.bbox_to_utm( - bbox, epsg_src=epsg_bbox, epsg_dst=epsg + bbox, epsg_src=bbox_epsg, epsg_dst=epsg ) burst_border_utm = helpers.polygon_to_utm( burst.border[0], epsg_src=4326, epsg_dst=epsg @@ -370,7 +372,7 @@ def _filter_by_date(zip_file_list, start_date, end_date, exclude_dates): def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dates=None, orbit_dir=None, work_dir='stack', pol='dual-pol', x_spac=5, y_spac=10, bbox=None, - epsg_bbox=4326, epsg=None, burst_db_file=DEFAULT_BURST_DB_FILE, flatten=True, + bbox_epsg=4326, output_epsg=None, burst_db_file=DEFAULT_BURST_DB_FILE, flatten=True, is_split_spectrum=False, low_band=0.0, high_band=0.0, enable_metadata=False): """Create runconfigs and runfiles generating geocoded bursts for a static stack of Sentinel-1 A/B SAFE files. @@ -400,12 +402,12 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat bbox: tuple[float], optional Bounding box of the area to geocode: (xmin, ymin, xmax, ymax) in degrees. If not provided, will use the bounding box of the stack. - epsg_bbox: int + bbox_epsg: int EPSG code of the bounding box coordinates (default: 4326) If using EPSG:4326, the bounding box coordinates are in degrees. - epsg: int + output_epsg: int EPSG code identifying projection system to use for output. - If not specified, will search for burst center's EPSG from + If not specified, will search for each burst center's EPSG from the burst database. burst_db_file : str File path to burst database containing EPSG/extent information. 
@@ -453,7 +455,7 @@ def run(slc_dir, dem_file, burst_id, start_date=None, end_date=None, exclude_dat info.log(f'Generating burst map for {len(zip_file_list)} SAFE files') burst_map = generate_burst_map( - zip_file_list, orbit_dir, epsg, bbox, epsg_bbox, burst_db_file + zip_file_list, orbit_dir, output_epsg, bbox, bbox_epsg, burst_db_file ) # Identify burst IDs common across the stack and remove from the dataframe @@ -504,7 +506,7 @@ def main(): run(args.slc_dir, args.dem_file, args.burst_id, args.start_date, args.end_date, args.exclude_dates, args.orbit_dir, args.work_dir, args.pol, args.x_spac, args.y_spac, args.bbox, - args.epsg_bbox, args.epsg, args.burst_db_file, not args.no_flatten, + args.bbox_epsg, args.output_epsg, args.burst_db_file, not args.no_flatten, args.is_split_spectrum, args.low_band, args.high_band, args.metadata) From 427970b3f2a22312198535f81a425f5eaeaaf33d Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Mon, 24 Oct 2022 14:39:59 -0700 Subject: [PATCH 42/42] fix codacy complaints --- src/compass/s1_geocode_stack.py | 2 -- src/compass/utils/helpers.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/compass/s1_geocode_stack.py b/src/compass/s1_geocode_stack.py index fae5fed7..556a10ee 100755 --- a/src/compass/s1_geocode_stack.py +++ b/src/compass/s1_geocode_stack.py @@ -6,9 +6,7 @@ import time from collections import defaultdict -import isce3 import journal -import numpy as np import pandas as pd import yaml from s1reader.s1_orbit import get_orbit_file_from_dir, parse_safe_filename diff --git a/src/compass/utils/helpers.py b/src/compass/utils/helpers.py index 4cee67ba..8b9988c2 100644 --- a/src/compass/utils/helpers.py +++ b/src/compass/utils/helpers.py @@ -160,7 +160,7 @@ def check_dem(dem_path: str): raise ValueError(err_str) -def bbox_to_utm(bbox, *, epsg_src, epsg_out): +def bbox_to_utm(bbox, *, epsg_src, epsg_dst): """Convert bounding box coordinates to UTM. Parameters @@ -170,7 +170,7 @@ def bbox_to_utm(bbox, *, epsg_src, epsg_out): (left, bottom, right, top) in degrees epsg_src : int EPSG code identifying input bbox coordinate system - epsg_out : int + epsg_dst : int EPSG code identifying output coordinate system Returns @@ -180,7 +180,7 @@ def bbox_to_utm(bbox, *, epsg_src, epsg_out): (left, bottom, right, top) """ xmin, ymin, xmax, ymax = bbox - xys = _convert_to_utm([(xmin, ymin), (xmax, ymax)], epsg_src, epsg_out) + xys = _convert_to_utm([(xmin, ymin), (xmax, ymax)], epsg_src, epsg_dst) return (*xys[0], *xys[1]) @@ -256,7 +256,7 @@ def get_burst_bbox(burst_id, burst_db_file=None, burst_db_conn=None): burst_ids = [burst_id] if isinstance(burst_id, str) else burst_id results = [] - query = f"SELECT epsg, xmin, ymin, xmax, ymax FROM burst_id_map WHERE burst_id_jpl = ?" + query = "SELECT epsg, xmin, ymin, xmax, ymax FROM burst_id_map WHERE burst_id_jpl = ?" for bid in burst_ids: cur = burst_db_conn.execute(query, (bid,)) results.append(cur.fetchone())
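The pyproj-based conversion that patches 39 and 42 settle on can be verified with a short two-point check; the EPSG codes and coordinates here are illustrative only:

    import numpy as np
    from pyproj.transformer import Transformer

    points_xy = [(-118.0, 34.0), (-117.9, 34.1)]  # lon/lat pairs, EPSG:4326
    t = Transformer.from_crs(4326, 32611, always_xy=True)  # 32611: UTM zone 11N
    xs, ys = np.array(points_xy).T
    xt, yt = t.transform(xs, ys)
    print(list(zip(xt, yt)))  # eastings/northings in meters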