Skip to content

Commit

Permalink
Merge pull request #8 from scottstanie/frame-definition
Browse files Browse the repository at this point in the history
Frame definition
  • Loading branch information
scottstanie authored Aug 23, 2023
2 parents 985bb65 + d1352ce commit f843141
Show file tree
Hide file tree
Showing 12 changed files with 1,268 additions and 34 deletions.
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2023 California Institute of Technology (“Caltech”).
U.S. Government sponsorship acknowledged.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
13 changes: 13 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
channels:
- conda-forge
dependencies:
- python >= 3.8
- geopandas
- numpy
- pandas
- pip
- shapely
- tqdm
- utm
- pip:
- unzip_http
65 changes: 63 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,20 @@ Follow the steps below to install `burst_db` using a conda environment.

```bash
git clone https://github.com/opera-adt/burst_db burst_db
cd burst_db
```

2. Install dependencies:

```bash
conda install -c conda-forge --file burst_db/requirements.txt
conda install --name burst_db -c conda-forge --file environment.yml
```

3. Install `burst_db` via pip:

```bash
# run "pip install -e" to install in development mode
python -m pip install ./burst_db
python -m pip install .
```

## How to use
Expand All @@ -37,6 +38,66 @@ python -m pip install ./burst_db
- `sqlite_path_out` : Path to the output SQLite database file.


## Frame database information

After running `pip install .` , the `opera-create-db` command will create the sqlite Frame Database, as well as JSON files which map the burst IDs to frame IDs, and frame IDs to burst IDs.

The format of the frame-to-burst mapping is
```python
{
"data" : {
"1": {
"epsg": 32631,
"is_land": False,
"is_north_america": False,
"xmin": 500160,
"ymin": 78240,
"xmax": 789960,
"ymax": 322740,
"burst_ids": [
"t001_000001_iw1",
"t001_000001_iw2",
"t001_000001_iw3",
"t001_000002_iw1",
...
"t001_000009_iw3"
]
}, ...
},
"metadata": {
"version": "0.1.2", "margin": 5000.0, ...
}
}
```
where the keys of the `data` dict are the frame IDs.

The burst-to-frame mapping has the structure
```python
{
"data" : {
"t001_000001_iw1": {"frame_ids": [1]},
"t001_000001_iw2": {"frame_ids": [1]},
...
},
"metadata": {
"version": "0.1.2", "margin": 5000.0, ...
}
}
```
These data structures can be read into python using the function `build_frame_db.read_zipped_json` .

The `opera-create-db` command also makes the full [Geopackage database](https://www.geopackage.org/) (which is based on sqlite), where the `burst_id_map` table contains the burst geometries, the `frames` table contains the frame geometries, and the `frames_bursts` table is the JOIN table for the many-to-many relationship.
An example SQL query to view all columns of these tables is
```sql
SELECT *
FROM frames f
JOIN frames_bursts fb ON fb.frame_fid = f.fid
JOIN burst_id_map b ON fb.burst_ogc_fid = b.ogc_fid
LIMIT 1;
```
You can also drag the `opera-s1-disp.gpkg` file into QGIS to load the `frames` and `burst_id_map` tables to filter/view the geometries.


### License
**Copyright (c) 2022** California Institute of Technology (“Caltech”). U.S. Government
sponsorship acknowledged.
Expand Down
4 changes: 0 additions & 4 deletions requirements.txt

This file was deleted.

55 changes: 29 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,38 @@
'''
"""
setup.py for OPERA burst database generator
'''
"""

import os
import sys

from setuptools import setup
from setuptools import find_packages, setup

__version__ = VERSION = '0.1.0'
# taken from mintpy: https://github.com/insarlab/MintPy/blob/main/setup.py
# Grab version and description from version.py
# link: https://stackoverflow.com/questions/53648900
sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
from burst_db.version import release_version

LONG_DESCRIPTION = 'Burst database for OPERA SAS'

package_data_dict = {}

package_data_dict['rtc'] = [
os.path.join('defaults', 'rtc_s1.yaml'),
os.path.join('schemas', 'rtc_s1.yaml')]
LONG_DESCRIPTION = "Sentinel-1 Burst database for OPERA SAS"

setup(
name = 'burst_db',
version = VERSION,
description = 'Burst database for OPERA SAS',
package_dir = {'burst_db': 'src/burst_db'},
include_package_data = True,
package_data = package_data_dict,
classifiers = ['Programming Language :: Python'],
#scripts = ['app/rtc_s1.py'],
install_requires = ['argparse', 'numpy', 'gdal'],
url = 'https://github.com/opera-adt/burst_db',
author = ('Seongsu Jeong'),
author_email = ('[email protected]'),
license = ('Copyright by the California Institute of Technology.'
' ALL RIGHTS RESERVED.'),
long_description=LONG_DESCRIPTION
name="burst_db",
version=release_version,
description="Burst database for OPERA SAS",
packages=find_packages("src"), # include all packages under src
package_dir={"": "src"}, # tell distutils packages are under src
classifiers=["Programming Language :: Python"],
url="https://github.com/opera-adt/burst_db",
author="Seongsu Jeong; Scott J. Staniewicz",
author_email="[email protected]; [email protected]",
license=(
"Copyright by the California Institute of Technology. ALL RIGHTS RESERVED."
),
long_description=LONG_DESCRIPTION,
# Add console scripts here
entry_points={
"console_scripts": [
"opera-create-db = burst_db.build_frame_db:main",
],
},
)
29 changes: 29 additions & 0 deletions src/burst_db/_esa_burst_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
'''
An internal module to download the ESA burst database
'''
import os
import shutil
import subprocess
import tempfile
import urllib.request
import zipfile

ESA_DB_URL = "https://sar-mpc.eu/files/S1_burstid_20220530.zip"


def get_esa_burst_db(output_path="burst_map_IW_000001_375887.sqlite3"):
    """Download the ESA burst database zip and extract the sqlite file.

    Parameters
    ----------
    output_path : str
        Destination path for the extracted sqlite database file.
    """
    print(f"Downloading ESA burst database from {ESA_DB_URL} to {output_path}.")
    db_filename = "S1_burstid_20220530/IW/sqlite/burst_map_IW_000001_375887.sqlite3"
    output_path = os.path.abspath(output_path)
    with tempfile.TemporaryDirectory() as tmpdir:
        # Download with urllib instead of shelling out to `wget`, which is
        # not installed on all systems (e.g. macOS, minimal containers).
        zip_path = os.path.join(tmpdir, ESA_DB_URL.split("/")[-1])
        urllib.request.urlretrieve(ESA_DB_URL, zip_path)

        # Extract only the sqlite member we need, then move it into place.
        # Working via explicit paths avoids the previous os.chdir() juggling.
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extract(db_filename, path=tmpdir)
        shutil.move(os.path.join(tmpdir, db_filename), output_path)
        # No manual cleanup needed: TemporaryDirectory removes everything.
108 changes: 108 additions & 0 deletions src/burst_db/_land_usgs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""An internal module to download shape files for land are and Greenland."""
import fnmatch
import zipfile
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests
import unzip_http
from shapely.geometry import MultiPolygon

USGS_LAND_URL = "https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip" # noqa
GREENLAND_URL = "https://stacks.stanford.edu/file/druid:sd368wz2435/data.zip" # noqa


def get_usgs_land(outpath=None):
    """Download and load the GSHHG land polygons as GeoDataFrames.

    Streams only the needed shapefiles out of the remote zip archive:
    https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip
    """
    out_dir = Path(outpath) if outpath else Path.cwd()
    remote = unzip_http.RemoteZipFile(USGS_LAND_URL)
    # Level 1: Continental land masses and ocean islands, except Antarctica.
    # Level 6: Antarctica based on grounding line boundary.
    patterns = ["GSHHS_shp/h/GSHHS_h_L1.*", "GSHHS_shp/h/GSHHS_h_L6.*"]
    shapefiles = []
    for member in remote.infolist():
        if not any(fnmatch.fnmatch(member.filename, pat) for pat in patterns):
            continue
        target = out_dir / member.filename
        if target.suffix == ".shp":
            shapefiles.append(target)
        if target.exists():
            # Already extracted on a previous run; skip the download.
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        with remote.open(member) as src, open(target, "wb") as dst:
            print(f"Extracting {member.filename} to {target}")
            # Stream in 256 KiB chunks rather than loading the member at once.
            while chunk := src.read(2**18):
                dst.write(chunk)
    return [gpd.read_file(shp) for shp in shapefiles]


def get_land_df(
    buffer_deg=0.2,
    outname="usgs_land_{d}deg_buffered.geojson",
    driver="GeoJSON",
    do_zip=True,
) -> gpd.GeoDataFrame:
    """Return a GeoDataFrame of the USGS land polygons, buffered by `buffer_deg`.

    The result is cached to `outname` (optionally zipped); later calls load
    the cached file instead of re-downloading and re-buffering.
    """
    outname = outname.format(d=buffer_deg)
    # Fast path: reuse a previously saved (possibly zipped) file.
    if outname and Path(outname).exists():
        print(f"Loading {outname} from disk")
        return gpd.read_file(outname)
    if Path(outname + ".zip").exists():
        print(f"Loading {outname}.zip from disk")
        return gpd.read_file(str(outname) + ".zip")

    # Build from scratch: combine continents + Antarctica, buffer, dissolve.
    continents, antarctica = get_usgs_land()
    land = pd.concat([continents, antarctica], axis=0)[["geometry"]]
    land.geometry = land.geometry.buffer(buffer_deg)
    land = land.dissolve()

    land.to_file(outname, driver=driver)
    if do_zip and outname.endswith(".geojson"):
        zipped = Path(str(outname) + ".zip")
        with zipfile.ZipFile(zipped, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            zf.write(outname)
        # Keep only the compressed copy.
        Path(outname).unlink()

    return land


def get_greenland_shape(outpath=None, buffer_deg=0.2) -> MultiPolygon:
    """Get the buffered Greenland polygon, downloading data if necessary.

    Data source:
    https://stacks.stanford.edu/file/druid:sd368wz2435/data.zip

    Parameters
    ----------
    outpath : str or Path, optional
        Directory for cached files (default: current working directory).
    buffer_deg : float
        Buffer distance (degrees) applied to the simplified geometry.
    """
    outpath = Path(outpath) if outpath else Path.cwd()
    outname = outpath / f"greenland_{buffer_deg}deg_buffered.geojson"
    if outname.exists():
        print(f"Loading {outname} from disk")
        return gpd.read_file(outname).iloc[0].geometry

    # NOTE: renamed from `zipfile`, which shadowed the stdlib zipfile module.
    zip_path = outpath / "greenland.zip"
    if not zip_path.exists():
        # Only hit the network when the archive isn't cached already;
        # previously the download ran unconditionally and was then discarded.
        print("Downloading Greenland shapefile...")
        r = requests.get(GREENLAND_URL)
        with open(zip_path, "wb") as fout:
            fout.write(r.content)

    df = gpd.read_file(zip_path)
    print("Simplifying and buffering Greenland shapefile...")
    g = df.iloc[0].geometry
    # Simplify first (cheaper on the full-resolution shape), then buffer.
    g_buffered = g.simplify(0.1).buffer(buffer_deg)
    # Cache the result for later calls.
    gpd.GeoDataFrame(geometry=[g_buffered]).to_file(outname, driver="GeoJSON")
    return g_buffered
17 changes: 17 additions & 0 deletions src/burst_db/_opera_north_america.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Module to read the OPERA North America shape.
Data comes from:
https://github.com/OPERA-Cal-Val/DSWx-Validation-Experiments/blob/7f06ab98cf43135eb63e5a29593235dbebcb19fa/marshak/north_america_boundaries/north_america_opera.geojson
"""
from pathlib import Path

import geopandas as gpd
from shapely import GeometryType


def get_opera_na_shape() -> GeometryType.MULTIPOLYGON:
    """Load the OPERA North America boundary as one shapely MultiPolygon."""
    data_file = Path(__file__).parent / "data" / "north_america_opera.geojson.zip"
    frame = gpd.read_file(data_file)
    # Merge every geometry in the file into a single MultiPolygon.
    return frame.geometry.unary_union
Loading

0 comments on commit f843141

Please sign in to comment.