Skip to content

Commit

Permalink
Merge pull request #8 from scottstanie/frame-definition
Browse files Browse the repository at this point in the history
Frame definition
  • Loading branch information
scottstanie authored Aug 23, 2023
2 parents 985bb65 + d1352ce commit f843141
Show file tree
Hide file tree
Showing 12 changed files with 1,268 additions and 34 deletions.
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2023 California Institute of Technology (“Caltech”).
U.S. Government sponsorship acknowledged.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
13 changes: 13 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
channels:
- conda-forge
dependencies:
- python >= 3.8
- geopandas
- numpy
- pandas
- pip
- shapely
- tqdm
- utm
- pip:
- unzip_http
65 changes: 63 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,20 @@ Follow the steps below to install `burst_db` using a conda environment.

```bash
git clone https://github.com/opera-adt/burst_db burst_db
cd burst_db
```

2. Install dependencies:

```bash
conda install -c conda-forge --file burst_db/requirements.txt
conda install --name burst_db -c conda-forge --file environment.yml
```

3. Install `burst_db` via pip:

```bash
# run "pip install -e" to install in development mode
python -m pip install ./burst_db
python -m pip install .
```

## How to use
Expand All @@ -37,6 +38,66 @@ python -m pip install ./burst_db
- `sqlite_path_out` : Path to the output SQLite database file.


## Frame database information

After running `pip install .` , the `opera-create-db` command will create the sqlite Frame Database, as well as JSON files which map the burst IDs to frame IDs, and frame IDs to burst IDs.

The format of the frame-to-burst mapping is
```python
{
"data" : {
"1": {
"epsg": 32631,
"is_land": False,
"is_north_america": False,
"xmin": 500160,
"ymin": 78240,
"xmax": 789960,
"ymax": 322740,
"burst_ids": [
"t001_000001_iw1",
"t001_000001_iw2",
"t001_000001_iw3",
"t001_000002_iw1",
...
"t001_000009_iw3"
]
}, ...
},
"metadata": {
"version": "0.1.2", "margin": 5000.0, ...
}
}
```
where the keys of the `data` dict are the frame IDs.

The burst-to-frame mapping has the structure
```python
{
"data" : {
"t001_000001_iw1": {"frame_ids": [1]},
"t001_000001_iw2": {"frame_ids": [1]},
...
},
"metadata": {
"version": "0.1.2", "margin": 5000.0, ...
}
}
```
These data structures can be read into python using the function `build_frame_db.read_zipped_json` .

The `opera-create-db` command also makes the full [Geopackage database](https://www.geopackage.org/) (which is based on sqlite), where the `burst_id_map` table contains the burst geometries, the `frames` table contains the frame geometries, and the `frames_bursts` table is the JOIN table for the many-to-many relationship.
An example SQL query to view all columns of these tables is
```sql
SELECT *
FROM frames f
JOIN frames_bursts fb ON fb.frame_fid = f.fid
JOIN burst_id_map b ON fb.burst_ogc_fid = b.ogc_fid
LIMIT 1;
```
You can also drag the `opera-s1-disp.gpkg` file into QGIS to load the `frames` and `burst_id_map` tables to filter/view the geometries.


### License
**Copyright (c) 2022** California Institute of Technology (“Caltech”). U.S. Government
sponsorship acknowledged.
Expand Down
4 changes: 0 additions & 4 deletions requirements.txt

This file was deleted.

55 changes: 29 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,38 @@
'''
"""
setup.py for OPERA burst database generator
'''
"""

import os
import sys

from setuptools import setup
from setuptools import find_packages, setup

__version__ = VERSION = '0.1.0'
# taken from mintpy: https://github.com/insarlab/MintPy/blob/main/setup.py
# Grab version and description from version.py
# link: https://stackoverflow.com/questions/53648900
sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
from burst_db.version import release_version

LONG_DESCRIPTION = 'Burst database for OPERA SAS'

package_data_dict = {}

package_data_dict['rtc'] = [
os.path.join('defaults', 'rtc_s1.yaml'),
os.path.join('schemas', 'rtc_s1.yaml')]
LONG_DESCRIPTION = "Sentinel-1 Burst database for OPERA SAS"

setup(
name = 'burst_db',
version = VERSION,
description = 'Burst database for OPERA SAS',
package_dir = {'burst_db': 'src/burst_db'},
include_package_data = True,
package_data = package_data_dict,
classifiers = ['Programming Language :: Python'],
#scripts = ['app/rtc_s1.py'],
install_requires = ['argparse', 'numpy', 'gdal'],
url = 'https://github.com/opera-adt/burst_db',
author = ('Seongsu Jeong'),
author_email = ('[email protected]'),
license = ('Copyright by the California Institute of Technology.'
' ALL RIGHTS RESERVED.'),
long_description=LONG_DESCRIPTION
name="burst_db",
version=release_version,
description="Burst database for OPERA SAS",
packages=find_packages("src"), # include all packages under src
package_dir={"": "src"}, # tell distutils packages are under src
classifiers=["Programming Language :: Python"],
url="https://github.com/opera-adt/burst_db",
author="Seongsu Jeong; Scott J. Staniewicz",
author_email="[email protected]; [email protected]",
license=(
"Copyright by the California Institute of Technology. ALL RIGHTS RESERVED."
),
long_description=LONG_DESCRIPTION,
# Add console scripts here
entry_points={
"console_scripts": [
"opera-create-db = burst_db.build_frame_db:main",
],
},
)
29 changes: 29 additions & 0 deletions src/burst_db/_esa_burst_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
'''
An internal module to download the ESA burst database
'''
import os
import shutil
import subprocess
import tempfile
import urllib.request
import zipfile

ESA_DB_URL = "https://sar-mpc.eu/files/S1_burstid_20220530.zip"


def get_esa_burst_db(output_path="burst_map_IW_000001_375887.sqlite3"):
    """Download the ESA burst database zip and extract the sqlite file.

    Parameters
    ----------
    output_path : str
        Destination path for the extracted sqlite database file.
    """
    print(f"Downloading ESA burst database from {ESA_DB_URL} to {output_path}.")
    db_filename = "S1_burstid_20220530/IW/sqlite/burst_map_IW_000001_375887.sqlite3"
    output_path = os.path.abspath(output_path)
    with tempfile.TemporaryDirectory() as tmpdir:
        # Download with urllib instead of shelling out to `wget`, which is
        # not installed on all systems (e.g. macOS, minimal containers).
        zip_path = os.path.join(tmpdir, ESA_DB_URL.split("/")[-1])
        urllib.request.urlretrieve(ESA_DB_URL, zip_path)

        # Extract only the sqlite member we need, then move it into place.
        # Working via explicit paths avoids the previous os.chdir() juggling.
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extract(db_filename, path=tmpdir)
        shutil.move(os.path.join(tmpdir, db_filename), output_path)
        # No manual cleanup needed: TemporaryDirectory removes everything.
108 changes: 108 additions & 0 deletions src/burst_db/_land_usgs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""An internal module to download shape files for land are and Greenland."""
import fnmatch
import zipfile
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests
import unzip_http
from shapely.geometry import MultiPolygon

USGS_LAND_URL = "https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip" # noqa
GREENLAND_URL = "https://stacks.stanford.edu/file/druid:sd368wz2435/data.zip" # noqa


def get_usgs_land(outpath=None):
    """Download and load the GSHHG land polygons as GeoDataFrames.

    Streams only the needed shapefiles out of the remote zip archive:
    https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip
    """
    out_dir = Path(outpath) if outpath else Path.cwd()
    remote = unzip_http.RemoteZipFile(USGS_LAND_URL)
    # Level 1: Continental land masses and ocean islands, except Antarctica.
    # Level 6: Antarctica based on grounding line boundary.
    patterns = ["GSHHS_shp/h/GSHHS_h_L1.*", "GSHHS_shp/h/GSHHS_h_L6.*"]
    shapefiles = []
    for member in remote.infolist():
        if not any(fnmatch.fnmatch(member.filename, pat) for pat in patterns):
            continue
        target = out_dir / member.filename
        if target.suffix == ".shp":
            shapefiles.append(target)
        if target.exists():
            # Already extracted on a previous run; skip the download.
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        with remote.open(member) as src, open(target, "wb") as dst:
            print(f"Extracting {member.filename} to {target}")
            # Stream in 256 KiB chunks rather than loading the member at once.
            while chunk := src.read(2**18):
                dst.write(chunk)
    return [gpd.read_file(shp) for shp in shapefiles]


def get_land_df(
    buffer_deg=0.2,
    outname="usgs_land_{d}deg_buffered.geojson",
    driver="GeoJSON",
    do_zip=True,
) -> gpd.GeoDataFrame:
    """Return a GeoDataFrame of the USGS land polygons, buffered by `buffer_deg`.

    The result is cached to `outname` (optionally zipped); later calls load
    the cached file instead of re-downloading and re-buffering.
    """
    outname = outname.format(d=buffer_deg)
    # Fast path: reuse a previously saved (possibly zipped) file.
    if outname and Path(outname).exists():
        print(f"Loading {outname} from disk")
        return gpd.read_file(outname)
    if Path(outname + ".zip").exists():
        print(f"Loading {outname}.zip from disk")
        return gpd.read_file(str(outname) + ".zip")

    # Build from scratch: combine continents + Antarctica, buffer, dissolve.
    continents, antarctica = get_usgs_land()
    land = pd.concat([continents, antarctica], axis=0)[["geometry"]]
    land.geometry = land.geometry.buffer(buffer_deg)
    land = land.dissolve()

    land.to_file(outname, driver=driver)
    if do_zip and outname.endswith(".geojson"):
        zipped = Path(str(outname) + ".zip")
        with zipfile.ZipFile(zipped, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            zf.write(outname)
        # Keep only the compressed copy.
        Path(outname).unlink()

    return land


def get_greenland_shape(outpath=None, buffer_deg=0.2) -> MultiPolygon:
    """Get the buffered Greenland polygon, downloading data if necessary.

    Data source:
    https://stacks.stanford.edu/file/druid:sd368wz2435/data.zip

    Parameters
    ----------
    outpath : str or Path, optional
        Directory for cached files (default: current working directory).
    buffer_deg : float
        Buffer distance (degrees) applied to the simplified geometry.
    """
    outpath = Path(outpath) if outpath else Path.cwd()
    outname = outpath / f"greenland_{buffer_deg}deg_buffered.geojson"
    if outname.exists():
        print(f"Loading {outname} from disk")
        return gpd.read_file(outname).iloc[0].geometry

    # NOTE: renamed from `zipfile`, which shadowed the stdlib zipfile module.
    zip_path = outpath / "greenland.zip"
    if not zip_path.exists():
        # Only hit the network when the archive isn't cached already;
        # previously the download ran unconditionally and was then discarded.
        print("Downloading Greenland shapefile...")
        r = requests.get(GREENLAND_URL)
        with open(zip_path, "wb") as fout:
            fout.write(r.content)

    df = gpd.read_file(zip_path)
    print("Simplifying and buffering Greenland shapefile...")
    g = df.iloc[0].geometry
    # Simplify first (cheaper on the full-resolution shape), then buffer.
    g_buffered = g.simplify(0.1).buffer(buffer_deg)
    # Cache the result for later calls.
    gpd.GeoDataFrame(geometry=[g_buffered]).to_file(outname, driver="GeoJSON")
    return g_buffered
17 changes: 17 additions & 0 deletions src/burst_db/_opera_north_america.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Module to read the OPERA North America shape.
Data comes from:
https://github.com/OPERA-Cal-Val/DSWx-Validation-Experiments/blob/7f06ab98cf43135eb63e5a29593235dbebcb19fa/marshak/north_america_boundaries/north_america_opera.geojson
"""
from pathlib import Path

import geopandas as gpd
from shapely import GeometryType


def get_opera_na_shape() -> GeometryType.MULTIPOLYGON:
    """Load the OPERA North America boundary as one shapely MultiPolygon."""
    data_file = Path(__file__).parent / "data" / "north_america_opera.geojson.zip"
    frame = gpd.read_file(data_file)
    # Merge every geometry in the file into a single MultiPolygon.
    return frame.geometry.unary_union
Loading

0 comments on commit f843141

Please sign in to comment.