diff --git a/LICENSE b/LICENSE
index 261eeb9..86dbdc3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,8 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2023 California Institute of Technology (“Caltech”).
+   U.S. Government sponsorship acknowledged.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..0948fda
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,13 @@
+channels:
+- conda-forge
+dependencies:
+- python >= 3.8
+- geopandas
+- numpy
+- pandas
+- pip
+- shapely
+- tqdm
+- utm
+- pip:
+  - unzip_http
diff --git a/readme.md b/readme.md
index ed3f3fb..e1fa429 100644
--- a/readme.md
+++ b/readme.md
@@ -10,19 +10,20 @@ Follow the steps below to install `burst_db` using a conda environment.
 ```bash
 git clone https://github.com/opera-adt/burst_db burst_db
+cd burst_db
 ```
 
 2. Install dependencies:
 
 ```bash
-conda install -c conda-forge --file burst_db/requirements.txt
+conda env create --name burst_db --file environment.yml
 ```
 
 3. Install `burst_db` via pip:
 
 ```bash
 # run "pip install -e" to install in development mode
-python -m pip install ./burst_db
+python -m pip install .
 ```
 
 ## How to use
@@ -37,6 +38,66 @@
 
 - `sqlite_path_out` : Path to the output SQLite database file.
 
+## Frame database information
+
+After running `pip install .`, the `opera-create-db` command becomes available; running it creates the SQLite frame database, along with JSON files that map burst IDs to frame IDs and frame IDs to burst IDs.
+
+The format of the frame-to-burst mapping is
+```python
+{
+  "data" : {
+    "1": {
+      "epsg": 32631,
+      "is_land": False,
+      "is_north_america": False,
+      "xmin": 500160,
+      "ymin": 78240,
+      "xmax": 789960,
+      "ymax": 322740,
+      "burst_ids": [
+        "t001_000001_iw1",
+        "t001_000001_iw2",
+        "t001_000001_iw3",
+        "t001_000002_iw1",
+        ...
+        "t001_000009_iw3"
+      ]
+    }, ...
+  },
+  "metadata": {
+    "version": "0.1.2", "margin": 5000.0, ...
+  }
+}
+```
+where the keys of the `data` dict are the frame IDs.
+
+The burst-to-frame mapping has the structure
+```python
+{
+  "data" : {
+    "t001_000001_iw1": {"frame_ids": [1]},
+    "t001_000001_iw2": {"frame_ids": [1]},
+    ...
+  },
+  "metadata": {
+    "version": "0.1.2", "margin": 5000.0, ...
+  }
+}
+```
+These data structures can be read into Python using the function `build_frame_db.read_zipped_json`, as in the example below.
+
+The `opera-create-db` command also creates the full [GeoPackage database](https://www.geopackage.org/) (which is based on SQLite), in which the `burst_id_map` table contains the burst geometries, the `frames` table contains the frame geometries, and the `frames_bursts` table is the JOIN table for the many-to-many relationship.
+An example SQL query to view all columns of these tables is
+```sql
+SELECT *
+FROM frames f
+JOIN frames_bursts fb ON fb.frame_fid = f.fid
+JOIN burst_id_map b ON fb.burst_ogc_fid = b.ogc_fid
+LIMIT 1;
+```
+You can also drag the `opera-s1-disp.gpkg` file into QGIS to load the `frames` and `burst_id_map` tables and filter/view the geometries.
+
 ### License
 **Copyright (c) 2022** California Institute of Technology (“Caltech”). U.S. Government sponsorship acknowledged.
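The `read_zipped_json` helper mentioned in the readme can be used to load both mappings. A minimal sketch (assuming the package is installed and the default `opera-s1-disp` output names; the frame and burst keys shown are illustrative):

```python
from burst_db.build_frame_db import read_zipped_json

# Default output names produced by `opera-create-db`
# (adjust if a different --outfile was used).
frame_to_burst = read_zipped_json("opera-s1-disp-frame-to-burst.json.zip")
burst_to_frame = read_zipped_json("opera-s1-disp-burst-to-frame.json.zip")

# Frame IDs become string keys after the round trip through JSON.
print(frame_to_burst["data"]["1"]["burst_ids"][:3])
print(burst_to_frame["data"]["t001_000001_iw1"]["frame_ids"])
```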
diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 548c1e8..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -python>=3.9 # sentinel1-reader requirement -numpy # sentinel1-reader requirement -gdal>=3 -sqlite diff --git a/setup.py b/setup.py index 3227903..a180426 100644 --- a/setup.py +++ b/setup.py @@ -1,35 +1,38 @@ -''' +""" setup.py for OPERA burst database generator -''' +""" import os +import sys -from setuptools import setup +from setuptools import find_packages, setup -__version__ = VERSION = '0.1.0' +# taken from mintpy: https://github.com/insarlab/MintPy/blob/main/setup.py +# Grab version and description from version.py +# link: https://stackoverflow.com/questions/53648900 +sys.path.append(os.path.join(os.path.dirname(__file__), "src")) +from burst_db.version import release_version -LONG_DESCRIPTION = 'Burst database for OPERA SAS' - -package_data_dict = {} - -package_data_dict['rtc'] = [ - os.path.join('defaults', 'rtc_s1.yaml'), - os.path.join('schemas', 'rtc_s1.yaml')] +LONG_DESCRIPTION = "Sentinel-1 Burst database for OPERA SAS" setup( - name = 'burst_db', - version = VERSION, - description = 'Burst database for OPERA SAS', - package_dir = {'burst_db': 'src/burst_db'}, - include_package_data = True, - package_data = package_data_dict, - classifiers = ['Programming Language :: Python'], - #scripts = ['app/rtc_s1.py'], - install_requires = ['argparse', 'numpy', 'gdal'], - url = 'https://github.com/opera-adt/burst_db', - author = ('Seongsu Jeong'), - author_email = ('seongsu.jeong@jpl.nasa.gov'), - license = ('Copyright by the California Institute of Technology.' - ' ALL RIGHTS RESERVED.'), - long_description=LONG_DESCRIPTION + name="burst_db", + version=release_version, + description="Burst database for OPERA SAS", + packages=find_packages("src"), # include all packages under src + package_dir={"": "src"}, # tell distutils packages are under src + classifiers=["Programming Language :: Python"], + url="https://github.com/opera-adt/burst_db", + author="Seongsu Jeong; Scott J. Staniewicz", + author_email="seongsu.jeong@jpl.nasa.gov; scott.j.staniewicz@jpl.nasa.gov", + license=( + "Copyright by the California Institute of Technology. ALL RIGHTS RESERVED." 
+    ),
+    long_description=LONG_DESCRIPTION,
+    # Add console scripts here
+    entry_points={
+        "console_scripts": [
+            "opera-create-db = burst_db.build_frame_db:main",
+        ],
+    },
 )
diff --git a/src/burst_db/_esa_burst_db.py b/src/burst_db/_esa_burst_db.py
new file mode 100644
index 0000000..5bd0c05
--- /dev/null
+++ b/src/burst_db/_esa_burst_db.py
@@ -0,0 +1,29 @@
+'''
+An internal module to download the ESA burst database
+'''
+import os
+import shutil
+import subprocess
+import tempfile
+import zipfile
+
+ESA_DB_URL = "https://sar-mpc.eu/files/S1_burstid_20220530.zip"
+
+
+def get_esa_burst_db(output_path="burst_map_IW_000001_375887.sqlite3"):
+    """Download the ESA burst database."""
+    print(f"Downloading ESA burst database from {ESA_DB_URL} to {output_path}.")
+    db_filename = "S1_burstid_20220530/IW/sqlite/burst_map_IW_000001_375887.sqlite3"
+    cur_dir = os.getcwd()
+    output_path = os.path.abspath(output_path)
+    with tempfile.TemporaryDirectory() as tmpdir:
+        try:
+            os.chdir(tmpdir)
+            subprocess.check_call(["wget", ESA_DB_URL])
+
+            with zipfile.ZipFile(ESA_DB_URL.split("/")[-1], "r") as zip_ref:
+                zip_ref.extract(db_filename)
+            shutil.move(db_filename, output_path)
+            shutil.rmtree(db_filename.split("/")[0])
+        finally:
+            os.chdir(cur_dir)
diff --git a/src/burst_db/_land_usgs.py b/src/burst_db/_land_usgs.py
new file mode 100644
index 0000000..77b20b2
--- /dev/null
+++ b/src/burst_db/_land_usgs.py
@@ -0,0 +1,108 @@
+"""An internal module to download shape files for land area and Greenland."""
+import fnmatch
+import zipfile
+from pathlib import Path
+
+import geopandas as gpd
+import pandas as pd
+import requests
+import unzip_http
+from shapely.geometry import MultiPolygon
+
+USGS_LAND_URL = "https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip"  # noqa
+GREENLAND_URL = "https://stacks.stanford.edu/file/druid:sd368wz2435/data.zip"  # noqa
+
+
+def get_usgs_land(outpath=None):
+    """Get the USGS land data from the following URL:
+
+    https://www.ngdc.noaa.gov/mgg/shorelines/data/gshhg/latest/gshhg-shp-2.3.7.zip
+    """
+    outpath = Path(outpath) if outpath else Path.cwd()
+    rzf = unzip_http.RemoteZipFile(USGS_LAND_URL)
+    # Level 1: Continental land masses and ocean islands, except Antarctica.
+    # Level 6: Antarctica based on grounding line boundary.
+ paths = ["GSHHS_shp/h/GSHHS_h_L1.*", "GSHHS_shp/h/GSHHS_h_L6.*"] + shp_files = [] + dfs = [] + for fn in rzf.infolist(): + if not any(fnmatch.fnmatch(fn.filename, g) for g in paths): + continue + outname = outpath / fn.filename + if outname.suffix == (".shp"): + shp_files.append(outname) + if not outname.exists(): + outname.parent.mkdir(parents=True, exist_ok=True) + with rzf.open(fn) as fp, open(outname, "wb") as fout: + print(f"Extracting {fn.filename} to {outname}") + while r := fp.read(2**18): + fout.write(r) + for p in shp_files: + dfs.append(gpd.read_file(p)) + return dfs + + +def get_land_df( + buffer_deg=0.2, + outname="usgs_land_{d}deg_buffered.geojson", + driver="GeoJSON", + do_zip=True, +) -> gpd.GeoDataFrame: + """Create a GeoDataFrame of the (buffered) USGS land polygons.""" + outname = outname.format(d=buffer_deg) + if outname and Path(outname).exists(): + print(f"Loading {outname} from disk") + return gpd.read_file(outname) + elif Path(outname + ".zip").exists(): + print(f"Loading {outname}.zip from disk") + return gpd.read_file(str(outname) + ".zip") + + # If we haven't already made the file, make it + df_land_cont, df_antarctica = get_usgs_land() + df_land = pd.concat([df_land_cont, df_antarctica], axis=0)[["geometry"]] + df_land.geometry = df_land.geometry.buffer(buffer_deg) + df_land = df_land.dissolve() + + df_land.to_file(outname, driver=driver) + if do_zip and outname.endswith(".geojson"): + outname_zipped = Path(str(outname) + ".zip") + # zip and remove the original + with zipfile.ZipFile( + outname_zipped, "w", compression=zipfile.ZIP_DEFLATED + ) as zf: + zf.write(outname) + + # Remove the original + Path(outname).unlink() + + return df_land + + +def get_greenland_shape(outpath=None, buffer_deg=0.2) -> MultiPolygon: + """Get the Greenland data from the following URL: + + https://stacks.stanford.edu/file/druid:sd368wz2435/data.zip + """ + outpath = Path(outpath) if outpath else Path.cwd() + outname = outpath / f"greenland_{buffer_deg}deg_buffered.geojson" + if outname.exists(): + print(f"Loading {outname} from disk") + return gpd.read_file(outname).iloc[0].geometry + + # download the whole greenland shapefile + print("Downloading Greenland shapefile...") + r = requests.get(GREENLAND_URL) + zipfile = outpath / "greenland.zip" + if not zipfile.exists(): + with open(zipfile, "wb") as fout: + fout.write(r.content) + + df = gpd.read_file(zipfile) + print("Simplifying and buffering Greenland shapefile...") + g = df.iloc[0].geometry + # Now simplify first, then buffer + gs = g.simplify(0.1) + g_buffered = gs.buffer(buffer_deg) + # Save for later + gpd.GeoDataFrame(geometry=[g_buffered]).to_file(outname, driver="GeoJSON") + return g_buffered diff --git a/src/burst_db/_opera_north_america.py b/src/burst_db/_opera_north_america.py new file mode 100644 index 0000000..ce63420 --- /dev/null +++ b/src/burst_db/_opera_north_america.py @@ -0,0 +1,17 @@ +"""Module to read the OPERA North America shape. 
+
+Data comes from:
+https://github.com/OPERA-Cal-Val/DSWx-Validation-Experiments/blob/7f06ab98cf43135eb63e5a29593235dbebcb19fa/marshak/north_america_boundaries/north_america_opera.geojson
+"""
+from pathlib import Path
+
+import geopandas as gpd
+from shapely import GeometryType
+
+
+def get_opera_na_shape() -> GeometryType.MULTIPOLYGON:
+    """Read the OPERA North America geometry as a shapely `multipolygon`."""
+    filename = Path(__file__).parent / "data" / "north_america_opera.geojson.zip"
+    na_gpd = gpd.read_file(filename)
+    # Combine all geometries in the GeoDataFrame into one MultiPolygon
+    return na_gpd.geometry.unary_union
diff --git a/src/burst_db/build_frame_db.py b/src/burst_db/build_frame_db.py
new file mode 100755
index 0000000..cb03b29
--- /dev/null
+++ b/src/burst_db/build_frame_db.py
@@ -0,0 +1,754 @@
+#!/usr/bin/env python
+import argparse
+import datetime
+import json
+import sqlite3
+import time
+import zipfile
+from pathlib import Path
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import utm  # https://github.com/Turbo87/utm
+from shapely import GeometryType, STRtree
+from shapely.affinity import translate
+from tqdm.auto import tqdm
+
+from burst_db import __version__
+
+from . import frames
+from ._esa_burst_db import ESA_DB_URL, get_esa_burst_db
+from ._land_usgs import GREENLAND_URL, USGS_LAND_URL, get_greenland_shape, get_land_df
+from ._opera_north_america import get_opera_na_shape
+
+# Threshold to use EPSG:3413, Sea Ice Polar North (https://epsg.io/3413)
+NORTH_THRESHOLD = 75
+NORTH_EPSG = 3413
+# Threshold to use EPSG:3031, Antarctic Polar Stereographic (https://epsg.io/3031)
+SOUTH_THRESHOLD = -60
+SOUTH_EPSG = 3031
+
+
+def make_jpl_burst_id(df: pd.DataFrame):
+    """Make the JPL burst ID from the ESA burst ID."""
+    burst_id_jpl = (
+        "t"
+        + df["relative_orbit_number"].astype(str).str.zfill(3)
+        + "_"
+        + df["burst_id"].astype(str).str.zfill(6)
+        + "_"
+        + df["subswath_name"].str.lower()
+    )
+    return burst_id_jpl
+
+
+def _setup_spatialite_con(con: sqlite3.Connection):
+    """Enable spatialite and load the spatialite extension."""
+    con.enable_load_extension(True)
+    # Try the two versions, mac and linux with .so
+    try:
+        con.load_extension("mod_spatialite")
+    except sqlite3.OperationalError:
+        con.load_extension("mod_spatialite.so")
+    # Allow us to use spatialite functions on GPKG files.
+ # https://medium.com/@joelmalone/sqlite3-spatialite-and-geopackages-66a08485da6c + con.execute("SELECT EnableGpkgAmphibiousMode();") + + +def make_burst_triplets(df_burst: pd.DataFrame) -> pd.DataFrame: + """Make a burst triplets dataframe, aggregating IW1,2,3 from the burst dataframe.""" + + def join_track_numbers(orbits: list) -> str: + orbits = list(set(orbits)) + orbits_str = list(map(str, orbits)) + return ",".join(orbits_str) + + df_burst_triplet_temp = df_burst.dissolve( + by="burst_id", + aggfunc={ + "OGC_FID": ["min", "max"], + "relative_orbit_number": join_track_numbers, + "orbit_pass": "first", + }, + as_index=False, + ) + df_burst_triplet = df_burst_triplet_temp.reset_index(drop=True) + df_burst_triplet.columns = [ + "burst_id", + "geom", + "OGC_FID_min", + "OGC_FID_max", + "relative_orbit_numbers", + "look_direction", + ] + return df_burst_triplet + + +def get_land_indicator(gdf: gpd.GeoDataFrame, land_geom: GeometryType.POLYGON): + """Get a boolean array indicating if each row of `gdf` intersects `land_geom`.""" + tree = STRtree(gdf.geometry) + idxs_land = tree.query(land_geom, predicate="intersects") + if idxs_land.ndim == 2: + idxs_land = idxs_land[1] + is_in_land = gdf.index.isin(idxs_land) + return is_in_land + + +def make_frame_to_burst_table(outfile: str, df_frame_to_burst_id: pd.DataFrame): + """Create the frames_bursts table and indexes.""" + with sqlite3.connect(outfile) as con: + _setup_spatialite_con(con) + + df_frame_to_burst_id.to_sql("frames_bursts", con, if_exists="replace") + con.execute( + "CREATE INDEX IF NOT EXISTS idx_frames_bursts_burst_ogc_fid ON" + " frames_bursts (burst_ogc_fid)" + ) + con.execute( + "CREATE INDEX IF NOT EXISTS idx_frames_bursts_frame_fid ON frames_bursts" + " (frame_fid)" + ) + con.execute( + "CREATE INDEX IF NOT EXISTS idx_burst_id_map_burst_id ON burst_id_map" + " (burst_id)" + ) + + +def make_frame_table(outfile: str): + """Create the frames table and indexes.""" + with sqlite3.connect(outfile) as con: + _setup_spatialite_con(con) + con.execute( + "CREATE TABLE frames (fid INTEGER PRIMARY KEY, epsg INTEGER, " + "is_land INTEGER, is_north_america INTEGER)" + ) + # https://groups.google.com/g/spatialite-users/c/XcWvAk7vg0c + # should add geom after the table is created + # table_name , geometry_column_name , geometry_type , with_z , with_m , srs_id + con.execute( + "SELECT gpkgAddGeometryColumn('frames', 'geom', 'MULTIPOLYGON', 2, 0," + " 4326);" + ) + + # Aggregates burst geometries for each frame into one + con.execute( + """INSERT INTO frames(fid, is_land, geom) + SELECT fb.frame_fid as fid, + fb.is_land, + ST_UnaryUnion(ST_Collect(geom)) as geom + FROM burst_id_map b + JOIN + frames_bursts fb + ON b.ogc_fid = fb.burst_ogc_fid + GROUP BY 1; + """ + ) + print("Creating indexes and spatial index...") + con.execute("CREATE INDEX IF NOT EXISTS idx_frames_fid ON frames (fid)") + con.execute("SELECT gpkgAddSpatialIndex('frames', 'geom') ;") + # Extra thing so that QGIS recognizes "frames" better + con.execute( + "UPDATE gpkg_geometry_columns SET geometry_type_name = 'MULTIPOLYGON';" + ) + # Set the relative_orbit_number as the most common value for each frame + con.execute("ALTER TABLE frames ADD COLUMN relative_orbit_number INTEGER;") + con.execute( + """WITH frame_tracks AS ( + SELECT f.fid, + CAST(ROUND(AVG(b.relative_orbit_number)) AS INTEGER) AS relative_orbit_number + FROM frames f + JOIN frames_bursts fb ON f.fid = fb.frame_fid + JOIN burst_id_map b ON fb.burst_ogc_fid = b.ogc_fid + GROUP BY 1 + ) UPDATE frames SET 
relative_orbit_number = frame_tracks.relative_orbit_number + FROM frame_tracks + WHERE frames.fid = frame_tracks.fid; + """ + ) + # Set the orbit_pass as the value from the first burst + con.execute("ALTER TABLE frames ADD COLUMN orbit_pass TEXT;") + con.execute( + """WITH op AS + (SELECT f.fid, + b.orbit_pass + FROM frames f + JOIN frames_bursts fb ON f.fid = fb.frame_fid + JOIN burst_id_map b ON fb.burst_ogc_fid = b.ogc_fid), + frame_orbits AS ( + SELECT fid, + FIRST_VALUE(orbit_pass) OVER (PARTITION BY fid) AS orbit_pass + FROM op + GROUP BY fid) + UPDATE frames SET orbit_pass = frame_orbits.orbit_pass + FROM frame_orbits + WHERE frames.fid = frame_orbits.fid; + """ + ) + + # Drop the is_land from the frames_bursts table + con.execute("ALTER TABLE frames_bursts DROP COLUMN is_land;") + + +def get_epsg_codes(df: gpd.GeoDataFrame): + """Get the EPSG codes for all non-antimeridian polygons in a GeoDataFrame. + + Uses the UTM library to account for the oddities of the Zones near Norway [1]_. + + References + ---------- + .. _[1]: http://www.jaworski.ca/utmzones.htm + """ + + def _is_on_antimeridian(geom): + return geom.geom_type == "MultiPolygon" and len(geom.geoms) > 1 + + epsgs = np.zeros(len(df), dtype=int) + + # do the antimeridian frames first + am_idxs = df.geometry.map(_is_on_antimeridian).values + epsgs[am_idxs] = df[am_idxs].geometry.map(antimeridian_epsg) + + # everything else + # get the x, y (lon, lat) coords of all other rows + other_coords = np.array( + df[~am_idxs].geometry.map(lambda g: tuple(g.centroid.coords)[0]).tolist() + ) + xs, ys = other_coords.T + ys_full_size = np.ones(len(epsgs)) * np.nan + ys_full_size[~am_idxs] = ys + + idxs = np.logical_and.reduce((~am_idxs, ys_full_size > NORTH_THRESHOLD)) + epsgs[idxs] = NORTH_EPSG + + idxs = np.logical_and.reduce((~am_idxs, ys_full_size < SOUTH_THRESHOLD)) + epsgs[idxs] = SOUTH_EPSG + + utm_idxs = np.logical_and(ys < NORTH_THRESHOLD, ys > SOUTH_THRESHOLD) + north_idxs = ys[utm_idxs] > 0 + + # North hemisphere + zones_north = [ + utm.from_latlon(y, x)[2] + for (y, x) in zip(ys[utm_idxs][north_idxs], xs[utm_idxs][north_idxs]) + ] + idxs = np.logical_and.reduce( + (~am_idxs, ys_full_size < NORTH_THRESHOLD, ys_full_size > 0) + ) + epsgs[idxs] = 32600 + np.array(zones_north) + + # South hemisphere + zones_south = [ + utm.from_latlon(y, x)[2] + for (y, x) in zip(ys[utm_idxs][~north_idxs], xs[utm_idxs][~north_idxs]) + ] + idxs = np.logical_and.reduce( + (~am_idxs, ys_full_size > SOUTH_THRESHOLD, ys_full_size < 0) + ) + epsgs[idxs] = 32700 + np.array(zones_south) + + # Set all Greenland frames to EPSG:3413 + geom_greenland = get_greenland_shape() + is_in_greenland = get_land_indicator(df, geom_greenland) + print( + f"{is_in_greenland.sum()} frames are in Greenland. Setting to EPSG:{NORTH_EPSG}" + ) + epsgs[is_in_greenland] = NORTH_EPSG + + return epsgs + + +def antimeridian_epsg(mp): + """Calculate the EPSG of multipolygons along the antimeridian. + + Parameters + ---------- + mp : shapely.geometry.MultiPolygon + The multipolygon to calculate the EPSG for. + + Returns + ------- + epsg : int + The EPSG code for the multipolygon. + + Notes + ----- + + The EPSG code is calculated by taking the weighted average of the centroid of the + polygons in the multipolygon. The centroid is weighted by the area of the polygon. + The centroid is shifted by 360 degrees if it is in the western hemisphere. 
+ """ + y_c = mp.centroid.y + # check north/south pole cases + if y_c >= NORTH_THRESHOLD: + return NORTH_EPSG + elif y_c <= SOUTH_THRESHOLD: + return SOUTH_EPSG + + # otherwise, do the weighted average of the shifted polygons to get the centroid + A = 0 + x_weighted = 0 + # might have 2 or 3 polygons + for g in mp.geoms: + A += g.area + if g.centroid.x < 0: + g_shifted = translate(g, xoff=360) + x_weighted += g_shifted.centroid.x * g.area + else: + x_weighted += g.centroid.x * g.area + x_c = x_weighted / A + + # Northern hemisphere = 326XX, southern is 327XX + base = 32600 if y_c > 0 else 32700 + # EPSG increases negative to positive (west to east) + # 32601 is at longitude -179 (which 181 after shifting +360) + zone_addition = 1 if x_c > 180 else 60 + return base + zone_addition + + +def update_burst_epsg(outfile): + """Update the EPSG of each burst to match the EPSG of the frame it is in.""" + with sqlite3.connect(outfile) as con: + _setup_spatialite_con(con) + # add index + con.execute( + "CREATE INDEX IF NOT EXISTS idx_burst_id_map_burst_id_jpl ON burst_id_map" + " (burst_id_jpl)" + ) + # Set the EPSG on every burst from the frames + print("Updating burst EPSGs to match frames...") + sql = """WITH burst_epsgs AS ( + SELECT b.OGC_FID, + f.epsg + FROM burst_id_map b + JOIN frames_bursts fb + ON b.OGC_FID = fb.burst_ogc_fid + JOIN frames f + ON fb.frame_fid = f.fid + ) + UPDATE burst_id_map + SET epsg = burst_epsgs.epsg + FROM burst_epsgs + WHERE burst_id_map.OGC_FID = burst_epsgs.OGC_FID; + """ + con.execute(sql) + + +def add_gpkg_spatial_ref_sys(outfile): + """Add all EPSG codes to the gpkg_spatial_ref_sys table.""" + north_hemi_utm = list(range(32601, 32661)) + south_hemi_utm = list(range(32701, 32761)) + epsgs = [SOUTH_EPSG, NORTH_EPSG, 4326] + north_hemi_utm + south_hemi_utm + with sqlite3.connect(outfile) as con: + _setup_spatialite_con(con) + sql = "SELECT gpkgInsertEpsgSRID({epsg});" + for epsg in tqdm(epsgs): + try: + con.execute(sql.format(epsg=epsg)) + except (sqlite3.OperationalError, sqlite3.IntegrityError): + # exists + pass + # Fix the gpkg_spatial_ref_sys table for missing UTM zone 32760 + # https://www.gaia-gis.it/fossil/libspatialite/tktview/8b6910dbbb2180026af54a5cc5aac107fb1d62ad?plaintext + sql = """INSERT INTO gpkg_spatial_ref_sys ( + srs_name, + srs_id, + organization, + organization_coordsys_id, + definition + ) + VALUES ( + 'WGS 84 / UTM zone 60S', + 32760, + 'EPSG', + 32760, +'PROJCS["WGS 84 / UTM zone 60S",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",177],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32760"]]' + ); + """ + try: + con.execute(sql) + except (sqlite3.OperationalError, sqlite3.IntegrityError): + # exists + pass + # More the entries from gpkg_spatial_ref_sys to spatial_ref_sys + # so we can use the `ST_Transform` function + con.execute("DROP TABLE IF EXISTS spatial_ref_sys;") + sql = """ + CREATE TABLE spatial_ref_sys ( + srid INTEGER NOT NULL PRIMARY KEY, + auth_name VARCHAR(256), + auth_srid INTEGER, + srtext VARCHAR(2048), + proj4text VARCHAR(2048) + );""" + con.execute(sql) + sql = """ 
+            INSERT INTO spatial_ref_sys
+            SELECT
+                srs_id AS srid,
+                organization AS auth_name,
+                organization_coordsys_id AS auth_srid,
+                definition AS srtext,
+                NULL
+            FROM gpkg_spatial_ref_sys;
+        """
+        con.execute(sql)
+
+
+def save_utm_bounding_boxes(outfile, *, margin: float, snap: float):
+    """Save the bounding boxes of each burst in UTM coordinates."""
+    print("Saving UTM bounding boxes...")
+    try:
+        with sqlite3.connect(outfile) as con:
+            for col in ["xmin", "ymin", "xmax", "ymax"]:
+                con.execute(f"ALTER TABLE burst_id_map ADD COLUMN {col} INTEGER;")
+    except sqlite3.OperationalError:
+        # Already exists
+        pass
+
+    sql = f"""
+WITH transformed(g, OGC_FID) AS
+  (SELECT ST_Envelope(ST_Transform(geom, epsg)) g,
+          OGC_FID
+   FROM burst_id_map
+   WHERE epsg != 0 )
+UPDATE burst_id_map
+SET (xmin,
+     ymin,
+     xmax,
+     ymax) = (bboxes.xmin,
+              bboxes.ymin,
+              bboxes.xmax,
+              bboxes.ymax)
+FROM
+  (SELECT OGC_FID,
+          FLOOR((ST_MinX(g) - {margin}) / {snap:.1f}) * {snap:.1f} AS xmin,
+          FLOOR((ST_MinY(g) - {margin}) / {snap:.1f}) * {snap:.1f} AS ymin,
+          CEIL((ST_MaxX(g) + {margin}) / {snap:.1f}) * {snap:.1f} AS xmax,
+          CEIL((ST_MaxY(g) + {margin}) / {snap:.1f}) * {snap:.1f} AS ymax
+   FROM transformed) AS bboxes
+WHERE burst_id_map.OGC_FID = bboxes.OGC_FID ;
+    """
+    with sqlite3.connect(outfile) as con:
+        _setup_spatialite_con(con)
+        con.execute(sql)
+
+
+def make_minimal_db(db_path, df_frame_to_burst_id, output_path):
+    """Make a minimal database with only the following columns:
+
+    burst_id_jpl, epsg, xmin, ymin, xmax, ymax, frame_ids
+    and a zip file with:
+    {burst_id_jpl: (epsg, xmin, ymin, xmax, ymax, frame_ids),...}
+    """
+    with sqlite3.connect(db_path) as con:
+        df = pd.read_sql_query(
+            (
+                "SELECT OGC_FID, burst_id_jpl, epsg, xmin, ymin, xmax, ymax FROM"
+                " burst_id_map"
+            ),
+            con,
+        )
+    # Make sure the snapped coordinates are stored as integers (~40% smaller than REAL)
+    df["xmin"] = df["xmin"].astype(int)
+    df["ymin"] = df["ymin"].astype(int)
+    df["xmax"] = df["xmax"].astype(int)
+    df["ymax"] = df["ymax"].astype(int)
+
+    with sqlite3.connect(output_path) as con:
+        df.to_sql("burst_id_map", con, if_exists="replace", index=False)
+
+    # Make a version which has the list of frames each burst belongs to
+    df_burst_to_frames = _get_burst_to_frame_list(df_frame_to_burst_id)
+    df_out = pd.merge(
+        df, df_burst_to_frames, how="inner", left_on="OGC_FID", right_on="burst_ogc_fid"
+    )
+    df_out.drop(columns="OGC_FID", inplace=True)
+    return df_out
+
+
+def make_burst_to_frame_json(df, output_path: str, metadata: dict):
+    data_dict = (
+        df.set_index("burst_id_jpl")[["frame_fid"]]
+        .rename(columns={"frame_fid": "frame_ids"})
+        .to_dict(orient="index")
+    )
+    # Format: {'t001_000001_iw1': {'frame_ids': [1]}, ...}
+    dict_out = {"data": data_dict, "metadata": metadata}
+    _write_zipped_json(output_path, dict_out)
+
+
+def make_frame_to_burst_json(db_path: str, output_path: str, metadata: dict):
+    with sqlite3.connect(db_path) as con:
+        df_frame_to_burst = pd.read_sql_query(
+            """
+            SELECT
+                f.fid AS frame_id,
+                f.epsg,
+                f.is_land,
+                f.is_north_america,
+                MIN(xmin) AS xmin,
+                MIN(ymin) AS ymin,
+                MAX(xmax) AS xmax,
+                MAX(ymax) AS ymax,
+                GROUP_CONCAT(burst_id_jpl) AS burst_ids
+            FROM frames f
+            JOIN frames_bursts fb ON fb.frame_fid = f.fid
+            JOIN burst_id_map b ON fb.burst_ogc_fid = b.ogc_fid
+            GROUP BY 1;
+            """,
+            con,
+        )
+    df_frame_to_burst.burst_ids = df_frame_to_burst.burst_ids.str.split(",")
+    df_frame_to_burst.is_land = df_frame_to_burst.is_land.astype(bool)
+    df_frame_to_burst.is_north_america = df_frame_to_burst.is_north_america.astype(bool)
+    data_dict = df_frame_to_burst.set_index("frame_id").to_dict(orient="index")
+    dict_out = {"data": data_dict, "metadata": metadata}
+
+    _write_zipped_json(output_path, dict_out)
+
+
+def _write_zipped_json(json_path, dict_out, level: int = 6):
+    json_zip_path = str(json_path) + ".zip"
+    with zipfile.ZipFile(
+        json_zip_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=level
+    ) as zf:
+        zf.writestr(json_path, json.dumps(dict_out))
+
+
+def _get_burst_to_frame_list(df_frame_to_burst_id):
+    """Get a DataFrame which maps the burst ID to the list of frames
+    containing the burst.
+
+    Example:
+                   frame_fid
+    burst_ogc_fid
+    ...
+    24                   [1]
+    25                [1, 2]
+    26                [1, 2]
+    27                [1, 2]
+    28                   [2]
+    """
+    return (
+        df_frame_to_burst_id[["burst_ogc_fid", "frame_fid"]]
+        .groupby("burst_ogc_fid")
+        .agg(list)
+    )
+
+
+def _get_metadata(args):
+    base = {
+        "margin": args.margin,
+        "snap": args.snap,
+        "target_frame_size": args.target_frame,
+        "version": __version__,
+        "last_modified": datetime.datetime.now().isoformat(),
+        "land_buffer_deg": args.land_buffer_deg,
+        "land_optimized": args.optimize_land,
+        "usgs_land_shape_url": USGS_LAND_URL,
+        "greenland_shape_url": GREENLAND_URL,
+        "esa_burst_db_url": ESA_DB_URL,
+    }
+    if args.optimize_land:
+        base["min_frame_size"] = args.min_frame
+        base["max_frame_size"] = args.max_frame
+    return base
+
+
+def create_metadata_table(db_path, metadata):
+    """Make the metadata table with the arguments used to create the database."""
+    df = pd.DataFrame([metadata])
+    with sqlite3.connect(db_path) as con:
+        df.to_sql("metadata", con, if_exists="replace", index=False)
+
+
+def read_zipped_json(filename):
+    with zipfile.ZipFile(filename) as zf:
+        bytes = zf.read(str(Path(filename).name).replace(".zip", ""))
+        return json.loads(bytes.decode())
+
+
+def get_cli_args():
+    parser = argparse.ArgumentParser(
+        description="Generate frames for Sentinel-1 data",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--esa-db-path",
+        default="burst_map_IW_000001_375887.sqlite3",
+        help=(
+            "Path to the ESA sqlite burst database to convert, "
+            f"downloaded from {ESA_DB_URL} . Will be downloaded if it does not exist."
+ ), + ) + parser.add_argument( + "--snap", + type=float, + default=30.0, + help="Snap the bounding box to the nearest multiple of this value.", + ) + parser.add_argument( + "--margin", + type=float, + default=5000.0, + help="Add this margin surrounding the bounding box of bursts.", + ) + parser.add_argument( + "--optimize-land", + action="store_true", + help=( + "Create frames which attempt to minimize the number of majority-water" + " frames" + ), + ) + parser.add_argument( + "--target-frame", + type=int, + default=9, + help="Target number of bursts per frame.", + ) + parser.add_argument( + "--min-frame", + type=int, + default=5, + help="(If using `--optimize-land`): Minimum number of bursts per frame", + ) + parser.add_argument( + "--max-frame", + type=int, + default=10, + help="(If using `--optimize-land`): Maximum number of bursts per frame", + ) + parser.add_argument( + "-o", + "--outfile", + default="opera-s1-disp.gpkg", + help="Output file name", + ) + parser.add_argument( + "--land-buffer-deg", + type=float, + default=0.3, + help="A buffer (in degrees) to indicate that a frame is near land.", + ) + + return parser.parse_args() + + +def main(): + args = get_cli_args() + + t0 = time.time() + + # Read ESA's Burst Data + if not Path(args.esa_db_path).exists(): + print(f"Downloading {ESA_DB_URL} to {args.esa_db_path}...") + get_esa_burst_db(args.esa_db_path) + + print("Loading burst data...") + sql = "SELECT * FROM burst_id_map" + with sqlite3.connect(args.esa_db_path) as con: + df_burst = gpd.GeoDataFrame.from_postgis( + sql, con, geom_col="GEOMETRY", crs="EPSG:4326" + ).rename_geometry("geom") + + print("Forming string JPL id") + jpl_ids = make_jpl_burst_id(df_burst) + df_burst.loc[:, "burst_id_jpl"] = jpl_ids + # placeholder to compute later + df_burst.loc[:, "epsg"] = 0 + + # Start the outfile with the ESA database contents + outfile = args.outfile + print("Saving initial version of `burst_id_map` table") + df_burst.set_index("OGC_FID").to_file( + outfile, driver="GPKG", layer="burst_id_map", index=False + ) + # Adjust the primary key so it still matches original OGC_FID + print("Renaming index column from 'fid' to 'OGC_FID'") + with sqlite3.connect(outfile) as con: + con.execute("ALTER TABLE burst_id_map RENAME COLUMN fid TO OGC_FID;") + + print("Aggregating burst triplets (grouping IW1,2,3 geometries together)") + df_burst_triplet = make_burst_triplets(df_burst) + + # Get the land polygon to intersect + print("Indicating which burst triplets are near land...") + df_land = get_land_df(args.land_buffer_deg) + land_geom = df_land.geometry + + is_in_land = get_land_indicator(df_burst_triplet, land_geom) + + # Create frames and JOIN tables + # Make the JOIN table first + print("Defining frames - bursts JOIN table") + df_frame_to_burst_id = frames.create_frame_to_burst_mapping( + is_in_land, + target_frame=args.target_frame, + min_frame=args.min_frame, + max_frame=args.max_frame, + optimize_land=args.optimize_land, + ) + make_frame_to_burst_table(outfile, df_frame_to_burst_id) + + # make the "frames" table + print("Making frames table by aggregating burst geometries...") + make_frame_table(outfile) + df_frames = gpd.read_file(outfile, layer="frames") + + print("Computing EPSG codes for each frame...") + epsgs = get_epsg_codes(df_frames) + df_frames.loc[:, "epsg"] = epsgs + + # Mark the ones in north america in the OPERA region of interest + geom_north_america = get_opera_na_shape() + is_in_north_america = get_land_indicator(df_frames, geom_north_america) + df_frames.loc[:, 
"is_north_america"] = is_in_north_america + + print("Final number of frames:", len(df_frames)) + print("Number intersecting land:", is_in_land.sum()) + print("Number in North America:", is_in_north_america.sum()) + print("Saving frames...") + df_frames.to_file(outfile, driver="GPKG", layer="frames") + + update_burst_epsg(outfile) + + # Create the bounding box in UTM coordinates + add_gpkg_spatial_ref_sys(outfile) + save_utm_bounding_boxes(outfile, margin=args.margin, snap=args.snap) + + # Make the minimal version of the DB + ext = Path(outfile).suffix + out_minimal = outfile.replace(ext, f"-bbox-only{ext}") + out_burst_to_frame = outfile.replace(ext, "-burst-to-frame.json") + print(f"Creating a epsg/bbox only version: {out_minimal}") + df_minimal = make_minimal_db( + db_path=outfile, + df_frame_to_burst_id=df_frame_to_burst_id, + output_path=out_minimal, + ) + + # Get metadata for output dbs + metadata = _get_metadata(args) + # Create the two JSON mappings: + # from frame id -> [burst Ids] + # and burst id -> [frame Ids] + make_burst_to_frame_json( + df_minimal, output_path=out_burst_to_frame, metadata=metadata + ) + + out_frame_to_burst = outfile.replace(ext, "-frame-to-burst.json") + make_frame_to_burst_json( + db_path=outfile, output_path=out_frame_to_burst, metadata=metadata + ) + + # Add metadata to each + create_metadata_table(outfile, metadata=metadata) + create_metadata_table(out_minimal, metadata=metadata) + + print(f"Total time: {time.time() - t0:.2f} seconds") + + +if __name__ == "__main__": + main() diff --git a/src/burst_db/data/north_america_opera.geojson.zip b/src/burst_db/data/north_america_opera.geojson.zip new file mode 100644 index 0000000..be593f7 Binary files /dev/null and b/src/burst_db/data/north_america_opera.geojson.zip differ diff --git a/src/burst_db/frames.py b/src/burst_db/frames.py new file mode 100644 index 0000000..9f19727 --- /dev/null +++ b/src/burst_db/frames.py @@ -0,0 +1,251 @@ +from __future__ import annotations + +import math +from collections import Counter, namedtuple +from concurrent.futures import ProcessPoolExecutor +from functools import lru_cache +from itertools import groupby, repeat + +import numpy as np +import pandas as pd +from numpy.typing import ArrayLike +from tqdm.auto import tqdm + +MIN_FRAME = 5 +MAX_FRAME = 12 +TARGET_FRAME = 10 + +FrameSlice = namedtuple("FrameSlice", ["start_idx", "end_idx", "is_land"]) + + +def create_frame_to_burst_mapping( + burst_is_in_land: ArrayLike, + target_frame: int, + min_frame: int, + max_frame: int, + optimize_land: bool = False, +) -> pd.DataFrame: + """Create the JOIN table between frames_number and burst_id.""" + if not optimize_land: + cumulative_slice_idxs = make_simple_frame_slices(burst_is_in_land) + else: + cumulative_slice_idxs = make_land_optimized_frame_slices( + burst_is_in_land, + target_frame=target_frame, + min_frame=min_frame, + max_frame=max_frame, + ) + + # Create the frame IDs mapping to burst_id + # (frame_id, OGC_FID) + frame_ogc_fid_tuples = [] + for frame_id, (start_idx, end_idx, is_land) in enumerate( + cumulative_slice_idxs, start=1 + ): + for burst_id in range(start_idx + 1, end_idx + 1): + for ogc_fid in range(1 + 3 * (burst_id - 1), 4 + 3 * (burst_id - 1)): + frame_ogc_fid_tuples.append((frame_id, ogc_fid, is_land)) + + df_frame_to_burst_id = pd.DataFrame( + frame_ogc_fid_tuples, columns=["frame_fid", "burst_ogc_fid", "is_land"] + ) + return df_frame_to_burst_id + + +def make_simple_frame_slices( + burst_is_in_land: ArrayLike, n_bursts_per_frame: int = 9, overlap: int = 1 
+) -> list[FrameSlice]:
+    """Group together `n_bursts_per_frame` adjacent bursts, overlapping by `overlap`."""
+    num_bursts = len(burst_is_in_land)
+    num_frames = int(np.ceil(num_bursts / (n_bursts_per_frame - overlap)))
+    burst_start_idxs = [k * (n_bursts_per_frame - overlap) for k in range(num_frames)]
+
+    frame_slices: list[FrameSlice] = []
+    for start in burst_start_idxs:
+        end = start + n_bursts_per_frame
+        frame_slices.append(FrameSlice(start, end, any(burst_is_in_land[start:end])))
+
+    return frame_slices
+
+
+def make_land_optimized_frame_slices(
+    burst_is_in_land: ArrayLike, *, target_frame: int, min_frame: int, max_frame: int
+):
+    """Create FrameSlices which optimize the area on land.
+
+    Uses the boolean array `burst_is_in_land` to find slices corresponding
+    to frames with size >= `min_frame`, <= `max_frame`, and nominally `target_frame`.
+    """
+    frame_slices = create_frame_slices(burst_is_in_land, min_frame=min_frame)
+    # parallelize the solve function calls
+    with ProcessPoolExecutor() as executor:
+        cumulative_slice_idxs = list(
+            tqdm(
+                executor.map(
+                    _process_slice,
+                    frame_slices,
+                    repeat(target_frame),
+                    repeat(min_frame),
+                    repeat(max_frame),
+                ),
+                desc="Solving frame sizes",
+                total=len(frame_slices),
+            )
+        )
+    # Flatten the list of lists, since each result from executor.map is a list
+    cumulative_slice_idxs = sorted(
+        [item for sublist in cumulative_slice_idxs for item in sublist]
+    )
+    return cumulative_slice_idxs
+
+
+def _process_slice(slice_info, target_frame, min_frame, max_frame):
+    """Solve the current batch of bursts to find frame slices."""
+    start_idx, end_idx, is_land = slice_info
+    n = end_idx - start_idx
+    cur_slices = solve(
+        n,
+        target=target_frame,
+        min_frame=min_frame,
+        max_frame=max_frame,
+    )
+    # bump up so they refer to rows, instead of being from 0
+    return [(s + start_idx, e + start_idx, is_land) for (s, e) in cur_slices]
+
+
+@lru_cache(maxsize=1000)
+def solve(n, target=TARGET_FRAME, max_frame=MAX_FRAME, min_frame=MIN_FRAME):
+    """Solve the dynamic programming problem to find the best frame sizes.
+
+    Parameters
+    ----------
+    n : int
+        The number of bursts to split into frames
+    target : int
+        The target number of bursts per frame
+    max_frame : int
+        The maximum number of bursts per frame
+    min_frame : int
+        The minimum number of bursts per frame
+
+    Returns
+    -------
+    list of tuples
+        The start and end indices of each frame
+
+    Notes
+    -----
+    This is posed as the same problem as the text justification problem:
+    "words" == bursts, "lines" == frames, and where to "break the lines" ==
+    how to "group the frames".
+
+    The differences between here and reference [1]_ are
+
+    1. Our "badness" has both a maximum and minimum size, beyond which the
+       badness is infinite. We also have a "target" so that the majority of
+       frames are exactly that size, and only occasionally do we adjust the
+       size to be smaller or larger.
+    2. The slices are overlapping, so we need to add 1 to the length of each frame.
+       This is accounted for in the "badness" function.
+
+    References
+    ----------
+    .. [1] https://ocw.mit.edu/courses/6-006-introduction-to-algorithms-fall-2011/resources/mit6_006f11_lec20/
+    """
+    # DP[i][0] is the minimum badness of the frames starting at i
+    # and DP[i][1] is the index of the next frame (to backtrack and get the slices)
+    DP = [None] * (n + 1)
+    DP[n] = (0, None)
+    for i in range(n - 1, -1, -1):
+        DP[i] = min(
+            (
+                (
+                    DP[j][0]
+                    + _badness(
+                        i, j, target=target, max_frame=max_frame, min_frame=min_frame
+                    ),
+                    j,
+                )
+                for j in range(i + 1, n + 1)
+            ),
+            key=lambda x: x[0],
+        )
+
+    # backtrack and get the slices
+    slices = []
+    i = 0
+    while i < n:
+        j = DP[i][1]
+        # To make the frames overlapping, make it j + 1
+        # Just need to account for the final one, which can't be
+        # bigger than n.
+        end = min(j + 1, n)
+        slices.append((i, end))
+        i = j
+
+    return slices
+
+
+def _badness(i, j, target=TARGET_FRAME, max_frame=MAX_FRAME, min_frame=MIN_FRAME):
+    """Compute the badness of a frame of length j - i.
+
+    To account for the overlap, each frame will really be 1 bigger (except the
+    last), so if "max_frame" is 12, a frame may actually contain 13 bursts.
+    If target is a fraction (like 9.5), then either side of the target is
+    allowed with zero badness.
+    """
+    n = j - i
+    # use n + 1, since the overlap makes each frame one burst bigger
+    if (n + 1) > max_frame or (n + 1) < min_frame:
+        return math.inf
+    else:
+        return math.floor(abs((n + 1) - target)) ** 3
+
+
+def create_frame_slices(is_land_indicator, min_frame=MIN_FRAME) -> list[FrameSlice]:
+    """Group adjacent frames that are too small into continuous land tracks."""
+    indicator = is_land_indicator.copy()
+    ii = 0
+    # First iter: make sure all land sequences are at least min_frame long
+    for is_land, v in groupby(indicator):
+        n_frames = len(list(v))
+        ii += n_frames
+        if is_land and n_frames < min_frame:
+            indicator[ii - min_frame // 2 : ii + min_frame // 2 + 1] = True
+
+    # Second iter: keep looping while there are any small water sequences, making them land
+    keep_looping = True
+    while keep_looping:
+        keep_looping = False
+        ii = 0
+        for is_land, v in groupby(indicator):
+            n_frames = len(list(v))
+            ii += n_frames
+            if not is_land and n_frames < min_frame:
+                keep_looping = True
+                indicator[ii - min_frame // 2 : ii + min_frame // 2 + 1] = True
+        # loop will break when we didn't adjust any water sequences
+
+    consecutive_land_frames = Counter()
+    consecutive_water_frames = Counter()
+    frame_slices: list[FrameSlice] = []
+    ii, i_prev = 0, 0
+    for is_land, cur_indicators in groupby(indicator):
+        n_frames = len(list(cur_indicators))
+        i_prev = ii
+        ii += n_frames
+        frame_slices.append(FrameSlice(i_prev, ii, bool(is_land)))
+
+        if is_land:
+            consecutive_land_frames[n_frames] += 1
+        else:
+            consecutive_water_frames[n_frames] += 1
+
+    print("Number of occurrences with consecutive land bursts:")
+    print(sorted(consecutive_land_frames.items())[:5], end=",... ")
+    print(sorted(consecutive_land_frames.items())[-5:])
+    print("Number of occurrences with consecutive water bursts:")
+    print(sorted(consecutive_water_frames.items())[:5], end=",... ")
+    print(sorted(consecutive_water_frames.items())[-5:])
+
+    return frame_slices
diff --git a/src/burst_db/version.py b/src/burst_db/version.py
index 447dfd0..d20574e 100644
--- a/src/burst_db/version.py
+++ b/src/burst_db/version.py
@@ -6,8 +6,9 @@
 # release history
-Tag = collections.namedtuple('Tag', 'version date')
+Tag = collections.namedtuple('Tag', ['version', 'date'])
 release_history = (
+    Tag('0.1.2', '2023-07-24'),
     Tag('0.1.1', '2022-12-14'),
     Tag('0.1.0', '2022-12-08')
 )
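The frame-splitting logic added in `frames.py` can be exercised directly through `solve`. A minimal sketch (hypothetical burst count; the keyword values mirror the CLI defaults):

```python
from burst_db.frames import solve

# Split a hypothetical run of 23 consecutive land bursts into frames whose
# sizes stay between min_frame and max_frame and cluster around target.
# Each returned (start, end) slice overlaps the next one by a single burst.
slices = solve(23, target=9, min_frame=5, max_frame=10)
for start, end in slices:
    print(f"frame covers burst rows [{start}, {end}), {end - start} bursts")
```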