From 9454d37fd721276bed6c911991a9fddb09a43c3c Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Wed, 27 Sep 2023 15:44:46 -0400 Subject: [PATCH] Update `agg_tiles_hash` docs and minor bug The copying should set `agg_tiles_hash` in all cases because now it uses the always available `tiles` table/view. Also, a few minor cleanups and renames related to that. --- .github/workflows/ci.yml | 2 +- docs/src/tools.md | 103 +++++++++++++++++------------- martin-mbtiles/src/bin/main.rs | 4 +- martin-mbtiles/src/mbtiles.rs | 4 +- martin-mbtiles/src/tile_copier.rs | 2 +- 5 files changed, 66 insertions(+), 49 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fa5bd6473..39678e3cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -506,7 +506,7 @@ jobs: elif [[ "${{ matrix.target }}" == "debian-x86_64" ]]; then mv debian-x86_64.deb ../${{ matrix.name }} else - tar czvf ../${{ matrix.name }} martin${{ matrix.ext }} mbtiles${{ matrix.ext }} + tar czvf ../${{ matrix.name }} martin${{ matrix.ext }} mbtiles${{ matrix.ext }} fi - name: Generate SHA-256 (MacOS) if: matrix.sha == 'true' diff --git a/docs/src/tools.md b/docs/src/tools.md index 05f444035..d59d8e6cb 100644 --- a/docs/src/tools.md +++ b/docs/src/tools.md @@ -69,47 +69,64 @@ If the `.mbtiles` file is of `flat_with_hash` or `normalized` type, then verify mbtiles validate src_file.mbtiles ``` +## Content Validation +The original [MBTiles specification](https://github.com/mapbox/mbtiles-spec#readme) does not provide any guarantees for the content of the tile data in MBTiles. This tool adds a few additional conventions to ensure that the content of the tile data is valid. + +A typical Normalized schema generated by tools like [tilelive-copy](https://github.com/mapbox/TileLive#bintilelive-copy) uses an MD5 hash in the `tile_id` column. Martin's `mbtiles` tool can use this hash to verify the content of each tile. 
We also define a new `flat-with-hash` schema that stores the hash and tile data in the same table. This schema is more efficient than the `normalized` schema when data has no duplicate tiles (see below). Per-tile validation is not available for the `flat` schema. + +Per-tile validation will catch individual invalid tiles, but it will not detect overall datastore corruption (e.g. missing tiles or tiles that shouldn't exist, or tiles with incorrect z/x/y values). For that, the Martin `mbtiles` tool defines a new metadata value called `agg_tiles_hash`. The value is computed by hashing `cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_data` combined for all rows in the `tiles` table/view, ordered by z,x,y. In case there are no rows or all are NULL, the hash value of an empty string is used. + +The `mbtiles` tool will compute the `agg_tiles_hash` value when copying or validating mbtiles files. + ## Supported Schema -The `mbtiles` tool supports three different kinds of schema for `tiles` data in `.mbtiles` files: - -- `flat`: - ``` - CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob); - CREATE UNIQUE INDEX tile_index on tiles (zoom_level, tile_column, tile_row); - ``` -- `flat-with-hash`: - ``` - CREATE TABLE tiles_with_hash (zoom_level integer NOT NULL, tile_column integer NOT NULL, tile_row integer NOT NULL, tile_data blob, tile_hash text); - CREATE UNIQUE INDEX tiles_with_hash_index on tiles_with_hash (zoom_level, tile_column, tile_row); - CREATE VIEW tiles AS SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles_with_hash; - ``` -- `normalized`: - ``` - CREATE TABLE map (zoom_level INTEGER, tile_column INTEGER, tile_row INTEGER, tile_id TEXT); - CREATE UNIQUE INDEX map_index ON map (zoom_level, tile_column, tile_row); - CREATE TABLE images (tile_data blob, tile_id text); - CREATE UNIQUE INDEX images_id ON images (tile_id); - CREATE VIEW tiles AS - SELECT - map.zoom_level AS zoom_level, - 
map.tile_column AS tile_column, - map.tile_row AS tile_row, - images.tile_data AS tile_data - FROM map - JOIN images ON images.tile_id = map.tile_id; - ``` - Optionally, `.mbtiles` files with `normalized` schema can include a `tiles_with_hash` view: - ``` - CREATE VIEW tiles_with_hash AS - SELECT - map.zoom_level AS zoom_level, - map.tile_column AS tile_column, - map.tile_row AS tile_row, - images.tile_data AS tile_data, - images.tile_id AS tile_hash - FROM map - JOIN images ON images.tile_id = map.tile_id; - ``` - **__Note:__** All `normalized` files created by the `mbtiles` tool will contain this view. - -For more general spec information, see [here](https://github.com/mapbox/mbtiles-spec#readme). +The `mbtiles` tool supports three different kinds of schema for `tiles` data in `.mbtiles` files. See also the original [specification](https://github.com/mapbox/mbtiles-spec#readme). + +#### flat +```sql, ignore +CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob); +CREATE UNIQUE INDEX tile_index on tiles (zoom_level, tile_column, tile_row); +``` + +#### flat-with-hash +```sql, ignore +CREATE TABLE tiles_with_hash ( + zoom_level integer NOT NULL, + tile_column integer NOT NULL, + tile_row integer NOT NULL, + tile_data blob, + tile_hash text); +CREATE UNIQUE INDEX tiles_with_hash_index on tiles_with_hash (zoom_level, tile_column, tile_row); +CREATE VIEW tiles AS SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles_with_hash; +``` + +#### normalized +```sql, ignore +CREATE TABLE map (zoom_level INTEGER, tile_column INTEGER, tile_row INTEGER, tile_id TEXT); +CREATE UNIQUE INDEX map_index ON map (zoom_level, tile_column, tile_row); +CREATE TABLE images (tile_data blob, tile_id text); +CREATE UNIQUE INDEX images_id ON images (tile_id); +CREATE VIEW tiles AS + SELECT + map.zoom_level AS zoom_level, + map.tile_column AS tile_column, + map.tile_row AS tile_row, + images.tile_data AS tile_data + FROM map + JOIN images ON 
images.tile_id = map.tile_id; +``` + +Optionally, `.mbtiles` files with `normalized` schema can include a `tiles_with_hash` view: + +```sql, ignore +CREATE VIEW tiles_with_hash AS + SELECT + map.zoom_level AS zoom_level, + map.tile_column AS tile_column, + map.tile_row AS tile_row, + images.tile_data AS tile_data, + images.tile_id AS tile_hash + FROM map + JOIN images ON images.tile_id = map.tile_id; +``` + +**__Note:__** All `normalized` files created by the `mbtiles` tool will contain this view. diff --git a/martin-mbtiles/src/bin/main.rs b/martin-mbtiles/src/bin/main.rs index 7d8721a05..de3976eb9 100644 --- a/martin-mbtiles/src/bin/main.rs +++ b/martin-mbtiles/src/bin/main.rs @@ -67,7 +67,7 @@ enum Commands { /// Value to specify the extent of the SQLite integrity check performed #[arg(long, value_enum, default_value_t=IntegrityCheckType::default())] integrity_check: IntegrityCheckType, - /// Generate a hash of the tile data hashes and store under the 'agg_tiles_hash' key in metadata + /// Update `agg_tiles_hash` metadata value instead of using it to validate if the entire tile store is valid. 
#[arg(long)] update_agg_tiles_hash: bool, }, @@ -148,7 +148,7 @@ async fn validate_mbtiles( if update_agg_tiles_hash { mbt.update_agg_tiles_hash(&mut conn).await?; } else { - mbt.check_agg_tile_hashes(&mut conn).await?; + mbt.check_agg_tiles_hashes(&mut conn).await?; } Ok(()) } diff --git a/martin-mbtiles/src/mbtiles.rs b/martin-mbtiles/src/mbtiles.rs index 4912e4e72..fc878a095 100644 --- a/martin-mbtiles/src/mbtiles.rs +++ b/martin-mbtiles/src/mbtiles.rs @@ -491,7 +491,7 @@ impl Mbtiles { Ok(()) } - pub async fn check_agg_tile_hashes(&self, conn: &mut T) -> MbtResult<()> + pub async fn check_agg_tiles_hashes(&self, conn: &mut T) -> MbtResult<()> where for<'e> &'e mut T: SqliteExecutor<'e>, { @@ -745,7 +745,7 @@ mod tests { async fn validate_invalid_file() { let (mut conn, mbt) = open("../tests/fixtures/files/invalid/invalid_zoomed_world_cities.mbtiles").await; - let result = mbt.check_agg_tile_hashes(&mut conn).await; + let result = mbt.check_agg_tiles_hashes(&mut conn).await; assert!(matches!(result, Err(MbtError::AggHashMismatch(..)))); } } diff --git a/martin-mbtiles/src/tile_copier.rs b/martin-mbtiles/src/tile_copier.rs index e556b79ac..382573702 100644 --- a/martin-mbtiles/src/tile_copier.rs +++ b/martin-mbtiles/src/tile_copier.rs @@ -46,7 +46,7 @@ pub struct TileCopierOptions { /// Compare source file with this file, and only copy non-identical tiles to destination #[cfg_attr(feature = "cli", arg(long))] diff_with_file: Option, - /// Skip generating a global hash for mbtiles validation. By default, if dst_type is flat-with-hash or normalized, generate a global hash and store in the metadata table + /// Skip generating a global hash for mbtiles validation. By default, `mbtiles` will compute `agg_tiles_hash` metadata value. #[cfg_attr(feature = "cli", arg(long))] skip_agg_tiles_hash: bool, }