From 7421a99c8308259f1cf86cce8b4071d36a714cde Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 8 Feb 2024 00:35:31 -0500 Subject: [PATCH] feature: improve diff cmd with copy opts (#1183) * add most of the `mbtiles copy` options to the `mbtiles diff` command * reworked the book --- Cargo.lock | 13 ++- docs/src/SUMMARY.md | 7 +- docs/src/mbtiles-copy.md | 35 ++----- docs/src/mbtiles-diff.md | 50 ++++++--- martin/src/srv/server.rs | 1 + mbtiles/src/bin/mbtiles.rs | 208 ++++++++++++++++++++++--------------- mbtiles/src/copier.rs | 3 +- mbtiles/src/patcher.rs | 16 +-- 8 files changed, 185 insertions(+), 148 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9fe79437..507902360 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2388,19 +2388,18 @@ dependencies = [ [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" dependencies = [ "autocfg", "num-integer", @@ -2420,9 +2419,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index bd7d57b55..ac0d5e000 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -26,8 +26,9 @@ - 
[Recipes](recipes.md) - [Tools](tools.md) - [martin-cp bulk tile generation](martin-cp.md) - - [MBTiles Info and Metadata](mbtiles-meta.md) - - [MBTiles Copying / Diffing](mbtiles-copy.md) - - [MBTiles Validation](mbtiles-validation.md) + - [MBTiles Metadata](mbtiles-meta.md) - [MBTiles Schemas](mbtiles-schema.md) + - [Copying MBTiles](mbtiles-copy.md) + - [Diffing/Patching MBTiles](mbtiles-diff.md) + - [Validating MBTiles](mbtiles-validation.md) - [Development](development.md) diff --git a/docs/src/mbtiles-copy.md b/docs/src/mbtiles-copy.md index c65e6ddfd..29a3a46a6 100644 --- a/docs/src/mbtiles-copy.md +++ b/docs/src/mbtiles-copy.md @@ -18,13 +18,13 @@ mbtiles copy normalized.mbtiles dst.mbtiles \ ## `mbtiles copy --diff-with-file` -Copy command can also be used to compare two mbtiles files and generate a delta (diff) file. The diff file can be applied to the `src_file.mbtiles` elsewhere, to avoid copying/transmitting the entire modified dataset. The delta file will contain all tiles that are different between the two files (modifications, insertions, and deletions as `NULL` values), for both the tile and metadata tables. - -There is one exception: `agg_tiles_hash` metadata value will be renamed to `agg_tiles_hash_in_diff`, and a new `agg_tiles_hash` will be generated for the diff file itself. This is done to avoid confusion when applying the diff file to the original file, as the `agg_tiles_hash` value will be different after the diff is applied. The `apply-diff` command will automatically rename the `agg_tiles_hash_in_diff` value back to `agg_tiles_hash` when applying the diff. +This option is identical to using [`mbtiles diff ...`](mbtiles-diff.md). 
The following two commands are equivalent: ```shell -mbtiles copy src_file.mbtiles diff_file.mbtiles \ - --diff-with-file modified_file.mbtiles +mbtiles diff file1.mbtiles file2.mbtiles diff.mbtiles + +mbtiles copy file1.mbtiles diff.mbtiles \ + --diff-with-file file2.mbtiles ``` ## `mbtiles copy --apply-patch` @@ -33,28 +33,5 @@ Copy a source file to destination while also applying the diff file generated by ```shell mbtiles copy src_file.mbtiles dst_file.mbtiles \ - --apply-patch diff_file.mbtiles -``` - -## `mbtiles apply-patch` - -Apply the diff file generated from `copy` command above to an mbtiles file. The diff file can be applied to the `src_file.mbtiles` elsewhere, to avoid copying/transmitting the entire modified dataset. - -Note that the `agg_tiles_hash_in_diff` metadata value will be renamed to `agg_tiles_hash` when applying the diff. This is done to avoid confusion when applying the diff file to the original file, as the `agg_tiles_hash` value will be different after the diff is applied. - -```shell -mbtiles apply-patch src_file.mbtiles diff_file.mbtiles -``` - -#### Applying diff with SQLite - -Another way to apply the diff is to use the `sqlite3` command line tool directly. This SQL will delete all tiles from `src_file.mbtiles` that are set to `NULL` in `diff_file.mbtiles`, and then insert or update all new tiles from `diff_file.mbtiles` into `src_file.mbtiles`, where both files are of `flat` type. The name of the diff file is passed as a query parameter to the sqlite3 command line tool, and then used in the SQL statements. Note that this does not update the `agg_tiles_hash` metadata value, so it will be incorrect after the diff is applied. 
- ```shell -sqlite3 src_file.mbtiles \ - -bail \ - -cmd ".parameter set @diffDbFilename diff_file.mbtiles" \ - "ATTACH DATABASE @diffDbFilename AS diffDb;" \ - "DELETE FROM tiles WHERE (zoom_level, tile_column, tile_row) IN (SELECT zoom_level, tile_column, tile_row FROM diffDb.tiles WHERE tile_data ISNULL);" \ - "INSERT OR REPLACE INTO tiles (zoom_level, tile_column, tile_row, tile_data) SELECT * FROM diffDb.tiles WHERE tile_data NOTNULL;" + --apply-patch diff.mbtiles ``` diff --git a/docs/src/mbtiles-diff.md b/docs/src/mbtiles-diff.md index 9f24356c8..8adb65804 100644 --- a/docs/src/mbtiles-diff.md +++ b/docs/src/mbtiles-diff.md @@ -2,25 +2,45 @@ ## `mbtiles diff` -Diff command compares two mbtiles files `A` and `B`, and generates a diff (delta) file. -If the diff file is [applied](mbtiles-copy.md#mbtiles-apply-patch) to `A`, it will produce `B`. -The diff file will contain all tiles that are different between the two files -(modifications, insertions, and deletions as `NULL` values), for both the tile and metadata tables. -The only exception is `agg_tiles_has` metadata value. It will be renamed to `agg_tiles_hash_in_diff` and a -new `agg_tiles_hash` will be generated for the diff file itself. +The `diff` command compares two mbtiles files and generates a delta (diff) file. The diff file can be [applied](#mbtiles-apply-patch) to the `src_file.mbtiles` elsewhere, to avoid copying/transmitting the entire modified dataset. The delta file will contain all tiles that are different between the two files (modifications, insertions, and deletions as `NULL` values), for both the tile and metadata tables. + +There is one exception: the `agg_tiles_hash` metadata value will be renamed to `agg_tiles_hash_in_diff`, and a new `agg_tiles_hash` will be generated for the diff file itself. This is done to avoid confusion when applying the diff file to the original file, as the `agg_tiles_hash` value will be different after the diff is applied. 
The `apply-diff` command will automatically rename the `agg_tiles_hash_in_diff` value back to `agg_tiles_hash` when applying the diff. ```shell -# This command will comapre `a.mbtiles` and `b.mbtiles`, and generate a new diff file `diff.mbtiles`. -mbtiles diff a.mbtiles b.mbtiles diff.mbtiles +# This command will compare `file1.mbtiles` and `file2.mbtiles`, and generate a new diff file `diff.mbtiles`. +mbtiles diff file1.mbtiles file2.mbtiles diff.mbtiles -# If diff.mbtiles is applied to a.mbtiles, it will produce b.mbtiles -mbtiles apply-diff a.mbtiles diff.mbtiles b2.mbtiles +# If diff.mbtiles is applied to file1.mbtiles, it will produce file2.mbtiles +mbtiles copy file1.mbtiles file2a.mbtiles --apply-patch diff.mbtiles -# b.mbtiles and b2.mbtiles should now be the same +# file2.mbtiles and file2a.mbtiles should now be the same # Validate both files and see that their hash values are identical -mbtiles validate b.mbtiles -[INFO ] The agg_tiles_hashes=E95C1081447FB25674DCC1EB97F60C26 has been verified for b.mbtiles +mbtiles validate file2.mbtiles +[INFO ] The agg_tiles_hashes=E95C1081447FB25674DCC1EB97F60C26 has been verified for file2.mbtiles + +mbtiles validate file2a.mbtiles +[INFO ] The agg_tiles_hashes=E95C1081447FB25674DCC1EB97F60C26 has been verified for file2a.mbtiles +``` + +## `mbtiles apply-patch` + +Apply the diff file generated with the `mbtiles diff` command above to an MBTiles file. The diff file can be applied to the `src_file.mbtiles` that has been previously downloaded to avoid copying/transmitting the entire modified dataset again. The `src_file.mbtiles` will be modified in-place. It is also possible to apply the diff file while copying the source file to a new destination file, by using the [`mbtiles copy --apply-patch`](mbtiles-copy.md#mbtiles-copy---apply-patch) command. + +Note that the `agg_tiles_hash_in_diff` metadata value will be renamed to `agg_tiles_hash` when applying the diff. 
This is done to avoid confusion when applying the diff file to the original file, as the `agg_tiles_hash` value will be different after the diff is applied. -mbtiles validate b2.mbtiles -[INFO ] The agg_tiles_hashes=E95C1081447FB25674DCC1EB97F60C26 has been verified for b2.mbtiles +```shell +mbtiles apply-patch src_file.mbtiles diff_file.mbtiles +``` + +#### Applying diff with SQLite + +Another way to apply the diff is to use the `sqlite3` command line tool directly. This SQL will delete all tiles from `src_file.mbtiles` that are set to `NULL` in `diff_file.mbtiles`, and then insert or update all new tiles from `diff_file.mbtiles` into `src_file.mbtiles`, where both files are of `flat` type. The name of the diff file is passed as a query parameter to the sqlite3 command line tool, and then used in the SQL statements. Note that this does not update the `agg_tiles_hash` metadata value, so it will be incorrect after the diff is applied. + +```shell +sqlite3 src_file.mbtiles \ + -bail \ + -cmd ".parameter set @diffDbFilename diff_file.mbtiles" \ + "ATTACH DATABASE @diffDbFilename AS diffDb;" \ + "DELETE FROM tiles WHERE (zoom_level, tile_column, tile_row) IN (SELECT zoom_level, tile_column, tile_row FROM diffDb.tiles WHERE tile_data ISNULL);" \ + "INSERT OR REPLACE INTO tiles (zoom_level, tile_column, tile_row, tile_data) SELECT * FROM diffDb.tiles WHERE tile_data NOTNULL;" ``` diff --git a/martin/src/srv/server.rs b/martin/src/srv/server.rs index cf89cc605..b0c1caa16 100755 --- a/martin/src/srv/server.rs +++ b/martin/src/srv/server.rs @@ -150,6 +150,7 @@ pub fn new_server(config: SrvConfig, state: ServerState) -> MartinResult<(Server .workers(worker_processes) .run() .err_into(); + Ok((Box::pin(server), listen_addresses)) } diff --git a/mbtiles/src/bin/mbtiles.rs b/mbtiles/src/bin/mbtiles.rs index bc8702396..fc3d7228f 100644 --- a/mbtiles/src/bin/mbtiles.rs +++ b/mbtiles/src/bin/mbtiles.rs @@ -54,11 +54,7 @@ enum Commands { }, /// Compare two files A and B, and 
generate a new diff file. If the diff file is applied to A, it will produce B. #[command(name = "diff")] - Diff { - file_a: PathBuf, - file_b: PathBuf, - diff: PathBuf, - }, + Diff(DiffArgs), /// Copy tiles from one mbtiles file to another. #[command(name = "copy", alias = "cp")] Copy(CopyArgs), @@ -66,9 +62,9 @@ enum Commands { #[command(name = "apply-patch", alias = "apply-diff")] ApplyPatch { /// MBTiles file to apply diff to - src_file: PathBuf, + base_file: PathBuf, /// Diff file - diff_file: PathBuf, + patch_file: PathBuf, }, /// Update metadata to match the content of the file #[command(name = "meta-update", alias = "update-meta")] @@ -102,6 +98,34 @@ pub struct CopyArgs { src_file: PathBuf, /// MBTiles file to write to dst_file: PathBuf, + #[command(flatten)] + pub options: SharedCopyOpts, + /// Compare source file with this file, and only copy non-identical tiles to destination. + /// Use `mbtiles diff` as a more convenient way to generate this file. + /// It should be later possible to run `mbtiles apply-diff` to merge it in. + #[arg(long, conflicts_with("apply_patch"))] + diff_with_file: Option, + /// Compare source file with this file, and only copy non-identical tiles to destination. + /// It should be later possible to run `mbtiles apply-diff SRC_FILE DST_FILE` to get the same DIFF file. + #[arg(long, conflicts_with("diff_with_file"))] + apply_patch: Option, +} + +#[derive(Clone, Default, PartialEq, Debug, clap::Args)] +pub struct DiffArgs { + /// First MBTiles file to compare + file1: PathBuf, + /// Second MBTiles file to compare + file2: PathBuf, + /// Output file to write the resulting difference to + diff: PathBuf, + + #[command(flatten)] + pub options: SharedCopyOpts, +} + +#[derive(Clone, Default, PartialEq, Debug, clap::Args)] +pub struct SharedCopyOpts { /// Limit what gets copied. /// When copying tiles only, the agg_tiles_hash will still be updated unless --skip-agg-tiles-hash is set. 
#[arg(long, value_name = "TYPE", default_value_t=CopyType::default())] @@ -124,19 +148,40 @@ pub struct CopyArgs { /// Bounding box to copy, in the format `min_lon,min_lat,max_lon,max_lat`. Can be used multiple times. #[arg(long)] bbox: Vec, - /// Compare source file with this file, and only copy non-identical tiles to destination. - /// It should be later possible to run `mbtiles apply-diff SRC_FILE DST_FILE` to get the same DIFF file. - #[arg(long, conflicts_with("apply_patch"))] - diff_with_file: Option, - /// Compare source file with this file, and only copy non-identical tiles to destination. - /// It should be later possible to run `mbtiles apply-diff SRC_FILE DST_FILE` to get the same DIFF file. - #[arg(long, conflicts_with("diff_with_file"))] - apply_patch: Option, /// Skip generating a global hash for mbtiles validation. By default, `mbtiles` will compute `agg_tiles_hash` metadata value. #[arg(long)] skip_agg_tiles_hash: bool, } +impl SharedCopyOpts { + #[must_use] + pub fn into_copier( + self, + src_file: PathBuf, + dst_file: PathBuf, + diff_with_file: Option, + apply_patch: Option, + ) -> MbtilesCopier { + MbtilesCopier { + src_file, + dst_file, + diff_with_file, + apply_patch, + // Shared + copy: self.copy, + dst_type_cli: self.mbtiles_type, + on_duplicate: self.on_duplicate, + min_zoom: self.min_zoom, + max_zoom: self.max_zoom, + zoom_levels: self.zoom_levels, + bbox: self.bbox, + skip_agg_tiles_hash: self.skip_agg_tiles_hash, + // Constants + dst_type: None, // Taken from dst_type_cli + } + } +} + #[tokio::main] async fn main() { let env = env_logger::Env::default().default_filter_or("mbtiles=info"); @@ -165,29 +210,26 @@ async fn main_int() -> anyhow::Result<()> { Commands::MetaSetValue { file, key, value } => { meta_set_value(file.as_path(), &key, value.as_deref()).await?; } - Commands::Copy(opts) => { - let opts = MbtilesCopier { - src_file: opts.src_file, - dst_file: opts.dst_file, - copy: opts.copy, - dst_type_cli: opts.mbtiles_type, - dst_type: 
None, - on_duplicate: opts.on_duplicate, - min_zoom: opts.min_zoom, - max_zoom: opts.max_zoom, - zoom_levels: opts.zoom_levels, - bbox: opts.bbox, - diff_with_file: opts.diff_with_file, - apply_patch: opts.apply_patch, - skip_agg_tiles_hash: opts.skip_agg_tiles_hash, - }; - opts.run().await?; + Commands::Copy(args) => { + let copier = args.options.into_copier( + args.src_file, + args.dst_file, + args.diff_with_file, + args.apply_patch, + ); + copier.run().await?; + } + Commands::Diff(args) => { + let copier = args + .options + .into_copier(args.file1, args.diff, Some(args.file2), None); + copier.run().await?; } Commands::ApplyPatch { - src_file, - diff_file, + base_file, + patch_file, } => { - apply_patch(src_file, diff_file).await?; + apply_patch(base_file, patch_file).await?; } Commands::UpdateMetadata { file, update_zoom } => { let mbt = Mbtiles::new(file.as_path())?; @@ -219,28 +261,6 @@ async fn main_int() -> anyhow::Result<()> { println!("MBTiles file summary for {mbt}"); println!("{}", mbt.summary(&mut conn).await?); } - Commands::Diff { - file_a, - file_b, - diff, - } => { - let opts = MbtilesCopier { - src_file: file_a, - diff_with_file: Some(file_b), - dst_file: diff, - copy: CopyType::All, - skip_agg_tiles_hash: false, - on_duplicate: Some(CopyDuplicateMode::Override), - dst_type_cli: None, - dst_type: None, - min_zoom: None, - max_zoom: None, - zoom_levels: vec![], - bbox: vec![], - apply_patch: None, - }; - opts.run().await?; - } } Ok(()) @@ -329,8 +349,11 @@ mod tests { command: Copy(CopyArgs { src_file: PathBuf::from("src_file"), dst_file: PathBuf::from("dst_file"), - min_zoom: Some(1), - max_zoom: Some(100), + options: SharedCopyOpts { + min_zoom: Some(1), + max_zoom: Some(100), + ..Default::default() + }, ..Default::default() }) } @@ -391,7 +414,10 @@ mod tests { command: Copy(CopyArgs { src_file: PathBuf::from("src_file"), dst_file: PathBuf::from("dst_file"), - zoom_levels: vec![3, 7, 1], + options: SharedCopyOpts { + zoom_levels: vec![3, 7, 1], + 
..Default::default() + }, ..Default::default() }) } @@ -437,7 +463,10 @@ mod tests { command: Copy(CopyArgs { src_file: PathBuf::from("src_file"), dst_file: PathBuf::from("dst_file"), - on_duplicate: Some(CopyDuplicateMode::Override), + options: SharedCopyOpts { + on_duplicate: Some(CopyDuplicateMode::Override), + ..Default::default() + }, ..Default::default() }) } @@ -453,13 +482,43 @@ mod tests { command: Copy(CopyArgs { src_file: PathBuf::from("src_file"), dst_file: PathBuf::from("dst_file"), - copy: CopyType::Metadata, + options: SharedCopyOpts { + copy: CopyType::Metadata, + ..Default::default() + }, ..Default::default() }) } ); } + #[test] + fn test_diff() { + assert_eq!( + Args::parse_from([ + "mbtiles", + "diff", + "file1.mbtiles", + "file2.mbtiles", + "../delta.mbtiles", + "--on-duplicate", + "override" + ]), + Args { + verbose: false, + command: Diff(DiffArgs { + file1: PathBuf::from("file1.mbtiles"), + file2: PathBuf::from("file2.mbtiles"), + diff: PathBuf::from("../delta.mbtiles"), + options: SharedCopyOpts { + on_duplicate: Some(CopyDuplicateMode::Override), + ..Default::default() + }, + }) + } + ); + } + #[test] fn test_meta_get_no_arguments() { assert_eq!( @@ -531,8 +590,8 @@ mod tests { Args { verbose: false, command: ApplyPatch { - src_file: PathBuf::from("src_file"), - diff_file: PathBuf::from("diff_file"), + base_file: PathBuf::from("src_file"), + patch_file: PathBuf::from("diff_file"), } } ); @@ -553,25 +612,4 @@ mod tests { } ); } - - #[test] - fn test_diff() { - assert_eq!( - Args::parse_from([ - "mbtiles", - "diff", - "file-a.mbtiles", - "file-b.mbtiles", - "../delta.mbtiles", - ]), - Args { - verbose: false, - command: Diff { - file_a: PathBuf::from("file-a.mbtiles"), - file_b: PathBuf::from("file-b.mbtiles"), - diff: PathBuf::from("../delta.mbtiles"), - } - } - ); - } } diff --git a/mbtiles/src/copier.rs b/mbtiles/src/copier.rs index ed304b674..31b1766ab 100644 --- a/mbtiles/src/copier.rs +++ b/mbtiles/src/copier.rs @@ -63,7 +63,8 @@ pub 
struct MbtilesCopier { /// Bounding box to copy, in the format `min_lon,min_lat,max_lon,max_lat`. Can be used multiple times. pub bbox: Vec, /// Compare source file with this file, and only copy non-identical tiles to destination. - /// It should be later possible to run `mbtiles apply-diff SRC_FILE DST_FILE` to get the same DIFF file. + /// Use `mbtiles diff` as a more convenient way to generate this file. + /// It should be later possible to run `mbtiles apply-diff` to merge it in. pub diff_with_file: Option, /// Compare source file with this file, and only copy non-identical tiles to destination. /// It should be later possible to run `mbtiles apply-diff SRC_FILE DST_FILE` to get the same DIFF file. diff --git a/mbtiles/src/patcher.rs b/mbtiles/src/patcher.rs index 063333208..1e1777fa9 100644 --- a/mbtiles/src/patcher.rs +++ b/mbtiles/src/patcher.rs @@ -7,18 +7,18 @@ use crate::queries::detach_db; use crate::MbtType::{Flat, FlatWithHash, Normalized}; use crate::{MbtResult, MbtType, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF}; -pub async fn apply_patch(src_file: PathBuf, patch_file: PathBuf) -> MbtResult<()> { - let src_mbt = Mbtiles::new(src_file)?; +pub async fn apply_patch(base_file: PathBuf, patch_file: PathBuf) -> MbtResult<()> { + let base_mbt = Mbtiles::new(base_file)?; let patch_mbt = Mbtiles::new(patch_file)?; let patch_type = patch_mbt.open_and_detect_type().await?; - let mut conn = src_mbt.open().await?; - let src_type = src_mbt.detect_type(&mut conn).await?; + let mut conn = base_mbt.open().await?; + let base_type = base_mbt.detect_type(&mut conn).await?; patch_mbt.attach_to(&mut conn, "patchDb").await?; - info!("Applying patch file {patch_mbt} ({patch_type}) to {src_mbt} ({src_type})"); - let select_from = get_select_from(src_type, patch_type); - let (main_table, insert1, insert2) = get_insert_sql(src_type, select_from); + info!("Applying patch file {patch_mbt} ({patch_type}) to {base_mbt} ({base_type})"); + let select_from = 
get_select_from(base_type, patch_type); + let (main_table, insert1, insert2) = get_insert_sql(base_type, select_from); query(&format!("{insert1} WHERE tile_data NOTNULL")) .execute(&mut conn) @@ -40,7 +40,7 @@ pub async fn apply_patch(src_file: PathBuf, patch_file: PathBuf) -> MbtResult<() .execute(&mut conn) .await?; - if src_type.is_normalized() { + if base_type.is_normalized() { debug!("Removing unused tiles from the images table (normalized schema)"); query("DELETE FROM images WHERE tile_id NOT IN (SELECT tile_id FROM map)") .execute(&mut conn)