From 1295fdda9e1235adfaadefb8a7b3f2971ee678ed Mon Sep 17 00:00:00 2001 From: feathercyc Date: Mon, 27 May 2024 16:26:39 +0800 Subject: [PATCH 1/3] feat: add .gitignore Signed-off-by: feathercyc --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target From e179fcd884e1cb4e26f7627aa2f1a0b91d18e4ca Mon Sep 17 00:00:00 2001 From: feathercyc Date: Mon, 27 May 2024 18:56:11 +0800 Subject: [PATCH 2/3] feat: add some doc Signed-off-by: feathercyc --- Cargo.lock | 648 +++++++++++++++++++++++++ Cargo.toml | 23 + README.md | 36 ++ benches/bench.rs | 113 +++++ src/entry.rs | 97 ++++ src/index.rs | 64 +++ src/interval.rs | 76 +++ src/intervalmap.rs | 1141 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 32 ++ src/node.rs | 124 +++++ 10 files changed, 2354 insertions(+) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 benches/bench.rs create mode 100644 src/entry.rs create mode 100644 src/index.rs create mode 100644 src/interval.rs create mode 100644 src/intervalmap.rs create mode 100644 src/lib.rs create mode 100644 src/node.rs diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..78c1808 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,648 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "getrandom" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "interval_map" +version = "0.1.0" +dependencies = [ + "criterion", + "rand", +] + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + 
"either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = 
"plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ad3dee41f36859875573074334c200d1add8e4a87bb37113ebd31d926b7b11f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + 
"windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8a89c0e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,23 @@ +[package] 
+name = "interval_map" +version = "0.1.0" +edition = "2021" +authors = ["feathercyc feathercyc@163.com"] +description = "`interval_map` is a map based on interval tree." +license = "Apache-2.0" +keywords = ["Interval Tree", "Augmented Tree", "Red-Black Tree"] + +[dependencies] + +[dev-dependencies] +criterion = "0.5.1" +rand = "0.8.5" + +[features] +default = [] +interval_tree_find_overlap_ordered = [] + +[[bench]] +name = "bench" +path = "benches/bench.rs" +harness = false diff --git a/README.md b/README.md new file mode 100644 index 0000000..0fdb9b5 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# interval_map + +`interval_map` is a map based on interval tree. It fully implements the insertion and deletion functionality of a red-black tree, ensuring that each modification operation requires at most $O(\log N)$ time complexity. + +The implementation of the interval tree in interval_map references "Introduction to Algorithms" (3rd ed., Section 14.3: Interval trees, pp. 348–354). + +To safely and efficiently handle insertion and deletion operations in Rust, `interval_map` innovatively **uses arrays to simulate pointers** for managing the parent-child references in the red-black tree. This approach also ensures that interval_map has the `Send` and `Unpin` traits, allowing it to be safely transferred between threads and to maintain a fixed memory location during asynchronous operations. + +`interval_map` implements an `IntervalMap` struct: +- It accepts `Interval<T>` as the key, where `T` can be any type that implements `Ord+Clone` trait. Therefore, intervals such as $[1, 2)$ and $["aaa", "bbb")$ are allowed +- The value can be of any type + +`interval_map` supports `insert`, `delete`, and `iter` fns. Traversal is performed in the order of `Interval` . For instance, with intervals of type `Interval`: +- $[1,4)<[2,5)$, because $1<2$ +- $[1,4)<[1,5)$, because $4<5$ + +So the order of intervals in `IntervalMap` is $[1,4)<[1,5)<[2,5)$.
+ +Currently, `interval_map` only supports half-open intervals, i.e., $[...,...)$. + +## Benchmark + +The benchmark was conducted on a platform with `AMD R7 7840H + DDR5 5600MHz`. The results are as follows: +1. Only insert + | insert | 100 | 1000 | 10, 000 | 100, 000 | + | --------------- | --------- | --------- | --------- | --------- | + | Time per insert | 5.4168 µs | 80.518 µs | 2.2823 ms | 36.528 ms | +2. Insert N and remove N + | insert_and_remove | 100 | 1000 | 10, 000 | 100, 000 | + | ------------------ | --------- | --------- | --------- | --------- | + | Time per operation | 10.333 µs | 223.43 µs | 4.9358 ms | 81.634 ms | + +## TODO +- [ ] Support for $(...,...)$, $[...,...]$ and $(...,...]$ interval types. +- [ ] Add more tests like [etcd](https://github.com/etcd-io/etcd/blob/main/pkg/adt/interval_tree_test.go) +- [ ] Add Point type for Interval diff --git a/benches/bench.rs b/benches/bench.rs new file mode 100644 index 0000000..f7f7435 --- /dev/null +++ b/benches/bench.rs @@ -0,0 +1,113 @@ +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use interval_map::{Interval, IntervalMap}; +use std::hint::black_box; + +struct Rng { + state: u32, +} +impl Rng { + fn new() -> Self { + Self { state: 0x87654321 } + } + + fn gen_u32(&mut self) -> u32 { + self.state ^= self.state << 13; + self.state ^= self.state >> 17; + self.state ^= self.state << 5; + self.state + } + + fn gen_range_i32(&mut self, low: i32, high: i32) -> i32 { + let d = (high - low) as u32; + low + (self.gen_u32() % d) as i32 + } +} + +struct IntervalGenerator { + rng: Rng, + limit: i32, +} +impl IntervalGenerator { + fn new() -> Self { + const LIMIT: i32 = 100000; + Self { + rng: Rng::new(), + limit: LIMIT, + } + } + + fn next(&mut self) -> Interval { + let low = self.rng.gen_range_i32(0, self.limit - 1); + let high = self.rng.gen_range_i32(low + 1, self.limit); + Interval::new(low, high) + } +} + +// insert helper fn +fn interval_map_insert(count: usize, bench: &mut Bencher) {
+ let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + bench.iter(|| { + let mut map = IntervalMap::new(); + for i in intervals.clone() { + black_box(map.insert(i, ())); + } + }); +} + +// insert and remove helper fn +fn interval_map_insert_remove(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + bench.iter(|| { + let mut map = IntervalMap::new(); + for i in intervals.clone() { + black_box(map.insert(i, ())); + } + for i in &intervals { + black_box(map.remove(&i)); + } + }); +} + +fn bench_interval_map_insert(c: &mut Criterion) { + c.bench_function("bench_interval_map_insert_100", |b| { + interval_map_insert(100, b) + }); + c.bench_function("bench_interval_map_insert_1000", |b| { + interval_map_insert(1000, b) + }); + c.bench_function("bench_interval_map_insert_10,000", |b| { + interval_map_insert(10_000, b) + }); + c.bench_function("bench_interval_map_insert_100,000", |b| { + interval_map_insert(100_000, b) + }); +} + +fn bench_interval_map_insert_remove(c: &mut Criterion) { + c.bench_function("bench_interval_map_insert_remove_100", |b| { + interval_map_insert_remove(100, b) + }); + c.bench_function("bench_interval_map_insert_remove_1000", |b| { + interval_map_insert_remove(1000, b) + }); + c.bench_function("bench_interval_map_insert_remove_10,000", |b| { + interval_map_insert_remove(10_000, b) + }); + c.bench_function("bench_interval_map_insert_remove_100,000", |b| { + interval_map_insert_remove(100_000, b) + }); +} + +fn criterion_config() -> Criterion { + Criterion::default().configure_from_args().without_plots() +} + +criterion_group! 
{ + name = benches; + config = criterion_config(); + targets = bench_interval_map_insert, bench_interval_map_insert_remove +} + +criterion_main!(benches); diff --git a/src/entry.rs b/src/entry.rs new file mode 100644 index 0000000..731a55a --- /dev/null +++ b/src/entry.rs @@ -0,0 +1,97 @@ +use crate::index::{IndexType, NodeIndex}; +use crate::interval::Interval; +use crate::intervalmap::IntervalMap; +use crate::node::Node; + +/// A view into a single entry in a map, which may either be vacant or occupied. +#[derive(Debug)] +pub enum Entry<'a, T, V, Ix> { + /// An occupied entry. + Occupied(OccupiedEntry<'a, T, V, Ix>), + /// A vacant entry. + Vacant(VacantEntry<'a, T, V, Ix>), +} + +/// A view into an occupied entry in a `IntervalMap`. +/// It is part of the [`Entry`] enum. +#[derive(Debug)] +pub struct OccupiedEntry<'a, T, V, Ix> { + /// Reference to the map + pub map_ref: &'a mut IntervalMap, + /// The entry node + pub node: NodeIndex, +} + +/// A view into a vacant entry in a `IntervalMap`. +/// It is part of the [`Entry`] enum. +#[derive(Debug)] +pub struct VacantEntry<'a, T, V, Ix> { + /// Mutable reference to the map + pub map_ref: &'a mut IntervalMap, + /// The interval of this entry + pub interval: Interval, +} + +impl<'a, T, V, Ix> Entry<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + /// Ensures a value is in the entry by inserting the default if empty, and returns + /// a mutable reference to the value in the entry. 
+ /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap, Entry}; + /// + /// let mut map = IntervalMap::new(); + /// assert!(matches!(map.entry(Interval::new(1, 2)), Entry::Vacant(_))); + /// map.entry(Interval::new(1, 2)).or_insert(3); + /// assert!(matches!(map.entry(Interval::new(1, 2)), Entry::Occupied(_))); + /// assert_eq!(map.get(&Interval::new(1, 2)), Some(&3)); + /// ``` + #[inline] + pub fn or_insert(self, default: V) -> &'a mut V { + match self { + Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node, Node::value_mut), + Entry::Vacant(entry) => { + let entry_idx = NodeIndex::new(entry.map_ref.nodes.len()); + let _ignore = entry.map_ref.insert(entry.interval, default); + entry.map_ref.node_mut(entry_idx, Node::value_mut) + } + } + } + + /// Provides in-place mutable access to an occupied entry before any + /// potential inserts into the map. + /// + /// # Panics + /// + /// This method panics when the node is a sentinel node + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap, Entry}; + /// + /// let mut map = IntervalMap::new(); + /// + /// map.insert(Interval::new(6, 7), 3); + /// assert!(matches!(map.entry(Interval::new(6, 7)), Entry::Occupied(_))); + /// map.entry(Interval::new(6, 7)).and_modify(|v| *v += 1); + /// assert_eq!(map.get(&Interval::new(6, 7)), Some(&4)); + /// ``` + #[inline] + #[must_use] + pub fn and_modify(self, f: F) -> Self + where + F: FnOnce(&mut V), + { + match self { + Entry::Occupied(entry) => { + f(entry.map_ref.node_mut(entry.node, Node::value_mut)); + Self::Occupied(entry) + } + Entry::Vacant(entry) => Self::Vacant(entry), + } + } +} diff --git a/src/index.rs b/src/index.rs new file mode 100644 index 0000000..657f955 --- /dev/null +++ b/src/index.rs @@ -0,0 +1,64 @@ +use std::fmt; +use std::hash::Hash; + +pub type DefaultIx = u32; + +pub unsafe trait IndexType: Copy + Default + Hash + Ord + fmt::Debug + 'static { + fn new(x: usize) -> Self; + fn index(&self) -> 
usize; + fn max() -> Self; +} + +unsafe impl IndexType for u32 { + #[inline(always)] + fn new(x: usize) -> Self { + x as u32 + } + #[inline(always)] + fn index(&self) -> usize { + *self as usize + } + #[inline(always)] + fn max() -> Self { + ::std::u32::MAX + } +} + +/// Node identifier. +#[derive(Copy, Clone, Default, PartialEq, PartialOrd, Eq, Ord, Hash)] +pub struct NodeIndex(Ix); + +impl NodeIndex { + #[inline] + pub fn new(x: usize) -> Self { + NodeIndex(IndexType::new(x)) + } + + #[inline] + pub fn index(self) -> usize { + self.0.index() + } + + #[inline] + pub fn end() -> Self { + NodeIndex(IndexType::max()) + } +} + +unsafe impl IndexType for NodeIndex { + fn index(&self) -> usize { + self.0.index() + } + fn new(x: usize) -> Self { + NodeIndex::new(x) + } + fn max() -> Self { + NodeIndex(::max()) + } +} + +impl fmt::Debug for NodeIndex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "NodeIndex({:?})", self.0) + } +} diff --git a/src/interval.rs b/src/interval.rs new file mode 100644 index 0000000..e7d78bc --- /dev/null +++ b/src/interval.rs @@ -0,0 +1,76 @@ +//! The `Interval` stored in `IntervalMap` represents the interval [low, high) +//! +//! `interval_map` supports `insert`, `delete`, and `iter` fns. Traversal is performed in the order of `Interval` . For instance, with intervals of type `Interval`: +//! - [1,4)<[2,5), because 1<2 +//! - [1,4)<[1,5), because 4<5 +//! +//! So the order of intervals in `IntervalMap` is [1,4)<[1,5)<[2,5). +//! +//! Currently, `interval_map` only supports half-open intervals, i.e., [...,...).
+ +/// The interval stored in `IntervalMap` represents [low, high) +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[non_exhaustive] +pub struct Interval { + /// Low value + pub low: T, + /// high value + pub high: T, +} + +impl Interval { + /// Create a new `Interval` + /// + /// # Panics + /// + /// This method panics when low >= high + #[inline] + pub fn new(low: T, high: T) -> Self { + assert!(low < high, "invalid range"); + Self { low, high } + } + + /// Checks if self overlaps with other interval + #[inline] + pub fn overlap(&self, other: &Self) -> bool { + self.high > other.low && other.high > self.low + } +} + +/// Reference type of `Interval` +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct IntervalRef<'a, T> { + /// Low value + low: &'a T, + /// high value + high: &'a T, +} + +impl<'a, T: Ord> IntervalRef<'a, T> { + /// Create a new `IntervalRef` + /// + /// # Panics + /// + /// This method panics when low >= high + #[inline] + pub fn new(low: &'a T, high: &'a T) -> Self { + assert!(low < high, "invalid range"); + Self { low, high } + } + + /// Check if self overlaps with a `Interval` + pub fn overlap(&self, other: &Interval) -> bool { + self.high > &other.low && &other.high > self.low + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + #[should_panic(expected = "invalid range")] + fn invalid_range_should_panic() { + let _interval = Interval::new(3, 1); + } +} diff --git a/src/intervalmap.rs b/src/intervalmap.rs new file mode 100644 index 0000000..51dcb24 --- /dev/null +++ b/src/intervalmap.rs @@ -0,0 +1,1141 @@ +use crate::entry::{Entry, OccupiedEntry, VacantEntry}; +use crate::index::{DefaultIx, IndexType, NodeIndex}; +use crate::interval::{Interval, IntervalRef}; +use crate::node::{Color, Node}; +use std::collections::VecDeque; + +/// An interval-value map, which support operations on dynamic sets of intervals. 
+#[derive(Debug)] +pub struct IntervalMap { + /// Vector that stores nodes + pub(crate) nodes: Vec>, + /// Root of the interval tree + pub(crate) root: NodeIndex, + /// Number of elements in the map + pub(crate) len: usize, +} + +impl IntervalMap +where + T: Ord, + Ix: IndexType, +{ + /// Creates a new `IntervalMap` with estimated capacity. + #[inline] + #[must_use] + pub fn with_capacity(capacity: usize) -> Self { + let mut nodes = vec![Self::new_sentinel()]; + nodes.reserve(capacity); + IntervalMap { + nodes, + root: Self::sentinel(), + len: 0, + } + } + + /// Insert an interval-value pair into the map. + /// If the interval exists, overwrite and return the previous value. + /// + /// # Panics + /// + /// This method panics when the tree is at the maximum number of nodes for its index + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// assert_eq!(map.insert(Interval::new(1, 3), 1), None); + /// assert_eq!(map.insert(Interval::new(1, 3), 2), Some(1)); + /// assert_eq!(map.insert(Interval::new(1, 3), 3), Some(2)); + /// ``` + #[inline] + pub fn insert(&mut self, interval: Interval, value: V) -> Option { + let node_idx = NodeIndex::new(self.nodes.len()); + let node = Self::new_node(interval, value, node_idx); + // check for max capacity, except if we use usize + assert!( + ::max().index() == !0 || NodeIndex::end() != node_idx, + "Reached maximum number of nodes" + ); + self.nodes.push(node); + self.insert_inner(node_idx) + } + + /// Remove an interval from the map, returning the value at the interval if the interval exists + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(1, 3), 1); + /// map.insert(Interval::new(2, 4), 2); + /// assert_eq!(map.len(), 2); + /// assert_eq!(map.remove(&Interval::new(3, 6)), None); + /// assert_eq!(map.len(), 2); + /// 
assert_eq!(map.remove(&Interval::new(2, 4)), Some(2)); + /// assert_eq!(map.len(), 1); + /// ``` + #[inline] + pub fn remove(&mut self, interval: &Interval) -> Option { + if let Some(node_idx) = self.search_exact(interval) { + self.remove_inner(node_idx); + // Swap the node with the last node stored in the vector and update indices + let mut node = self.nodes.swap_remove(node_idx.index()); + let old = NodeIndex::::new(self.nodes.len()); + self.update_idx(old, node_idx); + + return node.value.take(); + } + None + } + + /// Check if an interval in the map overlaps with the given interval. + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(1, 3), ()); + /// map.insert(Interval::new(6, 7), ()); + /// map.insert(Interval::new(9, 11), ()); + /// assert!(map.overlap(&Interval::new(2, 5))); + /// assert!(map.overlap(&Interval::new(1, 17))); + /// assert!(!map.overlap(&Interval::new(3, 6))); + /// assert!(!map.overlap(&Interval::new(11, 23))); + /// ``` + #[inline] + pub fn overlap(&self, interval: &Interval) -> bool { + let node_idx = self.search(interval); + !self.node_ref(node_idx, Node::is_sentinel) + } + + /// Find all intervals in the map that overlaps with the given interval. 
+ /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(1, 3), ()); + /// map.insert(Interval::new(2, 4), ()); + /// map.insert(Interval::new(6, 7), ()); + /// map.insert(Interval::new(7, 11), ()); + /// assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 3); + /// map.remove(&Interval::new(1, 3)); + /// assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 2); + /// ``` + #[inline] + pub fn find_all_overlap(&self, interval: &Interval) -> Vec<(&Interval, &V)> { + if self.node_ref(self.root, Node::is_sentinel) { + Vec::new() + } else { + self.find_all_overlap_inner_unordered(self.root, interval) + } + } + + /// Return reference to the value corresponding to the key. + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(1, 3), 1); + /// map.insert(Interval::new(7, 11), 4); + /// assert_eq!(map.get(&Interval::new(1, 3)), Some(&1)); + /// assert_eq!(map.get(&Interval::new(7, 11)), Some(&4)); + /// assert_eq!(map.get(&Interval::new(5, 17)), None); + /// ``` + #[inline] + pub fn get(&self, interval: &Interval) -> Option<&V> { + self.search_exact(interval) + .map(|idx| self.node_ref(idx, Node::value)) + } + + /// Return a reference to the value corresponding to the key. + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(3, 5), 0); + /// map.get_mut(&Interval::new(3, 5)).map(|v| *v += 1); + /// assert_eq!(map.get(&Interval::new(3, 5)), Some(&1)); + /// ``` + #[inline] + pub fn get_mut(&mut self, interval: &Interval) -> Option<&mut V> { + self.search_exact(interval) + .map(|idx| self.node_mut(idx, Node::value_mut)) + } + + /// Get an iterator over the entries of the map, sorted by key. 
+ #[inline] + #[must_use] + pub fn iter(&self) -> Iter<'_, T, V, Ix> { + Iter { + map_ref: self, + stack: None, + } + } + + /// Get the given key's corresponding entry in the map for in-place manipulation. + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap, Entry}; + /// + /// let mut map = IntervalMap::new(); + /// + /// assert!(matches!(map.entry(Interval::new(1, 2)), Entry::Vacant(_))); + /// map.entry(Interval::new(1, 2)).or_insert(0); + /// assert!(matches!(map.entry(Interval::new(1, 2)), Entry::Occupied(_))); + /// map.entry(Interval::new(1, 2)).and_modify(|v| *v += 1); + /// assert_eq!(map.get(&Interval::new(1, 2)), Some(&1)); + /// ``` + #[inline] + pub fn entry(&mut self, interval: Interval) -> Entry<'_, T, V, Ix> { + match self.search_exact(&interval) { + Some(node) => Entry::Occupied(OccupiedEntry { + map_ref: self, + node, + }), + None => Entry::Vacant(VacantEntry { + map_ref: self, + interval, + }), + } + } + + /// Remove all elements from the map + #[inline] + pub fn clear(&mut self) { + self.nodes.clear(); + self.nodes.push(Self::new_sentinel()); + self.root = Self::sentinel(); + self.len = 0; + } + + /// Return the number of elements in the map. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.len + } + + /// Return `true` if the map contains no elements. 
+ #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl IntervalMap +where + T: Ord, +{ + /// Create an empty `IntervalMap` + #[inline] + #[must_use] + pub fn new() -> Self { + Self { + nodes: vec![Self::new_sentinel()], + root: Self::sentinel(), + len: 0, + } + } +} + +impl Default for IntervalMap +where + T: Ord, +{ + #[inline] + fn default() -> Self { + Self::with_capacity(0) + } +} + +impl IntervalMap +where + T: Ord, + Ix: IndexType, +{ + /// Create a new sentinel node + fn new_sentinel() -> Node { + Node { + interval: None, + value: None, + max_index: None, + left: None, + right: None, + parent: None, + color: Color::Black, + } + } + + /// Create a new tree node + fn new_node(interval: Interval, value: V, index: NodeIndex) -> Node { + Node { + max_index: Some(index), + interval: Some(interval), + value: Some(value), + left: Some(Self::sentinel()), + right: Some(Self::sentinel()), + parent: Some(Self::sentinel()), + color: Color::Red, + } + } + + /// Get the sentinel node index + fn sentinel() -> NodeIndex { + NodeIndex::new(0) + } +} + +impl IntervalMap +where + T: Ord, + Ix: IndexType, +{ + /// Insert a node into the tree. 
+ fn insert_inner(&mut self, z: NodeIndex) -> Option { + let mut y = Self::sentinel(); + let mut x = self.root; + + while !self.node_ref(x, Node::is_sentinel) { + y = x; + if self.node_ref(z, Node::interval) == self.node_ref(y, Node::interval) { + let zval = self.node_mut(z, Node::take_value); + let old_value = self.node_mut(y, Node::set_value(zval)); + return Some(old_value); + } + if self.node_ref(z, Node::interval) < self.node_ref(x, Node::interval) { + x = self.node_ref(x, Node::left); + } else { + x = self.node_ref(x, Node::right); + } + } + self.node_mut(z, Node::set_parent(y)); + if self.node_ref(y, Node::is_sentinel) { + self.root = z; + } else { + if self.node_ref(z, Node::interval) < self.node_ref(y, Node::interval) { + self.node_mut(y, Node::set_left(z)); + } else { + self.node_mut(y, Node::set_right(z)); + } + self.update_max_bottom_up(y); + } + self.node_mut(z, Node::set_color(Color::Red)); + + self.insert_fixup(z); + + self.len = self.len.wrapping_add(1); + None + } + + /// Remove a node from the tree. 
+ fn remove_inner(&mut self, z: NodeIndex) { + let mut y = z; + let mut y_orig_color = self.node_ref(y, Node::color); + let x; + if self.left_ref(z, Node::is_sentinel) { + x = self.node_ref(z, Node::right); + self.transplant(z, x); + self.update_max_bottom_up(self.node_ref(z, Node::parent)); + } else if self.right_ref(z, Node::is_sentinel) { + x = self.node_ref(z, Node::left); + self.transplant(z, x); + self.update_max_bottom_up(self.node_ref(z, Node::parent)); + } else { + y = self.tree_minimum(self.node_ref(z, Node::right)); + let mut p = y; + y_orig_color = self.node_ref(y, Node::color); + x = self.node_ref(y, Node::right); + if self.node_ref(y, Node::parent) == z { + self.node_mut(x, Node::set_parent(y)); + } else { + self.transplant(y, x); + p = self.node_ref(y, Node::parent); + self.node_mut(y, Node::set_right(self.node_ref(z, Node::right))); + self.right_mut(y, Node::set_parent(y)); + } + self.transplant(z, y); + self.node_mut(y, Node::set_left(self.node_ref(z, Node::left))); + self.left_mut(y, Node::set_parent(y)); + self.node_mut(y, Node::set_color(self.node_ref(z, Node::color))); + + self.update_max_bottom_up(p); + } + + if matches!(y_orig_color, Color::Black) { + self.remove_fixup(x); + } + + self.len = self.len.wrapping_sub(1); + } + + /// Find all intervals in the map that overlaps with the given interval. 
+ #[cfg(interval_tree_find_overlap_ordered)] + fn find_all_overlap_inner( + &self, + x: NodeIndex, + interval: &Interval, + ) -> Vec<(&Interval, &V)> { + let mut list = vec![]; + if self.node_ref(x, Node::interval).overlap(interval) { + list.push(self.node_ref(x, |nx| (nx.interval(), nx.value()))); + } + if self.max(self.node_ref(x, Node::left)) >= Some(&interval.low) { + list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::left), interval)); + } + if self + .max(self.node_ref(x, Node::right)) + .map(|rmax| IntervalRef::new(&self.node_ref(x, Node::interval).low, rmax)) + .is_some_and(|i| i.overlap(interval)) + { + list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::right), interval)); + } + list + } + + /// Find all intervals in the map that overlaps with the given interval. + /// + /// The result is unordered because of breadth-first search to save stack size + fn find_all_overlap_inner_unordered( + &self, + x: NodeIndex, + interval: &Interval, + ) -> Vec<(&Interval, &V)> { + let mut list = Vec::new(); + let mut queue = VecDeque::new(); + queue.push_back(x); + while let Some(p) = queue.pop_front() { + if self.node_ref(p, Node::interval).overlap(interval) { + list.push(self.node_ref(p, |np| (np.interval(), np.value()))); + } + let p_left = self.node_ref(p, Node::left); + let p_right = self.node_ref(p, Node::right); + if self.max(p_left) >= Some(&interval.low) { + queue.push_back(p_left); + } + if self + .max(self.node_ref(p, Node::right)) + .map(|rmax| IntervalRef::new(&self.node_ref(p, Node::interval).low, rmax)) + .is_some_and(|i| i.overlap(interval)) + { + queue.push_back(p_right); + } + } + + list + } + + /// Search for an interval that overlaps with the given interval. 
+ fn search(&self, interval: &Interval) -> NodeIndex { + let mut x = self.root; + while self + .node_ref(x, Node::sentinel) + .map(Node::interval) + .is_some_and(|xi| !xi.overlap(interval)) + { + if self.max(self.node_ref(x, Node::left)) > Some(&interval.low) { + x = self.node_ref(x, Node::left); + } else { + x = self.node_ref(x, Node::right); + } + } + x + } + + /// Search for the node with exact the given interval + fn search_exact(&self, interval: &Interval) -> Option> { + let mut x = self.root; + while !self.node_ref(x, Node::is_sentinel) { + if self.node_ref(x, Node::interval) == interval { + return Some(x); + } + if self.max(x) < Some(&interval.high) { + return None; + } + if self.node_ref(x, Node::interval) > interval { + x = self.node_ref(x, Node::left); + } else { + x = self.node_ref(x, Node::right); + } + } + None + } + + /// Restore red-black tree properties after an insert. + fn insert_fixup(&mut self, mut z: NodeIndex) { + while self.parent_ref(z, Node::is_red) { + if self.grand_parent_ref(z, Node::is_sentinel) { + break; + } + if self.is_left_child(self.node_ref(z, Node::parent)) { + let y = self.grand_parent_ref(z, Node::right); + if self.node_ref(y, Node::is_red) { + self.parent_mut(z, Node::set_color(Color::Black)); + self.node_mut(y, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + z = self.parent_ref(z, Node::parent); + } else { + if self.is_right_child(z) { + z = self.node_ref(z, Node::parent); + self.left_rotate(z); + } + self.parent_mut(z, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + self.right_rotate(self.parent_ref(z, Node::parent)); + } + } else { + let y = self.grand_parent_ref(z, Node::left); + if self.node_ref(y, Node::is_red) { + self.parent_mut(z, Node::set_color(Color::Black)); + self.node_mut(y, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + z = self.parent_ref(z, Node::parent); + } else { + if 
self.is_left_child(z) { + z = self.node_ref(z, Node::parent); + self.right_rotate(z); + } + self.parent_mut(z, Node::set_color(Color::Black)); + self.grand_parent_mut(z, Node::set_color(Color::Red)); + self.left_rotate(self.parent_ref(z, Node::parent)); + } + } + } + self.node_mut(self.root, Node::set_color(Color::Black)); + } + + /// Restore red-black tree properties after a remove. + fn remove_fixup(&mut self, mut x: NodeIndex) { + while x != self.root && self.node_ref(x, Node::is_black) { + let mut w; + if self.is_left_child(x) { + w = self.parent_ref(x, Node::right); + if self.node_ref(w, Node::is_red) { + self.node_mut(w, Node::set_color(Color::Black)); + self.parent_mut(x, Node::set_color(Color::Red)); + self.left_rotate(self.node_ref(x, Node::parent)); + w = self.parent_ref(x, Node::right); + } + if self.node_ref(w, Node::is_sentinel) { + break; + } + if self.left_ref(w, Node::is_black) && self.right_ref(w, Node::is_black) { + self.node_mut(w, Node::set_color(Color::Red)); + x = self.node_ref(x, Node::parent); + } else { + if self.right_ref(w, Node::is_black) { + self.left_mut(w, Node::set_color(Color::Black)); + self.node_mut(w, Node::set_color(Color::Red)); + self.right_rotate(w); + w = self.parent_ref(x, Node::right); + } + self.node_mut(w, Node::set_color(self.parent_ref(x, Node::color))); + self.parent_mut(x, Node::set_color(Color::Black)); + self.right_mut(w, Node::set_color(Color::Black)); + self.left_rotate(self.node_ref(x, Node::parent)); + x = self.root; + } + } else { + w = self.parent_ref(x, Node::left); + if self.node_ref(w, Node::is_red) { + self.node_mut(w, Node::set_color(Color::Black)); + self.parent_mut(x, Node::set_color(Color::Red)); + self.right_rotate(self.node_ref(x, Node::parent)); + w = self.parent_ref(x, Node::left); + } + if self.node_ref(w, Node::is_sentinel) { + break; + } + if self.right_ref(w, Node::is_black) && self.left_ref(w, Node::is_black) { + self.node_mut(w, Node::set_color(Color::Red)); + x = self.node_ref(x, 
Node::parent); + } else { + if self.left_ref(w, Node::is_black) { + self.right_mut(w, Node::set_color(Color::Black)); + self.node_mut(w, Node::set_color(Color::Red)); + self.left_rotate(w); + w = self.parent_ref(x, Node::left); + } + self.node_mut(w, Node::set_color(self.parent_ref(x, Node::color))); + self.parent_mut(x, Node::set_color(Color::Black)); + self.left_mut(w, Node::set_color(Color::Black)); + self.right_rotate(self.node_ref(x, Node::parent)); + x = self.root; + } + } + } + self.node_mut(x, Node::set_color(Color::Black)); + } + + /// Binary tree left rotate. + fn left_rotate(&mut self, x: NodeIndex) { + if self.right_ref(x, Node::is_sentinel) { + return; + } + let y = self.node_ref(x, Node::right); + self.node_mut(x, Node::set_right(self.node_ref(y, Node::left))); + if !self.left_ref(y, Node::is_sentinel) { + self.left_mut(y, Node::set_parent(x)); + } + + self.replace_parent(x, y); + self.node_mut(y, Node::set_left(x)); + + self.rotate_update_max(x, y); + } + + /// Binary tree right rotate. + fn right_rotate(&mut self, x: NodeIndex) { + if self.left_ref(x, Node::is_sentinel) { + return; + } + let y = self.node_ref(x, Node::left); + self.node_mut(x, Node::set_left(self.node_ref(y, Node::right))); + if !self.right_ref(y, Node::is_sentinel) { + self.right_mut(y, Node::set_parent(x)); + } + + self.replace_parent(x, y); + self.node_mut(y, Node::set_right(x)); + + self.rotate_update_max(x, y); + } + + /// Replace parent during a rotation. + fn replace_parent(&mut self, x: NodeIndex, y: NodeIndex) { + self.node_mut(y, Node::set_parent(self.node_ref(x, Node::parent))); + if self.parent_ref(x, Node::is_sentinel) { + self.root = y; + } else if self.is_left_child(x) { + self.parent_mut(x, Node::set_left(y)); + } else { + self.parent_mut(x, Node::set_right(y)); + } + self.node_mut(x, Node::set_parent(y)); + } + + /// Update the max value after a rotation. 
+ fn rotate_update_max(&mut self, x: NodeIndex, y: NodeIndex) { + self.node_mut(y, Node::set_max_index(self.node_ref(x, Node::max_index))); + self.recaculate_max(x); + } + + /// Update the max value towards the root + fn update_max_bottom_up(&mut self, x: NodeIndex) { + let mut p = x; + while !self.node_ref(p, Node::is_sentinel) { + self.recaculate_max(p); + p = self.node_ref(p, Node::parent); + } + } + + /// Recaculate max value from left and right childrens + fn recaculate_max(&mut self, x: NodeIndex) { + self.node_mut(x, Node::set_max_index(x)); + let x_left = self.node_ref(x, Node::left); + let x_right = self.node_ref(x, Node::right); + if self.max(x_left) > self.max(x) { + self.node_mut( + x, + Node::set_max_index(self.node_ref(x_left, Node::max_index)), + ); + } + if self.max(x_right) > self.max(x) { + self.node_mut( + x, + Node::set_max_index(self.node_ref(x_right, Node::max_index)), + ); + } + } + + /// Find the node with the minimum interval. + fn tree_minimum(&self, mut x: NodeIndex) -> NodeIndex { + while !self.left_ref(x, Node::is_sentinel) { + x = self.node_ref(x, Node::left); + } + x + } + + /// Replace one subtree as a child of its parent with another subtree. + fn transplant(&mut self, u: NodeIndex, v: NodeIndex) { + if self.parent_ref(u, Node::is_sentinel) { + self.root = v; + } else if self.is_left_child(u) { + self.parent_mut(u, Node::set_left(v)); + } else { + self.parent_mut(u, Node::set_right(v)); + } + self.node_mut(v, Node::set_parent(self.node_ref(u, Node::parent))); + } + + /// Check if a node is a left child of its parent. + fn is_left_child(&self, node: NodeIndex) -> bool { + self.parent_ref(node, Node::left) == node + } + + /// Check if a node is a right child of its parent. + fn is_right_child(&self, node: NodeIndex) -> bool { + self.parent_ref(node, Node::right) == node + } + + /// Update nodes indices after remove + /// + /// This method has a time complexity of `O(logn)`, as we need to + /// update the max index from bottom to top. 
+ fn update_idx(&mut self, old: NodeIndex, new: NodeIndex) { + if self.root == old { + self.root = new; + } + if self.nodes.get(new.index()).is_some() { + if !self.parent_ref(new, Node::is_sentinel) { + if self.parent_ref(new, Node::left) == old { + self.parent_mut(new, Node::set_left(new)); + } else { + self.parent_mut(new, Node::set_right(new)); + } + } + self.left_mut(new, Node::set_parent(new)); + self.right_mut(new, Node::set_parent(new)); + + let mut p = new; + while !self.node_ref(p, Node::is_sentinel) { + if self.node_ref(p, Node::max_index) == old { + self.node_mut(p, Node::set_max_index(new)); + } + p = self.node_ref(p, Node::parent); + } + } + } +} + +// Convenient methods for reference or mutate current/parent/left/right node +impl<'a, T, V, Ix> IntervalMap +where + Ix: IndexType, +{ + fn node_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + op(&self.nodes[node.index()]) + } + + pub(crate) fn node_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + op(&mut self.nodes[node.index()]) + } + + fn left_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let idx = self.nodes[node.index()].left().index(); + op(&self.nodes[idx]) + } + + fn right_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let idx = self.nodes[node.index()].right().index(); + op(&self.nodes[idx]) + } + + fn parent_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let idx = self.nodes[node.index()].parent().index(); + op(&self.nodes[idx]) + } + + fn grand_parent_ref(&'a self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a Node) -> R, + { + let parent_idx = self.nodes[node.index()].parent().index(); + let grand_parent_idx = self.nodes[parent_idx].parent().index(); + op(&self.nodes[grand_parent_idx]) + } + + fn left_mut(&'a mut self, node: NodeIndex, op: F) 
-> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let idx = self.nodes[node.index()].left().index(); + op(&mut self.nodes[idx]) + } + + fn right_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let idx = self.nodes[node.index()].right().index(); + op(&mut self.nodes[idx]) + } + + fn parent_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let idx = self.nodes[node.index()].parent().index(); + op(&mut self.nodes[idx]) + } + + fn grand_parent_mut(&'a mut self, node: NodeIndex, op: F) -> R + where + R: 'a, + F: FnOnce(&'a mut Node) -> R, + { + let parent_idx = self.nodes[node.index()].parent().index(); + let grand_parent_idx = self.nodes[parent_idx].parent().index(); + op(&mut self.nodes[grand_parent_idx]) + } + + fn max(&self, node: NodeIndex) -> Option<&T> { + let max_index = self.nodes[node.index()].max_index?.index(); + self.nodes[max_index].interval.as_ref().map(|i| &i.high) + } +} + +/// An iterator over the entries of a `IntervalMap`. +#[derive(Debug)] +pub struct Iter<'a, T, V, Ix> { + /// Reference to the map + map_ref: &'a IntervalMap, + /// Stack for iteration + stack: Option>>, +} + +impl Iter<'_, T, V, Ix> +where + Ix: IndexType, +{ + /// Initializes the stack + fn init_stack(&mut self) { + self.stack = Some(Self::left_link(self.map_ref, self.map_ref.root)); + } + + /// Pushes a link of nodes on the left to stack. 
+ fn left_link(map_ref: &IntervalMap, mut x: NodeIndex) -> Vec> { + let mut nodes = vec![]; + while !map_ref.node_ref(x, Node::is_sentinel) { + nodes.push(x); + x = map_ref.node_ref(x, Node::left); + } + nodes + } +} + +impl<'a, T, V, Ix> Iterator for Iter<'a, T, V, Ix> +where + Ix: IndexType, +{ + type Item = (&'a Interval, &'a V); + + #[inline] + fn next(&mut self) -> Option { + if self.stack.is_none() { + self.init_stack(); + } + let stack = self.stack.as_mut().unwrap(); + if stack.is_empty() { + return None; + } + let x = stack.pop().unwrap(); + stack.extend(Self::left_link( + self.map_ref, + self.map_ref.node_ref(x, Node::right), + )); + Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value()))) + } +} + +#[cfg(test)] +mod test { + use std::collections::HashSet; + + use rand::{rngs::StdRng, Rng, SeedableRng}; + + use super::*; + + struct IntervalGenerator { + rng: StdRng, + unique: HashSet>, + limit: i32, + } + + impl IntervalGenerator { + fn new(seed: [u8; 32]) -> Self { + const LIMIT: i32 = 1000; + Self { + rng: SeedableRng::from_seed(seed), + unique: HashSet::new(), + limit: LIMIT, + } + } + + fn next(&mut self) -> Interval { + let low = self.rng.gen_range(0..self.limit - 1); + let high = self.rng.gen_range((low + 1)..self.limit); + Interval::new(low, high) + } + + fn next_unique(&mut self) -> Interval { + let mut interval = self.next(); + while self.unique.contains(&interval) { + interval = self.next(); + } + self.unique.insert(interval.clone()); + interval + } + + fn next_with_range(&mut self, range: i32) -> Interval { + let low = self.rng.gen_range(0..self.limit - 1); + let high = self + .rng + .gen_range((low + 1)..self.limit.min(low + 1 + range)); + Interval::new(low, high) + } + } + + impl IntervalMap { + fn check_max(&self) { + let _ignore = self.check_max_inner(self.root); + } + + fn check_max_inner(&self, x: NodeIndex) -> i32 { + if self.node_ref(x, Node::is_sentinel) { + return 0; + } + let l_max = self.check_max_inner(self.node_ref(x, 
Node::left)); + let r_max = self.check_max_inner(self.node_ref(x, Node::right)); + let max = self.node_ref(x, |x| x.interval().high.max(l_max).max(r_max)); + assert_eq!(self.max(x), Some(&max)); + max + } + + /// 1. Every node is either red or black. + /// 2. The root is black. + /// 3. Every leaf (NIL) is black. + /// 4. If a node is red, then both its children are black. + /// 5. For each node, all simple paths from the node to descendant leaves contain the + /// same number of black nodes. + fn check_rb_properties(&self) { + assert!(matches!( + self.node_ref(self.root, Node::color), + Color::Black + )); + self.check_children_color(self.root); + self.check_black_height(self.root); + } + + fn check_children_color(&self, x: NodeIndex) { + if self.node_ref(x, Node::is_sentinel) { + return; + } + self.check_children_color(self.node_ref(x, Node::left)); + self.check_children_color(self.node_ref(x, Node::right)); + if self.node_ref(x, Node::is_red) { + assert!(matches!(self.left_ref(x, Node::color), Color::Black)); + assert!(matches!(self.right_ref(x, Node::color), Color::Black)); + } + } + + fn check_black_height(&self, x: NodeIndex) -> usize { + if self.node_ref(x, Node::is_sentinel) { + return 0; + } + let lefth = self.check_black_height(self.node_ref(x, Node::left)); + let righth = self.check_black_height(self.node_ref(x, Node::right)); + assert_eq!(lefth, righth); + if self.node_ref(x, Node::is_black) { + return lefth + 1; + } + lefth + } + } + + fn with_map_and_generator(test_fn: impl Fn(IntervalMap, IntervalGenerator)) { + let seeds = vec![[0; 32], [1; 32], [2; 32]]; + for seed in seeds { + let gen = IntervalGenerator::new(seed); + let map = IntervalMap::new(); + test_fn(map, gen); + } + } + + #[test] + fn red_black_tree_properties_is_satisfied() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + 
map.check_rb_properties(); + }); + } + + #[test] + fn map_len_will_update() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(100) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + assert_eq!(map.len(), 100); + for i in intervals { + let _ignore = map.remove(&i); + } + assert_eq!(map.len(), 0); + }); + } + + #[test] + fn check_overlap_is_ok() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) + .take(100) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let to_check: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) + .take(1000) + .collect(); + let expects: Vec<_> = to_check + .iter() + .map(|ci| intervals.iter().any(|i| ci.overlap(i))) + .collect(); + + for (ci, expect) in to_check.into_iter().zip(expects.into_iter()) { + assert_eq!(map.overlap(&ci), expect); + } + }); + } + + #[test] + fn check_max_is_ok() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + map.check_max(); + } + assert_eq!(map.len(), 1000); + for i in intervals { + let _ignore = map.remove(&i); + map.check_max(); + } + }); + } + + #[test] + fn remove_non_exist_interval_will_do_nothing() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals { + let _ignore = map.insert(i, ()); + } + assert_eq!(map.len(), 1000); + let to_remove: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in to_remove { + let _ignore = map.remove(&i); + } + assert_eq!(map.len(), 1000); + }); + } + + #[test] + fn find_all_overlap_is_ok() { + with_map_and_generator(|mut map, mut gen| { 
+ let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let to_find: Vec<_> = std::iter::repeat_with(|| gen.next()).take(1000).collect(); + + let expects: Vec> = to_find + .iter() + .map(|ti| intervals.iter().filter(|i| ti.overlap(i)).collect()) + .collect(); + + for (ti, mut expect) in to_find.into_iter().zip(expects.into_iter()) { + let mut result = map.find_all_overlap(&ti); + expect.sort_unstable(); + result.sort_unstable(); + assert_eq!(expect.len(), result.len()); + for (e, r) in expect.into_iter().zip(result.into_iter()) { + assert_eq!(e, r.0); + } + } + }); + } + + #[test] + fn iterate_through_map_is_sorted() { + with_map_and_generator(|mut map, mut gen| { + let mut intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .enumerate() + .take(1000) + .collect(); + for (v, i) in intervals.clone() { + let _ignore = map.insert(i, v); + } + intervals.sort_unstable_by(|a, b| a.1.cmp(&b.1)); + + for ((ei, ev), (v, i)) in map.iter().zip(intervals.iter()) { + assert_eq!(ei, i); + assert_eq!(ev, v); + } + }); + } + + #[test] + fn interval_map_clear_is_ok() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), 1); + map.insert(Interval::new(2, 4), 2); + map.insert(Interval::new(6, 7), 3); + assert_eq!(map.len(), 3); + map.clear(); + assert_eq!(map.len(), 0); + assert!(map.is_empty()); + assert_eq!(map.nodes.len(), 1); + assert!(map.nodes[0].is_sentinel()); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..13e5141 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,32 @@ +//! `interval_map` is a thread-safe map based on interval tree. +//! +//! It fully implements the insertion and deletion functionality of a red-black tree, +//! ensuring that each modification operation requires at most O(logN) time complexity. +//! +//! To safely and efficiently handle insertion and deletion operations in Rust, +//! 
`interval_map` innovatively uses arrays to simulate pointers for managing the parent-child +//! references in the red-black tree. This approach also ensures that interval_map has the +//! `Send` and `Unpin` traits, allowing it to be safely transferred between threads and +//! to maintain a fixed memory location during asynchronous operations. +//! +//! # Example +//! +//! ```rust +//! use interval_map::{Interval, IntervalMap}; +//! +//! let mut map = IntervalMap::new(); +//! let int = Interval::new(1, 2); +//! map.insert(int.clone(), 123456); +//! assert_eq!(map.get(&int), Some(&123456)); +//! ``` +//! + +mod entry; +mod index; +mod interval; +mod intervalmap; +mod node; + +pub use entry::{Entry, OccupiedEntry, VacantEntry}; +pub use interval::Interval; +pub use intervalmap::{IntervalMap, Iter}; diff --git a/src/node.rs b/src/node.rs new file mode 100644 index 0000000..63cbb03 --- /dev/null +++ b/src/node.rs @@ -0,0 +1,124 @@ +use crate::interval::Interval; + +use crate::index::{IndexType, NodeIndex}; + +/// Node of the interval tree +#[derive(Debug)] +pub struct Node { + /// Left children + pub left: Option>, + /// Right children + pub right: Option>, + /// Parent + pub parent: Option>, + /// Color of the node + pub color: Color, + + /// Interval of the node + pub interval: Option>, + /// The index that point to the node with the max value + pub max_index: Option>, + /// Value of the node + pub value: Option, +} + +// Convenient getter/setter methods +impl Node +where + Ix: IndexType, +{ + pub fn color(&self) -> Color { + self.color + } + + pub fn interval(&self) -> &Interval { + self.interval.as_ref().unwrap() + } + + pub fn max_index(&self) -> NodeIndex { + self.max_index.unwrap() + } + + pub fn left(&self) -> NodeIndex { + self.left.unwrap() + } + + pub fn right(&self) -> NodeIndex { + self.right.unwrap() + } + + pub fn parent(&self) -> NodeIndex { + self.parent.unwrap() + } + + pub fn is_sentinel(&self) -> bool { + self.interval.is_none() + } + + pub fn 
sentinel(&self) -> Option<&Self> { + self.interval.is_some().then_some(self) + } + + pub fn is_black(&self) -> bool { + matches!(self.color, Color::Black) + } + + pub fn is_red(&self) -> bool { + matches!(self.color, Color::Red) + } + + pub fn value(&self) -> &V { + self.value.as_ref().unwrap() + } + + pub fn value_mut(&mut self) -> &mut V { + self.value.as_mut().unwrap() + } + + pub fn take_value(&mut self) -> V { + self.value.take().unwrap() + } + + pub fn set_value(value: V) -> impl FnOnce(&mut Node) -> V { + move |node: &mut Node| node.value.replace(value).unwrap() + } + + pub fn set_color(color: Color) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + node.color = color; + } + } + + pub fn set_max_index(max_index: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.max_index.replace(max_index); + } + } + + pub fn set_left(left: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.left.replace(left); + } + } + + pub fn set_right(right: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.right.replace(right); + } + } + + pub fn set_parent(parent: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.parent.replace(parent); + } + } +} + +/// The color of the node +#[derive(Debug, Clone, Copy)] +pub enum Color { + /// Red node + Red, + /// Black node + Black, +} From a346d499d72e562c5604dbc817e299ca30d8daa3 Mon Sep 17 00:00:00 2001 From: feathercyc Date: Tue, 16 Jul 2024 10:38:18 +0800 Subject: [PATCH 3/3] feat: add some fn Signed-off-by: feathercyc --- Cargo.lock | 2 + Cargo.toml | 8 +- README.md | 21 +- benches/bench.rs | 101 ++- examples/new_point.rs | 27 + examples/string_affine.rs | 68 +++ src/entry.rs | 21 +- src/index.rs | 65 +- src/interval.rs | 78 ++- src/intervalmap.rs | 1217 +++++++++++++++++++++++++------------ src/iter.rs | 248 ++++++++ src/lib.rs | 7 +- src/node.rs | 259 ++++++-- src/tests.rs | 451 
++++++++++++++ 14 files changed, 2064 insertions(+), 509 deletions(-) create mode 100644 examples/new_point.rs create mode 100644 examples/string_affine.rs create mode 100644 src/iter.rs create mode 100644 src/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 78c1808..493b595 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,6 +205,8 @@ version = "0.1.0" dependencies = [ "criterion", "rand", + "serde", + "serde_json", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 8a89c0e..78b398b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,14 +8,20 @@ license = "Apache-2.0" keywords = ["Interval Tree", "Augmented Tree", "Red-Black Tree"] [dependencies] +serde = { version = "1.0", default-features = false, features = [ + "derive", + "std", +], optional = true } [dev-dependencies] criterion = "0.5.1" rand = "0.8.5" +serde_json = "1.0" [features] default = [] -interval_tree_find_overlap_ordered = [] +graphviz = [] +serde = ["dep:serde"] [[bench]] name = "bench" diff --git a/README.md b/README.md index 0fdb9b5..a7f1cc6 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The implementation of the interval tree in interval_map references "Introduction To safely and efficiently handle insertion and deletion operations in Rust, `interval_map` innovatively **uses arrays to simulate pointers** for managing the parent-child references in the red-black tree. This approach also ensures that interval_map has the `Send` and `Unpin` traits, allowing it to be safely transferred between threads and to maintain a fixed memory location during asynchronous operations. `interval_map` implements an `IntervalMap` struct: -- It accepts `Interval` as the key, where `T` can be any type that implements `Ord+Clone` trait. Therefore, intervals such as $[1, 2)$ and $["aaa", "bbb")$ are allowed +- It accepts `Interval` as the key, where `T` can be any type that implements `Ord` trait. 
Therefore, intervals such as $[1, 2)$ and $["aaa", "bbb")$ are allowed - The value can be of any type `interval_map` supports `insert`, `delete`, and `iter` fns. Traversal is performed in the order of `Interval` . For instance, with intervals of type `Interval`: @@ -22,15 +22,16 @@ Currently, `interval_map` only supports half-open intervals, i.e., $[...,...)$. The benchmark was conducted on a platform with `AMD R7 7840H + DDR5 5600MHz`. The result are as follows: 1. Only insert - | insert | 100 | 1000 | 10, 000 | 100, 000 | - | --------------- | --------- | --------- | --------- | --------- | - | Time per insert | 5.4168 µs | 80.518 µs | 2.2823 ms | 36.528 ms | + | insert | 100 | 1000 | 10, 000 | 100, 000 | + | ---------- | --------- | --------- | --------- | --------- | + | Total time | 5.4168 µs | 80.518 µs | 2.2823 ms | 36.528 ms | 2. Insert N and remove N - | insert_and_remove | 100 | 1000 | 10, 000 | 100, 000 | - | ------------------ | --------- | --------- | --------- | --------- | - | Time per operation | 10.333 µs | 223.43 µs | 4.9358 ms | 81.634 ms | + | insert_and_remove | 100 | 1000 | 10, 000 | 100, 000 | + | ----------------- | --------- | --------- | --------- | --------- | + | Total time | 10.333 µs | 223.43 µs | 4.9358 ms | 81.634 ms | ## TODO -- [] Support for $(...,...)$, $[...,...]$ and $(...,...]$ interval types. -- [] Add more tests like [etcd](https://github.com/etcd-io/etcd/blob/main/pkg/adt/interval_tree_test.go) -- [] Add Point type for Interval +- [ ] ~~Support for $(...,...)$, $[...,...]$ and $(...,...]$ interval types.~~ There's no way to support these interval type without performance loss now. +- [ ] ~~Add Point type for Interval~~ To support Point type, it should also support $[...,...]$, so it couldn't be supported now, either. But you could write code like [examples/new_point](examples/new_point.rs). +- [x] Add more tests like [etcd](https://github.com/etcd-io/etcd/blob/main/pkg/adt/interval_tree_test.go). +- [x] Refine iter mod. 
\ No newline at end of file diff --git a/benches/bench.rs b/benches/bench.rs index f7f7435..c9f9de1 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,44 +1,25 @@ use criterion::{criterion_group, criterion_main, Bencher, Criterion}; use interval_map::{Interval, IntervalMap}; +use rand::{rngs::StdRng, Rng, SeedableRng}; use std::hint::black_box; -struct Rng { - state: u32, -} -impl Rng { - fn new() -> Self { - Self { state: 0x87654321 } - } - - fn gen_u32(&mut self) -> u32 { - self.state ^= self.state << 13; - self.state ^= self.state >> 17; - self.state ^= self.state << 5; - self.state - } - - fn gen_range_i32(&mut self, low: i32, high: i32) -> i32 { - let d = (high - low) as u32; - low + (self.gen_u32() % d) as i32 - } -} - struct IntervalGenerator { - rng: Rng, - limit: i32, + rng: StdRng, } impl IntervalGenerator { fn new() -> Self { - const LIMIT: i32 = 100000; + let seed: [u8; 32] = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ]; Self { - rng: Rng::new(), - limit: LIMIT, + rng: StdRng::from_seed(seed), } } - fn next(&mut self) -> Interval { - let low = self.rng.gen_range_i32(0, self.limit - 1); - let high = self.rng.gen_range_i32(low + 1, self.limit); + fn next(&mut self) -> Interval { + let low = self.rng.gen(); + let high = self.rng.gen_range(low + 1..=u32::MAX); Interval::new(low, high) } } @@ -65,7 +46,7 @@ fn interval_map_insert_remove(count: usize, bench: &mut Bencher) { black_box(map.insert(i, ())); } for i in &intervals { - black_box(map.remove(&i)); + black_box(map.remove(i)); } }); } @@ -100,14 +81,68 @@ fn bench_interval_map_insert_remove(c: &mut Criterion) { }); } +// FilterIter helper fn +fn interval_map_filter_iter(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + let mut map = IntervalMap::new(); + for i in intervals.clone() { + map.insert(i, 
()); + } + bench.iter(|| { + for i in intervals.clone() { + black_box(map.filter_iter(&i).collect::>()); + } + }); +} + +// iter().filter() helper fn +fn interval_map_iter_filter(count: usize, bench: &mut Bencher) { + let mut gen = IntervalGenerator::new(); + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next()).take(count).collect(); + let mut map = IntervalMap::new(); + for i in intervals.clone() { + map.insert(i, ()); + } + bench.iter(|| { + for i in intervals.clone() { + black_box(map.iter().filter(|v| v.0.overlap(&i)).collect::>()); + } + }); +} + +fn bench_interval_map_filter_iter(c: &mut Criterion) { + c.bench_function("bench_interval_map_filter_iter_100", |b| { + interval_map_filter_iter(100, b) + }); + c.bench_function("bench_interval_map_filter_iter_1000", |b| { + interval_map_filter_iter(1000, b) + }); +} + +fn bench_interval_map_iter_filter(c: &mut Criterion) { + c.bench_function("bench_interval_map_iter_filter_100", |b| { + interval_map_iter_filter(100, b) + }); + c.bench_function("bench_interval_map_iter_filter_1000", |b| { + interval_map_iter_filter(1000, b) + }); +} + fn criterion_config() -> Criterion { Criterion::default().configure_from_args().without_plots() } criterion_group! { - name = benches; + name = benches_basic_op; + config = criterion_config(); + targets = bench_interval_map_insert, bench_interval_map_insert_remove, +} + +criterion_group! 
{ + name = benches_iter; config = criterion_config(); - targets = bench_interval_map_insert, bench_interval_map_insert_remove + targets = bench_interval_map_filter_iter, bench_interval_map_iter_filter } -criterion_main!(benches); +criterion_main!(benches_basic_op, benches_iter); diff --git a/examples/new_point.rs b/examples/new_point.rs new file mode 100644 index 0000000..e872dd6 --- /dev/null +++ b/examples/new_point.rs @@ -0,0 +1,27 @@ +use interval_map::{Interval, IntervalMap}; + +trait Point { + fn new_point(x: T) -> Interval; +} + +impl Point for Interval { + fn new_point(x: u32) -> Self { + Interval::new(x, x + 1) + } +} + +fn main() { + let mut interval_map = IntervalMap::::new(); + interval_map.insert(Interval::new(3, 7), 20); + interval_map.insert(Interval::new(2, 6), 15); + + let tmp_point = Interval::new_point(5); + assert_eq!(tmp_point, Interval::new(5, 6)); + + interval_map.insert(tmp_point.clone(), 10); + assert_eq!(interval_map.get(&tmp_point).unwrap(), &10); + assert_eq!( + interval_map.find_all_overlap(&Interval::new_point(5)).len(), + 3 + ); +} diff --git a/examples/string_affine.rs b/examples/string_affine.rs new file mode 100644 index 0000000..5595b18 --- /dev/null +++ b/examples/string_affine.rs @@ -0,0 +1,68 @@ +use std::cmp; + +use interval_map::{Interval, IntervalMap}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum StringAffine { + /// String + String(String), + /// Unbounded + Unbounded, +} + +impl StringAffine { + pub fn new_key(s: &str) -> Self { + Self::String(s.to_string()) + } + + pub fn new_unbounded() -> Self { + Self::Unbounded + } +} + +impl PartialOrd for StringAffine { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for StringAffine { + fn cmp(&self, other: &Self) -> cmp::Ordering { + match (self, other) { + (StringAffine::String(x), StringAffine::String(y)) => x.cmp(y), + (StringAffine::String(_), StringAffine::Unbounded) => cmp::Ordering::Less, + (StringAffine::Unbounded, 
StringAffine::String(_)) => cmp::Ordering::Greater, + (StringAffine::Unbounded, StringAffine::Unbounded) => cmp::Ordering::Equal, + } + } +} + +trait Point { + fn new_point(x: T) -> Interval; +} + +impl Point for Interval { + fn new_point(x: StringAffine) -> Interval { + match x { + StringAffine::String(mut x_string) => { + let low = x_string.clone(); + x_string.push('\0'); + Interval::new( + StringAffine::new_key(&low), + StringAffine::new_key(&x_string), + ) + } + _ => panic!("new_point only receive StringAffine::String!"), + } + } +} + +fn main() { + let mut interval_map = IntervalMap::::new(); + interval_map.insert( + Interval::new(StringAffine::new_key("8"), StringAffine::Unbounded), + 123, + ); + assert!(interval_map.overlaps(&Interval::new_point(StringAffine::new_key("9")))); + assert!(!interval_map.overlaps(&Interval::new_point(StringAffine::new_key("7")))); +} diff --git a/src/entry.rs b/src/entry.rs index 731a55a..59139aa 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -5,7 +5,10 @@ use crate::node::Node; /// A view into a single entry in a map, which may either be vacant or occupied. #[derive(Debug)] -pub enum Entry<'a, T, V, Ix> { +pub enum Entry<'a, T, V, Ix> +where + T: Ord, +{ /// An occupied entry. Occupied(OccupiedEntry<'a, T, V, Ix>), /// A vacant entry. @@ -15,17 +18,23 @@ pub enum Entry<'a, T, V, Ix> { /// A view into an occupied entry in a `IntervalMap`. /// It is part of the [`Entry`] enum. #[derive(Debug)] -pub struct OccupiedEntry<'a, T, V, Ix> { +pub struct OccupiedEntry<'a, T, V, Ix> +where + T: Ord, +{ /// Reference to the map pub map_ref: &'a mut IntervalMap, /// The entry node - pub node: NodeIndex, + pub node_idx: NodeIndex, } /// A view into a vacant entry in a `IntervalMap`. /// It is part of the [`Entry`] enum. 
#[derive(Debug)] -pub struct VacantEntry<'a, T, V, Ix> { +pub struct VacantEntry<'a, T, V, Ix> +where + T: Ord, +{ /// Mutable reference to the map pub map_ref: &'a mut IntervalMap, /// The interval of this entry @@ -53,7 +62,7 @@ where #[inline] pub fn or_insert(self, default: V) -> &'a mut V { match self { - Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node, Node::value_mut), + Entry::Occupied(entry) => entry.map_ref.node_mut(entry.node_idx, Node::value_mut), Entry::Vacant(entry) => { let entry_idx = NodeIndex::new(entry.map_ref.nodes.len()); let _ignore = entry.map_ref.insert(entry.interval, default); @@ -88,7 +97,7 @@ where { match self { Entry::Occupied(entry) => { - f(entry.map_ref.node_mut(entry.node, Node::value_mut)); + f(entry.map_ref.node_mut(entry.node_idx, Node::value_mut)); Self::Occupied(entry) } Entry::Vacant(entry) => Self::Vacant(entry), diff --git a/src/index.rs b/src/index.rs index 657f955..4d628fc 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,30 +1,49 @@ use std::fmt; use std::hash::Hash; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + pub type DefaultIx = u32; -pub unsafe trait IndexType: Copy + Default + Hash + Ord + fmt::Debug + 'static { +pub trait IndexType: Copy + Default + Hash + Ord + fmt::Debug + 'static { + const SENTINEL: Self; fn new(x: usize) -> Self; fn index(&self) -> usize; fn max() -> Self; + fn is_sentinel(&self) -> bool { + *self == Self::SENTINEL + } } -unsafe impl IndexType for u32 { - #[inline(always)] - fn new(x: usize) -> Self { - x as u32 - } - #[inline(always)] - fn index(&self) -> usize { - *self as usize - } - #[inline(always)] - fn max() -> Self { - ::std::u32::MAX - } +macro_rules! 
impl_index { + ($type:ident) => { + impl IndexType for $type { + const SENTINEL: Self = 0; + + #[inline(always)] + fn new(x: usize) -> Self { + x as $type + } + #[inline(always)] + fn index(&self) -> usize { + *self as usize + } + #[inline(always)] + fn max() -> Self { + $type::MAX + } + } + }; } +impl_index!(u8); +impl_index!(u16); +impl_index!(u32); +impl_index!(u64); + /// Node identifier. +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Copy, Clone, Default, PartialEq, PartialOrd, Eq, Ord, Hash)] pub struct NodeIndex(Ix); @@ -34,24 +53,30 @@ impl NodeIndex { NodeIndex(IndexType::new(x)) } - #[inline] - pub fn index(self) -> usize { - self.0.index() - } - #[inline] pub fn end() -> Self { NodeIndex(IndexType::max()) } + + pub fn incre(&self) -> Self { + NodeIndex::new(self.index().wrapping_add(1)) + } } -unsafe impl IndexType for NodeIndex { +impl IndexType for NodeIndex { + const SENTINEL: Self = NodeIndex(Ix::SENTINEL); + + #[inline] fn index(&self) -> usize { self.0.index() } + + #[inline] fn new(x: usize) -> Self { NodeIndex::new(x) } + + #[inline] fn max() -> Self { NodeIndex(::max()) } diff --git a/src/interval.rs b/src/interval.rs index e7d78bc..f775c7f 100644 --- a/src/interval.rs +++ b/src/interval.rs @@ -8,9 +8,14 @@ //! //! Currently, `interval_map` only supports half-open intervals, i.e., [...,...). +use std::fmt::{Display, Formatter}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + /// The interval stored in `IntervalMap` represents [low, high) -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[non_exhaustive] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Interval { /// Low value pub low: T, @@ -35,6 +40,24 @@ impl Interval { pub fn overlap(&self, other: &Self) -> bool { self.high > other.low && other.high > self.low } + + /// Checks if self contains other interval + /// e.g. 
[1,10) contains [1,8) + #[inline] + pub fn contain(&self, other: &Self) -> bool { + self.low <= other.low && self.high > other.high + } + + /// Checks if self contains a point + pub fn contain_point(&self, p: T) -> bool { + self.low <= p && self.high > p + } +} + +impl Display for Interval { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + write!(f, "[{},{})", self.low, self.high) + } } /// Reference type of `Interval` @@ -64,6 +87,21 @@ impl<'a, T: Ord> IntervalRef<'a, T> { } } +#[cfg(feature = "serde")] +impl Serialize for Interval { + fn serialize(&self, serializer: S) -> Result { + (&self.low, &self.high).serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, T: Deserialize<'de> + Ord> Deserialize<'de> for Interval { + fn deserialize>(deserializer: D) -> Result { + let (low, high) = <(T, T)>::deserialize(deserializer)?; + Ok(Interval::new(low, high)) + } +} + #[cfg(test)] mod test { use super::*; @@ -73,4 +111,42 @@ mod test { fn invalid_range_should_panic() { let _interval = Interval::new(3, 1); } + + #[test] + fn test_interval_clone() { + let interval1 = Interval::new(1, 10); + let interval2 = interval1.clone(); + assert_eq!(interval1, interval2); + } + + #[test] + fn test_interval_compare() { + let interval1 = Interval::new(1, 10); + let interval2 = Interval::new(5, 15); + assert!(interval1 < interval2); + assert!(interval2 > interval1); + assert_eq!(interval1, Interval::new(1, 10)); + assert_ne!(interval1, interval2); + } + + #[test] + fn test_interval_hash() { + let interval1 = Interval::new(1, 10); + let interval2 = Interval::new(1, 10); + let interval3 = Interval::new(5, 15); + let mut hashset = std::collections::HashSet::new(); + hashset.insert(interval1); + hashset.insert(interval2); + hashset.insert(interval3); + assert_eq!(hashset.len(), 2); + } + + #[cfg(feature = "serde")] + #[test] + fn test_interval_serialize_deserialize() { + let interval = Interval::new(1, 10); + let serialized = 
serde_json::to_string(&interval).unwrap(); + let deserialized: Interval = serde_json::from_str(&serialized).unwrap(); + assert_eq!(interval, deserialized); + } } diff --git a/src/intervalmap.rs b/src/intervalmap.rs index 51dcb24..7c2a13e 100644 --- a/src/intervalmap.rs +++ b/src/intervalmap.rs @@ -1,12 +1,29 @@ use crate::entry::{Entry, OccupiedEntry, VacantEntry}; -use crate::index::{DefaultIx, IndexType, NodeIndex}; +use crate::index::{self, DefaultIx, IndexType, NodeIndex}; use crate::interval::{Interval, IntervalRef}; +use crate::iter::{FilterIter, IntoIter, Iter, UnsortedIter}; use crate::node::{Color, Node}; + use std::collections::VecDeque; +use std::fmt::Debug; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "graphviz")] +use std::fmt::Display; +#[cfg(feature = "graphviz")] +use std::fs::OpenOptions; +#[cfg(feature = "graphviz")] +use std::io::Write; /// An interval-value map, which support operations on dynamic sets of intervals. +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug)] -pub struct IntervalMap { +pub struct IntervalMap +where + T: Ord, +{ /// Vector that stores nodes pub(crate) nodes: Vec>, /// Root of the interval tree @@ -24,16 +41,16 @@ where #[inline] #[must_use] pub fn with_capacity(capacity: usize) -> Self { - let mut nodes = vec![Self::new_sentinel()]; + let mut nodes = vec![Node::new_sentinel()]; nodes.reserve(capacity); IntervalMap { nodes, - root: Self::sentinel(), + root: NodeIndex::SENTINEL, len: 0, } } - /// Insert an interval-value pair into the map. + /// insert an interval-value pair into the map. /// If the interval exists, overwrite and return the previous value. 
/// /// # Panics @@ -52,10 +69,11 @@ where #[inline] pub fn insert(&mut self, interval: Interval, value: V) -> Option { let node_idx = NodeIndex::new(self.nodes.len()); - let node = Self::new_node(interval, value, node_idx); + let node = Node::new(interval, value, node_idx); // check for max capacity, except if we use usize assert!( - ::max().index() == !0 || NodeIndex::end() != node_idx, + ::max().index() == !0 + || as index::IndexType>::max() != node_idx, "Reached maximum number of nodes" ); self.nodes.push(node); @@ -101,19 +119,22 @@ where /// map.insert(Interval::new(1, 3), ()); /// map.insert(Interval::new(6, 7), ()); /// map.insert(Interval::new(9, 11), ()); - /// assert!(map.overlap(&Interval::new(2, 5))); - /// assert!(map.overlap(&Interval::new(1, 17))); - /// assert!(!map.overlap(&Interval::new(3, 6))); - /// assert!(!map.overlap(&Interval::new(11, 23))); + /// assert!(map.overlaps(&Interval::new(2, 5))); + /// assert!(map.overlaps(&Interval::new(1, 17))); + /// assert!(!map.overlaps(&Interval::new(3, 6))); + /// assert!(!map.overlaps(&Interval::new(11, 23))); /// ``` #[inline] - pub fn overlap(&self, interval: &Interval) -> bool { + pub fn overlaps(&self, interval: &Interval) -> bool { let node_idx = self.search(interval); !self.node_ref(node_idx, Node::is_sentinel) } /// Find all intervals in the map that overlaps with the given interval. /// + /// # Note + /// This method's returned data is unordered. To get ordered data, please use `find_all_overlap_ordered`. 
+ /// /// # Example /// ```rust /// use interval_map::{Interval, IntervalMap}; @@ -129,10 +150,45 @@ where /// ``` #[inline] pub fn find_all_overlap(&self, interval: &Interval) -> Vec<(&Interval, &V)> { + if self.node_ref(self.root, Node::is_sentinel) { + return Vec::new(); + } + if self.len() > 20 { + self.find_all_overlap_inner(self.root, interval) + } else { + self.unsorted_iter() + .filter(|v| v.0.overlap(interval)) + .collect() + } + } + + /// Find all intervals in the map that overlaps with the given interval. + /// + /// # Note + /// This method's returned data is ordered. Generally, it's much slower than `find_all_overlap`. + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(1, 3), ()); + /// map.insert(Interval::new(2, 4), ()); + /// map.insert(Interval::new(6, 7), ()); + /// map.insert(Interval::new(7, 11), ()); + /// assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 3); + /// map.remove(&Interval::new(1, 3)); + /// assert_eq!(map.find_all_overlap(&Interval::new(2, 7)).len(), 2); + /// ``` + #[inline] + pub fn find_all_overlap_ordered<'a>( + &'a self, + interval: &'a Interval, + ) -> Vec<(&Interval, &V)> { if self.node_ref(self.root, Node::is_sentinel) { Vec::new() } else { - self.find_all_overlap_inner_unordered(self.root, interval) + self.filter_iter(interval).collect() } } @@ -176,10 +232,71 @@ where #[inline] #[must_use] pub fn iter(&self) -> Iter<'_, T, V, Ix> { - Iter { - map_ref: self, - stack: None, - } + Iter::new(self) + } + + /// Get an into iterator over the entries of the map, sorted by key. + // #[inline] + // #[must_use] + // pub fn into_iter(self) -> IntoIter { + // IntoIter::new(self) + // } + + /// Get an iterator over the entries of the map, unsorted. 
+ #[inline] + pub fn unsorted_iter(&self) -> UnsortedIter { + UnsortedIter::new(self) + } + + /// Get an iterator over the entries that overlap the `query`, sorted by key. + /// + /// # Panics + /// + /// The method panics when `query` contains a value that cannot be compared. + #[inline] + pub fn filter_iter<'a, 'b: 'a>(&'a self, query: &'b Interval) -> FilterIter { + FilterIter::new(self, query) + } + + /// Return true if the interval tree's key cover the entire given interval. + /// + /// # Example + /// ```rust + /// use interval_map::{Interval, IntervalMap}; + /// + /// let mut map = IntervalMap::new(); + /// map.insert(Interval::new(3, 5), 0); + /// map.insert(Interval::new(5, 8), 1); + /// map.insert(Interval::new(9, 12), 1); + /// assert!(map.contains(&Interval::new(4, 6))); + /// assert!(!map.contains(&Interval::new(7, 10))); + /// ``` + #[inline] + pub fn contains(&self, interval: &Interval) -> bool { + let mut max_end: Option<&T> = None; + let mut min_begin: Option<&T> = None; + + let mut continuous = true; + self.filter_iter(interval).find(|v| { + if min_begin.is_none() { + min_begin = Some(&v.0.low); + max_end = Some(&v.0.high); + return false; + } + if max_end.map(|mv| mv < &v.0.low).unwrap() { + continuous = false; + return true; + } + if max_end.map(|mv| mv < &v.0.high).unwrap() { + max_end = Some(&v.0.high); + } + false + }); + + continuous + && min_begin.is_some() + && max_end.map(|mv| mv >= &interval.high).unwrap() + && min_begin.map(|mv| mv <= &interval.low).unwrap() } /// Get the given key's corresponding entry in the map for in-place manipulation. 
@@ -199,9 +316,9 @@ where #[inline] pub fn entry(&mut self, interval: Interval) -> Entry<'_, T, V, Ix> { match self.search_exact(&interval) { - Some(node) => Entry::Occupied(OccupiedEntry { + Some(node_idx) => Entry::Occupied(OccupiedEntry { map_ref: self, - node, + node_idx, }), None => Entry::Vacant(VacantEntry { map_ref: self, @@ -214,8 +331,8 @@ where #[inline] pub fn clear(&mut self) { self.nodes.clear(); - self.nodes.push(Self::new_sentinel()); - self.root = Self::sentinel(); + self.nodes.push(Node::new_sentinel()); + self.root = NodeIndex::SENTINEL; self.len = 0; } @@ -234,6 +351,20 @@ where } } +impl IntoIterator for IntervalMap +where + T: Ord, + Ix: IndexType, +{ + type Item = (Interval, V); + + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self) + } +} + impl IntervalMap where T: Ord, @@ -242,11 +373,7 @@ where #[inline] #[must_use] pub fn new() -> Self { - Self { - nodes: vec![Self::new_sentinel()], - root: Self::sentinel(), - len: 0, - } + Self::with_capacity(0) } } @@ -265,46 +392,9 @@ where T: Ord, Ix: IndexType, { - /// Create a new sentinel node - fn new_sentinel() -> Node { - Node { - interval: None, - value: None, - max_index: None, - left: None, - right: None, - parent: None, - color: Color::Black, - } - } - - /// Create a new tree node - fn new_node(interval: Interval, value: V, index: NodeIndex) -> Node { - Node { - max_index: Some(index), - interval: Some(interval), - value: Some(value), - left: Some(Self::sentinel()), - right: Some(Self::sentinel()), - parent: Some(Self::sentinel()), - color: Color::Red, - } - } - - /// Get the sentinel node index - fn sentinel() -> NodeIndex { - NodeIndex::new(0) - } -} - -impl IntervalMap -where - T: Ord, - Ix: IndexType, -{ - /// Insert a node into the tree. + /// insert a node into the tree. 
fn insert_inner(&mut self, z: NodeIndex) -> Option { - let mut y = Self::sentinel(); + let mut y = NodeIndex::SENTINEL; let mut x = self.root; while !self.node_ref(x, Node::is_sentinel) { @@ -380,34 +470,10 @@ where self.len = self.len.wrapping_sub(1); } - /// Find all intervals in the map that overlaps with the given interval. - #[cfg(interval_tree_find_overlap_ordered)] - fn find_all_overlap_inner( - &self, - x: NodeIndex, - interval: &Interval, - ) -> Vec<(&Interval, &V)> { - let mut list = vec![]; - if self.node_ref(x, Node::interval).overlap(interval) { - list.push(self.node_ref(x, |nx| (nx.interval(), nx.value()))); - } - if self.max(self.node_ref(x, Node::left)) >= Some(&interval.low) { - list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::left), interval)); - } - if self - .max(self.node_ref(x, Node::right)) - .map(|rmax| IntervalRef::new(&self.node_ref(x, Node::interval).low, rmax)) - .is_some_and(|i| i.overlap(interval)) - { - list.extend(self.find_all_overlap_inner(self.node_ref(x, Node::right), interval)); - } - list - } - /// Find all intervals in the map that overlaps with the given interval. /// /// The result is unordered because of breadth-first search to save stack size - fn find_all_overlap_inner_unordered( + fn find_all_overlap_inner( &self, x: NodeIndex, interval: &Interval, @@ -680,13 +746,13 @@ where } /// Check if a node is a left child of its parent. - fn is_left_child(&self, node: NodeIndex) -> bool { - self.parent_ref(node, Node::left) == node + fn is_left_child(&self, node_idx: NodeIndex) -> bool { + self.parent_ref(node_idx, Node::left) == node_idx } /// Check if a node is a right child of its parent. 
- fn is_right_child(&self, node: NodeIndex) -> bool { - self.parent_ref(node, Node::right) == node + fn is_right_child(&self, node_idx: NodeIndex) -> bool { + self.parent_ref(node_idx, Node::right) == node_idx } /// Update nodes indices after remove @@ -719,423 +785,792 @@ where } } +#[cfg(feature = "graphviz")] +impl IntervalMap +where + T: Ord + Copy + Display, + V: Display, + Ix: IndexType, +{ + /// writes dot file to `filename`. `T` and `V` should implement `Display`. + pub fn draw(&self, filename: &str) -> std::io::Result<()> { + let mut file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(filename)?; + writeln!(file, "digraph {{")?; + // begin at 1, because 0 is sentinel node + for i in 1..self.nodes.len() { + self.nodes[i].draw(i, &mut file)?; + } + writeln!(file, "}}") + } +} + +#[cfg(feature = "graphviz")] +impl IntervalMap +where + T: Ord + Copy + Display, + Ix: IndexType, +{ + /// Writes dot file to `filename` without values. `T` should implement `Display`. 
+ pub fn draw_without_value(&self, filename: &str) -> std::io::Result<()> { + let mut file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(filename)?; + writeln!(file, "digraph {{")?; + // begin at 1, because 0 is sentinel node + for i in 1..self.nodes.len() { + self.nodes[i].draw_without_value(i, &mut file)?; + } + writeln!(file, "}}") + } +} + // Convenient methods for reference or mutate current/parent/left/right node impl<'a, T, V, Ix> IntervalMap where + T: Ord, Ix: IndexType, { - fn node_ref(&'a self, node: NodeIndex, op: F) -> R + pub(crate) fn node_ref(&'a self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a Node) -> R, { - op(&self.nodes[node.index()]) + op(&self.nodes[node_idx.index()]) } - pub(crate) fn node_mut(&'a mut self, node: NodeIndex, op: F) -> R + pub(crate) fn node_mut(&'a mut self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a mut Node) -> R, { - op(&mut self.nodes[node.index()]) + op(&mut self.nodes[node_idx.index()]) } - fn left_ref(&'a self, node: NodeIndex, op: F) -> R + pub(crate) fn left_ref(&'a self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a Node) -> R, { - let idx = self.nodes[node.index()].left().index(); + let idx = self.nodes[node_idx.index()].left().index(); op(&self.nodes[idx]) } - fn right_ref(&'a self, node: NodeIndex, op: F) -> R + pub(crate) fn right_ref(&'a self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a Node) -> R, { - let idx = self.nodes[node.index()].right().index(); + let idx = self.nodes[node_idx.index()].right().index(); op(&self.nodes[idx]) } - fn parent_ref(&'a self, node: NodeIndex, op: F) -> R + fn parent_ref(&'a self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a Node) -> R, { - let idx = self.nodes[node.index()].parent().index(); + let idx = self.nodes[node_idx.index()].parent().index(); op(&self.nodes[idx]) } - fn grand_parent_ref(&'a self, node: NodeIndex, op: F) -> R + fn grand_parent_ref(&'a self, 
node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a Node) -> R, { - let parent_idx = self.nodes[node.index()].parent().index(); + let parent_idx = self.nodes[node_idx.index()].parent().index(); let grand_parent_idx = self.nodes[parent_idx].parent().index(); op(&self.nodes[grand_parent_idx]) } - fn left_mut(&'a mut self, node: NodeIndex, op: F) -> R + fn left_mut(&'a mut self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a mut Node) -> R, { - let idx = self.nodes[node.index()].left().index(); + let idx = self.nodes[node_idx.index()].left().index(); op(&mut self.nodes[idx]) } - fn right_mut(&'a mut self, node: NodeIndex, op: F) -> R + fn right_mut(&'a mut self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a mut Node) -> R, { - let idx = self.nodes[node.index()].right().index(); + let idx = self.nodes[node_idx.index()].right().index(); op(&mut self.nodes[idx]) } - fn parent_mut(&'a mut self, node: NodeIndex, op: F) -> R + fn parent_mut(&'a mut self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a mut Node) -> R, { - let idx = self.nodes[node.index()].parent().index(); + let idx = self.nodes[node_idx.index()].parent().index(); op(&mut self.nodes[idx]) } - fn grand_parent_mut(&'a mut self, node: NodeIndex, op: F) -> R + fn grand_parent_mut(&'a mut self, node_idx: NodeIndex, op: F) -> R where R: 'a, F: FnOnce(&'a mut Node) -> R, { - let parent_idx = self.nodes[node.index()].parent().index(); + let parent_idx = self.nodes[node_idx.index()].parent().index(); let grand_parent_idx = self.nodes[parent_idx].parent().index(); op(&mut self.nodes[grand_parent_idx]) } - fn max(&self, node: NodeIndex) -> Option<&T> { - let max_index = self.nodes[node.index()].max_index?.index(); + pub(crate) fn max(&self, node_idx: NodeIndex) -> Option<&T> { + let max_index = self.nodes[node_idx.index()].max_index?.index(); self.nodes[max_index].interval.as_ref().map(|i| &i.high) } } -/// An iterator over the entries of a `IntervalMap`. 
-#[derive(Debug)] -pub struct Iter<'a, T, V, Ix> { - /// Reference to the map - map_ref: &'a IntervalMap, - /// Stack for iteration - stack: Option>>, +#[cfg(test)] +#[derive(Debug, PartialEq, Eq)] +pub struct VisitedInterval { + key: Interval, + left: Option>, + right: Option>, + color: Color, + depth: i32, } -impl Iter<'_, T, V, Ix> -where - Ix: IndexType, -{ - /// Initializes the stack - fn init_stack(&mut self) { - self.stack = Some(Self::left_link(self.map_ref, self.map_ref.root)); - } - - /// Pushes a link of nodes on the left to stack. - fn left_link(map_ref: &IntervalMap, mut x: NodeIndex) -> Vec> { - let mut nodes = vec![]; - while !map_ref.node_ref(x, Node::is_sentinel) { - nodes.push(x); - x = map_ref.node_ref(x, Node::left); +#[cfg(test)] +impl VisitedInterval { + pub fn new( + key: Interval, + left: Option>, + right: Option>, + color: Color, + depth: i32, + ) -> Self { + Self { + key, + left, + right, + color, + depth, } - nodes } } -impl<'a, T, V, Ix> Iterator for Iter<'a, T, V, Ix> +#[cfg(test)] +impl IntervalMap where + T: Ord + Clone, Ix: IndexType, { - type Item = (&'a Interval, &'a V); - - #[inline] - fn next(&mut self) -> Option { - if self.stack.is_none() { - self.init_stack(); - } - let stack = self.stack.as_mut().unwrap(); - if stack.is_empty() { - return None; + fn visit_level(&self) -> Vec> { + let mut res: Vec> = Vec::new(); + let mut queue = VecDeque::new(); + queue.push_back(self.root); + let mut depth = 0; + while !queue.is_empty() { + for _ in 0..queue.len() { + let p = queue.pop_front().unwrap(); + let node = &self.nodes[p.index()]; + let p_left_node = &self.nodes[node.left().index()]; + let p_right_node = &self.nodes[node.right().index()]; + + res.push(VisitedInterval { + key: node.interval.clone().unwrap(), + left: p_left_node.interval.clone(), + right: p_right_node.interval.clone(), + color: node.color(), + depth, + }); + if !p_left_node.is_sentinel() { + queue.push_back(node.left()) + } + if !p_right_node.is_sentinel() { + 
queue.push_back(node.right()) + } + } + depth += 1; } - let x = stack.pop().unwrap(); - stack.extend(Self::left_link( - self.map_ref, - self.map_ref.node_ref(x, Node::right), - )); - Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value()))) + res } } #[cfg(test)] mod test { - use std::collections::HashSet; - - use rand::{rngs::StdRng, Rng, SeedableRng}; - use super::*; - struct IntervalGenerator { - rng: StdRng, - unique: HashSet>, - limit: i32, - } - - impl IntervalGenerator { - fn new(seed: [u8; 32]) -> Self { - const LIMIT: i32 = 1000; - Self { - rng: SeedableRng::from_seed(seed), - unique: HashSet::new(), - limit: LIMIT, - } - } - - fn next(&mut self) -> Interval { - let low = self.rng.gen_range(0..self.limit - 1); - let high = self.rng.gen_range((low + 1)..self.limit); - Interval::new(low, high) - } - - fn next_unique(&mut self) -> Interval { - let mut interval = self.next(); - while self.unique.contains(&interval) { - interval = self.next(); - } - self.unique.insert(interval.clone()); - interval - } - - fn next_with_range(&mut self, range: i32) -> Interval { - let low = self.rng.gen_range(0..self.limit - 1); - let high = self - .rng - .gen_range((low + 1)..self.limit.min(low + 1 + range)); - Interval::new(low, high) - } - } - - impl IntervalMap { - fn check_max(&self) { - let _ignore = self.check_max_inner(self.root); - } - - fn check_max_inner(&self, x: NodeIndex) -> i32 { - if self.node_ref(x, Node::is_sentinel) { - return 0; - } - let l_max = self.check_max_inner(self.node_ref(x, Node::left)); - let r_max = self.check_max_inner(self.node_ref(x, Node::right)); - let max = self.node_ref(x, |x| x.interval().high.max(l_max).max(r_max)); - assert_eq!(self.max(x), Some(&max)); - max - } - - /// 1. Every node is either red or black. - /// 2. The root is black. - /// 3. Every leaf (NIL) is black. - /// 4. If a node is red, then both its children are black. - /// 5. 
For each node, all simple paths from the node to descendant leaves contain the - /// same number of black nodes. - fn check_rb_properties(&self) { - assert!(matches!( - self.node_ref(self.root, Node::color), - Color::Black - )); - self.check_children_color(self.root); - self.check_black_height(self.root); - } - - fn check_children_color(&self, x: NodeIndex) { - if self.node_ref(x, Node::is_sentinel) { - return; - } - self.check_children_color(self.node_ref(x, Node::left)); - self.check_children_color(self.node_ref(x, Node::right)); - if self.node_ref(x, Node::is_red) { - assert!(matches!(self.left_ref(x, Node::color), Color::Black)); - assert!(matches!(self.right_ref(x, Node::color), Color::Black)); - } - } - - fn check_black_height(&self, x: NodeIndex) -> usize { - if self.node_ref(x, Node::is_sentinel) { - return 0; - } - let lefth = self.check_black_height(self.node_ref(x, Node::left)); - let righth = self.check_black_height(self.node_ref(x, Node::right)); - assert_eq!(lefth, righth); - if self.node_ref(x, Node::is_black) { - return lefth + 1; - } - lefth - } + #[test] + fn test_interval_tree_insert() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(16, 21), 30); + map.insert(Interval::new(8, 9), 23); + map.insert(Interval::new(0, 3), 3); + map.insert(Interval::new(5, 8), 10); + map.insert(Interval::new(6, 10), 10); + map.insert(Interval::new(15, 23), 23); + map.insert(Interval::new(17, 19), 20); + map.insert(Interval::new(25, 30), 30); + map.insert(Interval::new(26, 27), 26); + map.insert(Interval::new(19, 20), 20); + + let expected = vec![ + VisitedInterval::new( + Interval::new(16, 21), + Some(Interval::new(8, 9)), + Some(Interval::new(25, 30)), + Color::Black, + 0, + ), + VisitedInterval::new( + Interval::new(8, 9), + Some(Interval::new(5, 8)), + Some(Interval::new(15, 23)), + Color::Red, + 1, + ), + VisitedInterval::new( + Interval::new(25, 30), + Some(Interval::new(17, 19)), + Some(Interval::new(26, 27)), + Color::Red, + 1, + ), + 
VisitedInterval::new( + Interval::new(5, 8), + Some(Interval::new(0, 3)), + Some(Interval::new(6, 10)), + Color::Black, + 2, + ), + VisitedInterval::new(Interval::new(15, 23), None, None, Color::Black, 2), + VisitedInterval::new( + Interval::new(17, 19), + None, + Some(Interval::new(19, 20)), + Color::Black, + 2, + ), + VisitedInterval::new(Interval::new(26, 27), None, None, Color::Black, 2), + VisitedInterval::new(Interval::new(0, 3), None, None, Color::Red, 3), + VisitedInterval::new(Interval::new(6, 10), None, None, Color::Red, 3), + VisitedInterval::new(Interval::new(19, 20), None, None, Color::Red, 3), + ]; + + let res = map.visit_level(); + assert_eq!(res, expected); } - fn with_map_and_generator(test_fn: impl Fn(IntervalMap, IntervalGenerator)) { - let seeds = vec![[0; 32], [1; 32], [2; 32]]; - for seed in seeds { - let gen = IntervalGenerator::new(seed); - let map = IntervalMap::new(); - test_fn(map, gen); - } + #[test] + fn test_interval_tree_self_balanced() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(0, 1), 0); + map.insert(Interval::new(1, 2), 0); + map.insert(Interval::new(3, 4), 0); + map.insert(Interval::new(5, 6), 0); + map.insert(Interval::new(7, 8), 0); + map.insert(Interval::new(8, 9), 0); + + let expected = vec![ + VisitedInterval::new( + Interval::new(1, 2), + Some(Interval::new(0, 1)), + Some(Interval::new(5, 6)), + Color::Black, + 0, + ), + VisitedInterval::new(Interval::new(0, 1), None, None, Color::Black, 1), + VisitedInterval::new( + Interval::new(5, 6), + Some(Interval::new(3, 4)), + Some(Interval::new(7, 8)), + Color::Red, + 1, + ), + VisitedInterval::new(Interval::new(3, 4), None, None, Color::Black, 2), + VisitedInterval::new( + Interval::new(7, 8), + None, + Some(Interval::new(8, 9)), + Color::Black, + 2, + ), + VisitedInterval::new(Interval::new(8, 9), None, None, Color::Red, 3), + ]; + + let res = map.visit_level(); + assert_eq!(res, expected); } #[test] - fn red_black_tree_properties_is_satisfied() { - 
with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - map.check_rb_properties(); - }); + fn test_interval_tree_delete() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(510, 511), 0); + map.insert(Interval::new(82, 83), 0); + map.insert(Interval::new(830, 831), 0); + map.insert(Interval::new(11, 12), 0); + map.insert(Interval::new(383, 384), 0); + map.insert(Interval::new(647, 648), 0); + map.insert(Interval::new(899, 900), 0); + map.insert(Interval::new(261, 262), 0); + map.insert(Interval::new(410, 411), 0); + map.insert(Interval::new(514, 515), 0); + map.insert(Interval::new(815, 816), 0); + map.insert(Interval::new(888, 889), 0); + map.insert(Interval::new(972, 973), 0); + map.insert(Interval::new(238, 239), 0); + map.insert(Interval::new(292, 293), 0); + map.insert(Interval::new(953, 954), 0); + + let expected_before_delete = vec![ + VisitedInterval::new( + Interval::new(510, 511), + Some(Interval::new(82, 83)), + Some(Interval::new(830, 831)), + Color::Black, + 0, + ), + VisitedInterval::new( + Interval::new(82, 83), + Some(Interval::new(11, 12)), + Some(Interval::new(383, 384)), + Color::Black, + 1, + ), + VisitedInterval::new( + Interval::new(830, 831), + Some(Interval::new(647, 648)), + Some(Interval::new(899, 900)), + Color::Black, + 1, + ), + VisitedInterval::new(Interval::new(11, 12), None, None, Color::Black, 2), + VisitedInterval::new( + Interval::new(383, 384), + Some(Interval::new(261, 262)), + Some(Interval::new(410, 411)), + Color::Red, + 2, + ), + VisitedInterval::new( + Interval::new(647, 648), + Some(Interval::new(514, 515)), + Some(Interval::new(815, 816)), + Color::Black, + 2, + ), + VisitedInterval::new( + Interval::new(899, 900), + Some(Interval::new(888, 889)), + Some(Interval::new(972, 973)), + Color::Red, + 2, + ), + VisitedInterval::new( + Interval::new(261, 
262), + Some(Interval::new(238, 239)), + Some(Interval::new(292, 293)), + Color::Black, + 3, + ), + VisitedInterval::new(Interval::new(410, 411), None, None, Color::Black, 3), + VisitedInterval::new(Interval::new(514, 515), None, None, Color::Red, 3), + VisitedInterval::new(Interval::new(815, 816), None, None, Color::Red, 3), + VisitedInterval::new(Interval::new(888, 889), None, None, Color::Black, 3), + VisitedInterval::new( + Interval::new(972, 973), + Some(Interval::new(953, 954)), + None, + Color::Black, + 3, + ), + VisitedInterval::new(Interval::new(238, 239), None, None, Color::Red, 4), + VisitedInterval::new(Interval::new(292, 293), None, None, Color::Red, 4), + VisitedInterval::new(Interval::new(953, 954), None, None, Color::Red, 4), + ]; + + let res = map.visit_level(); + assert_eq!(res, expected_before_delete); + + // delete the node "514" + let range514 = Interval::new(514, 515); + let deleted = map.remove(&range514); + assert!(deleted.is_some()); + + let expected_after_delete514 = vec![ + VisitedInterval::new( + Interval::new(510, 511), + Some(Interval::new(82, 83)), + Some(Interval::new(830, 831)), + Color::Black, + 0, + ), + VisitedInterval::new( + Interval::new(82, 83), + Some(Interval::new(11, 12)), + Some(Interval::new(383, 384)), + Color::Black, + 1, + ), + VisitedInterval::new( + Interval::new(830, 831), + Some(Interval::new(647, 648)), + Some(Interval::new(899, 900)), + Color::Black, + 1, + ), + VisitedInterval::new(Interval::new(11, 12), None, None, Color::Black, 2), + VisitedInterval::new( + Interval::new(383, 384), + Some(Interval::new(261, 262)), + Some(Interval::new(410, 411)), + Color::Red, + 2, + ), + VisitedInterval::new( + Interval::new(647, 648), + None, + Some(Interval::new(815, 816)), + Color::Black, + 2, + ), + VisitedInterval::new( + Interval::new(899, 900), + Some(Interval::new(888, 889)), + Some(Interval::new(972, 973)), + Color::Red, + 2, + ), + VisitedInterval::new( + Interval::new(261, 262), + Some(Interval::new(238, 239)), + 
Some(Interval::new(292, 293)), + Color::Black, + 3, + ), + VisitedInterval::new(Interval::new(410, 411), None, None, Color::Black, 3), + VisitedInterval::new(Interval::new(815, 816), None, None, Color::Red, 3), + VisitedInterval::new(Interval::new(888, 889), None, None, Color::Black, 3), + VisitedInterval::new( + Interval::new(972, 973), + Some(Interval::new(953, 954)), + None, + Color::Black, + 3, + ), + VisitedInterval::new(Interval::new(238, 239), None, None, Color::Red, 4), + VisitedInterval::new(Interval::new(292, 293), None, None, Color::Red, 4), + VisitedInterval::new(Interval::new(953, 954), None, None, Color::Red, 4), + ]; + + let res = map.visit_level(); + assert_eq!(res, expected_after_delete514); + + // delete the node "11" + let range11 = Interval::new(11, 12); + let deleted = map.remove(&range11); + assert!(deleted.is_some()); + + let expected_after_delete11 = vec![ + VisitedInterval::new( + Interval::new(510, 511), + Some(Interval::new(383, 384)), + Some(Interval::new(830, 831)), + Color::Black, + 0, + ), + VisitedInterval::new( + Interval::new(383, 384), + Some(Interval::new(261, 262)), + Some(Interval::new(410, 411)), + Color::Black, + 1, + ), + VisitedInterval::new( + Interval::new(830, 831), + Some(Interval::new(647, 648)), + Some(Interval::new(899, 900)), + Color::Black, + 1, + ), + VisitedInterval::new( + Interval::new(261, 262), + Some(Interval::new(82, 83)), + Some(Interval::new(292, 293)), + Color::Red, + 2, + ), + VisitedInterval::new(Interval::new(410, 411), None, None, Color::Black, 2), + VisitedInterval::new( + Interval::new(647, 648), + None, + Some(Interval::new(815, 816)), + Color::Black, + 2, + ), + VisitedInterval::new( + Interval::new(899, 900), + Some(Interval::new(888, 889)), + Some(Interval::new(972, 973)), + Color::Red, + 2, + ), + VisitedInterval::new( + Interval::new(82, 83), + None, + Some(Interval::new(238, 239)), + Color::Black, + 3, + ), + VisitedInterval::new(Interval::new(292, 293), None, None, Color::Black, 3), + 
VisitedInterval::new(Interval::new(815, 816), None, None, Color::Red, 3), + VisitedInterval::new(Interval::new(888, 889), None, None, Color::Black, 3), + VisitedInterval::new( + Interval::new(972, 973), + Some(Interval::new(953, 954)), + None, + Color::Black, + 3, + ), + VisitedInterval::new(Interval::new(238, 239), None, None, Color::Red, 4), + VisitedInterval::new(Interval::new(953, 954), None, None, Color::Red, 4), + ]; + + let res = map.visit_level(); + assert_eq!(res, expected_after_delete11); } - #[test] - fn map_len_will_update() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(100) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - assert_eq!(map.len(), 100); - for i in intervals { - let _ignore = map.remove(&i); + impl Interval { + fn new_point(x: &str) -> Interval { + let mut hx = x.to_owned(); + hx.push('\0'); + Interval { + low: x.to_owned(), + high: hx, } - assert_eq!(map.len(), 0); - }); + } } #[test] - fn check_overlap_is_ok() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) - .take(100) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - let to_check: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) - .take(1000) - .collect(); - let expects: Vec<_> = to_check - .iter() - .map(|ci| intervals.iter().any(|i| ci.overlap(i))) - .collect(); - - for (ci, expect) in to_check.into_iter().zip(expects.into_iter()) { - assert_eq!(map.overlap(&ci), expect); - } - }); + fn test_interval_tree_intersects() { + let mut map = IntervalMap::::new(); + map.insert(Interval::new(String::from("1"), String::from("3")), 123); + + assert!(!map.overlaps(&Interval::new_point("0")), "contains 0"); + assert!(map.overlaps(&Interval::new_point("1")), "missing 1"); + assert!(map.overlaps(&Interval::new_point("11")), "missing 11"); + 
assert!(map.overlaps(&Interval::new_point("2")), "missing 2"); + assert!(!map.overlaps(&Interval::new_point("3")), "contains 3"); } #[test] - fn check_max_is_ok() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - map.check_max(); - } - assert_eq!(map.len(), 1000); - for i in intervals { - let _ignore = map.remove(&i); - map.check_max(); - } - }); + fn test_interval_tree_find_all_overlap() { + let mut map = IntervalMap::::new(); + map.insert(Interval::new(String::from("0"), String::from("1")), 123); + map.insert(Interval::new(String::from("0"), String::from("2")), 456); + map.insert(Interval::new(String::from("5"), String::from("6")), 789); + map.insert(Interval::new(String::from("6"), String::from("8")), 999); + map.insert(Interval::new(String::from("0"), String::from("3")), 0); + + let tmp = map.node_ref(map.node_ref(map.root, Node::max_index), Node::interval); + assert_eq!(tmp, &Interval::new(String::from("6"), String::from("8"))); + + assert_eq!(map.find_all_overlap(&Interval::new_point("0")).len(), 3); + assert_eq!(map.find_all_overlap(&Interval::new_point("1")).len(), 2); + assert_eq!(map.find_all_overlap(&Interval::new_point("2")).len(), 1); + assert_eq!(map.find_all_overlap(&Interval::new_point("3")).len(), 0); + assert_eq!(map.find_all_overlap(&Interval::new_point("5")).len(), 1); + assert_eq!(map.find_all_overlap(&Interval::new_point("55")).len(), 1); + assert_eq!(map.find_all_overlap(&Interval::new_point("6")).len(), 1); } + type TestCaseBFn = dyn Fn(&(&Interval, &())) -> bool; + struct TestCaseB { + f: Box, + wcount: i32, + } #[test] - fn remove_non_exist_interval_will_do_nothing() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals { - let _ignore = map.insert(i, ()); - } - 
assert_eq!(map.len(), 1000); - let to_remove: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in to_remove { - let _ignore = map.remove(&i); - } - assert_eq!(map.len(), 1000); - }); + fn test_interval_tree_visit_exit() { + let ivls = vec![ + Interval::new(1, 10), + Interval::new(2, 5), + Interval::new(3, 6), + Interval::new(4, 8), + ]; + let ivl_range = Interval::new(0, 100); + + let tests = [ + TestCaseB { + f: Box::new(|_| false), + wcount: 1, + }, + TestCaseB { + f: Box::new({ + let ivls = ivls.clone(); + move |v| v.0.low <= ivls[0].low + }), + wcount: 2, + }, + TestCaseB { + f: Box::new({ + let ivls = ivls.clone(); + move |v| v.0.low < ivls[2].low + }), + wcount: 3, + }, + TestCaseB { + f: Box::new(|_| true), + wcount: 4, + }, + ]; + + for (i, tt) in tests.iter().enumerate() { + let mut map = IntervalMap::new(); + ivls.iter().for_each(|v| { + map.insert(v.clone(), ()); + }); + let mut count = 0; + map.filter_iter(&ivl_range).find(|v| { + count += 1; + !(tt.f)(v) + }); + assert_eq!(count, tt.wcount, "#{}: error", i); + } } - #[test] - fn find_all_overlap_is_ok() { - with_map_and_generator(|mut map, mut gen| { - let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .take(1000) - .collect(); - for i in intervals.clone() { - let _ignore = map.insert(i, ()); - } - let to_find: Vec<_> = std::iter::repeat_with(|| gen.next()).take(1000).collect(); + struct TestCaseC { + ivls: Vec>, + chk_ivl: Interval, - let expects: Vec> = to_find - .iter() - .map(|ti| intervals.iter().filter(|i| ti.overlap(i)).collect()) - .collect(); - - for (ti, mut expect) in to_find.into_iter().zip(expects.into_iter()) { - let mut result = map.find_all_overlap(&ti); - expect.sort_unstable(); - result.sort_unstable(); - assert_eq!(expect.len(), result.len()); - for (e, r) in expect.into_iter().zip(result.into_iter()) { - assert_eq!(e, r.0); - } - } - }); + w_contains: bool, } - #[test] - fn iterate_through_map_is_sorted() { - 
with_map_and_generator(|mut map, mut gen| { - let mut intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) - .enumerate() - .take(1000) - .collect(); - for (v, i) in intervals.clone() { - let _ignore = map.insert(i, v); - } - intervals.sort_unstable_by(|a, b| a.1.cmp(&b.1)); - - for ((ei, ev), (v, i)) in map.iter().zip(intervals.iter()) { - assert_eq!(ei, i); - assert_eq!(ev, v); - } - }); + fn test_interval_tree_contains() { + let tests = [ + TestCaseC { + ivls: vec![Interval::new(1, 10)], + chk_ivl: Interval::new(0, 100), + + w_contains: false, + }, + TestCaseC { + ivls: vec![Interval::new(1, 10)], + chk_ivl: Interval::new(1, 10), + + w_contains: true, + }, + TestCaseC { + ivls: vec![Interval::new(1, 10)], + chk_ivl: Interval::new(2, 8), + + w_contains: true, + }, + TestCaseC { + ivls: vec![Interval::new(1, 5), Interval::new(6, 10)], + chk_ivl: Interval::new(1, 10), + + w_contains: false, + }, + TestCaseC { + ivls: vec![Interval::new(1, 5), Interval::new(3, 10)], + chk_ivl: Interval::new(1, 10), + + w_contains: true, + }, + TestCaseC { + ivls: vec![ + Interval::new(1, 4), + Interval::new(4, 7), + Interval::new(3, 10), + ], + chk_ivl: Interval::new(1, 10), + + w_contains: true, + }, + TestCaseC { + ivls: vec![], + chk_ivl: Interval::new(1, 10), + + w_contains: false, + }, + ]; + for (i, tt) in tests.iter().enumerate() { + let mut map = IntervalMap::new(); + tt.ivls.iter().for_each(|v| { + map.insert(v.clone(), ()); + }); + assert_eq!(map.contains(&tt.chk_ivl), tt.w_contains, "#{}: error", i); + } } + struct TestCaseA { + ivls: Vec>, + visit_range: Interval, + } #[test] - fn interval_map_clear_is_ok() { - let mut map = IntervalMap::new(); - map.insert(Interval::new(1, 3), 1); - map.insert(Interval::new(2, 4), 2); - map.insert(Interval::new(6, 7), 3); - assert_eq!(map.len(), 3); - map.clear(); - assert_eq!(map.len(), 0); - assert!(map.is_empty()); - assert_eq!(map.nodes.len(), 1); - assert!(map.nodes[0].is_sentinel()); + fn 
test_interval_tree_sorted_visit() { + let tests = [ + TestCaseA { + ivls: vec![ + Interval::new(1, 10), + Interval::new(2, 5), + Interval::new(3, 6), + ], + visit_range: Interval::new(0, 100), + }, + TestCaseA { + ivls: vec![ + Interval::new(1, 10), + Interval::new(10, 12), + Interval::new(3, 6), + ], + visit_range: Interval::new(0, 100), + }, + TestCaseA { + ivls: vec![ + Interval::new(2, 3), + Interval::new(3, 4), + Interval::new(6, 7), + Interval::new(5, 6), + ], + visit_range: Interval::new(0, 100), + }, + TestCaseA { + ivls: vec![ + Interval::new(2, 3), + Interval::new(2, 4), + Interval::new(3, 7), + Interval::new(2, 5), + Interval::new(3, 8), + Interval::new(3, 5), + ], + visit_range: Interval::new(0, 100), + }, + ]; + for (i, tt) in tests.iter().enumerate() { + let mut map = IntervalMap::new(); + tt.ivls.iter().for_each(|v| { + map.insert(v.clone(), ()); + }); + let mut last = tt.ivls[0].low; + let count = map + .iter() + .filter(|v| v.0.overlap(&tt.visit_range)) + .fold(0, |acc, v| { + assert!( + last <= v.0.low, + "#{}: expected less than {}, got interval {:?}", + i, + last, + v.0 + ); + last = v.0.low; + acc + 1 + }); + assert_eq!(count, tt.ivls.len(), "#{}: did not cover all intervals.", i); + } } } diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..19b8915 --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,248 @@ +use std::fmt::Debug; + +use crate::index::{IndexType, NodeIndex}; +use crate::interval::Interval; +use crate::intervalmap::IntervalMap; +use crate::node::Node; + +/// Pushes a link of nodes on the left to stack. +fn left_link(map_ref: &IntervalMap, mut x: NodeIndex) -> Vec> +where + T: Ord, + Ix: IndexType, +{ + let mut nodes = vec![]; + while !map_ref.node_ref(x, Node::is_sentinel) { + nodes.push(x); + x = map_ref.node_ref(x, Node::left); + } + nodes +} + +/// An iterator over the entries of a `IntervalMap`. 
+#[derive(Debug)] +pub struct Iter<'a, T, V, Ix> +where + T: Ord, +{ + /// Reference to the map + pub(crate) map_ref: &'a IntervalMap, + /// Stack for iteration + pub(crate) stack: Vec>, +} + +impl<'a, T, V, Ix> Iter<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + pub fn new(map_ref: &'a IntervalMap) -> Self { + Iter { + map_ref, + stack: left_link(map_ref, map_ref.root), + } + } +} + +impl<'a, T, V, Ix> Iterator for Iter<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + type Item = (&'a Interval, &'a V); + + #[inline] + fn next(&mut self) -> Option { + if self.stack.is_empty() { + return None; + } + let x = self.stack.pop().unwrap(); + self.stack.extend(left_link( + self.map_ref, + self.map_ref.node_ref(x, Node::right), + )); + Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value()))) + } +} + +/// An into iterator over the entries of a `IntervalMap`. +#[derive(Debug)] +pub struct IntoIter +where + T: Ord, +{ + interval_map: IntervalMap, + /// Stack for iteration + pub(crate) stack: Vec>, +} + +impl IntoIter +where + T: Ord, + Ix: IndexType, +{ + pub fn new(interval_map: IntervalMap) -> Self { + let mut temp = IntoIter { + interval_map, + stack: vec![], + }; + temp.stack = left_link(&temp.interval_map, temp.interval_map.root); + temp + } +} + +impl Iterator for IntoIter +where + T: Ord, + Ix: IndexType, +{ + type Item = (Interval, V); + + #[inline] + fn next(&mut self) -> Option { + if self.stack.is_empty() { + return None; + } + let x = self.stack.pop().unwrap(); + self.stack.extend(left_link( + &self.interval_map, + self.interval_map.node_ref(x, Node::right), + )); + let res = &mut self.interval_map.nodes[x.index()]; + Some((res.interval.take().unwrap(), res.value.take().unwrap())) + } +} + +/// An unsorted iterator over the entries of a `IntervalMap`. 
+#[derive(Debug)] +pub struct UnsortedIter<'a, T, V, Ix> +where + T: Ord, +{ + map_ref: &'a IntervalMap, + /// Stack for iteration + pub(crate) cur: NodeIndex, +} + +impl<'a, T, V, Ix> UnsortedIter<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + pub fn new(map_ref: &'a IntervalMap) -> Self { + UnsortedIter { + map_ref, + cur: NodeIndex::SENTINEL, + } + } +} + +impl<'a, T, V, Ix> Iterator for UnsortedIter<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + type Item = (&'a Interval, &'a V); + + #[inline] + fn next(&mut self) -> Option { + if self.map_ref.is_empty() + || self.cur.index() >= self.map_ref.len() + || self.cur.index() == ::max().index() + { + return None; + } + self.cur = self.cur.incre(); + Some( + self.map_ref + .node_ref(self.cur, |xn| (xn.interval(), xn.value())), + ) + } +} + +/// A filter iterator over the entries of a `IntervalMap`.It's equal to `iter().filter()` +/// but faster than the latter. +#[derive(Debug)] +pub struct FilterIter<'a, T, V, Ix> +where + T: Ord, +{ + /// Reference to the map + pub(crate) map_ref: &'a IntervalMap, + /// Stack for iteration + pub(crate) stack: Vec>, + /// Filter criteria + pub(crate) query: &'a Interval, +} + +fn left_link_with_query( + map_ref: &IntervalMap, + mut x: NodeIndex, + query: &Interval, +) -> Vec> +where + T: Ord, + Ix: IndexType, +{ + let mut stack: Vec> = vec![]; + if map_ref.max(x).is_some_and(|v| v <= &query.low) { + return stack; + } + while map_ref.node_ref(x, Node::sentinel).is_some() { + if map_ref.node_ref(x, Node::interval).low < query.high { + stack.push(x); + } + if map_ref.max(map_ref.node_ref(x, Node::left)) <= Some(&query.low) { + break; + } + x = map_ref.node_ref(x, Node::left); + } + stack +} + +impl<'a, T, V, Ix> FilterIter<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + pub fn new(map_ref: &'a IntervalMap, query: &'a Interval) -> Self { + FilterIter { + map_ref, + stack: left_link_with_query(map_ref, map_ref.root, query), + query, + } + } +} + +impl<'a, T, V, Ix> 
Iterator for FilterIter<'a, T, V, Ix> +where + T: Ord, + Ix: IndexType, +{ + type Item = (&'a Interval, &'a V); + + #[inline] + fn next(&mut self) -> Option { + if self.stack.is_empty() { + return None; + } + let mut x = self.stack.pop().unwrap(); + while !self.map_ref.node_ref(x, Node::interval).overlap(self.query) { + self.stack.extend(left_link_with_query( + self.map_ref, + self.map_ref.node_ref(x, Node::right), + self.query, + )); + if self.stack.is_empty() { + return None; + } + x = self.stack.pop().unwrap(); + } + self.stack.extend(left_link_with_query( + self.map_ref, + self.map_ref.node_ref(x, Node::right), + self.query, + )); + Some(self.map_ref.node_ref(x, |xn| (xn.interval(), xn.value()))) + } +} diff --git a/src/lib.rs b/src/lib.rs index 13e5141..b0235a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,8 +25,13 @@ mod entry; mod index; mod interval; mod intervalmap; +mod iter; mod node; +#[cfg(test)] +mod tests; + pub use entry::{Entry, OccupiedEntry, VacantEntry}; pub use interval::Interval; -pub use intervalmap::{IntervalMap, Iter}; +pub use intervalmap::IntervalMap; +pub use iter::Iter; diff --git a/src/node.rs b/src/node.rs index 63cbb03..38f1d51 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,10 +1,19 @@ +use crate::index::{IndexType, NodeIndex}; use crate::interval::Interval; -use crate::index::{IndexType, NodeIndex}; +#[cfg(feature = "graphviz")] +use std::fmt::Display; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] /// Node of the interval tree #[derive(Debug)] -pub struct Node { +pub struct Node +where + T: Ord, +{ /// Left children pub left: Option>, /// Right children @@ -22,103 +31,261 @@ pub struct Node { pub value: Option, } -// Convenient getter/setter methods impl Node where - Ix: IndexType, + T: Ord, { - pub fn color(&self) -> Color { - self.color - } - pub fn interval(&self) -> &Interval { self.interval.as_ref().unwrap() } - pub fn max_index(&self) -> 
NodeIndex { + pub fn value(&self) -> &V { + self.value.as_ref().unwrap() + } + pub fn value_mut(&mut self) -> &mut V { + self.value.as_mut().unwrap() + } + pub fn take_value(&mut self) -> V { + self.value.take().unwrap() + } + + pub fn set_value(value: V) -> impl FnOnce(&mut Node) -> V { + move |node: &mut Node| node.value.replace(value).unwrap() + } +} + +// Convenient getter/setter methods +impl Node +where + T: Ord, + Ix: IndexType, +{ + pub(crate) fn max_index(&self) -> NodeIndex { self.max_index.unwrap() } - pub fn left(&self) -> NodeIndex { - self.left.unwrap() + pub(crate) fn set_max_index(max_index: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.max_index.replace(max_index); + } } - pub fn right(&self) -> NodeIndex { - self.right.unwrap() + pub(crate) fn left(&self) -> NodeIndex { + self.left.unwrap() } - pub fn parent(&self) -> NodeIndex { - self.parent.unwrap() + pub(crate) fn set_left(left: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.left.replace(left); + } } - pub fn is_sentinel(&self) -> bool { - self.interval.is_none() + pub(crate) fn right(&self) -> NodeIndex { + self.right.unwrap() } - pub fn sentinel(&self) -> Option<&Self> { - self.interval.is_some().then_some(self) + pub(crate) fn set_right(right: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.right.replace(right); + } } - pub fn is_black(&self) -> bool { - matches!(self.color, Color::Black) + pub(crate) fn parent(&self) -> NodeIndex { + self.parent.unwrap() } - pub fn is_red(&self) -> bool { - matches!(self.color, Color::Red) + pub(crate) fn set_parent(parent: NodeIndex) -> impl FnOnce(&mut Node) { + move |node: &mut Node| { + let _ignore = node.parent.replace(parent); + } + } + pub(crate) fn is_sentinel(&self) -> bool { + self.interval.is_none() } - pub fn value(&self) -> &V { - self.value.as_ref().unwrap() + pub(crate) fn sentinel(&self) -> Option<&Self> { + 
self.interval.is_some().then_some(self) } - pub fn value_mut(&mut self) -> &mut V { - self.value.as_mut().unwrap() + pub(crate) fn color(&self) -> Color { + self.color } - pub fn take_value(&mut self) -> V { - self.value.take().unwrap() + pub(crate) fn is_black(&self) -> bool { + matches!(self.color, Color::Black) } - pub fn set_value(value: V) -> impl FnOnce(&mut Node) -> V { - move |node: &mut Node| node.value.replace(value).unwrap() + pub(crate) fn is_red(&self) -> bool { + matches!(self.color, Color::Red) } - pub fn set_color(color: Color) -> impl FnOnce(&mut Node) { + pub(crate) fn set_color(color: Color) -> impl FnOnce(&mut Node) { move |node: &mut Node| { node.color = color; } } +} - pub fn set_max_index(max_index: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.max_index.replace(max_index); +#[cfg(feature = "graphviz")] +impl Node +where + T: Ord + Display, + V: Display, + Ix: IndexType, +{ + pub(crate) fn draw( + &self, + index: usize, + mut writer: W, + ) -> std::io::Result<()> { + writeln!( + writer, + " {} [label=\"i={}\\n{}: {}\\n\", fillcolor={}, style=filled]", + index, + index, + self.interval.as_ref().unwrap(), + self.value.as_ref().unwrap(), + if self.is_red() { "salmon" } else { "grey65" } + )?; + if !self.left.unwrap().is_sentinel() { + writeln!( + writer, + " {} -> {} [label=\"L\"]", + index, + self.left.unwrap().index() + )?; } + if !self.right.unwrap().is_sentinel() { + writeln!( + writer, + " {} -> {} [label=\"R\"]", + index, + self.right.unwrap().index() + )?; + } + Ok(()) } +} - pub fn set_left(left: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.left.replace(left); +#[cfg(feature = "graphviz")] +impl Node +where + T: Display + Ord, + Ix: IndexType, +{ + pub(crate) fn draw_without_value( + &self, + index: usize, + mut writer: W, + ) -> std::io::Result<()> { + writeln!( + writer, + " {} [label=\"i={}: {}\", fillcolor={}, style=filled]", + index, + index, + 
self.interval.as_ref().unwrap(), + if self.is_red() { "salmon" } else { "grey65" } + )?; + if !self.left.unwrap().is_sentinel() { + writeln!( + writer, + " {} -> {} [label=\"L\"]", + index, + self.left.unwrap().index() + )?; + } + if !self.right.unwrap().is_sentinel() { + writeln!( + writer, + " {} -> {} [label=\"R\"]", + index, + self.right.unwrap().index() + )?; } + Ok(()) } +} - pub fn set_right(right: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.right.replace(right); +impl Node +where + T: Ord, + Ix: IndexType, +{ + pub fn new(interval: Interval, value: V, index: NodeIndex) -> Self { + Node { + interval: Some(interval), + value: Some(value), + max_index: Some(index), + left: Some(NodeIndex::SENTINEL), + right: Some(NodeIndex::SENTINEL), + parent: Some(NodeIndex::SENTINEL), + color: Color::Red, } } - pub fn set_parent(parent: NodeIndex) -> impl FnOnce(&mut Node) { - move |node: &mut Node| { - let _ignore = node.parent.replace(parent); + pub fn new_sentinel() -> Self { + Node { + interval: None, + value: None, + max_index: None, + left: None, + right: None, + parent: None, + color: Color::Black, } } } /// The color of the node -#[derive(Debug, Clone, Copy)] -pub enum Color { +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum Color { /// Red node Red, /// Black node Black, } + +#[cfg(feature = "serde")] +#[cfg(test)] +mod tests { + use super::*; + use serde_json::{json, Value}; + + #[test] + fn test_node_serialize_deserialize() { + let node = Node:: { + left: Some(NodeIndex::new(0)), + right: Some(NodeIndex::new(1)), + parent: Some(NodeIndex::new(2)), + color: Color::Red, + interval: Some(Interval::new(10, 20)), + max_index: Some(NodeIndex::new(3)), + value: Some(42), + }; + + // Serialize the node to JSON + let serialized = serde_json::to_string(&node).unwrap(); + let expected = json!({ + "left": 0, + "right": 1, + "parent": 2, + "color": "Red", + 
"interval": [10,20], + "max_index": 3, + "value": 42 + }); + let actual: Value = serde_json::from_str(&serialized).unwrap(); + assert_eq!(expected, actual); + + // Deserialize the node from JSON + let deserialized: Node = serde_json::from_str(&serialized).unwrap(); + assert_eq!(node.left, deserialized.left); + assert_eq!(node.right, deserialized.right); + assert_eq!(node.parent, deserialized.parent); + assert_eq!(node.color, deserialized.color); + assert_eq!(node.interval, deserialized.interval); + assert_eq!(node.max_index, deserialized.max_index); + assert_eq!(node.value, deserialized.value); + } +} diff --git a/src/tests.rs b/src/tests.rs new file mode 100644 index 0000000..50e6013 --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,451 @@ +use std::collections::HashSet; + +use index::NodeIndex; +use node::{Color, Node}; +use rand::{rngs::StdRng, Rng, SeedableRng}; + +use super::*; + +struct IntervalGenerator { + rng: StdRng, + unique: HashSet>, + limit: i32, +} + +impl IntervalGenerator { + fn new(seed: [u8; 32]) -> Self { + const LIMIT: i32 = 1000; + Self { + rng: SeedableRng::from_seed(seed), + unique: HashSet::new(), + limit: LIMIT, + } + } + + fn next(&mut self) -> Interval { + let low = self.rng.gen_range(0..self.limit - 1); + let high = self.rng.gen_range((low + 1)..self.limit); + Interval::new(low, high) + } + + fn next_unique(&mut self) -> Interval { + let mut interval = self.next(); + while self.unique.contains(&interval) { + interval = self.next(); + } + self.unique.insert(interval.clone()); + interval + } + + fn next_with_range(&mut self, range: i32) -> Interval { + let low = self.rng.gen_range(0..self.limit - 1); + let high = self + .rng + .gen_range((low + 1)..self.limit.min(low + 1 + range)); + Interval::new(low, high) + } +} + +impl IntervalMap { + fn check_max(&self) { + let _ignore = self.check_max_inner(self.root); + } + + fn check_max_inner(&self, x: NodeIndex) -> i32 { + if self.node_ref(x, Node::is_sentinel) { + return 0; + } + let l_max = 
self.check_max_inner(self.node_ref(x, Node::left)); + let r_max = self.check_max_inner(self.node_ref(x, Node::right)); + let max = self.node_ref(x, |x| x.interval().high.max(l_max).max(r_max)); + assert_eq!(self.max(x), Some(&max)); + max + } + + /// 1. Every node is either red or black. + /// 2. The root is black. + /// 3. Every leaf (NIL) is black. + /// 4. If a node is red, then both its children are black. + /// 5. For each node, all simple paths from the node to descendant leaves contain the + /// same number of black nodes. + fn check_rb_properties(&self) { + assert!(matches!( + self.node_ref(self.root, Node::color), + Color::Black + )); + self.check_children_color(self.root); + self.check_black_height(self.root); + } + + fn check_children_color(&self, x: NodeIndex) { + if self.node_ref(x, Node::is_sentinel) { + return; + } + self.check_children_color(self.node_ref(x, Node::left)); + self.check_children_color(self.node_ref(x, Node::right)); + if self.node_ref(x, Node::is_red) { + assert!(matches!(self.left_ref(x, Node::color), Color::Black)); + assert!(matches!(self.right_ref(x, Node::color), Color::Black)); + } + } + + fn check_black_height(&self, x: NodeIndex) -> usize { + if self.node_ref(x, Node::is_sentinel) { + return 0; + } + let lefth = self.check_black_height(self.node_ref(x, Node::left)); + let righth = self.check_black_height(self.node_ref(x, Node::right)); + assert_eq!(lefth, righth); + if self.node_ref(x, Node::is_black) { + return lefth + 1; + } + lefth + } +} + +fn with_map_and_generator(test_fn: impl Fn(IntervalMap, IntervalGenerator)) { + let seeds = vec![[0; 32], [1; 32], [2; 32]]; + for seed in seeds { + let gen = IntervalGenerator::new(seed); + let map = IntervalMap::new(); + test_fn(map, gen); + } +} + +#[test] +fn red_black_tree_properties_is_satisfied() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let 
_ignore = map.insert(i, ()); + } + map.check_rb_properties(); + }); +} + +#[test] +fn map_len_will_update() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(100) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + assert_eq!(map.len(), 100); + for i in intervals { + let _ignore = map.remove(&i); + } + assert_eq!(map.len(), 0); + }); +} + +#[test] +fn check_overlap_is_ok() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) + .take(100) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let to_check: Vec<_> = std::iter::repeat_with(|| gen.next_with_range(10)) + .take(1000) + .collect(); + let expects: Vec<_> = to_check + .iter() + .map(|ci| intervals.iter().any(|i| ci.overlap(i))) + .collect(); + + for (ci, expect) in to_check.into_iter().zip(expects.into_iter()) { + assert_eq!(map.overlaps(&ci), expect); + } + }); +} + +#[test] +fn check_max_is_ok() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + map.check_max(); + } + assert_eq!(map.len(), 1000); + for i in intervals { + let _ignore = map.remove(&i); + map.check_max(); + } + }); +} + +#[test] +fn remove_non_exist_interval_will_do_nothing() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals { + let _ignore = map.insert(i, ()); + } + assert_eq!(map.len(), 1000); + let to_remove: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in to_remove { + let _ignore = map.remove(&i); + } + assert_eq!(map.len(), 1000); + }); +} + +#[test] +fn find_all_overlap_is_ok() { + 
with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let to_find: Vec<_> = std::iter::repeat_with(|| gen.next()).take(1000).collect(); + + let expects: Vec> = to_find + .iter() + .map(|ti| intervals.iter().filter(|i| ti.overlap(i)).collect()) + .collect(); + + for (ti, mut expect) in to_find.into_iter().zip(expects.into_iter()) { + let mut result = map.find_all_overlap(&ti); + expect.sort_unstable(); + result.sort_unstable(); + assert_eq!(expect.len(), result.len()); + for (e, r) in expect.into_iter().zip(result.into_iter()) { + assert_eq!(e, r.0); + } + } + }); +} + +#[test] +fn iterate_through_map_is_sorted() { + with_map_and_generator(|mut map, mut gen| { + let mut intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .enumerate() + .take(1000) + .collect(); + for (v, i) in intervals.clone() { + let _ignore = map.insert(i, v); + } + intervals.sort_unstable_by(|a, b| a.1.cmp(&b.1)); + + for ((ei, ev), (v, i)) in map.iter().zip(intervals.iter()) { + assert_eq!(ei, i); + assert_eq!(ev, v); + } + }); +} + +#[test] +fn interval_map_clear_is_ok() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(1, 3), 1); + map.insert(Interval::new(2, 4), 2); + map.insert(Interval::new(6, 7), 3); + assert_eq!(map.len(), 3); + map.clear(); + assert_eq!(map.len(), 0); + assert!(map.is_empty()); + assert_eq!(map.nodes.len(), 1); + assert!(map.nodes[0].is_sentinel()); +} + +#[cfg(test)] +struct TestCaseFilterIter { + query: Interval, + expected: Vec>, +} + +#[test] +fn interval_map_filter_iter_is_ok() { + let tests = [ + TestCaseFilterIter { + query: Interval::new(50, 51), + expected: vec![Interval::new(6, 99)], + }, + TestCaseFilterIter { + query: Interval::new(23, 26), + expected: vec![Interval::new(6, 99), Interval::new(25, 30)], + }, + TestCaseFilterIter { + query: Interval::new(23, 30), + 
expected: vec![ + Interval::new(6, 99), + Interval::new(25, 30), + Interval::new(26, 27), + ], + }, + TestCaseFilterIter { + query: Interval::new(6, 17), + expected: vec![ + Interval::new(0, 23), + Interval::new(6, 99), + Interval::new(8, 9), + Interval::new(15, 23), + Interval::new(16, 21), + ], + }, + ]; + + let mut map = IntervalMap::new(); + map.insert(Interval::new(16, 21), 30); + map.insert(Interval::new(8, 9), 23); + map.insert(Interval::new(0, 23), 3); + map.insert(Interval::new(5, 6), 10); + map.insert(Interval::new(6, 99), 10); + map.insert(Interval::new(15, 23), 23); + map.insert(Interval::new(17, 19), 20); + map.insert(Interval::new(25, 30), 30); + map.insert(Interval::new(26, 27), 26); + map.insert(Interval::new(19, 20), 20); + + for (i, tt) in tests.iter().enumerate() { + let v: Vec<_> = map.filter_iter(&tt.query).map(|v| v.0.clone()).collect(); + assert_eq!(v, tt.expected, "#{}: error", i); + } +} + +#[cfg(feature = "graphviz")] +#[test] +fn interval_map_draw_is_ok() { + let mut map = IntervalMap::new(); + map.insert(Interval::new(16, 21), 30); + map.insert(Interval::new(8, 9), 23); + map.insert(Interval::new(0, 23), 3); + map.insert(Interval::new(5, 6), 10); + map.insert(Interval::new(6, 99), 10); + map.insert(Interval::new(15, 23), 23); + map.insert(Interval::new(17, 19), 20); + map.insert(Interval::new(25, 30), 30); + map.insert(Interval::new(26, 27), 26); + map.insert(Interval::new(19, 20), 20); + + let _ = map.draw("./test.dot"); + + let _ = map.draw_without_value("./test.dot"); +} + +#[cfg(feature = "serde")] +#[test] +fn test_serde_interval_map() { + use serde_json::{json, Value}; + + let mut interval_map = IntervalMap::::new(); + interval_map.insert(Interval::new(1, 5), 10); + interval_map.insert(Interval::new(3, 7), 20); + interval_map.insert(Interval::new(2, 6), 15); + + // Serialize the interval map to JSON + let serialized = serde_json::to_string(&interval_map).unwrap(); + let expected = json!({ + "nodes": [ + // sentinel node + { + 
"left": null, + "right": null, + "parent": null, + "color": "Black", + "interval": null, + "max_index": null, + "value": null + }, + { + "left": 0, + "right": 0, + "parent": 3, + "color": "Red", + "interval": [1,5], + "max_index": 1, + "value": 10 + }, + { + "left": 0, + "right": 0, + "parent": 3, + "color": "Red", + "interval": [3,7], + "max_index": 2, + "value": 20 + }, + { + "left": 1, + "right": 2, + "parent": 0, + "color": "Black", + "interval": [2,6], + "max_index": 2, + "value": 15 + } + ], + "root": 3, + "len": 3 + }); + let actual: Value = serde_json::from_str(&serialized).unwrap(); + assert_eq!(expected, actual); + + // Deserialize the interval map from JSON + let deserialized: IntervalMap = serde_json::from_str(&serialized).unwrap(); + let dv: Vec<_> = deserialized.iter().collect(); + let ev: Vec<_> = interval_map.iter().collect(); + + assert_eq!(ev, dv); +} + +impl Interval { + fn new_point(x: u32) -> Self { + Interval { + low: x, + high: x + 1, + } + } +} + +#[test] +fn test_insert_point() { + let mut interval_map = IntervalMap::::new(); + interval_map.insert(Interval::new_point(5), 10); + interval_map.insert(Interval::new(3, 7), 20); + interval_map.insert(Interval::new(2, 6), 15); + + assert_eq!(interval_map.get(&Interval::new_point(5)).unwrap(), &10); + assert_eq!( + interval_map.find_all_overlap(&Interval::new_point(5)).len(), + 3 + ); +} + +#[test] +fn check_filter_iter_equal_to_iter_filter() { + with_map_and_generator(|mut map, mut gen| { + let intervals: Vec<_> = std::iter::repeat_with(|| gen.next_unique()) + .take(1000) + .collect(); + for i in intervals.clone() { + let _ignore = map.insert(i, ()); + } + let mut map = IntervalMap::new(); + for i in intervals.clone() { + map.insert(i, ()); + } + + for i in intervals { + let filter_iter_res: Vec<_> = map.filter_iter(&i).collect(); + let iter_filter_res: Vec<_> = map.iter().filter(|v| v.0.overlap(&i)).collect(); + assert_eq!(filter_iter_res, iter_filter_res); + } + }); +}