From 3b146c0a296131f893e77183e14ffd13a94395bb Mon Sep 17 00:00:00 2001 From: "James C. Wise" Date: Tue, 19 Nov 2024 21:19:13 -0500 Subject: [PATCH] Many things. You know how my commits go --- Cargo.lock | 448 +++++++++++++++++----- Cargo.toml | 14 +- README.md | 45 ++- benchmarking/benchmark.sh | 2 + default-config.json | 247 +++++++----- src/glue/caching.rs | 11 +- src/lib.rs | 9 +- src/main.rs | 15 +- src/types/config.rs | 8 +- src/types/jobs/job_config.rs | 44 ++- src/types/jobs/job_scratchpad.rs | 2 - src/types/jobs/jobs.rs | 22 +- src/types/rules.rs | 64 ++-- src/types/rules/conditions.rs | 38 +- src/types/rules/mappers.rs | 39 +- src/types/string_matcher.rs | 4 +- src/types/string_modification.rs | 628 ++++++++++++++++++++++--------- src/types/string_source.rs | 142 +++---- src/types/url_part.rs | 8 +- src/util.rs | 10 +- src/util/macros.rs | 15 + 21 files changed, 1234 insertions(+), 581 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d4a33c3..6999622 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,9 +28,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.17" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23a1e53f0f5d86382dafe1cf314783b2044280f406e7e1506368220ad11b1338" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -128,9 +128,9 @@ checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "cc" -version = "1.1.34" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b9470d453346108f93a59222a9a1a5724db32d0a4727b7ab7ace4b4d822dc9" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ "shlex", ] @@ -143,9 +143,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.5.20" +version = "4.5.21" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" dependencies = [ "clap_builder", "clap_derive", @@ -153,9 +153,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.20" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" dependencies = [ "anstream", "anstyle", @@ -177,9 +177,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" [[package]] name = "colorchoice" @@ -206,12 +206,13 @@ dependencies = [ [[package]] name = "cookie_store" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4934e6b7e8419148b6ef56950d277af8561060b56afd59e2aadf98b59fce6baa" +checksum = "2eac901828f88a5241ee0600950ab981148a18f2f756900ffba1b125ca6a3ef9" dependencies = [ "cookie", - "idna 0.5.0", + "document-features", + "idna", "log", "publicsuffix", "serde", @@ -314,6 +315,26 @@ dependencies = [ "syn", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "document-features" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0" +dependencies = [ + "litrs", +] + [[package]] 
name = "dsl_auto_type" version = "0.1.2" @@ -361,9 +382,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "fnv" @@ -470,9 +491,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", @@ -489,9 +510,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" [[package]] name = "heck" @@ -556,9 +577,9 @@ checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "hyper" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" dependencies = [ "bytes", "futures-channel", @@ -626,6 +647,124 @@ dependencies = [ "tracing", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = 
"1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -634,22 +773,23 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.3.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", ] [[package]] -name = "idna" -version = "0.5.0" +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -691,9 +831,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.161" +version = "0.2.164" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libsqlite3-sys" @@ -711,6 +851,18 @@ version = "0.4.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + +[[package]] +name = "litrs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" + [[package]] name = "log" version = "0.4.22" @@ -873,9 +1025,9 @@ dependencies = [ [[package]] name = "psl" -version = "2.1.55" +version = "2.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce9398ad066421139b2e3afe16ea46772ffda30bd9ba57554dc035df5e26edc8" +checksum = "6d8aece545a59dcac10117b6474ef22c32db103b4bf255b7d8e42ae7156a118e" dependencies = [ "psl-types", ] @@ -888,11 +1040,11 @@ checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" [[package]] name = "publicsuffix" -version = "2.2.3" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96a8c1bda5ae1af7f99a2962e49df150414a43d62404644d98dd5c3a93d07457" +checksum = "6f42ea446cab60335f76979ec15e12619a2165b5ae2c12166bef27d283a9fadf" dependencies = [ - "idna 0.3.0", + "idna", "psl-types", ] @@ -919,9 +1071,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -1004,9 +1156,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustix" -version = "0.38.38" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "aa260229e6538e52293eeb577aabd09945a09d6d9cc0fc550ed7529056c2e32a" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags", "errno", @@ -1017,9 +1169,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.16" +version = "0.23.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" +checksum = "7f1a745511c54ba6d4465e8d5dfbd81b45791756de28d4981af70d6dca128f1e" dependencies = [ "once_cell", "rustls-pki-types", @@ -1062,9 +1214,9 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -1084,9 +1236,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -1094,18 +1246,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" 
+checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -1114,9 +1266,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -1173,6 +1325,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" version = "0.11.1" @@ -1205,6 +1363,17 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "system-configuration" version = "0.6.1" @@ -1228,9 +1397,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -1241,18 +1410,38 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede" +checksum = 
"b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] name = "thiserror-impl" -version = "1.0.66" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", @@ -1291,25 +1480,20 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.8.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" -version = "1.41.0" +version = "1.41.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ "backtrace", "bytes", @@ -1349,7 +1533,7 @@ checksum = 
"0d4770b8024672c1101b3f6733eab95b18007dbe0847a8afe341fcf79e06043f" dependencies = [ "either", "futures-util", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -1397,27 +1581,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" - [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - [[package]] name = "untrusted" version = "0.9.0" @@ -1426,12 +1595,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", - "idna 0.5.0", + "idna", "percent-encoding", "serde", ] @@ -1453,11 +1622,23 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror", + "thiserror 2.0.3", "url", "which", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -1570,9 +1751,9 @@ dependencies = [ [[package]] name = "which" -version = "6.0.3" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" +checksum = "c9cad3279ade7346b96e38731a641d7343dd6a53d55083dd54eadfa5a1b38c6b" dependencies = [ "either", "home", @@ -1698,8 +1879,87 @@ version = "0.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index f1030b2..355a4ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,20 +11,20 @@ license = "AGPL-3.0-or-later" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "4.5.20", features = ["derive", "unstable-v5"] } -serde = { version = "1.0.214", features = ["derive"] } -serde_json = "1.0.132" -url = { version = "2.5.2", features = ["serde"] } +clap = { version = "4.5.21", features = ["derive", "unstable-v5"] } +serde = { version = "1.0.215", features = ["derive"] } +serde_json = "1.0.133" +url = { version = "2.5.3", features = ["serde"] } reqwest = { version = "0.12.9", features = ["blocking", "socks"], optional = true } const-str = { version = "0.5.7", optional = true } -thiserror = "1.0.66" +thiserror = "2.0.3" regex = { version = "1.11.1", optional = true } glob = { version = "0.3.1", optional = true } -psl = "2.1.55" +psl = "2.1.57" form_urlencoded = "1.2.1" regex-syntax = { version = "0.8.5", optional = true } percent-encoding = "2.3.1" -which = { version = "6.0.3", optional = true } +which = { 
version = "7.0.0", optional = true } base64 = { version = "0.22.1", optional = true } diesel = { version = "2.2.4", features = ["sqlite", "returning_clauses_for_sqlite_3_35"], optional = true } diff --git a/README.md b/README.md index 33af1ed..9f60f90 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,50 @@ Currently only one list is included in the default config: Currently there is no command line syntax for them. There really should be. -#### Citations +#### But how fast is it? + +Reasonably fast. [`benchmarking/benchmark.sh`] is a Bash script that runs some hyperfine and valgrind benchmarking so I can reliably check for regressions. + +On a mostly stock lenovo thinkpad T460S (Intel i5-6300U (4) @ 3.000GHz) running Kubuntu 24.10 (kernel 6.11.0) that has "not much" going on (FireFox, Steam, etc. are closed), hyperfine gives me the following benchmark: + +(The numbers are in milliseconds) + +```Json +{ + "https://x.com?a=2": { + "0": 5.176, + "1": 5.455, + "10": 5.284, + "100": 5.859, + "1000": 9.194, + "10000": 45.828 + }, + "https://example.com?fb_action_ids&mc_eid&ml_subscriber_hash&oft_ck&s_cid&unicorn_click_id": { + "0": 5.351, + "1": 5.306, + "10": 5.313, + "100": 5.836, + "1000": 11.340, + "10000": 62.017 + }, + "https://www.amazon.ca/UGREEN-Charger-Compact-Adapter-MacBook/dp/B0C6DX66TN/ref=sr_1_5?crid=2CNEQ7A6QR5NM&keywords=ugreen&qid=1704364659&sprefix=ugreen%2Caps%2C139&sr=8-5&ufe=app_do%3Aamzn1.fos.b06bdbbe-20fd-4ebc-88cf-fa04f1ca0da8": { + "0": 5.516, + "1": 5.228, + "10": 5.562, + "100": 6.279, + "1000": 14.972, + "10000": 101.226 + } +} +``` + +In practice, when using [URL Cleaner Site and its userscript](https://github.com/Scripter17/url-cleaner-site), performance is often up to 10x worse because for some reason `GM_XMLHttpRequest` always takes at least 10ms on my machine and, from basic testing, the amazon homepage has 1k URLs and takes about 8-10 requests to clean all of them. 
+ +Mileage varies wildly but as long as you're not spawning a new instance of URL Cleaner for each URL it should be fast enough. + +Please note that URL Cleaner is currently single threaded because I don't know how to do it well. Parallelizing yourself (for example, with [GNU Parallel](https://www.gnu.org/software/parallel/)) may give better results. + +#### Credits The people and projects I have stolen various parts of the default config from. diff --git a/benchmarking/benchmark.sh b/benchmarking/benchmark.sh index b032d6a..2f9d99b 100755 --- a/benchmarking/benchmark.sh +++ b/benchmarking/benchmark.sh @@ -48,6 +48,8 @@ done COMMAND="../target/release/url-cleaner --config ../default-config.json $@" +echo "$COMMAND" + if [ $compile -eq 1 ]; then cargo build -r --config profile.release.strip=false --config profile.release.debug=2 if [ $? -ne 0 ]; then exit 2; fi diff --git a/default-config.json b/default-config.json index 6276c3c..4914db9 100644 --- a/default-config.json +++ b/default-config.json @@ -75,21 +75,22 @@ ], "redirect-hosts": [ "2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.ew", "amzn.to", - "apple.co", "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly", - "cbsn.ws", "cfl.re", "chn.ge", "chng.it", "clckhl.co", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", - "dis.gd", "dlvr.it", "econ.st", "etsy.me", "fal.cn", "fanga.me", "fb.me", "flip.it", "forms.gle", "g.co", "glo.bo", "go.forbes.com", - "go.microsoft.com", "go.nasa.gov", "gofund.me", "goo.gl", "goo.su", "gum.co", "hmstr.fr", "hulu.tv", "ift.tt", "intel.ly", "interc.pt", - "is.gd", "iwe.one", "j.mp", "jbgm.es", "k00.fr", "katy.to", "kck.st", "kre.pe", "l.leparisien.fr", "link.animaapp.com", "linkr.it", - "lnk.to", "loom.ly", "lpc.ca", "msft.it", "mzl.la", "n.pr", "nas.cr", "ninten.do", "ntdo.co.uk", "nvda.ws", "ny.ti", "nyer.cm", - 
"nyp.st", "nyti.ms", "nyto.ms", "on.forbes.com", "on.ft.com", "on.ft.com", "on.msnbc.com", "on.nyc.gov", "onl.bz", "onl.la", - "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "peoplem.ag", "pin.it", "pixiv.me", "play.st", "politi.co", "propub.li", "pulse.ly", - "py.pl", "qr1.be", "rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "s.team", - "s76.co", "shor.tf", "shorturl.at", "spoti.fi", "spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "theatln.tc", "thecut.io", "thr.cm", - "tmz.me", "to.pbs.org", "tps.to", "tr.ee", "trib.al", "u.jd.com", "visitlink.me", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", - "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca" + "apple.co", "b23.tv", "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", + "buff.ly", "bzfd.it", "cbsn.ws", "cfl.re", "chn.ge", "chng.it", "clckhl.co", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", + "dcdr.me", "depop.app.link", "dis.gd", "dlvr.it", "econ.st", "etsy.me", "fal.cn", "fanga.me", "fb.me", "flip.it", "forms.gle", + "g.co", "glo.bo", "go.bsky.app", "go.forbes.com", "go.microsoft.com", "go.nasa.gov", "gofund.me", "goo.gl", "goo.su", "gum.co", + "hmstr.fr", "hulu.tv", "ift.tt", "intel.ly", "interc.pt", "is.gd", "iwe.one", "j.mp", "jbgm.es", "k00.fr", "katy.to", "kck.st", + "kre.pe", "l.leparisien.fr", "link.animaapp.com", "linkr.it", "lnk.to", "loom.ly", "loom.ly", "lpc.ca", "msft.it", "mzl.la", "n.pr", + "nas.cr", "ninten.do", "ntdo.co.uk", "nvda.ws", "ny.ti", "nyer.cm", "nyp.st", "nyti.ms", "nyto.ms", "on.forbes.com", "on.ft.com", + "on.ft.com", "on.msnbc.com", "on.nyc.gov", "onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "peoplem.ag", "pin.it", + "pixiv.me", "play.st", "politi.co", "prn.to", "propub.li", "pulse.ly", "py.pl", "qr1.be", "rb.gy", "rb.gy", "rblx.co", "rdbl.co", + "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "s.team", "s76.co", "shor.tf", 
"shorturl.at", "spoti.fi", "spr.ly", + "spr.ly", "sqex.to", "t.co", "t.ly", "theatln.tc", "thecut.io", "thr.cm", "thrn.co", "tiny.cc", "tmz.me", "to.pbs.org", "tps.to", + "tr.ee", "trib.al", "u.jd.com", "unes.co", "uni.cf", "visitlink.me", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", + "xbx.ly", "xhslink.com", "yrp.ca" ], "redirect-not-subdomains": [ - "lnk.to", "visitlink.me", "goo.gl" + "lnk.to", "visitlink.me", "goo.gl", "o93x.net" ], "utps": [ "Tcsack", "__hsfp", "__hssc", "__hstc", "__io_lv", "__s", "_branch_match_id", "_branch_referrer", "_clde", "_cldee", "_ga", @@ -166,8 +167,8 @@ "value": {"Cache": { "category": "redirect", "key": {"Part": "Whole"}, - "source": {"ExtractBetween": { - "source": {"HttpRequest": {}}, + "value": {"ExtractBetween": { + "value": {"HttpRequest": {}}, "start": {"CommonVar": "start"}, "end": {"CommonVar": "end"} }} @@ -196,8 +197,27 @@ {"job_config": "https://https//example1.com/example2.com/abc", "result": "https://example2.com/abc"}, {"job_config": "https://https//example1.com/user/example2.com/abc", "result": "https://example2.com/abc"}, {"job_config": "https://www.username.example.com", "result": "https://username.example.com"}, + {"job_config": "https://twitter.com/twitter.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://twitter.com/x.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://twitter.com/www.twitter.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://twitter.com/www.x.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://x.com/twitter.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://x.com/x.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://x.com/www.twitter.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://x.com/www.x.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://www.twitter.com/twitter.com/user" , "result": "https://x.com/user"}, + 
{"job_config": "https://www.twitter.com/x.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://www.twitter.com/www.twitter.com/user", "result": "https://x.com/user"}, + {"job_config": "https://www.twitter.com/www.x.com/user" , "result": "https://x.com/user"}, + {"job_config": "https://www.x.com/twitter.com/user" , "result": "https://www.x.com/user"}, + {"job_config": "https://www.x.com/x.com/user" , "result": "https://www.x.com/user"}, + {"job_config": "https://www.x.com/www.twitter.com/user" , "result": "https://www.x.com/user"}, + {"job_config": "https://www.x.com/www.x.com/user" , "result": "https://www.x.com/user"}, {"job_config": "https://bsky.app", "result": "https://bsky.app/"}, {"job_config": "https://fxbsky.app", "result": "https://bsky.app/"}, + {"job_config": "https://bsky.app/profile/user", "result": "https://bsky.app/profile/user.bsky.social"}, + {"job_config": "https://bsky.app/profile/user.bsky", "result": "https://bsky.app/profile/user.bsky.social"}, + {"job_config": "https://bsky.app/profile/user.com", "result": "https://bsky.app/profile/user.com"}, {"job_config": "https://127.0.0.1", "result": "https://127.0.0.1"}, {"job_config": "https://com" , "result": "https://com"}, @@ -268,13 +288,14 @@ {"InSet": "redirect-hosts"}, {"InSet": "bypass.vip-host-without-www-dot-prefixes"}, {"IsOneOf": [ - "1link.club", "action.openmedia.org", "allmylinks.com", "api.linkr.bio", "api.pinterest.com", "awin1.com", "bfy.tw", "buymeacoff.ee", - "buymeacoffee.com", "cach.me", "cash.app", "click.notification.elsevier.com", "deviantart.com", "duckduckgo.com", "duckduckgo.com", - "duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad.onion", "e.emailalerts.cnn.com", "gf.me", "gofundme.com", "gofundme.com", - "goodreads.com", "google.com", "gprivate.com", "href.li", "instagr.am", "instagram.com", "l.instagram.com", "l.threads.com", - "lmddgtfy.net", "lmgtfy2.com", "lnk.bio", "open.substack.com", "out.reddit.com", "pawoo.net", "pixiv.net", 
"preview.tinyurl.com", - "proxy.notsobot.com", "rd.goodreads.com", "shareasale-analytics.com", "shareasale.com", "sketchfab.com", "sketchfab.com", "skfb.ly", - "smarturl.it", "steamcommunity.com", "subscribestar.adult", "substack.com", "t.umblr.com", "tinyurl.com", "toyhou.se", "youtube.com" + "1link.club", "action.openmedia.org", "allmylinks.com", "api.linkr.bio", "api.pinterest.com", "awin1.com", "bfy.tw", "bugzil.la", + "buymeacoff.ee", "buymeacoffee.com", "cach.me", "cash.app", "click.notification.elsevier.com", "deviantart.com", "duckduckgo.com", + "duckduckgo.com", "duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad.onion", "e.emailalerts.cnn.com", "facebook.com", + "gf.me", "gofundme.com", "gofundme.com", "goodreads.com", "google.com", "gprivate.com", "href.li", "instagr.am", "instagram.com", + "l.instagram.com", "l.threads.com", "lmddgtfy.net", "lmgtfy2.com", "lnk.bio", "old.reddit.com", "open.substack.com", "out.reddit.com", + "pawoo.net", "pixiv.net", "preview.tinyurl.com", "proxy.notsobot.com", "rd.goodreads.com", "reddit.com", "shareasale-analytics.com", + "shareasale.com", "sketchfab.com", "sketchfab.com", "skfb.ly", "smarturl.it", "steamcommunity.com", "subscribestar.adult", + "substack.com", "t.umblr.com", "tinyurl.com", "toyhou.se", "youtube.com", "carleton.ca" ]}, {"InSet": "lmgtfy-hosts"}, {"LengthIs": 4}, @@ -283,10 +304,7 @@ }}, {"TreatErrorAsFail": {"PartMatches": { "part": "NotSubdomain", - "matcher": {"Any": [ - {"InSet": "redirect-not-subdomains"}, - {"IsOneOf": ["bsky.social"]} - ]} + "matcher": {"InSet": "redirect-not-subdomains"} }}} ]}, "mapper": {"Rule": { @@ -322,13 +340,13 @@ "try": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Part": {"QueryParam": "url"}}, + "value": {"Part": {"QueryParam": "url"}}, "modification": {"IgnoreError": "UrlDecode"} }} }}, "else": { "SetPart": { "part": "Whole", - "value": {"Modified": {"source": {"Part": "Query"}, "modification": "UrlDecode"}} + "value": {"Modified": 
{"value": {"Part": "Query"}, "modification": "UrlDecode"}} }} }} }, @@ -340,7 +358,7 @@ "part": "HostWithoutWWWDotPrefix", "map": { "t.co": {"IfCondition": { - "condition": {"Not": {"StringIs": {"source": {"ContextVar": "alt_text"}, "value": null}}}, + "condition": {"Not": {"StringIs": {"value": {"ContextVar": "alt_text"}, "value": null}}}, "mapper": {"SetPart": {"part": "Whole", "value": {"ContextVar": "alt_text"}}} }}, "t.umblr.com": {"GetUrlFromQueryParam": "z"}, @@ -357,7 +375,7 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"HttpRequest": {}}, + "value": {"HttpRequest": {}}, "modification": {"All": [ {"ExtractBetween": { "start": "\"originalUrl\":\"", @@ -373,7 +391,7 @@ "href.li": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Part": "Query"}, + "value": {"Part": "Query"}, "modification": "UrlDecode" }} }}, @@ -387,7 +405,7 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"ExtractBetween": { - "source": {"HttpRequest": { + "value": {"HttpRequest": { "http_client_config_diff": {"danger_accept_invalid_certs": true} }}, "start": "href=\"", @@ -406,7 +424,7 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"ExtractBetween": { - "source": {"HttpRequest": { + "value": {"HttpRequest": { "http_client_config_diff": {"danger_accept_invalid_certs": true} }}, "start": "window.location = \"", @@ -446,7 +464,7 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Part": {"QueryParam": "url"}}, + "value": {"Part": {"QueryParam": "url"}}, "modification": {"All": [ {"Replace": {"find": "\n", "replace": ""}}, "Base64Decode" @@ -466,7 +484,7 @@ "e.emailalerts.cnn.com": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Part": "Query"}, + "value": {"Part": "Query"}, "modification": {"All": [ {"KeepNthSegment": {"split": "/", "n": 2}}, {"Remove": 0}, @@ -489,7 +507,7 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Part": {"PathSegment": 
2}}, + "value": {"Part": {"PathSegment": 2}}, "modification": {"All": [ {"KeepNthSegment": {"split": ".", "n": 0}}, "Base64Decode", @@ -500,7 +518,7 @@ }, { "condition": {"PathIs": "/app-link/post"}, - "mapper": {"AllowQueryParams": ["publication_id", "post_id"]} + "mapper": {"AllowQueryParams": ["publication_id", "post_id", "submitLike", "action"]} }, { "condition": {"All": [ @@ -523,7 +541,7 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Part": "Path"}, + "value": {"Part": "Path"}, "modification": {"All": [ {"KeepSegmentRange": {"split": "/", "start": 2}}, "UrlDecode" @@ -576,11 +594,11 @@ "SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Map": { - "source": {"Part": {"QueryParam": "s"}}, + "value": {"Map": { + "value": {"Part": {"QueryParam": "s"}}, "map": { "g": {"Map": { - "source": {"Part": {"QueryParam": "t"}}, + "value": {"Part": {"QueryParam": "t"}}, "map": { "w": "https://www.google.com/search?q=", "i": "https://www.google.com/search?tbm=isch&q=", @@ -623,11 +641,11 @@ {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Cache": { + "value": {"Cache": { "category": "sharesale-escaped-redirect", "key": {"Part": "Whole"}, - "source": {"ExtractBetween": { - "source": {"HttpRequest": {}}, + "value": {"ExtractBetween": { + "value": {"HttpRequest": {}}, "start": "replace('", "end": "')" }} @@ -646,11 +664,11 @@ {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"Cache": { + "value": {"Cache": { "category": "redirect", "key": {"Part": "Whole"}, - "source": {"ExtractBetween": { - "source": {"HttpRequest": {}}, + "value": {"ExtractBetween": { + "value": {"HttpRequest": {}}, "start": "replace('", "end": "')" }} @@ -660,7 +678,13 @@ }} ]} }}, - "mfy.gg": {"SetHost": "metafy.gg"} + "mfy.gg": {"SetHost": "metafy.gg"}, + "bugzil.la": {"All": [ + {"SetHost": "bugzilla.mozilla.org"}, + {"CopyPart": {"from": {"PathSegment": 0}, "to": {"QueryParam": "id"}}}, + {"SetPart": {"part": "Path", 
"value": "/show_bug.cgi"}} + ]}, + "o93x.net": {"GetUrlFromQueryParam": "u"} } } }, @@ -672,12 +696,9 @@ "part": "NotSubdomain", "map": { "visitlink.me": {"SetPart": {"part": "Subdomain", "value": null}}, - "bsky.social": {"IfCondition": { - "condition": {"PartMatches": {"part": {"NoneToEmptyString": "Subdomain"}, "matcher": {"Not": {"IsOneOf": ["", "www"]}}}}, - "mapper": {"All": [ - {"SetPart": {"part": {"BeforePathSegment": 0}, "value": {"Join": {"sources": ["profile/", {"Part": "Subdomain"}, ".bsky.social"]}}}}, - {"SetHost": "bsky.app"} - ]} + "carleton.ca": {"IfCondition": { + "condition": {"PartIs": {"part": {"QueryParam": "action"}, "value": "redirect"}}, + "mapper": {"GetUrlFromQueryParam": "url"} }} } } @@ -698,23 +719,23 @@ "mapper": {"SetPart": { "part": "Whole", "value": {"Modified": { - "source": {"HttpRequest": { + "value": {"HttpRequest": { "url": {"Join": { "sources": [ {"IfSourceIsNone": { - "source": {"NoneTo": { - "source": {"Var": "bypass.vip-api-key"}, + "value": {"NoneTo": { + "value": {"Var": "bypass.vip-api-key"}, "if_none": {"EnvVar": "URL_CLEANER_BYPASS_VIP_API_KEY"} }}, "then": "https://api.bypass.vip/bypass?url=", "else": "https://api.bypass.vip/premium/bypass?url=" }}, - {"Modified": {"source": {"Part": "Whole"}, "modification": "UrlEncode"}} + {"Modified": {"value": {"Part": "Whole"}, "modification": "UrlEncode"}} ] }}, "headers": { "x-api-key": {"NoneTo": { - "source": {"Var": "bypass.vip-api-key"}, + "value": {"Var": "bypass.vip-api-key"}, "if_none": {"EnvVar": "URL_CLEANER_BYPASS_VIP_API_KEY"} }} } @@ -781,7 +802,11 @@ ]}, {"PathIs": "/app-link/post"} ]}, - "sketchfab.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}} + "sketchfab.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}}, + "qudsnen.co": {"Not": {"PartIs": {"part": {"QueryParam": "p"}, "value": null}}}, + "facebook.com": {"PartContains": {"part": "Path", "where": "Start", "value": "/share/r/"}}, + "reddit.com": {"PartContains": {"part": "Path", 
"value": "/s/"}}, + "old.reddit.com": {"PartContains": {"part": "Path", "value": "/s/"}} } }}, {"All": [ @@ -823,7 +848,7 @@ "condition": {"All": [ {"Not": {"FlagIsSet": "no-unmangle"}}, {"Any": [ - {"HostIsOneOf": ["http", "https", "bsky.app", "www.bsky.app"]}, + {"HostIsOneOf": ["http", "https"]}, {"PartContains": {"part": "Path", "value": "http"}}, {"Not": {"PartIs": {"part": {"DomainSegment": 3}, "value": null}}} ]} @@ -885,7 +910,7 @@ ]}, "mapper": {"SetPart": { "part": "Whole", - "value": {"Modified": {"source": {"Part": "Path"}, "modification": {"StripPrefix": "/"}}} + "value": {"Modified": {"value": {"Part": "Path"}, "modification": {"StripPrefix": "/"}}} }} }, { @@ -895,8 +920,8 @@ "part": "NotSubdomain", "value": {"ExtractPart": { "part": "NotSubdomain", - "source": {"Modified": { - "source": {"Part": "Path"}, + "value": {"Modified": { + "value": {"Part": "Path"}, "modification": {"All": ["UrlDecode", {"StripPrefix": "/"}]} }} }} @@ -904,7 +929,7 @@ ]}, "mapper": {"SetPart": { "part": "Whole", - "value": {"Modified": {"source": {"Part": "Path"}, "modification": {"All": ["UrlDecode", {"StripPrefix": "/"}]}}} + "value": {"Modified": {"value": {"Part": "Path"}, "modification": {"All": ["UrlDecode", {"StripPrefix": "/"}]}}} }} }, { @@ -940,25 +965,6 @@ - { - "condition": {"All": [ - {"MaybeWWWDomain": "bsky.app"}, - {"PartContains": {"part": "Path", "where": "End", "value": ".bsky"}} - ]}, - "mapper": {"ModifyPart": {"part": "Path", "modification": {"Append": ".social"}}} - }, - { - "condition": {"All": [ - {"MaybeWWWDomain": "bsky.app"}, - {"PartIs": {"part": {"PathSegment": 0}, "value": "profile"}}, - {"Not": {"PartContains": {"part": {"PathSegment": 1}, "value": "."}}}, - {"Not": {"PartContains": {"part": {"PathSegment": 1}, "value": ":"}}} - ]}, - "mapper": {"ModifyPart": {"part": {"PathSegment": 1}, "modification": {"Append": ".bsky.social"}}} - }, - - - { "comment": "https://www.username.example.com -> https://username.example.com", "condition": {"All": 
[ @@ -1028,7 +1034,6 @@ "map": { "theonion.com" : "RemoveQuery", "teespring.com": "RemoveQuery", - "instagram.com": "RemoveQuery", "twitter.com" : {"SetHost": "x.com"}, "vxtwitter.com": {"SetHost": "x.com"}, "fixvx.com" : {"SetHost": "x.com"}, @@ -1038,9 +1043,12 @@ "condition": {"Not": {"FlagIsSet": "discord-compatibility"}}, "mapper": {"SetPart": {"part": "NotSubdomain", "value": "bsky.app"}} }}, - "bsky.app": {"IfCondition": { - "condition": {"FlagIsSet": "discord-compatibility"}, - "mapper": {"SetPart": {"part": "NotSubdomain", "value": "fxbsky.app"}} + "bsky.social": {"IfCondition": { + "condition": {"PartMatches": {"part": {"NoneToEmptyString": "Subdomain"}, "matcher": {"Not": {"IsOneOf": ["", "www"]}}}}, + "mapper": {"All": [ + {"SetPart": {"part": "Path", "value": {"Join": {"sources": ["/profile/", {"Part": "Subdomain"}, ".bsky.social"]}}}}, + {"SetHost": "bsky.app"} + ]} }}, "youtube.com" : {"All": [ {"RemoveQueryParams": ["si", "feature", "pp"]}, @@ -1074,20 +1082,23 @@ {"SetPart" : {"part": "Path", "value": "watch"}}, {"RemoveQueryParams": ["si", "feature", "pp"]} ]}, - "instagram.com": {"RemoveQueryParams": ["igshid", "igsh", "xmt"]}, - "threads.net" : {"RemoveQueryParams": ["igshid", "igsh", "xmt"]}, + "instagram.com": {"RemoveQueryParams": ["igshid", "igsh", "xmt", "ig_mid"]}, + "threads.net" : {"RemoveQueryParams": ["igshid", "igsh", "xmt", "ig_mid"]}, "stackoverflow.com" : {"RemoveQueryParams": ["so_medium", "so_source", "c"]}, "duckduckgo.com" : {"RemoveQueryParams": ["t", "atb", "ia"]}, "duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad.onion": {"RemoveQueryParams": ["t", "atb", "ia"]}, "washingtonpost.com": {"RemoveQueryParams": ["itid"]}, "cnn.com": {"RemoveQueryParams": ["bt_ee", "bt_ts"]}, - "facebook.com": {"RemoveQueryParams": ["fs", "s", "mibextid"]}, + "facebook.com": {"All": [ + {"IfCondition": { + "condition": {"PartIs": {"part": {"PathSegment": 0}, "value": "reel"}}, + "mapper": "RemoveQuery" + }}, + {"RemoveQueryParams": ["fs", 
"s", "mibextid"]} + ]}, "bbc.com": {"RemoveQueryParamsMatching": {"Contains": {"where": "Start", "value": "at_"}}}, "linktr.ee": {"RemoveQueryParams": ["ltsid"]}, - "theguardian.com": {"All": [ - {"RemoveQueryParams": ["CMP"]}, - {"SetPart": {"part": "Fragment", "value": null}} - ]}, + "theguardian.com": {"RemoveQueryParams": ["CMP"]}, "roblox.com": {"AllowQueryParams": ["keyword", "Keyword"]}, "hp.com": {"RemoveQueryParams": ["jumpid"]}, "lenovo.com": {"RemoveQueryParams": ["IPromoID"]}, @@ -1108,10 +1119,13 @@ "addons.mozilla.org": {"RemoveQueryParams": ["platform", "appver"]}, "amtrak.com": {"RemoveQueryParams": ["atv", "cmp"]}, "support.google.com": {"AllowQueryParams": ["q"]}, - "reddit.com": {"IfCondition": { - "condition": {"Not": {"PathIs": "/search"}}, - "mapper": {"AllowQueryParams": ["f"]} - }}, + "reddit.com": {"All": [ + {"RemoveQueryParams": ["share_id"]}, + {"IfCondition": { + "condition": {"Not": {"PathIs": "/search"}}, + "mapper": {"AllowQueryParams": ["f"]} + }} + ]}, "cnn.com" : {"IfCondition": { "condition": {"PartContains": {"part": {"PathSegment": -1}, "value": "h_", "where": "Start"}}, "mapper": {"SetPart": {"part": {"PathSegment": -1}, "value": null}} @@ -1192,7 +1206,8 @@ }}, "alibaba.com": {"RemoveQueryParams": ["spm", "selectedCarrierCode", "fsb"]}, "fori.io": {"SetHost": "foriio.com"} - } + }, + "pawoo.net": {"RemoveQueryParams": ["provider"]} } }, { @@ -1218,6 +1233,11 @@ "mapper": {"RemoveQueryParams": ["q"]} }}, "x.com": {"Rules": [ + { + "comment": "https://(www\\.)?(twitter|x).com/(www\\.)?(twitter|x).com -> x.com", + "condition": {"PartMatches": {"part": {"PathSegment": 0}, "matcher": {"IsOneOf": ["x.com", "www.x.com", "twitter.com", "www.twitter.com"]}}}, + "mapper": {"SetPart": {"part": {"PathSegment": 0}, "value": null}} + }, { "condition": {"PathIs": "/i/flow/login"}, "mapper": {"SetPart": { @@ -1237,7 +1257,28 @@ "mapper": {"SetPart": {"part": "Host", "value": {"Var": "twitter-embed-domain"}}} } ]}, - 
"overview.mail.yahoo.com": "RemoveQuery" + "overview.mail.yahoo.com": "RemoveQuery", + "bsky.app": {"Rules": [ + { + "condition": {"All": [ + {"PartIs": {"part": {"PathSegment": 0}, "value": "profile"}}, + {"PartContains": {"part": {"PathSegment": 1}, "where": "End", "value": ".bsky"}} + ]}, + "mapper": {"ModifyPart": {"part": "Path", "modification": {"Append": ".social"}}} + }, + { + "condition": {"All": [ + {"PartIs": {"part": {"PathSegment": 0}, "value": "profile"}}, + {"Not": {"PartContains": {"part": {"PathSegment": 1}, "value": "."}}}, + {"Not": {"PartContains": {"part": {"PathSegment": 1}, "value": ":"}}} + ]}, + "mapper": {"ModifyPart": {"part": {"PathSegment": 1}, "modification": {"Append": ".bsky.social"}}} + }, + { + "condition": {"FlagIsSet": "discord-compatibility"}, + "mapper": {"SetPart": {"part": "NotSubdomain", "value": "fxbsky.app"}} + } + ]} } } }, @@ -1415,9 +1456,9 @@ "value": {"Cache": { "category": "onion-location", "key": {"Part": "Origin"}, - "source": {"ExtractPart": { + "value": {"ExtractPart": { "part": "Origin", - "source": {"HttpRequest": {"response_handler": {"Header": "Onion-Location"}}} + "value": {"HttpRequest": {"response_handler": {"Header": "Onion-Location"}}} }} }} } diff --git a/src/glue/caching.rs b/src/glue/caching.rs index 3cabc6c..b6eb1ee 100644 --- a/src/glue/caching.rs +++ b/src/glue/caching.rs @@ -49,18 +49,9 @@ pub struct NewCacheEntry<'a> { /// Convenience wrapper to contain the annoyingness of it all. /// /// Internally it's an [`Arc`] of a [`Mutex`] so cloning is O(1) and sharing immutable references is not a problem. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct Cache(pub Arc>); -impl Default for Cache { - /// Has the "path" of `:memory:`, which just stores the database in memory until the program exits. - /// - /// Seems like a reasonable default. - fn default() -> Self { - Self(Default::default()) - } -} - /// The internals of [`Cache`] that handles lazily connecting. 
pub struct InnerCache { /// The path being connected to. diff --git a/src/lib.rs b/src/lib.rs index 2491843..33d5dd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,9 @@ //! # Examples //! ``` //! use std::borrow::Cow; +//! use std::str::FromStr; //! use url::Url; -//! +//! //! use url_cleaner::types::*; #![cfg_attr(feature = "cache", doc = "use url_cleaner::glue::Cache;")] //! @@ -33,9 +34,9 @@ #![cfg_attr(feature = "cache", doc = " // That's fine because cloning a `Cache` is extremely cheap, because it's an `Arc>`.")] #![cfg_attr(feature = "cache", doc = " cache: config.cache_path.as_str().into(),")] //! // Ideally you'll be handling URLs in bulk. -//! job_config_source: Box::new(vec![ -//! Url::parse("https://example.com?utm_source=url-cleaner-docs").unwrap() -//! ].into_iter().map(|url| Ok(url.into()))) +//! job_configs_source: Box::new([ +//! JobConfig::from_str("https://example.com?utm_source=url-cleaner-docs") +//! ].into_iter()) //! }; //! //! for job in jobs.iter() { diff --git a/src/main.rs b/src/main.rs index 649f20d..071671a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -147,6 +147,7 @@ pub enum CliError { #[error(transparent)] SerdeJsonError(#[from] serde_json::Error) } +/// Shorthand for serializing a string to JSON. 
fn str_to_json_str(s: &str) -> String { serde_json::to_string(s).expect("Serializing a string to never fail.") } @@ -272,10 +273,10 @@ fn main() -> Result { let mut jobs = Jobs { #[cfg(feature = "cache")] cache: args.cache_path.as_deref().unwrap_or(&*config.cache_path).into(), - job_config_source: { - let ret = args.urls.into_iter().map(|url| JobConfig::from_str(&url).map_err(Into::into)); + job_configs_source: { + let ret = args.urls.into_iter().map(|url| JobConfig::from_str(&url)); if !io::stdin().is_terminal() { - Box::new(ret.chain(io::stdin().lines().map(|line| JobConfig::from_str(&line?).map_err(Into::into)))) + Box::new(ret.chain(io::stdin().lines().map(|line| JobConfig::from_str(&line?)))) } else { Box::new(ret) } @@ -328,7 +329,7 @@ fn main() -> Result { }, Err(e) => { println!(); - eprintln!("GetJobError\t{e:?}"); + eprintln!("MakeJobError\t{e:?}"); some_error = true; } } @@ -336,11 +337,11 @@ fn main() -> Result { } #[cfg(feature = "debug-time")] eprintln!("Run Jobs: {:?}", x.elapsed()); - #[cfg(feature = "debug-time")] let x = std::time::Instant::now(); + // #[cfg(feature = "debug-time")] let x = std::time::Instant::now(); - #[cfg(feature = "debug-time")] drop(jobs); + // #[cfg(feature = "debug-time")] drop(jobs); - #[cfg(feature = "debug-time")] eprintln!("Drop Jobs: {:?}", x.elapsed()); + // #[cfg(feature = "debug-time")] eprintln!("Drop Jobs: {:?}", x.elapsed()); #[cfg(feature = "debug-time")] eprintln!("Total: {:?}", start_time.elapsed()); Ok(match (some_ok, some_error) { diff --git a/src/types/config.rs b/src/types/config.rs index 03365cc..4e298d6 100644 --- a/src/types/config.rs +++ b/src/types/config.rs @@ -67,7 +67,7 @@ impl Config { /// If the specified file can't be loaded, returns the error [`GetConfigError::CantLoadConfigFile`]. /// /// If the config contained in the specified file can't be parsed, returns the error [`GetConfigError::CantParseConfigFile`]. 
- pub fn load_from_file(path: &Path) -> Result { + pub fn load_from_file>(path: T) -> Result { serde_json::from_str(&read_to_string(path).map_err(GetConfigError::CantLoadConfigFile)?).map_err(GetConfigError::CantParseConfigFile) } @@ -106,7 +106,7 @@ impl Config { /// If `path` is `Some` and the call to [`Self::load_from_file`] returns an error, that error is returned. #[allow(dead_code, reason = "Public API.")] #[cfg(feature = "default-config")] - pub fn get_default_or_load(path: Option<&Path>) -> Result, GetConfigError> { + pub fn get_default_or_load>(path: Option) -> Result, GetConfigError> { Ok(match path { Some(path) => Cow::Owned(Self::load_from_file(path)?), None => Cow::Borrowed(Self::get_default()?) @@ -119,7 +119,7 @@ impl Config { /// # Errors /// If the default config cannot be parsed, returns the error [`GetConfigError::CantParseDefaultConfig`]. #[cfg(feature = "default-config")] - pub fn get_default_no_cache_or_load(path: Option<&Path>) -> Result { + pub fn get_default_no_cache_or_load>(path: Option) -> Result { Ok(match path { Some(path) => Self::load_from_file(path)?, None => Self::get_default_no_cache()? @@ -189,7 +189,7 @@ pub const DEFAULT_CONFIG_STR: &str = include_str!("../../default-config.json"); #[allow(dead_code, reason = "Public API.")] pub static DEFAULT_CONFIG: OnceLock = OnceLock::new(); -/// An enum containing all possible errors that can happen when loading/parsing a rules into a [`Rules`] +/// An enum containing all possible errors that can happen when loading/parsing a config. #[derive(Debug, Error)] pub enum GetConfigError { /// Could not load the specified config file. 
diff --git a/src/types/jobs/job_config.rs b/src/types/jobs/job_config.rs index 332f7ba..6a98c34 100644 --- a/src/types/jobs/job_config.rs +++ b/src/types/jobs/job_config.rs @@ -2,6 +2,7 @@ use std::error::Error; use std::str::FromStr; +use std::io; use serde::{Serialize, Deserialize}; use url::Url; @@ -11,6 +12,24 @@ use crate::types::*; use crate::util::*; /// Defines how each [`Job`] from a [`Jobs`] should be constructed. +/// +/// When deserializing from a string or using [`FromStr::from_str`]/[`TryFrom<&str>`], if the string starts with `{`, it deserializes the string's value. +/// +/// For example, `{"url": "https://example.com"}` and `"{\"url\": \"https://example.com\"}"` deserialize to the same value. +/// +/// This allows for more flexible APIs where having to input JSON objects is infeasible, like in command line interfaces. +/// ``` +/// # use std::str::FromStr; +/// # use url_cleaner::types::*; +/// assert_eq!( +/// serde_json::from_str::("{\"url\": \"https://example.com\"}").unwrap(), +/// serde_json::from_str::("\"{\\\"url\\\": \\\"https://example.com\\\"}\"").unwrap() +/// ); +/// assert_eq!( +/// JobConfig::from_str("https://example.com").unwrap(), +/// JobConfig::from_str("{\"url\": \"https://example.com\"}").unwrap() +/// ); +/// ``` #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(remote = "Self")] pub struct JobConfig { @@ -31,6 +50,8 @@ impl From for JobConfig { } /// The enum of errors [`JobConfig::from_str`] and [`>::try_from`] can return. +/// +/// Additionally has [`Self::IoError`] and [`Self::Other`] to accommodate [`Jobs::job_configs_source`] iterators. #[derive(Debug, Error)] pub enum MakeJobConfigError { /// Returned when a [`url::ParseError`] is encountered. @@ -38,7 +59,13 @@ pub enum MakeJobConfigError { UrlParseError(#[from] url::ParseError), /// Returned when a [`serde_json::Error`] is encountered.
#[error(transparent)] - SerdeJsonError(#[from] serde_json::Error) + SerdeJsonError(#[from] serde_json::Error), + /// Returned when an [`io::Error`] is encountered. + #[error(transparent)] + IoError(#[from] io::Error), + /// Generic error wrapper. + #[error(transparent)] + Other(#[from] Box) } impl FromStr for JobConfig { @@ -66,18 +93,3 @@ impl TryFrom<&str> for JobConfig { } string_or_struct_magic!(JobConfig); - -/// The enum of errors that can happen when [`Jobs::iter`] tries to get a URL. -#[derive(Debug, Error)] -pub enum JobConfigSourceError { - /// Returned when a [`MakeJobConfigError`] is encountered. - #[error(transparent)] - MakeJobConfigError(#[from] MakeJobConfigError), - /// Returned when a [`std::io::Error`] is encountered. - #[error(transparent)] - IoError(#[from] std::io::Error), - /// Catch-all for user-defined URL sources with errors not listed here. - #[allow(dead_code, reason = "Public API for use in other people's code.")] - #[error(transparent)] - Other(#[from] Box) -} diff --git a/src/types/jobs/job_scratchpad.rs b/src/types/jobs/job_scratchpad.rs index ec046fd..d0112b4 100644 --- a/src/types/jobs/job_scratchpad.rs +++ b/src/types/jobs/job_scratchpad.rs @@ -4,8 +4,6 @@ use std::collections::HashMap; use serde::{Serialize, Deserialize}; -#[allow(unused_imports, reason = "Used in a doc comment.")] -use crate::types::*; use crate::util::*; /// Mutable state that you can use to track data between rules outside of the URL. diff --git a/src/types/jobs/jobs.rs b/src/types/jobs/jobs.rs index 3295fba..ce56737 100644 --- a/src/types/jobs/jobs.rs +++ b/src/types/jobs/jobs.rs @@ -21,7 +21,7 @@ pub struct Jobs<'a> { #[cfg(feature = "cache")] pub cache: Cache, /// The iterator [`JobConfig`]s are acquired from. 
- pub job_config_source: Box>> + pub job_configs_source: Box>> } impl ::core::fmt::Debug for Jobs<'_> { @@ -31,15 +31,15 @@ impl ::core::fmt::Debug for Jobs<'_> { x.field("config", &self.config); #[cfg(feature = "cache")] x.field("cache", &self.cache); - x.field("job_config_source", &"..."); + x.field("job_configs_source", &"..."); x.finish() } } -impl Jobs<'_> { - /// Iterates over [`Job`]s created from [`JobConfig`]s returned from [`Self::job_config_source`]. - pub fn iter(&mut self) -> impl Iterator, GetJobError>> { - (&mut self.job_config_source) +impl<'a> Jobs<'a> { + /// Iterates over [`Job`]s created from [`JobConfig`]s returned from [`Self::job_configs_source`]. + pub fn iter(&'a mut self) -> impl Iterator, MakeJobError>> { + (&mut self.job_configs_source) .map(|job_config_result| match job_config_result { Ok(JobConfig {url, context}) => Ok(Job { url, @@ -56,7 +56,7 @@ impl Jobs<'_> { /// /// Can be more convenient than [`Self::iter`]. #[allow(dead_code, reason = "Public API.")] - pub fn with_job_config(&self, job_config: JobConfig) -> Job<'_> { + pub fn with_job_config(&'a self, job_config: JobConfig) -> Job<'a> { Job { url: job_config.url, config: &self.config, @@ -67,10 +67,10 @@ impl Jobs<'_> { } } -/// The enum of errors [`Jobs::iter`] can return. +/// The enum of errors that can happen when [`Jobs::iter`] tries to get a URL. #[derive(Debug, Error)] -pub enum GetJobError { - /// Returned when a [`JobConfigSourceError`] is encountered. +pub enum MakeJobError { + /// Returned when a [`MakeJobConfigError`] is encountered. #[error(transparent)] - JobConfigSourceError(#[from] JobConfigSourceError) + MakeJobConfigError(#[from] MakeJobConfigError) } diff --git a/src/types/rules.rs b/src/types/rules.rs index 72c5632..5762b4c 100644 --- a/src/types/rules.rs +++ b/src/types/rules.rs @@ -28,7 +28,10 @@ pub enum Rule { /// The part to get. part: UrlPart, /// The map determining which [`Mapper`] to apply. 
- map: HashMap, Mapper> + map: HashMap, Mapper>, + /// If the part isn't in the map, use this. + #[serde(default, skip_serializing_if = "is_default")] + r#else: Option }, /// Gets a certain part of a URL then applies a [`Rule`] depending on the returned value. /// # Errors @@ -39,7 +42,10 @@ pub enum Rule { /// The part to get. part: UrlPart, /// The map determining which [`Rule`] to apply. - map: HashMap, Rule> + map: HashMap, Rule>, + /// If the part isn't in the map, use this. + #[serde(default, skip_serializing_if = "is_default")] + r#else: Option> }, /// Gets a certain part of a URL then applies a [`Rules`] depending on the returned value. /// # Errors @@ -50,7 +56,10 @@ pub enum Rule { /// The part to get. part: UrlPart, /// The map determining which [`Rules`] to apply. - map: HashMap, Rules> + map: HashMap, Rules>, + /// If the part isn't in the map, use this. + #[serde(default, skip_serializing_if = "is_default")] + r#else: Option }, /// Gets a string from a [`StringSource`] then applies a [`Mapper`] depending on the returned value. /// # Rules @@ -61,9 +70,12 @@ pub enum Rule { /// If the call to [`Mapper::apply`] returns an error, that error is returned. StringMap { /// The [`StringSource`] to get the string from. - source: Option, + value: Option, /// The map determining which [`Mapper`] to apply. - map: HashMap, Mapper> + map: HashMap, Mapper>, + /// If the string isn't in the map, use this. + #[serde(default, skip_serializing_if = "is_default")] + r#else: Option }, /// Gets a string from a [`StringSource`] then applies a [`Rule`] depending on the returned value. /// # Rules @@ -74,9 +86,12 @@ pub enum Rule { /// If the call to [`Rule::apply`] returns an error, that error is returned. StringRuleMap { /// The [`StringSource`] to get the string from. - source: Option, + value: Option, /// The map determining which [`Mapper`] to apply. - map: HashMap, Rule> + map: HashMap, Rule>, + /// If the string isn't in the map, use this. 
+ #[serde(default, skip_serializing_if = "is_default")] + r#else: Option> }, /// Gets a string from a [`StringSource`] then applies a [`Rules`] depending on the returned value. /// # Rules @@ -87,9 +102,12 @@ pub enum Rule { /// If the call to [`Rules::apply`] returns an error, that error is returned. StringRulesMap { /// The [`StringSource`] to get the string from. - source: Option, + value: Option, /// The map determining which [`Mapper`] to apply. - map: HashMap, Rules> + map: HashMap, Rules>, + /// If the string isn't in the map, use this. + #[serde(default, skip_serializing_if = "is_default")] + r#else: Option }, /// Runs all the contained rules in a loop until the specified [`StopLoopCondition`] returns [`true`]. /// @@ -183,7 +201,7 @@ pub enum Rule { /// Cannot be serialized or deserialized. #[expect(clippy::type_complexity, reason = "Who cares")] #[cfg(feature = "experiment-custom")] - Custom(FnWrapper Result<(), RuleError>>), + Custom(FnWrapper Result<(), RuleError>>), /// The most basic type of rule. If the call to [`Condition::satisfied_by`] returns `Ok(true)`, calls [`Mapper::apply`] on the provided URL. /// /// This is the last variant because of the [`#[serde(untageed)]`](https://serde.rs/variant-attrs.html#untagged) macro. 
@@ -271,12 +289,12 @@ impl Rule { } else { Err(RuleError::FailedCondition) }, - Self::PartMap {part, map} => Ok(map.get(&part.get(job_state.url).map(|x| x.into_owned())).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), - Self::PartRuleMap {part, map} => Ok(map.get(&part.get(job_state.url).map(|x| x.into_owned())).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), - Self::PartRulesMap {part, map} => Ok(map.get(&part.get(job_state.url).map(|x| x.into_owned())).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), - Self::StringMap {source, map} => Ok(map.get(&get_option_string!(source, job_state)).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), - Self::StringRuleMap {source, map} => Ok(map.get(&get_option_string!(source, job_state)).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), - Self::StringRulesMap {source, map} => Ok(map.get(&get_option_string!(source, job_state)).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), + Self::PartMap {part , map, r#else} => Ok(map.get(&part.get(job_state.url).map(|x| x.into_owned())).or(r#else.as_ref ()).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), + Self::PartRuleMap {part , map, r#else} => Ok(map.get(&part.get(job_state.url).map(|x| x.into_owned())).or(r#else.as_deref()).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), + Self::PartRulesMap {part , map, r#else} => Ok(map.get(&part.get(job_state.url).map(|x| x.into_owned())).or(r#else.as_ref ()).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), + Self::StringMap {value, map, r#else} => Ok(map.get(&get_option_string!(value, job_state) ).or(r#else.as_ref ()).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), + Self::StringRuleMap {value, map, r#else} => Ok(map.get(&get_option_string!(value, job_state) ).or(r#else.as_deref()).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), + Self::StringRulesMap {value, map, r#else} => Ok(map.get(&get_option_string!(value, job_state) ).or(r#else.as_ref ()).ok_or(RuleError::ValueNotInMap)?.apply(job_state)?), 
Self::Repeat{rules, stop_loop_condition, limit} => { // MAKE SURE THIS IS ALWAYS SYNCED UP WITH [`Rules::apply`]!!! @@ -329,19 +347,19 @@ impl Rule { }) }, #[cfg(feature = "experiment-custom")] - Self::Custom(function) => function(self, job_state) + Self::Custom(function) => function(job_state) } } /// Internal method to make sure I don't accidentally commit Debug variants and other stuff unsuitable for the default config. pub(crate) fn is_suitable_for_release(&self, config: &Config) -> bool { assert!(match self { - Self::PartMap {part, map} => part.is_suitable_for_release(config) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)), - Self::PartRuleMap {part, map} => part.is_suitable_for_release(config) && map.iter().all(|(_, rule)| rule.is_suitable_for_release(config)), - Self::PartRulesMap {part, map} => part.is_suitable_for_release(config) && map.iter().all(|(_, rules)| rules.is_suitable_for_release(config)), - Self::StringMap {source, map} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)), - Self::StringRuleMap {source, map} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && map.iter().all(|(_, rule)| rule.is_suitable_for_release(config)), - Self::StringRulesMap {source, map} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && map.iter().all(|(_, rules)| rules.is_suitable_for_release(config)), + Self::PartMap {part , map, r#else} => part.is_suitable_for_release(config) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|x| x.is_suitable_for_release(config)), + Self::PartRuleMap {part , map, r#else} => part.is_suitable_for_release(config) && map.iter().all(|(_, rule)| rule.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|x| x.is_suitable_for_release(config)), + Self::PartRulesMap {part , map, r#else} => 
part.is_suitable_for_release(config) && map.iter().all(|(_, rules)| rules.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|x| x.is_suitable_for_release(config)), + Self::StringMap {value, map, r#else} => value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|x| x.is_suitable_for_release(config)), + Self::StringRuleMap {value, map, r#else} => value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)) && map.iter().all(|(_, rule)| rule.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|x| x.is_suitable_for_release(config)), + Self::StringRulesMap {value, map, r#else} => value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)) && map.iter().all(|(_, rules)| rules.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|x| x.is_suitable_for_release(config)), Self::Repeat {rules, ..} => rules.is_suitable_for_release(config), Self::SharedCondition {condition, rules} => condition.is_suitable_for_release(config) && rules.is_suitable_for_release(config), Self::DontTriggerLoop(rule) => rule.is_suitable_for_release(config), diff --git a/src/types/rules/conditions.rs b/src/types/rules/conditions.rs index 871db41..3688de1 100644 --- a/src/types/rules/conditions.rs +++ b/src/types/rules/conditions.rs @@ -118,14 +118,14 @@ pub enum Condition { /// The map specifying which values should run which conditions. map: HashMap, Self> }, - /// Passes if the condition in `map` whose key is the value returned by `source`'s [`StringSource::get`] passes. + /// Passes if the condition in `map` whose key is the value returned by `value`'s [`StringSource::get`] passes. /// # Errors /// If the call to [`StringSource::get`] returns an error, that error is returned. /// /// If the call to [`Self::satisfied_by`] returns an error, that error is returned. StringMap { /// The string to index the map with. 
- source: Option, + value: Option, /// The map specifying which values should run which conditions. map: HashMap, Self> }, @@ -525,32 +525,32 @@ pub enum Condition { /// If either call to [`StringSource::get`] returns an error, that error is returned. StringIs { /// The left hand side of the `==` operation. - source: Option, + left: Option, /// The right hand side of the `==` operation.` - value: Option + right: Option }, - /// Passes if [`Self::StringContains::source`] contains [`Self::StringContains::value`] at [`Self::StringContains::where`]. + /// Passes if [`Self::StringContains::value`] contains [`Self::StringContains::substring`] at [`Self::StringContains::where`]. /// # Errors /// If either call to [`StringSource::get`] returns an error, that error is returned. /// /// If the call to [`StringLocation::satisfied_by`] returns an error, that error is returned. StringContains { /// The haystack to search in. - source: StringSource, - /// The needle to look for. value: StringSource, + /// The needle to look for. + substring: StringSource, /// Where to look (defaults to [`StringLocation::Anywhere`]). #[serde(default)] r#where: StringLocation }, - /// Passes if [`Self::StringMatches::source`] contains [`Self::StringMatches::matcher`]. + /// Passes if [`Self::StringMatches::value`] contains [`Self::StringMatches::matcher`]. /// # Errors /// If the call to [`StringSource::get`] returns an error, that error is returned. /// /// If the call to [`StringMatcher::satisfied_by`] returns an error, that error is returned. StringMatches { /// The string to match. - source: StringSource, + value: StringSource, /// The matcher. matcher: StringMatcher }, @@ -602,7 +602,7 @@ pub enum Condition { /// Cannot be serialized or deserialized. #[expect(clippy::type_complexity, reason = "Who cares")] #[cfg(feature = "experiment-custom")] - Custom(FnWrapper Result>) + Custom(FnWrapper Result>) } /// An enum of all possible errors a [`Condition`] can return. 
@@ -695,7 +695,7 @@ impl Condition { Some(condition) => condition.satisfied_by(job_state)?, None => false }, - Self::StringMap{source, map} => match map.get(&get_option_string!(source, job_state)) { + Self::StringMap{value, map} => match map.get(&get_option_string!(value, job_state)) { Some(condition) => condition.satisfied_by(job_state)?, None => false }, @@ -764,9 +764,9 @@ impl Condition { // String source. - Self::StringIs {source, value} => get_option_str!(source, job_state)==get_option_str!(value, job_state), - Self::StringContains {source, value, r#where} => r#where.satisfied_by(get_str!(source, job_state, ConditionError), get_str!(value, job_state, ConditionError))?, - Self::StringMatches {source, matcher} => matcher.satisfied_by(get_str!(source, job_state, ConditionError), job_state)?, + Self::StringIs {left, right} => get_option_str!(left, job_state)==get_option_str!(right, job_state), + Self::StringContains {value, substring, r#where} => r#where.satisfied_by(get_str!(value, job_state, ConditionError), get_str!(substring, job_state, ConditionError))?, + Self::StringMatches {value, matcher} => matcher.satisfied_by(get_str!(value, job_state, ConditionError), job_state)?, // Commands. @@ -786,7 +786,7 @@ impl Condition { })? }, #[cfg(feature = "experiment-custom")] - Self::Custom(function) => function(self, job_state)? + Self::Custom(function) => function(job_state)? 
}) } @@ -799,7 +799,7 @@ impl Condition { Self::All(conditions) => conditions.iter().all(|condition| condition.is_suitable_for_release(config)), Self::Any(conditions) => conditions.iter().all(|condition| condition.is_suitable_for_release(config)), Self::PartMap {part, map} => part.is_suitable_for_release(config) && map.iter().all(|(_, condition)| condition.is_suitable_for_release(config)), - Self::StringMap {source, map} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && map.iter().all(|(_, condition)| condition.is_suitable_for_release(config)), + Self::StringMap {value, map} => value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)) && map.iter().all(|(_, condition)| condition.is_suitable_for_release(config)), Self::TreatErrorAsPass(condition) => condition.is_suitable_for_release(config), Self::TreatErrorAsFail(condition) => condition.is_suitable_for_release(config), Self::TryElse {r#try, r#else} => r#try.is_suitable_for_release(config) && r#else.is_suitable_for_release(config), @@ -809,9 +809,9 @@ impl Condition { Self::PartMatches {part, matcher} => part.is_suitable_for_release(config) && matcher.is_suitable_for_release(config), Self::VarIs {name, value} => name.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), Self::FlagIsSet(name) => name.is_suitable_for_release(config) && check_docs!(config, flags, name), - Self::StringIs {source, value} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), - Self::StringContains {source, value, r#where} => source.is_suitable_for_release(config) && value.is_suitable_for_release(config) && r#where.is_suitable_for_release(config), - Self::StringMatches {source, matcher} => source.is_suitable_for_release(config) && matcher.is_suitable_for_release(config), + Self::StringIs {left, right} => left.as_ref().is_none_or(|left| 
left.is_suitable_for_release(config)) && right.as_ref().is_none_or(|right| right.is_suitable_for_release(config)), + Self::StringContains {value, substring, r#where} => value.is_suitable_for_release(config) && substring.is_suitable_for_release(config) && r#where.is_suitable_for_release(config), + Self::StringMatches {value, matcher} => value.is_suitable_for_release(config) && matcher.is_suitable_for_release(config), #[cfg(feature = "commands")] Self::CommandExists (_) => false, #[cfg(feature = "commands")] Self::CommandExitStatus {..} => false, Self::Always | Self::Never | Self::Error | Self::MaybeWWWDomain(_) | diff --git a/src/types/rules/mappers.rs b/src/types/rules/mappers.rs index 46d4c32..68e4201 100644 --- a/src/types/rules/mappers.rs +++ b/src/types/rules/mappers.rs @@ -116,7 +116,7 @@ pub enum Mapper { #[serde(default)] r#else: Option> }, - /// Indexes `map` with the string returned by `source` and applies that mapper. + /// Indexes `map` with the string returned by `value` and applies that mapper. /// # Errors /// If the call to [`StringSource::get`] returns an error, that error is returned. /// @@ -125,7 +125,7 @@ pub enum Mapper { /// If the call to [`Mapper::apply`] returns an error, that error is returned. StringMap { /// The string to index `map` with. - source: Option, + value: Option, /// The map specifying which strings should apply which mapper. map: HashMap, Self>, /// The mapper to use if the part is [`None`] and there is no [`None`] key in `map`. @@ -438,7 +438,7 @@ pub enum Mapper { /// Cannot be serialized or deserialized. #[expect(clippy::type_complexity, reason = "Who cares")] #[cfg(feature = "experiment-custom")] - Custom(FnWrapper Result<(), MapperError>>) + Custom(FnWrapper Result<(), MapperError>>) } /// Individual links in the [`Mapper::ConditionChain`] chain. 
@@ -584,23 +584,18 @@ impl Mapper { } }, Self::All(mappers) => { - let mut temp_url = job_state.url.clone(); - let mut temp_scratchpad = job_state.scratchpad.clone(); - let mut temp_job_state = JobState { - url: &mut temp_url, - params: job_state.params, - scratchpad: &mut temp_scratchpad, - context: job_state.context, - #[cfg(feature = "cache")] - cache: job_state.cache, - commons: job_state.commons, - common_args: job_state.common_args, - }; + let old_url = job_state.url.clone(); + let old_scratchpad = job_state.scratchpad.clone(); for mapper in mappers { - mapper.apply(&mut temp_job_state)?; + match mapper.apply(job_state) { + Ok(_) => {}, + Err(e) => { + *job_state.url = old_url; + *job_state.scratchpad = old_scratchpad; + return Err(e); + } + } } - *job_state.scratchpad = temp_scratchpad; - *job_state.url = temp_url; }, Self::AllNoRevert(mappers) => { for mapper in mappers { @@ -621,8 +616,8 @@ impl Mapper { _ => Err(MapperError::MapperNotFound)? }.apply(job_state)? }, - Self::StringMap {source, map, if_null, r#else} => { - let key = get_option_string!(source, job_state); + Self::StringMap {value, map, if_null, r#else} => { + let key = get_option_string!(value, job_state); match (key.is_none(), map.get(&key), if_null, r#else) { (_ , Some(mapper), _ , _ ) => mapper, (true, _ , Some(mapper), _ ) => mapper, @@ -794,7 +789,7 @@ impl Mapper { })? }, #[cfg(feature = "experiment-custom")] - Self::Custom(function) => function(self, job_state)? + Self::Custom(function) => function(job_state)? 
}; Ok(()) } @@ -808,7 +803,7 @@ impl Mapper { Self::AllNoRevert(mappers) => mappers.iter().all(|mapper| mapper.is_suitable_for_release(config)), Self::AllIgnoreError(mappers) => mappers.iter().all(|mapper| mapper.is_suitable_for_release(config)), Self::PartMap {part, map, if_null, r#else} => part.is_suitable_for_release(config) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)) && if_null.as_ref().is_none_or(|if_null| if_null.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|r#else| r#else.is_suitable_for_release(config)), - Self::StringMap {source, map, if_null, r#else} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)) && if_null.as_ref().is_none_or(|if_null| if_null.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|r#else| r#else.is_suitable_for_release(config)), + Self::StringMap {value, map, if_null, r#else} => value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)) && map.iter().all(|(_, mapper)| mapper.is_suitable_for_release(config)) && if_null.as_ref().is_none_or(|if_null| if_null.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|r#else| r#else.is_suitable_for_release(config)), Self::IgnoreError(mapper) => mapper.is_suitable_for_release(config), Self::TryElse {r#try, r#else} => r#try.is_suitable_for_release(config) && r#else.is_suitable_for_release(config), Self::FirstNotError(mappers) => mappers.iter().all(|mapper| mapper.is_suitable_for_release(config)), diff --git a/src/types/string_matcher.rs b/src/types/string_matcher.rs index 3a5d92d..e270c00 100644 --- a/src/types/string_matcher.rs +++ b/src/types/string_matcher.rs @@ -245,7 +245,7 @@ pub enum StringMatcher { /// Cannot be serialized or deserialized. 
#[expect(clippy::type_complexity, reason = "Who cares")] #[cfg(feature = "experiment-custom")] - Custom(FnWrapper Result>) + Custom(FnWrapper Result>) } #[cfg(feature = "regex")] @@ -437,7 +437,7 @@ impl StringMatcher { )? }, #[cfg(feature = "experiment-custom")] - Self::Custom(function) => function(self, haystack, job_state)?, + Self::Custom(function) => function(haystack, job_state)?, }) } diff --git a/src/types/string_modification.rs b/src/types/string_modification.rs index c294be9..12e26d6 100644 --- a/src/types/string_modification.rs +++ b/src/types/string_modification.rs @@ -332,133 +332,6 @@ pub enum StringModification { /// The end of the range to keep. end: Option }, - /// Splits the provided string by `split` and keeps only the `n`th segment. - /// # Errors - /// If the `n`th segment is not found, returns the error [`StringModificationError::SegmentNotFound`]. - KeepNthSegment { - /// The value to split the string by. - split: StringSource, - /// The index of the segment to keep. - n: isize - }, - /// Splits the provided string by `split` and keeps only the segments in the specified range. - /// # Errors - /// If the segment range is not found, returns the error [`StringModificationError::SegmentRangeNotFound`]. - KeepSegmentRange { - /// The value to split the string by. - split: StringSource, - /// The start of the range of segments to keep. - start: Option, - /// The end of the range of segments to keep. - end: Option - }, - /// Splits the provided string by `split`, replaces the `n`th segment with `value` or removes the segment if `value` is `None`, then joins the segments back together. - /// # Errors - /// If `n` is not in the range of of segments, returns the error [`StringModificationError::SegmentNotFound`]. 
- /// # Examples - /// ``` - /// # use url_cleaner::types::*; - /// url_cleaner::job_state!(job_state;); - /// - /// let mut x = "a.b.c.d.e.f".to_string(); - /// StringModification::SetNthSegment{split: ".".into(), n: 1, value: Some( "1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.c.d.e.f"); - /// StringModification::SetNthSegment{split: ".".into(), n: -1, value: Some("-1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.c.d.e.-1"); - /// StringModification::SetNthSegment{split: ".".into(), n: -2, value: None}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.c.d.-1"); - /// StringModification::SetNthSegment{split: ".".into(), n: 5, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); - /// StringModification::SetNthSegment{split: ".".into(), n: -6, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); - /// StringModification::SetNthSegment{split: ".".into(), n: -5, value: Some("-5".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "-5.1.c.d.-1"); - /// ``` - SetNthSegment { - /// The value to split the string by. - split: StringSource, - /// The index of the segment to modify. - n: isize, - /// The value to place at the segment index. If `None` then the segment is erased. - value: Option - }, - /// Splits the provided string by `split`, replaces the range of segments specified by `start` and `end` with `value`, then joins the segments back together. - /// # Errors - /// If either call to [`StringSource::get`] returns an error, that error is returned. - /// - /// If there is no segment at `start` (or `0` if `start` is [`None`]), returns the error [`StringModificationError::SegmentNotFound`]. - /// - /// If the segment range is not found, returns the error [`StringModificationError::SegmentRangeNotFound`]. - SetSegmentRange { - /// The value to split the string by. 
- split: StringSource, - /// The start of the range of segments to replace. - start: Option, - /// The end of the range of segments to replace. - end: Option, - /// The value to replace the segments with. - value: Option - }, - /// Like [`Self::SetNthSegment`] except it inserts `value` before the `n`th segment instead of overwriting. - /// # Errors - /// If `n` is not in the range of of segments, returns the error [`StringModificationError::SegmentNotFound`]. - /// - /// Please note that trying to append a new segment at the end still errors. - /// # Examples - /// ``` - /// # use url_cleaner::types::*; - /// url_cleaner::job_state!(job_state;); - /// - /// let mut x = "a.b.c".to_string(); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: 1, value: Some( "1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.b.c"); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: -1, value: Some("-1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.b.-1.c"); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: 4, value: Some( "4".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.b.-1.4.c"); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: 6, value: Some( "6".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.1.b.-1.4.c.6"); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: 8, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: -8, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); - /// StringModification::InsertSegmentBefore{split: ".".into(), n: -7, value: Some("-7".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "-7.a.1.b.-1.4.c.6"); - /// ``` - InsertSegmentBefore { - /// The value to split the string by. 
- split: StringSource, - /// The segment index to insert before. - n: isize, - /// The value to insert. - value: Option - }, - /// Like [`Self::SetNthSegment`] except it inserts `value` after the `n`th segment instead of overwriting. - /// # Errors - /// If `n` is not in the range of of segments, returns the error [`StringModificationError::SegmentNotFound`]. - /// - /// Please note that trying to append a new segment at the end still errors. - /// # Examples - /// ``` - /// # use url_cleaner::types::*; - /// url_cleaner::job_state!(job_state;); - /// - /// let mut x = "a.b.c".to_string(); - /// StringModification::InsertSegmentAfter{split: ".".into(), n: 1, value: Some( "1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.b.1.c"); - /// StringModification::InsertSegmentAfter{split: ".".into(), n: -1, value: Some("-1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.b.1.c.-1"); - /// StringModification::InsertSegmentAfter{split: ".".into(), n: 4, value: Some( "4".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.b.1.c.-1.4"); - /// StringModification::InsertSegmentAfter{split: ".".into(), n: 6, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); - /// StringModification::InsertSegmentAfter{split: ".".into(), n: -7, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); - /// StringModification::InsertSegmentAfter{split: ".".into(), n: -6, value: Some("-6".into())}.apply(&mut x, &job_state.to_view()).unwrap(); - /// assert_eq!(&x, "a.-6.b.1.c.-1.4"); - /// ``` - InsertSegmentAfter { - /// The value to split the string by. - split: StringSource, - /// The segment index to insert before. - n: isize, - /// The value to insert. - value: Option - }, /// [`Regex::captures`] and [`::regex::Captures::expand`]. 
/// # Errors /// When the call to [`Regex::captures`] returns [`None`], returns the error [`StringModificationError::RegexMatchNotFound`] @@ -603,6 +476,201 @@ pub enum StringModification { /// The modification to apply. modification: Box }, + /// Splits the provided string by `split` and keeps only the `n`th segment. + /// # Errors + /// If the `n`th segment is not found, returns the error [`StringModificationError::SegmentNotFound`]. + KeepNthSegment { + /// The value to split the string by. + split: StringSource, + /// The index of the segment to keep. + n: isize + }, + /// Splits the provided string by `split` and keeps only the segments in the specified range. + /// # Errors + /// If the segment range is not found, returns the error [`StringModificationError::SegmentRangeNotFound`]. + KeepSegmentRange { + /// The value to split the string by. + split: StringSource, + /// The start of the range of segments to keep. + start: Option, + /// The end of the range of segments to keep. + end: Option + }, + /// Splits the provided string by `split`, replaces the `n`th segment with `value` or removes the segment if `value` is `None`, then joins the segments back together. + /// # Errors + /// If `n` is not in the range of of segments, returns the error [`StringModificationError::SegmentNotFound`]. + /// + /// If either call to [`StringSource::get`] returns an error, that error is returned. 
+ /// # Examples + /// ``` + /// # use url_cleaner::types::*; + /// url_cleaner::job_state!(job_state;); + /// + /// let mut x = "a.b.c.d.e.f".to_string(); + /// StringModification::SetNthSegment{split: ".".into(), n: 1, value: Some( "1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.c.d.e.f"); + /// StringModification::SetNthSegment{split: ".".into(), n: -1, value: Some("-1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.c.d.e.-1"); + /// StringModification::SetNthSegment{split: ".".into(), n: -2, value: None}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.c.d.-1"); + /// StringModification::SetNthSegment{split: ".".into(), n: 5, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// StringModification::SetNthSegment{split: ".".into(), n: -6, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// StringModification::SetNthSegment{split: ".".into(), n: -5, value: Some("-5".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "-5.1.c.d.-1"); + /// ``` + SetNthSegment { + /// The value to split the string by. + split: StringSource, + /// The index of the segment to modify. + n: isize, + /// The value to set. If `None` then the segment is removed. + value: Option }, + /// Finds the `n`th segment matching `matcher` and sets it to `value`. + /// # Errors + /// If `n` is not in the range of segments, returns the error [`StringModificationError::SegmentNotFound`]. + /// + /// If the call to [`StringMatcher::satisfied_by`] returns an error, that error is returned. + /// + /// If either call to [`StringSource::get`] returns an error, that error is returned. + SetNthMatchingSegment { + /// The value to split the string by. + split: StringSource, + /// The index of the segments to modify. + n: isize, + /// The [`StringMatcher`] to test each segment with. + matcher: Box, + /// The value to set.
If `None` then the segment is removed. + value: Option }, + /// # Examples + /// ``` + /// # use url_cleaner::types::*; + /// url_cleaner::job_state!(job_state;); + /// + /// let modification = StringModification::SetAroundNthMatchingSegment { + /// split: " ".into(), + /// n: 0, + /// matcher: Box::new(StringMatcher::Equals("b".into())), + /// shift: 1, + /// value: None + /// }; + /// + /// let mut x = "a b c".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(x, "a b"); + /// + /// let mut x = "a b".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// + /// + /// + /// let modification = StringModification::SetAroundNthMatchingSegment { + /// split: " ".into(), + /// n: 0, + /// matcher: Box::new(StringMatcher::Equals("b".into())), + /// shift: -1, + /// value: None + /// }; + /// + /// let mut x = "a b c".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(x, "b c"); + /// + /// let mut x = "b c".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// ``` + SetAroundNthMatchingSegment { + /// The value to split the string by. + split: StringSource, + /// The index of the segments to modify. + n: isize, + /// The [`StringMatcher`] to test each segment with. + matcher: Box, + /// The offset of the segment to set. + shift: isize, + /// The value to set. If `None` then the segment is removed. + value: Option }, + /// Splits the provided string by `split`, replaces the range of segments specified by `start` and `end` with `value`, then joins the segments back together. + /// # Errors + /// If either call to [`StringSource::get`] returns an error, that error is returned. + /// + /// If there is no segment at `start` (or `0` if `start` is [`None`]), returns the error [`StringModificationError::SegmentNotFound`].
+ /// + /// If the segment range is not found, returns the error [`StringModificationError::SegmentRangeNotFound`]. + SetSegmentRange { + /// The value to split the string by. + split: StringSource, + /// The start of the range of segments to replace. + start: Option, + /// The end of the range of segments to replace. + end: Option, + /// The value to replace the segments with. + value: Option + }, + /// Like [`Self::SetNthSegment`] except it inserts `value` before the `n`th segment instead of overwriting. + /// # Errors + /// If `n` is not in the range of of segments, returns the error [`StringModificationError::SegmentNotFound`]. + /// + /// Please note that trying to append a new segment at the end still errors. + /// # Examples + /// ``` + /// # use url_cleaner::types::*; + /// url_cleaner::job_state!(job_state;); + /// + /// let mut x = "a.b.c".to_string(); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: 1, value: Some( "1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.b.c"); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: -1, value: Some("-1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.b.-1.c"); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: 4, value: Some( "4".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.b.-1.4.c"); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: 6, value: Some( "6".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.1.b.-1.4.c.6"); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: 8, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: -8, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// StringModification::InsertSegmentBefore{split: ".".into(), n: -7, value: Some("-7".into())}.apply(&mut 
x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "-7.a.1.b.-1.4.c.6"); + /// ``` + InsertSegmentBefore { + /// The value to split the string by. + split: StringSource, + /// The segment index to insert before. + n: isize, + /// The value to insert. + value: Option + }, + /// Like [`Self::SetNthSegment`] except it inserts `value` after the `n`th segment instead of overwriting. + /// # Errors + /// If `n` is not in the range of of segments, returns the error [`StringModificationError::SegmentNotFound`]. + /// + /// Please note that trying to append a new segment at the end still errors. + /// # Examples + /// ``` + /// # use url_cleaner::types::*; + /// url_cleaner::job_state!(job_state;); + /// + /// let mut x = "a.b.c".to_string(); + /// StringModification::InsertSegmentAfter{split: ".".into(), n: 1, value: Some( "1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.b.1.c"); + /// StringModification::InsertSegmentAfter{split: ".".into(), n: -1, value: Some("-1".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.b.1.c.-1"); + /// StringModification::InsertSegmentAfter{split: ".".into(), n: 4, value: Some( "4".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.b.1.c.-1.4"); + /// StringModification::InsertSegmentAfter{split: ".".into(), n: 6, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// StringModification::InsertSegmentAfter{split: ".".into(), n: -7, value: Some( "E".into())}.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// StringModification::InsertSegmentAfter{split: ".".into(), n: -6, value: Some("-6".into())}.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(&x, "a.-6.b.1.c.-1.4"); + /// ``` + InsertSegmentAfter { + /// The value to split the string by. + split: StringSource, + /// The segment index to insert before. + n: isize, + /// The value to insert. 
+ value: Option + }, /// # Examples /// ``` /// # use url_cleaner::types::*; @@ -628,6 +696,92 @@ pub enum StringModification { /// The modification to apply. modification: Box }, + /// Modifies the `n`th segment that matches `matcher`. + /// # Examples + /// ``` + /// # use url_cleaner::types::*; + /// url_cleaner::job_state!(job_state;); + /// + /// let mut x = "aaa aaaa aaaa".to_string(); + /// StringModification::ModifyNthMatchingSegment { + /// split: " ".into(), + /// n: 1, + /// matcher: Box::new(StringMatcher::LengthIs(4)), + /// modification: Box::new(StringModification::Set("zzzz".into())) + /// }.apply(&mut x, &job_state.to_view()).unwrap(); + /// + /// assert_eq!(x, "aaa aaaa zzzz"); + /// ``` + ModifyNthMatchingSegment { + /// The value to split the string by. + split: StringSource, + /// The index of the segments to modify. + n: isize, + /// The [`StringMatcher`] to test each segment with. + matcher: Box, + /// The [`Self`] to apply. + modification: Box + }, + /// For each `n` in `ns`, modifies the `n`th segment that matches `matcher`. + ModifyMatchingSegments { + /// The value to split the string by. + split: StringSource, + /// The indices of the segments to modify. + ns: Vec, + /// The [`StringMatcher`] to test each segment with. + matcher: Box, + /// The [`Self`] to apply.
+ modification: Box + }, + /// # Examples + /// ``` + /// # use url_cleaner::types::*; + /// url_cleaner::job_state!(job_state;); + /// + /// let modification = StringModification::ModifyAroundNthMatchingSegment { + /// split: " ".into(), + /// n: 0, + /// matcher: Box::new(StringMatcher::Equals("b".into())), + /// shift: 1, + /// modification: Box::new(StringModification::Set("-".into())) + /// }; + /// + /// let mut x = "a b c".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(x, "a b -"); + /// + /// let mut x = "a b".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// + /// + /// + /// let modification = StringModification::ModifyAroundNthMatchingSegment { + /// split: " ".into(), + /// n: 0, + /// matcher: Box::new(StringMatcher::Equals("b".into())), + /// shift: -1, + /// modification: Box::new(StringModification::Set("-".into())) + /// }; + /// + /// let mut x = "a b c".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap(); + /// assert_eq!(x, "- b c"); + /// + /// let mut x = "b c".to_string(); + /// modification.apply(&mut x, &job_state.to_view()).unwrap_err(); + /// ``` + ModifyAroundNthMatchingSegment { + /// The value to split the string by. + split: StringSource, + /// The index of the segments to modify. + n: isize, + /// The [`StringMatcher`] to test each segment with. + matcher: Box, + /// The offset of the segment to modify. + shift: isize, + /// The [`Self`] to apply to the shifted segment. + modification: Box + }, /// If the provided string is in the specified map, return the value of its corresponding [`StringSource`]. /// # Errors /// If the provided string is not in the specified map, returns the error [`StringModificationError::StringNotInMap`]. @@ -679,8 +833,6 @@ pub enum StringModification { /// What do do when a [`char`] that isn't in `map` is found.
not_found_behavior: CharNotFoundBehavior }, - /// Uses a [`Self`] from the [`JobState::commons`]'s [`Commons::string_modifications`]. - Common(CommonCall), /// Be careful to make sure no element key is a prefix of any other element key. /// /// The current implementation sucks and can't handle that. @@ -698,12 +850,14 @@ pub enum StringModification { /// assert_eq!(x, "/a\n\\n"); /// ``` RunEscapeCodes(HashMap), + /// Uses a [`Self`] from the [`JobState::commons`]'s [`Commons::string_modifications`]. + Common(CommonCall), /// Uses a function pointer. /// /// Cannot be serialized or deserialized. #[expect(clippy::type_complexity, reason = "Who cares")] #[cfg(feature = "experiment-custom")] - Custom(FnWrapper Result<(), StringModificationError>>) + Custom(FnWrapper Result<(), StringModificationError>>) } /// Tells [`StringModification::MapChars`] what to do when a [`char`] isn't found in the map. @@ -949,6 +1103,44 @@ impl StringModification { Self::Insert{r#where, value} => if to.is_char_boundary(neg_index(*r#where, to.len()).ok_or(StringModificationError::InvalidIndex)?) {to.insert_str(neg_index(*r#where, to.len()).ok_or(StringModificationError::InvalidIndex)?, get_str!(value, job_state, StringModificationError));} else {Err(StringModificationError::InvalidIndex)?;}, Self::Remove(r#where) => if to.is_char_boundary(neg_index(*r#where, to.len()).ok_or(StringModificationError::InvalidIndex)?) {to.remove (neg_index(*r#where, to.len()).ok_or(StringModificationError::InvalidIndex)? 
);} else {Err(StringModificationError::InvalidIndex)?;}, Self::KeepRange{start, end} => *to = to.get(neg_range(*start, *end, to.len()).ok_or(StringModificationError::InvalidSlice)?).ok_or(StringModificationError::InvalidSlice)?.to_string(), + #[cfg(feature = "regex")] + Self::RegexCaptures {regex, replace} => { + let replace = get_str!(replace, job_state, StringModificationError); + let mut temp = "".to_string(); + regex.get_regex()?.captures(to).ok_or(StringModificationError::RegexMatchNotFound)?.expand(replace, &mut temp); + *to = temp; + }, + #[cfg(feature = "regex")] + Self::JoinAllRegexCaptures {regex, replace, join} => { + let replace = get_str!(replace, job_state, StringModificationError); + let join = get_str!(join, job_state, StringModificationError); + let mut temp = "".to_string(); + if join.is_empty() { + for captures in regex.get_regex()?.captures_iter(to) { + captures.expand(replace, &mut temp); + } + } else { + let mut iter = regex.get_regex()?.captures_iter(to).peekable(); + while let Some(captures) = iter.next() { + captures.expand(replace, &mut temp); + if iter.peek().is_some() {temp.push_str(join);} + } + } + *to = temp; + }, + #[cfg(feature = "regex")] Self::RegexFind (regex ) => *to = regex.get_regex()?.find (to ).ok_or(StringModificationError::RegexMatchNotFound)?.as_str().to_string(), + #[cfg(feature = "regex")] Self::RegexReplace {regex, replace} => *to = regex.get_regex()?.replace (to, get_str!(replace, job_state, StringModificationError)).into_owned(), + #[cfg(feature = "regex")] Self::RegexReplaceAll {regex, replace} => *to = regex.get_regex()?.replace_all(to, get_str!(replace, job_state, StringModificationError)).into_owned(), + #[cfg(feature = "regex")] Self::RegexReplacen {regex, n, replace} => *to = regex.get_regex()?.replacen (to, *n, get_str!(replace, job_state, StringModificationError)).into_owned(), + Self::IfFlag {flag, then, r#else} => if job_state.params.flags.contains(get_str!(flag, job_state, StringModificationError)) {then} 
else {r#else}.apply(to, job_state)?, + Self::UrlEncode => *to=utf8_percent_encode(to, NON_ALPHANUMERIC).to_string(), + Self::UrlDecode => *to=percent_decode_str(to).decode_utf8()?.into_owned(), + #[cfg(feature = "base64")] Self::Base64Encode(config) => *to = config.make_engine()?.encode(to.as_bytes()), + #[cfg(feature = "base64")] Self::Base64Decode(config) => *to = String::from_utf8(config.make_engine()?.decode(to.as_bytes())?)?, + Self::JsonPointer(pointer) => *to = serde_json::from_str::(to)?.pointer(get_str!(pointer, job_state, StringModificationError)).ok_or(StringModificationError::JsonValueNotFound)?.as_str().ok_or(StringModificationError::JsonValueIsNotAString)?.to_string(), + + + Self::KeepNthSegment {split, n} => *to = neg_nth(to.split(get_str!(split, job_state, StringModificationError)), *n).ok_or(StringModificationError::SegmentNotFound)?.to_string(), Self::KeepSegmentRange {split, start, end} => { let split = get_str!(split, job_state, StringModificationError); @@ -958,7 +1150,6 @@ impl StringModification { let split = get_str!(split, job_state, StringModificationError); let mut temp=to.split(split).collect::>(); let fixed_n=neg_index(*n, temp.len()).ok_or(StringModificationError::SegmentNotFound)?; - if fixed_n==temp.len() {Err(StringModificationError::SegmentNotFound)?;} let x = get_option_string!(value, job_state); #[expect(clippy::indexing_slicing, reason = "`fixed_n` is guaranteed to be in bounds.")] match x.as_deref() { @@ -967,6 +1158,60 @@ impl StringModification { } *to=temp.join(split); }, + Self::SetNthMatchingSegment {split, n, matcher, value} => { + let split = get_str!(split, job_state, StringModificationError); + let mut segments = to.split(split).collect::>(); + let fixed_n=neg_index(*n, segments.len()).ok_or(StringModificationError::SegmentNotFound)?; + let x = get_option_string!(value, job_state); + let mut count = 0usize; + let mut nth_match_found = false; + for (index, segment) in segments.iter_mut().enumerate() { + if 
matcher.satisfied_by(segment, job_state)? { + if count == fixed_n { + match x.as_deref() { + #[expect(clippy::indexing_slicing, reason = "`count` is guaranteed to be in bounds.")] + Some(value) => segments[index] = value, + None => if index < segments.len() {segments.remove(index);} else {Err(StringModificationError::SegmentNotFound)?} + } + nth_match_found = true; + break; + } + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {count += 1;} + } + } + if !nth_match_found {Err(StringModificationError::SegmentNotFound)?;} + *to=segments.join(split); + }, + Self::SetAroundNthMatchingSegment {split, n, matcher, value, shift} => { + let split = get_str!(split, job_state, StringModificationError); + let mut segments = to.split(split).map(Cow::Borrowed).collect::>(); + let fixed_n = neg_index(*n, segments.len()).ok_or(StringModificationError::SegmentNotFound)?; + let mut matched = 0usize; + let mut didnt_match = 0usize; + let mut nth_match_found = false; + for segment in segments.iter() { + if matcher.satisfied_by(segment, job_state)? { + if matched == fixed_n { + nth_match_found = true; + break; + } + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {matched += 1;} + } else { + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {didnt_match += 1;} + } + } + if !nth_match_found {Err(StringModificationError::SegmentNotFound)?;} + #[allow(clippy::arithmetic_side_effects, reason = "The length of a vector is at most isize::MAX so `matched + didnt_match` is always a valid isize.")] + let shifted_n = ((matched + didnt_match) as isize).checked_add(*shift).ok_or(StringModificationError::SegmentNotFound)?.try_into().map_err(|_| StringModificationError::SegmentNotFound)?; + match get_option_cow!(value, job_state) { + Some(value) => *segments.get_mut(shifted_n).ok_or(StringModificationError::SegmentNotFound)? 
= value, + None => if shifted_n >= segments.len() {Err(StringModificationError::SegmentNotFound)?} else {segments.remove(shifted_n);} + } + *to = segments.join(split); + }, Self::SetSegmentRange {split, start, end, value} => { let split = get_str!(split, job_state, StringModificationError); let mut segments = to.split(split).collect::>(); @@ -994,41 +1239,6 @@ impl StringModification { *to=temp.join(split); } }, - #[cfg(feature = "regex")] - Self::RegexCaptures {regex, replace} => { - let replace = get_str!(replace, job_state, StringModificationError); - let mut temp = "".to_string(); - regex.get_regex()?.captures(to).ok_or(StringModificationError::RegexMatchNotFound)?.expand(replace, &mut temp); - *to = temp; - }, - #[cfg(feature = "regex")] - Self::JoinAllRegexCaptures {regex, replace, join} => { - let replace = get_str!(replace, job_state, StringModificationError); - let join = get_str!(join, job_state, StringModificationError); - let mut temp = "".to_string(); - if join.is_empty() { - for captures in regex.get_regex()?.captures_iter(to) { - captures.expand(replace, &mut temp); - } - } else { - let mut iter = regex.get_regex()?.captures_iter(to).peekable(); - while let Some(captures) = iter.next() { - captures.expand(replace, &mut temp); - if iter.peek().is_some() {temp.push_str(join);} - } - } - *to = temp; - }, - #[cfg(feature = "regex")] Self::RegexFind (regex ) => *to = regex.get_regex()?.find (to ).ok_or(StringModificationError::RegexMatchNotFound)?.as_str().to_string(), - #[cfg(feature = "regex")] Self::RegexReplace {regex, replace} => *to = regex.get_regex()?.replace (to, get_str!(replace, job_state, StringModificationError)).into_owned(), - #[cfg(feature = "regex")] Self::RegexReplaceAll {regex, replace} => *to = regex.get_regex()?.replace_all(to, get_str!(replace, job_state, StringModificationError)).into_owned(), - #[cfg(feature = "regex")] Self::RegexReplacen {regex, n, replace} => *to = regex.get_regex()?.replacen (to, *n, get_str!(replace, 
job_state, StringModificationError)).into_owned(), - Self::IfFlag {flag, then, r#else} => if job_state.params.flags.contains(get_str!(flag, job_state, StringModificationError)) {then} else {r#else}.apply(to, job_state)?, - Self::UrlEncode => *to=utf8_percent_encode(to, NON_ALPHANUMERIC).to_string(), - Self::UrlDecode => *to=percent_decode_str(to).decode_utf8()?.into_owned(), - #[cfg(feature = "base64")] Self::Base64Encode(config) => *to = config.make_engine()?.encode(to.as_bytes()), - #[cfg(feature = "base64")] Self::Base64Decode(config) => *to = String::from_utf8(config.make_engine()?.decode(to.as_bytes())?)?, - Self::JsonPointer(pointer) => *to = serde_json::from_str::(to)?.pointer(get_str!(pointer, job_state, StringModificationError)).ok_or(StringModificationError::JsonValueNotFound)?.as_str().ok_or(StringModificationError::JsonValueIsNotAString)?.to_string(), #[expect(clippy::indexing_slicing, reason = "`fixed_n` is guaranteed to be in bounds.")] Self::ModifyNthSegment {split, n, modification} => { let split = get_str!(split, job_state, StringModificationError); @@ -1051,6 +1261,73 @@ impl StringModification { } *to = segments.join(split); }, + Self::ModifyNthMatchingSegment {split, n, matcher, modification} => { + let split = get_str!(split, job_state, StringModificationError); + let mut segments = to.split(split).map(Cow::Borrowed).collect::>(); + let fixed_n = neg_index(*n, segments.len()).ok_or(StringModificationError::SegmentNotFound)?; + let mut count = 0usize; + for segment in segments.iter_mut() { + if matcher.satisfied_by(segment, job_state)? 
{ + if count == fixed_n { + let mut temp = segment.to_string(); + modification.apply(&mut temp, job_state)?; + *segment = Cow::Owned(temp); + break; + } + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {count += 1;} + } + } + *to = segments.join(split); + }, + Self::ModifyMatchingSegments {split, ns, matcher, modification} => { + let split = get_str!(split, job_state, StringModificationError); + let mut segments = to.split(split).map(Cow::Borrowed).collect::>(); + let mut count = 0usize; + let fixed_ns = ns.iter().map(|n| neg_index(*n, segments.len()).ok_or(StringModificationError::SegmentNotFound)).collect::, _>>()?; + for segment in segments.iter_mut() { + if matcher.satisfied_by(segment, job_state)? { + if fixed_ns.iter().any(|x| *x==count) { + let mut temp = segment.to_string(); + modification.apply(&mut temp, job_state)?; + *segment = Cow::Owned(temp); + } + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {count += 1;} + } + } + *to = segments.join(split); + }, + Self::ModifyAroundNthMatchingSegment {split, n, matcher, modification, shift} => { + let split = get_str!(split, job_state, StringModificationError); + let mut segments = to.split(split).map(Cow::Borrowed).collect::>(); + let fixed_n = neg_index(*n, segments.len()).ok_or(StringModificationError::SegmentNotFound)?; + let mut matched = 0usize; + let mut didnt_match = 0usize; + let mut nth_match_found = false; + for segment in segments.iter() { + if matcher.satisfied_by(segment, job_state)? 
{ + if matched == fixed_n { + nth_match_found = true; + break; + } + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {matched += 1;} + } else { + #[allow(clippy::arithmetic_side_effects, reason = "Never exceeds `segments.len()`")] + {didnt_match += 1;} + } + } + if !nth_match_found {Err(StringModificationError::SegmentNotFound)?;} + #[allow(clippy::arithmetic_side_effects, reason = "The length of a vector is at most isize::MAX so `matched + didnt_match` is always a valid isize.")] + let shifted_n: usize = ((matched + didnt_match) as isize).checked_add(*shift).ok_or(StringModificationError::SegmentNotFound)?.try_into().map_err(|_| StringModificationError::SegmentNotFound)?; + let segment = segments.get_mut(shifted_n).ok_or(StringModificationError::SegmentNotFound)?.to_mut(); + modification.apply(segment, job_state)?; + *to = segments.join(split); + }, + + + Self::Map(map) => *to = get_string!(map.get(to).ok_or(StringModificationError::StringNotInMap)?, job_state, StringModificationError), Self::ExtractBetween {start, end} => { *to = to @@ -1107,7 +1384,7 @@ impl StringModification { *to=ret; }, #[cfg(feature = "experiment-custom")] - Self::Custom(function) => function(self, to, job_state)? + Self::Custom(function) => function(to, job_state)? 
}; Ok(()) } @@ -1134,12 +1411,6 @@ impl StringModification { Self::StripMaybeSuffix(source) => source.is_suitable_for_release(config), Self::Replacen {find, replace, ..} => find.is_suitable_for_release(config) && replace.is_suitable_for_release(config), Self::Insert {value, ..} => value.is_suitable_for_release(config), - Self::KeepNthSegment {split, ..} => split.is_suitable_for_release(config), - Self::KeepSegmentRange {split, ..} => split.is_suitable_for_release(config), - Self::SetNthSegment {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), - Self::SetSegmentRange {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), - Self::InsertSegmentBefore {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), - Self::InsertSegmentAfter {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), #[cfg(feature = "regex")] Self::RegexCaptures {replace, ..} => replace.is_suitable_for_release(config), #[cfg(feature = "regex")] Self::JoinAllRegexCaptures {replace, join, ..} => replace.is_suitable_for_release(config) && join.is_suitable_for_release(config), #[cfg(feature = "regex")] Self::RegexReplace {replace, ..} => replace.is_suitable_for_release(config), @@ -1147,8 +1418,19 @@ impl StringModification { #[cfg(feature = "regex")] Self::RegexReplacen {replace, ..} => replace.is_suitable_for_release(config), Self::IfFlag {flag, then, r#else} => flag.is_suitable_for_release(config) && then.is_suitable_for_release(config) && r#else.is_suitable_for_release(config), Self::JsonPointer(pointer) => pointer.is_suitable_for_release(config), + Self::KeepNthSegment {split, ..} => split.is_suitable_for_release(config), + Self::KeepSegmentRange {split, ..} => 
split.is_suitable_for_release(config), + Self::SetNthSegment {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), + Self::SetNthMatchingSegment {split, matcher, value, ..} => split.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), + Self::SetAroundNthMatchingSegment {split, matcher, value, ..} => split.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), + Self::SetSegmentRange {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), + Self::InsertSegmentBefore {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), + Self::InsertSegmentAfter {split, value, ..} => split.is_suitable_for_release(config) && value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)), Self::ModifyNthSegment {split, modification, ..} => split.is_suitable_for_release(config) && modification.is_suitable_for_release(config), Self::ModifySegments {split, modification, ..} => split.is_suitable_for_release(config) && modification.is_suitable_for_release(config), + Self::ModifyNthMatchingSegment {split, matcher, modification, ..} => split.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && modification.is_suitable_for_release(config), + Self::ModifyAroundNthMatchingSegment {split, matcher, modification, ..} => split.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && modification.is_suitable_for_release(config), + Self::ModifyMatchingSegments {split, matcher, modification, ..} => split.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && 
modification.is_suitable_for_release(config), Self::Map(map) => map.iter().all(|(_, x)| x.is_suitable_for_release(config)), Self::Debug(_) => false, Self::None | Self::Error | Self::Lowercase | Self::Uppercase | Self::Remove(_) | diff --git a/src/types/string_source.rs b/src/types/string_source.rs index 4213b02..a9a921a 100644 --- a/src/types/string_source.rs +++ b/src/types/string_source.rs @@ -35,15 +35,15 @@ pub enum StringSource { /// # Errors /// If the call to [`Self::get`] returns an error, that error is returned. NoneToEmptyString(Box), - /// If [`Self::NoneTo::source`] returns `None`, instead return the value of [`Self::NoneTo::if_none`]. + /// If [`Self::NoneTo::value`] returns `None`, instead return the value of [`Self::NoneTo::if_none`]. /// /// Please note that [`Self::NoneTo::if_none`] can still return [`None`] and does not return an error when it does so. /// # Errors /// If either call to [`Self::get`] returns an error, that error is returned. NoneTo { /// The [`Self`] to use by default. - source: Box, - /// The [`Self`] to use if [`Self::NoneTo::source`] returns [`None`]. + value: Box, + /// The [`Self`] to use if [`Self::NoneTo::value`] returns [`None`]. if_none: Box }, @@ -116,14 +116,14 @@ pub enum StringSource { /// If the flag is not set, use this. r#else: Box }, - /// If the value of `source` matches `matcher`, returns the value of `then`, otherwise returns the value of `else`. + /// If the value of `value` matches `matcher`, returns the value of `then`, otherwise returns the value of `else`. /// # Errors /// If any call to [`StringSource::get`] returns an error, that error is returned. /// /// If the call to [`StringMatcher::satisfied_by`] returns an error, that error is returned. IfSourceMatches { /// The [`Self`] to match on. - source: Box, + value: Box, /// The matcher. matcher: Box, /// The [`Self`] to return if the matcher passes. @@ -131,34 +131,34 @@ pub enum StringSource { /// The [`Self`] to return if the matcher fails. 
r#else: Box }, - /// If the value of `source` is [`None`], returns the value of `then`, otherwise returns the value of `else`. + /// If the value of `value` is [`None`], returns the value of `then`, otherwise returns the value of `else`. /// # Errors /// If any of the calls to [`StringSource::get`] return an error, that error is returned. IfSourceIsNone { /// The value to check the [`None`]ness of. - source: Box, - /// The value to return if `source` is [`None`]. + value: Box, + /// The value to return if `value` is [`None`]. then: Box, - /// The value to return if `source` is not [`None`] + /// The value to return if `value` is not [`None`] r#else: Box }, - /// Gets the `Option` from [`Self::Map::source`] then, if it exists in [`Self::Map::map`], gets its corresponding [`Self`]'s value. + /// Gets the `Option` from [`Self::Map::value`] then, if it exists in [`Self::Map::map`], gets its corresponding [`Self`]'s value. /// /// The main benefit of this over [`StringModification::Map`] is this can handle [`None`]. /// # Errors /// If either call to [`Self::get`] returns an error, that error is returned. /// - /// If string returned by [`Self::Map::source`] is not in the specified map, returns the error [`StringModificationError::StringNotInMap`]. + /// If string returned by [`Self::Map::value`] is not in the specified map, returns the error [`StringModificationError::StringNotInMap`]. Map { /// The string to index the map with. - source: Option>, + value: Option>, /// The map to map the string with. /// /// God these docs need a total rewrite. map: HashMap, Self>, /// JSON doesn't allow `null`/[`None`] to be a key in objects. /// - /// If `source` returns [`None`], there's no [`None`] in `map`, and `if_null` is not [`None`], the [`Self`] in `if_null` is used. + /// If `value` returns [`None`], there's no [`None`] in `map`, and `if_null` is not [`None`], the [`Self`] in `if_null` is used. /// /// Defaults to [`None`]. 
#[serde(default)] @@ -194,7 +194,7 @@ pub enum StringSource { /// assert_eq!(StringSource::Part(UrlPart::Domain).get(&job_state.to_view()).unwrap(), Some(Cow::Borrowed("example.com"))); /// ``` Part(UrlPart), - /// Parses `source` as a URL and gets the specified part. + /// Parses `value` as a URL and gets the specified part. /// # Errors /// If the call to [`Self::get`] returns an error, that error is returned. /// @@ -209,7 +209,7 @@ pub enum StringSource { /// /// assert_eq!( /// StringSource::ExtractPart { - /// source: "https://example.com".into(), + /// value: "https://example.com".into(), /// part: UrlPart::Scheme /// }.get(&job_state.to_view()).unwrap(), /// Some(Cow::Borrowed("https")) @@ -217,8 +217,8 @@ pub enum StringSource { /// ``` ExtractPart { /// The string to parse and extract `part` from. - source: Box, - /// The part to extract from `source`. + value: Box, + /// The part to extract from `value`. part: UrlPart }, /// Indexes [`JobState::common_args`]. @@ -265,12 +265,12 @@ pub enum StringSource { /// The key to index the map with. key: Box }, - /// Gets a string with `source`, modifies it with `modification`, and returns the result. + /// Gets a string with `value`, modifies it with `modification`, and returns the result. /// # Errors /// If the call to [`StringModification::apply`] errors, returns that error. Modified { - /// The source to get the string from. - source: Box, + /// The [`Self`] to get the string from. + value: Box, /// The modification to apply to the string. modification: Box }, @@ -315,24 +315,24 @@ pub enum StringSource { /// The key to cache with. key: Box, /// The [`Self`] to cache. - source: Box + value: Box }, - /// Extracts the substring of `source` found between the first `start` and the first subsequent `end`. + /// Extracts the substring of `value` found between the first `start` and the first subsequent `end`. /// /// The same as [`StringModification::ExtractBetween`] but preserves borrowedness.
/// - /// If `source` returns a [`Cow::Borrowed`], this will also return a [`Cow::Borrowed`]. + /// If `value` returns a [`Cow::Borrowed`], this will also return a [`Cow::Borrowed`]. /// # Errors /// If any call to [`Self::get`] returns an error, that error is returned. /// /// If any call to [`Self::get`] returns [`None`], returns the error [`StringSourceError::StringSourceIsNone`]. /// - /// If `start` is not found in `source`, returns the error [`StringSourceError::ExtractBetweenStartNotFound`]. + /// If `start` is not found in `value`, returns the error [`StringSourceError::ExtractBetweenStartNotFound`]. /// - /// If `end` is not found in `source` after `start`, returns the error [`StringSourceError::ExtractBetweenEndNotFound`]. + /// If `end` is not found in `value` after `start`, returns the error [`StringSourceError::ExtractBetweenEndNotFound`]. ExtractBetween { /// The [`Self`] to get a substring from. - source: Box, + value: Box, /// The [`Self`] to look for before the substring. start: Box, /// The [`Self`] to look for after the substring. @@ -345,7 +345,7 @@ pub enum StringSource { /// Cannot be serialized or deserialized. #[expect(clippy::type_complexity, reason = "Who cares")] #[cfg(feature = "experiment-custom")] - Custom(FnWrapper fn(&'a Self, &'a JobStateView) -> Result>, StringSourceError>>) + Custom(FnWrapper fn(&'a JobStateView) -> Result>, StringSourceError>>) } impl FromStr for StringSource { @@ -450,10 +450,10 @@ pub enum StringSourceError { /// Returned when [`StringSource::Common`] is used outside of a common context. #[error("Not in a common context.")] NotInACommonContext, - /// Returned when the `start` of a [`StringSource::ExtractBetween`] is not found in the `source`. + /// Returned when the `start` of a [`StringSource::ExtractBetween`] is not found in the `value`. 
#[error("The `start` of an `ExtractBetween` was not found in the string.")] ExtractBetweenStartNotFound, - /// Returned when the `start` of a [`StringSource::ExtractBetween`] is not found in the `source`. + /// Returned when the `end` of a [`StringSource::ExtractBetween`] is not found in the `value`. #[error("The `end` of an `ExtractBetween` was not found in the string.")] ExtractBetweenEndNotFound, /// Returned when the common [`StringSource`] is not found. @@ -489,37 +489,38 @@ impl StringSource { pub fn get<'a>(&'a self, job_state: &'a JobStateView) -> Result>, StringSourceError> { debug!(StringSource::get, self, job_state); Ok(match self { + Self::String(string) => Some(Cow::Borrowed(string.as_str())), Self::Error => Err(StringSourceError::ExplicitError)?, Self::Debug(source) => { - let ret=source.get(job_state); + let ret = source.get(job_state); eprintln!("=== StringSource::Debug ===\nSource: {source:?}\nJob state: {job_state:?}\nret: {ret:?}"); ret? }, - Self::NoneToEmptyString(source) => source.get(job_state)?.or(Some(Cow::Borrowed(""))), - Self::NoneTo {source, if_none} => source.get(job_state).transpose().or_else(|| if_none.get(job_state).transpose()).transpose()?, + Self::NoneToEmptyString(value) => value.get(job_state)?.or(Some(Cow::Borrowed(""))), + Self::NoneTo {value, if_none} => value.get(job_state).transpose().or_else(|| if_none.get(job_state).transpose()).transpose()?, // I love that [`Result`] and [`Option`] implement [`FromIterator`]. // It's so silly but it works SO well.
- Self::Join {sources, join} => sources.iter().map(|source| source.get(job_state)).collect::>, _>>()?.map(|x| Cow::Owned(x.join(join))), + Self::Join {sources, join} => sources.iter().map(|value| value.get(job_state)).collect::>, _>>()?.map(|x| Cow::Owned(x.join(join))), Self::IfFlag {flag, then, r#else} => if job_state.params.flags.contains(&get_string!(flag, job_state, StringSourceError)) {then} else {r#else}.get(job_state)?, - Self::IfSourceMatches {source, matcher, then, r#else} => { - if matcher.satisfied_by(get_str!(source, job_state, StringSourceError), job_state)? { + Self::IfSourceMatches {value, matcher, then, r#else} => { + if matcher.satisfied_by(get_str!(value, job_state, StringSourceError), job_state)? { then.get(job_state)? } else { r#else.get(job_state)? } }, - Self::IfSourceIsNone {source, then, r#else} => { - if source.get(job_state)?.is_none() { + Self::IfSourceIsNone {value, then, r#else} => { + if value.get(job_state)?.is_none() { then.get(job_state)? } else { r#else.get(job_state)? 
} }, - Self::Map {source, map, if_null, r#else} => { - let key = get_option_string!(source, job_state); + Self::Map {value, map, if_null, r#else} => { + let key = get_option_string!(value, job_state); match (key.is_none(), map.get(&key), if_null, r#else) { (_ , Some(mapper), _ , _ ) => mapper, (true, _ , Some(mapper), _ ) => mapper, @@ -530,16 +531,15 @@ impl StringSource { - Self::String(string) => Some(Cow::Borrowed(string.as_str())), Self::Part(part) => part.get(job_state.url), - Self::ExtractPart{source, part} => source.get(job_state)?.map(|url_str| Url::parse(&url_str)).transpose()?.and_then(|url| part.get(&url).map(|part_value| Cow::Owned(part_value.into_owned()))), + Self::ExtractPart{value, part} => value.get(job_state)?.map(|url_str| Url::parse(&url_str)).transpose()?.and_then(|url| part.get(&url).map(|part_value| Cow::Owned(part_value.into_owned()))), Self::CommonVar(name) => job_state.common_args.ok_or(StringSourceError::NotInACommonContext)?.vars.get(get_str!(name, job_state, StringSourceError)).map(|value| Cow::Borrowed(value.as_str())), Self::Var(key) => job_state.params.vars.get(get_str!(key, job_state, StringSourceError)).map(|value| Cow::Borrowed(value.as_str())), Self::ScratchpadVar(key) => job_state.scratchpad.vars.get(get_str!(key, job_state, StringSourceError)).map(|value| Cow::Borrowed(&**value)), Self::ContextVar(key) => job_state.context.vars.get(get_str!(key, job_state, StringSourceError)).map(|value| Cow::Borrowed(&**value)), Self::MapKey {map, key} => job_state.params.maps.get(get_str!(map, job_state, StringSourceError)).ok_or(StringSourceError::MapNotFound)?.get(get_str!(key, job_state, StringSourceError)).map(|x| Cow::Borrowed(&**x)), - Self::Modified {source, modification} => { - match source.as_ref().get(job_state)? { + Self::Modified {value, modification} => { + match value.as_ref().get(job_state)? 
{ Some(x) => { let mut x = x.into_owned(); modification.apply(&mut x, job_state)?; @@ -562,23 +562,8 @@ impl StringSource { Self::HttpRequest(config) => Some(Cow::Owned(config.response(job_state)?)), #[cfg(feature = "commands")] Self::CommandOutput(command) => Some(Cow::Owned(command.output(job_state)?)), - #[cfg(feature = "cache")] - Self::Cache {category, key, source} => { - let category = get_string!(category, job_state, StringSourceError); - let key = get_string!(key, job_state, StringSourceError); - if job_state.params.read_cache { - if let Some(ret) = job_state.cache.read(&category, &key)? { - return Ok(ret.map(Cow::Owned)); - } - } - let ret = source.get(job_state)?; - if job_state.params.write_cache { - job_state.cache.write(&category, &key, ret.as_deref())?; - } - ret - }, - Self::ExtractBetween {source, start, end} => { - Some(match source.get(job_state)?.ok_or(StringSourceError::StringSourceIsNone)? { + Self::ExtractBetween {value, start, end} => { + Some(match value.get(job_state)?.ok_or(StringSourceError::StringSourceIsNone)? { Cow::Borrowed(x) => Cow::Borrowed(x .split_once(get_str!(start, job_state, StringSourceError)) .ok_or(StringSourceError::ExtractBetweenStartNotFound)? @@ -596,6 +581,21 @@ impl StringSource { .to_string()) }) }, + #[cfg(feature = "cache")] + Self::Cache {category, key, value} => { + let category = get_string!(category, job_state, StringSourceError); + let key = get_string!(key, job_state, StringSourceError); + if job_state.params.read_cache { + if let Some(ret) = job_state.cache.read(&category, &key)? 
{ + return Ok(ret.map(Cow::Owned)); + } + } + let ret = value.get(job_state)?; + if job_state.params.write_cache { + job_state.cache.write(&category, &key, ret.as_deref())?; + } + ret + }, Self::Common(common_call) => { job_state.commons.string_sources.get(get_str!(common_call.name, job_state, StringSourceError)).ok_or(StringSourceError::CommonStringSourceNotFound)?.get(&JobStateView { url: job_state.url, @@ -609,37 +609,37 @@ impl StringSource { })?.map(|x| Cow::Owned(x.into_owned())) }, #[cfg(feature = "experiment-custom")] - Self::Custom(function) => function(self, job_state)? + Self::Custom(function) => function(job_state)? }) } /// Internal method to make sure I don't accidentally commit Debug variants and other stuff unsuitable for the default config. pub(crate) fn is_suitable_for_release(&self, config: &Config) -> bool { assert!(match self { - Self::NoneToEmptyString(source) => source.is_suitable_for_release(config), - Self::NoneTo {source, if_none} => source.is_suitable_for_release(config) && if_none.is_suitable_for_release(config), - Self::Join {sources, ..} => sources.iter().all(|source| source.is_suitable_for_release(config)), + Self::NoneToEmptyString(value) => value.is_suitable_for_release(config), + Self::NoneTo {value, if_none} => value.is_suitable_for_release(config) && if_none.is_suitable_for_release(config), + Self::Join {sources, ..} => sources.iter().all(|value| value.is_suitable_for_release(config)), Self::IfFlag {flag, then, r#else} => flag.is_suitable_for_release(config) && then.is_suitable_for_release(config) && r#else.is_suitable_for_release(config) && check_docs!(config, flags, flag.as_ref()), - Self::IfSourceMatches {source, matcher, then, r#else} => source.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && then.is_suitable_for_release(config) && r#else.is_suitable_for_release(config), - Self::IfSourceIsNone {source, then, r#else} => source.is_suitable_for_release(config) && then.is_suitable_for_release(config) 
&& r#else.is_suitable_for_release(config), - Self::Map {source, map, if_null, r#else} => source.as_ref().is_none_or(|source| source.is_suitable_for_release(config)) && map.iter().all(|(_, source)| source.is_suitable_for_release(config)) && if_null.as_ref().is_none_or(|if_null| if_null.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|r#else| r#else.is_suitable_for_release(config)), + Self::IfSourceMatches {value, matcher, then, r#else} => value.is_suitable_for_release(config) && matcher.is_suitable_for_release(config) && then.is_suitable_for_release(config) && r#else.is_suitable_for_release(config), + Self::IfSourceIsNone {value, then, r#else} => value.is_suitable_for_release(config) && then.is_suitable_for_release(config) && r#else.is_suitable_for_release(config), + Self::Map {value, map, if_null, r#else} => value.as_ref().is_none_or(|value| value.is_suitable_for_release(config)) && map.iter().all(|(_, value)| value.is_suitable_for_release(config)) && if_null.as_ref().is_none_or(|if_null| if_null.is_suitable_for_release(config)) && r#else.as_ref().is_none_or(|r#else| r#else.is_suitable_for_release(config)), Self::Part(part) => part.is_suitable_for_release(config), - Self::ExtractPart {source, part} => source.is_suitable_for_release(config) && part.is_suitable_for_release(config), + Self::ExtractPart {value, part} => value.is_suitable_for_release(config) && part.is_suitable_for_release(config), Self::CommonVar(name) => name.is_suitable_for_release(config), Self::Var(name) => name.is_suitable_for_release(config) && check_docs!(config, vars, name.as_ref()), Self::ScratchpadVar(name) => name.is_suitable_for_release(config), Self::ContextVar(name) => name.is_suitable_for_release(config), Self::MapKey {map, key} => map.is_suitable_for_release(config) && key.is_suitable_for_release(config) && check_docs!(config, maps, map.as_ref()), - Self::Modified {source, modification} => source.is_suitable_for_release(config) && 
modification.is_suitable_for_release(config), + Self::Modified {value, modification} => value.is_suitable_for_release(config) && modification.is_suitable_for_release(config), Self::EnvVar(name) => name.is_suitable_for_release(config) && check_docs!(config, environment_vars, name.as_ref()), - #[cfg(feature = "cache")] Self::Cache {category, key, source} => category.is_suitable_for_release(config) && key.is_suitable_for_release(config) && source.is_suitable_for_release(config), + #[cfg(feature = "cache")] Self::Cache {category, key, value} => category.is_suitable_for_release(config) && key.is_suitable_for_release(config) && value.is_suitable_for_release(config), Self::Debug(_) => false, #[cfg(feature = "commands")] Self::CommandOutput(_) => false, Self::Error | Self::String(_) => true, #[cfg(feature = "advanced-http")] Self::HttpRequest(request_config) => request_config.is_suitable_for_release(config), - Self::ExtractBetween {source, start, end} => source.is_suitable_for_release(config) && start.is_suitable_for_release(config) && end.is_suitable_for_release(config), + Self::ExtractBetween {value, start, end} => value.is_suitable_for_release(config) && start.is_suitable_for_release(config) && end.is_suitable_for_release(config), Self::Common(common_call) => common_call.is_suitable_for_release(config), #[cfg(feature = "experiment-custom")] Self::Custom(_) => false diff --git a/src/types/url_part.rs b/src/types/url_part.rs index 12ca6a2..58a718b 100644 --- a/src/types/url_part.rs +++ b/src/types/url_part.rs @@ -896,13 +896,7 @@ impl UrlPart { let domain=url.domain().map(|x| x.strip_suffix('.').unwrap_or(x))?; Cow::Borrowed(domain.strip_suffix(psl::suffix_str(domain)?)?.strip_suffix('.')?) }, - Self::DomainMiddle => { - // Cow::Borrowed(url.domain().map(|x| x.strip_suffix('.').unwrap_or(x).strip_suffix(psl::suffix_str(x)?))?? - // .rsplit('.').nth(1)?) 
- // let domain=url.domain().map(|x| x.strip_suffix('.').unwrap_or(x))?; - // Cow::Borrowed(domain.strip_suffix(psl::suffix_str(domain)?)?.rsplit('.').nth(1)?) - Cow::Borrowed(psl::domain_str(url.domain()?)?.split_once('.')?.0) - }, + Self::DomainMiddle => Cow::Borrowed(psl::domain_str(url.domain()?)?.split_once('.')?.0), Self::MaybeWWWDomainMiddle => if matches!(Self::Subdomain.get(url).as_deref(), Some("www") | None) {Self::DomainMiddle.get(url)} else {None}?, Self::Domain => Cow::Borrowed(url.domain()?), Self::DomainSuffix => Cow::Borrowed(url.domain().and_then(psl::suffix_str)?), diff --git a/src/util.rs b/src/util.rs index c12c21f..2d1c9aa 100644 --- a/src/util.rs +++ b/src/util.rs @@ -13,10 +13,10 @@ pub(crate) fn is_default(t: &T) -> bool {t == &T::defaul /// Loops negative `index`es around similar to Python. pub(crate) const fn neg_index(index: isize, len: usize) -> Option { - if 0<=index && (index as usize) Option { /// [`neg_index`] but doesn't [`None`] when `index == len`. pub(crate) const fn neg_range_boundary(index: isize, len: usize) -> Option { - if index >= 0 && index as usize == len { - Some(index as usize) + if index as usize == len { + Some(len) } else { neg_index(index, len) } diff --git a/src/util/macros.rs b/src/util/macros.rs index 964ba31..2ce3009 100644 --- a/src/util/macros.rs +++ b/src/util/macros.rs @@ -121,6 +121,20 @@ macro_rules! get_option_str { } } +/// A macro that makes handling the difference between [`Option`]s of [`StringSource`] and [`String`] easier. +macro_rules! get_option_cow { + ($value:expr, $job_state:expr) => { + // $value.as_ref().map(|source| source.get(&$job_state.to_view())).transpose()?.flatten().as_deref() + { + let view = &$job_state.to_view(); + match $value.as_ref() { + Some(source) => source.get(view), + None => Ok(None) + }? + } + } +} + /// Shorthand for checking a [`Config::docs`] has an entry for [`StringSource::String`] flags/sets/etc.. macro_rules! 
check_docs { ($config:expr, $type:ident, $name:expr) => { @@ -141,4 +155,5 @@ pub(crate) use get_str; pub(crate) use get_string; pub(crate) use get_option_str; pub(crate) use get_option_string; +pub(crate) use get_option_cow; pub(crate) use check_docs;