1.82 and some stuff

- Moved advanced_requests.rs to glue - Added lint reasons and changed `allow` to `expect` where relevant - `Mapper::ConditionChain` and `StringModification::StringMatcherChain`
Scripter17 · Sep 7, 2024 · 9443dd2 · 9443dd2
1 parent dcbf4a6
commit 9443dd2
Show file tree

Hide file tree

Showing 30 changed files with 206 additions and 151 deletions.
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# URL Cleaner
+# URL Cleaner
 
 Websites often put unique identifiers into URLs so that, when you send a link to a friend and they open it, the website knows it was you who sent it to them.  
 As most people do not understand and therefore cannot consent to this, it is polite to remove the spytext query parameters before sending URLs to people.  
@@ -202,7 +202,7 @@ That said, if you notice any rules that use but don't actually need HTTP request
 
 Note: [JSON output is supported](#json-output).
 
-Unless `Mapper::(e|)Print(ln|)` or a `Debug` variant is used, the following should always be true:
+Unless a `Debug` variant is used, the following should always be true:
 
 1. Input URLs are a list of URLs starting with URLs provided as command line arguments then each line of the STDIN.
 
@@ -220,8 +220,6 @@ The `--json`/`-j` flag can be used to have URL Cleaner output JSON instead of li
 
 The exact format is currently in flux.
 
-If a `Mapper::Print(ln|)` is used, this is not guaranteed to be valid JSON.
-
 ## Panic policy
 
 URL Cleaner should only ever panic under the following circumstances:

diff --git a/default-config.json b/default-config.json
@@ -15,7 +15,7 @@
 				"bw.artemislena.eu", "bw.hamstro.dev", "bw.projectsegfau.lt", "bw.skunky7dhv7nohsoalpwe3sxfz3fbkad7r3wk632riye25vqm3meqead.onion",
 				"nerd.whatever.social", "z.opnxng.com"
 			],
-			"shortlink-hosts": [
+			"redirect-hosts": [
 				"2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.ew", "amzn.to",
 				"apple.co", "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly",
 				"cbsn.ws", "cfl.re", "chn.ge", "chng.it", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", "dis.gd",
@@ -26,7 +26,7 @@
 				"on.nyc.gov", "onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "pin.it", "pixiv.me", "play.st", "politi.co", "py.pl",
 				"qr1.be", "rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "s76.co", "shor.tf",
 				"shorturl.at", "spoti.fi", "spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "thecutio", "thr.cm", "tmz.me", "to.pbs.org", "tr.ee",
-				"trib.al", "u.jd.com", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca", "visitlink.me"
+				"trib.al", "u.jd.com", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca", "visitlink.me", "hmstr.fr"
 			],
 			"utps": [
 				"Tcsack", "__hsfp", "__hssc", "__hstc", "__io_lv", "__s", "_branch_match_id", "_branch_referrer", "_clde", "_cldee", "_ga",
@@ -118,6 +118,7 @@
 				{"before": "https://https//example1.com/example2.com/abc", "after": "https://example2.com/abc"},
 				{"before": "https://http//www.deviantart.com/user.deviantart.com/gallery", "after": "https://deviantart.com/user/gallery"},
 				{"before": "https://https//abc.tumblr.com.tumblr.com", "after": "https://tumblr.com/abc/"},
+				{"before": "https://abc.deviantart.com.deviantart.com", "after": "https://deviantart.com/abc/"},
 
 				{"before": "https://https//example.com/abc", "after": "https://example.com/abc"},
 				{"before": "https://https//example1.com/example2.com/abc", "after": "https://example2.com/abc"},
@@ -196,7 +197,7 @@
 			"condition": {"PartMatches": {
 				"part": "HostWithoutWWWDotPrefix",
 				"matcher": {"Any": [
-					{"InSet": "shortlink-hosts"},
+					{"InSet": "redirect-hosts"},
 					{"InSet": "bypass.vip-hosts"},
 					{"IsOneOf": [
 				    "allmylinks.com", "api.linkr.bio", "api.pinterest.com", "bfy.tw", "buymeacoff.ee", "buymeacoffee.com",
@@ -205,7 +206,7 @@
 				    "goodreads.com", "google.com", "gprivate.com", "href.li", "instagr.am", "instagram.com", "l.instagram.com", "l.threads.com",
 				    "lmddgtfy.net", "lmgtfy2.com", "lnk.bio", "open.substack.com", "out.reddit.com", "pawoo.net", "pixiv.net", "preview.tinyurl.com",
 				    "rd.goodreads.com", "sketchfab.com", "sketchfab.com", "skfb.ly", "smarturl.it", "steamcommunity.com", "subscribestar.adult",
-				    "substack.com", "t.umblr.com", "tinyurl.com", "toyhou.se", "youtube.com", "action.openmedia.org", "1link.club", "proxy.notsobot.com"
+				    "substack.com", "t.umblr.com", "tinyurl.com", "toyhou.se", "youtube.com", "action.openmedia.org", "1link.club", "proxy.notsobot.com", "awin1.com"
 				  ]},
 				  {"InSet": "lmgtfy-hosts"},
 				  {"LengthIs": 4},
@@ -452,20 +453,33 @@
 										"condition": {"PathIs": "/gp/r.html"},
 										"mapper": {"GetUrlFromQueryParam": "U"}
 									}},
-									"substack.com": {"IfCondition": {
-										"condition": {"PartContains": {"part": "Path", "where": "Start", "value": "/redirect/2/"}},
-										"mapper": {"SetPart": {
-											"part": "Whole",
-											"value": {"Modified": {
-												"source": {"Part": {"PathSegment": 2}},
-												"modification": {"All": [
-													{"KeepNthSegment": {"split": ".", "n": 0}},
-													"Base64Decode",
-													{"JsonPointer": "/e"}
-												]}
+									"substack.com": {"ConditionChain": [
+										{
+											"condition": {"PartContains": {"part": "Path", "where": "Start", "value": "/redirect/2/"}},
+											"mapper": {"SetPart": {
+												"part": "Whole",
+												"value": {"Modified": {
+													"source": {"Part": {"PathSegment": 2}},
+													"modification": {"All": [
+														{"KeepNthSegment": {"split": ".", "n": 0}},
+														"Base64Decode",
+														{"JsonPointer": "/e"}
+													]}
+												}}
 											}}
-										}}
-									}},
+										},
+										{
+											"condition": {"PathIs": "/app-link/post"},
+											"mapper": {"AllowQueryParams": ["publication_id", "post_id"]}
+										},
+										{
+											"condition": {"All": [
+												{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
+												{"PartMatches": {"part": {"PathSegment": 1}, "matcher": {"LengthIs": 36}}}
+											]},
+											"mapper": "RemoveQuery"
+										}
+									]},
 									"open.substack.com": {"IfCondition": {
 										"condition": {"PartIs": {"part": {"PathSegment": 0}, "value": "pub"}},
 										"mapper": {"All": [
@@ -561,7 +575,11 @@
 											}
 										}
 									}},
-									"proxy.notsobot.com": {"GetUrlFromQueryParam": "url"}
+									"proxy.notsobot.com": {"GetUrlFromQueryParam": "url"},
+									"awin1.com": {"IfCondition": {
+										"condition": {"PathIs": "/cread.php"},
+										"mapper": {"GetUrlFromQueryParam": "ued"}
+									}}
 								}
 							}
 						},
@@ -596,16 +614,16 @@
 								{"Not": {"FlagIsSet": "no-http"}},
 								{"Any": [
 		              {"All": [
-		              	{"FlagIsSet": "assume-1-dot-2-is-shortlink"},
+		              	{"FlagIsSet": "assume-1-dot-2-is-redirect"},
 		              	{"PartMatches": {"part": "Domain", "matcher": {"Regex": "^.\\...$"}}}
 		              ]},
 									{"PartMatches": {
 										"part": "HostWithoutWWWDotPrefix",
-										"matcher": {"InSet": "shortlink-hosts"}
+										"matcher": {"InSet": "redirect-hosts"}
 									}},
 									{"TreatErrorAsFail": {"PartMatches": {
 										"part": "NotSubdomain",
-										"matcher": {"InSet": "shortlink-hosts"}
+										"matcher": {"InSet": "redirect-hosts"}
 									}}},
 									{"PartMap": {
 										"part": "HostWithoutWWWDotPrefix",
@@ -614,9 +632,12 @@
 											"api.pinterest.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "url_shortener"}},
 											"allmylinks.com"   : {"PathIs": "/link/out"},
 											"gofundme.com"     : {"PartIs": {"part": {"PathSegment": 0}, "value": "url_shortener"}},
-											"substack.com"     : {"All": [
-												{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
-												{"Not": {"PartIs": {"part": {"PathSegment": 1}, "value": "2"}}}
+											"substack.com"     : {"Any": [
+												{"All": [
+													{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
+													{"Not": {"PartIs": {"part": {"PathSegment": 1}, "value": "2"}}}
+												]},
+												{"PathIs": "/app-link/post"}
 											]},
 											"sketchfab.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}}
 										}
@@ -632,7 +653,7 @@
 							]},
 							"mapper": {"All": [
 								{"Common": {"name": "utps"}},
-								{"ExpandShortLink": {}}
+								{"ExpandRedirect": {}}
 							]}
 						},
 						{
@@ -662,10 +683,7 @@
 					{"Any": [
 						{"HostIsOneOf": ["http", "https", "bsky.app", "www.bsky.app"]},
 						{"PartContains": {"part": "Path", "value": "http"}},
-						{"All": [
-							{"PartIs": {"part": {"DomainSegment": 0}, "value": "www"}},
-							{"Not": {"PartIs": {"part": {"DomainSegment": 3}, "value": null}}}
-						]}
+						{"Not": {"PartIs": {"part": {"DomainSegment": 3}, "value": null}}}
 					]}
 				]},
 				"rules": [

diff --git a/src/glue.rs b/src/glue.rs
@@ -1,17 +1,19 @@
 //! "Glue" to make working with types from other crates easier.
 
-#[cfg(feature = "regex"   )] mod regex;
-#[cfg(feature = "regex"   )] pub use regex::*;
-#[cfg(feature = "glob"    )] mod glob;
-#[cfg(feature = "glob"    )] pub use glob::*;
-#[cfg(feature = "commands")] mod command;
-#[cfg(feature = "commands")] pub use command::*;
-#[cfg(feature = "http"    )] pub mod proxy;
-#[cfg(feature = "http"    )] pub use proxy::*;
-#[cfg(feature = "http"    )] pub(crate) mod headermap;
-#[cfg(feature = "http"    )] pub(crate) mod headervalue;
-#[cfg(feature = "http"    )] pub(crate) mod method;
-#[cfg(feature = "cache"   )] mod caching;
-#[cfg(feature = "cache"   )] pub use caching::*;
-#[cfg(feature = "base64"  )] mod base64;
-#[cfg(feature = "base64"  )] pub use base64::*;
+#[cfg(feature = "regex"            )] mod regex;
+#[cfg(feature = "regex"            )] pub use regex::*;
+#[cfg(feature = "glob"             )] mod glob;
+#[cfg(feature = "glob"             )] pub use glob::*;
+#[cfg(feature = "commands"         )] mod command;
+#[cfg(feature = "commands"         )] pub use command::*;
+#[cfg(feature = "advanced-requests")] mod advanced_requests;
+#[cfg(feature = "advanced-requests")] pub use advanced_requests::*;
+#[cfg(feature = "http"             )] pub mod proxy;
+#[cfg(feature = "http"             )] pub use proxy::*;
+#[cfg(feature = "http"             )] pub(crate) mod headermap;
+#[cfg(feature = "http"             )] pub(crate) mod headervalue;
+#[cfg(feature = "http"             )] pub(crate) mod method;
+#[cfg(feature = "cache"            )] mod caching;
+#[cfg(feature = "cache"            )] pub use caching::*;
+#[cfg(feature = "base64"           )] mod base64;
+#[cfg(feature = "base64"           )] pub use base64::*;
diff --git a/src/types/advanced_requests.rs → src/glue/advanced_requests.rs b/src/types/advanced_requests.rs → src/glue/advanced_requests.rs
@@ -1,4 +1,6 @@
 //! Provides [`RequestConfig`], [`RequestBody`], and [`ResponseHandler`] which allows for sending HTTP requests and getting strings from their responses.
+//! 
+//! Enabled by the `advanced-requests` feature flag.
 
 use std::collections::HashMap;
 
@@ -7,7 +9,7 @@ use serde::{Deserialize, Serialize};
 use serde_json::value::Value;
 use reqwest::{Method, header::{HeaderName, HeaderValue, HeaderMap}};
 use thiserror::Error;
-#[allow(unused_imports)] // Used for documentation.
+#[allow(unused_imports, reason = "Used in a doc comment.")]
 use reqwest::cookie::Cookie;
 
 use crate::types::*;

diff --git a/src/glue/base64.rs b/src/glue/base64.rs
@@ -1,4 +1,6 @@
 //! Glue to make using [`base64`] easier.
+//! 
+//! Enabled by the `base64` feature flag.
 
 use std::str::FromStr;
 

diff --git a/src/glue/caching.rs b/src/glue/caching.rs
@@ -1,4 +1,6 @@
-#![doc = "Allows caching to an SQLite file."]
+//! Allows caching to an SQLite file.
+//! 
+//! Enabled by the `cache` feature flag.
 
 use std::sync::{Arc, Mutex};
 use std::str::FromStr;
@@ -10,7 +12,7 @@ use diesel::prelude::*;
 
 use crate::util::*;
 
-#[allow(clippy::missing_docs_in_private_items, missing_docs)]
+#[allow(clippy::missing_docs_in_private_items, missing_docs, reason = "File is auto-generated by diesel's CLI.")]
 mod schema;
 pub use schema::cache;
 

diff --git a/src/glue/command.rs b/src/glue/command.rs
@@ -1,4 +1,6 @@
 //! Provides [`CommandConfig`] to allow usage of external commands.
+//! 
+//! Enabled by the `commands` feature flag.
 
 use std::process::{Command, Stdio};
 use std::io::Write;
@@ -13,16 +15,11 @@ use thiserror::Error;
 use serde::{Serialize, Deserialize};
 use which::which;
 
-// Used just for documentation.
-#[allow(unused_imports)]
+#[allow(unused_imports, reason = "Used in a doc comment.")]
 use crate::types::*;
 use crate::util::*;
 
 /// Instructions on how to make and run a [`Command`] object.
-/// 
-/// If you are making a URL-Cleaner-as-a-service service, you should disable the `commands` feature to block access to this.
-/// 
-/// I don't care if you use sandboxing. You shouldn't tempt fate.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(remote= "Self")]
 pub struct CommandConfig {
@@ -148,7 +145,7 @@ impl CommandConfig {
     /// If `stdin` is `Some` and the calls to [`Command::spawn`], [`std::process::ChildStdin::write_all`], or [`std::process::Child::wait_with_output`] returns an error, that error is returned.
     /// 
     /// If `stdin` is `None` and the call to [`Command::output`] returns an error, that error is returned.
-    #[allow(clippy::missing_panics_doc)]
+    #[allow(clippy::missing_panics_doc, reason = "Shouldn't ever panic.")]
     pub fn output(&self, job_state: &JobState) -> Result<String, CommandError> {
         // https://stackoverflow.com/a/49597789/10720231
         let mut command = self.make_command(job_state)?;
@@ -171,7 +168,6 @@ impl CommandConfig {
     /// If the call to [`Self::output`] returns an error, that error is returned.
     /// 
     /// If the call to [`Url::parse`] returns an error, that error is returned.
-    #[allow(dead_code)]
     pub fn get_url(&self, job_state: &JobState) -> Result<Url, CommandError> {
         Ok(Url::parse(self.output(job_state)?.trim_end_matches(&['\r', '\n']))?)
     }

diff --git a/src/glue/glob.rs b/src/glue/glob.rs
@@ -1,4 +1,6 @@
 //! Provides [`GlobWrapper`], a serializable/deserializable wrapper around [`Pattern`] and [`MatchOptions`].
+//! 
+//! Enabled by the `glob` feature flag.
 
 use std::str::FromStr;
 

diff --git a/src/glue/headermap.rs b/src/glue/headermap.rs
@@ -3,8 +3,9 @@
 use std::collections::HashMap;
 
 use serde::{Deserialize, ser::{Serializer, Error as _}, de::{Deserializer, Error as _}};
-#[allow(unused_imports)] // [`HeaderValue`] is imported for [`serialize`]'s documentation.
-use reqwest::header::{HeaderMap, HeaderValue};
+use reqwest::header::HeaderMap;
+#[allow(unused_imports, reason = "Used in a doc comment.")] // [`HeaderValue`] is imported for [`serialize`]'s documentation.
+use reqwest::header::HeaderValue;
 
 /// Deserializes a [`HeaderMap`]
 /// # Errors

diff --git a/src/glue/headervalue.rs b/src/glue/headervalue.rs
@@ -1,7 +1,7 @@
 //! Provides serialization and deserialization functions for [`HeaderValue`].
 
 use serde::{Deserialize, ser::{Serializer, Error as _}, de::{Deserializer, Error as _}};
-#[allow(unused_imports)] // [`HeaderValue`] is imported for [`serialize`]'s documentation.
+#[allow(unused_imports, reason = "Used in a doc comment.")]
 use reqwest::header::HeaderValue;
 
 /// Deserializes a [`HeaderValue`]

diff --git a/src/glue/proxy.rs b/src/glue/proxy.rs
@@ -1,4 +1,6 @@
 //! Proxy support for HTTP and HTTPS requests.
+//! 
+//! Enabled by the `http` feature flag.
 
 use std::str::FromStr;
 
@@ -10,7 +12,7 @@ use reqwest::Proxy;
 use crate::util::is_default;
 
 // Used for doc links.
-#[allow(unused_imports)]
+#[allow(unused_imports, reason = "Used in a doc comment.")]
 use crate::types::HttpClientConfig;
 
 /// Used by [`HttpClientConfig`] to detail how a [`reqwest::Proxy`] should be made.

diff --git a/src/glue/regex.rs b/src/glue/regex.rs
@@ -1,4 +1,6 @@
 //! Provides [`RegexWrapper`], a lazy, serializable/deserializable, and deconstructable wrapper around [`Regex`].
+//! 
+//! Enabled by the `regex` feature flag.
 
 mod regex_parts;
 pub use regex_parts::*;

diff --git a/src/glue/regex/regex_parts.rs b/src/glue/regex/regex_parts.rs
@@ -1,13 +1,15 @@
 //! Provides [`RegexParts`] and [`RegexConfig`] which are instructions for how to create a [`Regex`].
 //! 
 //! Used by [`RegexWrapper`].
+//! 
+//! Enabled by the `regex` feature flag.
 
 use std::str::FromStr;
 
 use serde::{Serialize, Deserialize};
 use regex::{Regex, RegexBuilder};
 use regex_syntax::{ParserBuilder, Parser, Error as RegexSyntaxError};
-#[allow(unused_imports)]
+#[allow(unused_imports, reason = "Used in a doc comment.")]
 use super::RegexWrapper;
 
 use crate::util::*;
@@ -51,7 +53,6 @@ impl From<RegexParts> for (String, RegexConfig) {
     }
 }
 
-#[allow(dead_code)]
 impl RegexParts {
     /// Creates a [`RegexParts`] with the provided pattern and a default config.
     /// # Errors

diff --git a/src/types.rs b/src/types.rs
@@ -22,8 +22,6 @@ mod string_matcher;
 pub use string_matcher::*;
 mod char_matcher;
 pub use char_matcher::*;
-#[cfg(feature = "advanced-requests")] mod advanced_requests;
-#[cfg(feature = "advanced-requests")] pub use advanced_requests::*;
 mod jobs;
 pub use jobs::*;
 mod stop_loop_condition;

diff --git a/src/types/char_matcher.rs b/src/types/char_matcher.rs
@@ -239,7 +239,7 @@ impl CharMatcher {
     }
 
     /// Internal method to make sure I don't accidentally commit Debug variants and other stuff unsuitable for the default config.
-    #[allow(clippy::unwrap_used)]
+    #[allow(clippy::unwrap_used, reason = "Private API, but they should be replaced by [`Option::is_none_or`] in 1.82.")]
     pub(crate) fn is_suitable_for_release(&self) -> bool {
         match self {
             Self::If {r#if, then, r#else} => r#if.is_suitable_for_release() && then.is_suitable_for_release() && r#else.is_suitable_for_release(),