Skip to content

Commit

Permalink
1.82 and some stuff
Browse files Browse the repository at this point in the history
- Moved advanced_requests.rs to glue
- Added lint reasons and changed `allow` to `expect` where relevant
- `Mapper::ConditionChain` and `StringModification::StringMatcherChain`
  • Loading branch information
Scripter17 committed Sep 7, 2024
1 parent dcbf4a6 commit 9443dd2
Show file tree
Hide file tree
Showing 30 changed files with 206 additions and 151 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# URL Cleaner
# URL Cleaner

Websites often put unique identifiers into URLs so that, when you send a link to a friend and they open it, the website knows it was you who sent it to them.
As most people do not understand and therefore cannot consent to this, it is polite to remove the spytext query parameters before sending URLs to people.
Expand Down Expand Up @@ -202,7 +202,7 @@ That said, if you notice any rules that use but don't actually need HTTP request

Note: [JSON output is supported](#json-output).

Unless `Mapper::(e|)Print(ln|)` or a `Debug` variant is used, the following should always be true:
Unless a `Debug` variant is used, the following should always be true:

1. Input URLs are a list of URLs starting with URLs provided as command line arguments then each line of the STDIN.

Expand All @@ -220,8 +220,6 @@ The `--json`/`-j` flag can be used to have URL Cleaner output JSON instead of li

The exact format is currently in flux.

If a `Mapper::Print(ln|)` is used, this is not guaranteed to be valid JSON.

## Panic policy

URL Cleaner should only ever panic under the following circumstances:
Expand Down
76 changes: 47 additions & 29 deletions default-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"bw.artemislena.eu", "bw.hamstro.dev", "bw.projectsegfau.lt", "bw.skunky7dhv7nohsoalpwe3sxfz3fbkad7r3wk632riye25vqm3meqead.onion",
"nerd.whatever.social", "z.opnxng.com"
],
"shortlink-hosts": [
"redirect-hosts": [
"2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.ew", "amzn.to",
"apple.co", "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly",
"cbsn.ws", "cfl.re", "chn.ge", "chng.it", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", "dis.gd",
Expand All @@ -26,7 +26,7 @@
"on.nyc.gov", "onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "pin.it", "pixiv.me", "play.st", "politi.co", "py.pl",
"qr1.be", "rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "s76.co", "shor.tf",
"shorturl.at", "spoti.fi", "spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "thecutio", "thr.cm", "tmz.me", "to.pbs.org", "tr.ee",
"trib.al", "u.jd.com", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca", "visitlink.me"
"trib.al", "u.jd.com", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca", "visitlink.me", "hmstr.fr"
],
"utps": [
"Tcsack", "__hsfp", "__hssc", "__hstc", "__io_lv", "__s", "_branch_match_id", "_branch_referrer", "_clde", "_cldee", "_ga",
Expand Down Expand Up @@ -118,6 +118,7 @@
{"before": "https://https//example1.com/example2.com/abc", "after": "https://example2.com/abc"},
{"before": "https://http//www.deviantart.com/user.deviantart.com/gallery", "after": "https://deviantart.com/user/gallery"},
{"before": "https://https//abc.tumblr.com.tumblr.com", "after": "https://tumblr.com/abc/"},
{"before": "https://abc.deviantart.com.deviantart.com", "after": "https://deviantart.com/abc/"},

{"before": "https://https//example.com/abc", "after": "https://example.com/abc"},
{"before": "https://https//example1.com/example2.com/abc", "after": "https://example2.com/abc"},
Expand Down Expand Up @@ -196,7 +197,7 @@
"condition": {"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"Any": [
{"InSet": "shortlink-hosts"},
{"InSet": "redirect-hosts"},
{"InSet": "bypass.vip-hosts"},
{"IsOneOf": [
"allmylinks.com", "api.linkr.bio", "api.pinterest.com", "bfy.tw", "buymeacoff.ee", "buymeacoffee.com",
Expand All @@ -205,7 +206,7 @@
"goodreads.com", "google.com", "gprivate.com", "href.li", "instagr.am", "instagram.com", "l.instagram.com", "l.threads.com",
"lmddgtfy.net", "lmgtfy2.com", "lnk.bio", "open.substack.com", "out.reddit.com", "pawoo.net", "pixiv.net", "preview.tinyurl.com",
"rd.goodreads.com", "sketchfab.com", "sketchfab.com", "skfb.ly", "smarturl.it", "steamcommunity.com", "subscribestar.adult",
"substack.com", "t.umblr.com", "tinyurl.com", "toyhou.se", "youtube.com", "action.openmedia.org", "1link.club", "proxy.notsobot.com"
"substack.com", "t.umblr.com", "tinyurl.com", "toyhou.se", "youtube.com", "action.openmedia.org", "1link.club", "proxy.notsobot.com", "awin1.com"
]},
{"InSet": "lmgtfy-hosts"},
{"LengthIs": 4},
Expand Down Expand Up @@ -452,20 +453,33 @@
"condition": {"PathIs": "/gp/r.html"},
"mapper": {"GetUrlFromQueryParam": "U"}
}},
"substack.com": {"IfCondition": {
"condition": {"PartContains": {"part": "Path", "where": "Start", "value": "/redirect/2/"}},
"mapper": {"SetPart": {
"part": "Whole",
"value": {"Modified": {
"source": {"Part": {"PathSegment": 2}},
"modification": {"All": [
{"KeepNthSegment": {"split": ".", "n": 0}},
"Base64Decode",
{"JsonPointer": "/e"}
]}
"substack.com": {"ConditionChain": [
{
"condition": {"PartContains": {"part": "Path", "where": "Start", "value": "/redirect/2/"}},
"mapper": {"SetPart": {
"part": "Whole",
"value": {"Modified": {
"source": {"Part": {"PathSegment": 2}},
"modification": {"All": [
{"KeepNthSegment": {"split": ".", "n": 0}},
"Base64Decode",
{"JsonPointer": "/e"}
]}
}}
}}
}}
}},
},
{
"condition": {"PathIs": "/app-link/post"},
"mapper": {"AllowQueryParams": ["publication_id", "post_id"]}
},
{
"condition": {"All": [
{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
{"PartMatches": {"part": {"PathSegment": 1}, "matcher": {"LengthIs": 36}}}
]},
"mapper": "RemoveQuery"
}
]},
"open.substack.com": {"IfCondition": {
"condition": {"PartIs": {"part": {"PathSegment": 0}, "value": "pub"}},
"mapper": {"All": [
Expand Down Expand Up @@ -561,7 +575,11 @@
}
}
}},
"proxy.notsobot.com": {"GetUrlFromQueryParam": "url"}
"proxy.notsobot.com": {"GetUrlFromQueryParam": "url"},
"awin1.com": {"IfCondition": {
"condition": {"PathIs": "/cread.php"},
"mapper": {"GetUrlFromQueryParam": "ued"}
}}
}
}
},
Expand Down Expand Up @@ -596,16 +614,16 @@
{"Not": {"FlagIsSet": "no-http"}},
{"Any": [
{"All": [
{"FlagIsSet": "assume-1-dot-2-is-shortlink"},
{"FlagIsSet": "assume-1-dot-2-is-redirect"},
{"PartMatches": {"part": "Domain", "matcher": {"Regex": "^.\\...$"}}}
]},
{"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"InSet": "shortlink-hosts"}
"matcher": {"InSet": "redirect-hosts"}
}},
{"TreatErrorAsFail": {"PartMatches": {
"part": "NotSubdomain",
"matcher": {"InSet": "shortlink-hosts"}
"matcher": {"InSet": "redirect-hosts"}
}}},
{"PartMap": {
"part": "HostWithoutWWWDotPrefix",
Expand All @@ -614,9 +632,12 @@
"api.pinterest.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "url_shortener"}},
"allmylinks.com" : {"PathIs": "/link/out"},
"gofundme.com" : {"PartIs": {"part": {"PathSegment": 0}, "value": "url_shortener"}},
"substack.com" : {"All": [
{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
{"Not": {"PartIs": {"part": {"PathSegment": 1}, "value": "2"}}}
"substack.com" : {"Any": [
{"All": [
{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
{"Not": {"PartIs": {"part": {"PathSegment": 1}, "value": "2"}}}
]},
{"PathIs": "/app-link/post"}
]},
"sketchfab.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}}
}
Expand All @@ -632,7 +653,7 @@
]},
"mapper": {"All": [
{"Common": {"name": "utps"}},
{"ExpandShortLink": {}}
{"ExpandRedirect": {}}
]}
},
{
Expand Down Expand Up @@ -662,10 +683,7 @@
{"Any": [
{"HostIsOneOf": ["http", "https", "bsky.app", "www.bsky.app"]},
{"PartContains": {"part": "Path", "value": "http"}},
{"All": [
{"PartIs": {"part": {"DomainSegment": 0}, "value": "www"}},
{"Not": {"PartIs": {"part": {"DomainSegment": 3}, "value": null}}}
]}
{"Not": {"PartIs": {"part": {"DomainSegment": 3}, "value": null}}}
]}
]},
"rules": [
Expand Down
32 changes: 17 additions & 15 deletions src/glue.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
//! "Glue" to make working with types from other crates easier.
#[cfg(feature = "regex" )] mod regex;
#[cfg(feature = "regex" )] pub use regex::*;
#[cfg(feature = "glob" )] mod glob;
#[cfg(feature = "glob" )] pub use glob::*;
#[cfg(feature = "commands")] mod command;
#[cfg(feature = "commands")] pub use command::*;
#[cfg(feature = "http" )] pub mod proxy;
#[cfg(feature = "http" )] pub use proxy::*;
#[cfg(feature = "http" )] pub(crate) mod headermap;
#[cfg(feature = "http" )] pub(crate) mod headervalue;
#[cfg(feature = "http" )] pub(crate) mod method;
#[cfg(feature = "cache" )] mod caching;
#[cfg(feature = "cache" )] pub use caching::*;
#[cfg(feature = "base64" )] mod base64;
#[cfg(feature = "base64" )] pub use base64::*;
#[cfg(feature = "regex" )] mod regex;
#[cfg(feature = "regex" )] pub use regex::*;
#[cfg(feature = "glob" )] mod glob;
#[cfg(feature = "glob" )] pub use glob::*;
#[cfg(feature = "commands" )] mod command;
#[cfg(feature = "commands" )] pub use command::*;
#[cfg(feature = "advanced-requests")] mod advanced_requests;
#[cfg(feature = "advanced-requests")] pub use advanced_requests::*;
#[cfg(feature = "http" )] pub mod proxy;
#[cfg(feature = "http" )] pub use proxy::*;
#[cfg(feature = "http" )] pub(crate) mod headermap;
#[cfg(feature = "http" )] pub(crate) mod headervalue;
#[cfg(feature = "http" )] pub(crate) mod method;
#[cfg(feature = "cache" )] mod caching;
#[cfg(feature = "cache" )] pub use caching::*;
#[cfg(feature = "base64" )] mod base64;
#[cfg(feature = "base64" )] pub use base64::*;
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Provides [`RequestConfig`], [`RequestBody`], and [`ResponseHandler`] which allows for sending HTTP requests and getting strings from their responses.
//!
//! Enabled by the `advanced-requests` feature flag.
use std::collections::HashMap;

Expand All @@ -7,7 +9,7 @@ use serde::{Deserialize, Serialize};
use serde_json::value::Value;
use reqwest::{Method, header::{HeaderName, HeaderValue, HeaderMap}};
use thiserror::Error;
#[allow(unused_imports)] // Used for documentation.
#[allow(unused_imports, reason = "Used in a doc comment.")]
use reqwest::cookie::Cookie;

use crate::types::*;
Expand Down
2 changes: 2 additions & 0 deletions src/glue/base64.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Glue to make using [`base64`] easier.
//!
//! Enabled by the `base64` feature flag.
use std::str::FromStr;

Expand Down
6 changes: 4 additions & 2 deletions src/glue/caching.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#![doc = "Allows caching to an SQLite file."]
//! Allows caching to an SQLite file.
//!
//! Enabled by the `cache` feature flag.
use std::sync::{Arc, Mutex};
use std::str::FromStr;
Expand All @@ -10,7 +12,7 @@ use diesel::prelude::*;

use crate::util::*;

#[allow(clippy::missing_docs_in_private_items, missing_docs)]
#[allow(clippy::missing_docs_in_private_items, missing_docs, reason = "File is auto-generated by diesel's CLI.")]
mod schema;
pub use schema::cache;

Expand Down
12 changes: 4 additions & 8 deletions src/glue/command.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Provides [`CommandConfig`] to allow usage of external commands.
//!
//! Enabled by the `commands` feature flag.
use std::process::{Command, Stdio};
use std::io::Write;
Expand All @@ -13,16 +15,11 @@ use thiserror::Error;
use serde::{Serialize, Deserialize};
use which::which;

// Used just for documentation.
#[allow(unused_imports)]
#[allow(unused_imports, reason = "Used in a doc comment.")]
use crate::types::*;
use crate::util::*;

/// Instructions on how to make and run a [`Command`] object.
///
/// If you are making a URL-Cleaner-as-a-service service, you should disable the `commands` feature to block access to this.
///
/// I don't care if you use sandboxing. You shouldn't tempt fate.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(remote= "Self")]
pub struct CommandConfig {
Expand Down Expand Up @@ -148,7 +145,7 @@ impl CommandConfig {
/// If `stdin` is `Some` and the calls to [`Command::spawn`], [`std::process::ChildStdin::write_all`], or [`std::process::Child::wait_with_output`] returns an error, that error is returned.
///
/// If `stdin` is `None` and the call to [`Command::output`] returns an error, that error is returned.
#[allow(clippy::missing_panics_doc)]
#[allow(clippy::missing_panics_doc, reason = "Shouldn't ever panic.")]
pub fn output(&self, job_state: &JobState) -> Result<String, CommandError> {
// https://stackoverflow.com/a/49597789/10720231
let mut command = self.make_command(job_state)?;
Expand All @@ -171,7 +168,6 @@ impl CommandConfig {
/// If the call to [`Self::output`] returns an error, that error is returned.
///
/// If the call to [`Url::parse`] returns an error, that error is returned.
#[allow(dead_code)]
pub fn get_url(&self, job_state: &JobState) -> Result<Url, CommandError> {
    // Run the command and capture its output as a string; any failure propagates to the caller.
    let raw_output = self.output(job_state)?;
    // Strip trailing carriage returns and line feeds so they are not fed to the URL parser.
    let candidate = raw_output.trim_end_matches(&['\r', '\n']);
    // A parse failure is converted into a `CommandError` by the `?` operator.
    let parsed = Url::parse(candidate)?;
    Ok(parsed)
}
Expand Down
2 changes: 2 additions & 0 deletions src/glue/glob.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Provides [`GlobWrapper`], a serializable/deserializable wrapper around [`Pattern`] and [`MatchOptions`].
//!
//! Enabled by the `glob` feature flag.
use std::str::FromStr;

Expand Down
5 changes: 3 additions & 2 deletions src/glue/headermap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
use std::collections::HashMap;

use serde::{Deserialize, ser::{Serializer, Error as _}, de::{Deserializer, Error as _}};
#[allow(unused_imports)] // [`HeaderValue`] is imported for [`serialize`]'s documentation.
use reqwest::header::{HeaderMap, HeaderValue};
use reqwest::header::HeaderMap;
#[allow(unused_imports, reason = "Used in a doc comment.")] // [`HeaderValue`] is imported for [`serialize`]'s documentation.
use reqwest::header::HeaderValue;

/// Deserializes a [`HeaderMap`]
/// # Errors
Expand Down
2 changes: 1 addition & 1 deletion src/glue/headervalue.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Provides serialization and deserialization functions for [`HeaderValue`].
use serde::{Deserialize, ser::{Serializer, Error as _}, de::{Deserializer, Error as _}};
#[allow(unused_imports)] // [`HeaderValue`] is imported for [`serialize`]'s documentation.
#[allow(unused_imports, reason = "Used in a doc comment.")]
use reqwest::header::HeaderValue;

/// Deserializes a [`HeaderValue`]
Expand Down
4 changes: 3 additions & 1 deletion src/glue/proxy.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Proxy support for HTTP and HTTPS requests.
//!
//! Enabled by the `http` feature flag.
use std::str::FromStr;

Expand All @@ -10,7 +12,7 @@ use reqwest::Proxy;
use crate::util::is_default;

// Used for doc links.
#[allow(unused_imports)]
#[allow(unused_imports, reason = "Used in a doc comment.")]
use crate::types::HttpClientConfig;

/// Used by [`HttpClientConfig`] to detail how a [`reqwest::Proxy`] should be made.
Expand Down
2 changes: 2 additions & 0 deletions src/glue/regex.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//! Provides [`RegexWrapper`], a lazy, serializable/deserializable, and deconstructable wrapper around [`Regex`].
//!
//! Enabled by the `regex` feature flag.
mod regex_parts;
pub use regex_parts::*;
Expand Down
5 changes: 3 additions & 2 deletions src/glue/regex/regex_parts.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
//! Provides [`RegexParts`] and [`RegexConfig`] which are instructions for how to create a [`Regex`].
//!
//! Used by [`RegexWrapper`].
//!
//! Enabled by the `regex` feature flag.
use std::str::FromStr;

use serde::{Serialize, Deserialize};
use regex::{Regex, RegexBuilder};
use regex_syntax::{ParserBuilder, Parser, Error as RegexSyntaxError};
#[allow(unused_imports)]
#[allow(unused_imports, reason = "Used in a doc comment.")]
use super::RegexWrapper;

use crate::util::*;
Expand Down Expand Up @@ -51,7 +53,6 @@ impl From<RegexParts> for (String, RegexConfig) {
}
}

#[allow(dead_code)]
impl RegexParts {
/// Creates a [`RegexParts`] with the provided pattern and a default config.
/// # Errors
Expand Down
2 changes: 0 additions & 2 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ mod string_matcher;
pub use string_matcher::*;
mod char_matcher;
pub use char_matcher::*;
#[cfg(feature = "advanced-requests")] mod advanced_requests;
#[cfg(feature = "advanced-requests")] pub use advanced_requests::*;
mod jobs;
pub use jobs::*;
mod stop_loop_condition;
Expand Down
2 changes: 1 addition & 1 deletion src/types/char_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ impl CharMatcher {
}

/// Internal method to make sure I don't accidentally commit Debug variants and other stuff unsuitable for the default config.
#[allow(clippy::unwrap_used)]
#[allow(clippy::unwrap_used, reason = "Private API, but they should be replaced by [`Option::is_none_or`] in 1.82.")]
pub(crate) fn is_suitable_for_release(&self) -> bool {
match self {
Self::If {r#if, then, r#else} => r#if.is_suitable_for_release() && then.is_suitable_for_release() && r#else.is_suitable_for_release(),
Expand Down
Loading

0 comments on commit 9443dd2

Please sign in to comment.