From f342e343718dae25f09948716c361aad119bbb4d Mon Sep 17 00:00:00 2001 From: "James C. Wise" Date: Fri, 1 Mar 2024 05:22:29 -0500 Subject: [PATCH] Stuff and bypass.vip support --- .gitignore | 1 + Cargo.toml | 2 +- default-config.json | 47 ++++++++++++++++++++++++++++++-- src/rules/conditions.rs | 8 +++--- src/rules/mappers.rs | 56 ++++++++++++++++++++++++++++++++++---- src/types/bool_source.rs | 46 +++++++++++++++++++++++++++++-- src/types/config.rs | 16 +++++++---- src/types/string_source.rs | 6 ++-- 8 files changed, 158 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 0a0572b..5f438da 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ massif* callgrind* hyperfine* *.zip +mprocs.yaml diff --git a/Cargo.toml b/Cargo.toml index f81f8b6..f4fa6d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ serde = { version = "1.0.192", features = ["derive"] } serde_json = "1.0.108" url = "2.4.1" wasm-bindgen = "0.2.88" -reqwest = { version = "0.11.22", features = ["blocking"], optional = true} +reqwest = { version = "0.11.22", features = ["blocking", "json"], optional = true } const-str = { version = "0.5.6", optional = true } atty = { version = "0.2.14", optional = true } thiserror = "1.0.50" diff --git a/default-config.json b/default-config.json index 30657fd..6797cc0 100644 --- a/default-config.json +++ b/default-config.json @@ -9,6 +9,16 @@ { "RepeatUntilNonePass": { "rules": [ + { + "condition": {"All": [ + {"Not": {"FlagIsSet": "no-https"}}, + {"PartIs": {"part": "Scheme", "value": "http"}} + ]}, + "mapper": {"SetPart": {"part": "Scheme", "value": "https"}} + }, + + + { "condition": {"All": [ {"MaybeWWWDomain": "pixiv.net"}, @@ -54,7 +64,10 @@ { "condition": {"Any": [ - {"HostIsOneOf": ["t.co", "bit.ly", "pixiv.me", "tr.ee", "redd.it", "pin.it", "linkr.it"]}, + {"HostIsOneOf": [ + "t.co", "bit.ly", "bitly.com", "pixiv.me", "tr.ee", "redd.it", "pin.it", "linkr.it", + "cfl.re", "goo.gl", "ow.ly" + ]}, {"All": [ {"QualifiedDomain": 
"pawoo.net"}, {"PartIs": {"part": {"PathSegment": 0}, "value": "oauth_authentications"}} @@ -113,6 +126,22 @@ { "condition": {"QualifiedDomain": "l.threads.com"}, "mapper": {"GetUrlFromQueryParam": "u"} + }, + + { + "condition": {"All": [ + {"FlagIsSet": "bypass.vip"}, + {"HostIsOneOf": [ + "adf.ly", "adfoc.us", "ay.live", "aylink.co", "bc.vc", "bcvc.live", "bitly.com", + "boost.fusedgt.com", "boost.ink", "boostme.link", "cutt.ly", "eio.io", "exe.app", "exe.io", + "exee.io", "exey.io", "fc-lc.com", "fc.lc", "freehottip.com", "gestyy.com", "justpaste.it", + "letsboost.net", "mboost.me", "onlyme.ga", "ouo.io", "ouo.press", "ph.apps2app.com", + "rekonise.com", "sh.st", "shortconnect.com", "shorte.st", "shrto.ml", "social-unlock.com", + "steps2unlock.com", "sub2get.com", "sub2unlock.com", "sub2unlock.net", "sub4unlock.com", + "youtube.com", "ytsubme.com", "za.gl", "za.uy", "zee.gl" + ]} + ]}, + "mapper": "BypassVip" } ] } @@ -212,6 +241,13 @@ + { + "condition": {"QualifiedDomain": "at.tumblr.com"}, + "mapper": {"All": [ + {"SetHost": "www.tumblr.com"}, + {"SetPart": {"part": {"PathSegment": 1}, "value": null}} + ]} + }, { "condition": {"All": [ {"UnqualifiedDomain": "tumblr.com"}, @@ -433,6 +469,13 @@ ]}, "mapper": {"AllowQueryParams": ["k", "i", "rh", "bbn"]} }, + { + "condition": {"All": [ + {"PartContains": {"part": {"PathSegment": -1}, "where": "Start", "value": "ref="}}, + {"UnqualifiedAnyTld": "amazon"} + ]}, + "mapper": {"SetPart": {"part": {"PathSegment": -1}, "value": null}} + }, @@ -468,7 +511,7 @@ "value": { "ExtractPart": { "part": "Origin", - "source": {"HeaderValue": {"name": "Onion-Location"}} + "source": {"ResponseHeader": {"name": "Onion-Location"}} } } }} diff --git a/src/rules/conditions.rs b/src/rules/conditions.rs index 021fe0b..ec21d6f 100644 --- a/src/rules/conditions.rs +++ b/src/rules/conditions.rs @@ -435,7 +435,7 @@ pub enum Condition { // Miscellaneous. - /// Passes if the specified rule variable is set to the specified value. 
+ /// Passes if the specified variable is set to the specified value. /// # Examples /// ``` /// # use url_cleaner::rules::Condition; @@ -464,11 +464,11 @@ pub enum Condition { #[serde(deserialize_with = "optional_string_or_struct")] value: Option<StringSource>, /// Decides if getting the variable should return `Some("")` instead of `None`. - /// Defaults to `true`. + /// Defaults to `false`. #[serde(default)] value_none_to_empty_string: bool }, - /// Passes if the specified rule variable is set to the specified value. + /// Passes if the specified variable is set to the specified value. /// # Examples /// ``` /// # use url_cleaner::rules::Condition; @@ -494,7 +494,7 @@ pub enum Condition { /// The expected value of the variable. value: Option<String>, /// Does nothing; Only here to fix tests between feature flags. - /// Defaults to `true`. + /// Defaults to `false`. #[serde(default)] value_none_to_empty_string: bool }, diff --git a/src/rules/mappers.rs b/src/rules/mappers.rs index bc361b9..d657f36 100644 --- a/src/rules/mappers.rs +++ b/src/rules/mappers.rs @@ -7,13 +7,15 @@ use std::{ fs::{OpenOptions, File} }; +use std::str::Utf8Error; +use std::collections::hash_set::HashSet; +use std::collections::HashMap; + use serde::{Serialize, Deserialize}; use thiserror::Error; use url::{Url, ParseError}; -use std::str::Utf8Error; -use std::collections::hash_set::HashSet; #[cfg(all(feature = "http", not(target_family = "wasm")))] -use reqwest::{self, Error as ReqwestError, header::HeaderMap}; +use reqwest::{self, Error as ReqwestError, header::{HeaderMap, HeaderName, HeaderValue}}; use crate::glue::*; use crate::types::*; @@ -365,7 +367,13 @@ pub enum Mapper { /// # Errors /// Returns the error [`CommandError`] if the command fails.
#[cfg(feature = "commands")] - ReplaceWithCommandOutput(CommandWrapper) + ReplaceWithCommandOutput(CommandWrapper), + #[cfg(all(feature = "http", not(target_family = "wasm")))] + /// Uses [bypass.vip](https://bypass.vip/) to bypass various link shorteners too complex for URL Cleaner. + /// ```Python + /// requests.post("https://api.bypass.vip/", data="url={URL_GOES_HERE}", headers={"Origin": "https://bypass.vip", "Content-Type": "application/x-www-form-urlencoded"}).json()["destination"] + /// ``` + BypassVip } const fn get_true() -> bool {true} @@ -437,7 +445,13 @@ pub enum MapperError { /// Returned when a [`StringModificationError`] is encountered. #[cfg(feature = "string-modification")] #[error(transparent)] - StringModificationError(#[from] StringModificationError) + StringModificationError(#[from] StringModificationError), + #[error("ResponseJsonIsNotAMap")] + ResponseJsonIsNotAMap, + #[error("ResponseJsonMapDoesNotHaveKey")] + ResponseJsonMapDoesNotHaveKey, + #[error("ResponseJsonIsNotAStr")] + ResponseJsonIsNotAStr } #[cfg(feature = "cache-redirects")] @@ -595,7 +609,7 @@ impl Mapper { let _=x.write(format!("\n{}\t{}", url.as_str(), new_url.as_str()).as_bytes()); } } - *url=new_url.clone(); + *url=new_url; }, #[cfg(all(feature = "http", feature = "regex", not(target_family = "wasm")))] Self::ExtractUrlFromPage{headers, regex, expand} => if let Some(expand) = expand.get(url, params, false)? 
{ @@ -608,6 +622,36 @@ impl Mapper { #[cfg(feature = "commands")] Self::ReplaceWithCommandOutput(command) => {*url=command.get_url(Some(url))?;}, + Self::BypassVip => { + // requests.post("https://api.bypass.vip/", data="url=https://t.co/3XdBbanQpQ", headers={"Origin": "https://bypass.vip", "Content-Type": "application/x-www-form-urlencoded"}).json()["destination"] + #[cfg(feature = "cache-redirects")] + if let Ok(lines) = read_lines("redirect-cache.txt") { + for line in lines.map_while(Result::ok) { + if let Some((short, long)) = line.split_once('\t') { + if url.as_str()==short { + *url=Url::parse(long)?; + return Ok(()); + } + } + } + } + let new_url=Url::parse(params.http_client()?.post("https://api.bypass.vip") + .form(&HashMap::<&str, &str>::from_iter([("url", url.as_str())])) + .headers(HeaderMap::from_iter([(HeaderName::from_static("origin"), HeaderValue::from_static("https://bypass.vip"))])) + .send()? + .json::<serde_json::Value>()? + .as_object().ok_or(MapperError::ResponseJsonIsNotAMap)? + .get("destination").ok_or(MapperError::ResponseJsonMapDoesNotHaveKey)? + .as_str().ok_or(MapperError::ResponseJsonIsNotAStr)?)?; + #[cfg(feature = "cache-redirects")] + if !params.amnesia { + if let Ok(mut x) = OpenOptions::new().create(true).append(true).open("redirect-cache.txt") { + let _=x.write(format!("\n{}\t{}", url.as_str(), new_url.as_str()).as_bytes()); + } + } + *url=new_url; + }, + // Testing Self::None => {}, diff --git a/src/types/bool_source.rs b/src/types/bool_source.rs index e8dc1fe..eb0d8ad 100644 --- a/src/types/bool_source.rs +++ b/src/types/bool_source.rs @@ -139,7 +139,41 @@ pub enum BoolSource { FlagIsSet(#[serde(deserialize_with = "string_or_struct")] StringSource), /// Checks if the specified flag is set. #[cfg(not(feature = "string-source"))] - FlagIsSet(String) + FlagIsSet(String), + /// Checks if the specified variable's value is the specified value. + #[cfg(feature = "string-source")] + VarIs { + /// The name of the variable to check.
+ #[serde(deserialize_with = "string_or_struct")] + name: StringSource, + /// Decides if `name`'s call to [`StringSource::get`] should return `Some("")` instead of `None`. + /// Defaults to `false`. + #[serde(default)] + name_none_to_empty_string: bool, + /// The expected value of the variable. + #[serde(deserialize_with = "optional_string_or_struct")] + value: Option<StringSource>, + /// Decides if getting the variable should return `Some("")` instead of `None`. + /// Defaults to `false`. + #[serde(default)] + value_none_to_empty_string: bool + }, + /// Checks if the specified variable's value is the specified value. + #[cfg(not(feature = "string-source"))] + VarIs { + /// The name of the variable to check. + name: String, + /// Does nothing; Only here for compatibility between feature flags. + /// Defaults to `false`. + #[serde(default)] + name_none_to_empty_string: bool, + /// The expected value of the variable. + value: Option<String>, + /// Does nothing; Only here to fix tests between feature flags. + /// Defaults to `false`.
+ #[serde(default)] + value_none_to_empty_string: bool + }, } const fn get_true() -> bool {true} @@ -231,7 +265,15 @@ impl BoolSource { #[cfg(feature = "string-source")] Self::FlagIsSet(name) => params.flags.contains(&name.get(url, params, false)?.ok_or(BoolSourceError::StringSourceIsNone)?.into_owned()), #[cfg(not(feature = "string-source"))] - Self::FlagIsSet(name) => params.flags.contains(name) + Self::FlagIsSet(name) => params.flags.contains(name), + + #[cfg(feature = "string-source")] + Self::VarIs {name, name_none_to_empty_string, value, value_none_to_empty_string} => match value.as_ref() { + Some(source) => params.vars.get(&name.get(url, params, *name_none_to_empty_string)?.ok_or(BoolSourceError::StringSourceIsNone)?.to_string()).map(|x| &**x)==source.get(url, params, *value_none_to_empty_string)?.as_deref(), + None => params.vars.get(&name.get(url, params, *name_none_to_empty_string)?.ok_or(BoolSourceError::StringSourceIsNone)?.to_string()).is_none() + }, + #[cfg(not(feature = "string-source"))] + Self::VarIs {name, name_none_to_empty_string: _, value, value_none_to_empty_string} => params.vars.get(name).map(|x| &**x).or(if *value_none_to_empty_string {Some("")} else {None})==value.as_deref() }) } } diff --git a/src/types/config.rs b/src/types/config.rs index cb06d8b..3becd02 100644 --- a/src/types/config.rs +++ b/src/types/config.rs @@ -16,11 +16,11 @@ use crate::rules::Rules; /// The rules and rule parameters describing how to modify URLs. #[derive(Debug, Clone, Deserialize, Serialize)] pub struct Config { - /// The conditions and mappers that modify the URLS. - pub rules: Rules, /// The parameters passed into the rule's conditions and mappers. #[serde(default)] - pub params: Params + pub params: Params, + /// The conditions and mappers that modify the URLS. + pub rules: Rules } impl Config { @@ -87,6 +87,7 @@ pub struct Params { pub default_http_headers: HeaderMap, /// If `true`, disables all form of logging to disk. 
/// Currently just caching HTTP redirects. + #[serde(default)] pub amnesia: bool } @@ -181,11 +182,10 @@ mod tests { set_flag! (config, "tor2web2tor"); test_config!(config, "https://example.onion.example", "https://example.onion/"); unset_var! (config, "tor2web-suffix"); - + test_config!(config, "https://x.com?a=2", "https://twitter.com/"); test_config!(config, "https://example.com?fb_action_ids&mc_eid&ml_subscriber_hash&oft_ck&s_cid&unicorn_click_id", "https://example.com/"); - test_config!(config, "https://www.amazon.ca/UGREEN-Charger-Compact-Adapter-MacBook/dp/B0C6DX66TN/ref=sr_1_5?crid=2CNEQ7A6QR5NM&keywords=ugreen&qid=1704364659&sprefix=ugreen%2Caps%2C139&sr=8-5&ufe=app_do%3Aamzn1.fos.b06bdbbe-20fd-4ebc-88cf-fa04f1ca0da8", - "https://www.amazon.ca/dp/B0C6DX66TN"); + test_config!(config, "https://www.amazon.ca/UGREEN-Charger-Compact-Adapter-MacBook/dp/B0C6DX66TN/ref=sr_1_5?crid=2CNEQ7A6QR5NM&keywords=ugreen&qid=1704364659&sprefix=ugreen%2Caps%2C139&sr=8-5&ufe=app_do%3Aamzn1.fos.b06bdbbe-20fd-4ebc-88cf-fa04f1ca0da8", "https://www.amazon.ca/dp/B0C6DX66TN"); set_flag! (config, "unbreezewiki"); test_config!(config, "https://antifandom.com/tardis/wiki/Genocide", "https://tardis.fandom.com/wiki/Genocide"); @@ -195,6 +195,10 @@ mod tests { test_config!(config, "https://tardis.fandom.com/wiki/Genocide" , "https://breezewiki.com/tardis/wiki/Genocide"); unset_flag! (config, "breezewiki"); + set_flag! (config, "unmobile"); + test_config!(config, "https://en.m.wikipedia.org/wiki/Self-immolation_of_Aaron_Bushnell", "https://en.wikipedia.org/wiki/Self-immolation_of_Aaron_Bushnell"); + unset_flag! 
(config, "unmobile"); + config.apply(&mut Url::parse("https://127.0.0.1").unwrap()).unwrap(); } } diff --git a/src/types/string_source.rs b/src/types/string_source.rs index e3bdaa7..eda81c5 100644 --- a/src/types/string_source.rs +++ b/src/types/string_source.rs @@ -113,7 +113,7 @@ pub enum StringSource { /// If the call to [`reqwest::header::HeaderValue::to_str`] returns an error, that error is returned. /// Note that, as I write this, [`reqwest::header::HeaderValue::to_str`] only works if the result is valid ASCII. #[cfg(all(feature = "http", not(target_family = "wasm")))] - HeaderValue { + ResponseHeader { /// The name of the response header to get the value of. name: String, /// The headers to send in the HTTP GET request. @@ -121,7 +121,7 @@ pub enum StringSource { headers: HeaderMap }, /// Parses `source` as a URL and gets the specified value. - /// Useful when used with [`Self::HeaderValue`]. + /// Useful when used with [`Self::ResponseHeader`]. ExtractPart { /// The string to parse and extract `part` from. 
source: Box<StringSource>, @@ -231,7 +231,7 @@ impl StringSource { }, Self::Join {sources, join} => sources.iter().map(|source| source.get(url, params, none_to_empty_string)).collect::<Result<Option<Vec<_>>, _>>()?.map(|x| Cow::Owned(x.join(join))), #[cfg(all(feature = "http", not(target_family = "wasm")))] - Self::HeaderValue{name, headers} => Some(Cow::Owned(params.http_client()?.get(url.as_str()).headers(headers.clone()).send()?.headers().get(name).ok_or(StringSourceError::HeaderNotFound)?.to_str()?.to_string())), + Self::ResponseHeader{name, headers} => Some(Cow::Owned(params.http_client()?.get(url.as_str()).headers(headers.clone()).send()?.headers().get(name).ok_or(StringSourceError::HeaderNotFound)?.to_str()?.to_string())), Self::ExtractPart{source, part} => source.get(url, params, false)?.map(|x| Url::parse(&x)).transpose()?.and_then(|x| part.get(&x, none_to_empty_string).map(|x| Cow::Owned(x.into_owned()))), #[cfg(all(feature = "http", feature = "regex", not(target_family = "wasm")))] Self::ExtractFromPage{headers, regex, expand} => if let Some(expand) = expand.get(url, params, false)? {