Skip to content

Commit

Permalink
Stuff and bypass.vip support
Browse files Browse the repository at this point in the history
  • Loading branch information
Scripter17 committed Mar 1, 2024
1 parent 29b1a75 commit f342e34
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 24 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ massif*
callgrind*
hyperfine*
*.zip
mprocs.yaml
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ serde = { version = "1.0.192", features = ["derive"] }
serde_json = "1.0.108"
url = "2.4.1"
wasm-bindgen = "0.2.88"
reqwest = { version = "0.11.22", features = ["blocking"], optional = true}
reqwest = { version = "0.11.22", features = ["blocking", "json"], optional = true }
const-str = { version = "0.5.6", optional = true }
atty = { version = "0.2.14", optional = true }
thiserror = "1.0.50"
Expand Down
47 changes: 45 additions & 2 deletions default-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@
{
"RepeatUntilNonePass": {
"rules": [
{
"condition": {"All": [
{"Not": {"FlagIsSet": "no-https"}},
{"PartIs": {"part": "Scheme", "value": "http"}}
]},
"mapper": {"SetPart": {"part": "Scheme", "value": "https"}}
},



{
"condition": {"All": [
{"MaybeWWWDomain": "pixiv.net"},
Expand Down Expand Up @@ -54,7 +64,10 @@

{
"condition": {"Any": [
{"HostIsOneOf": ["t.co", "bit.ly", "pixiv.me", "tr.ee", "redd.it", "pin.it", "linkr.it"]},
{"HostIsOneOf": [
"t.co", "bit.ly", "bitly.com", "pixiv.me", "tr.ee", "redd.it", "pin.it", "linkr.it",
"cfl.re", "goo.gl", "ow.ly"
]},
{"All": [
{"QualifiedDomain": "pawoo.net"},
{"PartIs": {"part": {"PathSegment": 0}, "value": "oauth_authentications"}}
Expand Down Expand Up @@ -113,6 +126,22 @@
{
"condition": {"QualifiedDomain": "l.threads.com"},
"mapper": {"GetUrlFromQueryParam": "u"}
},

{
"condition": {"All": [
{"FlagIsSet": "bypass.vip"},
{"HostIsOneOf": [
"adf.ly", "adfoc.us", "ay.live", "aylink.co", "bc.vc", "bcvc.live", "bitly.com",
"boost.fusedgt.com", "boost.ink", "boostme.link", "cutt.ly", "eio.io", "exe.app", "exe.io",
"exee.io", "exey.io", "fc-lc.com", "fc.lc", "freehottip.com", "gestyy.com", "justpaste.it",
"letsboost.net", "mboost.me", "onlyme.ga", "ouo.io", "ouo.press", "ph.apps2app.com",
"rekonise.com", "sh.st", "shortconnect.com", "shorte.st", "shrto.ml", "social-unlock.com",
"steps2unlock.com", "sub2get.com", "sub2unlock.com", "sub2unlock.net", "sub4unlock.com",
"youtube.com", "ytsubme.com", "za.gl", "za.uy", "zee.gl"
]}
]},
"mapper": "BypassVip"
}
]
}
Expand Down Expand Up @@ -212,6 +241,13 @@



{
"condition": {"QualifiedDomain": "at.tumblr.com"},
"mapper": {"All": [
{"SetHost": "www.tumblr.com"},
{"SetPart": {"part": {"PathSegment": 1}, "value": null}}
]}
},
{
"condition": {"All": [
{"UnqualifiedDomain": "tumblr.com"},
Expand Down Expand Up @@ -433,6 +469,13 @@
]},
"mapper": {"AllowQueryParams": ["k", "i", "rh", "bbn"]}
},
{
"condition": {"All": [
{"PartContains": {"part": {"PathSegment": -1}, "where": "Start", "value": "ref="}},
{"UnqualifiedAnyTld": "amazon"}
]},
"mapper": {"SetPart": {"part": {"PathSegment": -1}, "value": null}}
},



Expand Down Expand Up @@ -468,7 +511,7 @@
"value": {
"ExtractPart": {
"part": "Origin",
"source": {"HeaderValue": {"name": "Onion-Location"}}
"source": {"ResponseHeader": {"name": "Onion-Location"}}
}
}
}}
Expand Down
8 changes: 4 additions & 4 deletions src/rules/conditions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ pub enum Condition {

// Miscellaneous.

/// Passes if the specified rule variable is set to the specified value.
/// Passes if the specified variable is set to the specified value.
/// # Examples
/// ```
/// # use url_cleaner::rules::Condition;
Expand Down Expand Up @@ -464,11 +464,11 @@ pub enum Condition {
#[serde(deserialize_with = "optional_string_or_struct")]
value: Option<StringSource>,
/// Decides if getting the variable should return `Some("")` instead of `None`.
/// Defaults to `true`.
/// Defaults to `false`.
#[serde(default)]
value_none_to_empty_string: bool
},
/// Passes if the specified rule variable is set to the specified value.
/// Passes if the specified variable is set to the specified value.
/// # Examples
/// ```
/// # use url_cleaner::rules::Condition;
Expand All @@ -494,7 +494,7 @@ pub enum Condition {
/// The expected value of the variable.
value: Option<String>,
/// Does nothing; Only here to fix tests between feature flags.
/// Defaults to `true`.
/// Defaults to `false`.
#[serde(default)]
value_none_to_empty_string: bool
},
Expand Down
56 changes: 50 additions & 6 deletions src/rules/mappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ use std::{
fs::{OpenOptions, File}
};

use std::str::Utf8Error;
use std::collections::hash_set::HashSet;
use std::collections::HashMap;

use serde::{Serialize, Deserialize};
use thiserror::Error;
use url::{Url, ParseError};
use std::str::Utf8Error;
use std::collections::hash_set::HashSet;
#[cfg(all(feature = "http", not(target_family = "wasm")))]
use reqwest::{self, Error as ReqwestError, header::HeaderMap};
use reqwest::{self, Error as ReqwestError, header::{HeaderMap, HeaderName, HeaderValue}};

use crate::glue::*;
use crate::types::*;
Expand Down Expand Up @@ -365,7 +367,13 @@ pub enum Mapper {
/// # Errors
/// Returns the error [`CommandError`] if the command fails.
#[cfg(feature = "commands")]
ReplaceWithCommandOutput(CommandWrapper)
ReplaceWithCommandOutput(CommandWrapper),
#[cfg(all(feature = "http", not(target_family = "wasm")))]
/// Uses [bypass.vip](https://bypass.vip/) to bypass various link shorteners too complex for URL Cleaner.
/// ```Python
/// requests.post("https://api.bypass.vip/", data="url={URL_GOES_HERE}", headers={"Origin": "https://bypass.vip", "Content-Type": "application/x-www-form-urlencoded"}).json()["destination"]
/// ```
BypassVip
}

const fn get_true() -> bool {true}
Expand Down Expand Up @@ -437,7 +445,13 @@ pub enum MapperError {
/// Returned when a [`StringModificationError`] is encountered.
#[cfg(feature = "string-modification")]
#[error(transparent)]
StringModificationError(#[from] StringModificationError)
StringModificationError(#[from] StringModificationError),
#[error("ResponseJsonIsNotAMap")]
ResponseJsonIsNotAMap,
#[error("ResponseJsonMapDoesNotHaveKey")]
ResponseJsonMapDoesNotHaveKey,
#[error("ResponseJsonIsNotAStr")]
ResponseJsonIsNotAStr
}

#[cfg(feature = "cache-redirects")]
Expand Down Expand Up @@ -595,7 +609,7 @@ impl Mapper {
let _=x.write(format!("\n{}\t{}", url.as_str(), new_url.as_str()).as_bytes());
}
}
*url=new_url.clone();
*url=new_url;
},
#[cfg(all(feature = "http", feature = "regex", not(target_family = "wasm")))]
Self::ExtractUrlFromPage{headers, regex, expand} => if let Some(expand) = expand.get(url, params, false)? {
Expand All @@ -608,6 +622,36 @@ impl Mapper {
#[cfg(feature = "commands")]
Self::ReplaceWithCommandOutput(command) => {*url=command.get_url(Some(url))?;},

Self::BypassVip => {
// requests.post("https://api.bypass.vip/", data="url=https://t.co/3XdBbanQpQ", headers={"Origin": "https://bypass.vip", "Content-Type": "application/x-www-form-urlencoded"}).json()["destination"]
#[cfg(feature = "cache-redirects")]
if let Ok(lines) = read_lines("redirect-cache.txt") {
for line in lines.map_while(Result::ok) {
if let Some((short, long)) = line.split_once('\t') {
if url.as_str()==short {
*url=Url::parse(long)?;
return Ok(());
}
}
}
}
let new_url=Url::parse(params.http_client()?.post("https://api.bypass.vip")
.form(&HashMap::<&str, &str>::from_iter([("url", url.as_str())]))
.headers(HeaderMap::from_iter([(HeaderName::from_static("origin"), HeaderValue::from_static("https://bypass.vip"))]))
.send()?
.json::<serde_json::value::Value>()?
.as_object().ok_or(MapperError::ResponseJsonIsNotAMap)?
.get("destination").ok_or(MapperError::ResponseJsonMapDoesNotHaveKey)?
.as_str().ok_or(MapperError::ResponseJsonIsNotAStr)?)?;
#[cfg(feature = "cache-redirects")]
if !params.amnesia {
if let Ok(mut x) = OpenOptions::new().create(true).append(true).open("redirect-cache.txt") {
let _=x.write(format!("\n{}\t{}", url.as_str(), new_url.as_str()).as_bytes());
}
}
*url=new_url;
},

// Testing

Self::None => {},
Expand Down
46 changes: 44 additions & 2 deletions src/types/bool_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,41 @@ pub enum BoolSource {
FlagIsSet(#[serde(deserialize_with = "string_or_struct")] StringSource),
/// Checks if the specified flag is set.
#[cfg(not(feature = "string-source"))]
FlagIsSet(String)
FlagIsSet(String),
/// Checks if the specified variable's value is the specified value.
#[cfg(feature = "string-source")]
VarIs {
/// The name of the variable to check.
#[serde(deserialize_with = "string_or_struct")]
name: StringSource,
/// Decides if `name`'s call to [`StringSource::get`] should return `Some("")` instead of `None`.
/// Defaults to `false`.
#[serde(default)]
name_none_to_empty_string: bool,
/// The expected value of the variable.
#[serde(deserialize_with = "optional_string_or_struct")]
value: Option<StringSource>,
/// Decides if getting the variable should return `Some("")` instead of `None`.
/// Defaults to `false`.
#[serde(default)]
value_none_to_empty_string: bool
},
/// Checks if the specified variable's value is the specified value.
#[cfg(not(feature = "string-source"))]
VarIs {
/// The name of the variable to check.
name: String,
/// Does nothing; Only here for compatibility between feature flags.
/// Defaults to `false`.
#[serde(default)]
name_none_to_empty_string: bool,
/// The expected value of the variable.
value: Option<String>,
/// Decides if an unset variable should be treated as `Some("")` instead of `None` when comparing against `value`.
/// Defaults to `false`.
#[serde(default)]
value_none_to_empty_string: bool
},
}

const fn get_true() -> bool {true}
Expand Down Expand Up @@ -231,7 +265,15 @@ impl BoolSource {
#[cfg(feature = "string-source")]
Self::FlagIsSet(name) => params.flags.contains(&name.get(url, params, false)?.ok_or(BoolSourceError::StringSourceIsNone)?.into_owned()),
#[cfg(not(feature = "string-source"))]
Self::FlagIsSet(name) => params.flags.contains(name)
Self::FlagIsSet(name) => params.flags.contains(name),

#[cfg(feature = "string-source")]
Self::VarIs {name, name_none_to_empty_string, value, value_none_to_empty_string} => match value.as_ref() {
Some(source) => params.vars.get(&name.get(url, params, *name_none_to_empty_string)?.ok_or(BoolSourceError::StringSourceIsNone)?.to_string()).map(|x| &**x)==source.get(url, params, *value_none_to_empty_string)?.as_deref(),
None => params.vars.get(&name.get(url, params, *name_none_to_empty_string)?.ok_or(BoolSourceError::StringSourceIsNone)?.to_string()).is_none()
},
#[cfg(not(feature = "string-source"))]
Self::VarIs {name, name_none_to_empty_string: _, value, value_none_to_empty_string} => params.vars.get(name).map(|x| &**x).or(if *value_none_to_empty_string {Some("")} else {None})==value.as_deref()
})
}
}
16 changes: 10 additions & 6 deletions src/types/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ use crate::rules::Rules;
/// The rules and rule parameters describing how to modify URLs.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Config {
/// The conditions and mappers that modify the URLS.
pub rules: Rules,
/// The parameters passed into the rule's conditions and mappers.
#[serde(default)]
pub params: Params
pub params: Params,
/// The conditions and mappers that modify the URLs.
pub rules: Rules
}

impl Config {
Expand Down Expand Up @@ -87,6 +87,7 @@ pub struct Params {
pub default_http_headers: HeaderMap,
/// If `true`, disables all forms of logging to disk.
/// Currently just caching HTTP redirects.
#[serde(default)]
pub amnesia: bool
}

Expand Down Expand Up @@ -181,11 +182,10 @@ mod tests {
set_flag! (config, "tor2web2tor");
test_config!(config, "https://example.onion.example", "https://example.onion/");
unset_var! (config, "tor2web-suffix");

test_config!(config, "https://x.com?a=2", "https://twitter.com/");
test_config!(config, "https://example.com?fb_action_ids&mc_eid&ml_subscriber_hash&oft_ck&s_cid&unicorn_click_id", "https://example.com/");
test_config!(config, "https://www.amazon.ca/UGREEN-Charger-Compact-Adapter-MacBook/dp/B0C6DX66TN/ref=sr_1_5?crid=2CNEQ7A6QR5NM&keywords=ugreen&qid=1704364659&sprefix=ugreen%2Caps%2C139&sr=8-5&ufe=app_do%3Aamzn1.fos.b06bdbbe-20fd-4ebc-88cf-fa04f1ca0da8",
"https://www.amazon.ca/dp/B0C6DX66TN");
test_config!(config, "https://www.amazon.ca/UGREEN-Charger-Compact-Adapter-MacBook/dp/B0C6DX66TN/ref=sr_1_5?crid=2CNEQ7A6QR5NM&keywords=ugreen&qid=1704364659&sprefix=ugreen%2Caps%2C139&sr=8-5&ufe=app_do%3Aamzn1.fos.b06bdbbe-20fd-4ebc-88cf-fa04f1ca0da8", "https://www.amazon.ca/dp/B0C6DX66TN");

set_flag! (config, "unbreezewiki");
test_config!(config, "https://antifandom.com/tardis/wiki/Genocide", "https://tardis.fandom.com/wiki/Genocide");
Expand All @@ -195,6 +195,10 @@ mod tests {
test_config!(config, "https://tardis.fandom.com/wiki/Genocide" , "https://breezewiki.com/tardis/wiki/Genocide");
unset_flag! (config, "breezewiki");

set_flag! (config, "unmobile");
test_config!(config, "https://en.m.wikipedia.org/wiki/Self-immolation_of_Aaron_Bushnell", "https://en.wikipedia.org/wiki/Self-immolation_of_Aaron_Bushnell");
unset_flag! (config, "unmobile");

config.apply(&mut Url::parse("https://127.0.0.1").unwrap()).unwrap();
}
}
6 changes: 3 additions & 3 deletions src/types/string_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,15 @@ pub enum StringSource {
/// If the call to [`reqwest::header::HeaderValue::to_str`] returns an error, that error is returned.
/// Note that, as I write this, [`reqwest::header::HeaderValue::to_str`] only works if the result is valid ASCII.
#[cfg(all(feature = "http", not(target_family = "wasm")))]
HeaderValue {
ResponseHeader {
/// The name of the response header to get the value of.
name: String,
/// The headers to send in the HTTP GET request.
#[serde(default, with = "crate::glue::headermap")]
headers: HeaderMap
},
/// Parses `source` as a URL and gets the specified value.
/// Useful when used with [`Self::HeaderValue`].
/// Useful when used with [`Self::ResponseHeader`].
ExtractPart {
/// The string to parse and extract `part` from.
source: Box<Self>,
Expand Down Expand Up @@ -231,7 +231,7 @@ impl StringSource {
},
Self::Join {sources, join} => sources.iter().map(|source| source.get(url, params, none_to_empty_string)).collect::<Result<Option<Vec<_>>, _>>()?.map(|x| Cow::Owned(x.join(join))),
#[cfg(all(feature = "http", not(target_family = "wasm")))]
Self::HeaderValue{name, headers} => Some(Cow::Owned(params.http_client()?.get(url.as_str()).headers(headers.clone()).send()?.headers().get(name).ok_or(StringSourceError::HeaderNotFound)?.to_str()?.to_string())),
Self::ResponseHeader{name, headers} => Some(Cow::Owned(params.http_client()?.get(url.as_str()).headers(headers.clone()).send()?.headers().get(name).ok_or(StringSourceError::HeaderNotFound)?.to_str()?.to_string())),
Self::ExtractPart{source, part} => source.get(url, params, false)?.map(|x| Url::parse(&x)).transpose()?.and_then(|x| part.get(&x, none_to_empty_string).map(|x| Cow::Owned(x.into_owned()))),
#[cfg(all(feature = "http", feature = "regex", not(target_family = "wasm")))]
Self::ExtractFromPage{headers, regex, expand} => if let Some(expand) = expand.get(url, params, false)? {
Expand Down

0 comments on commit f342e34

Please sign in to comment.