Skip to content

Commit

Permalink
Add UrlPart::HostWithoutWWWDotPrefix
Browse files Browse the repository at this point in the history
  • Loading branch information
Scripter17 committed Sep 1, 2024
1 parent 28f6ac9 commit e65d02e
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 43 deletions.
73 changes: 32 additions & 41 deletions default-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@
"nerd.whatever.social", "z.opnxng.com"
],
"shortlink-hosts": [
"2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.to", "apple.co",
"bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly", "cbsn.ws",
"cfl.re", "chn.ge", "chng.it", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", "dis.gd", "dlvr.it",
"etsy.me", "fal.cn", "fb.me", "flip.it", "forms.gle", "g.co", "glo.bo", "go.forbes.com", "go.microsoft.com", "go.nasa.gov",
"gofund.me", "goo.gl", "goo.su", "gum.co", "hulu.tv", "ift.tt", "interc.pt", "iwe.one", "j.mp", "jbgm.es", "k00.fr", "katy.to",
"kck.st", "l.leparisien.fr", "link.animaapp.com", "linkr.it", "lnk.to", "loom.ly", "msft.it", "mzl.la", "n.pr", "nas.cr", "ninten.do",
"ntdo.co.uk", "ny.ti", "nyer.cm", "nyti.ms", "nyto.ms", "on.forbes.com", "on.ft.com", "on.ft.com", "on.msnbc.com", "on.nyc.gov",
"onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "pin.it", "pixiv.me", "play.st", "politi.co", "py.pl", "qr1.be",
"rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "shorturl.at", "spoti.fi",
"spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "thecutio", "thr.cm", "tmz.me", "to.pbs.org", "tr.ee", "trib.al", "u.jd.com", "w.wiki",
"wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca", "fanga.me", "s76.co"
"2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.ew", "amzn.to",
"apple.co", "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly",
"cbsn.ws", "cfl.re", "chn.ge", "chng.it", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", "dis.gd",
"dlvr.it", "etsy.me", "fal.cn", "fanga.me", "fb.me", "flip.it", "forms.gle", "g.co", "glo.bo", "go.forbes.com", "go.microsoft.com",
"go.nasa.gov", "gofund.me", "goo.gl", "goo.su", "gum.co", "hulu.tv", "ift.tt", "interc.pt", "iwe.one", "j.mp", "jbgm.es", "k00.fr",
"katy.to", "kck.st", "l.leparisien.fr", "link.animaapp.com", "linkr.it", "lnk.to", "loom.ly", "msft.it", "mzl.la", "n.pr", "nas.cr",
"ninten.do", "ntdo.co.uk", "ny.ti", "nyer.cm", "nyti.ms", "nyto.ms", "on.forbes.com", "on.ft.com", "on.ft.com", "on.msnbc.com",
"on.nyc.gov", "onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "pin.it", "pixiv.me", "play.st", "politi.co", "py.pl",
"qr1.be", "rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "s76.co", "shor.tf",
"shorturl.at", "spoti.fi", "spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "thecutio", "thr.cm", "tmz.me", "to.pbs.org", "tr.ee",
"trib.al", "u.jd.com", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca"
],
"utps": [
"Tcsack", "__hsfp", "__hssc", "__hstc", "__io_lv", "__s", "_branch_match_id", "_branch_referrer", "_clde", "_cldee", "_ga",
Expand Down Expand Up @@ -193,11 +193,8 @@
],
"rules": [
{
"condition": {"StringMatches": {
"source": {"Modified": {
"source": {"Part": "Host"},
"modification": {"StripMaybePrefix": "www."}
}},
"condition": {"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"Any": [
{"InSet": "shortlink-hosts"},
{"InSet": "bypass.vip-hosts"},
Expand Down Expand Up @@ -246,8 +243,8 @@


{
"condition": {"StringMap": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
"condition": {"PartMap": {
"part": "HostWithoutWWWDotPrefix",
"map": {
"pixiv.net" : {"PathIs": "/jump.php"},
"deviantart.com": {"PathIs": "/users/outgoing"},
Expand All @@ -272,21 +269,14 @@


{
"StringMap": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
"PartMap": {
"part": "HostWithoutWWWDotPrefix",
"map": {
"t.umblr.com": {"GetUrlFromQueryParam": "z"},
"skfb.ly": {"All": [
{"SetHost": "sketchfab.com"},
{"SetPart": {"part": {"BeforePathSegment": 0}, "value": "s"}}
]},
"sketchfab.com": {"IfCondition": {
"condition": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}},
"mapper": {"All": [
{"Common": {"name": "utps"}},
{"ExpandShortLink": {}}
]}
}},
"smarturl.it": {"IfCondition": {
"condition": {"Not": {"FlagIsSet": "no-http"}},
"mapper": {"All": [
Expand Down Expand Up @@ -579,8 +569,8 @@


{"DontTriggerLoop": {
"condition": {"StringMatches": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
"condition": {"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"InSet": "lmgtfy-hosts"}
}},
"mapper": {"All": [
Expand Down Expand Up @@ -609,12 +599,12 @@
{"FlagIsSet": "assume-1-dot-2-is-shortlink"},
{"PartMatches": {"part": "Domain", "matcher": {"Regex": "^.\\...$"}}}
]},
{"StringMatches": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
{"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"InSet": "shortlink-hosts"}
}},
{"StringMap": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
{"PartMap": {
"part": "HostWithoutWWWDotPrefix",
"map": {
"pawoo.net" : {"PartIs": {"part": {"PathSegment": 0}, "value": "oauth_authentications"}},
"api.pinterest.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "url_shortener"}},
Expand All @@ -623,12 +613,13 @@
"substack.com" : {"All": [
{"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}},
{"Not": {"PartIs": {"part": {"PathSegment": 1}, "value": "2"}}}
]}
]},
"sketchfab.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}}
}
}},
{"All": [
{"StringMatches": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
{"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"InSet": "email-link-format-1-hosts"}
}},
{"PartContains": {"part": "Path", "value": "/page/email/click", "where": "Start"}}
Expand All @@ -642,8 +633,8 @@
},
{
"condition": {"All": [
{"StringMatches": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
{"PartMatches": {
"part": "HostWithoutWWWDotPrefix",
"matcher": {"InSet": "email-link-format-1-hosts"}
}},
{"PartContains": {"part": "Path", "value": "/page/email/redirect", "where": "Start"}}
Expand Down Expand Up @@ -1029,9 +1020,9 @@
}
},
{
"StringMap": {
"source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}},
"map": {
"PartMap": {
"part": "HostWithoutWWWDotPrefix",
"map": {
"at.tumblr.com": {"All": [
{"SetHost": "tumblr.com"},
{"SetPart": {"part": {"PathSegment": 1}, "value": null}},
Expand Down
28 changes: 26 additions & 2 deletions src/types/url_part.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,18 @@ pub enum UrlPart {
/// assert_eq!(UrlPart::Host.get(&Url::parse("https://example.com" ).unwrap()), Some(Cow::Borrowed("example.com" )));
/// ```
Host,
/// [`Self::Host`] but with the `www.` at the start, if it exists, removed.
/// # Getting
/// Can be [`None`]
/// # Setting
/// Cannot be [`None`]
///
/// If the URL does not have a host ([`Url::host_str`] returns [`None`]), returns the error [`UrlPartGetError::UrlDoesNotHaveAHost`].
///
/// If [`Self::Host`] starts with `www.`, keeps that `www.` prefix and replaces the rest of the host with the new value.
///
/// If [`Self::Host`] does not start with `www.`, returns the error [`UrlPartSetError::HostDoesNotStartWithWWWDot`].
HostWithoutWWWDotPrefix,
/// The domain segment between segments N-1 and N.
///
/// Please note that, if a URL has N domain segments, setting `BeforeDomainSegment(N)` (the N+1th segment) will error even though it's reasonable to expect it to work like [`Self::NextDomainSegment`].
Expand Down Expand Up @@ -874,6 +886,7 @@ impl UrlPart {
Self::Query => Cow::Borrowed(url.query()?),
Self::Whole => Cow::Borrowed(url.as_str()),
Self::Host => Cow::Borrowed(url.host_str()?),
Self::HostWithoutWWWDotPrefix => Cow::Borrowed(url.host_str().map(|x| x.strip_prefix("www.").unwrap_or(x))?),
Self::DomainSegment(n) => Cow::Borrowed(neg_nth(url.domain()?.split('.'), *n)?),
Self::Subdomain => {
let url_domain=url.domain().map(|x| x.strip_suffix('.').unwrap_or(x))?;
Expand Down Expand Up @@ -951,6 +964,11 @@ impl UrlPart {
// Ordered hopefully most used to least used.
(Self::Query, _) => url.set_query(to),
(Self::Host , _) => url.set_host (to)?,
(Self::HostWithoutWWWDotPrefix, Some(to)) => match url.host_str().map(|host| host.starts_with("www.")) {
Some(true) => url.set_host(Some(&format!("www.{to}")))?,
Some(false) => Err(UrlPartSetError::HostDoesNotStartWithWWWDot)?,
None => Err(UrlPartGetError::UrlDoesNotHaveAHost)?
},
(Self::BeforeDomainSegment(n), _) => if let Some(to) = to {
let mut segments = url.domain().ok_or(UrlPartGetError::HostIsNotADomain)?.split('.').collect::<Vec<_>>();
let fixed_n=neg_range_boundary(*n, segments.len()).ok_or(UrlPartGetError::SegmentBoundaryNotFound)?;
Expand Down Expand Up @@ -1234,7 +1252,10 @@ pub enum UrlPartGetError {
PartIsNone,
/// Returned when the requested segment boundary is not found.
#[error("The requested segment boundary was not found.")]
SegmentBoundaryNotFound
SegmentBoundaryNotFound,
/// Returned when the URL does not have a host.
#[error("The URL did not have a host.")]
UrlDoesNotHaveAHost
}

/// The enum of all possible errors [`UrlPart::set`] can return.
Expand Down Expand Up @@ -1278,7 +1299,10 @@ pub enum UrlPartSetError {
InvalidDomain,
/// Returned when attempting to set a URL's not WWW domain but the URL's subdomain exists and is not www.
#[error("Attempted to set a URL's not WWW domain but the URL's subdomain exists and is not www.")]
HostIsNotMaybeWWWDomain
HostIsNotMaybeWWWDomain,
/// Returned when attempting to set a URL's [`UrlPart::HostWithoutWWWDotPrefix`] when its [`UrlPart::Host`] does not start with `www.`.
#[error("Attempted to set a URL's UrlPart::HostWithoutWWWDotPrefix when its UrlPart::Host does not start with \"www.\".")]
HostDoesNotStartWithWWWDot
}

/// The enum of all possible errors [`UrlPart::modify`] can return.
Expand Down

0 comments on commit e65d02e

Please sign in to comment.