diff --git a/default-config.json b/default-config.json index 3435819..74e973a 100644 --- a/default-config.json +++ b/default-config.json @@ -16,17 +16,17 @@ "nerd.whatever.social", "z.opnxng.com" ], "shortlink-hosts": [ - "2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.to", "apple.co", - "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly", "cbsn.ws", - "cfl.re", "chn.ge", "chng.it", "cnb.cx", "cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", "dis.gd", "dlvr.it", - "etsy.me", "fal.cn", "fb.me", "flip.it", "forms.gle", "g.co", "glo.bo", "go.forbes.com", "go.microsoft.com", "go.nasa.gov", - "gofund.me", "goo.gl", "goo.su", "gum.co", "hulu.tv", "ift.tt", "interc.pt", "iwe.one", "j.mp", "jbgm.es", "k00.fr", "katy.to", - "kck.st", "l.leparisien.fr", "link.animaapp.com", "linkr.it", "lnk.to", "loom.ly", "msft.it", "mzl.la", "n.pr", "nas.cr", "ninten.do", - "ntdo.co.uk", "ny.ti", "nyer.cm", "nyti.ms", "nyto.ms", "on.forbes.com", "on.ft.com", "on.ft.com", "on.msnbc.com", "on.nyc.gov", - "onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "pin.it", "pixiv.me", "play.st", "politi.co", "py.pl", "qr1.be", - "rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "shorturl.at", "spoti.fi", - "spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "thecutio", "thr.cm", "tmz.me", "to.pbs.org", "tr.ee", "trib.al", "u.jd.com", "w.wiki", - "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca", "fanga.me", "s76.co" + "2kgam.es", "4.nbcla.com", "a.co", "ab.co", "abc7.la", "abc7ne.ws", "adobe.ly", "aje.io", "aje.io", "amzn.asia", "amzn.ew", "amzn.to", + "apple.co", "bbc.in", "bit.ly", "bitly.com", "bitly.com", "bityl.co", "blizz.ly", "blockclubchi.co", "bloom.bg", "boxd.it", "buff.ly", + "cbsn.ws", "cfl.re", "chn.ge", "chng.it", "cnb.cx", 
"cnn.it", "cos.lv", "cutt.ly", "db.tt", "dcdr.me", "depop.app.link", "dis.gd", + "dlvr.it", "etsy.me", "fal.cn", "fanga.me", "fb.me", "flip.it", "forms.gle", "g.co", "glo.bo", "go.forbes.com", "go.microsoft.com", + "go.nasa.gov", "gofund.me", "goo.gl", "goo.su", "gum.co", "hulu.tv", "ift.tt", "interc.pt", "iwe.one", "j.mp", "jbgm.es", "k00.fr", + "katy.to", "kck.st", "l.leparisien.fr", "link.animaapp.com", "linkr.it", "lnk.to", "loom.ly", "msft.it", "mzl.la", "n.pr", "nas.cr", + "ninten.do", "ntdo.co.uk", "ny.ti", "nyer.cm", "nyti.ms", "nyto.ms", "on.forbes.com", "on.ft.com", "on.ft.com", "on.msnbc.com", + "on.nyc.gov", "onl.bz", "onl.la", "onl.sc", "operagx.gg", "orlo.uk", "ow.ly", "pin.it", "pixiv.me", "play.st", "politi.co", "py.pl", + "qr1.be", "rb.gy", "rb.gy", "rblx.co", "rdbl.co", "redd.it", "reurl.cc", "reut.rs", "rzr.to", "s.goodsmile.link", "s76.co", "shor.tf", + "shorturl.at", "spoti.fi", "spr.ly", "spr.ly", "sqex.to", "t.co", "t.ly", "thecutio", "thr.cm", "tmz.me", "to.pbs.org", "tr.ee", + "trib.al", "u.jd.com", "w.wiki", "wlgrn.com", "wlo.link", "wn.nr", "wwdc.io", "x.gd", "xbx.ly", "xhslink.com", "yrp.ca" ], "utps": [ "Tcsack", "__hsfp", "__hssc", "__hstc", "__io_lv", "__s", "_branch_match_id", "_branch_referrer", "_clde", "_cldee", "_ga", @@ -193,11 +193,8 @@ ], "rules": [ { - "condition": {"StringMatches": { - "source": {"Modified": { - "source": {"Part": "Host"}, - "modification": {"StripMaybePrefix": "www."} - }}, + "condition": {"PartMatches": { + "part": "HostWithoutWWWDotPrefix", "matcher": {"Any": [ {"InSet": "shortlink-hosts"}, {"InSet": "bypass.vip-hosts"}, @@ -246,8 +243,8 @@ { - "condition": {"StringMap": { - "source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, + "condition": {"PartMap": { + "part": "HostWithoutWWWDotPrefix", "map": { "pixiv.net" : {"PathIs": "/jump.php"}, "deviantart.com": {"PathIs": "/users/outgoing"}, @@ -272,21 +269,14 @@ { - "StringMap": { - "source": {"Modified": 
{"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, + "PartMap": { + "part": "HostWithoutWWWDotPrefix", "map": { "t.umblr.com": {"GetUrlFromQueryParam": "z"}, "skfb.ly": {"All": [ {"SetHost": "sketchfab.com"}, {"SetPart": {"part": {"BeforePathSegment": 0}, "value": "s"}} ]}, - "sketchfab.com": {"IfCondition": { - "condition": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}}, - "mapper": {"All": [ - {"Common": {"name": "utps"}}, - {"ExpandShortLink": {}} - ]} - }}, "smarturl.it": {"IfCondition": { "condition": {"Not": {"FlagIsSet": "no-http"}}, "mapper": {"All": [ @@ -579,8 +569,8 @@ {"DontTriggerLoop": { - "condition": {"StringMatches": { - "source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, + "condition": {"PartMatches": { + "part": "HostWithoutWWWDotPrefix", "matcher": {"InSet": "lmgtfy-hosts"} }}, "mapper": {"All": [ @@ -609,12 +599,12 @@ {"FlagIsSet": "assume-1-dot-2-is-shortlink"}, {"PartMatches": {"part": "Domain", "matcher": {"Regex": "^.\\...$"}}} ]}, - {"StringMatches": { - "source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, + {"PartMatches": { + "part": "HostWithoutWWWDotPrefix", "matcher": {"InSet": "shortlink-hosts"} }}, - {"StringMap": { - "source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, + {"PartMap": { + "part": "HostWithoutWWWDotPrefix", "map": { "pawoo.net" : {"PartIs": {"part": {"PathSegment": 0}, "value": "oauth_authentications"}}, "api.pinterest.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "url_shortener"}}, @@ -623,12 +613,13 @@ "substack.com" : {"All": [ {"PartIs": {"part": {"PathSegment": 0}, "value": "redirect"}}, {"Not": {"PartIs": {"part": {"PathSegment": 1}, "value": "2"}}} - ]} + ]}, + "sketchfab.com": {"PartIs": {"part": {"PathSegment": 0}, "value": "s"}} } }}, {"All": [ - {"StringMatches": { - "source": {"Modified": {"source": {"Part": "Host"}, 
"modification": {"StripMaybePrefix": "www."}}}, + {"PartMatches": { + "part": "HostWithoutWWWDotPrefix", "matcher": {"InSet": "email-link-format-1-hosts"} }}, {"PartContains": {"part": "Path", "value": "/page/email/click", "where": "Start"}} @@ -642,8 +633,8 @@ }, { "condition": {"All": [ - {"StringMatches": { - "source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, + {"PartMatches": { + "part": "HostWithoutWWWDotPrefix", "matcher": {"InSet": "email-link-format-1-hosts"} }}, {"PartContains": {"part": "Path", "value": "/page/email/redirect", "where": "Start"}} @@ -1029,9 +1020,9 @@ } }, { - "StringMap": { - "source": {"Modified": {"source": {"Part": "Host"}, "modification": {"StripMaybePrefix": "www."}}}, - "map": { + "PartMap": { + "part": "HostWithoutWWWDotPrefix", + "map": { "at.tumblr.com": {"All": [ {"SetHost": "tumblr.com"}, {"SetPart": {"part": {"PathSegment": 1}, "value": null}}, diff --git a/src/types/url_part.rs b/src/types/url_part.rs index 0d4ae52..e85dc2c 100644 --- a/src/types/url_part.rs +++ b/src/types/url_part.rs @@ -159,6 +159,18 @@ pub enum UrlPart { /// assert_eq!(UrlPart::Host.get(&Url::parse("https://example.com" ).unwrap()), Some(Cow::Borrowed("example.com" ))); /// ``` Host, + /// [`Self::Host`] but with the `www.` at the start, if it exists, removed. + /// # Getting + /// Can be [`None`] + /// # Setting + /// Cannot be [`None`] + /// + /// If the URL does not have a host ([`Url::host_str`] returns [`None`]), returns the error [`UrlPartGetError::UrlDoesNotHaveAHost`]. + /// + /// If [`Self::Host`] starts with `www.`, replaces the rest of the host. + /// + /// If [`Self::Host`] does not start with `www.`, returns the error [`UrlPartSetError::HostDoesNotStartWithWWWDot`]. + HostWithoutWWWDotPrefix, /// The domain segment between segments N-1 and N. 
/// /// Please note that, if a URL has N domain segments, setting `BeforeDomainSegment(N)` (the N+1th segment) will error even though it's reasonable to expect it to work like [`Self::NextDomainSegment`]. @@ -874,6 +886,7 @@ impl UrlPart { Self::Query => Cow::Borrowed(url.query()?), Self::Whole => Cow::Borrowed(url.as_str()), Self::Host => Cow::Borrowed(url.host_str()?), + Self::HostWithoutWWWDotPrefix => Cow::Borrowed(url.host_str().map(|x| x.strip_prefix("www.").unwrap_or(x))?), Self::DomainSegment(n) => Cow::Borrowed(neg_nth(url.domain()?.split('.'), *n)?), Self::Subdomain => { let url_domain=url.domain().map(|x| x.strip_suffix('.').unwrap_or(x))?; @@ -951,6 +964,11 @@ impl UrlPart { // Ordered hopefully most used to least used. (Self::Query, _) => url.set_query(to), (Self::Host , _) => url.set_host (to)?, + (Self::HostWithoutWWWDotPrefix, Some(to)) => match url.host_str().map(|host| host.starts_with("www.")) { + Some(true) => url.set_host(Some(&format!("www.{to}")))?, + Some(false) => Err(UrlPartSetError::HostDoesNotStartWithWWWDot)?, + None => Err(UrlPartGetError::UrlDoesNotHaveAHost)? + }, (Self::BeforeDomainSegment(n), _) => if let Some(to) = to { let mut segments = url.domain().ok_or(UrlPartGetError::HostIsNotADomain)?.split('.').collect::>(); let fixed_n=neg_range_boundary(*n, segments.len()).ok_or(UrlPartGetError::SegmentBoundaryNotFound)?; @@ -1234,7 +1252,10 @@ pub enum UrlPartGetError { PartIsNone, /// Returned when the requested segment boundary is not found. #[error("The requested segment boundary was not found.")] - SegmentBoundaryNotFound + SegmentBoundaryNotFound, + /// Returned when the URL does not have a host. + #[error("The URL did not have a host.")] + UrlDoesNotHaveAHost } /// The enum of all possible errors [`UrlPart::set`] can return. @@ -1278,7 +1299,10 @@ pub enum UrlPartSetError { InvalidDomain, /// Returned when attempting to set a URL's not WWW domain but the URL's subdomain exists and is not www. 
#[error("Attempted to set a URL's not WWW domain but the URL's subdomain exists and is not www.")] - HostIsNotMaybeWWWDomain + HostIsNotMaybeWWWDomain, + /// Returned when attempting to set a URL's [`UrlPart::HostWithoutWWWDotPrefix`] when its [`UrlPart::Host`] does not start with `www.`. + #[error("Attempted to set a URL's UrlPart::HostWithoutWWWDotPrefix when its UrlPart::Host does not start with \"www.\".")] + HostDoesNotStartWithWWWDot } /// The enum of all possible errors [`UrlPart::modify`] can return.