Skip to content

Commit

Permalink
Fixes and reorganization. Release 0.1.0 maybe soon
Browse files Browse the repository at this point in the history
  • Loading branch information
Scripter17 committed Feb 26, 2024
1 parent 0feb2ce commit 63bb0a5
Show file tree
Hide file tree
Showing 16 changed files with 336 additions and 275 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 57 additions & 9 deletions default-config.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
{
"docs": {
"flags": {
"no-unmangle": "...",
"...": ["..."]
},
"vars": {
"...": [
"...",
"...2"
]
}
},
"rules": [
{
"RepeatUntilNonePass": {
Expand Down Expand Up @@ -108,19 +120,55 @@


{
"condition": {"PartIs": {"part": "Scheme", "value": "http"}},
"mapper": {"SetPart": {"part": "Scheme", "value": "https"}}
"comment": "https://abc.com/https://abc.com/user",
"condition": {"All": [
{"Not": {"FlagIsSet": "no-unmangle"}},
{"Any": [
{"PartContains": {"part": "Path", "where": "Start", "value": "/http:"}},
{"PartContains": {"part": "Path", "where": "Start", "value": "/https:"}}
]}
]},
"mapper": {"SetPart": {
"part": "Whole",
"value": {"Modified": {"source": {"Part": "Path"}, "modification": {"StripPrefix": "/"}}}
}}
},
{
"comment": "https://abc.com/profile/https://abc.com/profile/user",
"comment2": "The general case requires conditions and mappers to talk to each other and would break the WayBack machine.",
"condition": {"All": [
{"Not": {"FlagIsSet": "no-unmangle"}},
{"Any": [
{"PartIs": {"part": {"PathSegment": 1}, "value": "http:"}},
{"PartIs": {"part": {"PathSegment": 1}, "value": "https:"}}
]}
]},
"mapper": {"SetPart": {
"part": "Whole",
"value": {"Part": {"PartSegments": {"part": "Path", "split": "/", "start": 2, "end": null}}}
}}
},

{
"comment": "https://profile.abc.com.abc.com",
"condition": {"All": [
{"Not": {"FlagIsSet": "no-unmangle"}},
{"TreatErrorAsFail": {"PartContains": {
"part": {"PartSegments": {"part": "Domain", "split": ".", "start": 1, "end": null}},
"where": "Start",
"value": {"Join": {"sources": [{"Part": "NotSubdomain"}, {"String": "."}]}}
}}}
]},
"mapper": {"SetPart": {"part": "NotSubdomain", "value": null}}
},


{
"condition": {"TreatErrorAsFail": {"PartContains": {
"part": {"PartSegments": {"part": "Domain", "split": ".", "start": 1, "end": null}},
"where": "Start",
"value": {"Join": {"sources": [{"Part": "NotSubdomain"}, {"String": "."}]}}
}}},
"mapper": {"SetPart": {"part": "NotSubdomain", "value": null}}
"condition": {"All": [
{"Not": {"FlagIsSet": "no-https"}},
{"PartIs": {"part": "Scheme", "value": "http"}}
]},
"mapper": {"SetPart": {"part": "Scheme", "value": "https"}}
},


Expand All @@ -132,7 +180,7 @@
]},
"mapper": {"All": [
{"SetPart": {"part": "Path", "value": "/users"}},
{"CopyPart": {"from": {"QueryParam": "id"}, "none_to_empty_string": false, "to": "NextPathSegment"}},
{"CopyPart": {"from": {"QueryParam": "id"}, "from_none_to_empty_string": false, "to": "NextPathSegment"}},
"RemoveQuery"
]}
},
Expand Down
19 changes: 9 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ pub mod rules;
pub mod glue;
/// Types that don't fit in the other modules.
pub mod types;
/// Deserializing and handling configuration.
pub mod config;
pub(crate) mod util;

/// Takes a URL, an optional [`config::Config`], an optional [`config::Params`], and returns the result of applying the config and params to the URL.
/// Takes a URL, an optional [`types::Config`], an optional [`types::Params`], and returns the result of applying the config and params to the URL.
/// This function's name is set to `clean_url` in WASM for API simplicity.
/// # Errors
/// If the config or params can't be parsed, returns the parsing error.
Expand All @@ -25,30 +24,30 @@ pub fn wasm_clean_url(url: &str, config: wasm_bindgen::JsValue, params: wasm_bin
Ok(JsValue::from_str(url.as_str()))
}

/// Takes a URL, an optional [`config::Config`], an optional [`config::Params`], and returns the result of applying the config and params to the URL.
/// Takes a URL, an optional [`types::Config`], and an optional [`types::Params`], and applies the config and params to the URL in place.
/// # Errors
/// If applying the rules returns an error, that error is returned.
pub fn clean_url(url: &mut Url, config: Option<&config::Config>, params: Option<&config::Params>) -> Result<(), types::CleaningError> {
pub fn clean_url(url: &mut Url, config: Option<&types::Config>, params: Option<&types::Params>) -> Result<(), types::CleaningError> {
let mut config=match config {
Some(config) => config.clone(),
None => config::Config::get_default()?.clone()
None => types::Config::get_default()?.clone()
};
if let Some(params) = params {config.params.merge(params.clone());}
config.apply(url)?;
Ok(())
}

fn js_value_to_config(config: wasm_bindgen::JsValue) -> Result<Cow<'static, config::Config>, JsError> {
fn js_value_to_config(config: wasm_bindgen::JsValue) -> Result<Cow<'static, types::Config>, JsError> {
Ok(if config.is_null() {
Cow::Borrowed(config::Config::get_default()?)
Cow::Borrowed(types::Config::get_default()?)
} else {
Cow::Owned(serde_wasm_bindgen::from_value(config)?)
})
}

fn js_value_to_params(params: wasm_bindgen::JsValue) -> Result<config::Params, JsError> {
fn js_value_to_params(params: wasm_bindgen::JsValue) -> Result<types::Params, JsError> {
Ok(if params.is_null() {
config::Params::default()
types::Params::default()
} else {
serde_wasm_bindgen::from_value(params)?
})
Expand Down
14 changes: 7 additions & 7 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use url::Url;
mod rules;
mod glue;
mod types;
mod config;
mod util;

#[derive(Parser)]
struct Args {
Expand All @@ -24,28 +24,28 @@ struct Args {
flag: Vec<String>
}

impl TryFrom<Args> for (Vec<Url>, config::Config) {
type Error=config::GetConfigError;
impl TryFrom<Args> for (Vec<Url>, types::Config) {
type Error=types::GetConfigError;

fn try_from(args: Args) -> Result<Self, Self::Error> {
let mut config=config::Config::get_default_or_load(args.config.as_deref())?.into_owned();
let mut config=types::Config::get_default_or_load(args.config.as_deref())?.into_owned();
config.params.merge(
#[allow(clippy::needless_update)]
config::Params {
types::Params {
vars: args.var
.into_iter()
.filter_map(|mut kev| kev.find('=').map(|e| {let mut v=kev.split_off(e); v.drain(..1); kev.shrink_to_fit(); (kev, v)}))
.collect(),
flags: args.flag.into_iter().collect(),
..config::Params::default()
..types::Params::default()
}
);
Ok((args.urls, config))
}
}

fn main() -> Result<(), types::CleaningError> {
let (urls, config): (Vec<Url>, config::Config)=Args::parse().try_into()?;
let (urls, config): (Vec<Url>, types::Config)=Args::parse().try_into()?;

for mut url in urls {
match config.apply(&mut url) {
Expand Down
17 changes: 9 additions & 8 deletions src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ mod conditions;
pub use conditions::*;
mod mappers;
pub use mappers::*;
use crate::config;

pub use crate::types::*;

/// The core unit describing when and how URLs are modified.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
Expand All @@ -21,7 +22,7 @@ pub enum Rule {
/// # Examples
/// ```
/// # use url_cleaner::rules::{Rule, Mapper};
/// # use url_cleaner::config::Params;
/// # use url_cleaner::types::Params;
/// # use url::Url;
/// # use std::collections::HashMap;
/// let rule=Rule::HostMap(HashMap::from_iter([
Expand All @@ -43,7 +44,7 @@ pub enum Rule {
/// # Examples
/// ```
/// # use url_cleaner::rules::{Rule, Condition, Mapper};
/// # use url_cleaner::config::Params;
/// # use url_cleaner::types::Params;
/// # use url_cleaner::types::UrlPart;
/// # use url::Url;
/// # use std::str::FromStr;
Expand All @@ -54,8 +55,8 @@ pub enum Rule {
/// condition: Condition::Always,
/// mapper: Mapper::SetPart {
/// part: UrlPart::NextPathSegment,
/// none_to_empty_string: false,
/// value: Some(FromStr::from_str("a").unwrap())
/// value: Some(FromStr::from_str("a").unwrap()),
/// value_none_to_empty_string: false
/// }
/// }
/// ],
Expand All @@ -77,7 +78,7 @@ pub enum Rule {
/// # Examples
/// ```
/// # use url_cleaner::rules::{Rule, Condition, Mapper};
/// # use url_cleaner::config::Params;
/// # use url_cleaner::types::Params;
/// # use url::Url;
/// assert!(Rule::Normal{condition: Condition::Never, mapper: Mapper::None}.apply(&mut Url::parse("https://example.com").unwrap(), &Params::default()).is_err());
/// ```
Expand Down Expand Up @@ -119,7 +120,7 @@ impl Rule {
/// If the rule is a [`Self::Normal`] and the contained condition or mapper returns an error, that error is returned.
/// If the rule is a [`Self::HostMap`] and the provided URL doesn't have a host, returns the error [`RuleError::UrlHasNoHost`].
/// If the rule is a [`Self::HostMap`] and the provided URL's host isn't in the rule's map, returns the error [`RuleError::HostNotInMap`].
pub fn apply(&self, url: &mut Url, params: &config::Params) -> Result<(), RuleError> {
pub fn apply(&self, url: &mut Url, params: &Params) -> Result<(), RuleError> {
match self {
Self::Normal{condition, mapper} => if condition.satisfied_by(url, params)? {
mapper.apply(url, params)?;
Expand Down Expand Up @@ -175,7 +176,7 @@ impl Rules {
/// If an error is returned, `url` is left unmodified.
/// # Errors
/// If applying a rule returns [`RuleError::FailedCondition`], [`RuleError::UrlHasNoHost`], or [`RuleError::HostNotInMap`], that error is ignored.
pub fn apply(&self, url: &mut Url, params: &config::Params) -> Result<(), RuleError> {
pub fn apply(&self, url: &mut Url, params: &Params) -> Result<(), RuleError> {
let mut temp_url=url.clone();
for rule in &**self {
match rule.apply(&mut temp_url, params) {
Expand Down
Loading

0 comments on commit 63bb0a5

Please sign in to comment.