From e144d472f95e1b9810b345dd1eb4f518896af83f Mon Sep 17 00:00:00 2001 From: Alexander Mironov Date: Wed, 7 Jul 2021 17:23:12 +0400 Subject: [PATCH] Improve heuristic --- Cargo.toml | 2 +- src/utils.rs | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7915b2c..2f9da62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "x8" -version = "2.2.1" +version = "2.3.0" authors = ["Alexander Mironov "] edition = "2018" license = "GPL-3.0-or-later" diff --git a/src/utils.rs b/src/utils.rs index fa34644..b40f3de 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -68,10 +68,12 @@ pub fn compare( (code, diffs) } -//get possible parameters from the page source code +//get possible parameters from the page code pub fn heuristic(body: &str) -> Vec { let mut found: Vec = Vec::new(); + let re_special_chars = Regex::new(r#"[\W]"#).unwrap(); + let re_name = Regex::new(r#"(?i)name=("|')?"#).unwrap(); let re_inputs = Regex::new(r#"(?i)name=("|')?[\w-]+"#).unwrap(); for cap in re_inputs.captures_iter(body) { @@ -84,10 +86,14 @@ pub fn heuristic(body: &str) -> Vec { found.push(re_var.replace_all(&cap[0], "").to_string()); } - let re_quotes = Regex::new(r#"("|')"#).unwrap(); let re_words_in_quotes = Regex::new(r#"("|')\w{3,20}('|")"#).unwrap(); for cap in re_words_in_quotes.captures_iter(body) { - found.push(re_quotes.replace_all(&cap[0], "").to_string()); + found.push(re_special_chars.replace_all(&cap[0], "").to_string()); + } + + let re_words_within_objects = Regex::new(r#"[\{,]\s*[[:alpha:]]\w{2,25}:"#).unwrap(); + for cap in re_words_within_objects.captures_iter(body){ + found.push(re_special_chars.replace_all(&cap[0], "").to_string()); } found.sort();