diff --git a/Cargo.lock b/Cargo.lock index 76e8c9e..ad23d44 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,9 +97,9 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5208975e568d83b6b05cc0a063c8e7e9acc2b43bee6da15616a5b73e109d7437" +checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" [[package]] name = "cfg-if" diff --git a/matcher_rs/src/regex_matcher.rs b/matcher_rs/src/regex_matcher.rs index f57861c..0daf2c5 100644 --- a/matcher_rs/src/regex_matcher.rs +++ b/matcher_rs/src/regex_matcher.rs @@ -144,25 +144,25 @@ struct RegexPatternTable { /// /// # Fields /// -/// * `word` - A [Cow<'a, str>] that holds the matched word or pattern. This field can either be a -/// borrowed string slice or an owned [String], offering flexibility in how the match result is stored. +/// * `match_id` - A [u32] that serves as an identifier for the match. This identifier +/// is used to differentiate between match results originating from different regex tables, allowing +/// for more detailed and organized match results. /// /// * `table_id` - A [u32] representing the unique identifier of the regex table that produced the match result. /// This helps in distinguishing which regex table contributed to the result, facilitating organized processing /// and categorization of matches. /// -/// * `match_id` - A [u32] that serves as an identifier for the match. This identifier -/// is used to differentiate between match results originating from different regex tables, allowing -/// for more detailed and organized match results. +/// * `word` - A [Cow<'a, str>] that holds the matched word or pattern. This field can either be a +/// borrowed string slice or an owned [String], offering flexibility in how the match result is stored. 
/// /// This structure is primarily utilized in text matching applications where regex patterns are used /// to identify specific words or patterns within the target text, and the results need to be tracked /// and processed accordingly. #[derive(Debug, Clone)] pub struct RegexResult<'a> { - pub word: Cow<'a, str>, - pub table_id: u32, pub match_id: u32, + pub table_id: u32, + pub word: Cow<'a, str>, } impl MatchResultTrait<'_> for RegexResult<'_> { @@ -497,41 +497,40 @@ impl<'a> TextMatcherTrait<'a, RegexResult<'a>> for RegexMatcher { for regex_table in &self.regex_pattern_table_list { match &regex_table.regex_type { RegexType::Standard { regex } => { - for caps in regex.captures_iter(text).map(|caps| caps.unwrap()) { - result_list.push(RegexResult { + result_list.extend(regex.captures_iter(text).map(|caps| { + RegexResult { + match_id: regex_table.match_id, + table_id: regex_table.table_id, word: Cow::Owned( - caps.iter() + caps.unwrap() + .iter() .skip(1) .filter_map(|m| m.map(|match_char| match_char.as_str())) .collect::<String>(), ), - table_id: regex_table.table_id, - match_id: regex_table.match_id, - }); - } + } + })) } RegexType::List { regex_list, word_list, - } => { - for (index, regex) in regex_list.iter().enumerate() { - if regex.is_match(text).unwrap() { - result_list.push(RegexResult { - word: Cow::Borrowed(&word_list[index]), - table_id: regex_table.table_id, - match_id: regex_table.match_id, - }); - } - } - } + } => result_list.extend(regex_list.iter().enumerate().filter_map( + |(index, regex)| { + regex.is_match(text).unwrap().then_some(RegexResult { + match_id: regex_table.match_id, + table_id: regex_table.table_id, + word: Cow::Borrowed(&word_list[index]), + }) + }, + )), RegexType::Set { regex_set, word_list, } => result_list.extend(regex_set.matches(text).into_iter().map(|index| { RegexResult { - word: Cow::Borrowed(&word_list[index]), - table_id: regex_table.table_id, match_id: regex_table.match_id, + table_id: regex_table.table_id, + word: 
Cow::Borrowed(&word_list[index]), } })), } diff --git a/matcher_rs/src/sim_matcher.rs b/matcher_rs/src/sim_matcher.rs index bfa2253..f118b55 100644 --- a/matcher_rs/src/sim_matcher.rs +++ b/matcher_rs/src/sim_matcher.rs @@ -125,17 +125,17 @@ struct SimProcessedTable { /// /// # Fields /// +/// - `match_id` ([u32]): An ID that serves as an identifier for the match. +/// - `table_id` ([u32]): The unique identifier of the table where the word was found. /// - `word` ([Cow<'a, str>]): The word that was found to be similar. It is stored as a [Cow] /// (clone-on-write) to allow for both owned and borrowed strings. -/// - `table_id` ([u32]): The unique identifier of the table where the word was found. -/// - `match_id` ([u32]): An ID that serves as an identifier for the match. /// - `similarity` ([f64]): The similarity score computed for the match. This score typically /// ranges from 0.0 to 1.0, with higher values indicating greater similarity. #[derive(Debug, Clone)] pub struct SimResult<'a> { - pub word: Cow<'a, str>, - pub table_id: u32, pub match_id: u32, + pub table_id: u32, + pub word: Cow<'a, str>, pub similarity: f64, } @@ -407,12 +407,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher { .score_cutoff(sim_table.threshold), ) .map(|similarity| SimResult { - word: Cow::Borrowed(text), - table_id: sim_table.table_id, match_id: sim_table.match_id, + table_id: sim_table.table_id, + word: Cow::Borrowed(text), similarity, }) - })); + })) } SimMatchType::DamerauLevenshtein => { result_list.extend(sim_table.word_list.iter().filter_map(|text| { @@ -423,12 +423,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher { .score_cutoff(sim_table.threshold), ) .map(|similarity| SimResult { - word: Cow::Borrowed(text), - table_id: sim_table.table_id, match_id: sim_table.match_id, + table_id: sim_table.table_id, + word: Cow::Borrowed(text), similarity, }) - })); + })) } SimMatchType::Indel => { 
result_list.extend(sim_table.word_list.iter().filter_map(|text| { @@ -438,12 +438,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher { &distance::indel::Args::default().score_cutoff(sim_table.threshold), ) .map(|similarity| SimResult { - word: Cow::Borrowed(text), - table_id: sim_table.table_id, match_id: sim_table.match_id, + table_id: sim_table.table_id, + word: Cow::Borrowed(text), similarity, }) - })); + })) } SimMatchType::Jaro => { result_list.extend(sim_table.word_list.iter().filter_map(|text| { @@ -453,12 +453,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher { &distance::jaro::Args::default().score_cutoff(sim_table.threshold), ) .map(|similarity| SimResult { - word: Cow::Borrowed(text), - table_id: sim_table.table_id, match_id: sim_table.match_id, + table_id: sim_table.table_id, + word: Cow::Borrowed(text), similarity, }) - })); + })) } SimMatchType::JaroWinkler => { result_list.extend(sim_table.word_list.iter().filter_map(|text| { @@ -469,12 +469,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher { .score_cutoff(sim_table.threshold), ) .map(|similarity| SimResult { - word: Cow::Borrowed(text), - table_id: sim_table.table_id, match_id: sim_table.match_id, + table_id: sim_table.table_id, + word: Cow::Borrowed(text), similarity, }) - })); + })) } } }