Skip to content

Commit

Permalink
change Result struct field
Browse files Browse the repository at this point in the history
  • Loading branch information
Lips7 committed Jul 8, 2024
1 parent 5a8cd7c commit 211a0d9
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 48 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 26 additions & 27 deletions matcher_rs/src/regex_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,25 +144,25 @@ struct RegexPatternTable {
///
/// # Fields
///
/// * `word` - A [Cow<'a, str>] that holds the matched word or pattern. This field can either be a
/// borrowed string slice or an owned [String], offering flexibility in how the match result is stored.
/// * `match_id` - A [u32] that serves as an identifier for the match. This identifier
/// is used to differentiate between match results originating from different regex tables, allowing
/// for more detailed and organized match results.
///
/// * `table_id` - A [u32] representing the unique identifier of the regex table that produced the match result.
/// This helps in distinguishing which regex table contributed to the result, facilitating organized processing
/// and categorization of matches.
///
/// * `match_id` - A [u32] that serves as an identifier for the match. This identifier
/// is used to differentiate between match results originating from different regex tables, allowing
/// for more detailed and organized match results.
/// * `word` - A [Cow<'a, str>] that holds the matched word or pattern. This field can either be a
/// borrowed string slice or an owned [String], offering flexibility in how the match result is stored.
///
/// This structure is primarily utilized in text matching applications where regex patterns are used
/// to identify specific words or patterns within the target text, and the results need to be tracked
/// and processed accordingly.
#[derive(Debug, Clone)]
pub struct RegexResult<'a> {
pub word: Cow<'a, str>,
pub table_id: u32,
pub match_id: u32,
pub table_id: u32,
pub word: Cow<'a, str>,
}

impl MatchResultTrait<'_> for RegexResult<'_> {
Expand Down Expand Up @@ -497,41 +497,40 @@ impl<'a> TextMatcherTrait<'a, RegexResult<'a>> for RegexMatcher {
for regex_table in &self.regex_pattern_table_list {
match &regex_table.regex_type {
RegexType::Standard { regex } => {
for caps in regex.captures_iter(text).map(|caps| caps.unwrap()) {
result_list.push(RegexResult {
result_list.extend(regex.captures_iter(text).map(|caps| {
RegexResult {
match_id: regex_table.match_id,
table_id: regex_table.table_id,
word: Cow::Owned(
caps.iter()
caps.unwrap()
.iter()
.skip(1)
.filter_map(|m| m.map(|match_char| match_char.as_str()))
.collect::<String>(),
),
table_id: regex_table.table_id,
match_id: regex_table.match_id,
});
}
}
}))
}
RegexType::List {
regex_list,
word_list,
} => {
for (index, regex) in regex_list.iter().enumerate() {
if regex.is_match(text).unwrap() {
result_list.push(RegexResult {
word: Cow::Borrowed(&word_list[index]),
table_id: regex_table.table_id,
match_id: regex_table.match_id,
});
}
}
}
} => result_list.extend(regex_list.iter().enumerate().filter_map(
|(index, regex)| {
regex.is_match(text).unwrap().then_some(RegexResult {
match_id: regex_table.match_id,
table_id: regex_table.table_id,
word: Cow::Borrowed(&word_list[index]),
})
},
)),
RegexType::Set {
regex_set,
word_list,
} => result_list.extend(regex_set.matches(text).into_iter().map(|index| {
RegexResult {
word: Cow::Borrowed(&word_list[index]),
table_id: regex_table.table_id,
match_id: regex_table.match_id,
table_id: regex_table.table_id,
word: Cow::Borrowed(&word_list[index]),
}
})),
}
Expand Down
38 changes: 19 additions & 19 deletions matcher_rs/src/sim_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,17 @@ struct SimProcessedTable {
///
/// # Fields
///
/// - `match_id` ([u32]): An ID that serves as an identifier for the match.
/// - `table_id` ([u32]): The unique identifier of the table where the word was found.
/// - `word` ([Cow<'a, str>]): The word that was found to be similar. It is stored as a [Cow]
/// (clone-on-write) to allow for both owned and borrowed strings.
/// - `table_id` ([u32]): The unique identifier of the table where the word was found.
/// - `match_id` ([u32]): An ID that serves as an identifier for the match.
/// - `similarity` ([f64]): The similarity score computed for the match. This score typically
/// ranges from 0.0 to 1.0, with higher values indicating greater similarity.
#[derive(Debug, Clone)]
pub struct SimResult<'a> {
pub word: Cow<'a, str>,
pub table_id: u32,
pub match_id: u32,
pub table_id: u32,
pub word: Cow<'a, str>,
pub similarity: f64,
}

Expand Down Expand Up @@ -407,12 +407,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher {
.score_cutoff(sim_table.threshold),
)
.map(|similarity| SimResult {
word: Cow::Borrowed(text),
table_id: sim_table.table_id,
match_id: sim_table.match_id,
table_id: sim_table.table_id,
word: Cow::Borrowed(text),
similarity,
})
}));
}))
}
SimMatchType::DamerauLevenshtein => {
result_list.extend(sim_table.word_list.iter().filter_map(|text| {
Expand All @@ -423,12 +423,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher {
.score_cutoff(sim_table.threshold),
)
.map(|similarity| SimResult {
word: Cow::Borrowed(text),
table_id: sim_table.table_id,
match_id: sim_table.match_id,
table_id: sim_table.table_id,
word: Cow::Borrowed(text),
similarity,
})
}));
}))
}
SimMatchType::Indel => {
result_list.extend(sim_table.word_list.iter().filter_map(|text| {
Expand All @@ -438,12 +438,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher {
&distance::indel::Args::default().score_cutoff(sim_table.threshold),
)
.map(|similarity| SimResult {
word: Cow::Borrowed(text),
table_id: sim_table.table_id,
match_id: sim_table.match_id,
table_id: sim_table.table_id,
word: Cow::Borrowed(text),
similarity,
})
}));
}))
}
SimMatchType::Jaro => {
result_list.extend(sim_table.word_list.iter().filter_map(|text| {
Expand All @@ -453,12 +453,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher {
&distance::jaro::Args::default().score_cutoff(sim_table.threshold),
)
.map(|similarity| SimResult {
word: Cow::Borrowed(text),
table_id: sim_table.table_id,
match_id: sim_table.match_id,
table_id: sim_table.table_id,
word: Cow::Borrowed(text),
similarity,
})
}));
}))
}
SimMatchType::JaroWinkler => {
result_list.extend(sim_table.word_list.iter().filter_map(|text| {
Expand All @@ -469,12 +469,12 @@ impl<'a> TextMatcherTrait<'a, SimResult<'a>> for SimMatcher {
.score_cutoff(sim_table.threshold),
)
.map(|similarity| SimResult {
word: Cow::Borrowed(text),
table_id: sim_table.table_id,
match_id: sim_table.match_id,
table_id: sim_table.table_id,
word: Cow::Borrowed(text),
similarity,
})
}));
}))
}
}
}
Expand Down

0 comments on commit 211a0d9

Please sign in to comment.