Skip to content

Commit

Permalink
Fix output of Unicode offsets, #15 (for real now, I hope)
Browse files: browse the repository at this point in the history
  • Loading branch information
proycon committed May 10, 2022
1 parent d346196 commit 79ba61b
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,12 +482,12 @@ impl ContextRule {
pub(crate) fn remap_offsets_to_unicodepoints<'a>(text: &'a str, mut matches: Vec<Match<'a>>) -> Vec<Match<'a>> {
let mut bytes2unicodepoints: Vec<Option<usize>> = Vec::new();
let mut end = 0;
for (unicodeoffset, (byteoffset, _char)) in text.char_indices().enumerate() {
for _ in bytes2unicodepoints.len()..byteoffset {
for (unicodeoffset, c) in text.chars().enumerate() {
bytes2unicodepoints.push(Some(unicodeoffset));
for _ in 0..c.len_utf8()-1 {
bytes2unicodepoints.push(None);
}
bytes2unicodepoints.push(Some(unicodeoffset));
end = byteoffset+1;
end = unicodeoffset+1;
}
//add an end offset
bytes2unicodepoints.push(Some(end));
Expand Down
4 changes: 2 additions & 2 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ fn test0706_find_all_matches_unicodeoffsets() {
model.add_to_vocabulary(text,None,&VocabParams::default());
}
model.build();
let matches = model.find_all_matches("I thиnk you are rihgt", &get_test_searchparams().with_max_ngram(1).with_unicodeoffsets());
let matches = model.find_all_matches("I thиnk you are righт", &get_test_searchparams().with_max_ngram(1).with_unicodeoffsets());
assert!( !matches.is_empty() );
assert_eq!( matches.get(0).unwrap().text , "I" );
assert_eq!( matches.get(1).unwrap().text , "thиnk" );
Expand All @@ -998,7 +998,7 @@ fn test0706_find_all_matches_unicodeoffsets() {
assert_eq!( model.match_to_str(matches.get(1).unwrap()) , "think" );
assert_eq!( matches.get(2).unwrap().text , "you" );
assert_eq!( matches.get(3).unwrap().text , "are" );
assert_eq!( matches.get(4).unwrap().text , "rihgt" );
assert_eq!( matches.get(4).unwrap().text , "righт" );
assert_eq!( model.match_to_str(matches.get(4).unwrap()) , "right" );
}

Expand Down

0 comments on commit 79ba61b

Please sign in to comment.