From 79ba61b980b9fda015518e23df7528f47f1180f0 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Tue, 10 May 2022 11:21:41 +0200 Subject: [PATCH] fix in outputting unicode offset #15 (for real now I hope) --- src/search.rs | 8 ++++---- tests/main.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/search.rs b/src/search.rs index 6f6a2e8..370ce7f 100644 --- a/src/search.rs +++ b/src/search.rs @@ -482,12 +482,12 @@ impl ContextRule { pub(crate) fn remap_offsets_to_unicodepoints<'a>(text: &'a str, mut matches: Vec<Match<'a>>) -> Vec<Match<'a>> { let mut bytes2unicodepoints: Vec<Option<usize>> = Vec::new(); let mut end = 0; - for (unicodeoffset, (byteoffset, _char)) in text.char_indices().enumerate() { - for _ in bytes2unicodepoints.len()..byteoffset { + for (unicodeoffset, c) in text.chars().enumerate() { + bytes2unicodepoints.push(Some(unicodeoffset)); + for _ in 0..c.len_utf8()-1 { bytes2unicodepoints.push(None); } - bytes2unicodepoints.push(Some(unicodeoffset)); - end = byteoffset+1; + end = unicodeoffset+1; } //add an end offset bytes2unicodepoints.push(Some(end)); diff --git a/tests/main.rs b/tests/main.rs index 4548d43..9551fe2 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -989,7 +989,7 @@ fn test0706_find_all_matches_unicodeoffsets() { model.add_to_vocabulary(text,None,&VocabParams::default()); } model.build(); - let matches = model.find_all_matches("I thиnk you are rihgt", &get_test_searchparams().with_max_ngram(1).with_unicodeoffsets()); + let matches = model.find_all_matches("I thиnk you are righт", &get_test_searchparams().with_max_ngram(1).with_unicodeoffsets()); assert!( !matches.is_empty() ); assert_eq!( matches.get(0).unwrap().text , "I" ); assert_eq!( matches.get(1).unwrap().text , "thиnk" ); @@ -998,7 +998,7 @@ fn test0706_find_all_matches_unicodeoffsets() { assert_eq!( model.match_to_str(matches.get(1).unwrap()) , "think" ); assert_eq!( matches.get(2).unwrap().text , "you" ); assert_eq!( matches.get(3).unwrap().text , "are" ); - assert_eq!( 
matches.get(4).unwrap().text , "rihgt" ); + assert_eq!( matches.get(4).unwrap().text , "righт" ); assert_eq!( model.match_to_str(matches.get(4).unwrap()) , "right" ); }