Skip to content

Commit

Permalink
fix: char_indices support rev
Browse files Browse the repository at this point in the history
  • Loading branch information
SyMind committed Dec 30, 2024
1 parent f706b2c commit 09bcbbd
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 143 deletions.
17 changes: 3 additions & 14 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1252,10 +1252,7 @@ pub trait SourceText<'a>: Default + Clone + ToString {
fn ends_with(&self, value: &str) -> bool;

/// Returns an iterator over the char indices in the text.
fn char_indices(&self) -> impl Iterator<Item = (usize, char)>;

/// Returns an iterator over the [`char`]s of a string slice.
fn chars(&self) -> impl DoubleEndedIterator<Item = char>;
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)>;

/// Gets the byte at the specified index, if it exists.
fn get_byte(&self, byte_index: usize) -> Option<u8>;
Expand Down Expand Up @@ -1292,14 +1289,10 @@ impl<'a> SourceText<'a> for Rope<'a> {
(*self).ends_with(value)
}

fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
self.char_indices()
}

/// Returns a double-ended iterator over the [`char`]s of this rope.
///
/// Forwards to `chars` on the dereferenced receiver.
/// NOTE(review): presumably this resolves to the inherent `Rope::chars` — confirm.
fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
(*self).chars()
}

/// Returns the part of this rope selected by the byte `range`.
///
/// Forwards to a `byte_slice` method found on the receiver.
/// NOTE(review): presumably the inherent `Rope::byte_slice`, which takes precedence
/// over this trait method during method resolution — confirm it exists, otherwise
/// this call would recurse.
fn byte_slice(&self, range: Range<usize>) -> Self {
self.byte_slice(range)
}
Expand Down Expand Up @@ -1338,14 +1331,10 @@ impl<'a> SourceText<'a> for &'a str {
(*self).ends_with(value)
}

fn char_indices(&self) -> impl Iterator<Item = (usize, char)> {
fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
(*self).char_indices()
}

/// Returns a double-ended iterator over the [`char`]s of this string slice.
///
/// Dereferences the `&&str` receiver and forwards to [`str::chars`].
fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
(*self).chars()
}

/// Returns the sub-slice selected by the byte `range`.
///
/// Uses the checked [`str::get`], so a range that is out of bounds or does not
/// fall on UTF-8 character boundaries yields the empty string instead of panicking.
fn byte_slice(&self, range: Range<usize>) -> Self {
self.get(range).unwrap_or_default()
}
Expand Down
217 changes: 94 additions & 123 deletions src/rope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@

use std::{
borrow::Cow,
collections::VecDeque,
hash::Hash,
ops::{Bound, RangeBounds},
rc::Rc,
str::Chars,
};

use crate::Error;
Expand Down Expand Up @@ -135,32 +133,22 @@ impl<'a> Rope<'a> {
iter: s.char_indices(),
},
},
Repr::Full(data) => CharIndices {
iter: CharIndicesEnum::Full {
chunks: data,
char_indices: VecDeque::new(),
chunk_index: 0,
},
},
}
}

/// Returns an iterator over the [`char`]s of a string slice.
#[inline(always)]
pub fn chars(&self) -> RopeChars<'_> {
match &self.repr {
Repr::Light(s) => RopeChars {
iters: vec![s.chars()],
left: 0,
right: 0,
},
Repr::Full(data) => {
let iters = data.iter().map(|(s, _)| s.chars()).collect::<Vec<_>>();
let len = iters.len();
RopeChars {
iters,
left: 0,
right: (len - 1) as u32,
Repr::Full(vec) => {
let right_byte_offset = vec.iter().map(|(s, _)| s.len() as u32).sum();

CharIndices {
iter: CharIndicesEnum::Full {
iters: vec
.iter()
.map(|(s, _)| s.char_indices())
.collect::<Vec<_>>(),
left_chunk_index: 0,
left_byte_offset: 0,
last_left_indice: None,
right_chunk_index: (vec.len() - 1) as u32,
right_byte_offset,
right_byte_offset_for: vec.len() as u32,
},
}
}
}
Expand Down Expand Up @@ -680,9 +668,13 @@ enum CharIndicesEnum<'a, 'b> {
iter: std::str::CharIndices<'b>,
},
Full {
chunks: &'a [(&'b str, usize)],
char_indices: VecDeque<(usize, char)>,
chunk_index: usize,
iters: Vec<std::str::CharIndices<'a>>,
left_chunk_index: u32,
left_byte_offset: u32,
last_left_indice: Option<(usize, char)>,
right_chunk_index: u32,
right_byte_offset: u32,
right_byte_offset_for: u32,
},
}

Expand All @@ -697,29 +689,59 @@ impl Iterator for CharIndices<'_, '_> {
match &mut self.iter {
CharIndicesEnum::Light { iter } => iter.next(),
CharIndicesEnum::Full {
chunks,
char_indices,
chunk_index,
iters,
left_chunk_index,
left_byte_offset,
last_left_indice,
..
} => {
if let Some(item) = char_indices.pop_front() {
return Some(item);
}

if *chunk_index >= chunks.len() {
if (*left_chunk_index as usize) >= iters.len() {
return None;
}

// skip empty chunks
while *chunk_index < chunks.len() && chunks[*chunk_index].0.is_empty() {
*chunk_index += 1;
if let Some((byte_index, char)) =
iters[*left_chunk_index as usize].next()
{
*last_left_indice = Some((byte_index, char));
Some((byte_index + (*left_byte_offset as usize), char))
} else {
*left_chunk_index += 1;
if let Some((byte_index, char)) = last_left_indice.take() {
*left_byte_offset =
*left_byte_offset + byte_index as u32 + char.len_utf8() as u32;
}
self.next()
}
}
}
}
}

let (chunk, start_pos) = chunks[*chunk_index];

char_indices
.extend(chunk.char_indices().map(|(i, c)| (start_pos + i, c)));
*chunk_index += 1;
char_indices.pop_front()
impl DoubleEndedIterator for CharIndices<'_, '_> {
/// Yields the next `(byte index, char)` pair from the back of the rope.
///
/// For the `Light` variant the wrapped `std::str::CharIndices` is used
/// directly and its indices are returned unchanged. For the `Full` variant
/// the per-chunk iterators are drained from the last chunk towards the
/// first, and each chunk-local byte index is shifted by `right_byte_offset`.
///
/// # Panics
///
/// Indexes `iters` with `right_chunk_index` without a bounds check.
/// NOTE(review): this relies on the constructor supplying at least one
/// chunk — confirm.
fn next_back(&mut self) -> Option<Self::Item> {
    let (iters, right_chunk_index, right_byte_offset, right_byte_offset_for) =
        match &mut self.iter {
            CharIndicesEnum::Light { iter } => return iter.next_back(),
            CharIndicesEnum::Full {
                iters,
                right_chunk_index,
                right_byte_offset,
                right_byte_offset_for,
                ..
            } => (
                iters,
                right_chunk_index,
                right_byte_offset,
                right_byte_offset_for,
            ),
        };

    loop {
        let chunk = *right_chunk_index;
        match iters[chunk as usize].next_back() {
            Some((local_index, ch)) => {
                // First char taken from the back of this chunk: step the
                // running offset back by the end position of that char
                // (its local index plus its encoded width) and remember
                // which chunk the offset now belongs to.
                if *right_byte_offset_for != chunk {
                    *right_byte_offset =
                        *right_byte_offset - local_index as u32 - ch.len_utf8() as u32;
                    *right_byte_offset_for = chunk;
                }
                return Some((local_index + (*right_byte_offset as usize), ch));
            }
            // Current chunk is drained; continue with the one before it.
            None if chunk > 0 => *right_chunk_index = chunk - 1,
            // The first chunk is drained as well: nothing left.
            None => return None,
        }
    }
}
Expand Down Expand Up @@ -961,46 +983,6 @@ fn end_bound_to_range_end(end: Bound<&usize>) -> Option<usize> {
}
}

/// Double-ended iterator over the [`char`]s of a sequence of string chunks.
///
/// `iters` holds one [`Chars`] iterator per chunk, in order. `left` is the
/// index of the chunk the front cursor is currently draining and `right` the
/// index of the chunk the back cursor is currently draining. Each [`Chars`]
/// is itself double-ended, so a char is never yielded from both ends.
#[derive(Clone, Debug)]
pub struct RopeChars<'a> {
    iters: Vec<Chars<'a>>,
    left: u32,
    right: u32,
}

impl Iterator for RopeChars<'_> {
    type Item = char;

    /// Returns the next char from the front, skipping exhausted or empty
    /// chunks, or `None` once every chunk has been drained.
    #[inline]
    fn next(&mut self) -> Option<char> {
        // Iterate rather than recurse so a long run of empty chunks cannot
        // grow the call stack.
        while let Some(chunk) = self.iters.get_mut(self.left as usize) {
            if let Some(ch) = chunk.next() {
                return Some(ch);
            }
            self.left += 1;
        }
        None
    }
}

impl DoubleEndedIterator for RopeChars<'_> {
    /// Returns the next char from the back, skipping exhausted or empty
    /// chunks, or `None` once every chunk has been drained.
    ///
    /// An iterator without any chunks yields `None` instead of panicking,
    /// matching the behaviour of `next`.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        loop {
            // Checked access: an empty chunk list simply ends the iteration.
            let chunk = self.iters.get_mut(self.right as usize)?;
            if let Some(ch) = chunk.next_back() {
                return Some(ch);
            }
            if self.right == 0 {
                return None;
            }
            self.right -= 1;
        }
    }
}

#[cfg(test)]
mod tests {
use std::rc::Rc;
Expand Down Expand Up @@ -1230,6 +1212,29 @@ mod tests {
);
}

#[test]
fn reverse_char_indices() {
    // ASCII text split over two chunks: the reversed indices must match
    // those of the equivalent contiguous string.
    let mut a = Rope::new();
    a.add("abc");
    a.add("def");
    assert_eq!(
        a.char_indices().rev().collect::<Vec<_>>(),
        "abcdef".char_indices().rev().collect::<Vec<_>>()
    );

    // Multi-byte characters: each char spans several bytes, so this
    // exercises the byte-offset bookkeeping of the reverse iteration,
    // first with a single piece of text and then across a chunk boundary.
    let mut a = Rope::new();
    a.add("こんにちは");
    assert_eq!(
        a.char_indices().rev().collect::<Vec<_>>(),
        "こんにちは".char_indices().rev().collect::<Vec<_>>()
    );
    a.add("世界");
    assert_eq!(
        a.char_indices().rev().collect::<Vec<_>>(),
        "こんにちは世界".char_indices().rev().collect::<Vec<_>>()
    );
}

#[test]
fn lines1() {
let rope = Rope::from("abc");
Expand Down Expand Up @@ -1306,38 +1311,4 @@ mod tests {
.collect::<Vec<_>>();
assert_eq!(lines, ["\n"]);
}

#[test]
fn chars() {
    // A rope built from one string and a rope assembled from several pieces
    // must both yield their chars front to back and then stop.
    for rope in [Rope::from("abc"), Rope::from_iter(["a", "b", "c"])] {
        let collected: Vec<char> = rope.chars().collect();
        assert_eq!(collected, ['a', 'b', 'c']);
    }
}

#[test]
fn reverse_chars() {
    // A rope built from one string and a rope assembled from several pieces
    // must both yield their chars back to front and then stop.
    for rope in [Rope::from("abc"), Rope::from_iter(["a", "b", "c"])] {
        let reversed: Vec<char> = rope.chars().rev().collect();
        assert_eq!(reversed, ['c', 'b', 'a']);
    }
}
}
11 changes: 5 additions & 6 deletions src/with_indices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ where

let (last_char_index, last_byte_index) =
self.last_char_index_to_byte_index.get();
let mut byte_index = last_byte_index as usize;
let byte_index = last_byte_index as usize;
let mut char_index = last_char_index as usize;

if start_char_index >= last_char_index as usize
Expand Down Expand Up @@ -85,18 +85,17 @@ where
// will always lie on UTF-8 sequence boundaries.
self.line.byte_slice_unchecked(0..byte_index)
};
for char in slice.chars().rev() {
byte_index -= char.len_utf8();
char_index -= 1;
for (byte_index, char) in slice.char_indices().rev() {
if char_index == end_char_index {
end_byte_index = Some(byte_index);
end_byte_index = Some(byte_index + char.len_utf8());
if start_byte_index.is_some() {
break;
}
} else if char_index == start_char_index {
start_byte_index = Some(byte_index);
start_byte_index = Some(byte_index + char.len_utf8());
break;
}
char_index -= 1;
}
}

Expand Down

0 comments on commit 09bcbbd

Please sign in to comment.