Skip to content

Commit

Permalink
Merge pull request #16 from dristic/expose_hash
Browse files Browse the repository at this point in the history
Expose hash on chunk struct.
  • Loading branch information
nlfiedler authored Feb 7, 2022
2 parents 86bb83f + 1f5fa32 commit 300a866
Showing 1 changed file with 23 additions and 8 deletions.
31 changes: 23 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub const MAXIMUM_MAX: usize = 1_073_741_824;
/// Represents a chunk, returned from the FastCDC iterator.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct Chunk {
/// Hash found at this location.
pub hash: u32,
/// Starting byte position within the original content.
pub offset: usize,
/// Length of the chunk in bytes.
Expand Down Expand Up @@ -116,12 +118,12 @@ impl<'a> FastCDC<'a> {
}

/// Returns the size of the next chunk.
fn cut(&mut self, mut source_offset: usize, mut source_size: usize) -> usize {
fn cut(&mut self, mut source_offset: usize, mut source_size: usize) -> (u32, usize) {
if source_size <= self.min_size {
if !self.eof {
0
(0, 0)
} else {
source_size
(0, source_size)
}
} else {
if source_size > self.max_size {
Expand All @@ -140,7 +142,7 @@ impl<'a> FastCDC<'a> {
source_offset += 1;
hash = (hash >> 1) + TABLE[index];
if (hash & self.mask_s) == 0 {
return source_offset - source_start;
return (hash, source_offset - source_start);
}
}
// Fall back to using the "easier" chunking judgement to find chunks
Expand All @@ -150,17 +152,17 @@ impl<'a> FastCDC<'a> {
source_offset += 1;
hash = (hash >> 1) + TABLE[index];
if (hash & self.mask_l) == 0 {
return source_offset - source_start;
return (hash, source_offset - source_start);
}
}
// If source is not the last buffer, we may yet find a larger chunk.
// If sourceSize === maximum, we will not find a larger chunk and should emit.
if !self.eof && source_size < self.max_size {
0
(hash, 0)
} else {
// All else fails, return the whole chunk. This will happen with
// pathological data, such as all zeroes.
source_size
(hash, source_size)
}
}
}
Expand All @@ -173,14 +175,15 @@ impl<'a> Iterator for FastCDC<'a> {
if self.bytes_remaining == 0 {
None
} else {
let chunk_size = self.cut(self.bytes_processed, self.bytes_remaining);
let (chunk_hash, chunk_size) = self.cut(self.bytes_processed, self.bytes_remaining);
if chunk_size == 0 {
None
} else {
let chunk_start = self.bytes_processed;
self.bytes_processed += chunk_size;
self.bytes_remaining -= chunk_size;
Some(Chunk {
hash: chunk_hash,
offset: chunk_start,
length: chunk_size,
})
Expand Down Expand Up @@ -406,6 +409,7 @@ mod tests {
let results: Vec<Chunk> = chunker.collect();
assert_eq!(results.len(), 10);
for entry in results {
assert_eq!(entry.hash, 3106636015);
assert_eq!(entry.offset % 1024, 0);
assert_eq!(entry.length, 1024);
}
Expand All @@ -419,16 +423,22 @@ mod tests {
let chunker = FastCDC::new(&contents, 8192, 16384, 32768);
let results: Vec<Chunk> = chunker.collect();
assert_eq!(results.len(), 6);
assert_eq!(results[0].hash, 1527472128);
assert_eq!(results[0].offset, 0);
assert_eq!(results[0].length, 22366);
assert_eq!(results[1].hash, 1174757376);
assert_eq!(results[1].offset, 22366);
assert_eq!(results[1].length, 8282);
assert_eq!(results[2].hash, 2687197184);
assert_eq!(results[2].offset, 30648);
assert_eq!(results[2].length, 16303);
assert_eq!(results[3].hash, 1210105856);
assert_eq!(results[3].offset, 46951);
assert_eq!(results[3].length, 18696);
assert_eq!(results[4].hash, 2984739645);
assert_eq!(results[4].offset, 65647);
assert_eq!(results[4].length, 32768);
assert_eq!(results[5].hash, 1121740051);
assert_eq!(results[5].offset, 98415);
assert_eq!(results[5].length, 11051);
}
Expand Down Expand Up @@ -491,10 +501,13 @@ mod tests {
let chunker = FastCDC::new(&contents, 16384, 32768, 65536);
let results: Vec<Chunk> = chunker.collect();
assert_eq!(results.len(), 3);
assert_eq!(results[0].hash, 2772598784);
assert_eq!(results[0].offset, 0);
assert_eq!(results[0].length, 32857);
assert_eq!(results[1].hash, 1651589120);
assert_eq!(results[1].offset, 32857);
assert_eq!(results[1].length, 16408);
assert_eq!(results[2].hash, 1121740051);
assert_eq!(results[2].offset, 49265);
assert_eq!(results[2].length, 60201);
}
Expand All @@ -507,8 +520,10 @@ mod tests {
let chunker = FastCDC::new(&contents, 32768, 65536, 131_072);
let results: Vec<Chunk> = chunker.collect();
assert_eq!(results.len(), 2);
assert_eq!(results[0].hash, 2772598784);
assert_eq!(results[0].offset, 0);
assert_eq!(results[0].length, 32857);
assert_eq!(results[1].hash, 1121740051);
assert_eq!(results[1].offset, 32857);
assert_eq!(results[1].length, 76609);
}
Expand Down

0 comments on commit 300a866

Please sign in to comment.