-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bugfix, comment fix, force compile fails for big-endian #5
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,19 +103,19 @@ impl SymbolTable { | |
fn optimize(&self, counters: Counter) -> Self { | ||
let mut res = SymbolTable::default(); | ||
let mut pqueue = BinaryHeap::new(); | ||
for code1 in 0..511 { | ||
for code1 in 0u16..(256u16 + self.n_symbols as u16) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
let symbol1 = self.symbols[code1 as usize]; | ||
let gain = counters.count1(code1) * symbol1.len(); | ||
pqueue.push(Candidate { | ||
symbol: symbol1, | ||
gain, | ||
}); | ||
|
||
for code2 in 0..511 { | ||
for code2 in 0u16..(256u16 + self.n_symbols as u16) { | ||
let symbol2 = &self.symbols[code2 as usize]; | ||
// If either symbol is zero-length, or if merging would yield a symbol of | ||
// length greater than 8, skip. | ||
if symbol1.len() + symbol2.len() >= 8 || symbol1.is_empty() || symbol2.is_empty() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. symbols can never be empty, the last PR changed that |
||
if symbol1.len() + symbol2.len() >= 8 { | ||
continue; | ||
} | ||
let new_symbol = symbol1.concat(symbol2); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
#![cfg(test)] | ||
|
||
use fsst_rs::Symbol; | ||
|
||
static PREAMBLE: &str = r#" | ||
When in the Course of human events, it becomes necessary for one people to dissolve | ||
the political bands which have connected them with another, and to assume among the | ||
|
@@ -9,6 +11,8 @@ that they should declare the causes which impel them to the separation."#; | |
|
||
static DECLARATION: &str = include_str!("./fixtures/declaration.txt"); | ||
|
||
static ART_OF_WAR: &str = include_str!("./fixtures/art_of_war.txt"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 ⚔️ There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
||
#[test] | ||
fn test_basic() { | ||
// Roundtrip the declaration | ||
|
@@ -29,6 +33,18 @@ fn test_train_on_empty() { | |
); | ||
} | ||
|
||
#[test] | ||
fn test_one_byte() { | ||
let mut empty = fsst_rs::SymbolTable::default(); | ||
// Assign code 0 to map to the symbol containing byte 0x01 | ||
empty.insert(Symbol::from_u8(0x01)); | ||
|
||
let compressed = empty.compress(&[0x01]); | ||
assert_eq!(compressed, vec![0u8]); | ||
|
||
assert_eq!(empty.decompress(&compressed), vec![0x01]); | ||
} | ||
|
||
#[test] | ||
fn test_zeros() { | ||
println!("training zeros"); | ||
|
@@ -57,3 +73,12 @@ fn test_large() { | |
let compressed = trained.compress(massive.as_bytes()); | ||
assert_eq!(trained.decompress(&compressed), massive.as_bytes()); | ||
} | ||
|
||
#[test] | ||
fn test_chinese() { | ||
let trained = fsst_rs::train(ART_OF_WAR.as_bytes()); | ||
assert_eq!( | ||
ART_OF_WAR.as_bytes(), | ||
trained.decompress(&trained.compress(ART_OF_WAR.as_bytes())) | ||
); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these were kinda useless