diff --git a/gulagcleaner_python/Cargo.toml b/gulagcleaner_python/Cargo.toml index 83d1cd4..f0ca033 100644 --- a/gulagcleaner_python/Cargo.toml +++ b/gulagcleaner_python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gulagcleaner_python" -version = "0.12.1" +version = "0.12.2" edition = "2021" authors = ["YM162 "] description = "Ad removal tool for PDFs." diff --git a/gulagcleaner_python/pyproject.toml b/gulagcleaner_python/pyproject.toml index 43ec250..056c288 100644 --- a/gulagcleaner_python/pyproject.toml +++ b/gulagcleaner_python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gulagcleaner" -version = "0.12.1" +version = "0.12.2" description = "Ad removal tool for PDFs." authors = [ {name = "YM162", email = "david.fontaneda16@gmail.com"}] diff --git a/gulagcleaner_python/python/gulagcleaner/command_line.py b/gulagcleaner_python/python/gulagcleaner/command_line.py index 8369761..cfb5f4a 100644 --- a/gulagcleaner_python/python/gulagcleaner/command_line.py +++ b/gulagcleaner_python/python/gulagcleaner/command_line.py @@ -61,7 +61,7 @@ def main(): # Check for the -v argument if arguments["version"]: - print("Current version: 0.12.1") + print("Current version: 0.12.2") return # Get the pdf_path argument diff --git a/gulagcleaner_rs/Cargo.toml b/gulagcleaner_rs/Cargo.toml index 0f9bb33..70ef78e 100644 --- a/gulagcleaner_rs/Cargo.toml +++ b/gulagcleaner_rs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gulagcleaner_rs" -version = "0.12.1" +version = "0.12.2" edition = "2021" authors = ["YM162 "] description = "Ad removal tool for PDFs." diff --git a/gulagcleaner_rs/example_docs/wuolah-free-example.pdf b/gulagcleaner_rs/example_docs/wuolah-free-example.pdf index c1fba37..2c223c4 100644 Binary files a/gulagcleaner_rs/example_docs/wuolah-free-example.pdf and b/gulagcleaner_rs/example_docs/wuolah-free-example.pdf differ diff --git a/gulagcleaner_rs/src/clean.rs b/gulagcleaner_rs/src/clean.rs index fca956a..87fe475 100644 --- a/gulagcleaner_rs/src/clean.rs +++ b/gulagcleaner_rs/src/clean.rs @@ -71,13 +71,12 @@ fn match_method(doc: &Document, force_naive: bool) -> Method { .map(|x| doc.get_page_contents(*x.1)) .filter(|x| x.len() > 1) .collect(); - //let to_delete: Vec = pages - let _: Vec = pages + let to_delete: Vec = pages .iter() .filter(|x| { let contents = doc.get_page_contents(*x.1); - contents.len() == 1 + contents.len() < 4 }) .map(|x| *x.0) .collect(); @@ -88,23 +87,26 @@ fn match_method(doc: &Document, force_naive: bool) -> Method { .filter(|x| *x == 3) .collect::>() .len() - > 1 + == pages.len() { return Method::StuDocu(content_list); } + let long_content_list: Vec> = pages + .iter() + .map(|x| doc.get_page_contents(*x.1)) + .filter(|x| x.len() > 3) + .collect(); - if content_list.len() > 1 - && content_list[0] + if long_content_list.len() > 1 + && long_content_list[0] .iter() .collect::>() - .intersection(&content_list[1].iter().collect::>()) + .intersection(&long_content_list[1].iter().collect::>()) .collect::>() .len() > 1 { - //return Method::Wuolah(content_list, to_delete); - //SEE COMMENT AT THE TOP OF THE FUNCTION - return Method::Naive; + return Method::Wuolah(long_content_list, to_delete); } Method::Naive } diff --git a/gulagcleaner_rs/src/models/method.rs b/gulagcleaner_rs/src/models/method.rs index dd6c080..8f432c9 100644 --- a/gulagcleaner_rs/src/models/method.rs +++ b/gulagcleaner_rs/src/models/method.rs @@ -46,7 +46,7 @@ impl Cleaner for Method { let vector: Vec<(&u32, &(u32, u16))> = pages .iter() - .filter(|x| doc.get_page_contents(*x.1).len() > 1) + .filter(|x| doc.get_page_contents(*x.1).len() > 3) .collect(); for (i, page) in vector.iter().enumerate() { let mutable_page = doc.get_object_mut(*page.1).unwrap().as_dict_mut().unwrap(); diff --git a/gulagcleaner_wasm/Cargo.toml b/gulagcleaner_wasm/Cargo.toml index 062c9fd..7b355c6 100644 --- a/gulagcleaner_wasm/Cargo.toml +++ b/gulagcleaner_wasm/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gulagcleaner_wasm" -version = "0.12.1" +version = "0.12.2" edition = "2021" authors = ["YM162 "] description = "Ad removal tool for PDFs."