Skip to content

Commit

Permalink
Re-enabled Wuolah method
Browse files Browse the repository at this point in the history
  • Loading branch information
YM162 committed Jan 31, 2024
1 parent 246f5e8 commit 81e7aa8
Show file tree
Hide file tree
Showing 8 changed files with 18 additions and 16 deletions.
2 changes: 1 addition & 1 deletion gulagcleaner_python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gulagcleaner_python"
- version = "0.12.1"
+ version = "0.12.2"
edition = "2021"
authors = ["YM162 <[email protected]>"]
description = "Ad removal tool for PDFs."
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "gulagcleaner"
- version = "0.12.1"
+ version = "0.12.2"
description = "Ad removal tool for PDFs."
authors = [
{name = "YM162", email = "[email protected]"}]
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_python/python/gulagcleaner/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def main():

# Check for the -v argument
if arguments["version"]:
- print("Current version: 0.12.1")
+ print("Current version: 0.12.2")
return

# Get the pdf_path argument
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gulagcleaner_rs"
- version = "0.12.1"
+ version = "0.12.2"
edition = "2021"
authors = ["YM162 <[email protected]>"]
description = "Ad removal tool for PDFs."
Expand Down
Binary file modified gulagcleaner_rs/example_docs/wuolah-free-example.pdf
Binary file not shown.
22 changes: 12 additions & 10 deletions gulagcleaner_rs/src/clean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,12 @@ fn match_method(doc: &Document, force_naive: bool) -> Method {
.map(|x| doc.get_page_contents(*x.1))
.filter(|x| x.len() > 1)
.collect();
- //let to_delete: Vec<u32> = pages
- let _: Vec<u32> = pages
+ let to_delete: Vec<u32> = pages
.iter()
.filter(|x| {
let contents = doc.get_page_contents(*x.1);

- contents.len() == 1
+ contents.len() < 4
})
.map(|x| *x.0)
.collect();
Expand All @@ -88,23 +87,26 @@ fn match_method(doc: &Document, force_naive: bool) -> Method {
.filter(|x| *x == 3)
.collect::<Vec<_>>()
.len()
- > 1
+ == pages.len()
{
return Method::StuDocu(content_list);
}
+ let long_content_list: Vec<Vec<(u32, u16)>> = pages
+ .iter()
+ .map(|x| doc.get_page_contents(*x.1))
+ .filter(|x| x.len() > 3)
+ .collect();

- if content_list.len() > 1
- && content_list[0]
+ if long_content_list.len() > 1
+ && long_content_list[0]
.iter()
.collect::<HashSet<_>>()
- .intersection(&content_list[1].iter().collect::<HashSet<_>>())
+ .intersection(&long_content_list[1].iter().collect::<HashSet<_>>())
.collect::<Vec<_>>()
.len()
> 1
{
- //return Method::Wuolah(content_list, to_delete);
- //SEE COMMENT AT THE TOP OF THE FUNCTION
- return Method::Naive;
+ return Method::Wuolah(long_content_list, to_delete);
}
Method::Naive
}
2 changes: 1 addition & 1 deletion gulagcleaner_rs/src/models/method.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl Cleaner for Method {

let vector: Vec<(&u32, &(u32, u16))> = pages
.iter()
- .filter(|x| doc.get_page_contents(*x.1).len() > 1)
+ .filter(|x| doc.get_page_contents(*x.1).len() > 3)
.collect();
for (i, page) in vector.iter().enumerate() {
let mutable_page = doc.get_object_mut(*page.1).unwrap().as_dict_mut().unwrap();
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_wasm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gulagcleaner_wasm"
- version = "0.12.1"
+ version = "0.12.2"
edition = "2021"
authors = ["YM162 <[email protected]>"]
description = "Ad removal tool for PDFs."
Expand Down

0 comments on commit 81e7aa8

Please sign in to comment.