diff --git a/gulagcleaner_python/Cargo.toml b/gulagcleaner_python/Cargo.toml index 8df19e7..51f986b 100644 --- a/gulagcleaner_python/Cargo.toml +++ b/gulagcleaner_python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gulagcleaner_python" -version = "0.14.3" +version = "0.14.4" edition = "2021" authors = ["YM162 "] description = "Ad removal tool for PDFs." diff --git a/gulagcleaner_python/pyproject.toml b/gulagcleaner_python/pyproject.toml index 596d281..fbf06e1 100644 --- a/gulagcleaner_python/pyproject.toml +++ b/gulagcleaner_python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gulagcleaner" -version = "0.14.3" +version = "0.14.4" description = "Ad removal tool for PDFs." authors = [ {name = "YM162", email = "david.fontaneda@YM162.dev"}] diff --git a/gulagcleaner_python/python/gulagcleaner/command_line.py b/gulagcleaner_python/python/gulagcleaner/command_line.py index 3594acf..ee6fbf4 100644 --- a/gulagcleaner_python/python/gulagcleaner/command_line.py +++ b/gulagcleaner_python/python/gulagcleaner/command_line.py @@ -61,7 +61,7 @@ def main(): # Check for the -v argument if arguments["version"]: - print("Current version: 0.14.3") + print("Current version: 0.14.4") return # Get the pdf_path argument diff --git a/gulagcleaner_rs/Cargo.toml b/gulagcleaner_rs/Cargo.toml index cd6367d..685d92f 100644 --- a/gulagcleaner_rs/Cargo.toml +++ b/gulagcleaner_rs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gulagcleaner_rs" -version = "0.14.3" +version = "0.14.4" edition = "2021" authors = ["YM162 "] description = "Ad removal tool for PDFs." 
diff --git a/gulagcleaner_rs/example_docs/wuolah-280924-2-example.pdf b/gulagcleaner_rs/example_docs/wuolah-280924-2-example.pdf new file mode 100644 index 0000000..5f22185 Binary files /dev/null and b/gulagcleaner_rs/example_docs/wuolah-280924-2-example.pdf differ diff --git a/gulagcleaner_rs/src/models/method.rs b/gulagcleaner_rs/src/models/method.rs index f5f5fa4..0e5e2c3 100644 --- a/gulagcleaner_rs/src/models/method.rs +++ b/gulagcleaner_rs/src/models/method.rs @@ -124,6 +124,7 @@ impl Cleaner for Method { for page in &pages { let page_type = page_type::PageType::get_page_type(doc, page.1).unwrap_or_default(); + println!("{:?}", page_type); let mutable_page = doc.get_object_mut(*page.1).unwrap().as_dict_mut().unwrap(); let mediabox = mutable_page.get(b"MediaBox").unwrap().as_array().unwrap(); @@ -245,7 +246,7 @@ pub fn find_iobj_pairs(first_page: &[(u32, u16)], second_page: &[(u32, u16)]) -> if len < 2 { return (0, 0); } - + (indexes[len - 3], indexes[len - 2]) } diff --git a/gulagcleaner_rs/src/models/page_type.rs b/gulagcleaner_rs/src/models/page_type.rs index 2517bbb..4904937 100644 --- a/gulagcleaner_rs/src/models/page_type.rs +++ b/gulagcleaner_rs/src/models/page_type.rs @@ -5,6 +5,7 @@ use lopdf::{Document, ObjectId}; use super::method::{get_images, get_xobjs}; #[derive(Default)] +#[derive(Debug)] /// Represents the different methods used in the Gulag Cleaner application. 
pub enum PageType { BannerAds, @@ -14,9 +15,9 @@ pub enum PageType { Idk, } -pub const LOGO_DIMS: [(i64, i64); 3] = [(71, 390), (37, 203), (73, 390)]; +pub const LOGO_DIMS: [(i64, i64); 6] = [(71, 390), (37, 203), (73, 390),(23,130),(19,109),(72,391)]; -const HORIZONTAL_BANNER_DIMS: [(i64, i64); 7] = [ +const HORIZONTAL_BANNER_DIMS: [(i64, i64); 9] = [ (247, 1414), (213, 1219), (215, 1219), @@ -24,8 +25,10 @@ const HORIZONTAL_BANNER_DIMS: [(i64, i64); 7] = [ (217, 1240), (147, 1757), (221, 1240), + (136, 780), + (218,1241) ]; -const VERTICAL_BANNER_DIMS: [(i64, i64); 8] = [ +const VERTICAL_BANNER_DIMS: [(i64, i64); 10] = [ (1753, 170), (1518, 248), (1520, 147), @@ -34,8 +37,10 @@ const VERTICAL_BANNER_DIMS: [(i64, i64); 8] = [ (1537, 147), (1093, 217), (1534, 150), + (970, 92), + (1538, 148) ]; -const FULL_PAGE_DIMS: [(i64, i64); 7] = [ +const FULL_PAGE_DIMS: [(i64, i64); 9] = [ (842, 595), (1754, 1240), (2526, 1785), @@ -43,6 +48,8 @@ const FULL_PAGE_DIMS: [(i64, i64); 7] = [ (3508, 2480), (2339, 1653), (1785, 2526), + (1109, 782), + (1759,1241) ]; impl PageType { @@ -64,6 +71,7 @@ impl PageType { pub fn get_page_type(doc: &Document, page: &ObjectId) -> Result> { let xobjs = get_xobjs(doc, page)?; let images = get_images(doc, xobjs)?; + println!("{:?}", images); let has_logo = !LOGO_DIMS .iter() .collect::>() diff --git a/gulagcleaner_rs/src/tests.rs b/gulagcleaner_rs/src/tests.rs index 2b45f37..d88affe 100644 --- a/gulagcleaner_rs/src/tests.rs +++ b/gulagcleaner_rs/src/tests.rs @@ -113,6 +113,14 @@ fn test_280924_wuolah_pdf() { }); } +#[test] +fn test_280924_2_wuolah_pdf() { + run_test_for_config(&TestConfig { + input_path: "example_docs/wuolah-280924-2-example.pdf", + output_filename: "wuolah-280924-2-example_clean.pdf", + }); +} + #[test] fn test_studocu_pdf() { run_test_for_config(&TestConfig { diff --git a/gulagcleaner_wasm/Cargo.toml b/gulagcleaner_wasm/Cargo.toml index 1c0430a..c27f576 100644 --- a/gulagcleaner_wasm/Cargo.toml +++ 
b/gulagcleaner_wasm/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gulagcleaner_wasm" -version = "0.14.3" +version = "0.14.4" edition = "2021" authors = ["YM162 "] description = "Ad removal tool for PDFs."