diff --git a/gulagcleaner_rs/Cargo.toml b/gulagcleaner_rs/Cargo.toml
index 240f2fa..c067cca 100644
--- a/gulagcleaner_rs/Cargo.toml
+++ b/gulagcleaner_rs/Cargo.toml
@@ -17,3 +17,4 @@ name = "gulagcleaner_rs"
 [dependencies]
 flate2 = "1.0.27"
 lopdf = "0.34.0"
+regex = "1.11.0"
\ No newline at end of file
diff --git a/gulagcleaner_rs/example_docs/wuolah-061024-example.pdf b/gulagcleaner_rs/example_docs/wuolah-061024-example.pdf
new file mode 100644
index 0000000..d592cf5
Binary files /dev/null and b/gulagcleaner_rs/example_docs/wuolah-061024-example.pdf differ
diff --git a/gulagcleaner_rs/src/models/page_type.rs b/gulagcleaner_rs/src/models/page_type.rs
index ff1140c..d192805 100644
--- a/gulagcleaner_rs/src/models/page_type.rs
+++ b/gulagcleaner_rs/src/models/page_type.rs
@@ -129,7 +129,10 @@ fn is_annots_wuolah(annot: &&&lopdf::Dictionary, doc: &lopdf::Document) -> bool
         Ok(x) => {
             match doc.dereference(x).unwrap().1.as_dict().unwrap().get(b"URI") {
                 Ok(y) => {
-                    doc.dereference(y).unwrap().1.as_string().unwrap().contains("track.wlh.es")
+                    // Match "track.wlh.es" tracking links, but not those whose
+                    // URL also contains "apuntes".
+                    let url = doc.dereference(y).unwrap().1.as_string().unwrap();
+                    url.contains("track.wlh.es") && !url.contains("apuntes")
                 },
                 Err(_) => false,
             }
diff --git a/gulagcleaner_rs/src/tests.rs b/gulagcleaner_rs/src/tests.rs
index 60c2141..4776b2e 100644
--- a/gulagcleaner_rs/src/tests.rs
+++ b/gulagcleaner_rs/src/tests.rs
@@ -161,6 +161,14 @@ fn test_041024_wuolah_pdf() {
     });
 }
 
+#[test]
+fn test_061024_wuolah_pdf() {
+    run_test_for_config(&TestConfig {
+        input_path: "example_docs/wuolah-061024-example.pdf",
+        output_filename: "wuolah-061024-example_clean.pdf",
+    });
+}
+
 #[test]
 fn test_studocu_pdf() {
     run_test_for_config(&TestConfig {