Skip to content

Commit

Permalink
Fixed Naive method for latest PDFs
Browse files Browse the repository at this point in the history
  • Loading branch information
YM162 committed Sep 28, 2024
1 parent 3b98ac6 commit 7836e7e
Show file tree
Hide file tree
Showing 9 changed files with 27 additions and 10 deletions.
2 changes: 1 addition & 1 deletion gulagcleaner_python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gulagcleaner_python"
version = "0.14.3"
version = "0.14.4"
edition = "2021"
authors = ["YM162 <[email protected]>"]
description = "Ad removal tool for PDFs."
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "gulagcleaner"
version = "0.14.3"
version = "0.14.4"
description = "Ad removal tool for PDFs."
authors = [
{name = "YM162", email = "[email protected]"}]
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_python/python/gulagcleaner/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def main():

# Check for the -v argument
if arguments["version"]:
print("Current version: 0.14.3")
print("Current version: 0.14.4")
return

# Get the pdf_path argument
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gulagcleaner_rs"
version = "0.14.3"
version = "0.14.4"
edition = "2021"
authors = ["YM162 <[email protected]>"]
description = "Ad removal tool for PDFs."
Expand Down
Binary file not shown.
3 changes: 2 additions & 1 deletion gulagcleaner_rs/src/models/method.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ impl Cleaner for Method {
for page in &pages {
let page_type =
page_type::PageType::get_page_type(doc, page.1).unwrap_or_default();
println!("{:?}", page_type);
let mutable_page = doc.get_object_mut(*page.1).unwrap().as_dict_mut().unwrap();

let mediabox = mutable_page.get(b"MediaBox").unwrap().as_array().unwrap();
Expand Down Expand Up @@ -245,7 +246,7 @@ pub fn find_iobj_pairs(first_page: &[(u32, u16)], second_page: &[(u32, u16)]) ->
if len < 2 {
return (0, 0);
}

(indexes[len - 3], indexes[len - 2])
}

Expand Down
16 changes: 12 additions & 4 deletions gulagcleaner_rs/src/models/page_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use lopdf::{Document, ObjectId};
use super::method::{get_images, get_xobjs};

#[derive(Default)]
#[derive(Debug)]
/// Represents the different methods used in the Gulag Cleaner application.
pub enum PageType {
BannerAds,
Expand All @@ -14,18 +15,20 @@ pub enum PageType {
Idk,
}

pub const LOGO_DIMS: [(i64, i64); 3] = [(71, 390), (37, 203), (73, 390)];
pub const LOGO_DIMS: [(i64, i64); 6] = [(71, 390), (37, 203), (73, 390),(23,130),(19,109),(72,391)];

const HORIZONTAL_BANNER_DIMS: [(i64, i64); 7] = [
const HORIZONTAL_BANNER_DIMS: [(i64, i64); 9] = [
(247, 1414),
(213, 1219),
(215, 1219),
(249, 1414),
(217, 1240),
(147, 1757),
(221, 1240),
(136, 780),
(218,1241)
];
const VERTICAL_BANNER_DIMS: [(i64, i64); 8] = [
const VERTICAL_BANNER_DIMS: [(i64, i64); 10] = [
(1753, 170),
(1518, 248),
(1520, 147),
Expand All @@ -34,15 +37,19 @@ const VERTICAL_BANNER_DIMS: [(i64, i64); 8] = [
(1537, 147),
(1093, 217),
(1534, 150),
(970, 92),
(1538, 148)
];
const FULL_PAGE_DIMS: [(i64, i64); 7] = [
const FULL_PAGE_DIMS: [(i64, i64); 9] = [
(842, 595),
(1754, 1240),
(2526, 1785),
(1733, 1219),
(3508, 2480),
(2339, 1653),
(1785, 2526),
(1109, 782),
(1759,1241)
];

impl PageType {
Expand All @@ -64,6 +71,7 @@ impl PageType {
pub fn get_page_type(doc: &Document, page: &ObjectId) -> Result<PageType, Box<dyn Error>> {
let xobjs = get_xobjs(doc, page)?;
let images = get_images(doc, xobjs)?;
println!("{:?}", images);
let has_logo = !LOGO_DIMS
.iter()
.collect::<HashSet<_>>()
Expand Down
8 changes: 8 additions & 0 deletions gulagcleaner_rs/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ fn test_280924_wuolah_pdf() {
});
}

#[test]
fn test_280924_2_wuolah_pdf() {
run_test_for_config(&TestConfig {
input_path: "example_docs/wuolah-280924-2-example.pdf",
output_filename: "wuolah-280924-2-example_clean.pdf",
});
}

#[test]
fn test_studocu_pdf() {
run_test_for_config(&TestConfig {
Expand Down
2 changes: 1 addition & 1 deletion gulagcleaner_wasm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gulagcleaner_wasm"
version = "0.14.3"
version = "0.14.4"
edition = "2021"
authors = ["YM162 <[email protected]>"]
description = "Ad removal tool for PDFs."
Expand Down

0 comments on commit 7836e7e

Please sign in to comment.