Skip to content

Commit

Permalink
Merge pull request #6 from jseg380/development
Browse files Browse the repository at this point in the history
Multiple files / folders feature added + New argument parsing
  • Loading branch information
YM162 authored Sep 19, 2023
2 parents c5481c3 + 618c3f6 commit 9090208
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 49 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,18 @@ Gulag Cleaner can be used through both a Command Line Interface (CLI) and in you

## Command Line Interface

To use Gulag Cleaner through the CLI, simply run the following command, replacing `<filename>` with the name of your PDF file:
To use Gulag Cleaner through the CLI, simply run the following command, replacing `<filename>` with the name of one or more PDF files or folders containing PDFs:

```
gulagcleaner [-r] [-h] [-v] <filename>
gulagcleaner [-r] [-s] [-h] [-v] <filename>...
```

## Options

Gulag Cleaner provides several options for its usage:

> * '-r': Replace the original file with the cleaned version.
> * '-s': Do not show metadata about cleaned files.
> * '-h': Display the help message, providing information on how to use Gulag Cleaner.
> * '-v': Display the current version of Gulag Cleaner.
Expand All @@ -60,4 +61,4 @@ return_msg = clean_pdf("file.pdf")
Gulag Cleaner is distributed under the GPL-3 license, which means it's open-source and free to use.

# Contributing
We're always looking for ways to improve Gulag Cleaner, and we welcome contributions from the community. If you have ideas for improvements or bug fixes, please feel free to submit a pull request.
We're always looking for ways to improve Gulag Cleaner, and we welcome contributions from the community. If you have ideas for improvements or bug fixes, please feel free to submit a pull request.
125 changes: 81 additions & 44 deletions gulagcleaner/command_line.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,119 @@
from gulagcleaner.extract import clean_pdf
from gulagcleaner.decrypt import decrypt_pdf
from gulagcleaner.metadata import extract_metadata
from os.path import exists
from os.path import exists, isdir, join
from os import listdir

def parseArgs():
    '''
    Parse the command line arguments of the program.

    Reads ``sys.argv`` at call time. The flags ``-h``, ``-r``, ``-s`` and
    ``-v`` are recognised anywhere after the program name; every other
    argument is treated as a path to clean and kept in its original order.

    Returns a dict with the following keys:
    'help'    : True if ``-h`` was passed.
    'replace' : True if ``-r`` was passed.
    'short'   : True if ``-s`` was passed.
    'version' : True if ``-v`` was passed.
    'files'   : list with the remaining (non-flag) arguments.
    '''
    import sys

    flags = ('-h', '-r', '-s', '-v')

    # Drop the program name by position instead of filtering it out by value:
    # a path argument spelled exactly like argv[0] must not be discarded, and
    # slicing is also safe when sys.argv happens to be empty.
    args = sys.argv[1:]

    return {
        'help': '-h' in args,
        'replace': '-r' in args,
        'short': '-s' in args,
        'version': '-v' in args,
        'files': [arg for arg in args if arg not in flags],
    }

def main():
'''
Main function for the "gulagcleaner" CLI command.
The "gulagcleaner" command takes an argument for the path of a PDF file and tries to remove the ads inside it. The new PDF is saved in the same folder.
The "gulagcleaner" command takes arguments for the path of one or more
files which can be PDF files or folders containing PDFs, and tries to
remove the ads inside of them. The new PDFs are saved in their original
location.
Available CLI arguments:
-h : Display help information.
-r : Replace the original file with the cleaned file.
-r : Replace original files with their cleaned version.
-s : Do not show metadata about cleaned files.
-v : Display the version of the program.
'''
import sys
arguments = parseArgs()

# Check for the -h argument
if '-h' in sys.argv:
print("Usage: gulagcleaner [-h] [-r] [-o] [-v] <pdf_path>")
if arguments["help"]:
print("Usage: gulagcleaner [-h] [-r] [-s] [-v] <pdf_path>...")
print("")
print("Removes ads from a PDF file.")
print("Removes ads from PDF files.")
print("")
print("Positional arguments:")
print(" pdf_path The PDF file to clean.")
print(" pdf_path PDF file to clean.")
print("")
print("Optional arguments:")
print(" -h Show this help message.")
print(" -r Replace the original file with the cleaned file.")
print(" -r Replace original files with their cleaned version.")
print(" -s Do not show metadata about cleaned files.")
print(" -v Show the version of the program.")
return

# Check for the -v argument
if '-v' in sys.argv:
print("Current version: 0.7.0")
if arguments["version"]:
print("Current version: 0.8.0")
return

# Get the pdf_path argument
if len(sys.argv) < 2:
print('Usage: gulagcleaner [-h] [-r] [-v] <pdf_path>')
return
pdf_path = sys.argv[-1]

# Check if the file exists
if not exists(pdf_path):
print("File not found.")
if len(arguments["files"]) == 0:
print('Usage: gulagcleaner [-h] [-r] [-s] [-v] <pdf_path>...')
return

# Check if the -r argument is present
if '-r' in sys.argv:
output_path = pdf_path
else:
output_path = pdf_path[:-4] + "_clean.pdf"
replace = arguments["replace"]
short = arguments["short"]

for element in arguments["files"]:
# Check if the file exists
if not exists(element):
print(element + " not found.")
continue

# Check if file is a directory
if isdir(element):
# Add PDF files of directory to list of files to clean
arguments["files"] += [join(element, file) for file in listdir(element)
if file.endswith('.pdf') or isdir(join(element, file))]
continue

#We decrypt the PDF file
pdf_path = decrypt_pdf(pdf_path)
pdf_path = element

if replace:
output_path = pdf_path
else:
output_path = pdf_path[:-4] + "_clean.pdf"

# We decrypt the PDF file
pdf_path = decrypt_pdf(pdf_path)

#Extract metadata
try:
metadict = extract_metadata(pdf_path)
print("Metadata:")
print("Archivo: " + metadict["Archivo"])
print("Autor: " + metadict["Autor"])
print("Asignatura: " + metadict["Asignatura"])
print("Curso y Grado: " + metadict["Curso y Grado"])
print("Facultad: " + metadict["Facultad"])
print("Universidad: " + metadict["Universidad"])
except Exception as e:
print("Failed to extract metadata:", e)
# If short mode is not active, extract metadata
if not short:
try:
metadict = extract_metadata(pdf_path)
print("Metadata:")
print("Archivo: " + metadict["Archivo"])
print("Autor: " + metadict["Autor"])
print("Asignatura: " + metadict["Asignatura"])
print("Curso y Grado: " + metadict["Curso y Grado"])
print("Facultad: " + metadict["Facultad"])
print("Universidad: " + metadict["Universidad"])
except Exception as e:
print("Failed to extract metadata:", e)

# Call the cleaning function
return_msg = clean_pdf(pdf_path, output_path)
# Call the cleaning function
return_msg = clean_pdf(pdf_path, output_path)

if return_msg["Success"]:
print("Cleaning successful. File saved in", return_msg["return_path"])
else:
print("Error:", return_msg["Error"])
if return_msg["Success"]:
print(pdf_path + "cleaning successful. File saved in " +
return_msg["return_path"])
else:
print("Error cleaning " + pdf_path + ": " + return_msg["Error"])

if __name__ == "__main__":
print('Call from the "gulagcleaner" command.')
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = gulagcleaner
version = 0.7.0
version = 0.8.0
author = YM162
author_email = [email protected]
description = Ad removal tool for PDFs written in python.
Expand Down Expand Up @@ -29,4 +29,4 @@ exclude =

[options.entry_points]
console_scripts =
gulagcleaner = gulagcleaner.command_line:main
gulagcleaner = gulagcleaner.command_line:main

0 comments on commit 9090208

Please sign in to comment.