From 618c3f63f89b2086d83728642fb436e9264b022e Mon Sep 17 00:00:00 2001 From: jseg380 Date: Fri, 15 Sep 2023 11:10:39 +0200 Subject: [PATCH] Multiple files / folders feature added --- README.md | 7 +- gulagcleaner/command_line.py | 125 +++++++++++++++++++++++------------ setup.cfg | 4 +- 3 files changed, 87 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 936bb7c..04a851f 100644 --- a/README.md +++ b/README.md @@ -32,10 +32,10 @@ Gulag Cleaner can be used through both a Command Line Interface (CLI) and in you ## Command Line Interface -To use Gulag Cleaner through the CLI, simply run the following command, replacing `` with the name of your PDF file: +To use Gulag Cleaner through the CLI, simply run the following command, replacing `` with the name of one or more PDF files or folders containing PDFs: ``` -gulagcleaner [-r] [-h] [-v] +gulagcleaner [-r] [-s] [-h] [-v] ... ``` ## Options @@ -43,6 +43,7 @@ gulagcleaner [-r] [-h] [-v] Gulag Cleaner provides several options for its usage: > * '-r': Replace the original file with the cleaned version. +> * '-s': Do not show metadata about cleaned files. > * '-h': Display the help message, providing information on how to use Gulag Cleaner. > * '-v': Display the current version of Gulag Cleaner. @@ -60,4 +61,4 @@ return_msg = clean_pdf("file.pdf") Gulag Cleaner is distributed under the GPL-3 license, which means it's open-source and free to use. # Contributing -We're always looking for ways to improve Gulag Cleaner, and we welcome contributions from the community. If you have ideas for improvements or bug fixes, please feel free to submit a pull request. \ No newline at end of file +We're always looking for ways to improve Gulag Cleaner, and we welcome contributions from the community. If you have ideas for improvements or bug fixes, please feel free to submit a pull request. 
diff --git a/gulagcleaner/command_line.py b/gulagcleaner/command_line.py index 250925d..682b5f9 100644 --- a/gulagcleaner/command_line.py +++ b/gulagcleaner/command_line.py @@ -1,82 +1,119 @@ from gulagcleaner.extract import clean_pdf from gulagcleaner.decrypt import decrypt_pdf from gulagcleaner.metadata import extract_metadata -from os.path import exists +from os.path import exists, isdir, join +from os import listdir + +def parseArgs(): + ''' + Function to parse arguments. + + Checks for any optional arguments passed to the program and activates + the corresponding flags. + ''' + from sys import argv + targeted = ['-h', '-r', '-s', '-v', argv[0]] + + return { + 'help': '-h' in argv, + 'replace': '-r' in argv, + 'short': '-s' in argv, + 'version': '-v' in argv, + 'files': [arg for arg in argv if arg not in targeted] + } def main(): ''' Main function for the "gulagcleaner" CLI command. - The "gulagcleaner" command takes an argument for the path of a PDF file and tries to remove the ads inside it. The new PDF is saved in the same folder. + The "gulagcleaner" command takes arguments for the path of one or more + files which can be PDF files or folders containing PDFs, and tries to + remove the ads inside of them. The new PDFs are saved in their original + location. Available CLI arguments: -h : Display help information. - -r : Replace the original file with the cleaned file. + -r : Replace original files with their cleaned version. + -s : Do not show metadata about cleaned files. -v : Display the version of the program. 
''' - import sys + arguments = parseArgs() # Check for the -h argument - if '-h' in sys.argv: - print("Usage: gulagcleaner [-h] [-r] [-o] [-v] ") + if arguments["help"]: + print("Usage: gulagcleaner [-h] [-r] [-s] [-v] ...") print("") - print("Removes ads from a PDF file.") + print("Removes ads from PDF files.") print("") print("Positional arguments:") - print(" pdf_path The PDF file to clean.") + print(" pdf_path PDF file to clean.") print("") print("Optional arguments:") print(" -h Show this help message.") - print(" -r Replace the original file with the cleaned file.") + print(" -r Replace original files with their cleaned version.") + print(" -s Do not show metadata about cleaned files.") print(" -v Show the version of the program.") return # Check for the -v argument - if '-v' in sys.argv: - print("Current version: 0.7.0") + if arguments["version"]: + print("Current version: 0.8.0") return # Get the pdf_path argument - if len(sys.argv) < 2: - print('Usage: gulagcleaner [-h] [-r] [-v] ') - return - pdf_path = sys.argv[-1] - - # Check if the file exists - if not exists(pdf_path): - print("File not found.") + if len(arguments["files"]) == 0: + print('Usage: gulagcleaner [-h] [-r] [-s] [-v] ...') return - # Check if the -r argument is present - if '-r' in sys.argv: - output_path = pdf_path - else: - output_path = pdf_path[:-4] + "_clean.pdf" + replace = arguments["replace"] + short = arguments["short"] + + for element in arguments["files"]: + # Check if the file exists + if not exists(element): + print(element + " not found.") + continue + + # Check if file is a directory + if isdir(element): + # Add PDF files of directory to list of files to clean + arguments["files"] += [join(element, file) for file in listdir(element) + if file.endswith('.pdf') or isdir(join(element, file))] + continue - #We decrypt the PDF file - pdf_path = decrypt_pdf(pdf_path) + pdf_path = element + + if replace: + output_path = pdf_path + else: + output_path = pdf_path[:-4] + "_clean.pdf" + 
+ # We decrypt the PDF file + pdf_path = decrypt_pdf(pdf_path) - #Extract metadata - try: - metadict = extract_metadata(pdf_path) - print("Metadata:") - print("Archivo: " + metadict["Archivo"]) - print("Autor: " + metadict["Autor"]) - print("Asignatura: " + metadict["Asignatura"]) - print("Curso y Grado: " + metadict["Curso y Grado"]) - print("Facultad: " + metadict["Facultad"]) - print("Universidad: " + metadict["Universidad"]) - except Exception as e: - print("Failed to extract metadata:", e) + # If short mode is not active, extract metadata + if not short: + try: + metadict = extract_metadata(pdf_path) + print("Metadata:") + print("Archivo: " + metadict["Archivo"]) + print("Autor: " + metadict["Autor"]) + print("Asignatura: " + metadict["Asignatura"]) + print("Curso y Grado: " + metadict["Curso y Grado"]) + print("Facultad: " + metadict["Facultad"]) + print("Universidad: " + metadict["Universidad"]) + except Exception as e: + print("Failed to extract metadata:", e) - # Call the cleaning function - return_msg = clean_pdf(pdf_path, output_path) + # Call the cleaning function + return_msg = clean_pdf(pdf_path, output_path) - if return_msg["Success"]: - print("Cleaning successful. File saved in", return_msg["return_path"]) - else: - print("Error:", return_msg["Error"]) + if return_msg["Success"]: + print(pdf_path + "cleaning successful. File saved in " + + return_msg["return_path"]) + else: + print("Error cleaning " + pdf_path + ": " + return_msg["Error"]) if __name__ == "__main__": print('Call from the "gulagcleaner" command.') diff --git a/setup.cfg b/setup.cfg index 119b092..c30ed28 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = gulagcleaner -version = 0.7.0 +version = 0.8.0 author = YM162 author_email = david.fontaneda16@gmail.com description = Ad removal tool for PDFs written in python. 
@@ -29,4 +29,4 @@ exclude = [options.entry_points] console_scripts = - gulagcleaner = gulagcleaner.command_line:main \ No newline at end of file + gulagcleaner = gulagcleaner.command_line:main