-
-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from jseg380/development
Multiple files / folders feature added + New argument parsing
- Loading branch information
Showing
3 changed files
with
87 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,82 +1,119 @@ | ||
from gulagcleaner.extract import clean_pdf | ||
from gulagcleaner.decrypt import decrypt_pdf | ||
from gulagcleaner.metadata import extract_metadata | ||
from os.path import exists | ||
from os.path import exists, isdir, join | ||
from os import listdir | ||
|
||
def parseArgs():
    '''
    Parse the command-line arguments of the "gulagcleaner" command.

    Reads sys.argv at call time, skips the program name stored in
    position 0, and separates the recognised option flags from every
    other argument.

    Returns a dict with the keys:
    'help'    : True if -h was passed.
    'replace' : True if -r was passed.
    'short'   : True if -s was passed.
    'version' : True if -v was passed.
    'files'   : list of the remaining arguments, in the order given.
    '''
    from sys import argv

    flags = ('-h', '-r', '-s', '-v')

    # Drop the program name by position, not by value: filtering on the
    # value of argv[0] would also discard a path argument that happens to
    # be spelled the same way. Slicing also copes with an empty argv.
    args = argv[1:]

    return {
        'help': '-h' in args,
        'replace': '-r' in args,
        'short': '-s' in args,
        'version': '-v' in args,
        'files': [arg for arg in args if arg not in flags],
    }
|
||
def main():
    '''
    Main function for the "gulagcleaner" CLI command.

    The "gulagcleaner" command takes arguments for the path of one or more
    files which can be PDF files or folders containing PDFs, and tries to
    remove the ads inside of them. The new PDFs are saved in their original
    location.

    Folders are expanded into the PDF files and sub-folders they contain;
    those entries are processed after the ones already queued. Each cleaned
    file is written next to its source as "<name>_clean.pdf", or to the
    source path itself when -r is given. Paths that do not exist are
    reported and skipped.

    Available CLI arguments:
    -h : Display help information.
    -r : Replace original files with their cleaned version.
    -s : Do not show metadata about cleaned files.
    -v : Display the version of the program.
    '''
    from collections import deque
    from os.path import splitext

    arguments = parseArgs()

    # Check for the -h argument
    if arguments["help"]:
        print("Usage: gulagcleaner [-h] [-r] [-s] [-v] <pdf_path>...")
        print("")
        print("Removes ads from PDF files.")
        print("")
        print("Positional arguments:")
        print(" pdf_path PDF file to clean.")
        print("")
        print("Optional arguments:")
        print(" -h Show this help message.")
        print(" -r Replace original files with their cleaned version.")
        print(" -s Do not show metadata about cleaned files.")
        print(" -v Show the version of the program.")
        return

    # Check for the -v argument
    if arguments["version"]:
        print("Current version: 0.8.0")
        return

    # Without at least one path there is nothing to clean
    if not arguments["files"]:
        print('Usage: gulagcleaner [-h] [-r] [-s] [-v] <pdf_path>...')
        return

    replace = arguments["replace"]
    short = arguments["short"]

    # Use a dedicated work queue instead of extending the list that is
    # being iterated: folder contents are appended at the end, so they are
    # handled after everything that was queued before them.
    pending = deque(arguments["files"])

    while pending:
        element = pending.popleft()

        # Check if the file exists
        if not exists(element):
            print(element + " not found.")
            continue

        # Check if file is a directory
        if isdir(element):
            # Queue the PDF files and sub-folders of the directory.
            # The extension test ignores case so "X.PDF" is picked up too.
            for file in listdir(element):
                candidate = join(element, file)
                if file.lower().endswith('.pdf') or isdir(candidate):
                    pending.append(candidate)
            continue

        pdf_path = element

        if replace:
            output_path = pdf_path
        else:
            # splitext removes the real extension, whatever its length,
            # instead of blindly cutting the last four characters.
            output_path = splitext(pdf_path)[0] + "_clean.pdf"

        # We decrypt the PDF file; the returned path is the one used for
        # every following step.
        pdf_path = decrypt_pdf(pdf_path)

        # If short mode is not active, extract metadata
        if not short:
            # Best effort: a metadata failure is reported but must not
            # prevent the file from being cleaned.
            try:
                metadict = extract_metadata(pdf_path)
                print("Metadata:")
                print("Archivo: " + metadict["Archivo"])
                print("Autor: " + metadict["Autor"])
                print("Asignatura: " + metadict["Asignatura"])
                print("Curso y Grado: " + metadict["Curso y Grado"])
                print("Facultad: " + metadict["Facultad"])
                print("Universidad: " + metadict["Universidad"])
            except Exception as e:
                print("Failed to extract metadata:", e)

        # Call the cleaning function
        return_msg = clean_pdf(pdf_path, output_path)

        # The reported values are passed to print() as separate arguments
        # so that they are stringified for us and a non-string value
        # cannot raise a TypeError while building the message.
        if return_msg["Success"]:
            print(pdf_path + " cleaning successful. File saved in",
                  return_msg["return_path"])
        else:
            print("Error cleaning " + pdf_path + ":", return_msg["Error"])
|
||
# When the module is executed directly, only print a hint naming the "gulagcleaner" command.
if __name__ == "__main__": | ||
print('Call from the "gulagcleaner" command.') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[metadata] | ||
name = gulagcleaner | ||
version = 0.7.0 | ||
version = 0.8.0 | ||
author = YM162 | ||
author_email = [email protected] | ||
description = Ad removal tool for PDFs written in python. | ||
|
@@ -29,4 +29,4 @@ exclude = | |
|
||
[options.entry_points] | ||
console_scripts = | ||
gulagcleaner = gulagcleaner.command_line:main | ||
gulagcleaner = gulagcleaner.command_line:main |