From 618c3f63f89b2086d83728642fb436e9264b022e Mon Sep 17 00:00:00 2001
From: jseg380 <jseg380@correo.ugr.es>
Date: Fri, 15 Sep 2023 11:10:39 +0200
Subject: [PATCH] Multiple files / folders feature added

---
 README.md                    |   7 +-
 gulagcleaner/command_line.py | 125 +++++++++++++++++++++++------------
 setup.cfg                    |   4 +-
 3 files changed, 87 insertions(+), 49 deletions(-)
diff --git a/README.md b/README.md
index 936bb7c..04a851f 100644
--- a/README.md
+++ b/README.md
@@ -32,10 +32,10 @@ Gulag Cleaner can be used through both a Command Line Interface (CLI) and in you
 
 ## Command Line Interface
 
-To use Gulag Cleaner through the CLI, simply run the following command, replacing `<filename>` with the name of your PDF file:
+To use Gulag Cleaner through the CLI, simply run the following command, replacing `<filename>` with the names of one or more PDF files or folders containing PDFs:
 
 ```
-gulagcleaner [-r] [-h] [-v] <filename>
+gulagcleaner [-r] [-s] [-h] [-v] <filename>...
 ```
 
 ## Options
@@ -43,6 +43,7 @@ gulagcleaner [-r] [-h] [-v] <filename>
 Gulag Cleaner provides several options for its usage:
 
 > * '-r': Replace the original file with the cleaned version.
+> * '-s': Do not show metadata about cleaned files.
 > * '-h': Display the help message, providing information on how to use Gulag Cleaner.
 > * '-v': Display the current version of Gulag Cleaner.
 
@@ -60,4 +61,4 @@ return_msg = clean_pdf("file.pdf")
 Gulag Cleaner is distributed under the GPL-3 license, which means it's open-source and free to use.
 
 # Contributing
-We're always looking for ways to improve Gulag Cleaner, and we welcome contributions from the community. If you have ideas for improvements or bug fixes, please feel free to submit a pull request.
\ No newline at end of file
+We're always looking for ways to improve Gulag Cleaner, and we welcome contributions from the community. If you have ideas for improvements or bug fixes, please feel free to submit a pull request.
diff --git a/gulagcleaner/command_line.py b/gulagcleaner/command_line.py
index 250925d..682b5f9 100644
--- a/gulagcleaner/command_line.py
+++ b/gulagcleaner/command_line.py
@@ -1,82 +1,119 @@
 from gulagcleaner.extract import clean_pdf
 from gulagcleaner.decrypt import decrypt_pdf
 from gulagcleaner.metadata import extract_metadata
-from os.path import exists
+from os.path import exists, isdir, join
+from os import listdir
+
+def parseArgs():
+    '''
+    Parse sys.argv into option flags and the list of target paths.
+
+    Returns a dict with boolean 'help', 'replace', 'short' and 'version'
+    entries, plus 'files': every other argument after the program name.
+    '''
+    from sys import argv
+    flags = ('-h', '-r', '-s', '-v')
+    # Skip argv[0] by position so a path equal to it is not dropped.
+    return {
+        'help': '-h' in argv,
+        'replace': '-r' in argv,
+        'short': '-s' in argv,
+        'version': '-v' in argv,
+        'files': [arg for arg in argv[1:] if arg not in flags]
+    }
 
 def main():
     '''
     Main function for the "gulagcleaner" CLI command.
 
-    The "gulagcleaner" command takes an argument for the path of a PDF file and tries to remove the ads inside it. The new PDF is saved in the same folder.
+    The "gulagcleaner" command takes the paths of one or more PDF files or
+    folders containing PDFs and tries to remove the ads inside them. Each
+    cleaned PDF is saved next to its original with a "_clean" suffix, or
+    over the original when -r is given.
     
     Available CLI arguments:
     -h : Display help information.
-    -r : Replace the original file with the cleaned file.
+    -r : Replace original files with their cleaned version.
+    -s : Do not show metadata about cleaned files.
     -v : Display the version of the program.
 
     '''
-    import sys
+    arguments = parseArgs()
 
     # Check for the -h argument
-    if '-h' in sys.argv:
-        print("Usage: gulagcleaner [-h] [-r] [-o] [-v] <pdf_path>")
+    if arguments["help"]:
+        print("Usage: gulagcleaner [-h] [-r] [-s] [-v] <pdf_path>...")
         print("")
-        print("Removes ads from a PDF file.")
+        print("Removes ads from PDF files.")
         print("")
         print("Positional arguments:")
-        print("  pdf_path      The PDF file to clean.")
+        print("  pdf_path      PDF file to clean.")
         print("")
         print("Optional arguments:")
         print("  -h            Show this help message.")
-        print("  -r            Replace the original file with the cleaned file.")
+        print("  -r            Replace original files with their cleaned version.")
+        print("  -s            Do not show metadata about cleaned files.")
         print("  -v            Show the version of the program.")
         return
 
     # Check for the -v argument
-    if '-v' in sys.argv:
-        print("Current version: 0.7.0")
+    if arguments["version"]:
+        print("Current version: 0.8.0")
         return
 
     # Get the pdf_path argument
-    if len(sys.argv) < 2:
-        print('Usage: gulagcleaner [-h] [-r] [-v] <pdf_path>')
-        return
-    pdf_path = sys.argv[-1]
-
-    # Check if the file exists
-    if not exists(pdf_path):
-        print("File not found.")
+    if len(arguments["files"]) == 0:
+        print('Usage: gulagcleaner [-h] [-r] [-s] [-v] <pdf_path>...')
         return
     
-    # Check if the -r argument is present
-    if '-r' in sys.argv:
-        output_path = pdf_path
-    else:
-        output_path = pdf_path[:-4] + "_clean.pdf"
+    replace = arguments["replace"]
+    short = arguments["short"]
+    
+    for element in arguments["files"]:
+        # Check if the file exists
+        if not exists(element):
+            print(element + " not found.")
+            continue
+        
+        # Check if file is a directory
+        if isdir(element):
+            # Queue its PDFs and subfolders; this loop also visits appended items
+            arguments["files"] += [join(element, file) for file in listdir(element) 
+                                   if file.endswith('.pdf') or isdir(join(element, file))]
+            continue
 
-    #We decrypt the PDF file
-    pdf_path = decrypt_pdf(pdf_path)
+        pdf_path = element
+        
+        if replace:
+            output_path = pdf_path
+        else:
+            output_path = pdf_path[:-4] + "_clean.pdf"
+        
+        # We decrypt the PDF file
+        pdf_path = decrypt_pdf(pdf_path)
 
-    #Extract metadata
-    try:
-        metadict = extract_metadata(pdf_path)
-        print("Metadata:")
-        print("Archivo: " + metadict["Archivo"])
-        print("Autor: " + metadict["Autor"])
-        print("Asignatura: " + metadict["Asignatura"])
-        print("Curso y Grado: " + metadict["Curso y Grado"])
-        print("Facultad: " + metadict["Facultad"])
-        print("Universidad: " + metadict["Universidad"])
-    except Exception as e:
-        print("Failed to extract metadata:", e)         
+        # If short mode is not active, extract metadata
+        if not short:
+            try:
+                metadict = extract_metadata(pdf_path)
+                print("Metadata:")
+                print("Archivo: " + metadict["Archivo"])
+                print("Autor: " + metadict["Autor"])
+                print("Asignatura: " + metadict["Asignatura"])
+                print("Curso y Grado: " + metadict["Curso y Grado"])
+                print("Facultad: " + metadict["Facultad"])
+                print("Universidad: " + metadict["Universidad"])
+            except Exception as e:
+                print("Failed to extract metadata:", e)
 
-    # Call the cleaning function
-    return_msg = clean_pdf(pdf_path, output_path)
+        # Call the cleaning function
+        return_msg = clean_pdf(pdf_path, output_path)
 
-    if return_msg["Success"]:
-        print("Cleaning successful. File saved in", return_msg["return_path"])
-    else:
-        print("Error:", return_msg["Error"])
+        if return_msg["Success"]:
+            print(pdf_path + ": cleaning successful. File saved in " +
+                  return_msg["return_path"])
+        else:
+            print("Error cleaning " + pdf_path + ": " + return_msg["Error"])
 
 if __name__ == "__main__":
     print('Call from the "gulagcleaner" command.')
diff --git a/setup.cfg b/setup.cfg
index 119b092..c30ed28 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = gulagcleaner
-version = 0.7.0
+version = 0.8.0
 author = YM162
 author_email = david.fontaneda16@gmail.com
 description = Ad removal tool for PDFs written in python.
@@ -29,4 +29,4 @@ exclude =
 
 [options.entry_points]
 console_scripts =
-    gulagcleaner = gulagcleaner.command_line:main
\ No newline at end of file
+    gulagcleaner = gulagcleaner.command_line:main