Actualizado el nombre de PDFU a gulagcleaner.

YM162 · Aug 22, 2022 · 7829514 · 7829514
1 parent 776868b
commit 7829514
Show file tree

Hide file tree

Showing 10 changed files with 39 additions and 29 deletions.
diff --git a/README.md b/README.md
@@ -1,16 +1,18 @@
-# PDFU
-PDF Unembedder: Functional inverse of functions that embbed pdf pages inside other documents. The most prominent example is the embedPages() function of PDF-lib.js</br>
+# Gulag-cleaner-cli
 
-This has the side efect of removing ads and watermarks placed by many websites.
+Herramienta de eliminación de anuncios en PDFs generados por la plataforma Wuolah.
+Es un inverso funcional de las funciones que insertan páginas PDF dentro de otros documentos. El ejemplo más prominente es la función embedPages() de la librería PDF-lib.js.</br>
 
-# How to install:</br>
->pip install pdfu==0.2.1</br>
+Adicionalmente, es capaz de extraer los metadatos (Autor, Asignatura, Universidad...) del archivo. Para más información, consultar la descripción de la función deembed.</br>
 
-# Usage</br>
+# Cómo instalar</br>
+>pip install gulagcleaner</br>
+
+# Uso</br>
 CLI:</br>
->pdfu \<filename\></br>
+>gulagcleaner \<filename\></br>
 
 Code:
->from PDFU.PDFU_Extract import deembed
+>from gulagcleaner.gulagcleaner_extract import deembed
 >
->return_msg = deembed( "file.pdf" )
+>return_msg = deembed( "file.pdf" )
diff --git a/dist/PDFU-0.2.1-py3-none-any.whl b/dist/PDFU-0.2.1-py3-none-any.whl
diff --git a/dist/PDFU-0.2.1.tar.gz b/dist/PDFU-0.2.1.tar.gz
diff --git a/dist/gulagcleaner-0.4.1-py3-none-any.whl b/dist/gulagcleaner-0.4.1-py3-none-any.whl
diff --git a/dist/gulagcleaner-0.4.1.tar.gz b/dist/gulagcleaner-0.4.1.tar.gz
diff --git a/PDFU/__init__.py → gulagcleaner/__init__.py b/PDFU/__init__.py → gulagcleaner/__init__.py
diff --git a/PDFU/__pycache__/PDFU_extract.cpython-39.pyc → ...r/__pycache__/PDFU_extract.cpython-39.pyc b/PDFU/__pycache__/PDFU_extract.cpython-39.pyc → ...r/__pycache__/PDFU_extract.cpython-39.pyc
diff --git a/PDFU/command_line.py → gulagcleaner/command_line.py b/PDFU/command_line.py → gulagcleaner/command_line.py
@@ -1,18 +1,18 @@
-from PDFU import PDFU_extract
+from gulagcleaner import gulagcleaner_extract
 from os.path import exists
 
 def main():
     '''
-    Main function called from the "pdfu" CLI command.
-    The pdfu command takes an argv for the path and tries to deembed the pages inside it.
+    Main function called from the "gulagcleaner" CLI command.
+    The gulagcleaner command takes an argv for the path and tries to deembed the pages inside it.
     The pages are saved in a new PDF in the same folder.
     '''
 
     import sys
     if len(sys.argv)>1:
         arg = sys.argv[1]
         if exists(arg):
-            return_msg=PDFU_extract.deembed(arg)
+            return_msg=gulagcleaner_extract.deembed(arg)
 
             if return_msg["Success"]:
                 print("Deembedding successful. File saved in",return_msg["return_path"])
@@ -29,7 +29,7 @@ def main():
         else:
             print("File not found.")
     else:
-        print('Usage: pdfu "filename"')
+        print('Usage: gulagcleaner "filename"')
 
 if __name__ == "__main__":
-    print('Call from the "pdfu" command.')
+    print('Call from the "gulagcleaner" command.')
diff --git a/PDFU/PDFU_extract.py → gulagcleaner/gulagcleaner_extract.py b/PDFU/PDFU_extract.py → gulagcleaner/gulagcleaner_extract.py
@@ -30,12 +30,20 @@ def deembed(pdf_path):
         pdf_path: The path where the pdf file is located.
         
     Returns:
-        return_msg: Dict. with four values:
-            Success: bool indicating whether the process was successful.
-            return_path: If successful, returns the path of the deembedded file.
-            Error: If unsuccessful, returns a description of the error.
-	        Meta: Dictionary with information about the file.
+        return_msg: (Dictionary):
+            Success: (bool) indicating whether the process was successful.
+            return_path: (string) If successful, returns the path of the deembedded file.
+            Error: (string) If unsuccessful, returns a description of the error.
+	        Meta: (dictionary) Information about the file:
+                Archivo (string)
+                Autor (string)
+                Asignatura (string)
+                Curso y Grado (string)
+                Facultad (string)
+                Universidad (string)
+
     '''
+
     print("Trying to Deembed:",pdf_path)
     return_msg={"Success":False,"return_path":"","Error":"","Meta":{}}
     try:
@@ -50,7 +58,7 @@ def deembed(pdf_path):
             metadict = meta(pdf_path)
             return_msg["Meta"]=metadict
         except:
-            print("Meta not extracted. Probably not a W file.")
+            print("Meta not extracted. Probably not a Wuolah file.")
 
         prepdf.save(pdf_path[:-4]+"_inter.pdf")
         prepdf.close()
@@ -82,6 +90,5 @@ def deembed(pdf_path):
 
 
 if __name__ == "__main__":
-    print('Call from the "pdfu" command.')
-    print(deembed("../tests/testpdf/AnonimoTema9.pdf"))
+    print('Call from the "gulagcleaner" command.')
 
diff --git a/setup.cfg b/setup.cfg
@@ -1,14 +1,14 @@
 [metadata]
-name = PDFU
-version = 0.2.1
+name = gulagcleaner
+version = 0.4.1
 author = YM162
 author_email = [email protected]
-description = PDF Unembedder for PDFPage objects
+description = Elimina los anuncios de Wuolah y extrae sus metadatos.
 long_description = file: README.md
 long_description_content_type = text/markdown
-url = https://github.com/YM162/PDFU
+url = https://github.com/YM162/gulag-cleaner-cli
 project_urls =
-    Bug Tracker = https://github.com/YM162/PDFU/issues
+    Bug Tracker = https://github.com/YM162/gulag-cleaner-cli/issues
 classifiers =
     Programming Language :: Python :: 3
     License :: OSI Approved :: MIT License
@@ -18,6 +18,7 @@ classifiers =
 install_requires =
     pdfrw>=0.4
     pikepdf>=5.1.2
+    pdfminer.six>=20220524
 
 packages = find:
 python_requires = >=3.6
@@ -28,4 +29,4 @@ exclude =
 
 [options.entry_points]
 console_scripts =
-    pdfu = PDFU.command_line:main
+    gulagcleaner = gulagcleaner.command_line:main