diff --git a/README.md b/README.md index d86e5d7..9873f35 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,18 @@ -# PDFU -PDF Unembedder: Functional inverse of functions that embbed pdf pages inside other documents. The most prominent example is the embedPages() function of PDF-lib.js
+# Gulag-cleaner-cli -This has the side efect of removing ads and watermarks placed by many websites. +Herramienta de eliminación de anuncios en PDFs generados por la plataforma Wuolah. +Es un inverso funcional de las funciones que insertan páginas PDF dentro de otros documentos. El ejemplo más prominente es la función embedPages() de la librería PDF-lib.js
-# How to install:
->pip install pdfu==0.2.1
+Además, es capaz de extraer los metadatos (Autor, Asignatura, Universidad...) del archivo. Para más información, consultar la descripción de la función deembed().
-# Usage
+# Cómo instalar
+>pip install gulagcleaner
+ +# Uso
CLI:
->pdfu \
+>gulagcleaner \<filename>
Code: ->from PDFU.PDFU_Extract import deembed +>from gulagcleaner.gulagcleaner_extract import deembed > ->return_msg = deembed( "file.pdf" ) +>return_msg = deembed( "file.pdf" ) \ No newline at end of file diff --git a/dist/PDFU-0.2.1-py3-none-any.whl b/dist/PDFU-0.2.1-py3-none-any.whl deleted file mode 100644 index 826b6b8..0000000 Binary files a/dist/PDFU-0.2.1-py3-none-any.whl and /dev/null differ diff --git a/dist/PDFU-0.2.1.tar.gz b/dist/PDFU-0.2.1.tar.gz deleted file mode 100644 index e9747bc..0000000 Binary files a/dist/PDFU-0.2.1.tar.gz and /dev/null differ diff --git a/dist/gulagcleaner-0.4.1-py3-none-any.whl b/dist/gulagcleaner-0.4.1-py3-none-any.whl new file mode 100644 index 0000000..5101abf Binary files /dev/null and b/dist/gulagcleaner-0.4.1-py3-none-any.whl differ diff --git a/dist/gulagcleaner-0.4.1.tar.gz b/dist/gulagcleaner-0.4.1.tar.gz new file mode 100644 index 0000000..a7fc539 Binary files /dev/null and b/dist/gulagcleaner-0.4.1.tar.gz differ diff --git a/PDFU/__init__.py b/gulagcleaner/__init__.py similarity index 100% rename from PDFU/__init__.py rename to gulagcleaner/__init__.py diff --git a/PDFU/__pycache__/PDFU_extract.cpython-39.pyc b/gulagcleaner/__pycache__/PDFU_extract.cpython-39.pyc similarity index 100% rename from PDFU/__pycache__/PDFU_extract.cpython-39.pyc rename to gulagcleaner/__pycache__/PDFU_extract.cpython-39.pyc diff --git a/PDFU/command_line.py b/gulagcleaner/command_line.py similarity index 72% rename from PDFU/command_line.py rename to gulagcleaner/command_line.py index f3e6fa1..f77aed5 100644 --- a/PDFU/command_line.py +++ b/gulagcleaner/command_line.py @@ -1,10 +1,10 @@ -from PDFU import PDFU_extract +from gulagcleaner import gulagcleaner_extract from os.path import exists def main(): ''' - Main function called from the "pdfu" CLI command. - The pdfu command takes an argv for the path and tries to deembed the pages inside it. + Main function called from the "gulagcleaner" CLI command. 
+ The gulagcleaner command takes an argv for the path and tries to deembed the pages inside it. The pages are saved in a new PDF in the same folder. ''' @@ -12,7 +12,7 @@ def main(): if len(sys.argv)>1: arg = sys.argv[1] if exists(arg): - return_msg=PDFU_extract.deembed(arg) + return_msg=gulagcleaner_extract.deembed(arg) if return_msg["Success"]: print("Deembedding successful. File saved in",return_msg["return_path"]) @@ -29,7 +29,7 @@ def main(): else: print("File not found.") else: - print('Usage: pdfu "filename"') + print('Usage: gulagcleaner "filename"') if __name__ == "__main__": - print('Call from the "pdfu" command.') \ No newline at end of file + print('Call from the "gulagcleaner" command.') \ No newline at end of file diff --git a/PDFU/PDFU_extract.py b/gulagcleaner/gulagcleaner_extract.py similarity index 77% rename from PDFU/PDFU_extract.py rename to gulagcleaner/gulagcleaner_extract.py index d74f650..333e04a 100644 --- a/PDFU/PDFU_extract.py +++ b/gulagcleaner/gulagcleaner_extract.py @@ -30,12 +30,20 @@ def deembed(pdf_path): pdf_path: The path where the pdf file is located. Returns: - return_msg: Dict. with four values: - Success: bool indicating whether the process was successful. - return_path: If successful, returns the path of the deembedded file. - Error: If unsuccessful, returns a description of the error. - Meta: Dictionary with information about the file. + return_msg: (Dictionary): + Success: (bool) indicating whether the process was successful. + return_path: (string) If successful, returns the path of the deembedded file. + Error: (string) If unsuccessful, returns a description of the error. 
+ Meta: (dictionary) Information about the file: + Archivo (string) + Autor (string) + Asignatura (string) + Curso y Grado (string) + Facultad (string) + Universidad (string) + ''' + print("Trying to Deembed:",pdf_path) return_msg={"Success":False,"return_path":"","Error":"","Meta":{}} try: @@ -50,7 +58,7 @@ def deembed(pdf_path): metadict = meta(pdf_path) return_msg["Meta"]=metadict except: - print("Meta not extracted. Probably not a W file.") + print("Meta not extracted. Probably not a Wuolah file.") prepdf.save(pdf_path[:-4]+"_inter.pdf") prepdf.close() @@ -82,6 +90,5 @@ def deembed(pdf_path): if __name__ == "__main__": - print('Call from the "pdfu" command.') - print(deembed("../tests/testpdf/AnonimoTema9.pdf")) + print('Call from the "gulagcleaner" command.') \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 876af39..b9a0bd7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,14 @@ [metadata] -name = PDFU -version = 0.2.1 +name = gulagcleaner +version = 0.4.1 author = YM162 author_email = david.fontaneda@estudiante.uam.es -description = PDF Unembedder for PDFPage objects +description = Elimina los anuncios de Wuolah y extrae sus metadatos. long_description = file: README.md long_description_content_type = text/markdown -url = https://github.com/YM162/PDFU +url = https://github.com/YM162/gulag-cleaner-cli project_urls = - Bug Tracker = https://github.com/YM162/PDFU/issues + Bug Tracker = https://github.com/YM162/gulag-cleaner-cli/issues classifiers = Programming Language :: Python :: 3 License :: OSI Approved :: MIT License @@ -18,6 +18,7 @@ classifiers = install_requires = pdfrw>=0.4 pikepdf>=5.1.2 + pdfminer.six>=20220524 packages = find: python_requires = >=3.6 @@ -28,4 +29,4 @@ exclude = [options.entry_points] console_scripts = - pdfu = PDFU.command_line:main \ No newline at end of file + gulagcleaner = gulagcleaner.command_line:main \ No newline at end of file