diff --git a/mailcom/parse.py b/mailcom/parse.py index 77546f7..4226978 100644 --- a/mailcom/parse.py +++ b/mailcom/parse.py @@ -2,7 +2,7 @@ import spacy as sp from transformers import pipeline from pathlib import Path -from mailcom import inout +from mailcom.inout import InoutHandler # please modify this section depending on your setup # input language - either "es" or "fr" @@ -116,13 +116,16 @@ def make_dir(path: str): print("Generating output directory/ies.") make_dir(path_output) # process the text - io = inout.InoutHandler() - eml_files = io.list_of_files(path_input) + io = InoutHandler(path_input) + io.list_of_files() # html_files = list_of_files(path_input, "html") - for file in eml_files: + for file in io.email_list: text = io.get_text(file) text = io.get_html_text(text) print(text) + print(io.email_content["date"]) + print(io.email_content["attachment"]) + print(io.email_content["attachement type"]) # skip this text if email could not be parsed if not text: continue