From fe2c78fe53f9d45c2e742971e371b032d264968e Mon Sep 17 00:00:00 2001 From: slashtechno <77907286+slashtechno@users.noreply.github.com> Date: Fri, 17 May 2024 18:56:51 -0500 Subject: [PATCH] feat: Add regex validation for email subject --- llmail/__main__.py | 37 ++++++++++++++++++++++++++----------- llmail/utils/cli_args.py | 7 ++++--- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/llmail/__main__.py b/llmail/__main__.py index cf26408..84c191c 100644 --- a/llmail/__main__.py +++ b/llmail/__main__.py @@ -11,6 +11,7 @@ from email.utils import getaddresses, parsedate_to_datetime, make_msgid from datetime import timezone import time +import re from llmail.utils.cli_args import argparser @@ -71,7 +72,6 @@ def __repr__(self): email_threads = {} - def main(): """Main entry point for the script.""" global args @@ -79,7 +79,6 @@ def main(): global email_threads args = argparser.parse_args() - match args.subcommand: case "list-folders": with IMAPClient(args.imap_host) as client: @@ -97,7 +96,7 @@ def main(): logger.info(f"Watching for new emails every {args.watch_interval} seconds") while True: fetch_and_process_emails( - subject=args.subject_key, + look_for_subject=args.subject_key, alias=args.alias, system_prompt=args.system_prompt, ) @@ -106,15 +105,16 @@ def main(): email_threads = {} else: fetch_and_process_emails( - subject=args.subject_key, + look_for_subject=args.subject_key, alias=args.alias, system_prompt=args.system_prompt, ) + def fetch_and_process_emails( - subject: str, - alias: str = None, - system_prompt: str = None, + look_for_subject: str, + alias: str = None, + system_prompt: str = None, ): """Fetch and process emails from the IMAP server.""" global email_threads @@ -135,12 +135,23 @@ def fetch_and_process_emails( logger.debug(f"Failed to select folder {folder[2]}. Skipping...") continue # Might be smart to also search for forwarded emails - messages = client.search(["OR", "SUBJECT", subject, "SUBJECT", f"Re: {subject}"]) + messages = client.search( + ["OR", "SUBJECT", look_for_subject, "SUBJECT", f"Re: {look_for_subject}"] + ) for msg_id in messages: # TODO: It seems this will throw a KeyError if an email is sent while this for loop is running. May have been fixed by emptying email_threads at the end of the while loop? This should be tested again to confirm msg_data = client.fetch([msg_id], ["ENVELOPE", "BODY[]", "RFC822.HEADER"]) envelope = msg_data[msg_id][b"ENVELOPE"] subject = envelope.subject.decode() + # Use regex to verify that the subject optionally starts with "Fwd: " or "Re: " and then the intended subject (nothing case-sensitive) + # re.escape is used to escape any special characters in the subject + if not re.match( + r"^(Fwd: ?|Re: ?)?" + re.escape(look_for_subject) + r"$", subject, re.IGNORECASE + ): + logger.info( + f"Skipping email with subject '{subject}' as it does not match the intended subject" + ) + continue timestamp = envelope.date # Parse the headers from the email data message = message_from_bytes(msg_data[msg_id][b"RFC822.HEADER"]) @@ -218,7 +229,9 @@ def fetch_and_process_emails( msg_id = email_thread.initial_email.imap_id references_ids = email_thread.initial_email.references elif len(email_thread.replies) > 0 and email_thread.replies[-1].sender != bot_email: - logger.debug(f"Last email in thread for email {message_id} is from {email_thread.replies[-1].sender}") + logger.debug( + f"Last email in thread for email {message_id} is from {email_thread.replies[-1].sender}" + ) message_id = email_thread.replies[-1].message_id msg_id = email_thread.replies[-1].imap_id references_ids = email_thread.replies[-1].references @@ -424,7 +437,7 @@ def send_reply( # Set roles deletes the sender key so we need to store the sender before calling it sender = thread[-1]["sender"] thread = set_roles(thread) - if system_prompt: + if system_prompt: thread.insert(0, {"role": "system", "content": system_prompt}) references_ids.append(message_id) # thread_from_msg_id = get_thread_history(client, msg_id) @@ -453,7 +466,9 @@ def send_reply( subject=f"Re: {subject}", headers={"In-Reply-To": message_id, "References": " ".join(references_ids)}, contents=generated_response, - message_id=make_msgid(domain=args.message_id_domain if args.message_id_domain else "llmail"), + message_id=make_msgid( + domain=args.message_id_domain if args.message_id_domain else "llmail" + ), ) diff --git a/llmail/utils/cli_args.py b/llmail/utils/cli_args.py index 7a48af9..4cf9644 100644 --- a/llmail/utils/cli_args.py +++ b/llmail/utils/cli_args.py @@ -15,7 +15,6 @@ """ - def set_argparse(): global argparser @@ -36,7 +35,7 @@ def set_argparse(): # Dest means that the current subcommand can be accessed via args.subcommand dest="subcommand", title="Subcommands", - ) + ) # Subcommand: list-folders _ = subparsers.add_parser("list-folders", help="List all folders in the IMAP account and exit") # General arguments @@ -66,7 +65,9 @@ def set_argparse(): ai_api.add_argument( "--openai-model", help="Model to use for the LLM", - default=os.getenv("OPENAI_MODEL") if os.getenv("OPENAI_MODEL") else "mistralai/mistral-7b-instruct:free", + default=os.getenv("OPENAI_MODEL") + if os.getenv("OPENAI_MODEL") + else "mistralai/mistral-7b-instruct:free", ) ai_api.add_argument( "--system-prompt",