From 734a4855694768cf179526b4dcb78c20036e0899 Mon Sep 17 00:00:00 2001 From: Michael Bideau Date: Sun, 7 May 2023 11:05:25 +0200 Subject: [PATCH] feat: support email in name fields (single and double) --- .../expected/Double Parenthesesemail.vcard | 6 ++ .../Email in Name/expected/Second Email.vcard | 6 ++ .../Email in Name/src/Double_Email.v3.0.vcf | 1 + .../src/Double_ParenthesesEmail.v3.0.vcf | 1 + .../Empty Name/expected/Empty Name.vcard | 6 ++ test/cases/Empty Name/expected/No Name.vcard | 6 ++ test/cases/Empty Name/src/Empty_Name.v3.0.vcf | 1 + test/cases/Empty Name/src/No_Name.v3.0.vcf | 1 + test/ressources/src/Double_Email.v3.0.vcf | 6 ++ .../src/Double_ParenthesesEmail.v3.0.vcf | 6 ++ test/ressources/src/Empty_Name.v3.0.vcf | 6 ++ test/ressources/src/No_Name.v3.0.vcf | 6 ++ vcardlib.py | 97 +++++++++++++++++-- 13 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 test/cases/Email in Name/expected/Double Parenthesesemail.vcard create mode 100644 test/cases/Email in Name/expected/Second Email.vcard create mode 120000 test/cases/Email in Name/src/Double_Email.v3.0.vcf create mode 120000 test/cases/Email in Name/src/Double_ParenthesesEmail.v3.0.vcf create mode 100644 test/cases/Empty Name/expected/Empty Name.vcard create mode 100644 test/cases/Empty Name/expected/No Name.vcard create mode 120000 test/cases/Empty Name/src/Empty_Name.v3.0.vcf create mode 120000 test/cases/Empty Name/src/No_Name.v3.0.vcf create mode 100644 test/ressources/src/Double_Email.v3.0.vcf create mode 100644 test/ressources/src/Double_ParenthesesEmail.v3.0.vcf create mode 100644 test/ressources/src/Empty_Name.v3.0.vcf create mode 100644 test/ressources/src/No_Name.v3.0.vcf diff --git a/test/cases/Email in Name/expected/Double Parenthesesemail.vcard b/test/cases/Email in Name/expected/Double Parenthesesemail.vcard new file mode 100644 index 0000000..f444371 --- /dev/null +++ b/test/cases/Email in Name/expected/Double Parenthesesemail.vcard @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +EMAIL:double.parentheses_email@example.com +FN:Double Parenthesesemail +N:Parenthesesemail;Double;;; +END:VCARD diff --git a/test/cases/Email in Name/expected/Second Email.vcard b/test/cases/Email in Name/expected/Second Email.vcard new file mode 100644 index 0000000..4463011 --- /dev/null +++ b/test/cases/Email in Name/expected/Second Email.vcard @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +EMAIL:double.email@example.com +FN:Second Email +N:Email;Second;;; +END:VCARD diff --git a/test/cases/Email in Name/src/Double_Email.v3.0.vcf b/test/cases/Email in Name/src/Double_Email.v3.0.vcf new file mode 120000 index 0000000..72a996f --- /dev/null +++ b/test/cases/Email in Name/src/Double_Email.v3.0.vcf @@ -0,0 +1 @@ +../../../ressources/src/Double_Email.v3.0.vcf \ No newline at end of file diff --git a/test/cases/Email in Name/src/Double_ParenthesesEmail.v3.0.vcf b/test/cases/Email in Name/src/Double_ParenthesesEmail.v3.0.vcf new file mode 120000 index 0000000..09d1610 --- /dev/null +++ b/test/cases/Email in Name/src/Double_ParenthesesEmail.v3.0.vcf @@ -0,0 +1 @@ +../../../ressources/src/Double_ParenthesesEmail.v3.0.vcf \ No newline at end of file diff --git a/test/cases/Empty Name/expected/Empty Name.vcard b/test/cases/Empty Name/expected/Empty Name.vcard new file mode 100644 index 0000000..0b3437d --- /dev/null +++ b/test/cases/Empty Name/expected/Empty Name.vcard @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +EMAIL:empty.name@example.com +FN:Empty Name +N:Name;Empty;;; +END:VCARD diff --git a/test/cases/Empty Name/expected/No Name.vcard b/test/cases/Empty Name/expected/No Name.vcard new file mode 100644 index 0000000..28f2d30 --- /dev/null +++ b/test/cases/Empty Name/expected/No Name.vcard @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +EMAIL:no.name@example.com +FN:No Name +N:Name;No;;; +END:VCARD diff --git a/test/cases/Empty Name/src/Empty_Name.v3.0.vcf b/test/cases/Empty Name/src/Empty_Name.v3.0.vcf new file mode 120000 index 0000000..1343394 --- /dev/null +++ b/test/cases/Empty Name/src/Empty_Name.v3.0.vcf @@ -0,0 +1 @@ +../../../ressources/src/Empty_Name.v3.0.vcf \ No newline at end of file diff --git a/test/cases/Empty Name/src/No_Name.v3.0.vcf b/test/cases/Empty Name/src/No_Name.v3.0.vcf new file mode 120000 index 0000000..1bb7fb8 --- /dev/null +++ b/test/cases/Empty Name/src/No_Name.v3.0.vcf @@ -0,0 +1 @@ +../../../ressources/src/No_Name.v3.0.vcf \ No newline at end of file diff --git a/test/ressources/src/Double_Email.v3.0.vcf b/test/ressources/src/Double_Email.v3.0.vcf new file mode 100644 index 0000000..10eff0c --- /dev/null +++ b/test/ressources/src/Double_Email.v3.0.vcf @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +N:Email;Double;;; +FN:first.email@example.com +EMAIL:double.email@example.com +END:VCARD diff --git a/test/ressources/src/Double_ParenthesesEmail.v3.0.vcf b/test/ressources/src/Double_ParenthesesEmail.v3.0.vcf new file mode 100644 index 0000000..a56b74b --- /dev/null +++ b/test/ressources/src/Double_ParenthesesEmail.v3.0.vcf @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +N:ParenthesesEmail;Double;;; +FN:first.email@example.com (second.email@example.com) +EMAIL:double.parentheses_email@example.com +END:VCARD diff --git a/test/ressources/src/Empty_Name.v3.0.vcf b/test/ressources/src/Empty_Name.v3.0.vcf new file mode 100644 index 0000000..96e65dc --- /dev/null +++ b/test/ressources/src/Empty_Name.v3.0.vcf @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +N:Name;Empty;;; +FN: +EMAIL:empty.name@example.com +END:VCARD diff --git a/test/ressources/src/No_Name.v3.0.vcf b/test/ressources/src/No_Name.v3.0.vcf new file mode 100644 index 0000000..2d07c16 --- /dev/null +++ b/test/ressources/src/No_Name.v3.0.vcf @@ -0,0 +1,6 @@ +BEGIN:VCARD +VERSION:3.0 +N:;;;; +FN: +EMAIL:no.name@example.com +END:VCARD diff --git a/vcardlib.py b/vcardlib.py index 5a71233..ae8195b 100644 --- a/vcardlib.py +++ b/vcardlib.py @@ -17,6 +17,7 @@ import warnings import binascii from os.path import exists, basename +from email.utils import parseaddr # @see: https://eventable.github.io/vobject/ from vobject import vCard, readComponents from vobject.vcard import Name @@ -48,6 +49,15 @@ REGEX_ANY_NUMBER = re.compile('[0-9]') REGEX_WITHOUT_EXTENSION = re.compile('(.+)\\.[a-zA-Z]+$') REGEX_NAME_IN_EMAIL = re.compile('^ *"(?P[^"]+)" *<[^>]+> *$') +REGEX_EMAIL_SURROUNDINGS = [ + re.compile( + '(?P\\s*)\\[(?P[^@]]+)@(?P[^]]+)\\](?P\\s*)'), + re.compile( + '(?P\\s*)\\((?P[^@)]+)@(?P[^)]+)\\)(?P\\s*)'), + re.compile( + '(?P\\s*)"(?P[^@"]+)@(?P[^"]+)"(?P\\s*)'), + re.compile( + '(?P\\s*)\'(?P[^@\']+)@(?P[^\']+)\'(?P\\s*)')] REGEX_EMAIL_WITH_NAME = re.compile('^ *"[^"]+" *<(?P[^>]+)> *$') REGEX_INVALID_MAIL = re.compile('^nobody[a-z0-9]*@nowhere.invalid$') REGEX_ONY_NON_ALPHANUM = re.compile('^[ ]*[^\\w]*[ ]*$') @@ -531,12 +541,87 @@ def collect_vcard_names(vcard): # pylint: disable=too-many-statements,too-many- for attr_n in getattr(vcard, name_key + '_list'): value = close_parentheses_or_braces(str(attr_n.value).strip()) if not REGEX_ONY_NON_ALPHANUM.match(value): - if value.count('@') == 1: - name = build_name_from_email(value) - if not name in available_names: - available_names.append(name) - logging.debug("\t\tadding '%s' from built email for '%s'", - name, name_key) + if '@' in value: + normalized_value = value.strip() + for regex in REGEX_EMAIL_SURROUNDINGS: + normalized_value = regex.sub( + '\\g<\\g@\\g>\\g', + normalized_value).replace('<<', '<').replace('>>', '>') + logging.debug("\t\tnormalized value '%s' from '%s'", + normalized_value, value) + realname, email = parseaddr(normalized_value) + logging.debug("\t\tparsed email: '%s', '%s' from '%s'", + realname, email, normalized_value) + if realname or email: + + # Special case to be able to process value with double email like: + # "1st.email@example.com <2nd.email@example.com>" + # In this case, the parsing will return an empty name, and the 1st + # email as email, ignoring the second one. + # In order to force the processing of the second email, we remove + # the 1st email from the original value and re-parse to get the 2nd + # one. + if not realname and email and normalized_value.count('@') == 2: + realname = email + _, email = parseaddr(normalized_value.replace(email, '')) + logging.debug("\t\tforce parsed email: '%s', '%s' from '%s'", + realname, email, normalized_value) + + if realname: + if '@' in realname: + normalized_realname = realname.strip() + for regex in REGEX_EMAIL_SURROUNDINGS: + normalized_realname = regex.sub( + '\\g<\\g@' + '\\g>\\g', + normalized_realname).\ + replace('<<', '<').replace('>>', '>') + logging.debug("\t\tnormalized sub-value '%s' from '%s'", + normalized_realname, realname) + _realname, _email = parseaddr(normalized_realname) + logging.debug("\t\tparsed email: '%s', '%s' from '%s'", + _realname, _email, normalized_realname) + if _realname or _email: + if _realname: + name = sanitize_name(_realname) + if not name in available_names: + available_names.append(name) + logging.debug( + "\t\tadding '%s' from built email for '%s'", + name, name_key) + if _email: + name = build_name_from_email(_email) + if not name in available_names: + available_names.append(name) + logging.debug( + "\t\tadding '%s' from built email for '%s'", + name, name_key) + elif normalized_realname.count('@') == 1: + name = build_name_from_email(normalized_realname) + if not name in available_names: + available_names.append(name) + logging.debug( + "\t\tadding '%s' from built email for '%s'", + name, name_key) + else: + logging.debug( + "\t\tcan't parse name value containing email '%s'", + normalized_realname) + if email: + name = build_name_from_email(email) + if not name in available_names: + available_names.append(name) + logging.debug("\t\tadding '%s' from built email for '%s'", + name, name_key) + elif normalized_value.count('@') == 1: + name = build_name_from_email(normalized_value) + if not name in available_names: + available_names.append(name) + logging.debug("\t\tadding '%s' from built email for '%s'", + name, name_key) + else: + logging.debug("\t\tcan't parse name value containing email '%s'", + normalized_value) else: name = sanitize_name(value) if not name in available_names: