Skip to content

Commit

Permalink
SDAN-722 Remove embeds from monitoring email (#1176)
Browse files Browse the repository at this point in the history
  • Loading branch information
marwoodandrew authored Aug 10, 2023
1 parent 3cddb9f commit 3f26da3
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
28 changes: 28 additions & 0 deletions newsroom/monitoring/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from flask import current_app as app
from lxml import html as lxml_html
import re
import collections
from superdesk.text_utils import get_text
from newsroom.utils import get_items_by_id
Expand Down Expand Up @@ -45,6 +47,7 @@ def truncate_article_body(items, monitoring_profile, full_text=False):
# To make sure PDF creator and RTF creator does truncate for linked_text settings
# Manually truncate it
for i in items:
remove_all_embeds(i)
i['body_str'] = get_text(i.get('body_html', ''), content='html', lf_on_block=True)
if monitoring_profile['alert_type'] == 'linked_text':
if not full_text and len(i['body_str']) > 160:
Expand All @@ -66,3 +69,28 @@ def get_items_for_monitoring_report(_ids, monitoring_profile, full_text=False):
items = get_items_by_id(_ids, 'items')
truncate_article_body(items, monitoring_profile, full_text)
return items


def remove_all_embeds(item):
    """
    Remove all editor embeds (image, video, audio) from the body of the article.

    An embed is delimited in ``body_html`` by two HTML comments: a start marker
    matching ``EMBED START Image|Video|Audio {id: "editor_<n>`` and a following
    sibling comment containing ``EMBED END``. Both markers and every sibling
    node between them are removed. If no end marker is found, every sibling
    following the start marker is removed.

    ``item['body_html']`` is re-serialised and replaced only when at least one
    embed was removed; otherwise the item is left untouched.

    :param item: article dict whose ``body_html`` value is read and may be replaced
    :return: None, the item is modified in place
    """
    root_elem = lxml_html.fromstring(item.get('body_html') or '<p></p>')
    regex = r" EMBED START (?:Image|Video|Audio) {id: \"editor_([0-9]+)"
    html_updated = False
    # The comment list is materialised up front, so it may contain nodes that
    # an earlier iteration has already detached from the tree.
    for comment in root_elem.xpath('//comment()'):
        if not re.search(regex, comment.text or ''):
            continue
        parent = comment.getparent()
        if parent is None:
            # Either the marker was already removed as part of a previous embed
            # (one that had no end marker), or it has no enclosing element.
            # There is nothing it can be removed from, so skip it instead of
            # failing on ``None.remove``.
            continue
        for elem in comment.itersiblings():
            # Only a comment node can be the end marker; an ordinary element
            # whose text happens to contain the phrase must not stop removal.
            is_end_marker = (
                isinstance(elem, lxml_html.HtmlComment)
                and ' EMBED END ' in (elem.text or '')
            )
            # NOTE(review): lxml's remove() also discards the node's tail text,
            # so bare text directly following a removed node is dropped too.
            parent.remove(elem)
            if is_end_marker:
                break
        parent.remove(comment)
        html_updated = True
    if html_updated:
        item["body_html"] = sd_etree.to_string(root_elem, method="html")
11 changes: 10 additions & 1 deletion tests/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,14 @@ def test_send_immediate_email_alerts(client, app):
'products': [{'code': '12345'}],
"versioncreated": utcnow(),
'byline': 'Testy McTestface',
'body_html': '<p>line 1 of the article text\nline 2 of the story\nand a bit more.</p>',
'body_html': '<p>line 1 of the article text\nline 2 of the story\nand a bit more.</p>'
'<!-- EMBED START Audio {id: "editor_2\"} -->'
'<figure>'
' <audio controls src="/assets.mp3"></audio>'
' <figcaption>Assistant Treasurer</figcaption>'
'</figure>'
'<!-- EMBED END Audio {id: \"editor_2\"} -->'
'<p>Something after the embed',
'source': 'AAAA'
}])
w = app.data.find_one('monitoring', None, _id='5db11ec55f627d8aa0b545fb')
Expand All @@ -849,6 +856,8 @@ def test_send_immediate_email_alerts(client, app):
assert outbox[0].recipients == ['[email protected]', '[email protected]']
assert outbox[0].sender == 'newsroom@localhost'
assert outbox[0].subject == 'Monitoring Subject'
assert 'Something after the embed' in outbox[0].body
assert 'Assistant Treasurer' not in outbox[0].body
assert 'Newsroom Monitoring: W1' in outbox[0].body


Expand Down

0 comments on commit 3f26da3

Please sign in to comment.