From 17af056f9a5b5eb4a1e7f2aeca61fe513cb51026 Mon Sep 17 00:00:00 2001 From: Simon THOBY Date: Sun, 16 Sep 2018 13:32:12 +0200 Subject: [PATCH] URL: Do not send "OLD!" messages whenever the link is quite fresh --- url/__init__.py | 53 +++++++++++++++++++++++++++++++++---------------- url/model.py | 6 +++++- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/url/__init__.py b/url/__init__.py index acab3d2..212f488 100644 --- a/url/__init__.py +++ b/url/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import re +from datetime import timedelta, datetime import sqlalchemy.exc from pipobot.lib.known_users import KnownUser @@ -18,7 +19,10 @@ class CmdUrl(ListenModule): - _config = (("repost", bool, False), ("repost_ignore", list, [])) + # repost_ignore_delay is the number of seconds to wait between two + # submissions of the same url before prompting that this is an old message. + _config = (("repost", bool, False), ("repost_ignore", list, []), + ("repost_ignore_delay", int, 60)) def __init__(self, bot): desc = "Extracting title of page from URL" @@ -31,41 +35,56 @@ def answer(self, sender, message): else: urls = set(URLS_RE.findall(message)) + # We cannot iter by number on sets, because of their intrinsic structure + urls = list(urls) + + title_page = self.get_title(urls) + try: - repost_msg = self.check_repost(sender, urls) + repost_msg = self.check_repost(sender, urls, title_page) except sqlalchemy.exc.OperationalError: self.bot.session.rollback() repost_msg = [] except sqlalchemy.exc.InvalidRequestError: repost_msg = [] - title_page = self.get_title(urls) send = repost_msg + title_page return None if send == [] else "\n".join(send) - def check_repost(self, sender, urls): + def check_repost(self, sender, urls, titles): + if not self.repost: + return [] send = [] - if self.repost: - for url in urls: - if not any(i in url for i in self.repost_ignore): - res = self.bot.session.query(RepostUrl).filter(RepostUrl.url == url).first() - if res: + for i in range(0, len(urls)): + url = urls[i] + # the conversion to unicode is quite important to prevent sqlite conversion errors between 8-bytestrings and UTF-8 sqlite3 values + title_page = unicode(titles[i]) + if not any(k in url for k in self.repost_ignore): + res = self.bot.session.query(RepostUrl).filter(RepostUrl.url == url).first() + if res: + # Do not send a message if the link was shared less than repost_ignore_delay + # seconds ago or the page title changed since its submission + if (datetime.now() - res.last_date) > timedelta(seconds=self.repost_ignore_delay) and title_page == res.title: send.append('OLD! ') first = KnownUser.get_antihl(res.jid, self.bot) first_date = 'le ' + res.date.strftime('%x') + ' à ' + res.date.strftime('%X') first_date = first_date.decode("utf-8") if res.count == 1: - send.append(u'Ce lien a déjà été posté %s par %s sur %s…' % (first_date, first, res.chan)) + send.append(u'Ce lien a déjà été posté %s par %s sur %s…' % (first_date, first, first.chan)) else: ret = u'Ce lien a déjà été posté %s fois depuis que %s l’a découvert, %s, sur %s…' send.append(ret % (res.count, first, first_date, res.chan)) - res.count += 1 - else: - u = RepostUrl(url, - self.bot.occupants.pseudo_to_jid(sender), - self.bot.chatname) - self.bot.session.add(u) - self.bot.session.commit() + res.title = title_page + res.count += 1 + # Update the time someone posted the link + res.last_date = datetime.now() + else: + u = RepostUrl(url, + self.bot.occupants.pseudo_to_jid(sender), + self.bot.chatname, + title_page) + self.bot.session.add(u) + self.bot.session.commit() return send def get_title(self, urls): diff --git a/url/model.py b/url/model.py index e4a5f48..ad95287 100644 --- a/url/model.py +++ b/url/model.py @@ -9,13 +9,17 @@ class RepostUrl(Base): __tablename__ = "url" url = Column(String(250), primary_key=True) count = Column(Integer) + title = Column(String(250)) date = Column(DateTime) + last_date = Column(DateTime) jid = Column(String(250)) chan = Column(String(250)) - def __init__(self, url, jid, chan): + def __init__(self, url, jid, chan, title=""): self.url = url self.jid = jid self.count = 1 + self.title = title self.date = datetime.datetime.now() + self.last_date = self.date self.chan = chan