Skip to content

Commit

Permalink
Make htmlify_tweet() more robust when encountering strs not ints
Browse files Browse the repository at this point in the history
Some tweet JSON has `display_text_range` set as strings rather than ints,
e.g. `["0", "140"]` rather than `[0, 140]`, particularly when the
JSON has come from a downloaded Twitter archive.

And

Some tweet JSON has `["entities"][<kind>]["indices"]` set as
strings rather than ints, e.g. `["0", "9"]` rather than `[0, 9]`,
particularly when the JSON has come from a downloaded Twitter
archive.

For #229
  • Loading branch information
philgyford committed Feb 12, 2022
1 parent 81d3c0a commit 4948f07
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 0 deletions.
15 changes: 15 additions & 0 deletions ditto/twitter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@ def htmlify_tweet(json_data):
if "entities" in json_data and "symbols" not in json_data["entities"]:
json_data["entities"]["symbols"] = []

# Some Tweets (eg from a downloaded archive) have strings instead of ints
# to define text ranges. ["0", "140"] rather than [0, 140].
# We fix those here so that Twython doesn't complain.
if "display_text_range" in json_data:
json_data["display_text_range"] = [
int(n) for n in json_data["display_text_range"]
]
if "entities" in json_data:
for key, value in json_data["entities"].items():
for count, entity in enumerate(value):
if "indices" in entity:
json_data["entities"][key][count]["indices"] = [
int(n) for n in entity["indices"]
]

# This does most of the work for us:
# https://twython.readthedocs.org/en/latest/usage/special_functions.html#html-for-tweet
html = Twython.html_for_tweet(
Expand Down
20 changes: 20 additions & 0 deletions tests/twitter/fixtures/api/tweet_with_display_text_range_str.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"retweeted": false,
"source": "<a href=\"http://tapbots.com/tweetbot\" rel=\"nofollow\">Tweetbot for iΟS</a>",
"entities": {
"hashtags": [],
"symbols": [],
"user_mentions": [],
"urls": []
},
"display_text_range": ["0", "140"],
"favorite_count": "6",
"id_str": "915152022273449987",
"truncated": false,
"retweet_count": "0",
"id": "915152022273449987",
"created_at": "Tue Oct 03 09:50:19 +0000 2017",
"favorited": false,
"full_text": "Open iPad Slack, scroll to bottom. Close Slack. Open Slack; it’s scrolled back up. Every time. Maddening. Not biggest problem in world, BUT.",
"lang": "en"
}
40 changes: 40 additions & 0 deletions tests/twitter/fixtures/api/tweet_with_entities_indices_str.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"retweeted": false,
"source": "<a href=\"https://tapbots.com/software/tweetbot/mac\" rel=\"nofollow\">Tweetbot for Mac</a>",
"entities": {
"hashtags": [],
"symbols": [],
"user_mentions": [
{
"name": "Terry Collier",
"screen_name": "terrycol",
"indices": ["0", "9"],
"id_str": "123",
"id": "123"
},
{
"name": "Bob Ferris",
"screen_name": "bobferris",
"indices": ["10", "20"],
"id_str": "234",
"id": "234"
}
],
"urls": []
},
"display_text_range": [0, 142],
"favorite_count": "0",
"in_reply_to_status_id_str": "914906397061664768",
"id_str": "914908752482111488",
"in_reply_to_user_id": "123",
"truncated": false,
"retweet_count": "0",
"id": "914908752482111488",
"in_reply_to_status_id": "914906397061664768",
"created_at": "Mon Oct 02 17:43:39 +0000 2017",
"favorited": false,
"full_text": "@terrycol @bobferris I liked it and only thought some of it was a bit silly. But analysis beyond that is probably beyond the scope of Twitter.",
"lang": "en",
"in_reply_to_screen_name": "terrycol",
"in_reply_to_user_id_str": "123"
}
44 changes: 44 additions & 0 deletions tests/twitter/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_htmlify_description(self):
)



class HtmlifyTweetEntitiesTestCase(HtmlifyTestCase):
"Linkify URLs from entities: urls, screen_names, hashtags."

Expand Down Expand Up @@ -160,6 +161,49 @@ def test_strip(self):
self.assertEqual("Y", tweet_html[-1])


class HtmlifyTweetStrsNotIntsTestCase(HtmlifyTestCase):
    "Tweet JSON whose numeric ranges are strings should still be htmlified."

    def test_handles_display_text_range_str(self):
        """Cope correctly if display_text_range is strings, not ints.

        Tweet JSON (particularly from the downloaded Twitter archive) can
        carry display_text_range as ["0", "140"] instead of [0, 140].
        htmlify_tweet() should produce the expected text regardless.
        """
        expected_html = (
            "Open iPad Slack, scroll to bottom. Close Slack. Open Slack; it’s "
            "scrolled back up. Every time. Maddening. Not biggest problem in "
            "world, BUT."
        )
        json_data = self.getJson("tweet_with_display_text_range_str.json")
        self.assertEqual(htmlify_tweet(json_data), expected_html)

    def test_handles_entities_indicies_str(self):
        """Cope correctly if entities' indices are strings, not ints.

        Tweet JSON (particularly from the downloaded Twitter archive) can
        carry ["entities"][<kind>]["indices"] as ["0", "9"] instead of
        [0, 9]. htmlify_tweet() should still link the user mentions.
        """
        expected_html = (
            '<a href="https://twitter.com/terrycol" rel="external">@terrycol</a> '
            '<a href="https://twitter.com/bobferris" rel="external">@bobferris</a> '
            'I liked it and only thought some of it was a bit silly. But analysis '
            'beyond that is probably beyond the scope of Twitter.'
        )
        json_data = self.getJson("tweet_with_entities_indices_str.json")
        self.assertEqual(htmlify_tweet(json_data), expected_html)


class HtmlifyTweetUrlsTestCase(HtmlifyTestCase):
"Further tests for specific problems with URLs."

Expand Down

0 comments on commit 4948f07

Please sign in to comment.