add tweets to each paper, I think i like it
karpathy committed Apr 4, 2020
1 parent 1ef7833 commit fdeb562
Showing 5 changed files with 161 additions and 1 deletion.
4 changes: 4 additions & 0 deletions README.md
@@ -47,6 +47,10 @@ And in my `crontab -l` I make sure this runs every 1 hour, for example:
3 * * * * /root/covid-sanity/pull.sh > /root/cron.log 2>&1
```

## seeing tweets

Seeing the tweets for each paper is purely optional. To enable it, follow the instructions for setting up the [python-twitter API](https://python-twitter.readthedocs.io/en/latest/) and write your API secrets into a file `twitter.txt`, which gets loaded by `twitter_daemon.py`. I run this daemon process in a screen session, where it continuously loops over all papers, pulls the tweets for each one, and saves the results.
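
Note that `twitter.txt` is not part of the repo. Based on how `get_api_keys()` reads the file and how its values are passed to `twitter.Api(...)` in `twitter_daemon.py`, it is expected to contain one secret per line, in this order (placeholder values shown):

```
<consumer key>
<consumer secret>
<access token key>
<access token secret>
```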

## License

MIT
16 changes: 15 additions & 1 deletion serve.py
@@ -2,6 +2,7 @@
Simple flask server for the interface
"""

import os
import json

from flask import Flask, request, redirect, url_for
@@ -23,13 +24,23 @@
with open('search.json', 'r') as f:
    search_dict = json.load(f)

# OPTIONAL: load tweet dictionary, if twitter_daemon has run
tweets_dict = {}
if os.path.isfile('tweets.json'):
    with open('tweets.json', 'r') as f:
        tweets_dict = json.load(f)
# decorate each paper with tweets
for j in jall['rels']:
    j['tweets'] = tweets_dict.get(j['rel_doi'], [])
    j['tweets'].sort(key=lambda t: t['followers'], reverse=True)

# do some precomputation since we're going to be doing lookups of doi -> doc index
doi_to_ix = {}
for i, j in enumerate(jall['rels']):
    doi_to_ix[j['rel_doi']] = i

# -----------------------------------------------------------------------------
# routes below
# few helper functions for routes

def default_context(papers, **kwargs):
""" build a default context for the frontend """
Expand All @@ -38,6 +49,9 @@ def default_context(papers, **kwargs):
    context = {'papers': papers, 'gvars': gvars}
    return context

# -----------------------------------------------------------------------------
# routes below

@app.route("/search", methods=['GET'])
def search():
    q = request.args.get('q', '') # get the search request
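
For reference, the tweet decoration block above assumes that `tweets.json` (written by `twitter_daemon.py`, added below in this commit) maps each paper's `rel_doi` to a list of tweet dicts. A minimal sketch of that assumed structure, with made-up placeholder values:

```
# minimal sketch of the assumed tweets.json contents; all values are made-up placeholders
tweets_dict = {
    "10.1101/2020.01.01.000000": [
        {
            "id": "1234567890123456789",   # tweet id, kept as a string
            "name": "some_user",           # twitter screen name
            "image_url": "https://pbs.twimg.com/profile_images/.../photo.jpg",
            "followers": 42,               # used to sort tweets, most-followed first
            "verified": False,
            "text": "Interesting new preprint ...",
        },
    ],
}
```

`serve.py` then attaches `tweets_dict.get(j['rel_doi'], [])` to each paper and sorts the list by follower count, so papers without tweets simply get an empty list.
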
17 changes: 17 additions & 0 deletions static/paper_list.js
@@ -1,8 +1,24 @@
'use strict';

const Tweet = props => {
    const t = props.tweet;
    const turl = "https://twitter.com/" + t.name + "/status/" + t.id;
    return (
        <div class='tweet'>
            <a href={turl}><img src={t.image_url}></img></a>
            <div class='meta'>
                <span class="following">{t.followers}</span>
                <span class="uname"><a href={turl}>{t.name}</a></span>
                <span class="text">{t.text}</span>
            </div>
        </div>
    )
}

const Paper = props => {
    const p = props.paper
    const url = p.rel_link + '.full.pdf';
    const tlst = p.tweets.map((jtweet, ix) => <Tweet key={ix} tweet={jtweet} />);
    return (
        <div class={'rel_paper ' + p.rel_site}>
            <div class='dllinks'>
@@ -13,6 +29,7 @@ const Paper = props => {
            <div class='rel_title'><a href={p.rel_link}>{p.rel_title}</a></div>
            <div class='rel_authors'>{p.rel_authors}</div>
            <div class='rel_abs'>{p.rel_abs}</div>
            <div class='rel_tweets'>{tlst}</div>
        </div>
    )
}
47 changes: 47 additions & 0 deletions static/style.css
@@ -92,9 +92,56 @@ h1 {
    background-color: #EFE;
    padding: 10px;
    margin-top: 10px;
    border-radius: 5px 5px 0px 0px;
}

.rel_tweets {
}

.tweet {
    background-color: #EEF;
    font-family: Arial, Helvetica, sans-serif;
    font-size: 14px;
    border-radius: 5px;
    padding: 5px;
    margin-top: 5px;
    min-height: 48px;
}

.tweet img {
    float: left;
    margin-right: 5px;
    border-radius: 5px;
}

.tweet .meta {
    margin-top: 3px;
}

.tweet .following {
    background-color: #0e4c92;
    padding: 3px 5px 3px 5px;
    border-radius: 3px 0px 0px 3px;
    color: white;
}

.tweet .uname {
    padding: 3px 5px 3px 5px;
    background-color: #6a99d4;
    color: white;
    border-radius: 0px 3px 3px 0px;
    margin-right: 5px;
}

.tweet .uname a {
    color: white;
}

.tweet .text {
    line-height: 18px;
}


#info {
    background-color: #EEF;
    padding: 10px;
78 changes: 78 additions & 0 deletions twitter_daemon.py
@@ -0,0 +1,78 @@
"""
Continuously iterates over existing database and pulls in tweets for each paper.
"""

import time
import json
import urllib

import twitter # pip install python-twitter

from run import write_json

# -----------------------------------------------------------------------------

def get_api_keys():
    lines = open('twitter.txt', 'r').read().splitlines()
    return lines

def process_tweet(r):
    tweet = {}
    tweet['id'] = str(r.id)
    tweet['name'] = r.user.screen_name
    tweet['image_url'] = r.user.profile_image_url
    tweet['followers'] = r.user.followers_count
    tweet['verified'] = r.user.verified
    tweet['text'] = r.full_text
    return tweet

def get_tweets(j):

    # note: we're assuming v1, which is kinda sketchy and slightly wrong...
    q = f"https://www.{j['rel_site']}.org/content/{j['rel_doi']}v1"
    q = urllib.parse.quote(q, safe='')
    exclude_replies = '%20-filter%3Areplies'
    exclude_retweets = '%20-filter%3Aretweets'
    suffix = exclude_replies + exclude_retweets
    results = api.GetSearch(raw_query="q=%s%s&result_type=recent&count=100" % (q, suffix)) # rate limit: 1 per 5 seconds

    # extract just what we need from tweets and not much more
    jtweets = [process_tweet(r) for r in results]

    # ban a few simple aggregator accounts
    banned = ['medrxivpreprint', 'biorxivpreprint', 'glycopreprint']
    jtweets = [t for t in jtweets if t['name'] not in banned]

    return jtweets

# -----------------------------------------------------------------------------

if __name__ == '__main__':

    keys = get_api_keys()
    api = twitter.Api(consumer_key=keys[0],
                      consumer_secret=keys[1],
                      access_token_key=keys[2],
                      access_token_secret=keys[3],
                      tweet_mode='extended')

    # run forever
    while True:

        # open the latest state of database
        with open('jall.json', 'r') as f:
            jall = json.load(f)

        # get all tweets for all papers
        tweets = {}
        for i, j in enumerate(jall['rels']):
            jtweets = get_tweets(j)
            tweets[j['rel_doi']] = jtweets
            print('%d/%d: found %d tweets for %s' % (i+1, len(jall['rels']), len(jtweets), j['rel_link']))
            # rate limit is 180 calls per 15 minutes, i.e. 1 call per 5 seconds, so sleep a bit longer to be safe
            time.sleep(10)

        # save to file when done
        write_json(tweets, 'tweets.json')
        print('-'*80)
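
To make the query construction in `get_tweets` concrete, here is a small self-contained sketch of the `raw_query` it would build for a hypothetical paper (the DOI below is a made-up placeholder):

```
import urllib.parse

# hypothetical paper record; the DOI is a made-up placeholder
j = {'rel_site': 'biorxiv', 'rel_doi': '10.1101/2020.01.01.000000'}

# same construction as in get_tweets() above
q = f"https://www.{j['rel_site']}.org/content/{j['rel_doi']}v1"
q = urllib.parse.quote(q, safe='')
suffix = '%20-filter%3Areplies' + '%20-filter%3Aretweets'
raw_query = "q=%s%s&result_type=recent&count=100" % (q, suffix)

print(raw_query)
# q=https%3A%2F%2Fwww.biorxiv.org%2Fcontent%2F10.1101%2F2020.01.01.000000v1%20-filter%3Areplies%20-filter%3Aretweets&result_type=recent&count=100
```

In other words, the full paper URL is percent-encoded into the `q=` parameter, filters are appended to drop replies and retweets, and the most recent (up to 100) matching tweets are requested.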
