This repository has been archived by the owner on Jan 19, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathhtml-transcript.py
156 lines (127 loc) · 4.6 KB
/
html-transcript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python2
"""
Module to translate JSON transcript into a pretty HTML output.
This module actually should create a folder that contains an index.html as well
as all the necessary images for the transcript to be rendered in browser
completely offline.
"""
import datetime
import json
import os.path
import shutil
import sys
from inspect import cleandoc
from UserDict import UserDict
from xml.sax.saxutils import escape

import requests
_HTML_HEADER = """<!doctype html>\n'
<html>\n<head>
<meta charset="UTF-8">
<title>GroupMe Transcript</title>
<link rel="stylesheet" type="text/css" href="groupme.css">
<script src="http://cdn.jsdelivr.net/emojione/1.5.0/lib/js/emojione.min.js"></script>
<link rel="stylesheet" href="http://cdn.jsdelivr.net/emojione/1.5.0/assets/css/emojione.min.css"/>
<script src="groupme.js"></script>
</head>\n<body>
<div class="container">
<h1>GroupMe Transcript</h1>
<div class="chat">
"""
_HTML_FOOTER = """</div>
</div>
</body>
</html>
"""
class ImageCache(UserDict):
    """Maps image URLs to local filenames.

    Looking up a URL that is not in the mapping yet saves the image into the
    cache folder and stores the resulting filename under that URL, so later
    lookups of the same URL are answered from the mapping.
    """

    def __init__(self, folder, initialdata=None):
        """Create a cache that keeps its image files in ``folder``.

        ``initialdata`` optionally pre-populates the URL -> filename mapping.
        """
        # ``None`` replaces the former ``{}`` default so that no mutable
        # object is shared between calls; UserDict treats None as "empty".
        UserDict.__init__(self, initialdata)
        self._folder = folder

    def _save_image(self, url):
        """Store the image at ``url`` in the cache folder; return its filename.

        The filename is the last ``/``-separated segment of ``url``.  If a
        file with that name already exists in the folder it is reused and
        nothing is downloaded.
        """
        # Full disclosure, largely adapted from this SO answer:
        # http://stackoverflow.com/a/16696317
        local_file = url.split('/')[-1]
        local = os.path.join(self._folder, local_file)
        if os.path.exists(local):
            return local_file
        print('Downloading image.')
        resp = requests.get(url, stream=True)
        try:
            # Closing the file at the end of the ``with`` block flushes it,
            # so no per-chunk flush is needed.
            with open(local, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response holds its connection until it is closed.
            resp.close()
        return local_file

    def __getitem__(self, key):
        """Return the local filename for URL ``key``, fetching it on a miss."""
        try:
            return UserDict.__getitem__(self, key)
        except KeyError:
            local = self._save_image(key)
            self[key] = local
            return local
def write_html_transcript(messages, outfile, imgcache):
    """Write one ``message-container`` div per message to ``outfile``.

    messages -- sequence of message dicts.  Each one is read for the keys
        ``name``, ``created_at``, ``text``, ``system``, ``favorited_by`` and
        ``picture_url``.
    outfile -- object with a ``write`` method.  Markup is written as plain
        string literals; author names and message text are written UTF-8
        encoded.
    imgcache -- mapping from a picture URL to the filename used as the
        ``src`` of the picture's ``<img>`` tag (``write_html`` passes an
        ``ImageCache``).

    Author names, message text and image filenames are HTML-escaped before
    being written, so characters such as ``<`` or ``&`` in a message cannot
    break or inject markup.  A progress line is printed after every message.
    """
    total = len(messages)
    for i, message in enumerate(messages):
        # Get variables
        name = message[u'name']
        time_obj = datetime.datetime.fromtimestamp(message[u'created_at'])
        time_str = time_obj.strftime('%Y-%m-%d %H:%M')
        text = message[u'text']
        if text is None:
            text = u''
        system = message[u'system']
        faves = message[u'favorited_by']
        # ``favorited_by`` may be a list of likers, 0, or missing a value
        # entirely (None); anything falsy counts as no likes.
        nlikes = len(faves) if faves else 0
        pic = message[u'picture_url']

        # Open div
        if system:
            outfile.write('<div class="message-container system">')
        else:
            outfile.write('<div class="message-container">')

        # Author
        outfile.write('<div class="author">')
        outfile.write(escape(name).encode('utf-8'))
        outfile.write('</div>')

        # Message span (the timestamp is shown as the span's tooltip)
        outfile.write('<div class="message"><span class="message-span" title="%s">' % time_str)
        outfile.write(escape(text).encode('utf-8'))
        outfile.write('</span></div>')

        # Likes
        if nlikes > 0:
            outfile.write('<div class="likes">')
            outfile.write("<img class='emojione' src='http://cdn.jsdelivr.net/emojione/assets/png/2764.png'>x</img>")
            outfile.write('<span class="likes-count">%d</span>' % nlikes)
            outfile.write('</div>')

        # Image
        if pic:
            local = imgcache[pic]
            # The filename goes inside a double-quoted attribute, so quotes
            # are escaped in addition to &, < and >.
            outfile.write('<img src="' + escape(local, {'"': '&quot;'}) + '" class="picture-message">')

        # Close div
        outfile.write('</div>\n')
        # i is zero-based; report the number of messages finished so far.
        print('%04d/%04d messages processed' % (i + 1, total))
def write_html(folder, messages, emoji=True):
    """Build the transcript page for ``messages`` inside ``folder``.

    Copies ``assets/groupme.css`` and ``assets/groupme.js`` (paths relative
    to the current working directory) into ``folder``, then writes
    ``index.html`` there: the HTML header, one block per message, and the
    HTML footer.  Pictures referenced by messages are resolved through an
    ``ImageCache`` bound to ``folder``.

    ``emoji`` is accepted but not used by this function.
    """
    # Static assets keep their base name inside the output folder.
    for asset in ('assets/groupme.css', 'assets/groupme.js'):
        shutil.copyfile(asset, os.path.join(folder, os.path.basename(asset)))

    pictures = ImageCache(folder)
    page_path = os.path.join(folder, 'index.html')
    with open(page_path, 'w') as page:
        page.write(_HTML_HEADER)
        write_html_transcript(messages, page, pictures)
        page.write(_HTML_FOOTER)
def main():
    """
    Usage: html-transcript.py filename.json html-output-folder
    Takes a JSON GroupMe transcript and writes a mostly offline HTML version of
    your transcript. Downloads all images sent over GroupMe, and uses a
    Javascript library + CDN to render all of the Emoji. GroupMe-specific
    emoji will end up unrecognizable.
    """
    # The docstring above doubles as the usage message printed below, so its
    # wording is part of the program's output.
    if len(sys.argv) < 3:
        print(cleandoc(main.__doc__))
        sys.exit(1)

    json_path = sys.argv[1]
    out_folder = sys.argv[2]

    if not os.path.exists(out_folder):
        # makedirs (unlike mkdir) also creates missing parent folders, so a
        # nested output path such as "out/chat" works.
        os.makedirs(out_folder)

    # The with-block closes the transcript file even if parsing fails.
    with open(json_path) as trans_file:
        transcript = json.load(trans_file)

    write_html(out_folder, transcript)


# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()