-
Notifications
You must be signed in to change notification settings - Fork 1
/
movies.py
242 lines (207 loc) · 7.57 KB
/
movies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
'''
Build an OO interface for our movie picker code, but fetching data on-the-fly
from Wikipedia and omdbapi.com.
Example output:
$ python solved4.py 1990s_comedy_films
Pterodactyl Woman from Beverly Hills (1997)
Plot: Pixie is cursed with turning into a Pterodactyl when her husband is caught messing with bones on an ancient burial ground. Her husband, children, friends, and neighbours must come to terms ...
Genre: Comedy
IMDb URL: http://www.imdb.com/title/tt0110910
IMDb rating: 4.8/10
Add movie to your list? no
Le schpountz (1999)
Plot: Irene has no desire to work in his uncle's grocery shop and dreams of becoming an actor. His opportunity comes when a crew of movie makers came to his little village. Irene begins to go ...
Genre: Comedy, Romance
IMDb URL: http://www.imdb.com/title/tt0200091
IMDb rating: 3.3/10
Add movie to your list? no
The Pest (1997)
Plot: A Miami con man agrees to be the human target for a Neo-Nazi manhunter, in order to collect $50,000 if he survives.
Genre: Comedy
IMDb URL: http://www.imdb.com/title/tt0119887
IMDb rating: 4.9/10
Add movie to your list? yes
Mystery Men (1999)
Plot: A group of inept amateur superheroes must try to save the day when a supervillian threatens to destroy a major superhero and the city.
Genre: Action, Comedy, Fantasy
IMDb URL: http://www.imdb.com/title/tt0132347
IMDb rating: 6.0/10
Add movie to your list? yes
A Fish in the Bathtub (1999)
Plot: Sam (Jerry Stiller) and Molly (Anne Meara) are a classic bickering old couple, and their marriage has been 40 years of sparring. Yet, when Sam refuses to move the carp he's keeping in their...
Genre: Comedy
IMDb URL: http://www.imdb.com/title/tt0126908
IMDb rating: 5.7/10
Add movie to your list? yes
== Your movies ==
The Pest
Mystery Men
A Fish in the Bathtub
'''
import json
import random
import urllib
#Wikipedia category used when none is given on the command line
DEFAULT_CATEGORY = "American_science_fiction_action_films"
#Wikipedia categorymembers query; the two {} slots take the category name
#(without the "Category:" prefix) and the cmcontinue paging token, in that order
WIKIPEDIA_CATEGORY_URL = "https://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:{}&format=json&cmlimit=250&cmcontinue={}"
#OMDb title lookup; the {} slot takes the movie title
OMDBAPI_TITLE_URL = "http://www.omdbapi.com/?t={}&y=&plot=short&r=json&tomatoes=true"
#have the user pick this many movies before quitting
NUM_MOVIES = 3
#use this to build the URL to the movie on IMDB, e.g.: http://www.imdb.com/title/tt0093773
#the {} slot takes the IMDb id
IMDB_URL = "http://www.imdb.com/title/{}"
## API code ###################################################################
def clean_title(title):
    '''
    Removes "(film)", "(serial)", "(1998 film)", etc. from the given Wikipedia
    page title.

    "(film)", "(serial)" and "(series)" are removed wherever they occur. After
    that, the LAST parenthetical is dropped when it mentions "film)", e.g.
    "(2014 film)" or "(1998 American film)". Parentheses that belong to the
    title itself are kept:
    "(500) Days of Summer (2009 film)" -> "(500) Days of Summer".

    Returns the cleaned title with surrounding whitespace stripped.
    '''
    for marker in ("(film)", "(serial)", "(series)"):
        title = title.replace(marker, "")

    #"Godzilla (2014 film)" -> ('Godzilla ', '(', '2014 film)')
    #rpartition looks at the last "(" only, so earlier parentheses survive
    head, paren, tail = title.rpartition("(")
    #no "(" at all, or a last parenthetical that is not a film marker:
    #leave the title as it is instead of failing
    if paren and "film)" in tail:
        title = head
    return title.strip()
def filter_titles(members):
    '''
    Turn the raw entries of a Wikipedia categorymembers call into clean titles.

    Each entry's 'title' is read; titles containing 'Category:' are
    sub-categories and are skipped, all others are passed through
    clean_title(). The input order is kept.
    '''
    page_names = (entry['title'] for entry in members)
    return [clean_title(name) for name in page_names if 'Category:' not in name]
def fetch_wikipedia_titles(category):
    '''
    Returns the cleaned titles of all members returned by the Wikipedia
    categorymembers API call for `category`.

    `category` is the category name without the "Category:" prefix, e.g.
    "1990s_comedy_films". Requests are repeated with the 'cmcontinue' token
    of the previous response until a response has no 'continue' entry; the
    collected members are then passed through filter_titles().
    '''
    #percent-encode the name so characters such as "&" or "#" cannot change
    #the structure of the query string
    quoted_category = urllib.quote(category)
    cmcontinue = ""
    out = []
    while True:
        #the token comes out of the JSON response as text; encode before quoting
        quoted_continue = urllib.quote(cmcontinue.encode('utf8'))
        url = WIKIPEDIA_CATEGORY_URL.format(quoted_category, quoted_continue)
        response = urllib.urlopen(url)
        try:
            data = json.loads(response.read())
        finally:
            #release the connection even when the body is not valid JSON
            response.close()
        out.extend(data['query']['categorymembers'])
        if 'continue' not in data:
            break
        cmcontinue = data['continue']['cmcontinue']
    return filter_titles(out)
def is_valid_category(category):
    '''
    Check that `category` names a Wikipedia category with at least one title.

    Returns True when the category is usable. Raises RuntimeError when the
    name is blank (None, empty or whitespace only) or when fetching the
    category yields no titles.
    '''
    #whitespace-only names are blank too; reject them before hitting the network
    if not category or not category.strip():
        raise RuntimeError("Category must not be blank.")
    #try fetching it to ensure it actually returns some titles
    if not fetch_wikipedia_titles(category):
        raise RuntimeError("Category is empty.")
    return True
def fetch_omdb_info(title):
    '''
    Retrieve movie information from OMDb API's title search.

    `title` is the movie title to look up. Returns the decoded JSON response
    as a dict. Raises RuntimeError when the response carries an 'Error'
    entry (e.g. 'Movie not found!').
    '''
    #quote_plus keeps "&", "#", "+" and friends inside the t= parameter;
    #unquoted, a title like "Fast & Furious" would be cut off at the "&"
    quoted_title = urllib.quote_plus(title.encode('utf8'))
    url = OMDBAPI_TITLE_URL.format(quoted_title)
    response = urllib.urlopen(url)
    try:
        data = json.loads(response.read())
    finally:
        #release the connection even when the body is not valid JSON
        response.close()
    if data.get('Error'):
        raise RuntimeError("OMDb API returned {!r} when looking up {!r}".format(data['Error'], title))
    return data
## classes ####################################################################
class MovieData(object):
    '''
    Represents a movie with data from the omdbapi.com.

    `data` is the dict decoded from an OMDb title lookup. The keys read by
    this class are 'Title', 'Year', 'Plot', 'Genre', 'imdbRating', 'imdbID'
    and (optionally) 'Poster'.
    '''
    def __init__(self, data):
        self.data = data

    @property
    def title(self):
        '''The movie title, taken from the 'Title' entry.'''
        return self.data['Title']

    @property
    def imdb_url(self):
        '''URL of the movie's IMDb page, built from the 'imdbID' entry.'''
        return IMDB_URL.format(self.data['imdbID'])

    @property
    def poster_url(self):
        '''URL of the poster image, or '' when there is none.'''
        url = self.data.get('Poster')
        #'N/A' is the placeholder for "no poster"; treat a missing entry the
        #same way so callers always get a string back
        return '' if url in (None, 'N/A') else url

    def __str__(self):
        '''
        Multi-line plain-text summary (title, year, plot, genre, IMDb URL and
        rating), returned UTF-8 encoded.
        '''
        txt = [
            u"{0[Title]} ({0[Year]})",
            u"Plot: {0[Plot]}",
            u"Genre: {0[Genre]}",
            u"IMDb URL: {imdb_url}",
            u"IMDb rating: {0[imdbRating]}/10",
        ]
        txt = u"\n".join(txt)
        txt = txt.format(
            self.data,
            imdb_url=self.imdb_url,
        )
        return txt.encode('utf8')

    def __html__(self):
        '''
        HTML fragment with the same information as __str__.

        All values come from an external API, so each one is HTML-escaped
        before it is placed into the markup. Raises KeyError when one of the
        displayed entries is missing from the data.
        '''
        #local import: the module does not import this name at file level
        from xml.sax.saxutils import escape

        def as_markup(value):
            #also escape double quotes so the value is safe inside href="..."
            return escape(u"{}".format(value), {u'"': u"&quot;"})

        shown = ('Title', 'Year', 'Plot', 'Genre', 'imdbRating')
        fields = dict((key, as_markup(self.data[key])) for key in shown)
        html = u'''
        <h2>{0[Title]} ({0[Year]})</h2>
        <p>{0[Plot]}</p>
        <ul>
        <li><strong>Genre</strong>: {0[Genre]}</li>
        <li><strong>IMDb rating</strong>: {0[imdbRating]}/10</li>
        <li><a href="{imdb_url}">View this movie on IMDb</a>.</li>
        </ul>
        '''.format(
            fields,
            imdb_url=as_markup(self.imdb_url),
        )
        return html
class MoviePicker(object):
    '''
    Random movie picker functionality. Returns random movies and keeps track of picked movies.
    '''
    def __init__(self, titles):
        '''
        Initializes the MoviePicker with a list of `titles` that will be picked from randomly.
        '''
        #a shuffled copy: popping from it yields titles in random order without repeats
        self.titles = random.sample(titles, len(titles))
        self.picked = []

    def get_random_movie(self):
        '''
        Pick a random title, fetch its data from OMDB, and return it as a MovieData object.

        Titles whose lookup raises RuntimeError (e.g. 'Movie not found!') or
        returns nothing are discarded and the next title is tried. Raises
        IndexError once no titles are left.
        '''
        while True:
            if not self.titles:
                #same exception type as popping an empty list, but with a
                #message that says what actually went wrong
                raise IndexError("No more titles left to pick from.")
            title = self.titles.pop()
            try:
                movie = fetch_omdb_info(title)
            except RuntimeError:
                #retry "RuntimeError: OMDb API returned u'Movie not found!'" exceptions
                continue
            if movie:
                return MovieData(movie)

    def add_to_list(self, m):
        '''Add the given movie object `m` to the list of picked movies.'''
        self.picked.append(m)

    def get_list(self):
        '''Returns a list with the title of every picked movie, in the order they were added.'''
        return [m.title for m in self.picked]
## main #######################################################################
def main(category):
    '''
    Interactive session: show random movies from the Wikipedia `category`
    one at a time and ask whether to keep each one. Stops after NUM_MOVIES
    movies were accepted and prints the accepted titles.
    '''
    picker = MoviePicker(fetch_wikipedia_titles(category))
    accepted = 0
    while accepted < NUM_MOVIES:
        candidate = picker.get_random_movie()
        print(candidate)
        print("")
        reply = raw_input("Add movie to your list? ")
        #anything that does not start with "y"/"Y" counts as a no
        if not reply.lower().startswith('y'):
            continue
        picker.add_to_list(candidate)
        accepted += 1
    print("\n== Your movies ==\n")
    for name in picker.get_list():
        print(name)
if __name__ == "__main__":
    import sys
    #exactly one command line argument selects the category; anything else
    #falls back to the default
    cli_args = sys.argv[1:]
    category = cli_args[0] if len(cli_args) == 1 else DEFAULT_CATEGORY
    main(category)