-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRetrieveMovieList.py
72 lines (58 loc) · 2.13 KB
/
RetrieveMovieList.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import urllib2
from BeautifulSoup import BeautifulSoup
import datetime
def findDate():
    """Return the lowercase name of the current month for building the URL.

    The month is read from the system clock. Only January-April and
    September-December have a name here; for any other month the
    placeholder string "break" is returned.
    """
    namesByNumber = {
        1: "january",
        2: "february",
        3: "march",
        4: "april",
        9: "september",
        10: "october",
        11: "november",
        12: "december",
    }
    monthNumber = datetime.datetime.now().month
    # Months without an entry (5 through 8) fall back to the sentinel value.
    return namesByNumber.get(monthNumber, "break")
def monthCheck(month):
    """Report whether ``month`` is one of the accepted month names.

    Accepted names are the lowercase English names of January-April and
    September-December. The comparison is exact (case-sensitive).
    Returns True for an accepted name and False otherwise.
    """
    acceptedMonths = (
        "january", "february", "march", "april",
        "september", "october", "november", "december",
    )
    return month in acceptedMonths
def _movieEntry(soup, divId):
    """Return the first <p> inside the div with id ``divId`` as a list of parts.

    The paragraph markup is split on '<br />'; the first two parts are
    stripped of surrounding whitespace, any further parts are left as-is.
    Raises IndexError if the div has no <p> or the paragraph contains no
    '<br />', and AttributeError if no div with that id exists.
    """
    markup = str(soup.find('div', id=divId).findAll('p')[0])
    # Drops the first four and last four characters -- presumably the
    # "<p>" / "</p>" wrapper plus one leading character; verify against the
    # page markup before changing these offsets.
    parts = markup[4:-4].split('<br />')
    parts[0] = parts[0].strip()
    parts[1] = parts[1].strip()
    return parts


def scraper(month):
    """Fetch the movie page for ``month`` and return its four movie entries.

    ``month`` is inserted verbatim into the page URL. The result is a
    4-tuple of lists, one list per movie, each produced by splitting the
    movie's first paragraph on '<br />'. More work needs to be done for
    special cases. Currently depends on BeautifulSoup.

    Raises whatever urllib2.urlopen raises on a failed request, and
    IndexError / AttributeError if the page does not have the expected
    structure.
    """
    page = urllib2.urlopen("http://temple.edu/mcpb/thereel/" + month + ".html")
    try:
        soup = BeautifulSoup(page)
    finally:
        # The document is fully read by the parser above; release the
        # HTTP connection instead of leaving it to garbage collection.
        page.close()

    # NOTE(review): the second and third entries are read from "movie3" and
    # "movie2" respectively, exactly as in the original code. This looks
    # like it mirrors the page layout -- confirm before "fixing" the order.
    divIds = ("rightbodyevents", "movie3", "movie2", "movie4")
    return tuple([_movieEntry(soup, divId) for divId in divIds])