-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscraper.py
123 lines (96 loc) · 3.01 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import sys
# Try to import pyquery. If the module is not found, print an error and quit program
try:
from pyquery import PyQuery as pq
except:
print("\nError. The 'pyquery' module was not found on your system.\nInstall it by running 'pip install pyquery'")
sys.exit(4)
class Subject:
    """A subject identified by its name, carrying a display color.

    Two subjects are equal when their names match; the color takes no part
    in equality or hashing.
    """

    def __init__(self, name, color):
        self.name = name
        self.color = color

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of raising AttributeError on a missing ``name``.
        if not isinstance(other, Subject):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable in Python 3;
        # hash on the same field that equality compares.
        return hash(self.name)
class Lecture:
    """A single lecture slot: the day it happens and its start/finish values."""

    def __init__(self, day, start, finish):
        # Stored exactly as given; no validation or conversion is done here.
        self.day, self.start, self.finish = day, start, finish
class Class:
    """A class entry for a subject, holding the lectures that belong to it.

    ``subject``, ``comp`` and ``descr`` together identify the entry (see
    ``isEqual``); ``include`` is a flag reported as text by ``getInclude``.
    """

    def __init__(self, subject, comp, descr, include):
        self.subject, self.comp = subject, comp
        self.descr, self.include = descr, include
        # Each instance starts with its own empty list of lectures.
        self.lectures = []

    def isEqual(self, c):
        """Return whether ``c`` has the same subject, comp and descr as this entry."""
        if not c.subject == self.subject:
            return False
        if not c.comp == self.comp:
            return False
        return c.descr == self.descr

    def getInclude(self):
        """Return the include flag as the lowercase string "true" or "false"."""
        return "true" if self.include else "false"
# Classes gathered so far; getClasses() rebinds this to a fresh list on each run.
classes = []

# Portuguese weekday names (the first word of an event title) mapped to English.
dayOfWeek = {
    "Segunda": "Monday",
    "Terça": "Tuesday",
    "Quarta": "Wednesday",
    "Quinta": "Thursday",
    "Sexta": "Friday",
    "Sábado": "Saturday",
}
def findClass(list, c):
    """Return the entry of ``list`` that matches ``c``, adding ``c`` if none does.

    Matching is decided by ``c.isEqual(entry)``; the first matching entry
    wins. When nothing matches, ``c`` is appended to ``list`` and returned.
    """
    for existing in list:
        if c.isEqual(existing):
            break
    else:
        # Loop finished without a match: register the new entry.
        list.append(c)
        existing = c
    return existing
def getTime(time):
    """Convert a colon-separated "hours:minutes" string into total minutes."""
    fields = time.split(":")
    hours = int(fields[0])
    minutes = int(fields[1])
    return hours * 60 + minutes
def parseSubject(subject):
    """Fold a trailing "-<numeral>" part of a subject name into a digit suffix.

    The text after the last "-" is examined:

    * if it contains "I" or "V" it is read as a Roman numeral and replaced
      by its decimal value ("Calc-II" -> "Calc2", "Calc-IV" -> "Calc4");
    * if it is all digits, only the "-" is dropped ("Prog-2" -> "Prog2");
    * otherwise, or when there is no "-" at all, ``subject`` is returned
      unchanged.

    Only "I" and "V" are recognised; any other characters in a suffix that
    contains one of them are ignored when computing the value.
    """
    if "-" not in subject:
        return subject

    prefix, _, suffix = subject.rpartition("-")

    if "I" in suffix or "V" in suffix:
        # Every "I" is worth 1 and every "V" is worth 5, except that an "I"
        # directly before a "V" is subtractive: "IV" is 4, not 6. Each such
        # pair was over-counted by 2 in the plain sum, so take that back.
        value = (
            suffix.count("I")
            + 5 * suffix.count("V")
            - 2 * suffix.count("IV")
        )
        return prefix + str(value)

    if suffix.isdigit():
        return prefix + suffix

    return subject
def handleEvent(index, node):
    """Parse one timetable event element and record its lecture in ``classes``.

    Intended as the callback for PyQuery's ``each()``.

    Args:
        index: Position of the element in the selection; not used.
        node: The event element. Its ``title`` attribute supplies the weekday
            and the "start-finish" times, and its ``bgcolor`` attribute
            supplies the color (the text after "#").

    Raises:
        KeyError: If an attribute is missing or the weekday is not a key of
            ``dayOfWeek``.
        IndexError: If the title, color or info text lacks an expected part.
    """
    event = pq(node)

    # Title is split on spaces: first token is the weekday, second is
    # "<start>-<finish>" with each side in the format getTime() accepts.
    title = node.attrib['title'].split(" ")
    color = node.attrib['bgcolor'].split("#")[1]
    day = dayOfWeek[title[0]]
    times = title[1].split("-")
    start = getTime(times[0])
    finish = getTime(times[1])

    subject = parseSubject(event.find("span").text().split(" ")[0])

    info = event.find("c").text().split(" ")
    descr = info[0].split(" ")[-1]
    # Second token is wrapped in one character on each side; strip both.
    comp = info[1][1:-1]

    # Drop the comp code only when descr actually starts with it; a match
    # elsewhere in descr must not cause unrelated leading characters to be cut.
    if descr.startswith(comp):
        descr = descr[len(comp):]

    # If no descr is specified, hardcode value 1 to avoid a
    # NullPointerException on SmartTimeTable
    if not descr:
        descr = "1"

    candidate = Class(Subject(subject, color), comp, descr, comp != "OT")

    # Reuse the already-registered Class when one matches, otherwise register
    # this one; either way the lecture is attached to the registered entry.
    registered = findClass(classes, candidate)
    registered.lectures.append(Lecture(day, start, finish))
def getClasses(url):
    """Load the page at ``url`` and return the classes found in its timetable.

    The module-level ``classes`` list is replaced with a fresh one, then
    ``handleEvent`` is run on every ``.event`` element under ``#gvHorario``.
    The returned list is that same module-level list.
    """
    global classes
    classes = []
    page = pq(url=url)
    events = page("#gvHorario").find(".event")
    events.each(handleEvent)
    return classes