This repository was archived by the owner on Apr 6, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdictionary.py
69 lines (53 loc) · 2.12 KB
/
dictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from urllib import urlencode
from urllib2 import Request, urlopen
# Address of the dictionary search page; it is both downloaded and used as
# the target of the search request.
_url = "http://dictionary.tamilcube.com/index.aspx"
# Module-level cache, empty until filled: the downloaded page text and the
# two hidden form-field values extracted from it.
_data = _viewstate = _eventvalidation = ''
def getTamilCube_Data(refresh=False):
    """Download the dictionary search page and cache it with its form tokens.

    The page is fetched only when nothing is cached yet or when *refresh* is
    true; the hidden form-field values are then extracted from the new copy.

    Args:
        refresh: when true, download the page again even if a copy is
            already cached, and re-extract the form tokens from it.

    Raises:
        IOError: if the page could not be downloaded.
    """
    global _data, _viewstate, _eventvalidation
    if _data == '' or refresh:
        page = getWebPage(_url)
        if page is None:
            # getWebPage yields None for a non-200 answer; caching that would
            # make later calls skip the download instead of retrying it.
            raise IOError("could not download %s" % _url)
        _data = page
        # setTamilCube_id only parses while both tokens are empty, so clear
        # them first; otherwise a refresh would keep the stale values.
        _viewstate = ''
        _eventvalidation = ''
        setTamilCube_id()
def getHeaders():
    """Return the HTTP request headers sent with every request to the site.

    Returns:
        A new dict mapping header name to value: a desktop-Firefox
        ``User-Agent``, an ``Accept`` list, and the form-encoded
        ``Content-Type`` used for the form submission.
    """
    return {
        # These must be real request-header names. The CGI-style spellings
        # (HTTP_USER_AGENT, HTTP_ACCEPT) are how a server exposes headers to
        # its scripts, not what a client sends, so with them the default
        # urllib2 User-Agent was still transmitted.
        'User-Agent': ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; '
                       'rv:1.9.0.13) Gecko/2009073022 Firefox/3.0.13'),
        'Accept': ('text/html,application/xhtml+xml,application/xml; q=0.9,'
                   '*/*; q=0.8'),
        'Content-Type': 'application/x-www-form-urlencoded',
    }
def getWebPage(url):
    """Fetch *url* using the module's request headers and return its body.

    Args:
        url: address to request.

    Returns:
        The response body when the status code is 200, otherwise None.
    """
    request = Request(url, headers=getHeaders())
    response = urlopen(request)
    try:
        if response.getcode() == 200:
            return response.read()
        return None
    finally:
        # Release the connection whether or not the body was read.
        response.close()
def _hidden_field_value(page, field_id):
    """Return the value of the hidden input *field_id* found in *page*.

    Only the first line containing the field is used, and only values that
    start with "/" are recognised.

    Raises:
        IndexError: if no line of *page* contains the field.
    """
    marker = 'id="%s" value="/' % field_id
    for line in page.split('\n'):
        if marker in line:
            # The marker already consumed the leading "/", so put it back and
            # stop at the quote that closes the value attribute.
            return '/' + line.split(marker, 1)[1].split('"', 1)[0]
    raise IndexError("hidden form field %s not found in page" % field_id)


def setTamilCube_id():
    """Extract the hidden form tokens from the cached page into the globals.

    Reads the module-level ``_data`` and stores the ``__VIEWSTATE`` and
    ``__EVENTVALIDATION`` values in ``_viewstate`` and ``_eventvalidation``.
    Does nothing unless both tokens are currently empty.

    Raises:
        IndexError: if either hidden field is missing from the page; in that
            case neither global is modified.
    """
    global _viewstate, _eventvalidation
    if _viewstate == '' and _eventvalidation == '':
        page = _data or ''
        # Resolve both values before storing either one. Storing only the
        # first and then failing on the second would leave the guard above
        # permanently false and the tokens half-initialised.
        viewstate = _hidden_field_value(page, '__VIEWSTATE')
        eventvalidation = _hidden_field_value(page, '__EVENTVALIDATION')
        _viewstate = viewstate
        _eventvalidation = eventvalidation
def getTamilCube_FormData(search_term):
    """Build the URL-encoded body of the dictionary search form.

    Args:
        search_term: text placed in the form's ``name`` field.

    Returns:
        The form fields, including the cached ``__VIEWSTATE`` and
        ``__EVENTVALIDATION`` values, encoded by ``urlencode``.
    """
    fields = [
        ('__EVENTTARGET', ''),
        ('__EVENTARGUMENT', ''),
        ('__VIEWSTATE', _viewstate),
        ('__EVENTVALIDATION', _eventvalidation),
        ('name', search_term),
        ('Submit1', 'Search'),
    ]
    return urlencode(fields)
def TamilCube_Eng2Tm(search_term):
    """Look up *search_term* on the site and yield the matching result lines.

    The search form is submitted to the dictionary page and the response is
    scanned line by line.

    Args:
        search_term: word to look up; it is also the substring used to pick
            result lines out of the response.

    Yields:
        Each response line that contains *search_term* and none of the
        substrings 'script', 'span' or 'input', stripped of surrounding
        whitespace and cut at the first '.<'.
    """
    encodedFields = getTamilCube_FormData(search_term)
    request = Request(_url, encodedFields, getHeaders())
    response = urlopen(request)
    try:
        for line in response:
            if search_term not in line:
                continue
            if 'script' in line or 'span' in line or 'input' in line:
                # Presumably page markup that only echoes the term rather
                # than a dictionary result -- TODO confirm against the site.
                continue
            yield line.strip().split('.<')[0]
    finally:
        # Runs when the generator is exhausted, closed or collected, so the
        # connection is released even if the caller stops iterating early.
        response.close()
# Fetch the search page and extract its form tokens when this statement runs,
# so the lookup function has them available.
getTamilCube_Data()