-
Notifications
You must be signed in to change notification settings - Fork 0
/
0_Data_Extraction.py
76 lines (49 loc) · 1.62 KB
/
0_Data_Extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# coding: utf-8
# In[18]:
# https://marcobonzanini.com/2015/03/09/mining-twitter-data-with-python-part-2/
# Tweepy library
import os
import tweepy
import sys
from tweepy.streaming import StreamListener
from tweepy import Stream
# Load the Twitter credentials by executing ``config.py`` from the parent of
# the current working directory; its top-level names end up in ``config``.
config = {}
config_path = os.path.abspath('..')
config_name = r'config.py'
config_file = os.path.join(config_path, config_name)
# NOTE(review): exec() runs whatever code config.py contains -- only point
# this at a trusted, local file.
# The context manager guarantees the file handle is closed after reading.
with open(config_file) as config_source:
    exec(config_source.read(), config)
# Key and Secret (a missing entry in config.py raises KeyError here)
consumer_key = config['TWITTER_KEY']
consumer_secret = config['TWITTER_SECRET']
access_token = config['TOKEN']
access_token_secret = config['TOKEN_SECRET']
# Hashtag to track (without the leading '#'); also used as the output file stem
hashtag = 'trump'
# In[9]:
# Authentication
# Build an OAuth handler from the consumer key/secret, attach the access
# token pair to it, and create an API client from that handler.
# `auth` is what the streaming code further down hands to Stream; `api` is
# not referenced by the visible code (presumably kept for interactive use).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# In[10]:
# Data Extraction - Stream
# https://dev.twitter.com/streaming/overview
class MyListener(StreamListener):
    """Stream listener that appends every received message to ``<hashtag>.json``.

    Each call to ``on_data`` is counted; once ``max_tweets`` calls have been
    made the process is terminated with ``sys.exit``. The output file name is
    derived from the module-level ``hashtag`` variable.
    """

    # Class-level default; the first ``+=`` in on_data creates a per-instance
    # counter that shadows it.
    tweet_number = 0

    def __init__(self, max_tweets, api=None):
        """Create a listener that stops after ``max_tweets`` messages.

        Args:
            max_tweets: number of ``on_data`` calls after which the process
                exits.
            api: optional value forwarded unchanged to
                ``StreamListener.__init__``.
        """
        # Initialise the base class so whatever state it sets up exists on
        # this instance (in tweepy 3.x this is ``self.api`` -- confirm for
        # the installed version).
        super(MyListener, self).__init__(api)
        self.max_tweets = max_tweets  # max number of tweets

    def on_data(self, data):
        """Append ``data`` to the output file and enforce the message limit.

        A failed write is reported on stdout and does not stop the stream;
        the message still counts towards the limit.

        Args:
            data: the raw payload handed over by the stream; written as-is.

        Returns:
            True when the limit has not been reached yet.

        Raises:
            SystemExit: once ``max_tweets`` messages have been received.
        """
        self.tweet_number += 1
        try:
            with open(hashtag + '.json', 'a') as f:
                f.write(data)
        except Exception as e:
            # Best-effort write. Catch Exception rather than BaseException so
            # that KeyboardInterrupt / SystemExit are not swallowed here.
            print('Error on_data: %s' % str(e))
        if self.tweet_number >= self.max_tweets:
            sys.exit('Limit of ' + str(self.max_tweets) + ' tweets reached.')
        # Explicit result, consistent with on_error below.
        return True

    def on_error(self, status):
        """Print the error status reported by the stream and return True.

        NOTE(review): returning True presumably keeps the stream connected
        after an error; consider returning False for HTTP 420 (rate limiting)
        -- confirm against the tweepy streaming documentation.
        """
        print(status)
        return True
# In[19]:
# Start the filtered stream: English-language tweets that match the hashtag.
# The listener is created with a limit of 10000 messages.
listener = MyListener(10000)
track_terms = ['#' + hashtag]
twitter_stream = Stream(auth, listener)
twitter_stream.filter(track=track_terms, languages=['en'])