-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfetch_youtube_comments.py
124 lines (85 loc) · 4.02 KB
/
fetch_youtube_comments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os, csv
import pandas as pd
import langid
import googleapiclient.discovery
def setup_init(developer_key=None):
    """Build and return a YouTube Data API v3 client.

    Args:
        developer_key: API key used to authenticate requests. When omitted,
            the ``YOUTUBE_API_KEY`` environment variable is consulted, and
            only if that is unset does the function fall back to the key
            embedded below.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``
        for the ``youtube`` service, version ``v3``.
    """
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"

    # NOTE(review): the literal key below is committed to source control.
    # It is kept only so existing callers keep working; rotate it and supply
    # the replacement through YOUTUBE_API_KEY (or the developer_key argument),
    # then delete this fallback.
    DEVELOPER_KEY = (
        developer_key
        or os.environ.get("YOUTUBE_API_KEY")
        or "AIzaSyC8aUtdCpsAuEbTnnqH9YyftynTOXkBAJE"
    )

    return googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=DEVELOPER_KEY)
def get_search_response(youtube, query, part="id,snippet", type_video="video", maxResults=5,
                        publishedAfter=None, publishedBefore=None):
    """Run a ``search().list`` request on *youtube* and return its response.

    Args:
        youtube: Client object exposing ``search().list(...).execute()``.
        query: Search term, sent as the ``q`` parameter.
        part: Value for the ``part`` parameter.
        type_video: Value for the ``type`` parameter.
        maxResults: Value for the ``maxResults`` parameter.
        publishedAfter: Value for the ``publishedAfter`` parameter.
        publishedBefore: Value for the ``publishedBefore`` parameter.

    Returns:
        Whatever ``execute()`` returns for the search request.
    """
    search_params = {
        "part": part,
        "type": type_video,
        "q": query,
        "maxResults": maxResults,
        "publishedAfter": publishedAfter,
        "publishedBefore": publishedBefore,
    }
    # Build the request and execute it in one go.
    return youtube.search().list(**search_params).execute()
def get_comment_response(youtube, videoId, part="id,snippet,replies", maxResults=50, order="relevance"):
    """Run a ``commentThreads().list`` request for one video and return its response.

    Args:
        youtube: Client object exposing ``commentThreads().list(...).execute()``.
        videoId: Value for the ``videoId`` parameter.
        part: Value for the ``part`` parameter.
        maxResults: Value for the ``maxResults`` parameter.
        order: Value for the ``order`` parameter.

    Returns:
        Whatever ``execute()`` returns for the comment-threads request.
    """
    thread_params = {
        "part": part,
        "videoId": videoId,
        "maxResults": maxResults,
        "order": order,
    }
    return youtube.commentThreads().list(**thread_params).execute()
def is_english_comment(text):
    """Return True when ``langid.classify`` labels *text* as ``'en'``."""
    # classify() returns a sequence whose first element is the language code.
    language_code = langid.classify(text)[0]
    return language_code == 'en'
def main():
    """Search YouTube for a hashtag and export English top-level comments to CSV.

    Runs a video search for ``#BharatJodoYatra`` (at most 5 results), fetches
    the comment threads of each video found, keeps the top-level comments
    that ``is_english_comment`` accepts, and writes them to
    ``yt_comments_<search query>.csv`` (relative path) with one row per
    comment.

    Videos whose comments cannot be fetched are reported on stdout and
    skipped.
    """
    youtube = setup_init()

    # Fetching search results
    search_query = "#BharatJodoYatra"
    response_search = get_search_response(
        youtube, query=search_query, maxResults=5,
        publishedAfter=None, publishedBefore=None)

    # Column order of the exported CSV.
    columns = [
        "videoID",
        "textDisplay",
        "textOriginal",
        "authorDisplayName",
        "authorChannelId",
        "publishedAt",
    ]
    rows = []

    for item in response_search["items"]:
        video_id = str(item["id"]["videoId"])

        # Fetching comments from search videos
        try:
            response_comments = get_comment_response(youtube, videoId=video_id)
        except Exception as E:
            print("Exception occured", E)
            # Skip this video. Falling through would either hit an unbound
            # name (first video) or re-export the previous video's comments
            # under this video's ID.
            continue

        for thread in response_comments["items"]:
            comment = thread["snippet"]["topLevelComment"]["snippet"]
            text_original = comment["textOriginal"]
            if not is_english_comment(text_original):
                continue

            # NOTE(review): authorChannelId appears to be absent for some
            # comments (confirm against the API reference); record None
            # instead of aborting the whole export on a KeyError.
            author_channel = comment.get("authorChannelId") or {}

            rows.append({
                "videoID": video_id,
                "textDisplay": comment["textDisplay"],
                "textOriginal": text_original,
                "authorDisplayName": comment["authorDisplayName"],
                "authorChannelId": author_channel.get("value"),
                "publishedAt": comment["publishedAt"],
            })

    # Passing columns explicitly keeps the header stable even with zero rows.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(r'yt_comments_' + str(search_query) + '.csv', index=False, header=True)
# Run the export only when executed as a script, so that importing this
# module does not trigger network requests or write a CSV file.
if __name__ == "__main__":
    main()