-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtweet.py
67 lines (57 loc) · 2.73 KB
/
tweet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from prettytable import PrettyTable
from ntscraper import Nitter
import csv
# Campaign hashtags/slogans: a tweet whose text contains any of these is
# counted as buzzer content and left out of the output.
# NOTE: every entry needs its trailing comma -- adjacent string literals are
# silently concatenated by Python, which would merge two tags into one.
buzzer = [
    "PakPrabowo", "DadiPresidenku", "PrabowoGibran", "GaweAyem",
    "BersamaIndonesiaMaju", "PrabowoGemoy", "KodeKita08Gemoy", "2024gantiwarna",
    "IndonesiaSentris", "02Melanjutkan", "AnakMudaIndonesiaEmas", "MenangSeputaran",
]

# Account handles whose tweets are counted as non-relevant and left out of
# the output (matched as substrings of the tweet author's username).
official = [
    "@liputan6dotcom", "@tempodotco", "@Metro_TV", "@kompascom",
    "@KompasTV", "@hariankompas", "@TirtoID", "@pikiran_rakyat",
    "@CNNIndonesia", "@KATADATAcoid", "@jpnncom", "@Beritasatu",
    "@GATRA_com", "@antaranews", "@mediasemut", "@DppAliansi",
    "@RepelitaO", "@kabaridcom", "@IDNTimes", "@kumparan",
    "@voidotid", "@okezonenews", "@OposisiCerdas", "@OfficialDPP_PBB",
    "@cnbcindonesia", "@VIVAcoid", "@democrazymedia", "@KompasData",
]

# Counters for each reason a tweet is dropped; reported at the end of the run.
nbuzzer, invalid, irrelevant, duplicate = 0, 0, 0, 0

# Search parameters handed to the scraper.
keyword = "prabowo gibran"  # search term
jumlah = 1000               # number of tweets requested
start = "2023-12-25"        # passed as `since`
end = "2023-12-26"          # passed as `until`

# Destination CSV file for the tweets that survive filtering.
output = "tweet.csv"
if __name__ == "__main__":
    # Scrape tweets for `keyword`, drop empty / buzzer / non-relevant /
    # duplicate ones, then print the survivors as a table and save them as CSV.
    scraper = Nitter(log_level=1, skip_initial_check=True)
    results = scraper.get_tweets(keyword, mode="term", number=jumlah, since=start, until=end)
    tweets = results['tweets']

    table = PrettyTable()
    table.field_names = ["Date", "Username", "Text", "Comments", "Likes"]

    # Rows that survive filtering. Kept in our own list so the duplicate check
    # and the CSV export do not depend on PrettyTable's private `_rows`.
    kept_rows = []

    for tweet in tweets:
        text = tweet['text']
        username = tweet['user']['username']
        stats = tweet['stats']

        # The empty check must come first: "" is a substring of every string,
        # so an empty text would otherwise be miscounted as a duplicate (and a
        # None text would crash the substring tests below).
        if not text or not text.strip():
            invalid += 1
        elif any(tag in text for tag in buzzer):
            nbuzzer += 1
        elif any(account in username for account in official):
            irrelevant += 1
        # A tweet is a duplicate when its text is contained in an already
        # kept tweet's text (index 2 is the "Text" column).
        elif any(text in row[2] for row in kept_rows):
            duplicate += 1
        else:
            # One value per declared column; PrettyTable raises ValueError
            # when the row length differs from `field_names`.
            row = [tweet['date'], username, text, stats['comments'], stats['likes']]
            kept_rows.append(row)
            table.add_row(row)

    table.align["Text"] = "l"
    table.max_width["Text"] = 100

    # newline="" lets the csv module control line endings itself.
    with open(output, "w", newline="", encoding="utf-8") as outfile:
        csv_writer = csv.writer(outfile)
        csv_writer.writerow(table.field_names)
        csv_writer.writerows(kept_rows)

    print(table)
    print(f"\n\n [*] Berhasil menghapus {invalid} tweet kosong")
    print(f" [*] Berhasil menghapus {nbuzzer} tweet yang terdeteksi sebagai buzzer")
    print(f" [*] Berhasil menghapus {irrelevant} tweet yang terdeteksi akun non-relevan")
    print(f" [*] Berhasil menghapus {duplicate} tweet yang terdeteksi sebagai duplikat")
    print(f" [*] Data {len(kept_rows)} tweet disimpan di {output}")