Skip to content

Commit

Permalink
updated geo_filter so it doesn't use googletrans
Browse files Browse the repository at this point in the history
  • Loading branch information
DE0CH committed Jun 21, 2020
1 parent c822ddc commit 0161ce9
Showing 1 changed file with 3 additions and 18 deletions.
21 changes: 3 additions & 18 deletions geo_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@
import multiprocessing
import coloredlogs, logging
import pickle
import googletrans


def worker(q, geo_filtered_dict):
translator = googletrans.Translator()
while True:
path, dirs, files = q.get()
process_files(path, dirs, files, geo_filtered_dict, translator)
process_files(path, dirs, files, geo_filtered_dict)
q.task_done()


def process_files(path, dirs, files, geo_filtered_dict, translator):
def process_files(path, dirs, files, geo_filtered_dict):
for file_name in files:
if os.path.join(path, file_name) in geo_filtered_dict:
continue
Expand All @@ -35,15 +33,7 @@ def process_files(path, dirs, files, geo_filtered_dict, translator):
continue
tweet = json.loads(line)
if tweet.get('place', None) is not None and tweet['place']['country_code']:
text = tweet['text'].encode('utf-8').decode('utf-8')
if tweet.get('lang', '') == 'en':
tweet["translated_text"] = text
else:
tweet["translated_text"] = translator.translate(
text,
dest='en').text
out_s = json.dumps(tweet) + '\n'
out_file.write(out_s)
out_file.write(line)
except Exception:
logging.exception('failed to process tweet')
logging.info('finished ' + os.path.join(path, file_name))
Expand All @@ -70,11 +60,6 @@ def process_files(path, dirs, files, geo_filtered_dict, translator):
for i in range(100):
multiprocessing.Process(target=worker, args=(q, geo_filtered_dict)).start()
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'twitter-sentiment-analysis-f22ce784b0a8.json'
translator = googletrans.Translator(
service_urls=[
'translate.google.com.hk',
]
)
try:
for path, dirs, files in os.walk('untarred'):
q.put((path, dirs, files))
Expand Down

0 comments on commit 0161ce9

Please sign in to comment.