-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice.py
executable file
·146 lines (110 loc) · 5.35 KB
/
voice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
import os
import sys
import shutil
import re
from bs4 import BeautifulSoup as bs
from dateutil.parser import *
from dateutil.tz import *
from datetime import datetime,time
from unidecode import unidecode
from types import *
from text_message import TextMessage, Sender
class VoiceParser(object):
    """Parse a directory of Google Voice HTML files.

    Every element with class ``message`` found in the files is turned into
    a ``TextMessage`` attached to a ``Sender``.

    Attributes:
        conversations: dict mapping a ``Sender`` to the list of
            ``TextMessage`` objects parsed for that sender.
        phonebook: dict mapping a phone number to the name first seen
            with it (the name may be an empty string).
    """

    def __init__(self, directory):
        """Parse every regular file in ``directory``.

        Fills ``self.phonebook`` and ``self.conversations``.  Nothing is
        parsed (and nothing is printed) when ``directory`` is not an
        existing directory.
        """
        self.conversations = {}
        self.phonebook = {}
        # One Sender per phone number, so that all messages of a number
        # share a dictionary key even if Sender does not hash by value.
        self._senders = {}
        if not os.path.isdir(directory):
            return
        print("Opening " + directory + "...")
        # Sorted so that "first name seen wins" is reproducible.
        for filename in sorted(os.listdir(directory)):
            path = os.path.join(directory, filename)
            if not os.path.isfile(path):
                # os.listdir also returns sub-directories; open() would fail.
                continue
            with open(path) as handle:
                soup = bs(handle)
            for message_to_parse in soup.find_all(class_="message"):
                self._store_message(message_to_parse, filename)

    def _parse_fields(self, div, filename):
        """Return a dict with name/phone/text/time; None marks a failure."""
        getters = (
            ('name', self.get_name),
            ('phone', self.get_phone),
            ('text', self.get_text),
            ('time', self.get_time),
        )
        fields = {}
        for label, getter in getters:
            try:
                fields[label] = getter(div)
            except (AttributeError, KeyError, TypeError, ValueError,
                    OverflowError):
                # A missing tag/attribute or an unparsable time stamp.
                print("No " + label + " found in " + filename)
                fields[label] = None
        return fields

    def _store_message(self, div, filename):
        """Record one message div in the phonebook and conversations."""
        fields = self._parse_fields(div, filename)
        phone = fields['phone']
        if not phone:
            # Without a number the message cannot be attributed to anyone.
            return
        name = fields['name'] or ""
        sender = self._senders.get(phone)
        if sender is None:
            sender = Sender(phone, name)
            self._senders[phone] = sender
        if phone not in self.phonebook:
            print("Added " + name + " to phonebook ")
            self.phonebook[phone] = name
        if fields['time'] is None or fields['text'] is None:
            return
        message = TextMessage(fields['time'], sender, fields['text'])
        self.conversations.setdefault(sender, []).append(message)

    def _messages_for(self, number, name):
        """Return the messages stored for a phonebook entry, or [].

        Looks under the Sender created for ``number`` first, then under
        the plain name and number for dictionaries keyed by string.
        """
        senders = getattr(self, "_senders", {})
        for key in (senders.get(number), name, number):
            if key is not None and key in self.conversations:
                return self.conversations[key]
        return []

    def sort_conversations(self):
        """Sort every conversation list in place by ``send_time``."""
        for sender in self.conversations:
            self.conversations[sender].sort(
                key=lambda message: message.send_time)

    def get_name(self, div):
        """Return the text of the ``fn`` element of ``div``.

        Falls back to the phone number when ``div`` has no ``fn`` element.
        """
        tag = div.find(class_="fn")
        if tag is None:
            return self.get_phone(div)
        return tag.get_text()

    def get_phone(self, div):
        """Return the phone number from the ``tel`` element's href.

        A leading ``tel:`` scheme is removed; any other href is returned
        unchanged instead of losing its first four characters.
        """
        href = div.find(class_="tel")['href']
        if href.startswith("tel:"):
            return href[4:]
        return href

    def get_text(self, div):
        """Return the text of the ``q`` element of ``div``."""
        return div.find('q').get_text()

    def get_time(self, div):
        """Return the ``dt`` element's text parsed by ``parse``."""
        return parse(div.find(class_="dt").get_text())

    def check_and_make_directory(self, directory):
        """Make sure ``directory`` exists.

        Returns True when it already existed, False when it was created
        (missing parents included) and None when creation failed; the
        failure is reported on stdout rather than raised.
        """
        if os.path.isdir(directory):
            return True
        try:
            os.makedirs(directory)
        except OSError:
            print("Error creating directory: " + directory)
            return None
        return False

    def save_phonebook_in_file(self, directory_name):
        """Write the phonebook to ``./<directory_name>/phonebook``.

        One ``number: name`` line per entry.  Nothing is written when the
        phonebook is empty.
        """
        if not self.phonebook:
            return
        folder = "./" + directory_name
        self.check_and_make_directory(folder)
        # 'w' is required: the default read mode can neither create the
        # file nor write to it.
        with open(folder + "/phonebook", 'w') as phonebook_file:
            for number in sorted(self.phonebook):
                phonebook_file.write(
                    number + ": " + self.phonebook[number] + "\n")

    def save_conversations_in_files(self, directory_name):
        """Write each message under ``./<directory_name>/<contact>/``.

        The contact folder is the phonebook name, or the number when the
        name is empty.  The whitespace-separated parts of
        ``message.get_time_string()`` form the path: every part but the
        last is a folder, the last is the file the text is appended to.
        Contacts without messages are skipped.  Always returns None.
        """
        if not self.conversations:
            return None
        root = "./" + directory_name
        self.check_and_make_directory(root)
        for number in self.phonebook:
            name = self.phonebook[number] or number
            messages = self._messages_for(number, name)
            if not messages:
                continue
            base_path = os.path.join(root, name)
            self.check_and_make_directory(base_path)
            for message in messages:
                parts = message.get_time_string().split()
                if not parts:
                    continue
                folder = base_path
                # Split by position: comparing each part with the last one
                # by value misfires when an earlier part repeats it.
                for part in parts[:-1]:
                    folder = os.path.join(folder, part)
                    self.check_and_make_directory(folder)
                target = os.path.join(folder, parts[-1])
                with open(target, 'a') as conversation_file:
                    conversation_file.write(message.text)
        return None
if __name__ == "__main__":
    # Directory to parse: the first command-line argument when one is
    # given, otherwise the location this script was originally written for.
    if len(sys.argv) > 1:
        export_directory = sys.argv[1]
    else:
        export_directory = "/home/twcurrie/Projects/Voice/Takeout/Voice/Calls"
    parser = VoiceParser(export_directory)