Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

basic refactor of analysis_functions.py #6

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 10 additions & 12 deletions analysis_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def crawl_the_chat(chat):
# To identify messages, regular expressions are used to match the date format
# Depending on the user's mobile clock settings, there are two possible clock patterns

global pattern_date
print("Crawling the chat")
# pattern_time_24hr = ", (0?[0-9]|1[0-9]|2[0-3]):([0-5][0-9])"
# pattern_time_12hr = ", (0?[0-9]|1[0-2]):([0-9]|[0-5][0-9]) [AP]M"
Expand Down Expand Up @@ -122,7 +123,7 @@ def crawl_the_chat(chat):
if "M" in last_time or "m" in last_time:
# AM/PM format - Convert to 24 hr format
AM_PM = time_split[-1].split(" ")[1]
if AM_PM == "PM" or AM_PM == "pm":
if AM_PM in ["PM", "pm"]:
hour += 12
if hour == 24:
hour = 12
Expand Down Expand Up @@ -289,11 +290,8 @@ def emojis_per_user(df, save_path, sort=False, plot=False):
grouped = df.groupby("Contact", as_index=False)
df_list = []
for name, group in grouped:
emoji_count = 0
msgs = group["Message"].str.split(" ")
for m in msgs:
if any(x in m for x in emoji.UNICODE_EMOJI):
emoji_count += 1
emoji_count = sum(1 for m in msgs if any(x in m for x in emoji.UNICODE_EMOJI))
df_list.append([name, emoji_count])

df_ = pd.DataFrame(df_list, columns=["Contact", "WordCount"])
Expand Down Expand Up @@ -366,11 +364,8 @@ def this_word_per_contact(
grouped = df.groupby("Contact", as_index=False)
df_list = []
for name, group in grouped:
word_count = 0
msgs = group["Message"].str.split(" ")
for m in msgs:
if any(x in m for x in word_list):
word_count += 1
word_count = sum(1 for m in msgs if any(x in m for x in word_list))
df_list.append([name, word_count])

df_ = pd.DataFrame(df_list, columns=["Contact", "WordCount"])
Expand Down Expand Up @@ -417,10 +412,13 @@ def average_words_per_message_per_contact(df, save_path, sort=False, plot=False)


def media_per_contact(df, save_path, sort=False, plot=False):
df_ = this_word_per_contact(
df, check_word=["<Media", "omitted>"], save_path=save_path, sort=sort, plot=plot
return this_word_per_contact(
df,
check_word=["<Media", "omitted>"],
save_path=save_path,
sort=sort,
plot=plot,
)
return df_


def emojis_per_msg_per_contact(df, save_path, sort=False, plot=False):
Expand Down