-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
69 lines (54 loc) · 2.18 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from collections import Counter
from typing import Union, List

import numpy as np
import pandas as pd
from sklearn.metrics import cohen_kappa_score
def compute_cohen_on_pandas_columns(onion_data: pd.DataFrame, emma_data: pd.DataFrame, gems: Union[str, List[str]]):
    """
    Compute Cohen's kappa between two dataframes over one or more selected columns.

    Each dataframe is treated as an independent "rater". The cells selected by
    ``gems`` are flattened into a single sequence per rater (per the original
    author, each cell is a song-emotion pair), and the two sequences are
    compared position by position.

    :param onion_data: dataframe holding the first rater's labels
    :param emma_data: dataframe holding the second rater's labels
    :param gems: column label, or list of column labels, to select from both dataframes
    :return: the value returned by ``cohen_kappa_score`` for the two flattened sequences
    """
    def _flatten(frame: pd.DataFrame) -> list:
        # Select the requested column(s) and collapse them to a flat 1-D list.
        return list(frame[gems].to_numpy().reshape(-1))

    first_rater = _flatten(onion_data)
    second_rater = _flatten(emma_data)
    return cohen_kappa_score(first_rater, second_rater)
def compute_contingency_table(onion_data: pd.DataFrame, emma_data: pd.DataFrame, gems: Union[str, List[str]]):
    """
    Compute the 2x2 contingency table between two dataframes storing binary data.

    The cells selected by ``gems`` (one column if it is a string, several if it
    is a list of strings) are flattened for each dataframe and paired up
    position by position. Pairs whose values are not both 0 or 1 are ignored.
    If the flattened selections differ in length, pairing stops at the shorter one.

    :param onion_data: dataframe providing the first value of every pair (table rows)
    :param emma_data: dataframe providing the second value of every pair (table columns)
    :param gems: column label, or list of column labels, to select from both dataframes
    :return contingency: Contingency table as numpy array, laid out as
        ``[[n_00, n_01], [n_10, n_11]]`` where ``n_ij`` counts the pairs with
        value ``i`` in ``onion_data`` and ``j`` in ``emma_data``
    """
    onion_values = onion_data[gems].to_numpy().ravel()
    emma_values = emma_data[gems].to_numpy().ravel()

    # Tally every (onion, emma) pair once; a Counter returns 0 for pairs that never occur.
    pair_counts = Counter(zip(onion_values, emma_values))

    contingency = np.array([
        [pair_counts[(0, 0)], pair_counts[(0, 1)]],
        [pair_counts[(1, 0)], pair_counts[(1, 1)]],
    ])
    return contingency
def convert_to_recbole(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Build a two-column frame with an item id and the indices of its non-zero features.

    The first column of ``dataframe`` is taken as the item identifier and every
    remaining column as a feature. For each row, the zero-based positions
    (within the feature columns) of the non-zero entries are collected into a list.

    The input dataframe is left untouched: the result is built on a copy, so
    the function can be called repeatedly on the same frame.

    :param dataframe: frame whose first column is the item id and whose other
        columns are feature values
    :return: new dataframe with columns ``item_id:token`` (the first input
        column) and ``emotions:token_seq`` (list of non-zero feature indices
        per row), sharing the input's index
    """
    feature_columns = list(dataframe.columns[1:])
    feature_matrix = dataframe[feature_columns].to_numpy()

    # One list of non-zero feature positions per row.
    token_sequences = [list(np.nonzero(row)[0].astype(int)) for row in feature_matrix]

    # Copy the id column so renaming and adding a column never touch the caller's frame.
    recbole_frame = dataframe.iloc[:, [0]].copy()
    recbole_frame.columns = ['item_id:token']
    recbole_frame['emotions:token_seq'] = token_sequences
    return recbole_frame