"""mcm"""
import pandas as pd


def mcm(tn, fp, fn, tp):
    """Compute common classification metrics from a confusion matrix like this:

             N    P
           +----+----+
           |    |    |
           | TN | FP |
           |    |    |
           +----+----+
           |    |    |
           | FN | TP |
           |    |    |
           +----+----+

    Predicted (observed) values run along the columns and actual (expected)
    values along the rows, with the positive class in the right column.
    With this layout, the cells read TN, FP, FN and TP, in that order, which
    is also the order of the function arguments.

    Parameters
    ----------
    tn : int
        True Negatives (TN): the total number of outcomes where the model
        correctly predicts the negative class.
    fp : int
        False Positives (FP): the total number of outcomes where the model
        incorrectly predicts the positive class.
    fn : int
        False Negatives (FN): the total number of outcomes where the model
        incorrectly predicts the negative class.
    tp : int
        True Positives (TP): the total number of outcomes where the model
        correctly predicts the positive class.

    Returns
    -------
    pandas.DataFrame
        One row per metric, with columns 'Metric' and 'Value'.

    Notes
    -----
    https://en.wikipedia.org/wiki/Confusion_matrix
    https://developer.lsst.io/python/numpydoc.html
    https://www.mathworks.com/help/risk/explore-fairness-metrics-for-credit-scoring-model.html

    Examples
    --------
    from sklearn.metrics import confusion_matrix
    data = pd.DataFrame({
        'y_true': ['Positive']*47 + ['Negative']*18,
        'y_pred': ['Positive']*37 + ['Negative']*10 + ['Positive']*5 + ['Negative']*13})
    tn, fp, fn, tp = confusion_matrix(y_true=data.y_true,
                                      y_pred=data.y_pred,
                                      labels=['Negative', 'Positive']).ravel()
    mcm(tn, fp, fn, tp)
    """
    df_mcm = []
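    # Each entry is a ['Metric name', value] pair; converted to a DataFrame at the end.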
    df_mcm.append(['Sensitivity', tp / (tp + fn)])
    df_mcm.append(['Recall', tp / (tp + fn)])
    df_mcm.append(['True Positive Rate (TPR)', tp / (tp + fn)])
    df_mcm.append(['Specificity', tn / (tn + fp)])
    df_mcm.append(['True Negative Rate (TNR)', tn / (tn + fp)])
    df_mcm.append(['Precision', tp / (tp + fp)])
    df_mcm.append(['Positive Predictive Value (PPV)', tp / (tp + fp)])
    df_mcm.append(['Negative Predictive Value (NPV)', tn / (tn + fn)])
    df_mcm.append(['False Negative Rate (FNR)', fn / (fn + tp)])
    df_mcm.append(['False Positive Rate (FPR)', fp / (fp + tn)])
    df_mcm.append(['False Discovery Rate (FDR)', fp / (fp + tp)])
    df_mcm.append(['Rate of Positive Predictions (RPP)', (fp + tp) / (tn + tp + fn + fp)])
    df_mcm.append(['Rate of Negative Predictions (RNP)', (fn + tn) / (tn + tp + fn + fp)])
    df_mcm.append(['Accuracy', (tp + tn) / (tp + tn + fp + fn)])
    df_mcm.append(['F1 Score', 2*tp / (2*tp + fp + fn)])
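    # Base rates are computed once and reused for the likelihood ratios below.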
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    fnr = fn / (fn + tp)
    tnr = tn / (tn + fp)
    df_mcm.append(['Positive Likelihood Ratio (LR+)', tpr / fpr])
    df_mcm.append(['Negative Likelihood Ratio (LR-)', fnr / tnr])
    return pd.DataFrame(df_mcm, columns=['Metric', 'Value'])
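

# A minimal usage sketch, not part of the original function: it reproduces the
# docstring example end to end. It assumes scikit-learn is installed to build
# the confusion matrix; the data below are illustrative only.
if __name__ == "__main__":
    from sklearn.metrics import confusion_matrix

    # 47 samples are truly Positive and 18 are truly Negative.
    data = pd.DataFrame({
        'y_true': ['Positive']*47 + ['Negative']*18,
        'y_pred': ['Positive']*37 + ['Negative']*10 + ['Positive']*5 + ['Negative']*13})

    # ravel() flattens the 2x2 matrix row by row, yielding tn, fp, fn, tp in
    # the argument order expected by mcm().
    tn, fp, fn, tp = confusion_matrix(y_true=data.y_true,
                                      y_pred=data.y_pred,
                                      labels=['Negative', 'Positive']).ravel()

    print(mcm(tn, fp, fn, tp))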