-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_protein_impute.py
37 lines (29 loc) · 1.12 KB
/
test_protein_impute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
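Impute randomly removed values of a protein data set with the NNMC model.

Loads data set 'DS5' from protein.xlsx, normalizes it, removes a fraction of
the values at random, completes the matrix with nnmc_model and prints the
mean squared error over the entries whose mask value is zero.

Created on Thu Aug 24 19:02:17 2017
@author: mehmetaktukmak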
"""
import numpy as np
from utils import random_data_remove
from utils import read_data_set
from utils import normalize
from nnmc_model import nnmc_model
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Fraction of values handed to random_data_remove for removal.
fraction_of_missings = 0.1

# ---------------------------------------------------------------------------
# Load and normalize the data
# ---------------------------------------------------------------------------
# read_data_set returns the data matrix plus a second array (Miss); the
# non-zero entries of Miss are reported as missing values below.
X, Miss = read_data_set('protein.xlsx', 'DS5')

# normalize returns three values; only the first one is used by this script.
X_norm, _, _ = normalize(X)

print("INFO: Dataset loaded successfully!")

n_samples, n_features = X.shape[0], X.shape[1]
print("INFO: Sample size = {:d}, Feature size = {:d}".format(n_samples, n_features))

missing_percent = np.count_nonzero(Miss) * 100 / (n_samples * n_features)
print("INFO: {:.1f}% of values are missing in the dataset".format(missing_percent))

# ---------------------------------------------------------------------------
# Randomly remove values from the normalized data
# ---------------------------------------------------------------------------
X_train, X_mask, X_miss = random_data_remove(
    X_norm,
    fraction_of_missings,
    Miss,
)
removed_percent = fraction_of_missings * 100
print("INFO: {:.1f}% of data removed".format(removed_percent))

# ---------------------------------------------------------------------------
# Complete the matrix with the NNMC model
# ---------------------------------------------------------------------------
model = nnmc_model()
X_out = model.mc_complete(X_train, X_mask, X_miss)

# ---------------------------------------------------------------------------
# Evaluate the mean squared error
# ---------------------------------------------------------------------------
# Only positions where the mask equals 0 take part in the score; at those
# positions the model output is compared against the values held in X_miss.
miss_glob = np.nonzero(X_mask == 0)
residual = X_out[miss_glob] - X_miss[miss_glob]
n_scored = miss_glob[0].size
MSE = np.sum(np.power(residual, 2)) / n_scored
print("MSE = %f" % MSE)