-
Notifications
You must be signed in to change notification settings - Fork 0
/
utilities.py
57 lines (41 loc) · 1.83 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def import_train_data(train_file='train_2008.csv', one_d_array=True):
    """Load the training CSV and split it into features and labels.

    The file is parsed as comma-separated floats and the first parsed row
    is discarded. Columns 3 through 381 become the feature matrix and
    column 382 supplies the labels.

    Args:
        train_file: Path of the CSV file to read.
        one_d_array: When true, the labels are returned as a flat Python
            list of scalars; otherwise as an ``(n_rows, 1)`` array.

    Returns:
        A ``(data_x, data_y)`` tuple: the feature matrix and the labels.
    """
    label_col = 382
    feature_cols = np.arange(3, label_col)

    # The first parsed row is dropped (presumably the header line, which
    # does not parse as floats).
    rows = np.genfromtxt(train_file, delimiter=',', dtype=float)[1:]

    data_x = rows[:, feature_cols]
    if one_d_array:
        data_y = list(rows[:, label_col])
    else:
        data_y = rows[:, [label_col]]
    return data_x, data_y
def import_test_data(test_file='test_2008.csv'):
    """Load a test CSV and return its feature matrix.

    The file is parsed as comma-separated floats, the first parsed row is
    discarded, and columns 3 through 381 are kept.

    Args:
        test_file: Path of the CSV file to read.

    Returns:
        A 2-D float array with one row per data line and 379 columns.
    """
    feature_cols = np.arange(3, 382)
    parsed = np.genfromtxt(test_file, delimiter=',', dtype=float)
    # Skip the first parsed row, then keep only the feature columns.
    body = parsed[1:]
    return body[:, feature_cols]
def normalize_data(train_x, test_x, test_x_2):
    """Standardize three feature matrices using the training statistics.

    Each column is shifted by the training mean and divided by the
    training standard deviation. Any column that ends up containing a NaN
    or infinity in *any* of the three matrices (for example a
    zero-variance training column) is removed from *all* three, so the
    returned matrices always share the same set of columns.

    Args:
        train_x: 2-D training feature matrix; supplies the mean and
            standard deviation.
        test_x: 2-D feature matrix with the same number of columns.
        test_x_2: Second 2-D feature matrix with the same number of columns.

    Returns:
        A ``(train_x, test_x, test_x_2)`` tuple of the normalized matrices
        restricted to the columns that are finite everywhere.
    """
    mean_train_x = np.mean(train_x, axis=0)
    std_train_x = np.std(train_x, axis=0)

    # Zero-variance columns divide by zero here; that is expected, and the
    # resulting NaN/inf columns are removed below, so silence the warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        train_x = (train_x - mean_train_x) / std_train_x
        test_x = (test_x - mean_train_x) / std_train_x
        test_x_2 = (test_x_2 - mean_train_x) / std_train_x

    # Build ONE mask shared by all three matrices. Dropping invalid columns
    # per matrix would misalign features whenever a NaN/inf shows up in
    # only one of them.
    keep = (
        np.isfinite(train_x).all(axis=0)
        & np.isfinite(test_x).all(axis=0)
        & np.isfinite(test_x_2).all(axis=0)
    )
    return train_x[:, keep], test_x[:, keep], test_x_2[:, keep]
def import_data(train_file='train_2008.csv', one_d_array=True,
                test_file='test_2008.csv', test_file_2='test_2012.csv'):
    """Load the training set and both test sets, then normalize them.

    Args:
        train_file: Path of the training CSV, passed to
            ``import_train_data``.
        one_d_array: Forwarded to ``import_train_data``; controls whether
            the labels come back as a flat list or a column array.
        test_file: Path of the first test CSV. Defaults to the previously
            hard-coded ``'test_2008.csv'``.
        test_file_2: Path of the second test CSV. Defaults to the
            previously hard-coded ``'test_2012.csv'``.

    Returns:
        A ``(train_x, train_y, test_x, test_x_2)`` tuple in which the three
        feature matrices have been passed through ``normalize_data``.
    """
    train_x, train_y = import_train_data(train_file, one_d_array)
    test_x = import_test_data(test_file)
    test_x_2 = import_test_data(test_file_2)
    train_x, test_x, test_x_2 = normalize_data(train_x, test_x, test_x_2)
    return train_x, train_y, test_x, test_x_2
def write_output_file(output, file_name='output.csv'):
    """Write predictions to ``csvs/<file_name>`` as a two-column CSV.

    The file has an ``id`` column (0, 1, 2, ...) and a ``PES1`` column
    holding the predictions cast to ``int``. No index column is written.

    Args:
        output: 1-D sequence of predictions. Any array-like is accepted
            (NumPy array, list, ...); it is converted with ``np.asarray``.
        file_name: Name of the file to create inside the ``csvs``
            directory.
    """
    # Accept plain lists as well as arrays; a list has no ``.astype``.
    predictions = np.asarray(output)
    frame = pd.DataFrame(
        {'id': np.arange(len(predictions)), 'PES1': predictions.astype(int)},
        columns=['id', 'PES1'],
    )

    target = 'csvs/' + file_name
    # pandas refuses to write into a missing directory, so create it first.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    frame.to_csv(target, index=False)
# NOTE(review): these two statements execute at import time, so merely
# importing this module calls write_output_file() and writes
# 'csvs/output.csv'. The module-level name `input` also shadows the builtin
# input() here and in any `from <module> import *` consumer. This looks like
# leftover smoke-test code -- consider moving it under an
# `if __name__ == "__main__":` guard once callers are confirmed not to rely on it.
input = np.array([0.0, 1.0])
write_output_file(input)