-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmypreprocessing.py
84 lines (40 loc) · 1.85 KB
/
mypreprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# -*- coding: utf-8 -*-
"""myPreprocessing.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1HoIhlycT1lIjfOv8_bL9_JnUl9dH_4Hj
"""
import numpy as np
def _nonzero_std(data):
    """Return the per-column std of *data* with zeros replaced by 1."""
    col_stds = np.std(data, axis=0)
    # A constant column has std == 0; dividing its (all-zero) centered
    # values by 0 would yield NaN, so divide by 1 and keep them at 0.
    return np.where(col_stds == 0, 1.0, col_stds)


def zscore(train_data, test_data, valid_data, *, use_train_stats=False):
    """Standardize the train, test and validation splits column-wise.

    Every split is transformed as ``(X - mean) / std`` where the mean and
    the standard deviation are computed with ``np.mean`` / ``np.std`` along
    axis 0 (i.e. one value per column).  Columns whose standard deviation
    is zero are divided by 1 instead, so they come out as 0 rather than NaN.

    Args:
        train_data: Training samples.
        test_data: Test samples.
        valid_data: Validation samples.
        use_train_stats: If False (default), each split is standardized
            with its own mean and std.  If True, the mean and std of
            ``train_data`` are applied to all three splits, so test and
            validation data are scaled exactly like the training data.

    Returns:
        A list ``[proc_train_data, proc_test_data, proc_valid_data]`` with
        the standardized splits, in that order.
    """
    splits = (train_data, test_data, valid_data)

    if use_train_stats:
        # One (mean, std) pair, taken from the training split, for all splits.
        train_stats = (np.mean(train_data, axis=0), _nonzero_std(train_data))
        stats = [train_stats] * len(splits)
    else:
        # Independent (mean, std) pair per split.
        stats = [(np.mean(split, axis=0), _nonzero_std(split)) for split in splits]

    # z-score = (X - mean) / std; this is the new dataset.
    return [
        (split - col_means) / col_stds
        for split, (col_means, col_stds) in zip(splits, stats)
    ]
def get_labeled_data(input_labels):
    """Convert class labels into 3-element one-hot rows.

    The mapping is: ``0 -> [1, 0, 0]``, ``1 -> [0, 1, 0]`` and
    ``55 -> [0, 0, 1]``.  A label that compares equal to none of these
    values contributes no row, so the result can have fewer rows than
    ``input_labels`` has entries.

    Args:
        input_labels: Iterable of labels.

    Returns:
        ``np.ndarray`` built from the list of one-hot rows, in input order.
    """
    # (label value, one-hot row) pairs, checked in this order for each label.
    encodings = (
        (1, [0, 1, 0]),
        (0, [1, 0, 0]),
        (55, [0, 0, 1]),
    )
    one_hot_rows = [
        row
        for current in input_labels
        for value, row in encodings
        if current == value
    ]
    return np.array(one_hot_rows)