-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1.ReadingData.py
40 lines (26 loc) · 870 Bytes
/
1.ReadingData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# -*- coding: utf-8 -*-
"""
Daniel Manrique
"""
import pandas as pd
# --- Configuration ---------------------------------------------------------
# The attribute file name is fixed; the label file name is typed in by the
# user when the script starts.
ATT_FILE = "OceanProximityPreparedCleanAttributes.csv"
LABEL_FILE = str(input("Name of the label file: "))
# Fraction of the rows that goes into the training split.
TRAIN_RATE = 0.8

# --- Load both CSV files ---------------------------------------------------
attributes = pd.read_csv(ATT_FILE)
label = pd.read_csv(LABEL_FILE)

# --- Split sizes -----------------------------------------------------------
# Every size is derived from the row count of the attribute table.
n_instances = attributes.shape[0]
n_train = int(n_instances * TRAIN_RATE)
# The rows left after training are halved: dev takes the rounded-down half
# (floor division of a non-negative int) and test takes whatever remains.
n_dev = (n_instances - n_train) // 2
n_test = n_instances - n_train - n_dev

# --- Contiguous slices, taken in file order --------------------------------
attribute_rows = attributes.values
label_rows = label.values
dev_stop = n_train + n_dev  # first row index that belongs to the test split

x_train, t_train = attribute_rows[:n_train], label_rows[:n_train]
x_dev, t_dev = attribute_rows[n_train:dev_stop], label_rows[n_train:dev_stop]
x_test = attribute_rows[dev_stop:n_instances]
t_test = label_rows[dev_stop:n_instances]

# --- Report the shape of every split ---------------------------------------
for tag, split in (
    ("x_train:", x_train),
    ("t_train:", t_train),
    ("x_dev:", x_dev),
    ("t_dev:", t_dev),
    ("x_test:", x_test),
    ("t_test:", t_test),
):
    print(tag, split.shape)