-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshuffle.py
79 lines (54 loc) · 1.8 KB
/
shuffle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import pandas as pd
import argparse
import operator
import random
# Usage example: python shuffle.py -p C:\\Users\\zhtang\\Desktop\\water\\rawdatafinalnoise
# Command-line interface of the script.
parser = argparse.ArgumentParser()
# NOTE(review): --verbosity is parsed (it takes a value) but nothing visible
# in this file reads it.
parser.add_argument("-v", "--verbosity", help="increase output verbosity")
#parser.add_argument('-l', "--label", type=int, help="the num of labels")
# --path is joined with sub-directory names by datashuffle(); without it the
# script used to fail inside os.path.join with an opaque TypeError, so it is
# required here and argparse reports a proper usage error instead.
parser.add_argument("-p", "--path", type=str, required=True,
                    help="path of files")
def _copy_text_file(src_path, dst_path):
    """Copy a text file line by line from *src_path* to *dst_path*."""
    # Context managers guarantee both handles are closed even if a read or
    # write fails half-way through.
    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        dst.writelines(src)


def datashuffle(args):
    """Shuffle the files of ``<args.path>/orderd_data`` into train/test sets.

    The work happens in two passes:

    1. Every file in ``orderd_data`` is copied to ``<args.path>/preshuffle``
       under a random numeric name (``<k>.txt`` where ``k`` comes from a
       shuffled ``range(len(files))``).
    2. The staged files are listed again; the first 80% of that listing are
       copied to ``<args.path>/train`` and the rest to ``<args.path>/test``,
       again under names taken from the shuffled index list.

    The three output directories are created when missing.

    NOTE(review): files already present in ``train``/``test`` from an
    earlier run are not removed, so a re-run can leave the same file name in
    both directories -- confirm whether callers clean up beforehand.

    Args:
        args: Object with a ``path`` attribute naming the directory that
            contains ``orderd_data`` (e.g. the parsed argparse namespace).
    """
    data_dir = os.path.join(args.path, 'orderd_data')
    tempf_path = os.path.join(args.path, 'preshuffle')
    train_path = os.path.join(args.path, 'train')
    test_path = os.path.join(args.path, 'test')

    file_list = os.listdir(data_dir)
    length = len(file_list)
    li = list(range(length))
    random.shuffle(li)

    for directory in (train_path, test_path, tempf_path):
        os.makedirs(directory, exist_ok=True)

    num_train = int(length * 0.8)

    # Pass 1: stage every source file under its random numeric name.
    for shuffled_index, name in zip(li, file_list):
        _copy_text_file(
            os.path.join(data_dir, name),
            os.path.join(tempf_path, str(shuffled_index) + '.txt'),
        )
        print('temp complete %s' % name)

    # Pass 2: split the staged files. Only files written by this run are
    # considered; a leftover or foreign file in the staging directory would
    # otherwise make the listing longer than ``li`` and raise IndexError.
    # The listing order of the randomly named files is what makes the
    # train/test split random, so it is deliberately kept as returned.
    staged_names = {str(k) + '.txt' for k in li}
    staged_files = [n for n in os.listdir(tempf_path) if n in staged_names]
    for i, name in enumerate(staged_files):
        target_dir = train_path if i < num_train else test_path
        _copy_text_file(
            os.path.join(tempf_path, name),
            os.path.join(target_dir, str(li[i]) + '.txt'),
        )
        print('complete %s' % name)
# Script entry point: parse the command line and run the shuffle/split.
if __name__ == '__main__':
    datashuffle(parser.parse_args())