-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpre.py
executable file
·30 lines (25 loc) · 1.02 KB
/
pre.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env python
import os
import argparse
def format_line(line, delimiter=None, target=0):
    """Convert one delimited record into a ``label index:value ...`` string.

    The line is right-stripped and split on ``delimiter`` (``None`` means
    split on runs of whitespace, as with ``str.split``).  Every token keeps
    the column index it had in the input line.

    * Tokens equal to ``"x"`` are dropped entirely, even in the target column.
    * The token in column ``target`` is emitted first, followed by a space.
    * Every other token is emitted as ``"<index>:<token> "``.

    The returned string keeps the trailing space of the last item and does
    not include a newline.  If the target column is absent (out of range or
    dropped as ``"x"``), the result simply has no leading label.
    """
    label = ""
    features = []
    for index, token in enumerate(line.rstrip().split(delimiter)):
        if token == "x":
            # "x" marks a column that must not appear in the output.
            continue
        if index == target:
            label = token + " "
        else:
            features.append(str(index) + ":" + token + " ")
    return label + "".join(features)


def main(argv=None):
    """Parse command-line arguments and convert the dataset file.

    ``argv`` is the argument list to parse; ``None`` makes argparse fall
    back to ``sys.argv[1:]``.  One output line is written per input line.
    """
    parser = argparse.ArgumentParser(description='Preprocess Dataset for my FM')
    parser.add_argument('dataset', help='path to data file')
    parser.add_argument('output', help='output path')
    parser.add_argument('-d', '--delimiter', help='specify delimiter of dataset')
    # type=int lets argparse reject a non-numeric column with a usage error
    # before any file is opened or truncated.
    parser.add_argument('-t', '--target', type=int, default=0,
                        help='specify y column')
    args = parser.parse_args(argv)

    # Open the input first so a missing dataset does not truncate the output;
    # the with-statement closes both files even if conversion fails midway.
    with open(args.dataset) as inf, open(args.output, "w") as outf:
        for line in inf:
            outf.write(format_line(line, args.delimiter, args.target) + "\n")


if __name__ == '__main__':
    main()