-
Notifications
You must be signed in to change notification settings - Fork 21
/
prepare.py
32 lines (24 loc) · 968 Bytes
/
prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
import argparse
import pandas as pd
def get_runtime_args(argv=None):
    """Parse the command-line options of the data preparation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what a plain ``get_runtime_args()`` call relies on.

    Returns:
        argparse.Namespace exposing ``data_input_path`` and
        ``data_output_path`` as strings.

    Raises:
        SystemExit: Raised by argparse (exit status 2) when a required
            option is missing or an unrecognised option is supplied.
    """
    parser = argparse.ArgumentParser()
    # Both options are mandatory: main() hands them directly to os.makedirs
    # and os.path.join, which fail with an opaque TypeError when given None.
    parser.add_argument(
        '--data-input-path',
        type=str,
        required=True,
        help='Directory that contains the input CSV file.',
    )
    parser.add_argument(
        '--data-output-path',
        type=str,
        required=True,
        help='Directory the output CSV file is written to.',
    )
    return parser.parse_args(argv)
def main():
    """Copy ``german_credit_data.csv`` from the input to the output directory.

    The two directories are taken from the command line through
    ``get_runtime_args``. The output directory is created when it does not
    exist yet, the CSV is loaded into a DataFrame and then written back out
    without the index column. No transformation is applied in between.
    """
    csv_name = 'german_credit_data.csv'
    runtime_args = get_runtime_args()
    source_dir = runtime_args.data_input_path
    target_dir = runtime_args.data_output_path

    # Make sure the destination directory exists before anything is written.
    os.makedirs(target_dir, exist_ok=True)

    source_csv = os.path.join(source_dir, csv_name)
    target_csv = os.path.join(target_dir, csv_name)

    print(f'Reading data from {source_csv} and writing processed output to {target_csv}')
    # Show what is already present in the destination directory.
    dir_listing = os.listdir(target_dir)
    print(f'Output dir: {dir_listing}')

    # Load the raw dataset.
    frame = pd.read_csv(source_csv)

    # Some data preprocessing should happen here...

    # Persist the result; the DataFrame index is not written to the file.
    frame.to_csv(target_csv, index=False)
# Script entry point: run the preparation step only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()