-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbase_code.py
48 lines (31 loc) · 1.04 KB
/
base_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import numpy as np
from sklearn import linear_model, svm
from sklearn import cross_validation
# Baseline flight-delay model.
#
# Reads '2008_new.csv', keeps flights that were neither cancelled nor
# diverted, one-hot encodes the categorical columns, fits a linear regression
# on arrival delay and writes the rounded predictions for the same rows to
# 'outArrDelay.csv'.
#
# NOTE(review): the file-level `from sklearn import cross_validation` import
# is not used by this script, and that module no longer exists in
# scikit-learn >= 0.20 -- confirm it can be removed.
df = pd.read_csv('2008_new.csv', header=0)
print("----------------------------------------------------------------------------------")
#df.info()

# Discard flights that never completed their scheduled route.
df = df[df.Cancelled == 0]
df = df[df.Diverted == 0]

# Restrict the frame to the candidate features plus the two delay columns.
cols = ['DayOfWeek', 'UniqueCarrier', 'Origin', 'Dest', 'Distance', 'ArrDelay', 'DepDelay']
df = df[cols]
df.info()

# One-hot encode every categorical column, then replace the raw columns with
# their indicator columns.
cols = ['DayOfWeek', 'UniqueCarrier', 'Origin', 'Dest']
dummies = [pd.get_dummies(df[col]) for col in cols]
data_dummies = pd.concat(dummies, axis=1)
df = pd.concat((df, data_dummies), axis=1)
df = df.drop(cols, axis=1)
print("----------------------------------------------------------------------------------")
df.info()

# Targets: arrival delay is the one fitted below; departure delay is
# extracted alongside it but not used further in this script.
y1 = df['ArrDelay'].values
y2 = df['DepDelay'].values

# DataFrame.drop returns a new frame, so the result has to be assigned back;
# otherwise both delay columns stay in the feature matrix and the model is
# trained on its own target.
df = df.drop(['ArrDelay', 'DepDelay'], axis=1)
X = df.values

clf = linear_model.LinearRegression()
clf.fit(X, y1)

# Predictions are made on the same rows the model was fitted on.
X_test = df.values
y_res = clf.predict(X_test)
y_res = np.around(y_res)
np.savetxt('outArrDelay.csv', y_res, fmt="%d")