-
Notifications
You must be signed in to change notification settings - Fork 1
/
misc.py
82 lines (64 loc) · 2.44 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from pandas import read_csv
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from sklearn import metrics
import pandas as pd
import numpy as np
from ta import *
def StartARIMAForecasting(Actual, P, D, Q):
    """Fit an ARIMA model of order (P, D, Q) to ``Actual`` and forecast from it.

    Args:
        Actual: The observed series handed to ``ARIMA`` as its data argument.
        P: First element of the ``order`` tuple.
        D: Second element of the ``order`` tuple.
        Q: Third element of the ``order`` tuple.

    Returns:
        Element 0 of whatever ``forecast()`` returns when called with its
        default arguments on the fitted model.
        NOTE(review): with the legacy ``statsmodels.tsa.arima_model`` API this
        is presumably the array of point forecasts (one step by default) --
        confirm against the installed statsmodels version.
    """
    # disp=0 is forwarded to fit(); presumably it silences optimizer output.
    fitted = ARIMA(Actual, order=(P, D, Q)).fit(disp=0)
    return fitted.forecast()[0]
# Load the candlestick CSV and derive the series the model works on: the
# period-over-period percentage change of the 'Close' column, rounded to
# 5 decimal places.
ActualData = pd.read_csv('COCOA.CMDUSD_Candlestick_5_M_ASK_04.12.2017-03.05.2019.csv')
ActualData['Returns'] = ActualData['Close'].pct_change().round(5)
# Infinite values become NaN so that dropna() removes them as well; note that
# dropna() discards a row when ANY of its columns is NaN, not only 'Returns'.
ActualData = (
    ActualData
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)['Returns'].values
# Silence NumPy floating-point warnings for the remainder of the script.
np.seterr(all='ignore')
# Chronological split: the first 70% of the returns are used for training,
# the remaining 30% are held out for testing.
NumberOfElements = len(ActualData)
TrainingSize = int(NumberOfElements * 0.7)
TrainingData, TestData = ActualData[:TrainingSize], ActualData[TrainingSize:]
# Walk-forward evaluation.  `Actual` is the history the model is fitted on: it
# starts as the training set and grows by one observed test value per step.
# `Predictions` ends up with exactly one entry per element of TestData.
Actual = list(TrainingData)
Predictions = list()
# Before each test point, re-fit ARIMA(3, 1, 0) on the history and forecast.
for timepoint, ActualValue in enumerate(TestData):
    try:
        # np.ravel(...)[0] accepts both a scalar and a 1-element array, so
        # Predictions always holds plain floats whatever the forecast shape.
        Prediction = float(np.ravel(StartARIMAForecasting(Actual, 3, 1, 0))[0])
    except Exception as exc:
        # Best effort: one failed fit must not abort the whole run, but it is
        # reported instead of being silently dropped.  A NaN placeholder keeps
        # Predictions positionally aligned with TestData.  KeyboardInterrupt
        # is not an Exception subclass, so Ctrl-C still stops the script.
        print('Forecast failed at test point %d: %r' % (timepoint, exc))
        Prediction = np.nan
    else:
        print('Actual=%f, Predicted=%f' % (ActualValue, Prediction))
    Predictions.append(Prediction)
    # The observation is added to the history even when the forecast failed,
    # so the next fit sees all data available up to that point.
    Actual.append(ActualValue)
# Score every forecast against the TEST observation it was made for (not
# against the start of the history list, which holds training values).
df = pd.DataFrame({
    'Predictions': np.asarray(Predictions, dtype=float),
    'Actual': np.asarray(TestData, dtype=float),
})
#print(df)
# Reference value for the direction of each row: the observation right before
# it.  For the first test point that is the last training value, when present.
PreviousActual = df['Actual'].shift(1)
if len(df) and len(TrainingData):
    PreviousActual.iloc[0] = TrainingData[-1]
df['Predicted (1 or -1)'] = np.where(df['Predictions'] > PreviousActual, 1, -1)
df['Actual (1 r -1)'] = np.where(df['Actual'] > PreviousActual, 1, -1)
# NOTE: when comparing against another classifier, shift Predictions up one
# (shift(-1)) so both are compared against the same return.
# Rows without a forecast or without a reference value cannot be scored; they
# are marked 0 and excluded from the denominator rather than counted as hits.
Scored = df['Predictions'].notna() & PreviousActual.notna()
df['True'] = np.where(Scored & (df['Actual (1 r -1)'] == df['Predicted (1 or -1)']), 1, 0)
hit_count = int(df['True'].sum())
scored_count = int(Scored.sum())
prob_right = hit_count / scored_count if scored_count else float('nan')
print(hit_count)
print(prob_right * 100)
# Print MSE to see how good the model is; only rows that have a forecast take
# part, and NaN is reported when no forecast succeeded at all.
HasForecast = df['Predictions'].notna()
if HasForecast.any():
    err = metrics.mean_squared_error(df.loc[HasForecast, 'Actual'], df.loc[HasForecast, 'Predictions'])
else:
    err = float('nan')
print(err)
# Draw the held-out test series in the default colour with the forecasts
# overlaid in red on the current axes, then display the figure.
axes = pyplot.gca()
axes.plot(TestData)
axes.plot(Predictions, color='red')
pyplot.show()