-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbasestuff.py
97 lines (77 loc) · 3.48 KB
/
basestuff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Abolfazl Asudeh, http://asudeh.github.io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandasql import sqldf
def pysqldf(q):
    """Run the SQL string ``q`` through ``sqldf`` against this module's globals."""
    return sqldf(q, globals())
# -----------------------------------------------------------------------------------
def mylog(st, outputfile):
    """Append ``st`` followed by a newline to the file at ``outputfile``.

    The file is opened in append mode, so it is created if it does not exist
    and earlier content is kept.
    """
    # The context manager closes the handle even if the write raises.
    with open(outputfile, 'a') as outfile:
        outfile.write(st + '\n')
# -----------------------------------------------------------------------------------
# Module-level state shared by the loader and the region-of-interest helpers.
n = None  # database size: number of rows in `data`
d = None  # number of attributes (|D|)
data = None  # the first d attributes are trend attributes, the last is the objective value y
col = []  # the NAMES of the columns
B = None  # the rows selected as the Beginning region
E = None  # the rows selected as the End region
debugmod = 'on'  # string flag intended to turn the debug mode on and off
def load_from_csv(dataset, columns, headerIndex=0, nrows=-1, datecols=None):
    """Load the selected columns of a CSV file into the module-level state.

    Sets the globals ``data`` (the loaded DataFrame), ``col`` (``columns``),
    ``n`` (number of loaded rows) and ``d`` (``len(columns) - 1``).

    Args:
        dataset: Path of the CSV file to read.
        columns: Names of the columns to load; the last one is the
            objective value.
        headerIndex: Row number of the header line, passed to ``read_csv``
            as ``header``.
        nrows: Maximum number of rows to read. ``-1`` (the default) or
            ``None`` reads the whole file.
        datecols: Columns to parse as dates, passed to ``read_csv`` as
            ``parse_dates``; ``None`` disables date parsing.
    """
    global data, n, d, col
    col = columns  # the last one is the objective value

    read_options = {
        'usecols': columns,
        'header': headerIndex,
        'skipinitialspace': True,
    }
    # Optional arguments are only forwarded when the caller asked for them,
    # so read_csv keeps its own defaults otherwise.
    if nrows is not None and nrows != -1:
        read_options['nrows'] = nrows
    if datecols is not None:
        read_options['parse_dates'] = datecols

    data = pd.read_csv(dataset, **read_options)
    n = len(data)
    d = len(columns) - 1
def _range_query(conds):
    """Build the SELECT statement over ``data`` for one list of range conditions.

    ``conds[i]`` is a ``(low, high)`` pair applying to column ``col[i]``;
    a ``None`` bound is skipped. Bounds are inclusive (``>=`` / ``<=``).
    """
    clauses = []
    for i, bounds in enumerate(conds):
        low, high = bounds[0], bounds[1]
        if low is not None:
            clauses.append(col[i] + ">=" + str(low))
        if high is not None:
            clauses.append(col[i] + "<=" + str(high))
    st = "SELECT * FROM data"
    if clauses:
        st += " WHERE " + " AND ".join(clauses)
    return st


def RoI_S(Bconds, Econds):
    """Select the Beginning and End regions with per-column range conditions.

    Args:
        Bconds: One ``(low, high)`` pair per column, in the order of ``col``,
            describing the Beginning region; ``None`` means no bound.
        Econds: Same format, describing the End region.

    The matching rows are stored in the globals ``B`` and ``E``.
    """
    global B, E
    B = query(_range_query(Bconds))
    E = query(_range_query(Econds))
def RoI_Split():
    """Split ``data`` in half by position into the Beginning and End regions.

    The first ``n // 2`` rows are stored in the global ``B`` and the remaining
    rows in the global ``E``, so every row lands in exactly one region.
    """
    global B, E
    # Floor division: iloc slicing needs integer positions.
    half = n // 2
    B = data.iloc[:half]
    # E gets its own assignment (the chained `E = B = ...` overwrote B) and
    # starts right at `half` so the middle row is not dropped.
    E = data.iloc[half:n]
# ------------------- Private functions ---------------------------
def query(querystring):
    """Execute ``querystring`` via ``pysqldf`` and return its result."""
    return pysqldf(querystring)
'''
#Test
load_from_csv("data/Wine.csv",["Rank","Vintage","Score","Price"])
#print data[1:10]
RoI_S([(None,None),(None,None),(None,None),(None,20)], [(None,None),(None,None),(None,None),(60,None)])
print B
print E
'''