-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcurs.py
176 lines (134 loc) · 3.7 KB
/
curs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import proxy
import sys
import numpy
# `All` is a unique marker object: the `fetch` methods in this module use it
# as the default for `cols` and test for it by identity (`cols is All`).
# It is the only instance of a throwaway class created on the spot.
All = type("All", (), dict())()
# Swap the class initializer for a non-callable so that calling the class
# again (to build a second instance) raises TypeError.
setattr(type(All), "__init__", NotImplemented)
class cursor(object):
    """Base class for row sources exposing named, typed columns.

    Subclasses replace `columns` with a `{ name: dtype }` mapping and
    implement `fetch`.  A subclass that supports `cursor[index]` sets
    `indexable` to a true value.
    """

    # { name: dtype } -- placeholder until a subclass fills it in.
    columns = NotImplemented

    # Plain cursors cannot be subscripted; see __getitem__.
    indexable = 0

    def fetch(self, n = None, cols = All):
        """Return a batch of rows; must be overridden.

        n    - > If not specified, return as many rows as you want.
               > If specified and an integer then try to return n rows
                 unless eof was reached in the process.
               > If specified and an iterable then return the rows indexed
                 by the elements of n.

        cols - subset of columns to fetch, or All.
        """
        raise NotImplementedError

    def select(self, expr):
        """Wrap this cursor in a `select` cursor computing new columns.

        General select syntax:

            table.select(lambda a, b, .., z: {
                col1: expr1(a, b, .. z),
                col2: expr2(a, b, .. z),
                ...
            })

        where a, b, ... z are columns of this cursor.
        """
        return select(self, expr)

    def __getitem__(self, index):
        """Return an `indexed` view of this cursor driven by `index`.

        Raises TypeError when the cursor class is not marked indexable.
        """
        if not self.indexable:
            raise TypeError("Cursor of type %s is not indexable." % type(self))
        return indexed(self, index)
class indexed(cursor):
    """Cursor that reads rows of `curs` at positions supplied by `index`.

    `index` must itself offer `fetch(n)` returning a dict, and must support
    subscripting (used when this view is indexed again).  The column set is
    taken unchanged from the wrapped cursor.
    """

    indexable = 1

    def __init__(self, curs, index):
        self.curs = curs
        self.index = index
        self.columns = self.curs.columns

    def fetch(self, n = None, cols = All):
        """Fetch the next batch from `index` and use it to pick rows of `curs`.

        n    - forwarded to `index.fetch`.
        cols - forwarded to the wrapped cursor's `fetch`.

        Raises StopIteration if the index batch is an empty dict.
        """
        r = self.index.fetch(n)
        # Take the first value of the returned dict -- presumably the index
        # cursor has a single column holding row positions (confirm with the
        # callers).  next(iter(...)) works on both Python 2 and Python 3,
        # unlike the former `itervalues().next()`.
        I = next(iter(r.values()))
        return self.curs.fetch(I, cols)

    def __getitem__(self, index2):
        """Compose indices: index the index, keep the same wrapped cursor."""
        return indexed(self.curs, self.index[index2])
class select(cursor):
    """Cursor whose columns are computed from another cursor by `expr`.

    `expr` is a callable whose argument names are column names of the source
    cursor `c`; it returns a dict of result columns.
    """

    indexable = 1

    def __init__(self, c, expr):
        self.c = c
        self.expr = expr
        # Source columns, in the order they appear as arguments of expr.
        self.refs = get_refs(expr, c.columns)
        # { name: dtype } of the columns expr produces.
        self.columns = calc_column_types(expr, c.columns)

    def fetch(self, n = None, cols = All):
        """Fetch the referenced source columns and apply `expr` to them.

        n    - forwarded to the source cursor's `fetch`.
        cols - accepted for interface compatibility; this implementation
               does not use it and returns whatever `expr` returns.
        """
        source_rows = self.c.fetch(n, self.refs)
        positional = [ source_rows[name] for name in self.refs ]
        return self.expr(* positional)
def get_refs(f, names):
    """Return the positional argument names of `f` as a tuple.

    f     - a Python function (e.g. a lambda) whose argument names are
            meant to be column names.
    names - container of valid column names (anything supporting `in`).

    Raises NameError, carrying the offending name, for the first argument
    of `f` that is not in `names`.
    """
    # `__code__` is available on Python 2.6+ and Python 3; the older
    # `func_code` spelling exists only on Python 2.
    co = f.__code__
    # co_varnames lists the arguments first, followed by other locals.
    argnames = co.co_varnames[:co.co_argcount]
    for arg in argnames:
        if arg not in names:
            raise NameError(arg)
    return argnames
def shandy(dtype):
    """Build an empty numpy array whose element type is `dtype`."""
    empty = numpy.array([], dtype=dtype)
    return empty
def calc_column_types(f, col_types):
    """Work out the dtypes of the columns produced by `f`.

    f         - a Python function whose argument names are column names and
                which returns a dict { result name: array }.
    col_types - mapping { column name: dtype } of the available inputs.

    Returns a dict { result name: dtype }.

    Raises NameError, carrying the offending name, for the first argument
    of `f` that is not a key of `col_types`.
    """
    # `__code__` works on Python 2.6+ and Python 3 (`func_code` is 2-only).
    co = f.__code__
    argnames = co.co_varnames[:co.co_argcount]
    for arg in argnames:
        if arg not in col_types:
            raise NameError(arg)
    # Call f on zero-length arrays of the right dtypes: no data is touched,
    # but the results carry the dtypes f would produce.
    args = [ numpy.array([], col_types[a]) for a in argnames ]
    res = f(* args)
    # items() is valid on both Python 2 and Python 3.
    res_types = dict( (k, v.dtype) for (k, v) in res.items() )
    return res_types
def transpose_dict(d):
    """Group the keys of `d` by their value.

    [ (k, v) ] -> [ (v, [ k ]) ]

    Returns a dict mapping each distinct value of `d` to the list of keys
    that carried it.  The values of `d` must be hashable.  An empty input
    gives an empty dict.
    """
    b = {}
    # items() is valid on both Python 2 and Python 3 (iteritems is 2-only).
    for (k, v) in d.items():
        b.setdefault(v, []).append(k)
    return b
# Horizontal join
class hjoin(cursor):
    """Cursor presenting the columns of several cursors side by side.

    `columns` is the union of the member cursors' columns and `col_src`
    records which cursor supplies each column.  When a column name occurs
    in more than one cursor, the last cursor in argument order wins.
    """

    indexable = 1

    def __init__(self, *cursors):
        self.cursors = cursors
        self.columns = {}
        self.col_src = {}
        for c in self.cursors:
            # items() is valid on both Python 2 and Python 3.
            for (name, typ) in c.columns.items():
                self.columns[name] = typ
                self.col_src[name] = c

    def fetch(self, n = None, cols = All):
        """Fetch the requested columns, asking each source cursor only once.

        n    - forwarded unchanged to every source cursor involved.
        cols - iterable of column names, or All for every column.

        Returns one dict merging the per-cursor results.
        Raises KeyError for a column name no member cursor provides.
        """
        if cols is All:
            cols = self.columns.keys()
        cols_curs = dict([(c, self.col_src[c]) for c in cols])
        # Invert column -> cursor into cursor -> [columns] so every cursor
        # gets a single fetch call covering all of its columns.
        curs_cols = transpose_dict(cols_curs)
        res = {}
        for curs_i, cols_i in curs_cols.items():
            res_i = curs_i.fetch(n, cols_i)
            res.update(res_i)
        return res


# Short alias.
hj = hjoin
class npcur(cursor):
    """Single-column cursor over an in-memory numpy array.

    arr  - anything `numpy.array` accepts; it is converted on construction.
    name - name of the single column (default 'val').

    The cursor keeps a read position `i` that sequential fetches advance.
    """

    indexable = 1

    # Largest number of rows one sequential fetch returns; also the batch
    # size used when the caller does not specify `n`.
    chunk_size = 128

    def __init__(self, arr, name = 'val'):
        self.name = name
        self.arr = numpy.array(arr)
        self.columns = { self.name: self.arr.dtype }
        self.i = 0

    def fetch(self, n = None, cols = All):
        """Return rows as { name: array }.

        n    - None: return up to `chunk_size` rows from the read position.
               Integer: return up to min(n, chunk_size) rows from the read
               position; 0 or a negative value returns no rows.
               Iterable: return the rows at those positions; the read
               position is not moved.
        cols - ignored; the single column is always returned.

        Sequential fetches return fewer rows (possibly none) once the end
        of the array is reached.
        """
        if hasattr(n, '__iter__'):
            I = numpy.array(n)
            if I.size == 0:
                # An empty list converts to float64, which numpy rejects as
                # an index; force an integer dtype so the result is empty.
                I = I.astype(numpy.intp)
            return { self.name: self.arr[I] }

        # Only a missing n selects the default; an explicit 0 must yield
        # zero rows rather than a full chunk.
        if n is None:
            n = self.chunk_size
        # Clamp to [0, chunk_size]; a negative count would otherwise turn
        # into a slice measured from the end of the array.
        n = max(0, min(n, self.chunk_size))
        i = self.i
        res = self.arr[i : i + n]
        self.i = i + len(res)
        return { self.name: res }