-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata.py
177 lines (123 loc) · 4.74 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Erin Lavoie
# CS251
# Project 2
import csv
import sys
import numpy as np
import random as r
class Data:
    """In-memory table loaded from a CSV file.

    The first line of the file supplies the column headers (stripped of
    surrounding whitespace); every following line is kept as a list of raw
    values in ``self.data``.  ``self.header2data`` maps each header string
    to its column index.
    """

    def __init__(self, filename=None):
        """Create an empty data set, loading ``filename`` when one is given.

        filename: optional path of a CSV file whose first line holds headers.
        """
        # fields
        self.headers = []
        self.data = []
        self.header2data = {}
        # Always define the attribute so readData()/save() can detect a
        # missing path instead of failing with AttributeError.
        self.file = filename
        # read in a file if provided
        if filename is not None:
            self.readData()

    def _open_csv(self, path, writing):
        """Open ``path`` in the mode the csv module expects on this Python."""
        if sys.version_info[0] >= 3:
            # Python 3: text mode, with newline handling left to the csv module.
            return open(path, "w" if writing else "r", newline="")
        # Python 2: binary for writing, universal newlines for reading.
        return open(path, "wb" if writing else "rU")

    def _column_index(self, header):
        """Return the column index of ``header``; raise KeyError if unknown."""
        try:
            return self.header2data[header]
        except KeyError:
            raise KeyError("unknown header: %r" % (header,))

    # reads the data from a file
    def readData(self):
        """Load headers and rows from ``self.file``, replacing current contents.

        Raises ValueError when no file name has been set.  An empty file
        results in no headers and no rows.
        """
        if self.file is None:
            raise ValueError("no file name set; nothing to read")
        # the with-block closes the file even if parsing fails
        with self._open_csv(self.file, False) as fp:
            rows = list(csv.reader(fp))
        # first line holds the headers; an empty file has no first line
        raw_headers = rows[0] if rows else []
        self.headers = [header.strip() for header in raw_headers]
        # the remaining lines are the data rows
        self.data = rows[1:]
        # pair every header with its column index (a later duplicate wins)
        self.header2data = {}
        for index, header in enumerate(self.headers):
            self.header2data[header] = index

    # returns a list of the raw headers
    def get_headers(self):
        """Return the list of header strings (the internal list, not a copy)."""
        return self.headers

    # returns the number of raw columns
    def get_num_columns(self):
        """Return the number of headers."""
        return len(self.headers)

    # returns the number of rows
    def get_num_rows(self):
        """Return the number of data rows (the header line is not counted)."""
        return len(self.data)

    # returns a row of raw data with the specified row number
    def get_row(self, rowNum):
        """Return the row stored at index ``rowNum``."""
        return self.data[rowNum]

    # returns a column of data with the specified header string
    def get_column(self, header):
        """Return a new list holding the value under ``header`` for every row.

        Raises KeyError when ``header`` is not a known column.
        """
        ind = self._column_index(header)
        return [row[ind] for row in self.data]

    # returns the raw data at the given header, with the given row number
    def get_value(self, rowNum, header):
        """Return the value at row ``rowNum`` in the column named ``header``.

        Raises KeyError when ``header`` is not a known column.
        """
        return self.data[rowNum][self._column_index(header)]

    # sets the value at the given header, with the given row number
    def set_value(self, rowNum, header, value):
        """Store ``value`` at row ``rowNum`` in the column named ``header``.

        Raises KeyError when ``header`` is not a known column.
        """
        self.data[rowNum][self._column_index(header)] = value

    # adds a column to the data set
    def add_column(self, header, plist=None):
        """Append a column named ``header`` to every row.

        plist: optional sequence with one value per row, in row order; values
            beyond the number of rows are ignored.  When omitted, every row
            receives an empty string.

        Raises IndexError, before anything is modified, when ``plist`` has
        fewer values than there are rows.
        """
        # validate first so a short list cannot leave the table half-updated
        if plist is not None and len(plist) < len(self.data):
            raise IndexError(
                "add_column needs %d values, got %d"
                % (len(self.data), len(plist))
            )
        if plist is None:
            for row in self.data:
                row.append("")
        else:
            # appending data to end of each row
            for row, value in zip(self.data, plist):
                row.append(value)
        # adding header to list of headers and to the lookup dictionary
        self.headers.append(header)
        self.header2data[header] = len(self.headers) - 1

    def save(self, filename=None):
        """Write the headers and all rows as CSV in the 'excel' dialect.

        filename: destination path; defaults to the path given at
            construction.

        Raises ValueError when neither path is available.
        """
        if filename is None:
            filename = self.file
        if filename is None:
            raise ValueError("no file name given and none set at construction")
        with self._open_csv(filename, True) as csvfile:
            writer = csv.writer(csvfile, dialect='excel')
            writer.writerow(self.headers)
            writer.writerows(self.data)

    # prints the raw data
    def printData(self):
        """Print the header list, then each row on its own line."""
        # single-argument print(...) behaves the same on Python 2 and 3
        print(self.headers)
        for thing in self.data:
            print(thing)
if __name__ == "__main__":
    # Smoke test for the Data class.  Usage: python data.py <csv file>
    # Single-argument print(...) is used throughout so the output is the
    # same on Python 2 and Python 3.
    if len(sys.argv) < 2:
        sys.exit("usage: python data.py <csv file>")

    test = Data(sys.argv[1])

    print("\n------------------------\ntesting header accesors: \n")
    print(test.get_headers())

    print("\n------------------------\ntesting num_cols and num_rows: \n")
    print(test.get_num_columns())
    print(test.get_num_rows())

    # The row-based checks below need at least one data row.
    if test.get_num_rows() == 0:
        sys.exit("no data rows to test with")
    # Fixed sample indices are clamped to the last row so that short files
    # do not raise IndexError; larger files print the same rows as before.
    last_row = test.get_num_rows() - 1

    print("\n------------------------\ntesting row accesor: \n")
    print(test.get_row(min(2, last_row)))

    print("\n------------------------\ntesting get_value and get_raw_value: \n")
    print(test.get_value(min(2, last_row), test.get_headers()[0]))

    print("\n------------------------\ntesting printData: \n")
    # test.printData()  -- disabled: prints the whole table

    print("\n\n--------------------------------------------------------\n\n")
    print("testing add column function")

    # one random integer per existing row
    values = [r.randint(0, 100) for _ in range(test.get_num_rows())]
    test.add_column("numeric", values)

    print("\n------------------------\ntesting header accesors: \n")
    print(test.get_headers())

    print("\n------------------------\ntesting num_cols and num_rows: \n")
    print(test.get_num_columns())

    print("\n------------------------\ntesting row accesor: \n")
    # NOTE(review): the original indexes rows by column count here; kept
    # as-is (clamped) -- confirm whether the last row was intended.
    print(test.get_row(min(test.get_num_columns() - 1, last_row)))

    print("\n------------------------\ntesting get_value and get_raw_value: \n")
    print(test.get_value(min(31, last_row), test.get_headers()[0]))

    print("\n------------------------\ntesting get_column: \n")
    # print(test.get_column("PROVENANCE"))  -- disabled: depends on a specific data file