-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathcsvToDatabase.py
213 lines (173 loc) · 4.8 KB
/
csvToDatabase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# convert from several CSV to STScI database input files
# defines the CSV data filenames
inFile = "processed.csv"
outFile = "processed_targets.csv"
outFile2 = "processed_pp.csv"
piFile = "pi.csv"
descrFile = "descr.csv"
# necessary imports
import csv
import time
# start timer
start = time.time()
# number of error iterations
iter = 0
# get the CSV data file as input
input = open(inFile, "rU")
reader = csv.reader(input)
# prep output file/s
output = open(outFile, "wb")
output2 = open(outFile2, "wb")
writer = csv.writer(output)
writer2 = csv.writer(output2)
# initialize row variable
firstPass = 1
# process CSV file, row-by-row
for row in reader:
# open CSV files to process PI name and target descriptions
inputPI = open(piFile, "rU")
inputDescr = open(descrFile, "rU")
readerPI = csv.reader(inputPI)
readerDescr = csv.reader(inputDescr)
# row to be output
outRow = row
# split filename column
rootname = str.split(outRow[0], "_")
rootname[0] = rootname[0].lower()
# detect improper extension, if any
if len(rootname) > 1:
if rootname[1] != "c0m.fits":
iter += 1
# when processing column name, define new column name
if firstPass == 1:
PI = "PI"
descr = "target_descr"
# check for the JPL_ tag, remove it
for i,column in enumerate(outRow):
if column.startswith("JPL_"):
column = column.split("_", 1)
outRow[i] = column[1].lower()
else:
outRow[i] = column.lower()
# be sure we do this processing for the first row only
firstPass = 0
# turn filename into just the rootname
outRow[0] = rootname[0]
# turn n.a. and -1 values into NULL/blank values!
for i,val in enumerate(outRow):
# watch out for headers!
try:
j = int(val)
except ValueError:
j = 0
# see if there are supposed to be null values and make it so
if val.find("n.a.") != -1:
outRow[i] = ""
elif val.find("photif error") != -1:
outRow[i] = ""
elif val.find("#VALUE!") != -1:
outRow[i] = ""
elif j == -1:
outRow[i] = ""
# start second CSV file data, with rootname
outRow2 = [rootname[0]]
# find the PI name
foundPI = 0
for rowPI in readerPI:
if rowPI[1] == row[3]:
PI = rowPI[0].lower()
foundPI = 1
# error detection for no PI found
if foundPI == 0:
PI = "NOT FOUND"
iter += 1
# add PI name, matched with proposal ID
outRow2.append(PI)
# find the target description (adds about 2 minutes to compile time!)
foundDescr = 0
for rowDescr in readerDescr:
if rowDescr[0] == row[0]:
descr = rowDescr[1].lower()
foundDescr = 1
# error detection for no description found
if foundDescr == 0:
descr = "NOT FOUND"
iter += 1
# add the target description
outRow2.append(descr)
# remove unneeded columns from ephemeris target data
outRow.pop(1)
outRow.pop(1)
outRow.pop(1)
outRow.pop(1)
outRow2.append(outRow.pop(1))
outRow.pop(1)
outRow.pop(1)
outRow.pop(1)
outRow.pop(1)
outRow.pop(1)
outRow.pop(1)
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow2.append(outRow.pop(1))
outRow.pop(1)
# move target column near the beginning
outRow.insert(1, outRow.pop(23))
# get the RA and DEC values
RA = str.split(outRow[2], " ")
DEC = str.split(outRow[3], " ")
# convert RA, DEC if not in header column
if len(RA) > 1 and len(DEC) > 1:
# define RA and DEC values
hRA = float(RA[0])
mRA = float(RA[1])
sRA = float(RA[2])
hDEC = float(DEC[0])
mDEC = float(DEC[1])
sDEC = float(DEC[2])
# convert RA
dRA = (hRA * 15) + (mRA * 15 / 60) + (sRA * 15 / 3600)
# convert DEC
dDEC = (hDEC) + (mDEC / 60) + (sDEC / 3600)
# save converted RA and DEC values
outRow[2] = dRA
outRow[3] = dDEC
# write each row
writer.writerow(outRow)
writer2.writerow(outRow2)
# close the PI file
inputPI.close()
# close all files
input.close()
output.close()
output2.close()
# stop timer
end = time.time()
# process the time elapsed
elapsed = end - start
min = round(elapsed / 60, 3)
# display important info
if iter == 1:
print("There was " + str(iter) + " error.")
elif iter == 0:
print("There were no errors!")
else:
print("There were " + str(iter) + " errors.")
print("CSV conversion operation complete after", min, "minutes.")