WellUtil.py
import os
import fnmatch

import wellapplication as wa
import pandas as pd
import xmltodict


# Well Utility
def match_path(rootdir, pattern, remove_comp=False):
    """
    Returns a list of file paths that match the fnmatch pattern within the given directory.
    :param rootdir: root directory to search recursively
    :param pattern: fnmatch pattern
    :param remove_comp: if True, masks out path strings that include "Compensated", which is generated by
        Levelogger.exe when a barometric compensation is performed.
    :return: list of paths
    """
    newlist = []
    duplicates = 0
    for root, dirnames, filenames in os.walk(rootdir):
        for filename in fnmatch.filter(filenames, pattern):
            filepath = os.path.join(root, filename)
            if filepath not in newlist:
                newlist.append(filepath)
            else:
                duplicates += 1
    print("Duplicates found: " + str(duplicates))
    if remove_comp:
        newlist = [k for k in newlist if 'Compensated' not in k]
    return newlist
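
# Example (hypothetical directory; the pattern is a standard fnmatch glob).
# With remove_comp=True, any "...Compensated.xle" outputs from a barometric
# compensation run are filtered out of the result:
#
#     xle_files = match_path(r"C:\transducer_data", "*.xle", remove_comp=True)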


def solinst_df(path_list):
    """
    Create a list of DataFrames from a list of Solinst transducer file paths.
    :param path_list: list of .xle and/or .lev file paths
    :return: list of Pandas DataFrames, one per file
    """
    dfs = []
    for counter, f in enumerate(path_list):
        if f.endswith('.xle'):
            try:
                dfs.append(new_xle_imp(f))
                print(counter)
            except Exception:
                # Early versions of the Levelogger software saved the fixed-width .levs as .xles (this may still fail).
                print("Corrupt .xle file found, trying .lev opener")
                dfs.append(new_lev_imp(f))
                print(counter)
        elif f.endswith('.lev'):
            dfs.append(new_lev_imp(f))
            print(counter)
    return dfs


def printmes(x):
    """Print a message, also forwarding it to arcpy.AddMessage when arcpy is available."""
    try:
        from arcpy import AddMessage
        AddMessage(x)
        print(x)
    except ImportError:
        print(x)


def new_xle_imp(infile):
    """Use an exact file path to import an .xle transducer file.
    Args:
        infile (str):
            complete file path to input file
    Returns:
        A Pandas DataFrame containing the transducer data
    """
# open text file
with open(infile, "rb") as f:
obj = xmltodict.parse(f, xml_attribs=True, encoding="ISO-8859-1")
# navigate through xml to the data
wellrawdata = obj['Body_xle']['Data']['Log']
# convert xml data to pandas dataframe
try:
f = pd.DataFrame(wellrawdata)
except ValueError:
printmes('xle file {:} incomplete'.format(infile))
return
# CH 3 check
try:
ch3ID = obj['Body_xle']['Ch3_data_header']['Identification']
f[str(ch3ID).title()] = f['ch3']
    except (KeyError, UnboundLocalError):
pass
# CH 2 manipulation
try:
ch2ID = obj['Body_xle']['Ch2_data_header']['Identification']
f[str(ch2ID).title()] = f['ch2']
ch2Unit = obj['Body_xle']['Ch2_data_header']['Unit']
numCh2 = pd.to_numeric(f['ch2'])
if ch2Unit == 'Deg C' or ch2Unit == u'\N{DEGREE SIGN}' + u'C':
f[str(ch2ID).title()] = numCh2
elif ch2Unit == 'Deg F' or ch2Unit == u'\N{DEGREE SIGN}' + u'F':
printmes('Temp in F, converting to C')
f[str(ch2ID).title()] = (numCh2 - 32) * 5 / 9
f[str(ch2ID).title()] = pd.to_numeric(f[str(ch2ID).title()])
    except (KeyError, UnboundLocalError):
printmes('No channel 2 for {:}'.format(infile))
# CH 1 manipulation
ch1ID = obj['Body_xle']['Ch1_data_header']['Identification'] # Usually level
ch1Unit = obj['Body_xle']['Ch1_data_header']['Unit'] # Usually ft
unit = str(ch1Unit).lower()
if unit == "feet" or unit == "ft":
f[str(ch1ID).title()] = pd.to_numeric(f['ch1'])
elif unit == "kpa":
f[str(ch1ID).title()] = pd.to_numeric(f['ch1']) * 0.33456
printmes("Units in kpa, converting {:} to ft...".format(os.path.basename(infile)))
elif unit == "mbar":
f[str(ch1ID).title()] = pd.to_numeric(f['ch1']) * 0.0334552565551
elif unit == "psi":
f[str(ch1ID).title()] = pd.to_numeric(f['ch1']) * 2.306726
printmes("Units in psi, converting {:} to ft...".format(os.path.basename(infile)))
elif unit == "m" or unit == "meters":
f[str(ch1ID).title()] = pd.to_numeric(f['ch1']) * 3.28084
printmes("Units in psi, converting {:} to ft...".format(os.path.basename(infile)))
else:
f[str(ch1ID).title()] = pd.to_numeric(f['ch1'])
printmes("Unknown units, no conversion")
# add extension-free file name to dataframe
f['name'] = infile.split('\\').pop().split('/').pop().rsplit('.', 1)[0]
# combine Date and Time fields into one field
f['DateTime'] = pd.to_datetime(f.apply(lambda x: x['Date'] + ' ' + x['Time'], 1))
f[str(ch1ID).title()] = pd.to_numeric(f[str(ch1ID).title()])
# add logger information to dataframe
model = obj['Body_xle']['Instrument_info']['Instrument_type']
f['model'] = model
serial = obj['Body_xle']['Instrument_info']['Serial_number']
f['sn'] = serial
site_loc = obj['Body_xle']['Instrument_info_data_header']['Location']
f['location'] = site_loc
try:
ch3ID = obj['Body_xle']['Ch3_data_header']['Identification']
f[str(ch3ID).title()] = pd.to_numeric(f[str(ch3ID).title()])
    except (KeyError, UnboundLocalError):
pass
f = f.reset_index()
f = f.set_index('DateTime')
f['Level'] = f[str(ch1ID).title()]
    f = f.drop(['Date', 'Time', '@id', 'ch1', 'ch2', 'index', 'ms'], axis=1, errors='ignore')
return f


def new_lev_imp(infile):
    """Use an exact file path to import a .lev transducer file.
    Args:
        infile (str):
            complete file path to input file
    Returns:
        A Pandas DataFrame containing the transducer data, or None if the file
        has formatting issues
    """
    with open(infile, "r") as fd:
        txt = fd.readlines()
try:
data_ind = txt.index('[Data]\n')
inst_info_ind = txt.index('[Instrument info from data header]\n')
ch1_ind = txt.index('[CHANNEL 1 from data header]\n')
ch2_ind = txt.index('[CHANNEL 2 from data header]\n')
level = txt[ch1_ind + 1].split('=')[-1].strip().title()
level_units = txt[ch1_ind + 2].split('=')[-1].strip().lower()
temp = txt[ch2_ind + 1].split('=')[-1].strip().title()
temp_units = txt[ch2_ind + 2].split('=')[-1].strip().lower()
# serial_num = txt[inst_info_ind+1].split('=')[-1].strip().strip(".")
# inst_num = txt[inst_info_ind+2].split('=')[-1].strip()
# location = txt[inst_info_ind+3].split('=')[-1].strip()
# start_time = txt[inst_info_ind+6].split('=')[-1].strip()
# stop_time = txt[inst_info_ind+7].split('=')[-1].strip()
        df = pd.read_csv(infile, parse_dates=[[0, 1]], sep=r'\s+', skiprows=data_ind + 2,
                         names=['Date', 'Time', level, temp],
                         skipfooter=1, engine='python')
df.rename(columns={'Date_Time': 'DateTime'}, inplace=True)
df.set_index('DateTime', inplace=True)
# add logger metadata to dataframe
serial_num = txt[inst_info_ind+1].split('=')[-1].strip().strip(".")
df['sn'] = serial_num
location = txt[inst_info_ind+3].split('=')[-1].strip()
df['location'] = location
if level_units == "feet" or level_units == "ft":
df[level] = pd.to_numeric(df[level])
elif level_units == "kpa":
df[level] = pd.to_numeric(df[level]) * 0.33456
printmes("Units in kpa, converting {:} to ft...".format(os.path.basename(infile)))
elif level_units == "mbar":
df[level] = pd.to_numeric(df[level]) * 0.0334552565551
elif level_units == "psi":
df[level] = pd.to_numeric(df[level]) * 2.306726
printmes("Units in psi, converting {:} to ft...".format(os.path.basename(infile)))
elif level_units == "m" or level_units == "meters":
df[level] = pd.to_numeric(df[level]) * 3.28084
printmes("Units in psi, converting {:} to ft...".format(os.path.basename(infile)))
else:
df[level] = pd.to_numeric(df[level])
printmes("Unknown units, no conversion")
        if temp_units == 'deg c' or temp_units == u'\N{DEGREE SIGN}' + u'c':
            df[temp] = pd.to_numeric(df[temp])
        elif temp_units == 'deg f' or temp_units == u'\N{DEGREE SIGN}' + u'f':
            printmes('Temp in F, converting {:} to C...'.format(os.path.basename(infile)))
            df[temp] = (pd.to_numeric(df[temp]) - 32.0) * 5.0 / 9.0
df['name'] = infile
return df
except ValueError:
printmes('File {:} has formatting issues'.format(infile))
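

# Minimal end-to-end sketch of how these helpers fit together. The directory
# path is hypothetical; point it at a folder of Solinst .xle/.lev exports.
if __name__ == '__main__':
    # gather transducer files, skipping barometrically compensated duplicates
    files = match_path(r"C:\transducer_data", "*.xle", remove_comp=True)
    # import each file into its own DataFrame
    frames = solinst_df(files)
    # drop failed imports (the importers return None for unreadable files)
    frames = [df for df in frames if df is not None]
    if frames:
        # stack everything into one long record indexed by DateTime
        combined = pd.concat(frames)
        printmes("Imported {:} rows from {:} files".format(len(combined), len(frames)))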