-
Notifications
You must be signed in to change notification settings - Fork 0
/
NCSS-ImportNCSSTextFiles_into_FGDB.py
458 lines (364 loc) · 21.5 KB
/
NCSS-ImportNCSSTextFiles_into_FGDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#-------------------------------------------------------------------------------
# Name: NCSS-ImportNCSSTextFiles_into_FGDB
#
# Author: Adolfo.Diaz
# This tool will import NCSS Soil Characterization pedon records
# from existing text files. These text files were exported by Jason Nemecek.
# and include the following 17 pipe delimitted ascii tables:
# - analyte.txt
# - combine_nasis_ncss.txt
# - lab_analysis_prodcedure.txt
# - lab_area.txt
# - lab_calculations_including_estimates_and_default_values.txt
# - lab_chemical_properties.txt
# - lab_major_and_trace_elements_and_oxides.txt
# - lab_method_code.txt
# - lab_mineralogy_glass_count.txt
# - lab_physical_properties.txt
# - lab_webmap.txt
# - lab_xray_and_thermal.txt
# - layer.txt
# - pedon.txt
# - preparation.txt
# - rosetta.txt
# - site.txt
# The tool will import the above text files into an existing empty FGDB with
# the appropriate tabular schema produced from the 'create_Schema_from_NCSS_Lab_Table_Metadata.py'
# The tool takes in 3 parameters:
# 1) directory path of text files
# 2) directory where FGDB will be created
# 3) Name of FGDB
# The tool will error out if there is a discrepancy between the FGDB
# schema and the import files. The names have to match exactly.
# There were several temporary adjustments that needed to be made:
# 1) Webmap table was manually imported using catalog and not the script
# 2) analyte table schemas do not match.
#
#
# Created: 14/11/2019
# ArcGIS Desktop 7.3
#-------------------------------------------------------------------------------
## ===================================================================================
def AddMsgAndPrint(msg, severity=0):
# prints message to screen if run as a python script
# Adds tool message to the geoprocessor
#
#Split the message on \n first, so that if it's multiple lines, a GPMessage will be added for each line
try:
print(msg)
#for string in msg.split('\n'):
#Add a geoprocessing message (in case this is run as a tool)
if severity == 0:
arcpy.AddMessage(msg)
elif severity == 1:
arcpy.AddWarning(msg)
elif severity == 2:
arcpy.AddError("\n" + msg)
except:
pass
## ===================================================================================
def errorMsg():
try:
exc_type, exc_value, exc_traceback = sys.exc_info()
theMsg = "\t" + traceback.format_exception(exc_type, exc_value, exc_traceback)[1] + "\n\t" + traceback.format_exception(exc_type, exc_value, exc_traceback)[-1]
if theMsg.find("exit") > -1:
AddMsgAndPrint("\n\n")
pass
else:
AddMsgAndPrint(theMsg,2)
except:
AddMsgAndPrint("Unhandled error in unHandledException method", 2)
pass
## ================================================================================================================
def splitThousands(someNumber):
""" will determine where to put a thousands seperator if one is needed.
Input is an integer. Integer with or without thousands seperator is returned."""
try:
return re.sub(r'(\d{3})(?=\d)', r'\1,', str(someNumber)[::-1])[::-1]
except:
errorMsg()
return someNumber
## ================================================================================================================
def createPedonFGDB():
"""This Function will create a new File Geodatabase using a pre-established XML workspace
schema. All Tables will be empty and should correspond to incoming text files.
Relationships will also be pre-established.
Return false if XML workspace document is missing OR an existing FGDB with the user-defined
name already exists and cannot be deleted OR an unhandled error is encountered.
Return the path to the new Pedon File Geodatabase if everything executes correctly."""
try:
AddMsgAndPrint("\nCreating File Geodatabase")
arcpy.SetProgressorLabel("Creating File Geodatabase")
# pedon xml template that contains empty pedon Tables and relationships
# schema and will be copied over to the output location
pedonXML = os.path.dirname(sys.argv[0]) + os.sep + "Create_NCSSLabDatabase_Schema_XMLWorkspace.xml"
localPedonDB = os.path.dirname(sys.argv[0]) + os.sep + "NCSSLabDatabase_Schema_Template.gdb"
# Return false if pedon fGDB template is not found
if not arcpy.Exists(localPedonDB):
AddMsgAndPrint("\t" + os.path.basename(localPedonDB) + " FGDB template was not found!",2)
return False
newPedonDB = os.path.join(outputFolder,DBname + ".gdb")
if arcpy.Exists(newPedonDB):
try:
arcpy.Delete_management(newPedonDB)
AddMsgAndPrint("\t" + DBname + ".gdb already exists. Deleting and re-creating pedon database\n",1)
except:
AddMsgAndPrint("\t" + DBname + ".gdb already exists. Failed to delete\n",2)
return False
# copy template over to new location
AddMsgAndPrint("\tCreating " + DBname + ".gdb with NCSS Characterization Data V.XXXXX")
arcpy.Copy_management(localPedonDB,newPedonDB)
""" ------------------------------ Code to use XML Workspace -------------------------------------------"""
## # Return false if xml file is not found
## if not arcpy.Exists(pedonXML):
## AddMsgAndPrint("\t" + os.path.basename(pedonXML) + " Workspace document was not found!",2)
## return False
##
## # Create empty temp File Geodatabae
## arcpy.CreateFileGDB_management(outputFolder,os.path.splitext(os.path.basename(newPedonFGDB))[0])
##
## # set the pedon schema on the newly created temp Pedon FGDB
## AddMsgAndPrint("\tImporting NCSS Pedon Schema 7.3 into " + GDBname + ".gdb")
## arcpy.ImportXMLWorkspaceDocument_management(newPedonFGDB, pedonXML, "DATA", "DEFAULTS")
arcpy.UncompressFileGeodatabaseData_management(newPedonDB)
AddMsgAndPrint("\tSuccessfully created: " + DBname + ".gdb")
return newPedonDB
except arcpy.ExecuteError:
AddMsgAndPrint(arcpy.GetMessages(2),2)
return False
except:
AddMsgAndPrint("Unhandled exception (createFGDB)", 2)
errorMsg()
return False
## ================================================================================================================
def importTabularData(DB,pedonTables):
# Description
# This function will import the lab data .csv files into the lab data spatial database.
# Return False if error occurs, true otherwise.
# Parameters:
# DB - The absolute path to the spatial database that will be used to import the .csv files
# pedonTables - A python list of the tables within the pedon database along with feature classes.
# This list will be used to iterate through the tables and corresponding .csv files
# and import the data into
try:
arcpy.env.workspace = DB
AddMsgAndPrint("\nImporting Tabular Data")
# List of missing .csv files
missingCSVFile = list()
longField = 'longitude_decimal_degrees'
latFileld = 'latitude_decimal_degrees'
# For each item in sorted keys
for DBtable in sorted(pedonTables):
#if DBtable != 'lab_area':continue
#if DBtable != 'lab_chemical_properties':continue
#if not DBtable in ('combine_nasis_ncss'):continue
# Absolute path to text file
txtPath = os.path.join(textFilePath,DBtable + ".txt")
if os.path.exists(txtPath):
# Continue if the text file contains values. Not Empty file
if os.path.getsize(txtPath) > 0:
# Put all the field names and their associated field lengths in a list;
# used to initiate insertCursor object
fieldList = arcpy.Describe(DBtable).fields
nameOfFields = []
fldLengths = list()
for field in fieldList:
# record is an X,Y LAB pedon. Addd XY token to list
if DBtable == pedonFC and field == 'Shape':
# Pedon feature class will have X,Y geometry added; Add XY token to list
nameOfFields.append('SHAPE@XY')
fldLengths.append(0) # X coord
fldLengths.append(0) # Y coord
continue
if field.type != "OID":
nameOfFields.append(field.name)
if field.type.lower() == "string":
fldLengths.append(field.length)
else:
fldLengths.append(0)
del fieldList, field
#print("------------------- Number of fields: " + str(len(nameOfFields)))
#print(nameOfFields)
# The csv file might contain very huge fields, therefore increase the field_size_limit:
# Exception thrown with IL177 in legend.txt. Not sure why, only 1 record was present
## Python 3.x doesn't like either one. Test without.
## try:
## csv.field_size_limit(sys.maxsize)
## except:
## csv.field_size_limit(sys.maxint)
# Number of records in the SSURGO text file
textFileRecords = sum(1 for row in csv.reader(open(txtPath, 'r'), delimiter='|', quotechar='"'))
# Subtract header from total records
if fileContainHeaders:
textFileRecords = textFileRecords - 1
# Initiate Cursor to add rows
cursor = arcpy.da.InsertCursor(DBtable,nameOfFields)
# counter for number of records successfully added; used for reporting
numOfRowsAdded = 0
reader = csv.reader(open(txtPath, 'r'), delimiter='|', quotechar='"')
#reader = csv.reader(codecs.open(txtPath, 'r', encoding='utf-8', errors='ignore'), delimiter='|', quotechar='"')
# Strictly for headers
lineNumber = 0
headerRow = ""
unicodeErrors = list()
arcpy.SetProgressor("step", "Importing Tabular data for: " + DBtable, 0, textFileRecords)
# Return a reader object which will iterate through records in txtPath
for rowInFile in reader:
try:
lineNumber+=1
# Skip first record if text files contain headers
if fileContainHeaders and lineNumber==1:
headerRow = rowInFile
continue
## # This is strictly temporary bc the 'combine_nasis_ncss' txt
## # file contained duplicate sets of records. Use the header
## # row as breaking point.
## if DBtable in (pedonFC,'lab_chemical_properties'):
## try:
## if rowInFile == headerRow:
## print "Duplicate records found ---- Ending"
## break
## except:
## print rowInFile
## print headerRow
## exit()
""" Cannot use this snippet of code b/c some values have exceeded their field lengths
so now every value has to be assessed."""
# replace all blank values with 'None' so that the values are properly inserted
# into integer values otherwise insertRow fails
# newRow = [None if value =='' else value for value in rowInFile]
newRow = list() # list containing the values that make up a specific row
fldNo = 0 # list position to reference the field lengths in order to compare
# Iterate through every value in the row and assess the value to ensure it will
# fit in the corresponding table. Maybe there is a faster way of doing this?
for value in rowInFile:
# incoming strings had a variety of non-ascii symbols that could
# not be decoded by utf-8 including the degree symbol and the lower
# case beta. Had to use the ISO8859 code set.
value = value.decode('ISO8859-1')
# Get the field length associated with this field
fldLen = fldLengths[fldNo]
# set NULL values to None
if value == '' or value == 'NULL':
value = None
# If field is a string, truncate the value to the limit
# of the field. Is this correct? Should these be put in
# a list and then reported to user?
elif fldLen > 0:
value = value[0:fldLen]
# Remove empty space if value starts with blank spaces.
# How do you know that 2 consecutive spaces aren't found?
if value != None and value.startswith(" "):
value = value[1:len(value)]
newRow.append(value)
fldNo += 1
# Add XY coordinates to the pedon spatial layer.
if DBtable == pedonFC:
longPos = fieldList.index(longField) # index position of lat/long fields
latPos = fieldList.index(latFileld)
try:
xValue = float(newRow[longPos]) # Long
yValue = float(newRow[latPos]) # Lat
except:
# The pedon did not have a lat/long;
# Send it to the North Pole :)
xValue = 0.00
yValue = 90.0
newRow.insert(nameOfFields.index('Shape'),(xValue,yValue))
cursor.insertRow(newRow)
numOfRowsAdded += 1
arcpy.SetProgressorPosition()
except:
arcpy.SetProgressorPosition()
AddMsgAndPrint("Line Error: " + str(lineNumber))
errorMsg()
## exc_type, exc_value, exc_traceback = sys.exc_info()
## theMsg = "\t" + traceback.format_exception(exc_type, exc_value, exc_traceback)[1] + "\n\t" + traceback.format_exception(exc_type, exc_value, exc_traceback)[-1]
##
## if theMsg.find('codec') > -1:
## unicodeErrors.append((DBtable,lineNumber,fldNo))
## continue
##
## else:
## AddMsgAndPrint("\n\t\tError inserting record in table: " + DBtable,2)
## AddMsgAndPrint("\t\t\tRecord # " + str(lineNumber),2)
## AddMsgAndPrint("\t\t\tValue: " + str(newRow),2)
## AddMsgAndPrint("\t\t\tField Number: " + str(fldNo))
## errorMsg()
continue
#AddMsgAndPrint("\t\t--> " + iefileName + theAlias + theRecLength + " Records Added: " + str(splitThousands(numOfRowsAdded)),0)
AddMsgAndPrint("\t\t--> " + DBtable + ": Records Added: " + str(splitThousands(numOfRowsAdded)),0)
# compare the # of rows inserted with the number of valid rows in the text file.
if numOfRowsAdded != textFileRecords:
AddMsgAndPrint("\t\t\t Incorrect # of records inserted into: " + DBtable, 2 )
AddMsgAndPrint("\t\t\t\t TextFile records: " + str(splitThousands(textFileRecords)),2)
AddMsgAndPrint("\t\t\t\t Records Inserted: " + str(splitThousands(numOfRowsAdded)),2)
AddMsgAndPrint("\t\t\t\t Unicode Errors: " + str(splitThousands(len(unicodeErrors))),2)
AddMsgAndPrint("\n\n" + str(unicodeErrors))
# Resets the progressor back to is initial state
arcpy.ResetProgressor()
else:
AddMsgAndPrint("\t\t--> " + DBtable + ": Records Added: 0")
else:
AddMsgAndPrint("\t\t--> " + DBtable + ".csv does NOT exist.....SKIPPING ",1)
missingCSVFile.append(DBtable)
return True
except arcpy.ExecuteError:
AddMsgAndPrint(arcpy.GetMessages(2),2)
return False
except:
errorMsg()
return False
# =========================================== Main Body ==========================================
# Import modules
import sys, string, os, traceback, re, arcpy, time, csv, codecs
from arcpy import env
from sys import getsizeof, stderr
from itertools import chain
from collections import deque
if __name__ == '__main__':
try:
## textFilePath = arcpy.GetParameter(0)
## outputFolder = arcpy.GetParameterAsText(1)
## GDBname = arcpy.GetParameterAsText(2)
## fileContainHeaders = True
textFilePath = r'E:\Pedons\NCSS_Characterization_Database\2019_Schema_Short'
outputFolder = r'E:\Pedons\Temp\Import_NCSS_Testing_Short'
DBname = 'Testing_short'
fileContainHeaders = True
pedonFC = 'combine_nasis_ncss' # name of FC that contains the Lat,Long and will be afeature class
# Step 1 - Create List of .txt files
textFileList = [file.replace('.txt','') for file in os.listdir(textFilePath) if file.endswith('.txt')]
# Step 2 - Create FGDB
""" ------------------------------------------------------Create New File Geodatabaes and get Table Aliases for printing -------------------------------------------------------------------
Create a new FGDB using a pre-established XML workspace schema. All tables will be empty
and relationships established. A dictionary of empty lists will be created as a placeholder
for the values from the XML report. The name and quantity of lists will be the same as the FGDB"""
pedonDB = createPedonFGDB()
#pedonFGDB = r'N:\flex\Dylan\NCSS_Characterization_Database\Updated_Schema_2019\NCSS_Characterization_Database_newSchema2.gdb'
arcpy.env.workspace = pedonDB
if not pedonDB:
AddMsgAndPrint("\nFailed to Initiate Empty Pedon File Geodatabase. Error in createPedonFGDB() function. Exiting!",2)
exit()
# Step 3 - Compare .csv files to tables in the pedonDB and report any
# discrepancies between the two. What should happen if discrepancies
# are found? exit?
pedonDBTables = arcpy.ListTables("*")
for fcs in arcpy.ListFeatureClasses('*'):
pedonDBTables.append(fcs)
discrepancy = list(set(pedonDBTables) - set(textFileList))
if len(discrepancy) > 0:
AddMsgAndPrint("\nThere are " + str(len(discrepancy)) + " discrepancies between the FGDB and text files",2)
for item in discrepancy:
if item in pedonDBTables:
AddMsgAndPrint("\t\"" + item + "\" Table does not have a corresponding text file")
else:
AddMsgAndPrint("\t\"" + item + "\" text file does not have a corresponding FGDB Table")
AddMsgAndPrint("\nAll discrepancies must be addressed before continuing.....EXITING!",1)
#exit()
importTabularData(pedonDB,pedonDBTables)
except MemoryError:
AddMsgAndPrint("\n\nOut of Memory Genius! --- " + str(sys.getsizeof(pedonDBTables)),2)
exit()
except:
errorMsg()