From a858ea5ed68405cf648b64dff46add85d22d0052 Mon Sep 17 00:00:00 2001 From: Marc Weber Date: Thu, 8 Apr 2021 09:52:57 -0700 Subject: [PATCH] Get python3 working (#27) * update MakeFinalTables * use double quotes, f-strings * update MakeFinalTables * fix print stmtnts, dbf2DF to use gpd * return pandas DF from dbf2DF * working through framework creation * update arcpro install, add to path manually * lint with black and isort * Updated ReadMe to reflect use of new StreamCat environment, changed parameters in LakeCat.py and LakeCat_functions.py to allow slope calculation and tweaked a few parameters. * add download link to border.py * update dbf2DF * merge commit Co-authored-by: rickD --- .gitignore | 1 + ControlTable_LakeCat.csv | 256 +++--- LakeCat.py | 185 ++--- LakeCat_functions.py | 1485 ++++++++++++++++------------------- MakeFinalTables_LakeCat.py | 195 +++-- README.md | 69 +- border.py | 21 +- lake_cat_config.py.template | 2 +- 8 files changed, 1017 insertions(+), 1197 deletions(-) diff --git a/.gitignore b/.gitignore index db7bfaa..e6bb685 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .RData .Ruserdata lake_cat_config.py +.Rproj.user diff --git a/ControlTable_LakeCat.csv b/ControlTable_LakeCat.csv index 32553cc..fbd052e 100644 --- a/ControlTable_LakeCat.csv +++ b/ControlTable_LakeCat.csv @@ -1,111 +1,145 @@ -f_d_Title,DirectoryLocations,FullTableName,accum_type,MetricName,AppendMetric,LandscapeLayer,summaryfield,Final_Table_Name,MetricType,Conversion,by_RPU,run,notes -ingrid_dir,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/LandscapeRasters/QAComplete,BFI,Continuous,BFI,none,bfi.tif,,BFI,Mean,1,0,0, -Framework,L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/LakeCat_Framework,Clay,Continuous,Clay,none,clay.tif,,STATSGO_Set1,Mean,0.01,0,0, -out_dir,L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/Allocation_Accumulation,PctImp2006,Continuous,PctImp2006,none,imp2006.tif,,ImperviousSurfaces2006,Mean,1,0,0, -LakeCat_repo_dir,F:/GitProjects/s/LakeCat,Om,Continuous,Om,none,om.tif,,STATSGO_Set2,Mean,0.01,0,0, -StreamCat_dir,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/Allocation_and_Accumulation,Perm,Continuous,Perm,none,perm.tif,,STATSGO_Set2,Mean,0.01,0,0, -NHD_dir,H:/NHDPlusV21,Rckdep,Continuous,Rckdep,none,rckdep.tif,,STATSGO_Set2,Mean,0.01,0,0, -final_tables_dir,L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/FTP_Staging/LakeCat/FinalTables,RdDens,Continuous,RdDens,none,roadden.tif,,RoadDensity,Mean,1,0,0, -,,RdCrs,Continuous,RdCrs,none,rdstcrs.tif,,RoadStreamCrossings,Density,1,0,0, -,,Runoff,Continuous,Runoff,none,runoff.tif,,Runoff,Mean,1,0,0, -,,Sand,Continuous,Sand,none,sand.tif,,STATSGO_Set1,Mean,0.01,0,0, -,,WtDep,Continuous,WtDep,none,wtdep.tif,,STATSGO_Set2,Mean,0.01,0,0, -,,Elev,Continuous,Elev,none,elev_cm,,Elevation,Mean,0.01,1,0, -,,PopDen2010,Continuous,PopDen2010,none,POP_SQKM.tif,,USCensus2010,Mean,1,0,0, -,,HUDen2010,Continuous,HUDen2010,none,HU_SQKM.tif,,USCensus2010,Mean,1,0,0, -,,NH4_2008,Continuous,NH4_2008,none,dep_nh4_2008.tif,,NADP,Mean,1,0,0, -,,NO3_2008,Continuous,NO3_2008,none,dep_no3_2008.tif,,NADP,Mean,1,0,0, -,,InorgNWetDep_2008,Continuous,InorgNWetDep_2008,none,dep_totalN_2008.tif,,NADP,Mean,1,0,0, -,,SN_2008,Continuous,SN_2008,none,dep_splusn_2008.tif,,NADP,Mean,1,0,0, -,,tmin,Continuous,Tmin8110,none,tmin.tif,,PRISM_1981_2010,Mean,1,0,0, -,,tmean,Continuous,Tmean8110,none,tmean.tif,,PRISM_1981_2010,Mean,1,0,0, -,,tmax,Continuous,Tmax8110,none,tmax.tif,,PRISM_1981_2010,Mean,1,0,0, 
-,,precip,Continuous,Precip8110,none,precip.tif,,PRISM_1981_2010,Mean,1,0,0, -,,AgKffact,Continuous,AgKffact,none,AgKffact.tif,,Kffact,Mean,0.01,0,0, -,,Kffact,Continuous,Kffact,none,kffact.tif,,Kffact,Mean,0.01,0,0, -,,CanalDensity,Continuous,CanalDens,none,CanalsDitches.tif,,CanalsDitches,Density,0.03,0,0, -,,Pestic97,Continuous,Pestic97,none,pestic.tif,,Pesticides97,Mean,1,0,0, -,,Al2O3,Continuous,Al2O3,none,al20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,CaO,Continuous,CaO,none,cao20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,Fe2O3,Continuous,Fe2O3,none,fe20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,K2O,Continuous,K2O,none,k20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,MgO,Continuous,MgO,none,mgo20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,Na2O,Continuous,Na2O,none,na20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,P2O5,Continuous,P2O5,none,p20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,S,Continuous,S,none,s20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,SiO2,Continuous,SiO2,none,si20mar14.tif,,GeoChemPhys1,Mean,1,0,0, -,,N,Continuous,N,none,n20mar14.tif,,GeoChemPhys2,Mean,1,0,0, -,,HydrlCond,Continuous,HydrlCond,none,perm20mar14.tif,,GeoChemPhys3,Mean,1,0,0, -,,CompStrgth,Continuous,CompStrgth,none,ucs20mar14.tif,,GeoChemPhys4,Mean,1,0,0, -,,CBNF,Continuous,CBNF,none,cbnf.tif,,AgriculturalNitrogen,Mean,1,0,0, -,,Fert,Continuous,Fert,none,fert.tif,,AgriculturalNitrogen,Mean,1,0,0, -,,Manure,Continuous,Manure,none,manure.tif,,AgriculturalNitrogen,Mean,1,0,0, -,,Fire2000,Continuous,PctFire2000,none,fire2000.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2001,Continuous,PctFire2001,none,fire2001.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2002,Continuous,PctFire2002,none,fire2002.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2003,Continuous,PctFire2003,none,fire2003.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2004,Continuous,PctFire2004,none,fire2004.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2005,Continuous,PctFire2005,none,fire2005.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2006,Continuous,PctFire2006,none,fire2006.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2007,Continuous,PctFire2007,none,fire2007.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2008,Continuous,PctFire2008,none,fire2008.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2009,Continuous,PctFire2009,none,fire2009.tif,,FirePerimeters,Mean,100,0,0, -,,Fire2010,Continuous,PctFire2010,none,fire2010.tif,,FirePerimeters,Mean,100,0,0, -,,PctNonAgIntrodManagVeg,Continuous,PctNonAgIntrodManagVeg,none,IntrodManagVeg.tif,,NonAgIntrodManagVeg,Mean,100,0,0, -,,PRISMppt_2008,Continuous,Precip08,none,PRISMppt_2008.tif,,PRISM_0809,Mean,1,0,0, -,,PRISMppt_2009,Continuous,Precip09,none,PRISMppt_2009.tif,,PRISM_0809,Mean,1,0,0, -,,PRISMtmean_2008,Continuous,Tmean08,none,PRISMtmean_2008.tif,,PRISM_0809,Mean,1,0,0, -,,PRISMtmean_2009,Continuous,Tmean09,none,PRISMtmean_2009.tif,,PRISM_0809,Mean,1,0,0, -,,WetnessIndex,Continuous,WetIndex,none,cti_v3.tif,,WetIndx,Mean,1,0,0, -,,nlcd2006,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/NLCD2006_lookup.csv,none,nlcd2006.tif,,NLCD2006,Percent,1,0,0, -,,nlcd2011,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/NLCD2011_lookup.csv,none,nlcd2011.tif,,NLCD2011,Percent,1,0,0, -,,lith,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/Lithology_lookup.csv,none,us_lithology_1km_dd83.tif,,Lithology,Percent,1,0,0, -,,Ag2006HiSlp,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/Ag2006HiSlp_lookup.csv,none,Ag2006HiSlp.tif,,AgMidHiSlopes,Percent,1,0,0, 
-,,Ag2006MidSlp,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/Ag2006MidSlp_lookup.csv,none,Ag2006MidSlp.tif,,AgMidHiSlopes,Percent,1,0,0, -,,PctFrstLossByYear,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/ForestLossByYear_lookup.csv,none,lossyr.tif,,ForestLossByYear0013,Percent,1,0,0, -,,NABD,Point,NABD_Dens,none,NABD.shp,NIDStorM3;NrmStorM3,NABD,Density,1,0,0, -,,Dams,Point,DamDens,none,dams.shp,NIDStorM3;NrmStorM3,Dams,Density,1,0,0, -,,Mine,Point,MineDens,none,mines.shp,,Mines,Density,1,0,0, -,,NPDES,Point,NPDESDens,none,NPDES_Major.shp,,EPA_FRS,Density,1,0,0, -,,Superfund,Point,SuperfundDens,none,Superfund.shp,,EPA_FRS,Density,1,0,0, -,,TRI,Point,TRIDens,none,TRI.shp,,EPA_FRS,Density,1,0,0, -,,CoalMines,Point,CoalMineDens,none,USTRAT.shp,,CoalMines,Density,1,0,0, -,,PctImp2011,Continuous,PctImp2011,none,imp2011.tif,,ImperviousSurfaces2011,Mean,1,0,0, -,,PADUS,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/GAP_Status_lookup.csv,none,gap_sts.tif,,PADUS,Percent,1,0,0, -,,MODIS_Ir_Ag_2002,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MODIS_IrAg_lookup.csv,none,mirad250_02.tif,,MODIS_IrrigAg,Percent,1,0,0, -,,MODIS_Ir_Ag_2007,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MODIS_IrAg_lookup.csv,none,mirad250_07.tif,,MODIS_IrrigAg,Percent,1,0,0, -,,MODIS_Ir_Ag_2012,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MODIS_IrAg_lookup.csv,none,mirad250_12.tif,,MODIS_IrrigAg,Percent,1,0,0, -,,MTBS_Severity_1984,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1984.tif,,MTBS_Severity_1984,Percent,1,0,0, -,,MTBS_Severity_1985,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1985.tif,,MTBS_Severity_1985,Percent,1,0,0, -,,MTBS_Severity_1986,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1986.tif,,MTBS_Severity_1986,Percent,1,0,0, -,,MTBS_Severity_1987,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1987.tif,,MTBS_Severity_1987,Percent,1,0,0, -,,MTBS_Severity_1988,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1988.tif,,MTBS_Severity_1988,Percent,1,0,0, -,,MTBS_Severity_1989,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1989.tif,,MTBS_Severity_1989,Percent,1,0,0, -,,MTBS_Severity_1990,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1990.tif,,MTBS_Severity_1990,Percent,1,0,0, -,,MTBS_Severity_1991,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1991.tif,,MTBS_Severity_1991,Percent,1,0,0, -,,MTBS_Severity_1992,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1992.tif,,MTBS_Severity_1992,Percent,1,0,0, -,,MTBS_Severity_1993,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1993.tif,,MTBS_Severity_1993,Percent,1,0,0, 
-,,MTBS_Severity_1994,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1994.tif,,MTBS_Severity_1994,Percent,1,0,0, -,,MTBS_Severity_1995,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1995.tif,,MTBS_Severity_1995,Percent,1,0,0, -,,MTBS_Severity_1996,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1996.tif,,MTBS_Severity_1996,Percent,1,0,0, -,,MTBS_Severity_1997,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1997.tif,,MTBS_Severity_1997,Percent,1,0,0, -,,MTBS_Severity_1998,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1998.tif,,MTBS_Severity_1998,Percent,1,0,0, -,,MTBS_Severity_1999,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1999.tif,,MTBS_Severity_1999,Percent,1,0,0, -,,MTBS_Severity_2000,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2000.tif,,MTBS_Severity_2000,Percent,1,0,0, -,,MTBS_Severity_2001,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2001.tif,,MTBS_Severity_2001,Percent,1,0,0, -,,MTBS_Severity_2002,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2002.tif,,MTBS_Severity_2002,Percent,1,0,0, -,,MTBS_Severity_2003,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2003.tif,,MTBS_Severity_2003,Percent,1,0,0, -,,MTBS_Severity_2004,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2004.tif,,MTBS_Severity_2004,Percent,1,0,0, -,,MTBS_Severity_2005,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2005.tif,,MTBS_Severity_2005,Percent,1,0,0, -,,MTBS_Severity_2006,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2006.tif,,MTBS_Severity_2006,Percent,1,0,0, -,,MTBS_Severity_2007,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2007.tif,,MTBS_Severity_2007,Percent,1,0,0, -,,MTBS_Severity_2008,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2008.tif,,MTBS_Severity_2008,Percent,1,0,0, -,,MTBS_Severity_2009,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2009.tif,,MTBS_Severity_2009,Percent,1,0,0, -,,MTBS_Severity_2010,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2010.tif,,MTBS_Severity_2010,Percent,1,0,0, -,,MTBS_Severity_2011,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2011.tif,,MTBS_Severity_2011,Percent,1,0,0, 
-,,MTBS_Severity_2012,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2012.tif,,MTBS_Severity_2012,Percent,1,0,0, -,,MTBS_Severity_2013,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2013.tif,,MTBS_Severity_2013,Percent,1,0,0, -,,MTBS_Severity_2014,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2014.tif,,MTBS_Severity_2014,Percent,1,0,0, -,,MTBS_Severity_2015,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2015.tif,,MTBS_Severity_2015,Percent,1,0,0, -,,MTBS_Severity_2016,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2016.tif,,MTBS_Severity_2016,Percent,1,0,0, -,,US_Level_III_Ecoregions,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/US_Level_III_Ecoregions_lookup.csv,none,US_Level_III_Ecoregions.tif,,US_Level_III_Ecoregions,Percent,1,0,1, +FullTableName,accum_type,MetricName,AppendMetric,LandscapeLayer,summaryfield,Final_Table_Name,MetricType,Conversion,by_RPU,run,notes +Ag2006HiSlp,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/Ag2006HiSlp_lookup.csv,none,Ag2006HiSlp.tif,,AgMidHiSlopes,Percent,1,0,0, +Ag2006MidSlp,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/Ag2006MidSlp_lookup.csv,none,Ag2006MidSlp.tif,,AgMidHiSlopes,Percent,1,0,0, +AgDrain,Categorical,lookup/AgDrain_lookup.csv,none,AgDrain_stlvl_FINAL.tif,,AgDrain,Percent,1,0,0, +AgKffact,Continuous,AgKffact,none,AgKffact.tif,,Kffact,Mean,0.01,0,0, +Al2O3,Continuous,Al2O3,none,al20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +Aquifers,Categorical,lookup/Aquifer_Table_lookup.csv,none,us_aquifers_two.tif,,Aquifers,Percent,1,0,0, +BFI,Continuous,BFI,none,bfi.tif,,BFI,Mean,1,0,0, +CanalDensity,Continuous,CanalDens,none,CanalsDitches.tif,,CanalsDitches,Density,0.03,0,0, +CaO,Continuous,CaO,none,cao20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +CBNF,Continuous,CBNF,none,cbnf.tif,,AgriculturalNitrogen,Mean,1,0,0, +Clay,Continuous,Clay,none,clay.tif,,STATSGO_Set1,Mean,0.01,0,0, +CoalMines,Point,CoalMineDens,none,USTRAT.shp,,CoalMines,Density,1,0,0, +CompStrgth,Continuous,CompStrgth,none,ucs20mar14.tif,,GeoChemPhys4,Mean,1,0,0, +Dams,Point,DamDens,none,dams.shp,NIDStorM3;NrmStorM3,Dams,Density,1,0,0, +Elev,Continuous,Elev,none,elev_cm,,Elevation,Mean,0.01,1,0, +Fe2O3,Continuous,Fe2O3,none,fe20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +Fert,Continuous,Fert,none,fert.tif,,AgriculturalNitrogen,Mean,1,0,0, +Fire2000,Continuous,PctFire2000,none,fire2000.tif,,FirePerimeters,Mean,100,0,0, +Fire2001,Continuous,PctFire2001,none,fire2001.tif,,FirePerimeters,Mean,100,0,0, +Fire2002,Continuous,PctFire2002,none,fire2002.tif,,FirePerimeters,Mean,100,0,0, +Fire2003,Continuous,PctFire2003,none,fire2003.tif,,FirePerimeters,Mean,100,0,0, +Fire2004,Continuous,PctFire2004,none,fire2004.tif,,FirePerimeters,Mean,100,0,0, +Fire2005,Continuous,PctFire2005,none,fire2005.tif,,FirePerimeters,Mean,100,0,0, +Fire2006,Continuous,PctFire2006,none,fire2006.tif,,FirePerimeters,Mean,100,0,0, +Fire2007,Continuous,PctFire2007,none,fire2007.tif,,FirePerimeters,Mean,100,0,0, +Fire2008,Continuous,PctFire2008,none,fire2008.tif,,FirePerimeters,Mean,100,0,0, 
+Fire2009,Continuous,PctFire2009,none,fire2009.tif,,FirePerimeters,Mean,100,0,0, +Fire2010,Continuous,PctFire2010,none,fire2010.tif,,FirePerimeters,Mean,100,0,0, +HUDen2010,Continuous,HUDen2010,none,HU_SQKM.tif,,USCensus2010,Mean,1,0,0, +HydrlCond,Continuous,HydrlCond,none,perm20mar14.tif,,GeoChemPhys3,Mean,1,0,0, +InorgNWetDep_2008,Continuous,InorgNWetDep_2008,none,dep_totalN_2008.tif,,NADP,Mean,1,0,0, +K2O,Continuous,K2O,none,k20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +Kffact,Continuous,Kffact,none,kffact.tif,,Kffact,Mean,0.01,0,0, +lith,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/Lithology_lookup.csv,none,us_lithology_1km_dd83.tif,,Lithology,Percent,1,0,0, +Manure,Continuous,Manure,none,manure.tif,,AgriculturalNitrogen,Mean,1,0,0, +MercDep,Continuous,MercDep,none,Hg_dep_2011.tif,,MercDep,Mean,1,0,0, +MgO,Continuous,MgO,none,mgo20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +Mine,Point,MineDens,none,mines.shp,,Mines,Density,1,0,0, +MODIS_Ir_Ag_2002,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MODIS_IrAg_lookup.csv,none,mirad250_02.tif,,,Percent,1,0,0, +MODIS_Ir_Ag_2007,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MODIS_IrAg_lookup.csv,none,mirad250_07.tif,,,Percent,1,0,0, +MODIS_Ir_Ag_2012,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MODIS_IrAg_lookup.csv,none,mirad250_12.tif,,,Percent,1,0,0, +MTBS_Severity_1984,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1984.tif,,MTBS_Severity_1984,Percent,1,0,0, +MTBS_Severity_1985,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1985.tif,,MTBS_Severity_1985,Percent,1,0,0, +MTBS_Severity_1986,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1986.tif,,MTBS_Severity_1986,Percent,1,0,0, +MTBS_Severity_1987,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1987.tif,,MTBS_Severity_1987,Percent,1,0,0, +MTBS_Severity_1988,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1988.tif,,MTBS_Severity_1988,Percent,1,0,0, +MTBS_Severity_1989,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1989.tif,,MTBS_Severity_1989,Percent,1,0,0, +MTBS_Severity_1990,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1990.tif,,MTBS_Severity_1990,Percent,1,0,0, +MTBS_Severity_1991,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1991.tif,,MTBS_Severity_1991,Percent,1,0,0, +MTBS_Severity_1992,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1992.tif,,MTBS_Severity_1992,Percent,1,0,0, +MTBS_Severity_1993,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1993.tif,,MTBS_Severity_1993,Percent,1,0,0, +MTBS_Severity_1994,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1994.tif,,MTBS_Severity_1994,Percent,1,0,0, 
+MTBS_Severity_1995,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1995.tif,,MTBS_Severity_1995,Percent,1,0,0, +MTBS_Severity_1996,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1996.tif,,MTBS_Severity_1996,Percent,1,0,0, +MTBS_Severity_1997,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1997.tif,,MTBS_Severity_1997,Percent,1,0,0, +MTBS_Severity_1998,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1998.tif,,MTBS_Severity_1998,Percent,1,0,0, +MTBS_Severity_1999,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_1999.tif,,MTBS_Severity_1999,Percent,1,0,0, +MTBS_Severity_2000,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2000.tif,,MTBS_Severity_2000,Percent,1,0,0, +MTBS_Severity_2001,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2001.tif,,MTBS_Severity_2001,Percent,1,0,0, +MTBS_Severity_2002,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2002.tif,,MTBS_Severity_2002,Percent,1,0,0, +MTBS_Severity_2003,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2003.tif,,MTBS_Severity_2003,Percent,1,0,0, +MTBS_Severity_2004,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2004.tif,,MTBS_Severity_2004,Percent,1,0,0, +MTBS_Severity_2005,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2005.tif,,MTBS_Severity_2005,Percent,1,0,0, +MTBS_Severity_2006,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2006.tif,,MTBS_Severity_2006,Percent,1,0,0, +MTBS_Severity_2007,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2007.tif,,MTBS_Severity_2007,Percent,1,0,0, +MTBS_Severity_2008,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2008.tif,,MTBS_Severity_2008,Percent,1,0,0, +MTBS_Severity_2009,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2009.tif,,MTBS_Severity_2009,Percent,1,0,0, +MTBS_Severity_2010,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2010.tif,,MTBS_Severity_2010,Percent,1,0,0, +MTBS_Severity_2011,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2011.tif,,MTBS_Severity_2011,Percent,1,0,0, +MTBS_Severity_2012,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2012.tif,,MTBS_Severity_2012,Percent,1,0,0, 
+MTBS_Severity_2013,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2013.tif,,MTBS_Severity_2013,Percent,1,0,0, +MTBS_Severity_2014,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2014.tif,,MTBS_Severity_2014,Percent,1,0,0, +MTBS_Severity_2015,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2015.tif,,MTBS_Severity_2015,Percent,1,0,0, +MTBS_Severity_2016,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/MTBS_severity_lookup.csv,none,MTBS_Severity_2016.tif,,MTBS_Severity_2016,Percent,1,0,0, +N,Continuous,N,none,n20mar14.tif,,GeoChemPhys2,Mean,1,0,0, +Na2O,Continuous,Na2O,none,na20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +NABD,Point,NABD_Dens,none,NABD.shp,NIDStorM3;NrmStorM3,NABD,Density,1,0,0, +NH4_2008,Continuous,NH4_2008,none,dep_nh4_2008.tif,,NADP,Mean,1,0,0, +NH4_2014,Continuous,NH4_2014,none,NH4_dep_2014.tif,,NADP_2014_2018,Mean,1,0,0, +NH4_2015,Continuous,NH4_2015,none,NH4_dep_2015.tif,,NADP_2014_2018,Mean,1,0,0, +NH4_2016,Continuous,NH4_2016,none,NH4_dep_2016.tif,,NADP_2014_2018,Mean,1,0,0, +NH4_2017,Continuous,NH4_2017,none,NH4_dep_2017.tif,,NADP_2014_2018,Mean,1,0,0, +NH4_2018,Continuous,NH4_2018,none,NH4_dep_2018.tif,,NADP_2014_2018,Mean,1,0,0, +nlcd2001,Categorical,lookup/NLCD2001_lookup.csv,none,NLCD_2001_Land_Cover_L48_20190424.tif,,NLCD2001,Percent,1,0,0, +nlcd2004,Categorical,lookup/NLCD2004_lookup.csv,none,NLCD_2004_Land_Cover_L48_20190424.tif,,NLCD2004,Percent,1,0,0, +nlcd2006,Categorical,lookup/NLCD2006_lookup.csv,none,NLCD_2006_Land_Cover_L48_20190424.tif,,NLCD2006,Percent,1,0,0, +nlcd2008,Categorical,lookup/NLCD2008_lookup.csv,none,NLCD_2008_Land_Cover_L48_20190424.tif,,NLCD2008,Percent,1,0,0, +nlcd2011,Categorical,lookup/NLCD2011_lookup.csv,none,NLCD_2011_Land_Cover_L48_20190424.tif,,NLCD2011,Percent,1,0,0, +nlcd2013,Categorical,lookup/NLCD2013_lookup.csv,none,NLCD_2013_Land_Cover_L48_20190424.tif,,NLCD2013,Percent,1,0,0, +nlcd2016,Categorical,lookup/NLCD2016_lookup.csv,none,NLCD_2016_Land_Cover_L48_20190424.tif,,NLCD2016,Percent,1,0,0, +NO3_2008,Continuous,NO3_2008,none,dep_no3_2008.tif,,NADP,Mean,1,0,0, +NO3_2014,Continuous,NO3_2014,none,NO3_dep_2014.tif,,NADP_2014_2018,Mean,1,0,0, +NO3_2015,Continuous,NO3_2015,none,NO3_dep_2015.tif,,NADP_2014_2018,Mean,1,0,0, +NO3_2016,Continuous,NO3_2016,none,NO3_dep_2016.tif,,NADP_2014_2018,Mean,1,0,0, +NO3_2017,Continuous,NO3_2017,none,NO3_dep_2017.tif,,NADP_2014_2018,Mean,1,0,0, +NO3_2018,Continuous,NO3_2018,none,NO3_dep_2018.tif,,NADP_2014_2018,Mean,1,0,0, +NPDES,Point,NPDESDens,none,NPDES_Major.shp,,EPA_FRS,Density,1,0,0, +Om,Continuous,Om,none,om.tif,,STATSGO_Set2,Mean,0.01,0,0, +P2O5,Continuous,P2O5,none,p20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +PADUS,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/GAP_Status_lookup.csv,none,gap_sts.tif,,PADUS,Percent,1,0,0, +PctFrstLossByYear,Categorical,L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/ControlTables/ForestLossByYear_lookup.csv,none,lossyr.tif,,ForestLossByYear0013,Percent,1,0,0, +PctImp2001,Continuous,PctImp2001,none,NLCD_2001_Impervious_descriptor_L48_20190405.tif,,ImperviousSurfaces2001,Mean,1,0,0, +PctImp2006,Continuous,PctImp2006,none,NLCD_2006_Impervious_descriptor_L48_20190405.tif,,ImperviousSurfaces2006,Mean,1,0,0, 
+PctImp2011,Continuous,PctImp2011,none,NLCD_2011_Impervious_descriptor_L48_20190405.tif,,ImperviousSurfaces2011,Mean,1,0,0, +PctImp2016,Continuous,PctImp2016,none,NLCD_2016_Impervious_descriptor_L48_20190405.tif,,ImperviousSurfaces2016,Mean,1,0,0, +PctNonAgIntrodManagVeg,Continuous,PctNonAgIntrodManagVeg,none,IntrodManagVeg.tif,,NonAgIntrodManagVeg,Mean,100,0,0, +PCVPY_2017,Continuous,PCVPY_2017,none,PCVPY.tif,,PRISM_2017,Mean,1,0,0, +Perm,Continuous,Perm,none,perm.tif,,STATSGO_Set2,Mean,0.01,0,0, +Pestic97,Continuous,Pestic97,none,pestic.tif,,Pesticides97,Mean,1,0,0, +PIP_2017,Continuous,PIP_2017,none,PIP.tif,,PRISM_2017,Mean,1,0,0, +PopDen2010,Continuous,PopDen2010,none,POP_SQKM.tif,,USCensus2010,Mean,1,0,0, +precip,Continuous,Precip8110,none,precip.tif,,PRISM_1981_2010,Mean,1,0,0, +Precip_Minus_EVT,Continuous,Precip_Minus_EVT,none,USAavgPeriod_pptSurp_1994to2016.tif,,Precip_Minus_EVT,Mean,1,0,0, +PRISMppt_2008,Continuous,Precip08,none,PRISMppt_2008.tif,,PRISM_0809,Mean,1,0,0, +PRISMppt_2009,Continuous,Precip09,none,PRISMppt_2009.tif,,PRISM_0809,Mean,1,0,0, +PRISMtmean_2008,Continuous,Tmean08,none,PRISMtmean_2008.tif,,PRISM_0809,Mean,1,0,0, +PRISMtmean_2009,Continuous,Tmean09,none,PRISMtmean_2009.tif,,PRISM_0809,Mean,1,0,0, +PSUM6_2017,Continuous,PSUM6_2017,none,PSUM6.tif,,PRISM_2017,Mean,1,0,0, +PSUMPY_2017,Continuous,PSUMPY_2017,none,PSUMPY.tif,,PRISM_2017,Mean,1,0,0, +Rckdep,Continuous,Rckdep,none,rckdep.tif,,STATSGO_Set2,Mean,0.01,0,0, +RdCrs,Continuous,RdCrs,none,rdstcrs.tif,,RoadStreamCrossings,Density,1,0,0, +RdDens,Continuous,RdDens,none,roadden.tif,,RoadDensity,Mean,1,0,0, +RockN,Continuous,RockN,none,RockN_USA_USGSproj_1km_kgkm2.tif,,RockN,Mean,1,0,0, +Runoff,Continuous,Runoff,none,runoff.tif,,Runoff,Mean,1,0,0, +S,Continuous,S,none,s20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +Sand,Continuous,Sand,none,sand.tif,,STATSGO_Set1,Mean,0.01,0,0, +SiO2,Continuous,SiO2,none,si20mar14.tif,,GeoChemPhys1,Mean,1,0,0, +SN_2008,Continuous,SN_2008,none,dep_splusn_2008.tif,,NADP,Mean,1,0,0, +Superfund,Point,SuperfundDens,none,Superfund.shp,,EPA_FRS,Density,1,0,0, +sw_flux,Continuous,sw_flux,none,TN_load_px.tif,,sw_flux,Mean,1,0,0, +TIP_2017,Continuous,PIP_2017,none,TIP.tif,,PRISM_2017,Mean,1,0,0, +tmax,Continuous,Tmax8110,none,tmax.tif,,PRISM_1981_2010,Mean,1,0,0, +tmean,Continuous,Tmean8110,none,tmean.tif,,PRISM_1981_2010,Mean,1,0,0, +TMEANPW_2017,Continuous,TMEANPW_2017,none,TMEANPW.tif,,PRISM_2017,Mean,1,0,0, +TMEANPY_2017,Continuous,TMEANPY_2017,none,TMEANPY.tif,,PRISM_2017,Mean,1,0,0, +tmin,Continuous,Tmin8110,none,tmin.tif,,PRISM_1981_2010,Mean,1,0,0, +TRI,Point,TRIDens,none,TRI.shp,,EPA_FRS,Density,1,0,0, +US_Level_III_Ecoregions,Categorical,L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/US_Level_III_Ecoregions_lookup.csv,none,US_Level_III_Ecoregions.tif,,US_Level_III_Ecoregions,Percent,1,0,0, +WetnessIndex,Continuous,WetIndex,none,cti_v3.tif,,WetIndx,Mean,1,0,0, +WtDep,Continuous,WtDep,none,wtdep.tif,,STATSGO_Set2,Mean,0.01,0,0, +WWTPAll,Point,WWTPAllDens,none,WWTP_All_CWA_Active_2013_CONUS.shp,,WWTP,Density,1,0,0, +WWTPMajor,Point,WWTPMajorDens,none,WWTP_Major_CWA_Active_2013_CONUS.shp,,WWTP,Density,1,0,0, +WWTPMinor,Point,WWTPMinorDens,none,WWTP_Minor_CWA_Active_2013_CONUS.shp,,WWTP,Density,1,0,0, +Slope,Continuous,Slope,none,slope_perc,,Slope,Mean,1,1,1, diff --git a/LakeCat.py b/LakeCat.py index b24fd10..e06c90b 100644 --- a/LakeCat.py +++ b/LakeCat.py @@ -3,151 +3,56 @@ Created on Tue May 31 15:24:06 2016 -#ctl = 
pd.read_csv('D:/Projects/LakeCat_scrap/ControlTable_LakeCat_RD.csv') - @author: Rdebbout """ import os import sys -import arcpy -import numpy as np -import pandas as pd -import geopandas as gpd -#ctl = pd.read_csv(sys.argv[1]) -ctl = pd.read_csv(r'F:/GitProjects/LakeCat/ControlTable_LakeCat.csv') -ctl = pd.read_csv('F:/GitProjects/NARS/Landscape Metrics/ControlTable_LakeCat_NLA17.csv') -arcpy.CheckOutExtension("spatial") from datetime import datetime as dt -#sys.path.append(ctl.DirectoryLocations.values[3]) -sys.path.append('F:/GitProjects/LakeCat') -from arcpy.sa import ZonalStatisticsAsTable, TabulateArea -from LakeCat_functions import inputs, rpus, dbf2DF, getOnNetLakes2, chkColumnLength, PointInPoly, Accumulation - -arcpy.env.cellSize = "30" -ingrid_dir = ctl.DirectoryLocations.values[0] - -frame = ctl.DirectoryLocations.values[1] -out_dir = ctl.DirectoryLocations.values[2] -StreamCat = ctl.DirectoryLocations.values[4] -NHD_dir = ctl.DirectoryLocations.values[5] - -if not os.path.exists(out_dir): - os.mkdir(out_dir) - os.mkdir('%s/ZStats' % out_dir) - -lls = [line for line in ctl.index if ctl.run[line] == 1] - -for ll in lls: # loop through each FullTableName in control table - print 'running....%s' % ctl.LandscapeLayer[ll] - accum_type = ctl.accum_type[ll] - LLyr = '%s/%s' % (ingrid_dir, ctl.LandscapeLayer[ll]) - metric = ctl.MetricName[ll] - name = ctl.FullTableName[ll] - ddir = '%s/ZStats/%s' % (out_dir,name) - if not os.path.exists(ddir) and accum_type != 'Point': - os.mkdir(ddir) - summaryfield = None - if type(ctl.summaryfield[ll]) == str: - summaryfield = ctl.summaryfield[ll].split(';') - start = dt.now() +import geopandas as gpd +import numpy as np +import pandas as pd - if accum_type != 'Point': - csv = "%s/%s.csv" % (out_dir, name) - stats = pd.DataFrame() - for zone in rpus.keys(): - pre = '%s/NHDPlus%s/NHDPlus%s' % (NHD_dir, inputs[zone], zone) - for rpu in rpus[zone]: - if metric == 'Elev': - LLyr = '%s/NEDSnapshot/Ned%s/%s' % (pre, rpu, - ctl.LandscapeLayer[ll]) - out = '{0}/ZStats/{1}/{1}_{2}.dbf'.format(out_dir, name, rpu) - if not os.path.exists(out): - raster = '%s/rasters/wsheds/wtshds_%s.tif' % (frame, rpu) - if accum_type == 'Categorical': - TabulateArea(raster, "Value", LLyr, "Value", - out, "30") - if accum_type == 'Continuous': - ZonalStatisticsAsTable(raster, "Value", LLyr, - out, "DATA", "ALL") - tbl = dbf2DF(out) - tbl.rename(columns={'VALUE':'UID'},inplace=True) - stats = pd.concat([stats, tbl]) - stats.to_csv(csv, index=False) - - if accum_type == 'Point': - - pct_full = pd.read_csv('%s/border/pct_full.csv' % frame) - points = gpd.GeoDataFrame.from_file(LLyr) - basins = '%s/shps/allBasins.shp' % (frame) - stats = PointInPoly(points, basins, pct_full, 'UID', summaryfield) - - print 'ZonalStats Results Complete in : ' + str(dt.now() - start) - - if accum_type != 'Point': - b = pd.DataFrame() - for zone in rpus.keys(): - for rpu in rpus[zone]: - b_ = dbf2DF('%s/rasters/wsheds/wtshds_%s.tif.vat.dbf' % (frame, - rpu)) - b_['BSNAREASQKM'] = (b_.COUNT * 900) * 1e-6 - b_ = b_[['VALUE', 'BSNAREASQKM', 'COUNT']] - b_.columns = ['UID', 'AreaSqKm', 'COUNT'] - b = pd.concat([b,b_]) - - - if accum_type == 'Categorical': - stats = chkColumnLength(stats,LLyr) - cols = stats.columns.tolist()[1:] - stats['AREA'] = stats[stats.columns.tolist()[1:]].sum(axis=1) - stats = pd.merge(b, stats, how='left', on='UID') - stats['PctFull'] = (((stats.AREA * 1e-6) / stats.AreaSqKm) * 100) - stats = stats[['UID', 'AreaSqKm'] + cols + ['PctFull']] - cols = stats.columns[1:] - stats.columns 
= np.append('UID', 'Cat' + cols.values) - stats = stats.fillna(0) - - if accum_type == 'Continuous': - stats = pd.merge(b, stats, how='left', on='UID') - stats['CatPctFull'] = ((stats.COUNT_y / stats.COUNT_x) * 100) - if name == 'Elev': - stats = stats[['UID','AreaSqKm','COUNT_x','SUM', - 'MAX', 'MIN', 'CatPctFull']] - stats.columns = ['UID', 'CatAreaSqKm', 'CatCount', 'CatSum', - 'CatMax', 'CatMin', 'CatPctFull'] - else: - stats = stats[['UID','AreaSqKm','COUNT_x','SUM', 'CatPctFull']] - stats.columns = ['UID', 'CatAreaSqKm', 'CatCount', - 'CatSum', 'CatPctFull'] - stats.CatPctFull = stats.CatPctFull.fillna(0) - start2 = dt.now() - npy = '%s/LakeCat_npy' % frame - accum = np.load('%s/bastards/accum.npz' % npy) - up = Accumulation(stats, accum['comids'], - accum['lengths'], - accum['upstream'], - 'UpCat','UID') - accum = np.load('%s/children/accum.npz' % npy) - ws = Accumulation(stats, accum['comids'], - accum['lengths'], - accum['upstream'], - 'Ws','UID') - stats = pd.merge(stats, up, on='UID') - stats = pd.merge(stats, ws, on='UID') - cols = stats.columns[1:].tolist() - # goto StreamCat to get On-Net-work lake results from assoc. COMIDs - stats['inStreamCat'] = 0 - # Join UID to COMID for final deliverable.. - lks = dbf2DF('%s/off-network.dbf' % frame)[['COMID','UID']] - off = pd.merge(lks,stats,on='UID',how='right') - off.drop('UID',axis=1,inplace=True) - on = getOnNetLakes2(name, StreamCat, - '%s/joinTables' % frame , - '%s/onNet_LakeCat.npz' % npy, - NHD_dir) - on['inStreamCat'] = 1 - print "Length of on_Net: " + str(len(on)) - tot = pd.concat([off, on]) - tot.to_csv('%s/%s.csv' % (out_dir, name), index=False) - print 'Accumulation Results Complete in : ' + str(dt.now() - start2) +from border import makeBrdrPctFile +from lake_cat_config import LYR_DIR, NHD_DIR, OUT_DIR, STREAMCAT_DIR, FRAMEWORK +from LakeCat_functions import (Accumulation, NHDtblMerge, PointInPoly, + chkColumnLength, doStats, getOnNetLakes, inputs, + makeBasins, makeNParrays, rpus) + +if __name__ == "__main__": + + if not os.path.exists(FRAMEWORK): + print("making framework") + os.mkdir(FRAMEWORK) + os.mkdir(f"{FRAMEWORK}/rasters") + os.mkdir(f"{FRAMEWORK}/rasters/lakes") + os.mkdir(f"{FRAMEWORK}/rasters/lakes/scratchArc") + os.mkdir(f"{FRAMEWORK}/rasters/wsheds") + os.mkdir(f"{FRAMEWORK}/shps") + os.mkdir(f"{FRAMEWORK}/joinTables") + os.mkdir(f"{FRAMEWORK}/LakeCat_npy") + + NHDbounds = gpd.read_file( + f"{NHD_DIR}/NHDPlusGlobalData/BoundaryUnit.shp" + ).to_crs(epsg="5070") + NHDbounds.drop( + ["AreaSqKM", "DrainageID", "Shape_Area", "Shape_Leng", "UnitName"], + axis=1, + inplace=True, + ) + + if not os.path.exists(f"{FRAMEWORK}/Lake_QA.csv"): + NHDtblMerge(NHD_DIR, NHDbounds, FRAMEWORK) + makeBasins(NHD_DIR, NHDbounds, FRAMEWORK) + makeNParrays(FRAMEWORK) + us_file = ( + "L:/Priv/CORFiles/Geospatial_Library_Resource/" + "POLITICAL/BOUNDARIES/NATIONAL/TIGER_2010_State_Boundaries.shp" + ) + bsns = f"{FRAMEWORK}/shps/allBasins.shp" + brdr = makeBrdrPctFile(us_file, bsns, "NAME10", "UID") + os.mkdir(f"{FRAMEWORK}/border") + brdr.to_csv(f"{FRAMEWORK}/border/pct_full.csv") + + doStats(OUT_DIR, LYR_DIR, NHD_DIR, FRAMEWORK) diff --git a/LakeCat_functions.py b/LakeCat_functions.py index 156c4bb..1de1029 100644 --- a/LakeCat_functions.py +++ b/LakeCat_functions.py @@ -9,77 +9,82 @@ import os import re import sys -import arcpy -import struct -import decimal import warnings -import datetime -import itertools +from collections import OrderedDict, defaultdict, deque +from datetime import datetime as dt +from itertools import chain 
+ +import geopandas as gpd import numpy as np -import pysal as ps import pandas as pd import rasterio as rs -from osgeo import gdal -from Tkinter import Tk -import geopandas as gpd -# we run out of memory when trying to convert the basin rasters to features -# so are now using Arc's RasterToPolygon -#from rasterio.crs import CRS -#from rasterio import features -from arcpy.sa import Watershed from geopandas.tools import sjoin +from osgeo import gdal + +os.environ["PATH"] = r"{};{}".format( + os.environ["PATH"], r"C:\Program Files\ArcGIS\Pro\bin" +) +sys.path.append(r"C:\Program Files\ArcGIS\Pro\Resources\ArcPy") +import arcpy from arcpy import PolygonToRaster_conversion as p2r from arcpy import RasterToPolygon_conversion as r2p -from collections import deque, OrderedDict, defaultdict -from tkFileDialog import askdirectory -arcpy.CheckOutExtension("spatial") -warnings.filterwarnings("ignore", category=FutureWarning) -Tk().withdraw() -############################################################################## - -inputs = OrderedDict([('06', 'MS'), - ('05', 'MS'), - ('10U', 'MS'), - ('10L', 'MS'), - ('07', 'MS'), - ('11', 'MS'), - ('14', 'CO'), - ('01', 'NE'), - ('17', 'PN'), - ('16', 'GB'), - ('15', 'CO'), - ('13', 'RG'), - ('12', 'TX'), - ('09', 'SR'), - ('02', 'MA'), - ('08', 'MS'), - ('04', 'GL'), - ('03W', 'SA'), - ('03S', 'SA'), - ('03N', 'SA'), - ('18', 'CA')]) - -rpus = OrderedDict([(u'01', ['01a']), - (u'02', ['02a', '02b']), - (u'03N', ['03a', '03b']), - (u'03S', ['03c', '03d']), - (u'03W', ['03e', '03f']), - (u'04', ['04a', '04b', '04c', '04d']), - (u'05', ['05a', '05b', '05c', '05d']), - (u'06', ['06a']), - (u'07', ['07a', '07b', '07c']), - (u'08', ['03g', '08a', '08b']), - (u'09', ['09a']), - (u'10L', ['10a', '10b', '10c', '10d']), - (u'10U', ['10e', '10f', '10g', '10h', '10i']), - (u'11', ['11a', '11b', '11c', '11d']), - (u'12', ['12a', '12b', '12c', '12d']), - (u'13', ['13a', '13b', '13c', '13d']), - (u'14', ['14a', '14b']), - (u'15', ['15a', '15b']), - (u'16', ['16a', '16b']), - (u'17', ['17a', '17b', '17c', '17d']), - (u'18', ['18a', '18b', '18c'])]) +from arcpy.sa import TabulateArea, Watershed, ZonalStatisticsAsTable + +from lake_cat_config import LYR_DIR, NHD_DIR, OUT_DIR, STREAMCAT_DIR + +warnings.filterwarnings("ignore", category=RuntimeWarning) + +inputs = OrderedDict( + [ + ("06", "MS"), + ("05", "MS"), + ("10U", "MS"), + ("10L", "MS"), + ("07", "MS"), + ("11", "MS"), + ("14", "CO"), + ("01", "NE"), + ("17", "PN"), + ("16", "GB"), + ("15", "CO"), + ("13", "RG"), + ("12", "TX"), + ("09", "SR"), + ("02", "MA"), + ("08", "MS"), + ("04", "GL"), + ("03W", "SA"), + ("03S", "SA"), + ("03N", "SA"), + ("18", "CA"), + ] +) + +rpus = OrderedDict( + [ + ("01", ["01a"]), + ("02", ["02a", "02b"]), + ("03N", ["03a", "03b"]), + ("03S", ["03c", "03d"]), + ("03W", ["03e", "03f"]), + ("04", ["04a", "04b", "04c", "04d"]), + ("05", ["05a", "05b", "05c", "05d"]), + ("06", ["06a"]), + ("07", ["07a", "07b", "07c"]), + ("08", ["03g", "08a", "08b"]), + ("09", ["09a"]), + ("10L", ["10a", "10b", "10c", "10d"]), + ("10U", ["10e", "10f", "10g", "10h", "10i"]), + ("11", ["11a", "11b", "11c", "11d"]), + ("12", ["12a", "12b", "12c", "12d"]), + ("13", ["13a", "13b", "13c", "13d"]), + ("14", ["14a", "14b"]), + ("15", ["15a", "15b"]), + ("16", ["16a", "16b"]), + ("17", ["17a", "17b", "17c", "17d"]), + ("18", ["18a", "18b", "18c"]), + ] +) # this is the same projection as the 2 below, but this matches the name of the proj # from the NHD fdr files.. 
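# --- Illustrative sketch (editor's note, not part of the patch) ------------------
# The f_d_Title/DirectoryLocations columns were dropped from ControlTable_LakeCat.csv;
# LakeCat.py and LakeCat_functions.py now read those paths from lake_cat_config.py
# (generated from lake_cat_config.py.template). The template is not shown in this
# excerpt; assuming it simply defines the constants imported above, and that they map
# onto the old DirectoryLocations rows of the control table, it would look roughly like:
LYR_DIR = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/LandscapeRasters/QAComplete"
FRAMEWORK = "L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/LakeCat_Framework"
OUT_DIR = "L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/Allocation_Accumulation"
STREAMCAT_DIR = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/Allocation_and_Accumulation"
NHD_DIR = "H:/NHDPlusV21"
# ----------------------------------------------------------------------------------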
@@ -102,119 +107,162 @@ PARAMETER['Direction',1.0],\ UNIT['User_Defined_Unit',0.01]]" -# this is the projection that will be written out if set to 5070 in QGIS.. - -#"PROJCS['NAD83_Conus_Albers',\ -#GEOGCS['GCS_North_American_1983',\ -#DATUM['D_North_American_1983',\ -#SPHEROID['GRS_1980',6378137.0,298.257222101]],\ -#PRIMEM['Greenwich',0.0],\ -#UNIT['Degree',0.0174532925199433]],\ -#PROJECTION['Albers'],\ -#PARAMETER['false_easting',0.0],\ -#PARAMETER['false_northing',0.0],\ -#PARAMETER['central_meridian',-96.0],\ -#PARAMETER['standard_parallel_1',29.5],\ -#PARAMETER['standard_parallel_2',45.5],\ -#PARAMETER['latitude_of_origin',23.0],\ -#UNIT['Meter',1.0]]" - -# this is just another that we had been using, it matches others - -#"PROJCS['NAD_1983_Contiguous_USA_Albers',\ -#GEOGCS['GCS_North_American_1983',\ -#DATUM['D_North_American_1983',\ -#SPHEROID['GRS_1980',6378137.0,298.257222101]],\ -#PRIMEM['Greenwich',0.0],\ -#UNIT['Degree',0.0174532925199433]],\ -#PROJECTION['Albers'],\ -#PARAMETER['false_easting',0.0],\ -#PARAMETER['false_northing',0.0],\ -#PARAMETER['central_meridian',-96.0],\ -#PARAMETER['standard_parallel_1',29.5],\ -#PARAMETER['standard_parallel_2',45.5],\ -#PARAMETER['latitude_of_origin',23.0],\ -#UNIT['Meter',1.0]]" -############################################################################## - - -def dbfreader(f): - """Returns an iterator over records in a Xbase DBF file. - - The first row returned contains the field names. - The second row contains field specs: (type, size, decimal places). - Subsequent rows contain the data records. - If a record is marked as deleted, it is skipped. - - File should be opened for binary reads. - """ - # See DBF format spec at: - # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT - - numrec, lenheader = struct.unpack('" - Uses the 'Cat' and 'UpCat' columns to caluculate watershed values and returns those values in 'Cat' columns - so they can be appended to 'CatResult' tables in other zones before accumulation. + Uses the 'Cat' and 'UpCat' columns to caluculate watershed values and returns those values in 'Cat' columns + so they can be appended to 'CatResult' tables in other zones before accumulation. 
Arguments --------- @@ -223,54 +271,63 @@ def Accumulation(arr, COMIDs, lengths, upStream, tbl_type, icol): lengths : numpy array with lengths of upstream COMIDs upstream : numpy array of all upstream arrays for each COMID tbl_type : string value of table metrics to be returned - ''' - coms = np.array(arr[icol]) #Read in COMIDs - indices = swapper(coms, upStream) #Get indices that will be used to map values + """ + coms = np.array(arr[icol]) # Read in COMIDs + indices = swapper(coms, upStream) # Get indices that will be used to map values del upStream # a and indices are big - clean up to minimize RAM - cols = arr.columns[1:] #Get column names that will be accumulated - z = np.zeros(COMIDs.shape) #Make empty vector for placing values - outT = np.zeros((len(COMIDs), len(arr.columns))) #Make empty array for placing final values - outT[:,0] = COMIDs #Define first column as comids - #Loop and accumulate values - for k in range(0,len(cols)): + cols = arr.columns[1:] # Get column names that will be accumulated + z = np.zeros(COMIDs.shape) # Make empty vector for placing values + outT = np.zeros( + (len(COMIDs), len(arr.columns)) + ) # Make empty array for placing final values + outT[:, 0] = COMIDs # Define first column as comids + # Loop and accumulate values + for k in range(0, len(cols)): col = cols[k] - c = np.array(arr[col]) # arr[col].fillna(0) keep out zeros where no data! - d = c[indices] #Make final vector from desired data (c) - if 'PctFull' in col: - area = np.array(arr.ix[:, 1]) + c = np.array(arr[col]) # arr[col].fillna(0) keep out zeros where no data! + d = c[indices] # Make final vector from desired data (c) + if "PctFull" in col: + area = np.array(arr.iloc[:, 1]) ar = area[indices] x = 0 for i in range(0, len(lengths)): # using nan_to_num in average function to treat NA's as zeros when summing - z[i] = np.ma.average(np.nan_to_num(d[x:x + lengths[i]]), weights=ar[x:x + lengths[i]]) + z[i] = np.ma.average( + np.nan_to_num(d[x : x + lengths[i]]), weights=ar[x : x + lengths[i]] + ) x = x + lengths[i] else: x = 0 for i in range(0, len(lengths)): - z[i] = np.nansum(d[x:x + lengths[i]]) + z[i] = np.nansum(d[x : x + lengths[i]]) x = x + lengths[i] - outT[:,k+1] = z #np.nan_to_num() -used to convert to zeros here, now done above in the np.ma.average() - outT = outT[np.in1d(outT[:,0], coms),:] #Remove the extra COMIDs + outT[:, k + 1] = z + outT = outT[np.in1d(outT[:, 0], coms), :] # Remove the extra COMIDs outDF = pd.DataFrame(outT) - if tbl_type == 'Ws': - outDF.columns = np.append(icol, map(lambda x : x.replace('Cat', 'Ws'),cols.values)) - elif tbl_type == 'UpCat': - outDF.columns = np.append(icol, 'Up' + cols.values) + if tbl_type == "Ws": + outDF.columns = np.append( + icol, list(map(lambda x: x.replace("Cat", "Ws"), cols.values)) + ) + elif tbl_type == "UpCat": + outDF.columns = np.append(icol, "Up" + cols.values) else: outDF.columns = [icol] + cols.tolist() for name in outDF.columns: - if 'AreaSqKm' in name: + if "AreaSqKm" in name: areaName = name - if 'PctFull' in name: + if "PctFull" in name: pct = name - outDF.loc[(outDF[areaName] == 0), outDF.columns[2:]] = np.nan # identifies that there is no area in catchment mask, then NA values across the table - outDF.loc[(outDF[pct] == 0), outDF.columns[2:-1]] = np.nan + outDF.loc[ + (outDF[areaName] == 0), outDF.columns[2:] + ] = ( + np.nan + ) # identifies that there is no area in catchment mask, then NA values across the table + outDF.loc[(outDF[pct] == 0), outDF.columns[2:-1]] = np.nan return outDF 
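# --- Illustrative sketch (editor's note, not part of the patch) ------------------
# Accumulation() consumes the flat arrays stored in the framework's accum.npz files:
# one UID/COMID per basin, the number of upstream IDs belonging to each basin, and
# all upstream IDs concatenated in that same order. A toy network (11 drains 12 and
# 13, 12 drains 13) with hypothetical CatSum values shows conceptually how the
# non-PctFull columns are summed; in the real files the 'children' (Ws) arrays also
# include the basin itself, while the 'bastards' (UpCat) arrays exclude it.
import numpy as np

comids = np.array([11, 12, 13])        # one entry per local basin
lengths = np.array([2, 1, 0])          # count of upstream basins for each entry in comids
upstream = np.array([12, 13, 13])      # upstream IDs for 11, then for 12, then for 13

cat_sum = {11: 1.0, 12: 2.0, 13: 4.0}  # hypothetical per-basin CatSum values
start = 0
for com, n in zip(comids, lengths):
    ups = upstream[start:start + n]
    start += n
    print(com, cat_sum[com] + sum(cat_sum[u] for u in ups))  # 11 -> 7.0, 12 -> 6.0, 13 -> 4.0
# PctFull columns are instead averaged, weighted by each basin's CatAreaSqKm.
# ----------------------------------------------------------------------------------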
-############################################################################## def children(token, tree, chkset=None): - ''' + """ __author__ = "Ryan Hill " "Marc Weber " returns a list of every child @@ -280,7 +337,7 @@ def children(token, tree, chkset=None): token : a single COMID tree : Full dictionary of list of upstream COMIDs for each COMID in the zone chkset : set of all the NHD catchment COMIDs used to remove flowlines with no associated catchment - ''' + """ visited = set() to_crawl = deque([token]) while to_crawl: @@ -290,15 +347,14 @@ def children(token, tree, chkset=None): visited.add(current) node_children = set(tree[current]) to_crawl.extendleft(node_children - visited) - #visited.remove(token) + # visited.remove(token) if chkset != None: visited = visited.intersection(chkset) return list(visited) -############################################################################## def bastards(token, tree, chkset=None): - ''' + """ __author__ = "Ryan Hill " "Marc Weber " returns a list of every child w/ out father (key) included @@ -308,7 +364,7 @@ def bastards(token, tree, chkset=None): token : a single COMID tree : Full dictionary of list of upstream COMIDs for each COMID in the zone chkset : set of all the NHD catchment COMIDs, used to remove flowlines with no associated catchment - ''' + """ visited = set() to_crawl = deque([token]) while to_crawl: @@ -323,11 +379,9 @@ def bastards(token, tree, chkset=None): visited = visited.intersection(chkset) return list(visited) -############################################################################## - def swapper(coms, upStream): - ''' + """ __author__ = "Marc Weber " "Ryan Hill " Creates array of indexes for all upstream COMIDs that will be summarized for each local catchment. @@ -336,37 +390,34 @@ def swapper(coms, upStream): --------- coms : numpy array of all COMIDs in the zone upstream : numpy array of all upstream COMIDs for each local catchment - ''' + """ bsort = np.argsort(coms) apos = np.searchsorted(coms[bsort], upStream) indices = bsort[apos] return indices -############################################################################## -def findUpstreamNpy(numpy_dir, com): - """ Unpacks Numpy files describing the array of upstream COMID's for - each catchment in NHD. Similar to the Arc add-in tool that Marc made to +def findUpstreamNpy(numpy_dir, com): + """Unpacks Numpy files describing the array of upstream COMID's for + each catchment in NHD. Similar to the Arc add-in tool that Marc made to identify upstream flowline/catchments. NOT USED IN LAKECAT PROCESS! for QA. 
- + Arguments --------- numpy_dir : Loccation of numpy files - com : ID of feature + com : ID of feature """ - comids = np.load(numpy_dir + '/comids.npy') - lengths= np.load(numpy_dir + '/lengths.npy') - upStream = np.load(numpy_dir + '/upStream.npy') + comids = np.load(numpy_dir + "/comids.npy") + lengths = np.load(numpy_dir + "/lengths.npy") + upStream = np.load(numpy_dir + "/upStream.npy") itemindex = int(np.where(comids == com)[0]) n = lengths[:itemindex].sum() arrlen = lengths[itemindex] - return upStream[n:n+arrlen] - -############################################################################## - - -def PointInPoly(points, inZoneData, pct_full, fld='GRIDCODE', summaryfield=None): - ''' + return upStream[n : n + arrlen] + + +def PointInPoly(points, inZoneData, pct_full, fld="GRIDCODE", summaryfield=None): + """ __author__ = "Marc Weber " "Rick Debbout " Returns either the count of spatial points feature in every polygon in a spatial polygons feature or the summary of @@ -379,84 +430,85 @@ def PointInPoly(points, inZoneData, pct_full, fld='GRIDCODE', summaryfield=None) pct_full : table that links COMIDs to pct_full, determined from catchments that are not within the US Census border fld : the field in the InZoneData file that uniquely identifies each polygon summaryfield : a list of the field/s in points feature to use for getting summary stats in polygons - ''' + """ polys = gpd.GeoDataFrame.from_file(inZoneData) if not points.crs == polys.crs: points = points.to_crs(polys.crs) # Get list of lat/long fields in the table - latlon = [s for s in points.columns if any(xs in s.upper() for xs in ['LONGIT','LATIT'])] + latlon = [ + s for s in points.columns if any(xs in s.upper() for xs in ["LONGIT", "LATIT"]) + ] # Remove duplicate points for 'Count' - points2 = points.ix[~points.duplicated(latlon)] + points2 = points.loc[~points.duplicated(latlon)] try: point_poly_join = sjoin(points2, polys, how="left", op="within") except: - polys['link'] = np.nan - point_poly_join = polys #gpd.GeoDataFrame( pd.concat( [points2, polys], ignore_index=True) ) - fld = 'link' + polys["link"] = np.nan + point_poly_join = polys + fld = "link" # Create group of all points in catchment grouped = point_poly_join.groupby(fld) - point_poly_count = grouped[fld].count() # point_poly_count.head() next((x for x in points2.columns if x != 'geometry'),None) - point_poly_count.name = 'COUNT' + point_poly_count = grouped[fld].count() + point_poly_count.name = "COUNT" # Join Count column on to NHDCatchments table and keep only 'COMID','CatAreaSqKm','CatCount' - final = polys.join(point_poly_count, on=fld, how='left') - final = final[[fld, 'AreaSqKM', 'COUNT']].fillna(0) - cols = [fld, 'CatAreaSqKm', 'CatCount'] - if not summaryfield == None: # Summarize fields in list with gpd table including duplicates + final = polys.join(point_poly_count, on=fld, how="left") + final = final[[fld, "AreaSqKM", "COUNT"]].fillna(0) + cols = [fld, "CatAreaSqKm", "CatCount"] + if ( + not summaryfield == None + ): # Summarize fields in list with gpd table including duplicates point_poly_dups = sjoin(points, polys, how="left", op="within") grouped2 = point_poly_dups.groupby(fld) - for x in summaryfield: # Sum the field in summary field list for each catchment + for x in summaryfield: # Sum the field in summary field list for each catchment point_poly_stats = grouped2[x].sum() - final = final.join(point_poly_stats, on=fld, how='left').fillna(0) - cols.append('Cat' + x) + final = final.join(point_poly_stats, on=fld, how="left").fillna(0) + 
cols.append("Cat" + x) final.columns = cols # Merge final table with Pct_Full table based on COMID and fill NA's with 0 - final = pd.merge(final, pct_full, on=fld, how='left') - final['CatPctFull'] = final['CatPctFull'].fillna(100) # final.head() final.ix[final.CatCount == 0] - #print "elapsed time " + str(dt.now()-startTime) + final = pd.merge(final, pct_full, on=fld, how="left") + final["CatPctFull"] = final["CatPctFull"].fillna(100) for name in final.columns: - if 'AreaSqKm' in name: + if "AreaSqKm" in name: area = name # replace CatAreaSqKm with NANs where value is zero final.loc[(final[area] == 0), final.columns[2:]] = np.nan return final - -############################################################################## + def chkColumnLength(table, LandscapeLayer): - ''' + """ __author__ = "Marc Weber " "Ryan Hill " - Checks the number of columns returned from zonal stats and adds any of the - categorical values that that didn't exist within the zone and fills the + Checks the number of columns returned from zonal stats and adds any of the + categorical values that that didn't exist within the zone and fills the column with zeros so that all categories will be represented in the table. Arguments --------- table : Results table of catchment summarizations LandscapeLayer : string to file holding the table of inter VPU COMIDs - ''' + """ # Get ALL categorical values from the dbf associated with the raster to retain all values # in the raster in every table, even when a given value doesn't exist in a given hydroregion - AllCols = dbf2DF(LandscapeLayer + '.vat.dbf').VALUE.tolist() + AllCols = dbf2DF(LandscapeLayer + ".vat.dbf").VALUE.tolist() col_list = table.columns.tolist() col_list.sort() - col_list.sort(key=len) # table.columns + col_list.sort(key=len) # table.columns table = table[col_list] if len(AllCols) != len(col_list[1:]): - AllCols = ['VALUE_'+str(x) for x in AllCols] + AllCols = ["VALUE_" + str(x) for x in AllCols] diff = list(set(AllCols) - set(col_list[1:])) diff.sort() diff.sort(key=len) for spot in diff: here = AllCols.index(spot) + 1 - table.insert(here, spot ,0) + table.insert(here, spot, 0) return table -############################################################################## - -def getOnNetLakes2(metric, StreamCat, LakeComs, npy_files, nhd): - ''' + +def getOnNetLakes(metric, StreamCat, LakeComs, npy_files, nhd): + """ __author__ = "Rick Debbout " Grabs records from StreamCat for on-network lakes. 
Adjusts cat results to be an accumulated result of all associated catchments @@ -467,66 +519,39 @@ def getOnNetLakes2(metric, StreamCat, LakeComs, npy_files, nhd): metric : Metric name StreamCat : Location of intermediate StreamCat csv files LakeComs : Location of csv's that join waterbody COMID to catchment COMID - npy_files : Location of files that associate all catchments with WBAREACOMID - ''' - final = pd.DataFrame() - for zone in inputs: - tbl = pd.read_csv('%s/join_%s.csv' % (LakeComs,zone))[['catCOMID','wbCOMID']] # remove - strmCat = pd.read_csv('%s/%s_%s.csv' % (StreamCat, metric, zone)) - if metric == 'RdCrs': - strmCat = strmCat.drop([x for x in strmCat.columns if 'SlpWtd' in x], axis=1) -# if metric == 'Elev': -# strmCat = strmCat.drop([x for x in strmCat.columns if 'MAX' in x or 'MIN' in x], axis=1) - cols = [col for col in strmCat.columns if col[:3] =='Cat'] - iso = strmCat[['COMID'] + cols] - accum = np.load(npy_files)['vpus'].item()[zone] - accumCats = Accumulation(iso, accum['comids'], - accum['lengths'], - accum['upstream'], - '', 'COMID') -# # shouldn't be needed if keep tbl_type arg as empty string in Accumulation -# accumCats.columns = [col.replace('Ws','Cat') for col in accumCats.columns] - upWs = strmCat.ix[strmCat.COMID.isin(tbl.catCOMID)].drop(cols, axis=1) - newCats = pd.merge(accumCats, upWs, on="COMID") - tbl2 = pd.merge(tbl, newCats, left_on='catCOMID', right_on='COMID') - tbl2 = tbl2.drop(['COMID','catCOMID'], axis=1) - tbl2.rename(columns={'wbCOMID': 'COMID'}, inplace=True) - final = pd.concat([final, tbl2]) - return final - -############################################################################## - - -def getOnNetLakes(metric, StreamCat, LakeComs): - """Using the LakeCat join tables linking waterbody COMID to catchment COMID, - Streamcat ouput is collected and manipulated to accumulate catchment stats - for lakes that have more than one catchment associated through the - WBAREACOMI attribute in NHD flowlines. 
- - Arguments - --------- - metric : name of the metric to collect data for in StreamCat - StreamCat : location of StreamCat outputs(AllocationandAccumulation) - LakeComs : location of join tables in LakeCat + npy_files : Location of files that associate all catchments with WBAREACOMID """ final = pd.DataFrame() for zone in inputs: - tbl = pd.read_csv('%s/LakeCOMs%s.csv' % (LakeComs,zone)) - strmCat = pd.read_csv('%s/%s_%s.csv' % (StreamCat, metric, zone)) - tbl2 = pd.merge(tbl, strmCat.ix[strmCat.COMID.isin(tbl.catCOMID)], left_on='catCOMID', right_on='COMID') - tbl2 = tbl2.drop(['COMID','catCOMID'], axis=1) - if metric == 'RdCrs': - tbl2 = tbl2.drop([x for x in tbl2.columns if 'SlpWtd' in x], axis=1) - if metric == 'Elev': - tbl2 = tbl2.drop([x for x in tbl2.columns if 'MAX' in x or 'MIN' in x], axis=1) - tbl2.rename(columns={'wbCOMID': 'COMID'}, inplace=True) + tbl = pd.read_csv("%s/join_%s.csv" % (LakeComs, zone))[ + ["catCOMID", "wbCOMID"] + ] # remove + strmCat = pd.read_csv("%s/%s_%s.csv" % (StreamCat, metric, zone)) + if metric == "RdCrs": + strmCat = strmCat.drop( + [x for x in strmCat.columns if "SlpWtd" in x], axis=1 + ) + # if metric == 'Elev': + # strmCat = strmCat.drop([x for x in strmCat.columns if 'MAX' in x or 'MIN' in x], axis=1) + cols = [col for col in strmCat.columns if col[:3] == "Cat"] + iso = strmCat[["COMID"] + cols] + accum = np.load(npy_files, allow_pickle=True)["vpus"].item()[zone] + accumCats = Accumulation( + iso, accum["comids"], accum["lengths"], accum["upstream"], "", "COMID" + ) + # # shouldn't be needed if keep tbl_type arg as empty string in Accumulation + # accumCats.columns = [col.replace('Ws','Cat') for col in accumCats.columns] + upWs = strmCat.loc[strmCat.COMID.isin(tbl.catCOMID)].drop(cols, axis=1) + newCats = pd.merge(accumCats, upWs, on="COMID") + tbl2 = pd.merge(tbl, newCats, left_on="catCOMID", right_on="COMID") + tbl2 = tbl2.drop(["COMID", "catCOMID"], axis=1) + tbl2.rename(columns={"wbCOMID": "COMID"}, inplace=True) final = pd.concat([final, tbl2]) return final -############################################################################## def makeRat(fn): - ''' + """ __author__ = "Matt Gregory " Adds a Raster Attribute Table to the .tif.aux.xml file, then passes those values to rat_to_df function to return the RAT in a pandas DataFrame. 
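As an aside, the `accum` object loaded above (like the `onNet_connect` dict that `NHDtblMerge` later saves to `onNet_LakeCat.npz`) uses a flattened ragged-array layout: parallel `comids` and `lengths` vectors plus one long `upstream` vector holding every linked COMID list laid end-to-end. A minimal sketch of that layout with made-up IDs (illustrative only, not code from this patch; `np.split` is used here purely to show the unpacking, it is not how `Accumulation` consumes the arrays):

```python
# Illustrative only -- toy values, not LakeCat data.
import numpy as np

comids = np.array([10, 20, 30])                # one entry per focal catchment
lengths = np.array([2, 1, 3])                  # how many linked catchments each entry has
upstream = np.int32([11, 12, 21, 31, 32, 33])  # all linked COMIDs, concatenated in order

# Split the flat vector at the cumulative lengths to recover each entry's list,
# i.e. the association Accumulation() walks when summing catchment values.
groups = np.split(upstream, np.cumsum(lengths)[:-1])
print(dict(zip(comids, groups)))
# prints the mapping {10: [11, 12], 20: [21], 30: [31, 32, 33]} (as int32 arrays)
```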
@@ -534,7 +559,7 @@ def makeRat(fn): Arguments --------- fn : raster filename - ''' + """ ds = gdal.Open(fn) rb = ds.GetRasterBand(1) nd = rb.GetNoDataValue() @@ -542,27 +567,25 @@ def makeRat(fn): # Get unique values in the band and return counts for COUNT val u = np.array(np.unique(data, return_counts=True)) # remove NoData value - u = np.delete(u, np.argwhere(u==nd), axis=1) - + u = np.delete(u, np.argwhere(u == nd), axis=1) + # Create and populate the RAT rat = gdal.RasterAttributeTable() - rat.CreateColumn('Value', gdal.GFT_Integer, gdal.GFU_Generic) - rat.CreateColumn('Count', gdal.GFT_Integer, gdal.GFU_Generic) + rat.CreateColumn("Value", gdal.GFT_Integer, gdal.GFU_Generic) + rat.CreateColumn("Count", gdal.GFT_Integer, gdal.GFU_Generic) for i in range(u[0].size): rat.SetValueAsInt(i, 0, int(u[0][i])) rat.SetValueAsInt(i, 1, int(u[1][i])) - + # Associate with the band rb.SetDefaultRAT(rat) - + # Close the dataset and persist the RAT - ds = None - - #return the rat to build DataFrame + ds = None + + # return the rat to build DataFrame df = rat_to_df(rat) return df - -############################################################################## def rat_to_df(in_rat): @@ -580,152 +603,102 @@ def rat_to_df(in_rat): """ # Read in each column from the RAT and convert it to a series infering # data type automatically - s = [pd.Series(in_rat.ReadAsArray(i), name=in_rat.GetNameOfCol(i)) - for i in xrange(in_rat.GetColumnCount())] + s = [ + pd.Series(in_rat.ReadAsArray(i), name=in_rat.GetNameOfCol(i)) + for i in range(in_rat.GetColumnCount()) + ] # Concatenate all series together into a dataframe and return return pd.concat(s, axis=1) -############################################################################## - - -def DF2dbf(df, dbf_path, my_specs=None): - ''' - Convert a pandas.DataFrame into a dbf. - - __author__ = "Dani Arribas-Bel " - ... - - Arguments - --------- - df : DataFrame - Pandas dataframe object to be entirely written out to a dbf - dbf_path : str - Path to the output dbf. It is also returned by the function - my_specs : list - List with the field_specs to use for each column. - Defaults to None and applies the following scheme: - * int: ('N', 14, 0) - * float: ('N', 14, 14) - * str: ('C', 14, 0) - ''' - if my_specs: - specs = my_specs - else: - type2spec = {int: ('N', 20, 0), - np.int64: ('N', 20, 0), - float: ('N', 36, 15), - np.float64: ('N', 36, 15), - str: ('C', 14, 0), - np.int32: ('N', 14, 0) - } - types = [type(df[i].iloc[0]) for i in df.columns] - specs = [type2spec[t] for t in types] - db = ps.open(dbf_path, 'w') - db.header = list(df.columns) - db.field_spec = specs - for i, row in df.T.iteritems(): - db.write(row) - db.close() - return dbf_path - -############################################################################## - def purge(directory, pattern): - ''' + """ __author__ = "Rick Debbout " - Clears directory of created rasters that will need to be re-written due to - holding on-network like properties, i.e basins created are larger than the + Clears directory of created rasters that will need to be re-written due to + holding on-network like properties, i.e basins created are larger than the associated catchment. 
Arguments --------- directory : directory to be cleared pattern : string value to find in filename for removal - ''' + """ for f in os.listdir(directory): if re.search(pattern, f): os.remove(os.path.join(directory, f)) - -############################################################################## -def updateSinks(wbDF,flDF): - ''' +def updateSinks(wbDF, flDF): + """ __author__ = "Rick Debbout " Updates the WBAREACOMI field in the NHDFlowline GeoDataFrame where NHDSinks - intersect with NHDWaterbodies. Not currently held in the NHDPlusV21, but + intersect with NHDWaterbodies. Not currently held in the NHDPlusV21, but we can process the waterbodies with our on-network approach. Arguments --------- wbDF : NHDWaterbody GeoDataFrame with joined NHDSink COMIDs flDF : NHDFlowline GeoDataFrame to be updated - ''' - flow = flDF.set_index('COMID') - sinks = wbDF.ix[wbDF.COMID_sink.notnull()].copy() - sinks.rename(columns={'COMID': 'WBAREACOMI'}, inplace=True) + """ + flow = flDF.set_index("COMID") + sinks = wbDF.loc[wbDF.COMID_sink.notnull()].copy() + sinks.rename(columns={"COMID": "WBAREACOMI"}, inplace=True) flow.update(sinks) flow.WBAREACOMI = flow.WBAREACOMI.astype(np.int64) return flow.reset_index(level=0) -############################################################################## -def rollArray(a, d): + +def rollArray(a, d): if len(d) == 4: - a = a[0,:] + a = a[0, :] new = np.roll(np.roll(a, d[0], axis=0), d[1], axis=1) - new[d[2],:] = a[d[2],:] - new[:, d[3]] = a[:, d[3]] + new[d[2], :] = a[d[2], :] + new[:, d[3]] = a[:, d[3]] if len(d) == 3: - new = np.roll(a[0,:], d[0], axis=d[1]) + new = np.roll(a[0, :], d[0], axis=d[1]) if d[1] == 0: - new[d[2],:] = a[0,d[2],:] + new[d[2], :] = a[0, d[2], :] if d[1] == 1: - new[:,d[2]] = a[0,:,d[2]] - return np.expand_dims(new, axis=0) + new[:, d[2]] = a[0, :, d[2]] + return np.expand_dims(new, axis=0) -############################################################################## - def makeFlows(arr, shiftd, fdr, path, nd): - iso = np.not_equal(arr, shiftd) * np.not_equal(shiftd, nd) * np.not_equal(arr, nd) # cells change value after shift * cells not equal to NoData - pth = np.equal(fdr,path) # True when equal to path value - val = iso * pth * arr + iso = ( + np.not_equal(arr, shiftd) * np.not_equal(shiftd, nd) * np.not_equal(arr, nd) + ) # cells change value after shift * cells not equal to NoData + pth = np.equal(fdr, path) # True when equal to path value + val = iso * pth * arr shiftval = iso * pth * shiftd - idx = np.not_equal(val,shiftd) + idx = np.not_equal(val, shiftd) fromcom = val[idx] tocom = shiftval[idx] fromcom = fromcom[fromcom > 0] - tocom = tocom[tocom > 0] + tocom = tocom[tocom > 0] # don't load-in the entire array to the DF, just connection vals - df = pd.DataFrame({'TOCOMID' : tocom, - 'FROMCOMID' : fromcom, - 'move' : path}) - return df.drop_duplicates(['FROMCOMID','TOCOMID']) - -############################################################################## + df = pd.DataFrame({"TOCOMID": tocom, "FROMCOMID": fromcom, "move": path}) + return df.drop_duplicates(["FROMCOMID", "TOCOMID"]) -def compAll(arr, fdr ,moves, from_to, nd): +def compare_all(arr, fdr, moves, from_to, nd): for move in moves: - flow = makeFlows(arr, rollArray(np.copy(arr), moves[move][0]), fdr, moves[move][1], nd) - from_to = pd.concat([from_to,flow]) + flow = makeFlows( + arr, rollArray(np.copy(arr), moves[move][0]), fdr, moves[move][1], nd + ) + from_to = pd.concat([from_to, flow]) return from_to - -############################################################################## def
expand(window, size=1): r, c = window return ((r[0] - size, r[1] + size), (c[0] - size, c[1] + size)) + def check_window(window, w, h): r, c = window return ((max(0, r[0]), min(h, r[1])), (max(0, c[0]), min(w, c[1]))) -############################################################################## - def chunk_windows(r, indexes=None, max_ram=250000000): if indexes is None: @@ -733,13 +706,13 @@ def chunk_windows(r, indexes=None, max_ram=250000000): elif isinstance(indexes, int): indexes = [indexes] if not indexes: - raise ValueError('No indexes to read') + raise ValueError("No indexes to read") pixel_size = 0 for bidx in indexes: if bidx not in r.indexes: - raise IndexError('band index out of range') + raise IndexError("band index out of range") idx = r.indexes.index(bidx) - pixel_size += np.dtype(r.dtypes[idx]).itemsize + pixel_size += np.dtype(r.dtypes[idx]).itemsize chunk_size, _ = divmod(max_ram, pixel_size) r_h, r_w = r.height, r.width if chunk_size >= r_h * r_w: @@ -749,40 +722,42 @@ def chunk_windows(r, indexes=None, max_ram=250000000): d, _ = divmod(chunk_size, r_w * b_h) chunk_height = d * b_h d, m = divmod(r_h, chunk_height) - n_chunks = d + int(m>0) + n_chunks = d + int(m > 0) for i in range(n_chunks): row = i * chunk_height # height = min(chunk_height, r_h - row) - yield (i, 0), ((row, row+chunk_height), (0, r_w)) - -############################################################################## + yield (i, 0), ((row, row + chunk_height), (0, r_w)) def findFlows(zone_file, fdr_file): - moves = {'up':[(-1,0,-1),4],'left':[(-1,1,-1),1],'down' :[(1,0,0),64], - 'right':[(1,1,0),16],'downRight':[(1,1,0,0),32], - 'downLeft':[(1,-1,0,-1), 128],'upRight':[(-1,1,-1,0),8], - 'upLeft':[(-1,-1,-1,-1),2]} + moves = { + "up": [(-1, 0, -1), 4], + "left": [(-1, 1, -1), 1], + "down": [(1, 0, 0), 64], + "right": [(1, 1, 0), 16], + "downRight": [(1, 1, 0, 0), 32], + "downLeft": [(1, -1, 0, -1), 128], + "upRight": [(-1, 1, -1, 0), 8], + "upLeft": [(-1, -1, -1, -1), 2], + } flows = pd.DataFrame() with rs.open(zone_file) as z: with rs.open(fdr_file) as f: # 0 is NoData for fdr profile = z.profile.copy() - nd = profile['nodata'] + nd = profile["nodata"] assert z.shape == f.shape, "Rasters have different extents!" for _, w in chunk_windows(z): # currently defaults to 250MB - new_w = check_window(expand(w,2), z.width, z.height) + new_w = check_window(expand(w, 2), z.width, z.height) data = z.read(window=new_w) f_r = f.read(window=new_w) - flows = pd.concat([flows,compAll(data,f_r,moves,flows,nd)]) - return flows.drop_duplicates(['FROMCOMID','TOCOMID']) - -############################################################################## + flows = pd.concat([flows, compare_all(data, f_r, moves, flows, nd)]) + return flows.drop_duplicates(["FROMCOMID", "TOCOMID"]) def NHDtblMerge(nhd, bounds, out): - ''' + """ __author__ = "Rick Debbout " - Merges all of the NHD tables needed to find on-network lakes. Returns the + Merges all of the NHD tables needed to find on-network lakes. Returns the GeoDataFrames that will be used to find off-network lakes. Attribute fields COMID, WBARECOMI, and FEATUREID are used to link waterbodies to catchments. 
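A brief aside on the adjacency search above (`rollArray`, `makeFlows`, `compare_all`, `findFlows`): each chunk of the lake-basin raster is shifted one cell in each of eight directions and compared with itself, and a FROMCOMID/TOCOMID pair is kept wherever the basin id changes across the shift, neither cell is NoData, and the flow-direction raster carries the D8 code paired with that shift in the `moves` table. A single-direction sketch with toy arrays (illustrative only, not code from this patch; the code 16 is simply the value paired with the "right" shift in `moves`):

```python
# Illustrative only -- toy arrays, not LakeCat data.
import numpy as np
import pandas as pd

nd = 0                               # NoData value
zones = np.array([[1, 2],            # two adjacent lake-basin IDs
                  [1, 2]])
fdr = np.array([[16, 16],            # flow-direction code paired with this shift
                [16, 16]])

shifted = np.roll(zones, 1, axis=1)  # each cell now holds its neighbour's basin ID
shifted[:, 0] = zones[:, 0]          # undo the wrap-around on the leading edge

# keep cells whose basin ID changes across the shift, are not NoData,
# and whose flow direction carries the code tied to this shift
hit = (zones != shifted) & (shifted != nd) & (zones != nd) & (fdr == 16)
flows = pd.DataFrame({"FROMCOMID": zones[hit], "TOCOMID": shifted[hit]})
print(flows.drop_duplicates())       # one row linking basin 2 to basin 1
```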
@@ -791,306 +766,345 @@ def NHDtblMerge(nhd, bounds, out): nhd : string value of prefix to NHD directory bounds : GeoDataFrame of Vector Processing Unit boundaries out : directory to write out to - ''' + """ # build dict of hr/vpu labels to read through NHD vpus = bounds.query("UnitType == 'VPU'").copy() # initialize containers to append to through processing - onNet_connect = {} - Obounds = gpd.GeoDataFrame(crs={'init': u'epsg:4269'}) - qa_cols=['Total Waterbodies','On-Network','Off-network','FTYPE_drop', - 'Sink_add','Out_of_bounds'] + onNet_connect = {} + Obounds = gpd.GeoDataFrame() + qa_cols = [ + "Total Waterbodies", + "On-Network", + "Off-network", + "FTYPE_drop", + "Sink_add", + "Out_of_bounds", + ] qa_tbl = pd.DataFrame() - ons = [] - for zone in inputs: - print zone - hr = inputs[zone] - pre = "%s/NHDPlus%s/NHDPlus%s" % (nhd, hr, zone) - wbShp = gpd.read_file("%s/NHDSnapshot/Hydrography/NHDWaterbody.shp"%(pre)) - # hold length of total Waterbodies + ons = [] + print("finding off-network lakes for ", end="", flush=True) + for zone, hr in inputs.items(): + print(zone, end=", ", flush=True) + pre = f"{nhd}/NHDPlus{hr}/NHDPlus{zone}" + wbShp = gpd.read_file(f"{pre}/NHDSnapshot/Hydrography/NHDWaterbody.shp").to_crs( + epsg="5070" + ) + # hold length of total Waterbodies ttl_WB = len(wbShp) # format columns and select out FTYPE - wbShp.columns = wbShp.columns[:-1].str.upper().tolist() + ['geometry'] - wbShp = wbShp[['AREASQKM','COMID','FTYPE','geometry']] - wbShp = wbShp.loc[wbShp['FTYPE'].isin(['LakePond','Reservoir'])] + wbShp.columns = wbShp.columns[:-1].str.upper().tolist() + ["geometry"] + wbShp = wbShp[["AREASQKM", "COMID", "FTYPE", "geometry"]] + wbShp = wbShp.loc[wbShp["FTYPE"].isin(["LakePond", "Reservoir"])] # hold number of lakes removed from FTYPE ttl_FTYPE = ttl_WB - len(wbShp) - fl = dbf2DF("%s/NHDSnapshot/Hydrography/NHDFlowline.dbf"%(pre))[['COMID', - 'WBAREACOMI']] - cat = gpd.read_file('%s/NHDPlusCatchment/Catchment.shp'%(pre)).drop( - ['GRIDCODE', 'SOURCEFC'], axis=1) - cat.columns = cat.columns[:-1].str.upper().tolist() + ['geometry'] - vaa = dbf2DF('%s/NHDPlusAttributes/PlusFlowlineVAA.dbf'%(pre))[['COMID', - 'HYDROSEQ']] + fl = dbf2DF(f"{pre}/NHDSnapshot/Hydrography/NHDFlowline.dbf")[ + ["COMID", "WBAREACOMI"] + ] + cat = ( + gpd.read_file(f"{pre}/NHDPlusCatchment/Catchment.shp") + .drop(["GRIDCODE", "SOURCEFC"], axis=1) + .to_crs(epsg="5070") + ) + cat.columns = cat.columns[:-1].str.upper().tolist() + ["geometry"] + vaa = dbf2DF(f"{pre}/NHDPlusAttributes/PlusFlowlineVAA.dbf")[ + ["COMID", "HYDROSEQ"] + ] # merge all necessary NHD tables - final = pd.merge(cat.drop('geometry', axis=1),fl,left_on='FEATUREID', - right_on='COMID',how='inner') - final = pd.merge(wbShp.drop('geometry',axis=1),final,left_on='COMID', - right_on='WBAREACOMI',how='left', - suffixes=('_wb','_cat')) - final = pd.merge(final,vaa,left_on='COMID_cat', - right_on='COMID',how='left') - - # initialize containers for on-net lakes - cols = ['catCOMID','wbCOMID','CatAreaSqKm'] + final = pd.merge( + cat.drop("geometry", axis=1), + fl, + left_on="FEATUREID", + right_on="COMID", + how="inner", + ) + final = pd.merge( + wbShp.drop("geometry", axis=1), + final, + left_on="COMID", + right_on="WBAREACOMI", + how="left", + suffixes=("_wb", "_cat"), + ) + final = pd.merge(final, vaa, left_on="COMID_cat", right_on="COMID", how="left") + + # initialize containers for on-net lakes + cols = ["catCOMID", "wbCOMID", "CatAreaSqKm"] onNetDF = pd.DataFrame(columns=cols) - catDict = {} # holds associated lake 
catchments to an on-network lake - + catDict = {} # holds associated lake catchments to an on-network lake + # group by the waterbody COMID to find associated catchment - for name, group in final.groupby('COMID_wb'): + for name, group in final.groupby("COMID_wb"): if not pd.isnull(group.FEATUREID).any(): - base = group.ix[group.HYDROSEQ.idxmin()] - row = pd.Series([int(base.COMID_cat), int(base.COMID_wb), - base.AREASQKM_cat], index=cols) + base = group.loc[group.HYDROSEQ.idxmin()] + row = pd.Series( + [int(base.COMID_cat), int(base.COMID_wb), base.AREASQKM_cat], + index=cols, + ) onNetDF = onNetDF.append(row, ignore_index=True) catDict[int(base.COMID_cat)] = group.FEATUREID.astype(int).tolist() # hold length of on-net lakes ttl_ON = len(onNetDF) - # add in related sinks -- this could be done in tables and found in + # add in related sinks -- this could be done in tables and found in # groupby object above, but this allows for isolation of number added - sinks = gpd.read_file("%s/NHDPlusBurnComponents/Sink.shp"%(pre)) + sinks = gpd.read_file(f"{pre}/NHDPlusBurnComponents/Sink.shp").to_crs( + epsg="5070" + ) exp = '(SOURCEFC== "NHDWaterbody")&(PURPDESC== "NHDWaterbody closed lake")' if len(sinks) > 0: sinks = sinks.query(exp) try: assert len(sinks) > 0 catSink = sjoin(sinks, cat) - catSink = catSink[['FEATUREID_right', - 'FEATUREID_left', - 'AREASQKM']] + catSink = catSink[["FEATUREID_right", "FEATUREID_left", "AREASQKM"]] catSink.columns = cols - catSink = catSink.ix[catSink.wbCOMID.isin(wbShp.COMID)] #this will remove any NHDWaterbody COMIDs that have the wrong FTYPE - catSink = catSink.ix[~catSink.wbCOMID.isin(onNetDF.wbCOMID)] #remove any COMIDs that are already in the onNetDF + catSink = catSink.loc[ + catSink.wbCOMID.isin(wbShp.COMID) + ] # this will remove any NHDWaterbody COMIDs that have the wrong FTYPE + catSink = catSink.loc[ + ~catSink.wbCOMID.isin(onNetDF.wbCOMID) + ] # remove any COMIDs that are already in the onNetDF ttl_SINK = len(catSink) for idx, line in catSink.iterrows(): catDict[line.catCOMID] = [line.wbCOMID] - onNetDF = pd.concat([onNetDF,catSink]) + onNetDF = pd.concat([onNetDF, catSink]) except AssertionError: - ttl_SINK = 0 # all sinks got queried out! + ttl_SINK = 0 # all sinks got queried out! 
pass else: - ttl_SINK = len(sinks) # get val if no sinks for QA - - # create numpy arrays for connected catchments to waterbody - allLinkd = map(lambda x: catDict[x], catDict.keys()) - onNet_connect[zone] = {'comids':np.array(catDict.keys()), - 'lengths':np.array([len(v) for v in allLinkd]), - 'upstream':np.int32(np.hstack(np.array(allLinkd)))} - + ttl_SINK = len(sinks) # get val if no sinks for QA + + # create numpy arrays for connected catchments to waterbody + onNet_connect[zone] = { + "comids": np.array(list(catDict.keys())), + "lengths": np.array([len(v) for v in catDict.values()]), + "upstream": np.int32(list(chain.from_iterable(catDict.values()))), + } + # write-out table of catchment-lake COMID connections - onNetDF.to_csv("%s/joinTables/join_%s.csv" % (out, zone), index=False) - offLks = wbShp.ix[~wbShp.COMID.isin(onNetDF.wbCOMID)].copy() - ons = ons + onNetDF.wbCOMID.tolist() + onNetDF.to_csv(f"{out}/joinTables/join_{zone}.csv", index=False) + offLks = wbShp.loc[~wbShp.COMID.isin(onNetDF.wbCOMID)].copy() + ons = ons + onNetDF.wbCOMID.tolist() # find off-netowrk lakes that are out-of-bounds vpu = vpus.query("UnitID == '%s'" % zone) offCen = offLks.copy() offCen.geometry = offLks.geometry.centroid # find centroids within the vpu - lkVPUjoin = sjoin(offCen, vpu, op='within')[['AREASQKM','COMID','FTYPE', - 'UnitID','geometry']] + lkVPUjoin = sjoin(offCen, vpu, op="within")[ + ["AREASQKM", "COMID", "FTYPE", "UnitID", "geometry"] + ] # hold lakes that aren't within the VPU boundary - out_of_bounds = offLks.ix[~offLks.COMID.isin(lkVPUjoin.COMID)].copy() - out_of_bounds['VPU_orig'] = zone # identify the zone it came from + out_of_bounds = offLks.loc[~offLks.COMID.isin(lkVPUjoin.COMID)].copy() + out_of_bounds["VPU_orig"] = zone # identify the zone it came from # find the correct vpu for those out-of-bounds - outCen = offCen.ix[~offCen.COMID.isin(lkVPUjoin.COMID)] - unit = sjoin(outCen, vpus, op='within')[['COMID','UnitID']] - out_of_bounds = out_of_bounds.merge(unit, how='left', on='COMID') + outCen = offCen.loc[~offCen.COMID.isin(lkVPUjoin.COMID)] + unit = sjoin(outCen, vpus, op="within")[["COMID", "UnitID"]] + out_of_bounds = out_of_bounds.merge(unit, how="left", on="COMID") # add out-of-bounds to GeoDF to hold all, and select only lakes within - # the vpu - Obounds = pd.concat([Obounds,out_of_bounds]) -# Obounds = gpd.GeoDataFrame( pd.concat([Obounds,out_of_bounds], -# ignore_index=True) ) - offLks = offLks.ix[offLks.COMID.isin(lkVPUjoin.COMID)].copy() - - ttl_OOB = len(out_of_bounds) + # the vpu + Obounds = pd.concat([Obounds, out_of_bounds]) + offLks = offLks.loc[offLks.COMID.isin(lkVPUjoin.COMID)].copy() + + ttl_OOB = len(out_of_bounds) ttl_OFF = len(offLks) # add VPU info to offLks table - offLks['VPU_orig'] = zone - vpu_tbl = sjoin(offCen, vpus, op='within')[['COMID','UnitID']] - offLks = offLks.merge(vpu_tbl, on='COMID', how='left') + offLks["VPU_orig"] = zone + vpu_tbl = sjoin(offCen, vpus, op="within")[["COMID", "UnitID"]] + offLks = offLks.merge(vpu_tbl, on="COMID", how="left") # write-out off-net lakes and add series of QA info to DF - offLks.to_file("%s/off_net_%s.shp" % (out, zone)) - qa_tbl[zone] = [ttl_WB,ttl_ON,ttl_OFF,ttl_FTYPE,ttl_SINK,ttl_OOB] + offLks.to_file(f"{out}/off_net_{zone}.shp") + qa_tbl[zone] = [ttl_WB, ttl_ON, ttl_OFF, ttl_FTYPE, ttl_SINK, ttl_OOB] # write-out all zone DF's and the numpy files created to - assert Obounds.crs == {'init': u'epsg:4269'} - # remove lakes out of bounds that would be off-network from that zone, but + assert 
Obounds.crs.is_projected + # remove lakes out of bounds that would be off-network from that zone, but # were established as on-net in the zone they exist # 02/04 : coms 15516920, 15516922 NHD Problem.....again - Obounds = Obounds.ix[~Obounds.COMID.isin(ons)] - Obounds.to_file("%s/out_of_bounds.shp" % out) - np.savez_compressed('%s/LakeCat_npy/onNet_LakeCat.npz' % (out), - vpus=onNet_connect) + Obounds = Obounds.loc[~Obounds.COMID.isin(ons)] + Obounds.to_file(f"{out}/out_of_bounds.shp") + np.savez_compressed(f"{out}/LakeCat_npy/onNet_LakeCat.npz", vpus=onNet_connect) qa_tbl.index = qa_cols - qa_tbl.T.index.rename('VPU', inplace=True) - qa_tbl['TOTALS'] = qa_tbl.ix[:].sum(axis=1) + qa_tbl.T.index.rename("VPU", inplace=True) + qa_tbl["TOTALS"] = qa_tbl.loc[:].sum(axis=1) qa_tbl.T.to_csv("%s/Lake_QA.csv" % out) - -############################################################################## + print("done!") def makeBasins(nhd, bounds, out): - ''' + """ __author__ = "Rick Debbout " - + Makes GeoDataFrame of all lakes within each raster processing unit, converts each to raster, and then draws watershed basins for each lake identified as - off-network. Creates flow table for all adjacent lake basin boundaries to + off-network. Creates flow table for all adjacent lake basin boundaries to identify zones that have hydrological connectivity. Arguments --------- - nhd : string value of prefix to NHD directory + nhd : string value of prefloc to NHD directory bounds : GeoDataFrame of Vector Processing Unit boundaries out : directory to write out to - ''' - problems = pd.DataFrame() # holding for overdrawn basin delineations, - # i.e. bigger watershed than respective catchment - allOff = gpd.GeoDataFrame(crs={'init': u'epsg:4269'}) # concat for all lakes - allBsns = gpd.GeoDataFrame() # concat for shpfile of all basins(PointInPoly) - runit = bounds.query("UnitType == 'RPU'").copy() # GeoDF of RPU bounds only - Obounds = gpd.read_file("%s/out_of_bounds.shp" % out) # lakes found out of - # their respective hydroregion - arcpy.env.workspace = "%s/rasters/lakes/scratchArc" % out - arcpy.env.outputCoordinateSystem = fiftyseventy # output crs in ESRI WKT - cols = ['VPU','out_of_raster'] - addOut = pd.DataFrame(columns=cols) # DF to hold no. of lakes not in raster - # for QA table - #countTbl = pd.DataFrame() # concat for all RAT info into csv - flow_tbl = pd.DataFrame() # concat for all flow tables into one csv + """ + problems = pd.DataFrame() # holding for overdrawn basin delineations, + # i.e. bigger watershed than respective catchment + allOff = gpd.GeoDataFrame() # concat for all lakes + allBsns = gpd.GeoDataFrame() # concat for shpfile of all basins(PointInPoly) + runit = bounds.query("UnitType == 'RPU'").copy() # GeoDF of RPU bounds only + Obounds = gpd.read_file(f"{out}/out_of_bounds.shp") # lakes found out of + # their respective hydroregion + # arcpy.env.workspace = f"{out}/rasters/lakes/scratchArc" + arcpy.env.outputCoordinateSystem = fiftyseventy # output crs in ESRI WKT + cols = ["VPU", "out_of_raster"] + addOut = pd.DataFrame(columns=cols) # DF to hold no. 
of lakes not in raster + # for QA table + # countTbl = pd.DataFrame() # concat for all RAT info into csv + flow_tbl = pd.DataFrame() # concat for all flow tables into one csv uid = 1000 - for zone in inputs: - print zone - hr = inputs[zone] - pre = "%s/NHDPlus%s/NHDPlus%s" % (nhd, hr, zone) + print("making basins for ", end="", flush=True) + for zone, hr in inputs.items(): + print(zone, end=", ", flush=True) + pre = f"{nhd}/NHDPlus{hr}/NHDPlus{zone}" # get the lakes that were out-of-bounds into the correct vpu - addLks = Obounds.ix[Obounds.UnitID == zone].copy() - offLks = gpd.read_file("%s/off_net_%s.shp" % (out, zone)) - # remove duplicated lakes across zones - addLks.drop(addLks.loc[addLks.COMID.isin(offLks.COMID)].index,inplace=True) + addLks = Obounds.loc[Obounds.UnitID == zone].copy() + offLks = gpd.read_file(f"{out}/off_net_{zone}.shp") + # remove duplicated lakes across zones + addLks.drop(addLks.loc[addLks.COMID.isin(offLks.COMID)].index, inplace=True) # add back-in lakes that are in other zones - offLks = pd.concat([offLks,addLks]).reset_index(drop=True) + offLks = pd.concat([offLks, addLks]).reset_index(drop=True) - assert offLks.crs == {'init': u'epsg:4269'} - offLks.rename(columns={'UnitID':'VPU_moved'}, inplace=True) + assert offLks.crs.is_projected + offLks.rename(columns={"UnitID": "VPU_moved"}, inplace=True) ttl_LOST = 0 - cat = gpd.read_file('%s/NHDPlusCatchment/Catchment.shp'%(pre)) + cat = gpd.read_file(f"{pre}/NHDPlusCatchment/Catchment.shp").to_crs(offLks.crs) for rpu in rpus[zone]: lakes = offLks.copy() if len(rpus[zone]) > 1: - rpuShp = runit.query("UnitID == '%s'" % rpu).drop(['Hydroseq', - 'UnitType'], - axis=1) - lakes = sjoin(lakes, rpuShp, op='within') #this line removes lakes that straddle RPU borders - lakes = lakes.drop('index_right',axis=1) - lakes.rename(columns={'UnitID': 'RPU'}, inplace=True) + rpuShp = runit.query("UnitID == '%s'" % rpu).drop( + ["Hydroseq", "UnitType"], axis=1 + ) + # next line removes lakes that straddle RPU borders + lakes = sjoin(lakes, rpuShp, op="within") + lakes = lakes.drop("index_right", axis=1) + lakes.rename(columns={"UnitID": "RPU"}, inplace=True) if len(rpus[zone]) == 1: - lakes['RPU'] = rpu - lakes.drop_duplicates('COMID', inplace=True) # filter out duplicated + lakes["RPU"] = rpu + lakes.drop_duplicates("COMID", inplace=True) ln = len(lakes) - lakes['UID'] = range(uid,uid+ln) - uid += ln - lakes.to_file('%s/rasters/lakes/scratchArc/rasPrep_%s.shp' % (out, rpu)) - fdr = arcpy.sa.Raster("%s/NHDPlusFdrFac%s/fdr" % (pre, rpu)) + lakes["UID"] = range(uid, uid + ln) + uid += ln + lakes.to_file(f"{out}/rasters/lakes/scratchArc/rasPrep_{rpu}.shp") + fdr = arcpy.sa.Raster(f"{pre}/NHDPlusFdrFac{rpu}/fdr") arcpy.env.extent = fdr.extent - arcpy.env.snapRaster = "%s/NHDPlusFdrFac%s/fdr" % (pre, rpu) - p2r('%s/rasters/lakes/scratchArc/rasPrep_%s.shp' % (out, rpu),'UID', - "%s/rasters/lakes/lakes_%s.tif" % (out, rpu),"CELL_CENTER",'',30) - purge('%s/rasters/lakes/scratchArc' % out, 'rasPrep_%s' % rpu) - - outWshed = Watershed("%s/NHDPlusFdrNull%s/fdrnull" % (pre, rpu), - "%s/rasters/lakes/lakes_%s.tif" % (out, rpu), - "VALUE") - outWshed.save("%s/rasters/wsheds/wtshds_%s.tif" % (out,rpu)) - # create shp File of basins 4 point in poly, combine into 1 file - shpOut = "%s/shps/wtshds_%s.shp"%(out,rpu) - r2p("%s/rasters/wsheds/wtshds_%s.tif"%(out,rpu),shpOut,"NO_SIMPLIFY","VALUE") + arcpy.env.snapRaster = f"{pre}/NHDPlusFdrFac{rpu}/fdr" + p2r( + f"{out}/rasters/lakes/scratchArc/rasPrep_{rpu}.shp", + "UID", + 
f"{out}/rasters/lakes/lakes_{rpu}.tif", + "CELL_CENTER", + "", + 30, + ) + purge(f"{out}/rasters/lakes/scratchArc", f"rasPrep_{rpu}") + outWshed = Watershed( + f"{pre}/NHDPlusFdrNull{rpu}/fdrnull", + f"{out}/rasters/lakes/lakes_{rpu}.tif", + ) + outWshed.save(f"{out}/rasters/wsheds/wtshds_{rpu}.tif") + # create shp file of basins for point in poly, combine into 1 file + shpOut = f"{out}/shps/wtshds_{rpu}.shp" + r2p( + f"{out}/rasters/wsheds/wtshds_{rpu}.tif", shpOut, "NO_SIMPLIFY", "VALUE" + ) bsnShps = gpd.read_file(shpOut) - bsnShps.drop('ID',axis=1,inplace=True) - ref = bsnShps.crs - bsnShps = bsnShps.dissolve(by='GRIDCODE') # old version on gpd loses CRS in dissolve - bsnShps.crs = ref - bsnShps['UID'] = bsnShps.index - bsnShps['AreaSqKM'] = bsnShps['geometry'].area/ 10**6 - # build raster attribute table, - rat = makeRat("%s/rasters/wsheds/wtshds_%s.tif"%(out,rpu)) + bsnShps.drop("Id", axis=1, inplace=True) + bsnShps = bsnShps.dissolve(by="gridcode") + bsnShps["UID"] = bsnShps.index + bsnShps["AreaSqKM"] = bsnShps["geometry"].area / 10 ** 6 + # build raster attribute table, + rat = makeRat(f"{out}/rasters/wsheds/wtshds_{rpu}.tif") rat.columns = rat.columns.str.upper() - rat.rename(columns={'VALUE':'UID','COUNT':'BSN_COUNT'},inplace=True) - bsnShps = bsnShps.merge(rat,on='UID') - bsnShps['RPU'] = rpu - bsnShps = bsnShps.merge(lakes[['COMID','UID']],on='UID') - bsnShps = bsnShps[['COMID','UID','BSN_COUNT','AreaSqKM','RPU','geometry']] + rat.rename(columns={"VALUE": "UID", "COUNT": "BSN_COUNT"}, inplace=True) + bsnShps = bsnShps.merge(rat, on="UID") + bsnShps["RPU"] = rpu + bsnShps = bsnShps.merge(lakes[["COMID", "UID"]], on="UID") + bsnShps = bsnShps[ + ["COMID", "UID", "BSN_COUNT", "AreaSqKM", "RPU", "geometry"] + ] bsnShps.to_file(shpOut) - allBsns = pd.concat([allBsns,bsnShps]) + allBsns = pd.concat([allBsns, bsnShps]) # hold VALUE/COUNT in csv for processing - #countTbl = pd.concat([countTbl,rat]) + # countTbl = pd.concat([countTbl,rat]) # find number of lakes that don't get represented in raster, size/duplicated - ttl_LOST += (len(lakes) - len(rat)) - # write out dbf for reference -# DF2dbf(rat, "%s/rasters/wsheds/wtshds_%s.tif.vat.dbf"%(out,rpu), -# my_specs=[('N', 10, 0), ('N', 10, 0)]) - centroids = lakes.to_crs({'init': u'epsg:4269'}).copy().drop( - 'AREASQKM',axis=1) + ttl_LOST += len(lakes) - len(rat) + + # lakes.to_crs(bsnShps.crs, inplace=True) + centroids = lakes.copy().drop("AREASQKM", axis=1) centroids.geometry = centroids.centroid - if 12691112 in centroids.COMID.values: - print centroids.ix[centroids.COMID == 12691112].geometry - lkCat = sjoin(centroids, cat, op='intersects', how='left') - lkCat.columns = lkCat.columns.str.upper() + # if 12691112 in centroids.COMID.values: + # print(centroids.loc[centroids.COMID == 12691112].geometry) + lkCat = sjoin(centroids, cat, op="intersects", how="left") + lkCat.columns = lkCat.columns.str.upper() # add assoc. cats and areas to off-net lakes ---------------------- - lakes = lakes.to_crs({'init': u'epsg:4269'}).copy() - lakes = lakes.merge(lkCat[['COMID','FEATUREID','AREASQKM']].rename( - columns={'FEATUREID':'catCOMID', - 'AREASQKM':'catAREASQKM'}), - on='COMID') + lakes = lakes.merge( + lkCat[["COMID", "FEATUREID", "AREASQKM"]].rename( + columns={"FEATUREID": "catCOMID", "AREASQKM": "catAREASQKM"} + ), + on="COMID", + ) # may be a problem coming out of the spatial join above w/ assoc. 
cat - allOff = pd.concat([allOff,lakes.copy()]) + allOff = pd.concat([allOff, lakes.copy()]) # compare basin sizes --------------------------------------------- - both = pd.merge(lkCat[['COMID','UID','FEATUREID','AREASQKM']], rat, - how='inner', on='UID') - both['AreaSqKM_basin'] = (both.BSN_COUNT * 900) * 1e-6 - bigs = both.ix[both.AREASQKM < both.AreaSqKM_basin].copy() - bigs['diff'] = abs(bigs.AREASQKM - bigs.AreaSqKM_basin) - bigs['VPU'] = zone - bigs['RPU'] = rpu - problems = pd.concat([problems,bigs], ignore_index=True) # pd.DF - flow_rpu = findFlows("%s/rasters/wsheds/wtshds_%s.tif"%(out,rpu), - "%s/NHDPlusFdrFac%s/fdr" % (pre, rpu)) - flow_rpu['RPU'] = rpu - flow_tbl = pd.concat([flow_tbl, flow_rpu]) # pd.DF + both = pd.merge( + lkCat[["COMID", "UID", "FEATUREID", "AREASQKM"]], + rat, + how="inner", + on="UID", + ) + both["AreaSqKM_basin"] = (both.BSN_COUNT * 900) * 1e-6 + bigs = both.loc[both.AREASQKM < both.AreaSqKM_basin].copy() + bigs["diff"] = abs(bigs.AREASQKM - bigs.AreaSqKM_basin) + bigs["VPU"] = zone + bigs["RPU"] = rpu + problems = pd.concat([problems, bigs], ignore_index=True) + flow_rpu = findFlows( + f"{out}/rasters/wsheds/wtshds_{rpu}.tif", + f"{pre}/NHDPlusFdrFac{rpu}/fdr", + ) + flow_rpu["RPU"] = rpu + flow_tbl = pd.concat([flow_tbl, flow_rpu]) # add the number of COMIDs that get lost in raster creation i.e. too small/overlap row = pd.Series([zone, ttl_LOST], index=cols) addOut = addOut.append(row, ignore_index=True) - # write-out lakes that have a larger watershed + # write-out lakes that have a larger watershed # than their containing catchment - #countTbl.to_csv("%s/LakeCat_RasterCounts.csv" % out,index=False) # this can be merged w/ allBasin.shp file - flow_tbl.to_csv("%s/LakeCat_npy/LakeCat_PlusFlow.csv" % out,index=False) - problems.to_csv("%s/rasters/problems.csv" % out,index=False) - allOff.to_crs(offLks.crs,inplace=True) - allOff.to_file("%s/off-network.shp" % out) + # countTbl.to_csv("%s/LakeCat_RasterCounts.csv" % out,index=False) # this can be merged w/ allBasin.shp file + flow_tbl.to_csv(f"{out}/LakeCat_npy/LakeCat_PlusFlow.csv", index=False) + problems.to_csv(f"{out}/rasters/problems.csv", index=False) + # allOff.to_crs(offLks.crs,inplace=True) + allOff.to_file(f"{out}/off-network.shp") addOut.loc[len(addOut)] = pd.Series( - ['TOTALS', - addOut['out_of_raster'].sum()], - index=cols) - qa_tbl = pd.read_csv("%s/Lake_QA.csv" % out) - qa_tbl = pd.merge(qa_tbl, addOut, on='VPU') - qa_tbl.to_csv("%s/Lake_QA.csv" % out, index=False) - allBsns.to_file("%s/shps/allBasins.shp" % out) + ["TOTALS", addOut["out_of_raster"].sum()], index=cols + ) + qa_tbl = pd.read_csv(f"{out}/Lake_QA.csv") + qa_tbl = pd.merge(qa_tbl, addOut, on="VPU") + qa_tbl.to_csv(f"{out}/Lake_QA.csv", index=False) + allBsns.to_file(f"{out}/shps/allBasins.shp") purge(out, "off_net_") # delete the individual zone files -############################################################################## + print("done!") def makeNParrays(loc): - ''' + """ __author__ = "Rick Debbout " - Creates numpy arrays for LakeCat, uses a 'PlusFlow' table with - TOCOMID/FROMCOMID fields along with a shapefile dbf that holds all + Creates numpy arrays for LakeCat, uses a 'PlusFlow' table with + TOCOMID/FROMCOMID fields along with a shapefile dbf that holds all of the unique id values that were used to check for connections. 
Arguments --------- loc : location of LakeCat output directory - ''' - flow = pd.read_csv("%s/LakeCat_npy/LakeCat_PlusFlow.csv" % loc) - fcom,tcom = flow.FROMCOMID.values,flow.TOCOMID.values + """ + flow = pd.read_csv(f"{loc}/LakeCat_npy/LakeCat_PlusFlow.csv") + fcom, tcom = flow.FROMCOMID.values, flow.TOCOMID.values UpCOMs = defaultdict(list) for i in range(0, len(flow), 1): FROMCOMID = fcom[i] @@ -1098,193 +1112,24 @@ def makeNParrays(loc): UpCOMs[tcom[i]] = [] else: UpCOMs[tcom[i]].append(FROMCOMID) - # get unique IDs from shapefile dbf - tbl = dbf2DF('%s/off-network.dbf' % loc) + # get unique IDs from shapefile dbf + tbl = dbf2DF(f"{loc}/off-network.dbf") coms = tbl.UID.values - d = '%s/LakeCat_npy' % loc - if not os.path.exists(d + '/bastards'): - os.mkdir(d + '/bastards') - os.mkdir(d + '/children') + d = f"{loc}/LakeCat_npy" + if not os.path.exists(f"{d}/bastards"): + os.mkdir(f"{d}/bastards") + os.mkdir(f"{d}/children") # create bastard arrays - a = map(lambda x: bastards(x, UpCOMs), coms) + a = list(map(lambda x: bastards(x, UpCOMs), coms)) lengths = np.array([len(v) for v in a]) - a = np.int32(np.hstack(np.array(a))) #Convert to 1d vector - np.savez_compressed('%s/bastards/accum.npz' % (d), comids=coms,lengths=lengths,upstream=a) - # create children arrays - a = map(lambda x: children(x, UpCOMs), coms) + a = np.int32(np.hstack(a)) # Convert to 1d vector + np.savez_compressed( + f"{d}/bastards/accum.npz", comids=coms, lengths=lengths, upstream=a + ) + # create children arrays + a = list(map(lambda x: children(x, UpCOMs), coms)) lengths = np.array([len(v) for v in a]) - a = np.int32(np.hstack(np.array(a))) # Convert to 1d vector - np.savez_compressed('%s/children/accum.npz' % (d), comids=coms,lengths=lengths,upstream=a) - -############################################################################## - - -def main (nhd, out): - - if not os.path.exists("%s/rasters" % out): - if not os.path.exists(out): - os.mkdir(out) - os.mkdir("%s/rasters" % out) - os.mkdir("%s/rasters/lakes" % out) - os.mkdir("%s/rasters/lakes/scratchArc" % out) - os.mkdir("%s/rasters/wsheds" % out) - os.mkdir("%s/shps" % out) - os.mkdir("%s/joinTables" % out) - os.mkdir("%s/LakeCat_npy" % out) - - - NHDbounds = gpd.read_file( - "%s/NHDPlusGlobalData/BoundaryUnit.shp" % nhd).drop( - ['AreaSqKM','DrainageID','Shape_Area', - 'Shape_Leng','UnitName'], axis=1) - if not os.path.exists("%s/Lake_QA.csv" % out): - NHDtblMerge(nhd, NHDbounds, out) - makeBasins(nhd, NHDbounds, out) - makeNParrays('%s' % out) -############################################################################## - - -if __name__=='__main__': - if len(sys.argv) == 1: - nhd_dir = askdirectory(title='Select the location of NHDPlusv21:', - initialdir='.') - q = 'Select the location where you want LakeCat data written:' - out_dir = askdirectory(title=q, initialdir='.') - elif len(sys.argv) > 1: - nhd_dir = sys.argv[1] - out_dir = sys.argv[2] - - main(nhd_dir, out_dir) - -############################################################################## -def onNetZone(x, here=r'D:\Projects\LKCAT_frame\joinTables'): - for f in os.listdir(here): - print f - tbl = pd.read_csv('%s/%s' % (here,f)) - if x in tbl.wbCOMID.values: - print f.split('.')[0].split('_')[-1] - print tbl.ix[tbl.wbCOMID == x] - -# Unused functions, but would be useful for creating flexibility w/ new datasets - -#def makeVPUdict(directory): -# ''' -# __author__ = "Rick Debbout " -# Creates an OrderdDict for looping through regions of the NHD to carry InterVPU -# connections across VPU zones 
-# -# Arguments -# --------- -# directory : the directory contining NHDPlus data at the top level -# ''' -# B = dbf2DF('%s/NHDPlusGlobalData/BoundaryUnit.dbf' % directory) -# B = B.drop(B.ix[B.DRAINAGEID.isin(['HI','CI'])].index, axis=0) -# B = B.ix[B.UNITTYPE == 'VPU'].sort_values('HYDROSEQ',ascending=False) -# inputs = OrderedDict() # inputs = OrderedDict((k, inputs[k]) for k in order) -# for idx, row in B.iterrows(): -# inputs[row.UNITID] = row.DRAINAGEID -# print 'HydroRegion (value): ' + row.DRAINAGEID + ' in VPU (key): ' + row.UNITID -# np.save('%s/StreamCat_npy/zoneInputs.npy' % directory, inputs) -# return inputs -############################################################################### -# -# -#def makeRPUdict(directory): -# ''' -# __author__ = "Rick Debbout " -# Creates an OrderdDict for looping through regions of the NHD RPU zones -# -# Arguments -# --------- -# directory : the directory contining NHDPlus data at the top level -# ''' -# B = dbf2DF('%s/NHDPlusGlobalData/BoundaryUnit.dbf' % directory) -# B = B.drop(B.ix[B.DRAINAGEID.isin(['HI','CI'])].index, axis=0) -# rpuinputs = OrderedDict() -# for idx, row in B.iterrows(): -# if row.UNITTYPE == 'RPU': -# hr = row.DRAINAGEID -# rpu = row.UNITID -# for root, dirs, files in os.walk('%s/NHDPlus%s' % (directory, hr)): -# for name in dirs: -# if rpu in os.path.join(root, name): -# zone = os.path.join(root, name).split('\\')[-3].replace('NHDPlus','') -# break -# if not zone in rpuinputs.keys(): -# rpuinputs[zone] = [] -# print 'RPU: ' + rpu + ' in zone: ' + zone -# rpuinputs[zone].append(row.UNITID) -# np.save('%s/StreamCat_npy/rpuInputs.npy' % directory, rpuinputs) -# return rpuinputs -############################################################################### -# -#def NHDdict(directory, unit='VPU'): -# ''' -# __author__ = "Rick Debbout " -# Creates an OrderdDict for looping through regions of the NHD RPU zones -# -# Arguments -# --------- -# directory : the directory contining NHDPlus data at the top level -# unit : Vector or Raster processing units 'VPU' or 'RPU' -# ''' -# if unit == 'VPU': -# if not os.path.exists('%s/StreamCat_npy' % directory): -# os.mkdir('%s/StreamCat_npy' % directory) -# if not os.path.exists('%s/StreamCat_npy/zoneInputs.npy' % directory): -# inputs = makeVPUdict(directory) -# else: -# inputs = np.load('%s/StreamCat_npy/zoneInputs.npy' % directory).item() -# if unit == 'RPU': -# if not os.path.exists('%s/StreamCat_npy' % directory): -# os.mkdir('%s/StreamCat_npy' % directory) -# if not os.path.exists('%s/StreamCat_npy/rpuInputs.npy' % directory): -# inputs = makeRPUdict(directory) -# else: -# inputs = np.load('%s/StreamCat_npy/rpuInputs.npy' % directory).item() -# return inputs -############################################################################### -# -#def createAccumTable(table, d, tbl_type, icol='COMID', zone=""): -# ''' -# __author__ = "Marc Weber " -# "Ryan Hill " -# Accesses either children or bastards directory to pass in to Accumulation -# function for either UpCat metrics or Ws metrics. -# -# Arguments -# --------- -# table : table containing catchment values -# d : location of numpy files -# zone : string of an NHDPlusV2 VPU zone, i.e. 
10L, 16, 17 -# tbl_type : string value of table metrics to be returned -# ''' -# tbl_type = 'Ws' if d.split('/')[-1] == 'children' else 'UpCat' -# accum = np.load('%s/accum_%s.npz' % (d, zone)) -# add = Accumulation(table, accum['comids'], accum['lengths'], accum['upstream'], tbl_type, icol) -# return add -# -############################################################################### -# -#def createAccumTable2(table, directory, tbl_type, zone=""): -# ''' -# __author__ = "Marc Weber " -# "Ryan Hill " -# Accesses either children or bastards directory to pass in to Accumulation -# function for either UpCat metrics or Ws metrics. -# -# Arguments -# --------- -# table : table containing catchment values -# directory : location of numpy files -# zone : string of an NHDPlusV2 VPU zone, i.e. 10L, 16, 17 -# tbl_type : string value of table metrics to be returned -# ''' -# a = np.load(directory)['Connect_arrays'] -# comids = a.item()[zone]['comids'] -# lengths= a.item()[zone]['lengths'] -# upstream = a.item()[zone]['upstream'] -# add = Accumulation(table, comids, lengths, upstream, tbl_type, 'COMID') -# return add -# -# + a = np.int32(np.hstack(a)) # Convert to 1d vector + np.savez_compressed( + f"{d}/children/accum.npz", comids=coms, lengths=lengths, upstream=a + ) diff --git a/MakeFinalTables_LakeCat.py b/MakeFinalTables_LakeCat.py index 95ab925..d3cd63a 100644 --- a/MakeFinalTables_LakeCat.py +++ b/MakeFinalTables_LakeCat.py @@ -1,12 +1,18 @@ -# Script to build final LakeCat tables. -# Date: Jan 22, 2016 -# Author: Rick Debbout -# NOTE: run script from command line passing directory and name of this script -# and then directory and name of the control table to use like this: -# > python "/path/to/makeFinalTables.py" /path/to/ControlTable_LakeCat.csv -# - -import sys, os +""" +Script to build final LakeCat tables. +Date: Jan 22, 2016 +Author: Rick Debbout + +NOTE: run script from command line w/in the LakeCat directory, the ControlTable +is located at the root of this directory and will be read-in from there. + +> python MakeFinalTables_LakeCat.py + +""" + + +import os +import sys import zipfile import pandas as pd @@ -14,6 +20,8 @@ ctl = pd.read_csv("ControlTable_LakeCat.csv") +assert ctl.FullTableName.duplicated().any() == False, "FullTableName column must be unique" + runners = ctl.query("run == 1").groupby("Final_Table_Name") tables = runners["FullTableName"].unique().to_dict() missing = [] @@ -23,111 +31,130 @@ if not os.path.exists(accumulated_file): missing.append(metric) -if len(missing) > 0: +if missing: for miss in missing: - print('Missing ' + miss) - print('Check output from LakeCat.py') + print(f"Missing {miss}") + print("Check output from LakeCat.py") sys.exit() + allStats = pd.DataFrame() -for table in tables: - if not os.path.exists(FINAL_DIR +'/' + table + '.csv'): - print 'Running ' + table + ' .....' 
- for var in range(len(tables[table])): - accum = ctl.accum_type.loc[ctl.Final_Table_Name == table].any() - metricName = ctl.MetricName.loc[ctl.FullTableName == tables[table][var]].item() - metricType = ctl.MetricType.loc[ctl.FullTableName == tables[table][var]].item() - appendMetric = ctl.AppendMetric.loc[ctl.FullTableName == tables[table][var]].item() - if appendMetric == 'none': - appendMetric = '' - conversion = float(ctl.Conversion.loc[ctl.FullTableName == tables[table][var]].values[0]) - tbl = pd.read_csv(OUT_DIR + '/%s.csv'%(tables[table][var])) - frontCols = [title for title in tbl.columns for x in ['COMID','AreaSqKm','PctFull','inStreamCat'] if x in title and not 'Up' in title] - catArea = frontCols[1] - catPct = frontCols[2] - wsArea = frontCols[3] - wsPct = frontCols[4] - frontCols = [frontCols[i] for i in [0,1,3,2,4,5]] #re-order for correct sequence - summary = None - if ctl.summaryfield.loc[ctl.Final_Table_Name == table].any(): - summary = ctl.summaryfield.loc[ctl.FullTableName == tables[table][var]].item().split(';') - if metricType == 'Mean': - colname1 = metricName + 'Cat' + appendMetric - colname2 = metricName + 'Ws' + appendMetric - tbl[colname1] = ((tbl['CatSum%s' % appendMetric] / tbl['CatCount%s' % appendMetric]) * conversion) - tbl[colname2] = ((tbl['WsSum%s' % appendMetric] / tbl['WsCount%s' % appendMetric]) * conversion) - if var == 0: +for table, metrics in tables.items(): + + if not os.path.exists(f"{FINAL_DIR}/{table}.csv"): + + print(f"Running {table}.....") + combined_metrics = ctl.FullTableName.isin(metrics) + metric_table = ctl.loc[combined_metrics].copy().reset_index(drop=True) + + for idx, row in metric_table.iterrows(): + + a_m = row.AppendMetric if not row.AppendMetric == "none" else "" + tbl = pd.read_csv(f"{OUT_DIR}/{row.FullTableName}.csv") + frontCols = [title + for title in tbl.columns + for x in ["COMID","AreaSqKm","PctFull","inStreamCat"] + if x in title and not "Up" in title] + catArea, catPct, wsArea, wsPct = frontCols[1:5] + # re-order for correct sequence + frontCols = [frontCols[i] for i in [0,1,3,2,4,5]] + summary = row.summaryfield.split(";") if type(row.summaryfield) == str else None + + if row.MetricType == "Mean": + colname1 = f"{row.MetricName}Cat{a_m}" + colname2 = f"{row.MetricName}Ws{a_m}" + tbl[colname1] = ((tbl[f"CatSum{a_m}"] / tbl[f"CatCount{a_m}"]) * row.Conversion) + tbl[colname2] = ((tbl[f"WsSum{a_m}"] / tbl[f"WsCount{a_m}"]) * row.Conversion) + if idx == 0: final = tbl[frontCols + [colname1] + [colname2]] else: - final = pd.merge(final,tbl[["COMID",colname1,colname2]],on='COMID') - if metricType == 'Density': - colname1 = metricName + 'Cat' + appendMetric - colname2 = metricName + 'Ws' + appendMetric + final = pd.merge(final,tbl[["COMID",colname1,colname2]],on="COMID") + if row.MetricType == "Density": + colname1 = f"{row.MetricName}Cat{a_m}" + colname2 = f"{row.MetricName}Ws{a_m}" if summary: finalNameList = [] for sname in summary: - if 'Dens' in metricName: - metricName = metricName[:-4] - fnlname1 = metricName + sname + 'Cat' + appendMetric - fnlname2 = metricName + sname + 'Ws' + appendMetric - tbl[fnlname1] = tbl['Cat' + sname] / (tbl[catArea] * (tbl[catPct]/100)) - tbl[fnlname2] = tbl['Ws' + sname] / (tbl[wsArea] * (tbl[wsPct]/100)) + if "Dens" in row.MetricName: + metricName = row.MetricName[:-4] + fnlname1 = f"{metricName}{sname}Cat{a_m}" + fnlname2 = f"{metricName}{sname}Ws{a_m}" + tbl[fnlname1] = tbl[f"Cat{sname}"] / (tbl[catArea] * (tbl[catPct]/100)) + tbl[fnlname2] = tbl[f"Ws{sname}"] / (tbl[wsArea] * 
(tbl[wsPct]/100)) finalNameList.append(fnlname1) finalNameList.append(fnlname2) - if table == 'RoadStreamCrossings' or table == 'CanalsDitches': - tbl[colname1] = (tbl.CatSum / (tbl.CatAreaSqKm * (tbl.CatPctFull/100)) * conversion) ## NOTE: Will there ever be a situation where we will need to use 'conversion' here - tbl[colname2] = (tbl.WsSum / (tbl.WsAreaSqKm * (tbl.WsPctFull/100)) * conversion) + if table == "RoadStreamCrossings" or table == "CanalsDitches": + ## NOTE: Will there ever be a situation where we will need to use "conversion" here + tbl[colname1] = (tbl.CatSum / (tbl.CatAreaSqKm * (tbl.CatPctFull/100)) * row.Conversion) + tbl[colname2] = (tbl.WsSum / (tbl.WsAreaSqKm * (tbl.WsPctFull/100)) * row.Conversion) else: - tbl[colname1] = tbl['CatCount%s' % appendMetric] / (tbl['CatAreaSqKm%s' % appendMetric] * (tbl['CatPctFull%s' % appendMetric]/100)) ## NOTE: Will there ever be a situation where we will need to use 'conversion' here - tbl[colname2] = tbl['WsCount%s' % appendMetric] / (tbl['WsAreaSqKm%s' % appendMetric] * (tbl['WsPctFull%s' % appendMetric]/100)) - if var == 0: + tbl[colname1] = tbl[f"CatCount{a_m}"] / (tbl[f"CatAreaSqKm{a_m}"] * (tbl[f"CatPctFull{a_m}"]/100)) + tbl[colname2] = tbl[f"WsCount{a_m}"] / (tbl[f"WsAreaSqKm{a_m}"] * (tbl[f"WsPctFull{a_m}"]/100)) + if idx == 0: if summary: - final = tbl[frontCols + [colname1] + [x for x in finalNameList if 'Cat' in x] + [colname2] + [x for x in finalNameList if 'Ws' in x]] - final.columns = [x.replace('M3','') for x in final.columns] + final = tbl[frontCols + + [colname1] + + [x for x in finalNameList if "Cat" in x] + + [colname2] + + [x for x in finalNameList if "Ws" in x] + ] + final.columns = [x.replace("M3","") for x in final.columns] else: final = tbl[frontCols + [colname1] + [colname2]] else: if summary: - final = pd.merge(final,tbl[["COMID"] + [colname1] + [x for x in finalNameList if 'Cat' in x] + [colname2] + [x for x in finalNameList if 'Ws' in x]],on='COMID') - final.columns = [x.replace('M3','') for x in final.columns] + final = pd.merge(final,tbl[["COMID"] + + [colname1] + + [x for x in finalNameList if "Cat" in x] + + [colname2] + + [x for x in finalNameList if "Ws" in x]], + on="COMID") + final.columns = [x.replace("M3","") for x in final.columns] else: - final = pd.merge(final,tbl[["COMID",colname1,colname2]],on='COMID') - if metricType == 'Percent': - lookup = pd.read_csv(metricName) + final = pd.merge(final,tbl[["COMID",colname1,colname2]],on="COMID") + if row.MetricType == "Percent": + lookup = pd.read_csv(row.MetricName) catcols,wscols = [],[] for col in tbl.columns: - if 'CatVALUE' in col and not 'Up' in col: + if "CatVALUE" in col and not "Up" in col: tbl[col] = ((tbl[col] * 1e-6)/(tbl[catArea]*(tbl[catPct]/100))*100) catcols.append(col) - if 'WsVALUE' in col: + if "WsVALUE" in col: tbl[col] = ((tbl[col] * 1e-6)/(tbl[wsArea]*(tbl[wsPct]/100))*100) wscols.append(col) - if var == 0: + if idx == 0: final = tbl[frontCols+catcols + wscols].copy() - final.columns = frontCols + ['Pct' + x + 'Cat' + appendMetric for x in lookup.final_val.values] + ['Pct' + y + 'Ws' + appendMetric for y in lookup.final_val.values] + final.columns = (frontCols + + [f"Pct{x}Cat{a_m}" for x in lookup.final_val.values] + + [f"Pct{y}Ws{a_m}" for y in lookup.final_val.values]) else: - final2 = tbl[['COMID'] + catcols + wscols] - final2.columns = ['COMID'] + ['Pct' + x + 'Cat' + appendMetric for x in lookup.final_val.values] + ['Pct' + y + 'Ws' + appendMetric for y in lookup.final_val.values] - final = pd.merge(final,final2,on='COMID') 
- if table == 'AgMidHiSlopes': - final = final.drop(['PctUnknown1Cat','PctUnknown2Cat', - 'PctUnknown1Ws', 'PctUnknown2Ws'], + final2 = tbl[["COMID"] + catcols + wscols] + final2.columns = (["COMID"] + + [f"Pct{x}Cat{a_m}" for x in lookup.final_val.values] + + [f"Pct{y}Ws{a_m}" for y in lookup.final_val.values]) + final = pd.merge(final,final2,on="COMID") + if table == "AgMidHiSlopes": + final = final.drop(["PctUnknown1Cat","PctUnknown2Cat", + "PctUnknown1Ws", "PctUnknown2Ws"], axis=1) - statTbl = pd.DataFrame({'ATT':[table]},columns=['ATT','MAX','MIN']) - if 'ForestLossByYear0013' == table: - final.drop([col for col in final.columns if 'NoData' in col], axis=1, inplace=True) + statTbl = pd.DataFrame({"ATT":[table]},columns=["ATT","MAX","MIN"]) + if "ForestLossByYear0013" == table: + final.drop([col for col in final.columns if "NoData" in col], axis=1, inplace=True) for c in final.columns.tolist(): - statTbl = pd.concat([statTbl,pd.DataFrame({'ATT': [c], - 'MIN': [final[c].min()], - 'MAX':[final[c].max()]})]) + statTbl = pd.concat([statTbl,pd.DataFrame({"ATT": [c], + "MIN": [final[c].min()], + "MAX":[final[c].max()]})]) allStats = pd.concat([allStats,statTbl]) print(statTbl) - final = final.set_index('COMID').fillna('NA') - final = final[final.columns.tolist()[:5] + [x for x in final.columns[5:] if 'Cat' in x] + [x for x in final.columns[5:] if 'Ws' in x]].fillna('NA') - out_file = '%s/%s.csv' % (FINAL_DIR, table) + final = final.set_index("COMID").fillna("NA") + final = final[final.columns.tolist()[:5] + + [x for x in final.columns[5:] if "Cat" in x] + + [x for x in final.columns[5:] if "Ws" in x] + ].fillna("NA") + out_file = f"{FINAL_DIR}/{table}.csv" final.to_csv(out_file) # zip up the file.... - zf = zipfile.ZipFile("{}/zips/{}.zip".format(FINAL_DIR, table), mode="w") - zf.write(out_file, "{}.csv".format(table), compress_type=zipfile.ZIP_DEFLATED) + zf = zipfile.ZipFile(f"{FINAL_DIR}/zips/{table}.zip", mode="w") + zf.write(out_file, f"{table}.csv", compress_type=zipfile.ZIP_DEFLATED) zf.close() -print 'All Done.....' +print("All Done.....") diff --git a/README.md b/README.md index 9ba9203..d7f3611 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,61 @@ # LakeCat -## Description: -The LakeCat DataSet (http://www2.epa.gov/national-aquatic-resource-surveys/lakecat) provides summaries of natural and anthropogenic landscape features for ~378,000 lakes and their associated catchments within the conterminous USA. This repo contains code used in LakeCat to process a suite of landscape rasters to produce watershed metrics for these lakes. - ## Necessary Python Packages and Installation Tips - -The scripts for LakeCat rely on several python modules a user will need to install such as numpy, pandas, gdal, fiona, rasterio, geopandas, shapely, pysal, and ArcPy with an ESRI license (minimal steps still using ArcPy). We highly recommend using a scientific python distribution such as [Anaconda](https://www.continuum.io/downloads) or [Enthought Canopy](https://www.enthought.com/products/canopy/). We used the conda package manager to install necessary python modules. Our essential packages and versions used are listed below (Windows 64 and Python 2.7.11): +The scripts for StreamCat rely on several python modules a user will need to install such as numpy, pandas, gdal, fiona, rasterio, geopandas, shapely, pysal, and ArcPy with an ESRI license (minimal steps still using ArcPy). 
We highly recommend using a scientific python distribution such as [Anaconda](https://www.continuum.io/downloads) or [Enthought Canopy](https://www.enthought.com/products/canopy/). We used the conda package manager to install necessary python modules. Note that package configurations and dependencies are sensitive and can change - in particular, setting up an environment with a working version of both `geopandas` and `arcpy` can be challenging. Our working version of the conda environment is contained in the StreamCat.yml file in the repository, and our essential packages and versions when the code was last used are listed below - note that other configurations may work; we have simply verified this particular combination (Windows 64 and Python 3.7.10): | Package | Version | | ------------- |--------------:| -| fiona | 1.7.7 | -| gdal | 2.2.0 | -| geopandas | 0.2.1 | -| geos | 3.5.1 | -| libgdal | 2.0.0 | -| numpy | 1.12.1 | -| pandas | 0.20.2 | -| pyproj | 1.9.5.1 | -| pysal | 1.13.0 | -| rasterio | 1.0a9 | -| shapely | 1.5.17 | - -If you are using Anaconda, creating a new, clean 'LakeCat' environment with these needed packages can be done easily and simply one of several ways: - -* In your conda shell, add one necessary channel and then download the lakecat environment from the Anaconda cloud: +| python | 3.7.10 | +| fiona | 1.8.18 | +| gdal | 3.1.4=py37 | +| geopandas | 0.9.0 | +| geos | 3.8.1 | +| libgdal | 3.1.4 | +| numpy | 1.19.5 | +| pandas | 1.2.3 | +| pyproj | 2.6.1 | +| rasterio | 1.2.1=py37 | +| shapely | 1.7.1 | + +If you are using Anaconda, creating a new, clean 'StreamCat' environment with these needed packages can be done one of several ways: + +* In your conda shell, add one necessary channel and then download the streamcat environment from the Anaconda cloud: + conda config --add channels conda-forge - + conda env create mweber36/lakecat + + conda env create mweber36/StreamCat -* Alternatively, using the lakecat.yml file in this repository, in your conda shell cd to the directory where your lakecat.yml file is located and run: - + conda env create -f LakeCat.yml +* Alternatively, using the streamcat.yml file in this repository, in your conda shell cd to the directory where your streamcat.yml file is located and run: + + conda env create -f StreamCat.yml -* To build environment yourself, do: - + conda env create -n LakeCat rasterio geopandas - + pip install georasters +* To build the environment yourself, we [followed the steps suggested here](https://www.e-education.psu.edu/geog489/node/2348) which are: + + conda create -n StreamCat -c conda-forge python=3.7 anaconda gdal=3.1.4 vs2015_runtime=14.28.29325 numpy=1.19.5 jupyter pandas geopandas matplotlib cartopy beautifulsoup4 shapely rpy2=3.4.1 simplegeneric r-raster=3.4_5 r-dismo=1.3_3 r-maptools pyproj=2.6.1.post1 rasterio + +* Activate the new environment: -* To activate this new environment and open Spyder, type the following at the conda prompt - + activate LakeCat + + conda activate StreamCat + +* To open Spyder, type the following at the conda prompt + + activate StreamCat Then + Spyder - -Finally, to use arcpy in this new environment, you will need to copy your Arc .pth file into your new environment.
Copy the .pth file for your install of ArcGIS located in a directory like: -+ C:\Python27\ArcGISx6410.3\Lib\site-packages\DTBGGP64.pth +Finally, to use arcpy in this new environment, you will need to copy several ArcPro files and folders to your new environment as follows: + ++ C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/Lib/site-packages/Arcgisscripting + ++ C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/Lib/site-packages/arcpy_wmx + ++ C:/Program Files/ArcGIS/Pro/bin/Python/envs/arcgispro-py3/Lib/site-packages/Gapy To your environment directory which should look something like: -+ C:\Anaconda\envs\lakecat\Lib\site-packages\DTBGGP64.pth ++ C:/Users/mweber/AppData/Local/Continuum/anaconda3/envs/StreamCat/Lib/site-packages Note that the exact paths may vary depending on the version of ArcGIS and Anaconda you have installed and the configuration of your computer + ## How to Run Scripts ### The scripts make use of 'control tables' to pass all the particular parameters to the two primary scripts: diff --git a/border.py b/border.py index 1a46155..7625e21 100644 --- a/border.py +++ b/border.py @@ -47,11 +47,11 @@ def dissolveStates(f, nm): 'American Samoa', 'Puerto Rico', 'Hawaii'] - sts = sts.drop(sts.ix[sts[nm].isin(nin)].index) + sts = sts.drop(sts.loc[sts[nm].isin(nin)].index) sts['dissolve'] = 1 conus = sts.dissolve(by='dissolve') - conus = conus[[nm,'geometry']] - conus.ix[conus.index[0]][nm] = 'CONUS' + conus = conus[[nm,'geometry']].copy() + conus.loc[conus.index[0], nm] = 'CONUS' return conus def brdrPctFull(zns, brdr, ncol, acol='AreaSqKM'): @@ -65,9 +65,9 @@ def brdrPctFull(zns, brdr, ncol, acol='AreaSqKM'): ''' # move poly to albers, need to stay in this CRS to cal. area later if brdr.crs != zns.crs: - brdr.to_crs(zns.crs,inplace=True) - touch = sjoin(zns,brdr,op='within') - nwin = zns.ix[~zns[ncol].isin(touch[ncol])].copy() + brdr.to_crs(zns.crs, inplace=True) + touch = sjoin(zns, brdr, op='within') + nwin = zns.loc[~zns[ncol].isin(touch[ncol])].copy() if len(nwin) == 0: return pd.DataFrame() tot = pd.DataFrame() @@ -107,16 +107,19 @@ def makeBrdrPctFile(b_file, z_file, b_field, z_field): return final if __name__ == '__main__': - + + # below is the file to download that we use to assess pct full on border + # cats/basins + # https://www2.census.gov/geo/tiger/TIGER2010/STATE/2010/tl_2010_us_state10.zip us_file = 'L:/Priv/CORFiles/Geospatial_Library/Data/RESOURCE/POLITICAL/BOUNDARIES/NATIONAL/TIGER_2010_State_Boundaries.shp' lake_basins = 'D:/Projects/Frame_NULL/shps/allBasins.shp' here = 'D:/Projects/Frame_NULL/border' if not os.path.exists(here): os.mkdir(here) nhd = 'D:/NHDPlusV21' - print 'Making border PctFull csv' + print('Making border PctFull csv') # LakeCat csv = makeBrdrPctFile(us_file, lake_basins, 'NAME10', 'UID') # StreamCat # csv = makeBrdrPctFile(us_file, nhd, 'NAME10', 'FEATUREID') - csv.to_csv('%s/pct_full.csv' % here) \ No newline at end of file + csv.to_csv(f'{here}/pct_full.csv') \ No newline at end of file diff --git a/lake_cat_config.py.template b/lake_cat_config.py.template index 4e45830..0b52a54 100644 --- a/lake_cat_config.py.template +++ b/lake_cat_config.py.template @@ -26,7 +26,7 @@ STATES_FILE = "path/to/file/tl_2008_us_state.shp" OUT_DIR = ('C:/path/to/write/out/files/to') # location for the final tables -FINAL_DIR = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/FTP_Staging/Hydroregions" +FINAL_DIR = "L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/FTP_Staging/FinalTables" # files that hold the pct_full data, 
created from ??? pct_full_file = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/ALL_BORDER_CATS.csv"
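
For orientation, a locally adapted `lake_cat_config.py` built from the template above might look like the minimal sketch below. All paths are hypothetical placeholders rather than the EPA network locations shown in the template, and the note on `pct_full_file` is an inference from this patch: `makeBrdrPctFile` in `border.py` writes a `pct_full.csv` of border-catchment percent-full values, which appears to be the kind of table this variable points to.

```python
# lake_cat_config.py -- illustrative local settings only; every path below is a placeholder.

# state boundary shapefile (used to identify catchments on the US border)
STATES_FILE = "C:/LakeCat/data/tl_2008_us_state.shp"

# where per-metric allocation/accumulation results are written
OUT_DIR = "C:/LakeCat/Allocation_Accumulation"

# location for the final tables (MakeFinalTables_LakeCat.py writes the CSVs here and zips to a zips/ subfolder)
FINAL_DIR = "C:/LakeCat/FinalTables"

# table of border-catchment PctFull values, e.g. as produced by border.py
pct_full_file = "C:/LakeCat/border/pct_full.csv"
```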