Skip to content

Commit

Permalink
new data from Steve Campbell, more metadata, addressing #61
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanbeaudette committed Mar 28, 2018
1 parent 18da567 commit e4553d5
Show file tree
Hide file tree
Showing 4 changed files with 287 additions and 14 deletions.
Binary file modified data/SCAN_SNOTEL_metadata.rda
Binary file not shown.
122 changes: 108 additions & 14 deletions misc/make-scan-snotel-metadata-db.R
Original file line number Diff line number Diff line change
@@ -1,41 +1,135 @@
library(soilDB)
library(plyr)

# https://github.com/ncss-tech/soilDB/issues/61

## get these data from SCAN www map, zoom all the way out and then click on export to CSV
# NOTE(review): `x` is read again further down with colClasses = 'character';
# that later assignment replaces the object created here
x <- read.csv('scan-snotel-data/station-metadata.csv', stringsAsFactors = FALSE)

# fix HUC formatting
# round-trips HUC through numeric and back to character
x$HUC <- as.character(as.numeric(x$HUC))
# attempt to cross-reference a lab ID via pedon ID
# using a LIMS report and HTML scraping, ick
# about 5 seconds per request
# Look up the lab ID reported by the LIMS "Pedon Description" report
#
# pedonID: vector of user pedon IDs, one HTTP request is made per usable ID
#
# Returns a character vector with one element per element of `pedonID`
# (named by pedon ID). An element is NA when the ID is NA / empty, when the
# request or parsing fails, or when the expected "label: value" text is not
# found in the report. A failed request raises a warning instead of stopping,
# so that a single bad ID does not discard all of the other results.
getLabPedon <- function(pedonID) {

  # look-up for a single pedon ID
  lookupOne <- function(id) {
    # nothing to ask the server about
    if (is.na(id) || !nzchar(id)) {
      return(NA_character_)
    }

    # encode the ID so that unexpected characters cannot break the query string
    url <- sprintf('https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=Pedon+Description+html+(userpedid)&pedon_id=%s', utils::URLencode(id, reserved = TRUE))

    tryCatch({
      # namespace-qualified: this file only attaches soilDB and plyr
      rpt <- xml2::read_html(url)
      n <- rvest::html_node(rpt, xpath = "//*/table/tr[10]/td[1]/*")
      n <- xml2::xml_text(n)

      # text is expected to look like "label: value", keep the value without spaces
      # a missing node or missing ':' results in NA
      gsub(' ', '', strsplit(n, ':')[[1]][2])
    }, error = function(e) {
      warning('lab ID lookup failed for pedon ', id, ': ', conditionMessage(e), call. = FALSE)
      NA_character_
    })
  }

  # vapply keeps the result a character vector, even for zero-length input
  vapply(as.character(pedonID), lookupOne, character(1), USE.NAMES = TRUE)
}



##
## station list / site information
##

# get these data from SCAN/SNOTEL www map, zoom all the way out and then click on export to CSV
# there are some trash data in here, trailing tabs
# every column is read as character, conversion to numeric is done explicitly below
x <- read.csv('scan-snotel-data/station-metadata.csv', stringsAsFactors = FALSE, colClasses = 'character')

# fix formatting: strip leading / trailing whitespace, then convert numeric fields
# selecting by name fails loudly if an expected column is missing from the export
txt.vars <- c('Name', 'State', 'Network', 'County', 'HUC')
num.vars <- c('ID', 'Elevation_ft', 'Latitude', 'Longitude')
x[txt.vars] <- lapply(x[txt.vars], trimws)
x[num.vars] <- lapply(x[num.vars], function(i) as.numeric(trimws(i)))

# re-name ID
# done by name (not position) so that a change in column order cannot rename the wrong column
names(x)[names(x) == 'ID'] <- 'Site'

# check: OK
nrow(x)
str(x)


## most of these files are maintained by regional staff
## naming convention from NASIS site table
# NOTE(review): `p` and `p.west` (below) are both read from the same CSV file
p <- read.csv('scan-snotel-data/Utah_DCO_Soil_Lab_Data.csv', stringsAsFactors = FALSE)
##
## pedon / lab IDs
##

## SCAN / SNOTEL sites from western US
# most of these files are maintained by regional staff
# naming convention from NASIS site table
p.west <- read.csv('scan-snotel-data/Utah_DCO_Soil_Lab_Data.csv', stringsAsFactors = FALSE)

# whats in here:
# many sites from several states!
# interactive inspection only: structure and number of records per state
str(p)
table(p$state)
str(p.west)
table(p.west$state)

# keep subset of columns
# station ID, station name, user pedon ID, lab sample number
p <- p[, c('climstaid', 'climstanm', 'upedonid', 'pedlabsampnum')]
p.west <- p.west[, c('climstaid', 'climstanm', 'upedonid', 'pedlabsampnum')]

# re-name ID
# first column (climstaid) becomes 'Site', the key used in the joins further down
names(p)[1] <- 'Site'
names(p.west)[1] <- 'Site'

# re-name for mixing
# all columns except 'Site' get a '-WEST' suffix
# the resulting names are not syntactic, so they must be back-quoted when used with `$`
names(p.west)[-1] <- paste0(names(p.west)[-1], '-WEST')

# check: ok
str(p)
str(p.west)



## SCAN data via Steve Campbell / soil climate center
# missing lab IDs
# missing SNOTEL sites
p.scan <- read.csv('scan-snotel-data/SCAN_Pedon_Master.csv', stringsAsFactors = FALSE)

# the source file contains trailing whitespace in some text fields
# strip it so that station names / states compare cleanly with other sources
is.txt <- vapply(p.scan, is.character, logical(1))
p.scan[is.txt] <- lapply(p.scan[is.txt], trimws)

# the source file lists at least one station twice (identical rows)
# drop exact duplicates: avoids repeated web requests and duplicated rows after joining
p.scan <- unique(p.scan)

str(p.scan)
table(p.scan$State)

# re-name to match other metadata
# column order in the file: Station ID, Site Name, State, Pedon ID
names(p.scan) <- c('Site', 'climstanm', 'state', 'upedonid')

# look-up lab ID via LIMS report
# takes a couple of minutes
# some pedon IDs won't map to a lab ID (not linked in NASIS)
p.scan$pedlabsampnum <- getLabPedon(p.scan$upedonid)

# replace missing values with NA
p.scan$pedlabsampnum[which(p.scan$pedlabsampnum == '')] <- NA

# re-name and subset columns
# all columns except 'Site' get a '-SCAN' suffix, marking the source of the values
p.scan <- p.scan[, c('Site', 'climstanm', 'upedonid', 'pedlabsampnum')]
names(p.scan)[-1] <- paste0(names(p.scan)[-1], '-SCAN')


##
## merge metadata from various sources, filling in the missing values with best available data
##

# unique set of site IDs
# union of the sites found in the western data and in the SCAN master list
m <- data.frame(Site=unique(c(p.west$Site, p.scan$Site)), stringsAsFactors = FALSE)

# western data
# adds the '-WEST' columns, NA where a site has no western record
m <- join(m, p.west, by='Site', type='left')

# SCAN master list
# adds the '-SCAN' columns, NA where a site is not in the master list
m <- join(m, p.scan, by='Site', type='left')

# new columns for best-available
# placeholders, overwritten immediately below
m$climstanm <- NA
m$upedonid <- NA
m$pedlabsampnum <- NA

### TODO: double check logic
# select best available
# western value when it is not NA, otherwise the SCAN master list value
m$climstanm <- ifelse(! is.na(m$`climstanm-WEST`), m$`climstanm-WEST`, m$`climstanm-SCAN`)
m$upedonid <- ifelse(! is.na(m$`upedonid-WEST`), m$`upedonid-WEST`, m$`upedonid-SCAN`)
m$pedlabsampnum <- ifelse(! is.na(m$`pedlabsampnum-WEST`), m$`pedlabsampnum-WEST`, m$`pedlabsampnum-SCAN`)


##
## combine site metadata and pedon links
# NOTE(review): the result of this first join is replaced by the join against `m` just below
SCAN_SNOTEL_metadata <- join(x, p, by='Site', type='left')
##

# left join: station records from `x`, with the best-available columns from `m`
SCAN_SNOTEL_metadata <- join(x, m[, c('Site', 'climstanm', 'upedonid', 'pedlabsampnum')], by='Site', type='left')

# check for possible errors via station name comparison
# row indices where the two station names differ
# comparisons involving NA are NA and are dropped by which(), so stations without a climstanm are not flagged
idx <- which( ! SCAN_SNOTEL_metadata$Name == SCAN_SNOTEL_metadata$climstanm )
Expand Down
179 changes: 179 additions & 0 deletions misc/scan-snotel-data/SCAN_Pedon_Master.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
Station ID,Site Name,State,Pedon ID
2065,Aniak,AK,S2001AK178001
1232,Canyon Lake,AK,S2012AK070008
2213,Checksers Creek,AK,S2014AK180002
2210,Hozatka Lake,AK,S2014AK290013
2211,Innoko Camp,AK,S2014AK290015
2208,Kanaryagak Camp,AK,S2013AK270002
2212,Kanuti Lake,AK,S2014AK290011
1233,Lower Mulchatna,AK,S2012AK060001
2209,Naknek River,AK,S2013AK060002
2080,Tok,AK,S2002AK174002
1234,Weary Lake,AK,S2012AK070006
2057,AAMU-JTG,AL,S02AL089002
2078,Bragg Farm,AL,S2003AL089005
2177,Broad Acres,AL,S10AL101001
2113,Cullman-NAHRC,AL,S2006AL043001
2055,Hodges,AL,S2002AL095001
2173,Isbell Farms,AL,S10AL033001
2180,Koptis Farms,AL,S2010AL003001
2114,Livingston-UWA,AL,S2006AL119001
2178,Morris Farms,AL,S10AL087001
2178,Morris Farms,AL,S10AL087001
2181,Perdido Riv Farms,AL,S2006AL053001
2176,Selma,AL,S10AL047001
2056,Stanley Farm,AL,S2002AL103002
2179,Sudduth Farms,AL,S10AL133001
2115,Tuskegee,AL,S2006AL087001
2175,Wedowee,AL,S10AL111001
2053,Wtars,AL,S2002AL089001
2083,Uapb Campus-PB,AR,04AR069001
2091,Uapb Dewitt,AR,S2004AR001001
2090,Uapb Point Remove,AR,S2004AR115001
2030,Uapb-Lonoke Farm,AR,99AR085001
2084,Uapb-Marianna,AR,04AR077001
2026,Walnut Gulch #1,AZ,99AZ003001
2189,Cochora Ranch,CA,S2012CA079003
2190,Death Valley Jct,CA,S2012CA027001
2187,Deep Springs,CA,S2012CA027002
2191,Doe Ridge,CA,S2012CA051002
2192,Eagle Lake,CA,S2012CA063001
2185,Essex,CA,S2011CA071003
2149,Marble Creek,CA,S2009CA051001
2186,Shadow Mtns,CA,S2011CA071004
2017,Nunn #1,CO,91CO123003
2051,Everglades ARS,FL,97FL025005A
2012,Sellers Lake #1,FL,92FL069000
2009,Wakulla #1,FL,91FL073001
2027,Little River,GA,99GA277001
2013,Watkinsville #1,GA,92GA219001
2100,Island Dairy,HI,S05HI001007
2096,Kainaliu,HI,S2005HI001001
2103,Kemole Gulch,HI,S05HI001008
2097,Kukuihaele,HI,S05HI001002
2102,Mana House,HI,S05HI001006
2098,Pua Akala,HI,S05HI001003
2101,Silver Sword,HI,S2005HI001004
2099,Waimea Plain,HI,S2005HI001005
2031,Ames,IA,S2001IA015001
2068,Sharbark Hills,IA,S2002IA193001
2004,Mason #1,IL,S1991IL125001
2092,Abrams,KS,04KS035001
2094,Centralia Lake,KS,04KS131100
2093,Phillipsburg,KS,04KS147001
2079,Mammoth Cave,KY,2003KY099001
2005,Princeton #1,KY,1991KY033002
2049,Powder Mill,MD,S2002MD033006
2002,Crescent Lake #1,MN,S1991MN141001
2050,Glacial Ridge,MN,S2002MN119001
2048,Dexter,MO,S2002MO207001
2220,Elsberry PMC,MO,S2015MO113001
2194,Journagan Ranch,MO,S2012MO067001
2060,Mt. Vernon,MO,S2002MO109001
2193,Schell-Osage,MO,S2012MO185001
2032,Beasley Lake,MS,99MS133001
2024,Goodwin Ck Pasture,MS,S2002MS107001
2025,Goodwin Ck Timber,MS,S2002MS107002
2110,Mayday,MS,S2005MS163001
2087,North Issaquena,MS,S2004MS055001
2033,Onward,MS,99MS125003
2046,Perthshire,MS,S02MS-011-001
2061,Powell Gardens,MS,S2002MO101001
2109,Sandy Ridge,MS,S2005MS133001
2070,Scott,MS,S05MS-011-001
2086,Silver City,MS,S2004MS053001
2047,Spickard,MS,S2002MO079001
2064,Starkville,MS,S02MS-105-001
2082,TNC Fort Bayou,MS,03MS059001
2034,Tunica,MS,99MS143001
2035,Vance,MS,99MS135001
2117,Conrad Ag Rc,MT,S06MT073001
2019,Fort Assiniboine #1,MT,92MT041003
2121,Jordan,MT,S2006MT033001
581,Lindsay,MT,S2006MT021001
2119,Moccasin,MT,S06MT045001
2120,Sidney,MT,S06MT083001
2118,Violett,MT,S06MT051001
2008,Tidewater #1,NC,91NC187001
2020,Mandan #1,ND,92ND059400
2111,Johnson Farm,NE,S06NE029001
2001,Rogers Farm #1,NE,91NE109001
2069,Hubbard Brook,NH,S2002NH009001
2043,Mascoma River,NH,S1998NH009001
2015,Adam's Ranch,NM,92NM027001
2172,Alcalde,NM,S2010NM039001
2107,Crossroads,NM,S2004NM025001
2168,Jornada Exp Range,NM,S2009NM013011
2169,Los Lunas PMC,NM,S09NM061001
2108,Willow Wells,NM,S2004NM025002
2171,Sevilleta,NM,S2010NM053001
2141,Kyle Canyon,NV,S08NV003002
2146,Lovell Summit,NV,S08NV003004
2116,Lovelock NNR,NV,S2006NV027001
2144,Pine Nut,NV,S08NV003003
750,Sheldon,NV,89NV031003
2011,Geneva #1,NY,S1991NY069001
2014,Molly Caren #1,OH,S1992OH097001
2073,Sunleaf Nursery,OH,S2003OH085001
2022,Fort Reno #1,OK,S2001OK017001
2074,Lynhart Ranch,OR,03OR035015
2028,Mahantango Ck,PA,99PA049014
2036,Rock Springs Pa,PA,99PA027095
2067,Bosque Seco,PR,D99PR055007
2066,Combate,PR,D99PR023004
2188,Corozal,PR,S2004PR047001
2045,Guilarte Forest,PR,S2001PR153004
2052,Isabela,PR,82PR007001
15,Maricao Forest,PR,S2001PR125007R
2112,Mayaguez TARS,PR,S2007PR097001
2072,Eros Data Center,SD,S2003SD099001
2076,Allen Farms,TN,2003TN055001S
2077,Eastview Farm,TN,2003TN051004
2075,McAllister Farm,TN,2003TN103001
2006,Bushland #1,TX,91TX381001
2206,Kingsville,TX,S2012TX273017
2201,Knox City,TX,S2012TX275001
2106,Lehman,TX,S2004TX079001
2105,Levelland,TX,S2004TX219001
2016,Prairie View #1,TX,92TX473001
2104,Reese Center,TX,S2004TX303001
2138,Alkali Mesa,UT,S07UT037004
2207,Blue Creek,UT,S07UT003-001
2151,Buffalo Jump,UT,S2009UT033001
2136,Cache Junction,UT,S07UT005-001
2161,Cave Valley,UT,2014UT0531053
2150,Chicken Ridge,UT,S2009UT029001
2125,Circleville,UT,S07UT031001
2167,Dugway,UT,S2009UT045002
2130,Eastland,UT,S07UT037001
2128,Enterprise,UT,S07UT021001
2126,Ephraim,UT,S07UT039001
2165,Goshute,UT,S2009UT003001
2131,Green River,UT,S07UT015001
2160,Grouse Creek,UT,S2009UT003001
2164,Hals Canyon,UT,S2009UT027002
2166,Harms Way,UT,S2009UT037001
2127,Holden,UT,S07UT027001
2155,Little Red Fox,UT,S2009UT013004
2156,Manderfield,UT,S2009UT001001
2140,Mccracken Mesa,UT,S07UT037005
2129,Milford,UT,S07UT001001
2133,Morgan,UT,S2007UT029001
2134,Mountain Home,UT,S07UT013001
2137,Nephi,UT,S07UT023001
2157,Panguitch,UT,S2009UT017001
2153,Park Valley,UT,S2009UT003002
2132,Price,UT,S07UT007001
2159,Sand Hollow,UT,S2009UT053001
2154,Split Mountain,UT,1998MO039173
2163,Tule Valley,UT,S2009UT027001
2162,Vermillion,UT,S2009UT025001
2139,West Summit,UT,S07UT037003
2123,Upper Bethlehem,VI,S07VI010001
2042,Lye Brook,VT,S2000VT003002
2041,Mount Mansfield,VT,S2000VT007001
2198,Cook Farm Field D,WA,S2013WA075001
2021,Lind #1,WA,S1992WA001001
2196,UW Platteville,WI,S2004WI065002
2003,Webeno #1,WI,S1990WI041006
2018,Torrington #1,WY,92WY015000
Binary file added misc/scan-snotel-data/SCAN_Pedon_Master.xlsx
Binary file not shown.

0 comments on commit e4553d5

Please sign in to comment.