From 65179540a0f10618eb20208d079c9be9de137e2a Mon Sep 17 00:00:00 2001 From: Beaudette Date: Thu, 9 Mar 2023 10:26:38 -0800 Subject: [PATCH] ideas related to #43 --- misc/print-ST.R | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/misc/print-ST.R b/misc/print-ST.R index 9ac9c45..ec3556b 100644 --- a/misc/print-ST.R +++ b/misc/print-ST.R @@ -9,36 +9,45 @@ library(data.tree) ## TODO: # * add subgroup acreage # * 13th ed. -# * compact style of dir_tree(), but in proper order # * add series count to subgroups +# 12th ed. data("ST") +# latest SC DB sc <- get_soilseries_from_NASIS() +# subset / rename columns for simpler joining sc <- sc[, c('soilseriesname', 'taxclname', 'taxsubgrp')] names(sc) <- c('series', 'family', 'subgroup') +# combine ST hierarchy + series by subgroup +# this will introduce family details z <- merge(ST, sc, by.x = 'subgroup', all.x = TRUE, sort = FALSE) head(z) +# normalization via lower case z$family <- tolower(z$family) z$series <- tolower(z$series) +# remove subgroup component of family spec z$f <- NA_character_ for(i in 1:nrow(z)) { z$f[i] <- gsub(pattern = z$subgroup[i], replacement = '', z$family[i], fixed = TRUE) } +# remove white space z$f <- trimws(z$f, which = 'both') +# ok head(z) # ordering used by 'Keys z <- z[order(z$code, method = 'radix'), ] - +## hack #1: use directory listing for a compact representation +# note: order isn't correct td <- tempdir() unlink(file.path(td, 'ST'), recursive = TRUE) @@ -70,16 +79,20 @@ for(i in 1:nrow(z)) { dir.create(path = fp, recursive = TRUE) } + +# dump output to text file setwd(td) sink('e:/temp/st12.txt') dir_tree(file.path('ST')) sink() +# cleanup unlink(file.path(td, 'ST'), recursive = TRUE) - +## hack #2: use directory listing for a compact representation +## prefix with taxon codes for correct ordering for(i in 1:nrow(z)) { # account for subgroups without series @@ -110,25 +123,35 @@ for(i in 1:nrow(z)) { dir.create(path = fp, recursive = 
TRUE) } +# save output to text file setwd(td) sink('e:/temp/st12-codes.txt') dir_tree(file.path('ST')) sink() +# cleanup unlink(file.path(td, 'ST'), recursive = TRUE) +## correct ordering via data.tree, as long as order of `z` is correct +# less compact, but doesn't require crazy file system manipulation +# required columns only, smaller data.tree v <- c('order', 'suborder', 'greatgroup', 'subgroup', 'f', 'series', 'path') + +# init data.tree object z$path <- sprintf("ST/%s/%s/%s/%s/%s/%s", z$order, z$suborder, z$greatgroup, z$subgroup, z$f, z$series) n <- as.Node(z[, v], pathName = 'path') +## missing family / series result in an ugly tree, prune accordingly # prune missing family / series pf <- function(i) { + # NA due to left join + # note odd approach required, matching to 'NA' vs. is.na() if(GetAttribute(i, 'name') == 'NA') { return(FALSE) } else { @@ -137,6 +160,7 @@ pf <- function(i) { } +# dump to text file options('max.print' = 1e7) sink('e:/temp/st12-DT.txt') print(n, limit = NULL, pruneFun = pf) @@ -144,3 +168,5 @@ sink() options('max.print' = 1000) + +