diff --git a/.Rbuildignore b/.Rbuildignore index f5aa41f..9a2c1d3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,3 +15,4 @@ README.Rmd ^Meta$ ^cran-comments\.md$ ^CRAN-SUBMISSION$ +^data-raw$ diff --git a/DESCRIPTION b/DESCRIPTION index 763a016..6b8a755 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,4 +46,7 @@ biocViews: Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 +Depends: + R (>= 2.10) +LazyData: true diff --git a/NEWS.md b/NEWS.md index d5fce7b..fb6fbaf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # volcalc (development version) * adds a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB` +* adds a dataset, `smarts_simpol1`, describing how functional groups are defined for the SIMPOL.1 and Meredith et al. methods # volcalc 2.1.2 diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..a056632 --- /dev/null +++ b/R/data.R @@ -0,0 +1,26 @@ +#' Search patterns used for SIMPOL.1 functional groups +#' +#' This dataframe documents how functional groups for the SIMPOL.1 and Meredith +#' et al. method are defined using SMARTS strings or `ChemmineR` functions. +#' +#' @format +#' \describe{ +#' \item{method}{Either "simpol1" for functional groups only used with the SIMPOL.1 method, or "meredith" for additional groups used in the Meredith et al. method.} +#' \item{functional_group}{These correspond to matching column names in the results of [get_fx_groups()].} +#' \item{description}{Functional group description from Table 5 of Pankow & Asher (2008)} +#' \item{smarts}{SMARTS strings used to capture groups, when applicable} +#' \item{fun}{The function used to capture the functional group. When `smarts` is not `NA`, this is always "[ChemmineR::smartsSearchOB]".
Other groups are captured with other `ChemmineR` functions or as calculations using other functional groups.} +#' \item{notes}{Notes including how any functional group counts are corrected when there is overlap. E.g. when one SMARTS pattern is a subset of another pattern, but the two groups are counted separately without overlap in the SIMPOL.1 method.} +#' } +#' +#' @references +#' Meredith L, Ledford S, Riemer K, Geffre P, Graves K, Honeker L, LeBauer D, +#' Tfaily M, Krechmer J. 2023. Automating methods for estimating metabolite +#' volatility. Frontiers in Microbiology. \doi{10.3389/fmicb.2023.1267234} +#' +#' Pankow, J.F., Asher, W.E. 2008. SIMPOL.1: a simple group +#' contribution method for predicting vapor pressures and enthalpies of +#' vaporization of multifunctional organic compounds. Atmos. Chem. Phys. +#' \doi{10.5194/acp-8-2773-2008} +#' +"smarts_simpol1" \ No newline at end of file diff --git a/R/get_fx_groups.R b/R/get_fx_groups.R index e5f1161..822b4e1 100644 --- a/R/get_fx_groups.R +++ b/R/get_fx_groups.R @@ -17,13 +17,16 @@ #' risk! Validation is not available on Windows. See **Details** for more #' information. #' -#' @details It is unfortunately difficult to capture errors and warnings -#' produced by the command line tool OpenBabel used by `ChemmineOB`, a -#' dependency of `volcalc`. These errors and warnings are printed to the R -#' console, but they are *not* R errors and do not stop code from running and -#' producing potentially incorrect data. `validate = TRUE` checks the output -#' of certain OpenBabel procedures for the *symptoms* of these errors, namely -#' missing values for InChI and molecular formula. Unfortunately, since InChI +#' @details For more details on how functional groups are defined, see the +#' [smarts_simpol1] data set. +#' +#' It is unfortunately difficult to capture errors and warnings produced by +#' the command line tool OpenBabel used by `ChemmineOB`, a dependency of +#' `volcalc`. 
These errors and warnings are printed to the R console, but they +#' are *not* R errors and do not stop code from running and producing +#' potentially incorrect data. `validate = TRUE` checks the output of certain +#' OpenBabel procedures for the *symptoms* of these errors, namely missing +#' values for InChI and molecular formula. Unfortunately, since InChI #' generation is not available with the Windows version of `ChemmineOB`, this #' validation step cannot be performed on Windows and `validate = TRUE` will #' simply print a warning that can be silenced by setting `validate = FALSE`. @@ -97,48 +100,7 @@ Set `validate = FALSE` to silence this warning.") carbon_dbl_count <- tibble::add_row(carbon_dbl_count, n = 0) } - # *_pattern are SMARTS strings: https://www.daylight.com/dayhtml_tutorials/languages/smarts/smarts_examples.html - carbon_dbl_bonds_pattern <- "C=C" #non-aromatic carbon double bonds - CCCO_pattern <- "C(C=C[AR1])(=O)[AR1]" #C=C-C=O in a non-aromatic ring - # ether_alkyl_pattern <- "[OD2]([C!R1])[C!R1]" #currently unused--ether_alkly calculated as total - other ethers - ether_alicyclic_pattern <- "[OD2]([C!R0])[C!R0]" - ether_aromatic_pattern <- "O(c)[C,c]" #only one of the carbons has to be aromatic - nitro_pattern <- "[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]" - hydroxyl_aromatic_pattern <- "[OX2H]c" - nitrate_pattern <- "[$([NX3](=[OX1])(=[OX1])O),$([NX3+]([OX1-])(=[OX1])O)]" - - #TODO need patterns for amines that don't pick up amides - amine_primary_pattern <- "[NX3;H2;!$(NC=[!#6]);!$(NC#[!#6])][#6X4]" - amine_secondary_pattern <- "[NX3H1!$(NC=[!#6])!$(NC#[!#6])]([#6X4])[#6X4]" - amine_tertiary_pattern <- "[NX3H0!$(NC=[!#6])!$(NC#[!#6])]([#6X4])([#6X4])[#6X4]" - amine_aromatic_pattern <- "[NX3;!$(NO)]c" - - amide_primary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H2]" - amide_secondary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]" - amide_tertiary_pattern <- - 
"[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]" - - # amide_total_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]" - - carbonylperoxynitrate_pattern <- "*C(=O)OO[N+1](=O)[O-1]" - peroxide_pattern <- "[OX2D2][OX2D2]" #this captures carbonylperoxynitrates too - hydroperoxide_pattern <- "[OX2][OX2H,OX1-]" #this captures peroxyacids too - carbonylperoxyacid_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][$([OX2H]),$([OX1-])]" - nitroester_pattern <- "C(=O)(OC)C~[NX3](-,=[OX1])-,=[OX1]" - # This captures OH groups on a ring that also has a nitro group (para, ortho, or meta). Need to correct aromatic hydroxyl count later. - nitrophenol_pattern <- - "[OX2H][$(c1ccccc1[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1cccc(c1)[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1ccc(cc1)[$([NX3](=O)=O),$([NX3+](=O)[O-])])]" - phosphoric_acid_pattern <- - "[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]" - phosphoric_ester_pattern <- - "[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]" - sulfate_pattern <- - "[$([#16X4](=[OX1])(=[OX1])([OX2H,OX1H0-])[OX2][#6]),$([#16X4+2]([OX1-])([OX1-])([OX2H,OX1H0-])[OX2][#6])]" - #sulfonate groups; sulfonate ions, and conjugate acid, sulfonic acids - sulfonate_pattern <- - "[#16X4](=[OX1])(=[OX1])([#6])[*$([O-1]),*$([OH1]),*$([OX2H0])]" - thiol_pattern <- "[#16X2H]" - carbothioester_pattern <- "S([#6])[CX3](=O)[#6]" + smarts <- smarts_patterns_simpol1 fx_groups_df <- dplyr::tibble( @@ -154,43 +116,42 @@ Set `validate = FALSE` to silence this warning.") rings_aromatic = as.integer(rings$AROMATIC), 
rings_total = as.integer(rings$RINGS), rings_aliphatic = NA_integer_, #calculated below - carbon_dbl_bonds_aliphatic = ChemmineR::smartsSearchOB(compound_sdf, carbon_dbl_bonds_pattern), - CCCO_aliphatic_ring = ChemmineR::smartsSearchOB(compound_sdf, CCCO_pattern), # C=C-C=O in a non-aromatic ring + carbon_dbl_bonds_aliphatic = ChemmineR::smartsSearchOB(compound_sdf, smarts$carbon_dbl_bonds_aliphatic), + CCCO_aliphatic_ring = ChemmineR::smartsSearchOB(compound_sdf, smarts$CCCO_aliphatic_ring), hydroxyl_total = groups$ROH, - hydroxyl_aromatic = ChemmineR::smartsSearchOB(compound_sdf, hydroxyl_aromatic_pattern, uniqueMatches = FALSE), + hydroxyl_aromatic = ChemmineR::smartsSearchOB(compound_sdf, smarts$hydroxyl_aromatic, uniqueMatches = FALSE), hydroxyl_aliphatic = NA_integer_, #calculated below aldehydes = groups$RCHO, ketones = groups$RCOR, carbox_acids = groups$RCOOH, ester = groups$RCOOR, ether_total = groups$ROR, - # ether_alkyl = ChemmineR::smartsSearchOB(compound_sdf, ether_alkyl_pattern), - ether_alkyl = NA_integer_, - ether_alicyclic = ChemmineR::smartsSearchOB(compound_sdf, ether_alicyclic_pattern), - ether_aromatic = ChemmineR::smartsSearchOB(compound_sdf, ether_aromatic_pattern), - nitrate = ChemmineR::smartsSearchOB(compound_sdf, nitrate_pattern), - nitro = ChemmineR::smartsSearchOB(compound_sdf, nitro_pattern), - amine_primary = ChemmineR::smartsSearchOB(compound_sdf, amine_primary_pattern), - amine_secondary = ChemmineR::smartsSearchOB(compound_sdf, amine_secondary_pattern), - amine_tertiary = ChemmineR::smartsSearchOB(compound_sdf, amine_tertiary_pattern), - amine_aromatic = ChemmineR::smartsSearchOB(compound_sdf, amine_aromatic_pattern), - amide_primary = ChemmineR::smartsSearchOB(compound_sdf, amide_primary_pattern), - amide_secondary = ChemmineR::smartsSearchOB(compound_sdf, amide_secondary_pattern), - amide_tertiary = ChemmineR::smartsSearchOB(compound_sdf, amide_tertiary_pattern), - carbonylperoxynitrate = ChemmineR::smartsSearchOB(compound_sdf, 
carbonylperoxynitrate_pattern), - peroxide = ChemmineR::smartsSearchOB(compound_sdf, peroxide_pattern), - hydroperoxide = ChemmineR::smartsSearchOB(compound_sdf, hydroperoxide_pattern), - carbonylperoxyacid = ChemmineR::smartsSearchOB(compound_sdf, carbonylperoxyacid_pattern), - nitrophenol = ChemmineR::smartsSearchOB(compound_sdf, nitrophenol_pattern), - nitroester = ChemmineR::smartsSearchOB(compound_sdf, nitroester_pattern), + ether_alkyl = NA_integer_, #calculated below + ether_alicyclic = ChemmineR::smartsSearchOB(compound_sdf, smarts$ether_alicyclic), + ether_aromatic = ChemmineR::smartsSearchOB(compound_sdf, smarts$ether_aromatic), + nitrate = ChemmineR::smartsSearchOB(compound_sdf, smarts$nitrate), + nitro = ChemmineR::smartsSearchOB(compound_sdf, smarts$nitro), + amine_primary = ChemmineR::smartsSearchOB(compound_sdf, smarts$amine_primary), + amine_secondary = ChemmineR::smartsSearchOB(compound_sdf, smarts$amine_secondary), + amine_tertiary = ChemmineR::smartsSearchOB(compound_sdf, smarts$amine_tertiary), + amine_aromatic = ChemmineR::smartsSearchOB(compound_sdf, smarts$amine_aromatic), + amide_primary = ChemmineR::smartsSearchOB(compound_sdf, smarts$amide_primary), + amide_secondary = ChemmineR::smartsSearchOB(compound_sdf, smarts$amide_secondary), + amide_tertiary = ChemmineR::smartsSearchOB(compound_sdf, smarts$amide_tertiary), + carbonylperoxynitrate = ChemmineR::smartsSearchOB(compound_sdf, smarts$carbonylperoxynitrate), + peroxide = ChemmineR::smartsSearchOB(compound_sdf, smarts$peroxide), + hydroperoxide = ChemmineR::smartsSearchOB(compound_sdf, smarts$hydroperoxide), + carbonylperoxyacid = ChemmineR::smartsSearchOB(compound_sdf, smarts$carbonylperoxyacid), + nitrophenol = ChemmineR::smartsSearchOB(compound_sdf, smarts$nitrophenol), + nitroester = ChemmineR::smartsSearchOB(compound_sdf, smarts$nitroester), # Additional groups from Meredith et al. 
2023 - phosphoric_acids = ChemmineR::smartsSearchOB(compound_sdf, phosphoric_acid_pattern), - phosphoric_esters = ChemmineR::smartsSearchOB(compound_sdf, phosphoric_ester_pattern), - sulfates = ChemmineR::smartsSearchOB(compound_sdf, sulfate_pattern), - sulfonates = ChemmineR::smartsSearchOB(compound_sdf, sulfonate_pattern), - thiols = ChemmineR::smartsSearchOB(compound_sdf, thiol_pattern), - carbothioesters = ChemmineR::smartsSearchOB(compound_sdf, carbothioester_pattern), + phosphoric_acids = ChemmineR::smartsSearchOB(compound_sdf, smarts$phosphoric_acids), + phosphoric_esters = ChemmineR::smartsSearchOB(compound_sdf, smarts$phosphoric_esters), + sulfates = ChemmineR::smartsSearchOB(compound_sdf, smarts$sulfates), + sulfonates = ChemmineR::smartsSearchOB(compound_sdf, smarts$sulfonates), + thiols = ChemmineR::smartsSearchOB(compound_sdf, smarts$thiols), + carbothioesters = ChemmineR::smartsSearchOB(compound_sdf, smarts$carbothioesters), oxygens = atoms[["O"]] %||% 0L, chlorines = atoms[["Cl"]] %||% 0L, nitrogens = atoms[["N"]] %||% 0L, diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 0000000..bb23864 Binary files /dev/null and b/R/sysdata.rda differ diff --git a/data-raw/README.md b/data-raw/README.md new file mode 100644 index 0000000..236cb11 --- /dev/null +++ b/data-raw/README.md @@ -0,0 +1,4 @@ +In the future, if new methods are added, create a separate .csv file named `smarts_<method>.csv` (e.g. `smarts_newmethod.csv`). +To turn this into a user-facing dataset, edit `make_data.R` to add another `usethis::use_data()` and document it by adding a new entry to `R/data.R`. To also use this in internal data, it needs to be added as an argument to `usethis::use_data(..., internal = TRUE)` since only one sysdata.rda can exist for holding internal data. E.g. `usethis::use_data(smarts_patterns_simpol1, smarts_patterns_newmethod, internal = TRUE, overwrite = TRUE)` + +Be sure to run the code in `make_data.R` and to run `devtools::document()` to update data and documentation.
\ No newline at end of file diff --git a/data-raw/make_data.R b/data-raw/make_data.R new file mode 100644 index 0000000..65dcab0 --- /dev/null +++ b/data-raw/make_data.R @@ -0,0 +1,14 @@ +## Prepare the `smarts_simpol1` datasets; the relative path below assumes the package root is the working directory +smarts_simpol1 <- readr::read_csv("data-raw/smarts_simpol1.csv") + +# Create the user-facing data frame +usethis::use_data(smarts_simpol1, overwrite = TRUE) + +# Create the internal named list with just SMARTS strings (plain call rather than `%>%`, which this script never attaches) +just_smarts_simpol1 <- dplyr::filter( +  smarts_simpol1, !is.na(smarts) +) +smarts_patterns_simpol1 <- as.list(just_smarts_simpol1$smarts) +names(smarts_patterns_simpol1) <- just_smarts_simpol1$functional_group + +usethis::use_data(smarts_patterns_simpol1, internal = TRUE, overwrite = TRUE) diff --git a/data-raw/smarts_simpol1.csv b/data-raw/smarts_simpol1.csv new file mode 100644 index 0000000..464f218 --- /dev/null +++ b/data-raw/smarts_simpol1.csv @@ -0,0 +1,39 @@ +method,functional_group,description,smarts,fun,notes +simpol1,carbons_asa,carbon number on the acid-side of an amide,NA,NA,Not possible to capture with SMARTS +simpol1,rings_aromatic,aromatic ring,NA,ChemmineR::rings, +simpol1,rings_total,,NA,ChemmineR::rings, +simpol1,rings_aliphatic,non-aromatic ring,NA,rings_total - rings_aromatic, +simpol1,carbon_dbl_bonds_aliphatic,C=C (non-aromatic),C=C,ChemmineR::smartsSearchOB, +simpol1,CCCO_aliphatic_ring,C=C-C=O in non-aromatic ring,C(C=C[AR1])(=O)[AR1],ChemmineR::smartsSearchOB, +simpol1,hydroxyl_total,,NA,ChemmineR::groups, +simpol1,hydroxyl_aromatic,"aromatic hydroxyl (e.g., phenol)",[OX2H]c,ChemmineR::smartsSearchOB,"This pattern also captures nitrophenols, so the number of nitrophenols is subtracted" +simpol1,hydroxyl_aliphatic,hydroxyl (alkyl),NA,hydroxyl_total - hydroxyl_aromatic, +simpol1,aldehydes,aldehyde,NA,ChemmineR::groups, +simpol1,ketones,ketone,NA,ChemmineR::groups, +simpol1,carbox_acids,carboxylic acid,NA,ChemmineR::groups, +simpol1,ester,ester,NA,ChemmineR::groups,"This also captures
carbonylperoxynitrates and nitroesters, so the numbers of carbonylperoxynitrates and nitroesters are subtracted" +simpol1,ether_total,,NA,ChemmineR::groups, +simpol1,ether_alkyl,ether,NA,ether_total - ether_alicyclic - ether_aromatic, +simpol1,ether_alicyclic,ether (alicyclic),[OD2]([C!R0])[C!R0],ChemmineR::smartsSearchOB, +simpol1,ether_aromatic,"ether, aromatic","O(c)[C,c]",ChemmineR::smartsSearchOB,Only one of the carbons has to be aromatic +simpol1,nitrate,nitrate,"[$([NX3](=[OX1])(=[OX1])O),$([NX3+]([OX1-])(=[OX1])O)]",ChemmineR::smartsSearchOB,"This pattern also captures carbonylperoxynitrates, so the number of carbonylperoxynitrates is subtracted" +simpol1,nitro,nitro,"[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]",ChemmineR::smartsSearchOB, +simpol1,amine_primary,"amine, primary",[NX3;H2;!$(NC=[!#6]);!$(NC#[!#6])][#6X4],ChemmineR::smartsSearchOB, +simpol1,amine_secondary,"amine, secondary",[NX3H1!$(NC=[!#6])!$(NC#[!#6])]([#6X4])[#6X4],ChemmineR::smartsSearchOB, +simpol1,amine_tertiary,"amine, tertiary",[NX3H0!$(NC=[!#6])!$(NC#[!#6])]([#6X4])([#6X4])[#6X4],ChemmineR::smartsSearchOB, +simpol1,amine_aromatic,"amine, aromatic",[NX3;!$(NO)]c,ChemmineR::smartsSearchOB, +simpol1,amide_primary,"amide, primary","[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H2]",ChemmineR::smartsSearchOB, +simpol1,amide_secondary,"amide, secondary","[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]",ChemmineR::smartsSearchOB, +simpol1,amide_tertiary,"amide, tertiary","[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]",ChemmineR::smartsSearchOB, +simpol1,carbonylperoxynitrate,carbonylperoxynitrate,*C(=O)OO[N+1](=O)[O-1],ChemmineR::smartsSearchOB, +simpol1,peroxide,peroxide,[OX2D2][OX2D2],ChemmineR::smartsSearchOB,"This pattern also captures carbonylperoxynitrates, so the number of carbonylperoxynitrates is subtracted" +simpol1,hydroperoxide,hydroperoxide,"[OX2][OX2H,OX1-]",ChemmineR::smartsSearchOB,"This pattern also captures peroxyacids, so the number
of carbonylperoxyacids is subtracted" +simpol1,carbonylperoxyacid,carbonylperoxyacid,"[CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][$([OX2H]),$([OX1-])]",ChemmineR::smartsSearchOB, +simpol1,nitrophenol,nitrophenol,"[OX2H][$(c1ccccc1[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1cccc(c1)[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1ccc(cc1)[$([NX3](=O)=O),$([NX3+](=O)[O-])])]",ChemmineR::smartsSearchOB,"This pattern captures OH groups on a ring that also has a nitro group (para, ortho, or meta)" +simpol1,nitroester,nitroester,"C(=O)(OC)C~[NX3](-,=[OX1])-,=[OX1]",ChemmineR::smartsSearchOB, +meredith,phosphoric_acids,,"[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]",ChemmineR::smartsSearchOB,"This pattern also captures phosphoric esters, so the number of phosphoric esters is subtracted" +meredith,phosphoric_esters,,"[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]",ChemmineR::smartsSearchOB, +meredith,sulfates,,"[$([#16X4](=[OX1])(=[OX1])([OX2H,OX1H0-])[OX2][#6]),$([#16X4+2]([OX1-])([OX1-])([OX2H,OX1H0-])[OX2][#6])]",ChemmineR::smartsSearchOB, +meredith,sulfonates,,"[#16X4](=[OX1])(=[OX1])([#6])[*$([O-1]),*$([OH1]),*$([OX2H0])]",ChemmineR::smartsSearchOB,This pattern captures sulfonate ions and their conjugate acids (sulfonic acids) +meredith,thiols,,[#16X2H],ChemmineR::smartsSearchOB, +meredith,carbothioesters,,S([#6])[CX3](=O)[#6],ChemmineR::smartsSearchOB, diff --git a/data/smarts_simpol1.rda b/data/smarts_simpol1.rda new file mode 100644 index 0000000..b6ff863 Binary files /dev/null and b/data/smarts_simpol1.rda differ diff --git a/man/get_fx_groups.Rd b/man/get_fx_groups.Rd index 41eadf6..dbd2c08 100644 --- a/man/get_fx_groups.Rd +++
b/man/get_fx_groups.Rd @@ -26,13 +26,16 @@ for specified compounds. Users will not typically interact with this function directly, but rather by using \code{\link[=calc_vol]{calc_vol()}}. } \details{ -It is unfortunately difficult to capture errors and warnings -produced by the command line tool OpenBabel used by \code{ChemmineOB}, a -dependency of \code{volcalc}. These errors and warnings are printed to the R -console, but they are \emph{not} R errors and do not stop code from running and -producing potentially incorrect data. \code{validate = TRUE} checks the output -of certain OpenBabel procedures for the \emph{symptoms} of these errors, namely -missing values for InChI and molecular formula. Unfortunately, since InChI +For more details on how functional groups are defined, see the +\link{smarts_simpol1} data set. + +It is unfortunately difficult to capture errors and warnings produced by +the command line tool OpenBabel used by \code{ChemmineOB}, a dependency of +\code{volcalc}. These errors and warnings are printed to the R console, but they +are \emph{not} R errors and do not stop code from running and producing +potentially incorrect data. \code{validate = TRUE} checks the output of certain +OpenBabel procedures for the \emph{symptoms} of these errors, namely missing +values for InChI and molecular formula. Unfortunately, since InChI generation is not available with the Windows version of \code{ChemmineOB}, this validation step cannot be performed on Windows and \code{validate = TRUE} will simply print a warning that can be silenced by setting \code{validate = FALSE}. 
diff --git a/man/smarts_simpol1.Rd b/man/smarts_simpol1.Rd new file mode 100644 index 0000000..f9e5534 --- /dev/null +++ b/man/smarts_simpol1.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{smarts_simpol1} +\alias{smarts_simpol1} +\title{Search patterns used for SIMPOL.1 functional groups} +\format{ +\describe{ +\item{method}{Either "simpol1" for functional groups only used with the SIMPOL.1 method, or "meredith" for additional groups used in the Meredith et al. method.} +\item{functional_group}{These correspond to matching column names in the results of \code{\link[=get_fx_groups]{get_fx_groups()}}.} +\item{description}{Functional group description from Table 5 of Pankow & Asher (2008)} +\item{smarts}{SMARTS strings used to capture groups, when applicable} +\item{fun}{The function used to capture the functional group. When \code{smarts} is not \code{NA}, this is always "\link[ChemmineR:smartsSearchOB]{ChemmineR::smartsSearchOB}". Other groups are captured with other \code{ChemmineR} functions or as calculations using other functional groups.} +\item{notes}{Notes including how any functional group counts are corrected when there is overlap. E.g. when one SMARTS pattern is a subset of another pattern, but the two groups are counted separately without overlap in the SIMPOL.1 method.} +} +} +\usage{ +smarts_simpol1 +} +\description{ +This dataframe documents how functional groups for the SIMPOL.1 and Meredith +et al. method are defined using SMARTS strings or \code{ChemmineR} functions. +} +\references{ +Meredith L, Ledford S, Riemer K, Geffre P, Graves K, Honeker L, LeBauer D, +Tfaily M, Krechmer J. 2023. Automating methods for estimating metabolite +volatility. Frontiers in Microbiology. \doi{10.3389/fmicb.2023.1267234} + +Pankow, J.F., Asher, W.E. 2008.
SIMPOL.1: a simple group +contribution method for predicting vapor pressures and enthalpies of +vaporization of multifunctional organic compounds. Atmos. Chem. Phys. +\doi{10.5194/acp-8-2773-2008} +} +\keyword{datasets}